Spaces:

OpenDILabCommunity
/

gomoku

Sleeping

App Files Files Community

gomoku / DI-engine /ding /policy /tests /test_common_utils.py

zjowowen

init space

3dfe8fb almost 2 years ago

raw

history blame contribute delete

5.63 kB

	import unittest
	import pytest
	import numpy as np
	import torch
	import treetensor.torch as ttorch

	from ding.policy.common_utils import default_preprocess_learn

	shape_test = [
	[2],
	[1],
	]

	dtype_test = [
	"int64",
	"float32",
	]

	data_type_test = [
	"numpy",
	"torch",
	"treetensor",
	]


	def get_action(shape, dtype, class_type):
	if class_type == "numpy":
	if dtype == "int64":
	dtype = np.int64
	elif dtype == "float32":
	dtype = np.float32
	return np.random.randn(*shape).astype(dtype)
	else:
	if dtype == "int64":
	dtype = torch.int64
	elif dtype == "float32":
	dtype = torch.float32

	if class_type == "torch":
	return torch.randn(*shape).type(dtype)
	elif class_type == "treetensor":
	return ttorch.randn(*shape).type(dtype)


	@pytest.mark.unittest
	def test_default_preprocess_learn_action():

	for shape in shape_test:
	for dtype in dtype_test:
	for data_type in data_type_test:

	data = [
	{
	'obs': np.random.randn(4, 84, 84),
	'action': get_action(shape, dtype, data_type),
	'reward': 1.0,
	'next_obs': np.random.randn(4, 84, 84),
	'done': False,
	'weight': 1.0,
	'value': 1.0,
	'adv': 1.0,
	} for _ in range(10)
	]
	use_priority_IS_weight = False
	use_priority = False
	use_nstep = False
	ignore_done = False
	data = default_preprocess_learn(data, use_priority_IS_weight, use_priority, use_nstep, ignore_done)

	assert data['obs'].shape == torch.Size([10, 4, 84, 84])
	if dtype in ["int64"] and shape[0] == 1:
	assert data['action'].shape == torch.Size([10])
	else:
	assert data['action'].shape == torch.Size([10, *shape])
	assert data['reward'].shape == torch.Size([10])
	assert data['next_obs'].shape == torch.Size([10, 4, 84, 84])
	assert data['done'].shape == torch.Size([10])
	assert data['weight'].shape == torch.Size([10])
	assert data['value'].shape == torch.Size([10])
	assert data['adv'].shape == torch.Size([10])


	@pytest.mark.unittest
	def test_default_preprocess_learn_reward_done_adv_1d():

	data = [
	{
	'obs': np.random.randn(4, 84, 84),
	'action': np.random.randn(2),
	'reward': np.array([1.0]),
	'next_obs': np.random.randn(4, 84, 84),
	'done': False,
	'value': np.array([1.0]),
	'adv': np.array([1.0]),
	} for _ in range(10)
	]
	use_priority_IS_weight = False
	use_priority = False
	use_nstep = False
	ignore_done = False
	data = default_preprocess_learn(data, use_priority_IS_weight, use_priority, use_nstep, ignore_done)

	assert data['reward'].shape == torch.Size([10])
	assert data['done'].shape == torch.Size([10])
	assert data['weight'] is None
	assert data['value'].shape == torch.Size([10])
	assert data['adv'].shape == torch.Size([10])


	@pytest.mark.unittest
	def test_default_preprocess_learn_ignore_done():
	data = [
	{
	'obs': np.random.randn(4, 84, 84),
	'action': np.random.randn(2),
	'reward': np.array([1.0]),
	'next_obs': np.random.randn(4, 84, 84),
	'done': True,
	'value': np.array([1.0]),
	'adv': np.array([1.0]),
	} for _ in range(10)
	]
	use_priority_IS_weight = False
	use_priority = False
	use_nstep = False
	ignore_done = True
	data = default_preprocess_learn(data, use_priority_IS_weight, use_priority, use_nstep, ignore_done)

	assert data['done'].dtype == torch.float32
	assert torch.sum(data['done']) == 0


	@pytest.mark.unittest
	def test_default_preprocess_learn_use_priority_IS_weight():
	data = [
	{
	'obs': np.random.randn(4, 84, 84),
	'action': np.random.randn(2),
	'reward': 1.0,
	'next_obs': np.random.randn(4, 84, 84),
	'done': False,
	'priority_IS': 1.0,
	'value': 1.0,
	'adv': 1.0,
	} for _ in range(10)
	]
	use_priority_IS_weight = True
	use_priority = True
	use_nstep = False
	ignore_done = False
	data = default_preprocess_learn(data, use_priority_IS_weight, use_priority, use_nstep, ignore_done)

	assert data['weight'].shape == torch.Size([10])
	assert torch.sum(data['weight']) == torch.tensor(10.0)


	@pytest.mark.unittest
	def test_default_preprocess_learn_nstep():
	data = [
	{
	'obs': np.random.randn(4, 84, 84),
	'action': np.random.randn(2),
	'reward': np.array([1.0, 2.0, 0.0]),
	'next_obs': np.random.randn(4, 84, 84),
	'done': False,
	'value': 1.0,
	'adv': 1.0,
	} for _ in range(10)
	]
	use_priority_IS_weight = False
	use_priority = False
	use_nstep = True
	ignore_done = False
	data = default_preprocess_learn(data, use_priority_IS_weight, use_priority, use_nstep, ignore_done)

	assert data['reward'].shape == torch.Size([3, 10])
	assert data['reward'][0][0] == torch.tensor(1.0)
	assert data['reward'][1][0] == torch.tensor(2.0)
	assert data['reward'][2][0] == torch.tensor(0.0)