import torch
import torch.nn as nn
import torch.optim as optim
from ding.torch_utils.optimizer_helper import Adam, RMSprop, calculate_grad_norm, \
    calculate_grad_norm_without_bias_two_norm, PCGrad, configure_weight_decay
import pytest
import time
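
# Minimal one-layer linear network with constant-initialized parameters,
# used as a deterministic optimization target throughout these tests.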
class LinearNet(nn.Module):

    def __init__(self, features_in=1, features_out=1):
        super().__init__()
        self.linear = nn.Linear(features_in, features_out)
        self._init_weight()

    def forward(self, x):
        return self.linear(x)

    def _init_weight(self):
        nn.init.constant_(self.linear.weight, val=1)
        nn.init.constant_(self.linear.bias, val=0)
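
# Helper: build an Adam/AdamW/RMSprop optimizer with the requested grad-clip or
# grad-ignore option, run several optimization steps and return the final weight.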
def try_optim_with(tname, t, optim_t):
    net = LinearNet()
    mse_fn = nn.L1Loss()
    if tname == 'grad_clip':
        if optim_t == 'rmsprop':
            optimizer = RMSprop(
                net.parameters(),
                grad_clip_type=t,
                clip_value=0.000001,
                clip_norm_type=1.2,
                lr=0.1,
                clip_momentum_timestep=2,
                ignore_momentum_timestep=2,
                clip_coef=0.5
            )
        else:
            optimizer = Adam(
                net.parameters(),
                grad_clip_type=t,
                clip_value=0.000001,
                clip_norm_type=1.2,
                lr=0.1,
                optim_type=optim_t,
                clip_momentum_timestep=2,
                ignore_momentum_timestep=2,
                clip_coef=0.5
            )
    if tname == 'grad_ignore':
        if optim_t == 'rmsprop':
            optimizer = RMSprop(
                net.parameters(),
                grad_ignore_type=t,
                clip_value=0.000001,
                ignore_value=0.000001,
                ignore_norm_type=1.2,
                lr=0.1,
                clip_momentum_timestep=2,
                ignore_momentum_timestep=2,
            )
        else:
            optimizer = Adam(
                net.parameters(),
                grad_ignore_type=t,
                clip_value=0.000001,
                ignore_value=0.000001,
                ignore_norm_type=1.2,
                lr=0.1,
                optim_type=optim_t,
                clip_momentum_timestep=2,
                ignore_momentum_timestep=2,
                ignore_coef=0.01
            )
    # network input and target label
    x = torch.FloatTensor([120])
    x.requires_grad = True
    target_value = torch.FloatTensor([2])
    target_value.requires_grad = True
    # loss computation and optimization steps
    for _ in range(10):
        predict = net(x)
        loss = mse_fn(predict, target_value)
        loss.backward()
        optimizer.step()
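    # when a clip (not ignore) option is enabled, the wrapped optimizer should
    # have recorded a non-zero gradient via get_grad()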
    if t is not None and 'ignore' not in t:
        assert optimizer.get_grad() != 0.
    for _ in range(10):
        target_value = torch.FloatTensor([_ ** 2])
        target_value.requires_grad = True
        predict = net(x)
        loss = mse_fn(predict, target_value)
        loss.backward()
        optimizer.step()
    if t is None:
        print("weight without optimizer clip:" + str(net.linear.weight))
    else:
        print("weight with optimizer {} of type: {} is ".format(tname, t) + str(net.linear.weight))
    weight = net.linear.weight
    return weight
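
# Exercise the ding Adam wrapper (adam and adamw) with every supported
# grad-clip and grad-ignore type.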
class TestAdam:

    def test_naive(self):
        support_type = {
            'optim': ['adam', 'adamw'],
            'grad_clip': [None, 'clip_momentum', 'clip_value', 'clip_norm', 'clip_momentum_norm'],
            'grad_norm': [None],
            'grad_ignore': [None, 'ignore_momentum', 'ignore_value', 'ignore_norm', 'ignore_momentum_norm'],
        }
        for optim_t in support_type['optim']:
            for tname in ['grad_clip', 'grad_ignore']:
                for t in support_type[tname]:
                    try_optim_with(tname=tname, t=t, optim_t=optim_t)
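
# Same coverage as TestAdam, but for the RMSprop wrapper.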
class TestRMSprop:

    def test_naive(self):
        support_type = {
            'grad_clip': [None, 'clip_momentum', 'clip_value', 'clip_norm', 'clip_momentum_norm'],
            'grad_norm': [None],
            'grad_ignore': [None, 'ignore_momentum', 'ignore_value', 'ignore_norm', 'ignore_momentum_norm'],
        }
        for tname in ['grad_clip', 'grad_ignore']:
            for t in support_type[tname]:
                try_optim_with(tname=tname, t=t, optim_t='rmsprop')
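
# Check that the gradient-norm helpers return plain Python floats for the
# inf-norm, 1-norm and 2-norm, with and without bias parameters.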
class Test_calculate_grad_norm_with_without_bias:

    def test_two_functions(self):
        net = LinearNet()
        mse_fn = nn.L1Loss()
        optimizer = Adam(net.parameters())
        x = torch.FloatTensor([120])
        x.requires_grad = True
        target_value = torch.FloatTensor([2])
        target_value.requires_grad = True
        for _ in range(10):
            predict = net(x)
            loss = mse_fn(predict, target_value)
            loss.backward()
            optimizer.step()
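        # gradient norms are computed over the gradients accumulated above,
        # with different norm types and with/without the bias term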
        inf_norm = calculate_grad_norm(model=net, norm_type='inf')
        two_norm = calculate_grad_norm(model=net)
        two_norm_nobias = float(calculate_grad_norm_without_bias_two_norm(model=net))
        one_norm = calculate_grad_norm(model=net, norm_type=1)
        assert isinstance(two_norm, float)
        assert isinstance(inf_norm, float)
        assert isinstance(one_norm, float)
        assert isinstance(two_norm_nobias, float)
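
# Smoke test for the PCGrad wrapper: back-propagate two losses on the same
# prediction through pc_backward and check the parameters stay plain tensors.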
class TestPCGrad:

    def test_naive(self):
        x, y = torch.randn(2, 3), torch.randn(2, 4)
        net = LinearNet(3, 4)
        y_pred = net(x)
        pc_adam = PCGrad(optim.Adam(net.parameters()))
        pc_adam.zero_grad()
        loss1_fn, loss2_fn = nn.L1Loss(), nn.MSELoss()
        loss1, loss2 = loss1_fn(y_pred, y), loss2_fn(y_pred, y)
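        # pc_backward combines the per-loss gradients, projecting away
        # conflicting components before they are applied to the parameters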
        pc_adam.pc_backward([loss1, loss2])
        for p in net.parameters():
            assert isinstance(p, torch.Tensor)
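
# configure_weight_decay should apply weight decay only to weight matrices,
# leaving biases and LayerNorm parameters without decay.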
class TestWeightDecay:

    def test_wd(self):
        net = nn.Sequential(nn.Linear(3, 4), nn.LayerNorm(4))
        x = torch.randn(1, 3)
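        # split parameters into a decay group (the Linear weight) and a
        # no-decay group (Linear bias, LayerNorm weight and bias)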
        group_params = configure_weight_decay(model=net, weight_decay=1e-4)
        assert group_params[0]['weight_decay'] == 1e-4
        assert group_params[1]['weight_decay'] == 0
        assert len(group_params[0]['params']) == 1
        assert len(group_params[1]['params']) == 3
        opt = Adam(group_params, lr=1e-2)
        opt.zero_grad()
        y = torch.sum(net(x))
        y.backward()
        opt.step()