def sgd_adagrad(parameters, sqrs, lr, eps=1e-10):
    """Apply one Adagrad update step to each parameter, in place.

    For every (parameter, accumulator) pair, the squared gradient is added
    to the accumulator and the parameter is moved against its gradient with
    a per-element step of ``lr / sqrt(accumulator + eps)``.

    Args:
        parameters: Iterable of objects exposing ``.data`` and ``.grad``
            (presumably ``torch.nn.Parameter`` instances -- confirm with
            the caller).
        sqrs: Iterable of accumulator tensors, paired positionally with
            ``parameters``. Each one is modified in place so the running
            sum of squared gradients persists across calls.
        lr: Base learning rate.
        eps: Small constant added under the square root so the division
            stays finite while an accumulator entry is still zero.

    Returns:
        None. ``parameters`` and ``sqrs`` are updated as a side effect.
    """
    for param, sqr in zip(parameters, sqrs):
        if param.grad is None:
            # No gradient was produced for this parameter (e.g. it is
            # frozen or unused); leave it and its accumulator untouched.
            continue
        grad = param.grad.data
        # Slice-assign so the caller's accumulator tensor itself is updated
        # rather than rebinding the local name to a new tensor.
        sqr[:] = sqr + grad ** 2
        div = lr / torch.sqrt(sqr + eps) * grad
        param.data = param.data - div
# NOTE(review): an identical `sgd_adagrad` is defined earlier in this file;
# this later definition is the one that takes effect. Consider removing one.
def sgd_adagrad(parameters, sqrs, lr, eps=1e-10):
    """Apply one Adagrad update step to each parameter, in place.

    For every (parameter, accumulator) pair, the squared gradient is added
    to the accumulator and the parameter is moved against its gradient with
    a per-element step of ``lr / sqrt(accumulator + eps)``.

    Args:
        parameters: Iterable of objects exposing ``.data`` and ``.grad``
            (presumably ``torch.nn.Parameter`` instances -- confirm with
            the caller).
        sqrs: Iterable of accumulator tensors, paired positionally with
            ``parameters``. Each one is modified in place so the running
            sum of squared gradients persists across calls.
        lr: Base learning rate.
        eps: Small constant added under the square root so the division
            stays finite while an accumulator entry is still zero.

    Returns:
        None. ``parameters`` and ``sqrs`` are updated as a side effect.
    """
    for param, sqr in zip(parameters, sqrs):
        if param.grad is None:
            # No gradient was produced for this parameter (e.g. it is
            # frozen or unused); leave it and its accumulator untouched.
            continue
        grad = param.grad.data
        # Slice-assign so the caller's accumulator tensor itself is updated
        # rather than rebinding the local name to a new tensor.
        sqr[:] = sqr + grad ** 2
        div = lr / torch.sqrt(sqr + eps) * grad
        param.data = param.data - div