====== 2024-11 Beyond the Boundaries of Proximal Policy Optimization ======

  * https://arxiv.org/abs/2411.00666

<code python>
with torch.no_grad():
    mf = 0.3
    lr = 0.7
    for param_old, param, m in zip(model_old.parameters(), model.parameters(), momentum):
        grad = param - param_old
        m.copy_(mf * m + grad)
        param.copy_(param_old + lr * (m + mf * grad))
        param_old.copy_(param)
</code>

{{tag>PPO RL 2024 분산학습2 }}