continuous_control
문서의 이전 판입니다!
Continuous Control
A2C
<code python>
def cont_logprob(mu, var, actions, min_var=1e-3):
    """Return the element-wise Gaussian log-density of ``actions``.

    Evaluates ``-(mu - actions)**2 / (2 * v) - 0.5 * log(2 * pi * v)`` where
    ``v`` is ``var`` clamped from below at ``min_var``.

    The same clamped variance is used in both terms. Clamping only the
    quadratic term (and taking the log of the raw variance) yields ``+inf``
    when ``var`` is exactly zero and makes the two terms describe different
    distributions for small variances.

    Args:
        mu: Mean of the distribution; combined with ``actions`` by ordinary
            tensor arithmetic.
        var: Variance tensor; must provide ``.clamp(min=...)``.
        actions: Values at which the log-density is evaluated.
        min_var: Lower bound applied to ``var`` before it is used.

    Returns:
        Tensor of log-densities, one per element after broadcasting
        (no reduction is applied).
    """
    import math

    import torch

    # Clamp once so the exponent and the normaliser share the same variance.
    safe_var = var.clamp(min=min_var)
    sq_err_term = -((mu - actions) ** 2) / (2 * safe_var)
    # log(sqrt(x)) == 0.5 * log(x); the latter skips the intermediate sqrt.
    log_norm_term = -0.5 * torch.log(2 * math.pi * safe_var)
    return sq_err_term + log_norm_term
def cont_entropy(var, reduction, *, beta=1e-4):
    """Return the scaled, negated Gaussian entropy term for ``var``.

    Computes ``beta * -(log(2 * pi * var) + 1) / 2``. The expression in
    parentheses divided by two is the differential entropy of a Gaussian
    with variance ``var``; the result here carries a leading minus sign.
    NOTE(review): the negation suggests this is meant to be added to a loss
    that is minimised -- confirm against the caller.

    Args:
        var: Variance tensor passed to ``torch.log``.
        reduction: When truthy, the per-element values are averaged with
            ``.mean()`` before scaling; otherwise they are returned
            element-wise.
        beta: Scale applied to the result. Defaults to ``1e-4``, the value
            that was previously hard-coded inside the function.

    Returns:
        A scalar tensor when ``reduction`` is truthy, otherwise a tensor
        with the same shape as ``var``.
    """
    import math

    import torch

    neg_entropy = -(torch.log(2 * math.pi * var) + 1) / 2
    if reduction:
        neg_entropy = neg_entropy.mean()
    return beta * neg_entropy
...
continuous_control.1581739694.txt.gz · 마지막으로 수정됨: (바깥 편집)