문서의 이전 판입니다!
Continuous Control
def cont_logprob(mu, var, actions):
    """Return the element-wise log-density of ``actions`` under N(mu, var).

    The variance is floored at ``1e-3`` and the floored value is used in
    BOTH the quadratic term and the normalisation term, so the result is the
    log-density of one consistent Gaussian and stays finite when ``var``
    collapses to zero.

    Args:
        mu: Mean of the Gaussian (assumed to be a tensor broadcastable
            against ``actions`` -- confirm against the caller).
        var: Variance tensor; must support ``.clamp``.
        actions: Values at which the density is evaluated.

    Returns:
        Tensor of per-element log-probabilities (no reduction is applied).
    """
    import math

    import torch

    # Floor the variance once; using the raw variance in the log term would
    # yield +inf (var == 0) or NaN (var < 0) while the quadratic term
    # silently used the floored value.
    safe_var = var.clamp(min=1e-3)
    quadratic = -((mu - actions) ** 2) / (2 * safe_var)
    log_norm = -torch.log(torch.sqrt(2 * math.pi * safe_var))
    return quadratic + log_norm
def cont_entropy(var):
    """Return the Gaussian differential entropy summed over the last axis.

    Each element contributes ``0.5 * (log(2 * pi * var) + 1)``; the
    contributions are then added up along dimension ``-1``.

    Args:
        var: Tensor of variances (passed to ``torch.log``, so non-positive
            entries produce ``-inf``/``NaN``).

    Returns:
        Tensor with the last dimension reduced by summation.
    """
    import math

    import torch

    two_pi = 2 * math.pi
    per_element = torch.log(two_pi * var).add(1).div(2)
    return per_element.sum(dim=-1)