$\alpha$-rank: example
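
The script below follows the single-population construction from the paper and open_spiel implementation linked in its header. For an $m \times m$ payoff matrix $M$ it builds a Markov transition matrix $C$ over the $m$ strategies and ranks strategies by the stationary distribution $\pi$ of $C$. With $\eta = 1/(m-1)$ and $s \neq r$:

$$C_{sr} = \eta\,\frac{1 - e^{-\alpha\,(M_{rs} - M_{sr})}}{1 - e^{-m\,\alpha\,(M_{rs} - M_{sr})}}, \qquad C_{ss} = 1 - \sum_{r \neq s} C_{sr}, \qquad \pi C = \pi.$$

When $M_{rs} = M_{sr}$ the ratio tends to $1/m$ (neutral fixation), and as $\alpha \to \infty$ it tends to $1$ when $M_{rs} > M_{sr}$ and to $0$ otherwise; the code approximates that limit with inf_alpha_eps when use_inf_alpha=True.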

alpharank.py
#!/usr/bin/env python
 
# https://arxiv.org/pdf/1909.09849.pdf
# https://github.com/deepmind/open_spiel/blob/master/open_spiel/python/egt/alpharank.py
 
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import scipy.linalg as la
 
 
def get_ranks(M, alpha, use_inf_alpha=False, inf_alpha_eps=0.01):
    # single-population case; use_inf_alpha=True takes the alpha -> infinity limit
 
    #
    #  M --> C: compute transition matrix
    # 
 
    # transition matrix
    C = np.zeros_like(M, dtype=np.float32)
 
    n_strategies = M.shape[0]
    eta = 1 / (n_strategies - 1)
 
    for s in range(n_strategies):  # current (resident) strategy
        for r in range(n_strategies):  # candidate (mutant) strategy
            if s != r:  # Compute off-diagonal fixation probabilities
                payoff_rs = M[r, s]
                payoff_sr = M[s, r]
                if use_inf_alpha:
                    if np.isclose(payoff_rs, payoff_sr, atol=1e-14):
                        C[s, r] = eta * 0.5
                    elif payoff_rs > payoff_sr:
                        # r earns more against s than s does against r,
                        # so the s -> r transition is nearly certain
                        C[s, r] = eta * (1 - inf_alpha_eps)
                    else:
                        # the s -> r transition is nearly suppressed
                        C[s, r] = eta * inf_alpha_eps
                else:
                    u = alpha * (payoff_rs - payoff_sr)
                    if np.isclose(u, 0.0):
                        # equal payoffs: the ratio below tends to 1/m (neutral fixation)
                        C[s, r] = eta / n_strategies
                    else:
                        # mutant r's fixation probability, scaled by mutation rate eta
                        # (the eta factor matches the open_spiel reference implementation)
                        C[s, r] = eta * (1 - np.exp(-u)) / (1 - np.exp(-n_strategies * u))
        C[s, s] = 1 - sum(C[s, :])  # Diagonals
 
    #
    #  C --> pi: stationary distribution
    #
    # pi is a left eigenvector of C with eigenvalue 1 (pi @ C = pi)
    eigenvals, eigenvecs, _ = la.eig(C, left=True, right=True)
    mask = abs(eigenvals - 1.) < 1e-6
    eigenvecs = eigenvecs[:, mask]
    num_stationary_eigenvecs = np.shape(eigenvecs)[1]
    if num_stationary_eigenvecs != 1:
        raise ValueError(
            f'Expected 1 stationary distribution, but found {num_stationary_eigenvecs}'
        )
    eigenvecs *= 1. / sum(eigenvecs)
    pi = eigenvecs.real.flatten()
 
    # ranks: group strategies that share the same (rounded) stationary mass
    ranks = dict()
    for s in range(n_strategies):
        k = f'{pi[s]:.5f}'
        ranks.setdefault(k, list())
        ranks[k].append(s)
 
    return C, pi, ranks
 
 
if __name__ == '__main__':
 
    # payoff table: symmetric zero-sum game (rock-paper-scissors)
    M = np.array([
        [0, -1, 1],
        [1, 0, -1],
        [-1, 1, 0],
    ])
 
    # M = np.array([
    #     [0,  -0.5,    1],
    #     [0.5,   0, -0.1],
    #     [-1,  0.1,    0],
    # ])
 
    C, pi, ranks = get_ranks(M, alpha=0.5)
 
    # print ranks
    for rank, pi_ in enumerate(sorted(ranks.keys(), reverse=True)):
        print(f'rank: {rank+1}, score: {pi_}, strategy: {ranks[pi_]}')
 
    edges = list()
    for s in range(M.shape[0]):
        for r in range(M.shape[1]):
            if s != r:  # skip self-loops
                if C[s, r] > 1 / M.shape[0]:  # keep only above-average transition probabilities
                    edges.append((s, r, C[s, r]))
 
    DG = nx.DiGraph()
    DG.add_weighted_edges_from(edges)
 
    # nx.draw() and nx.draw_networkx_edge_labels() accept different keyword
    # arguments, so pass each call only the options it understands
    pos = nx.spring_layout(DG)
    nx.draw(DG, pos=pos, node_color='white', edge_color='grey', with_labels=True)
    edge_labels = {(u, v): f'{d["weight"]:.3f}' for u, v, d in DG.edges(data=True)}
    nx.draw_networkx_edge_labels(DG, pos=pos, edge_labels=edge_labels)
    plt.show()
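
A quick sanity check, assuming C and pi from get_ranks() above are still in scope: pi must sum to 1 and satisfy pi C = pi, and for the cyclic rock-paper-scissors payoffs above, symmetry forces a uniform distribution. A minimal sketch:

check_pi.py
# sanity check (a sketch; run in the same session as alpharank.py)
assert np.isclose(pi.sum(), 1.0)           # pi is a probability distribution
assert np.allclose(pi @ C, pi, atol=1e-6)  # pi is stationary: pi C = pi
print(pi)  # expected for rock-paper-scissors: ~[0.3333 0.3333 0.3333]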