#!/usr/bin/env python3
import random
import argparse
import collections

# Default seed for the random generator; used as the default of the --seed
# command-line option so that runs are reproducible unless overridden.
SEED = 2


def get_action(state: int, total_states: int) -> int:
    """Choose the action to take in ``state``.

    Action ``0`` is the "left" move handled by ``do_action``; ``1`` is the
    other action.

    Args:
        state: Current state number (the first state is ``1``).
        total_states: Number of the last state in the environment.

    Returns:
        ``1`` in the first state, ``0`` in the last state, and otherwise a
        uniformly random pick between ``0`` and ``1``.
    """
    at_first_state = state == 1
    if at_first_state:
        # The first-state check takes precedence, also when total_states == 1.
        return 1

    at_last_state = state == total_states
    return 0 if at_last_state else random.choice([0, 1])


def do_action(state: int, action: int) -> int:
    """Simulate taking ``action`` in ``state`` and return the next state.

    Args:
        state: Current state number.
        action: ``0`` moves left deterministically; any other value triggers
            the stochastic transition described below.

    Returns:
        The state reached after the action. For action ``0`` this is
        ``state - 1`` with no lower bound applied (so ``do_action(1, 0)``
        returns ``0``). Otherwise, from state ``1`` the result is ``1`` with
        weight 0.4 or ``2`` with weight 0.6; from any other state it is
        ``state - 1``, ``state`` or ``state + 1`` with weights 0.05, 0.6 and
        0.35 respectively.
    """
    # The "left" action always succeeds and moves one state to the left.
    if action == 0:
        return state - 1

    if state == 1:
        outcomes, odds = [1, 2], [0.4, 0.6]
    else:
        # All remaining states share the same transition model.
        outcomes, odds = [state - 1, state, state + 1], [0.05, 0.6, 0.35]

    (next_state,) = random.choices(outcomes, weights=odds)
    return next_state


def main(argv=None) -> None:
    """Run the chain-environment simulation and print per-state visit counts.

    The walk starts in state 1. Each step picks an action with
    ``get_action``, applies it with ``do_action`` and counts the state that
    was reached. After ``--episode-length`` steps the walk is put back in
    state 1 (that reset itself is not counted as a visit).

    Args:
        argv: Command-line arguments to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Exits with argparse's usage error when ``--env-len`` is smaller than 2.
    """
    parser = argparse.ArgumentParser()
    # NOTE(review): the help texts look like Polish with the diacritics lost;
    # they are kept verbatim here - confirm the file's encoding.
    parser.add_argument("-n", "--steps", type=int, default=100, help="Liczba krokw do symulowania, domylnie=100")
    parser.add_argument("--episode-length", type=int, default=10, help="Ograniczenie epizodu, domylnie=10")
    parser.add_argument("--seed", type=int, default=SEED, help="Ziarno generatora losowego, domylnie=%d" % SEED)
    parser.add_argument("--env-len", type=int, default=6, help="Liczba stanw w rodowisku, domylnie=6")
    args = parser.parse_args(argv)

    # With fewer than two states, get_action still returns 1 in state 1, so
    # the walk can leave the range 1..env_len; those visits would be counted
    # but never printed.
    if args.env_len < 2:
        parser.error("--env-len must be at least 2")

    random.seed(args.seed)

    states_count = collections.Counter()
    state = 1
    episode_step = 0

    for _ in range(args.steps):
        action = get_action(state, args.env_len)
        state = do_action(state, action)
        states_count[state] += 1
        episode_step += 1
        if episode_step == args.episode_length:
            # Episode limit reached: start the next episode from state 1.
            state = 1
            episode_step = 0

    # Separate loop variable so the simulation's ``state`` is not reused.
    for reported_state in range(1, args.env_len + 1):
        print("%d:\t%d" % (reported_state, states_count[reported_state]))


if __name__ == "__main__":
    main()
