using System.Collections;
using System.Collections.Generic;
using UnityEngine;

/// <summary>
/// Epsilon-greedy action-selection policy for Q-learning.
/// On each query a random number is drawn and compared against <c>epsilon</c>:
/// below it, a random action is requested from the <see cref="QL_Function"/>;
/// otherwise the non-random branch is taken.
/// </summary>
/// <remarks>
/// Original author's note: once training ends, the trained Q-table can be
/// imported and epsilon reduced to 0 (or a really low value).
/// </remarks>
public class QL_Policy
{
    // Table handed to Init. It is only stored here; no method visible in this
    // class reads it.
    private QL_Table q_table_ref;

    // Source of both the random and the best-valued actions.
    private QL_Function function;

    // Threshold the random roll is compared against.
    private float epsilon;

    /// <summary>
    /// Stores the collaborators and the threshold used by the policy checks.
    /// </summary>
    /// <param name="table">Q-table reference kept by this policy.</param>
    /// <param name="epsilon">Threshold the random roll is compared against.</param>
    /// <param name="function">Object queried for random and best actions.</param>
    public void Init(QL_Table table, float epsilon, QL_Function function)
    {
        this.function = function;
        this.epsilon = epsilon;
        q_table_ref = table;
    }

    /// <summary>
    /// Policy check that yields an action only when the roll falls below epsilon.
    /// </summary>
    /// <param name="state">State key forwarded to <c>GetRandomAction</c>.</param>
    /// <returns>
    /// The result of <c>function.GetRandomAction(state)</c> when the roll is
    /// below epsilon; otherwise <c>null</c>.
    /// </returns>
    public QAction_Reward? CheckPolicyStart(string state)
    {
        if (!ShouldExplore())
        {
            return null;
        }

        return function.GetRandomAction(state);
    }

    /// <summary>
    /// Epsilon-greedy policy check.
    /// </summary>
    /// <param name="state">State key forwarded to the <see cref="QL_Function"/>.</param>
    /// <returns>
    /// The result of <c>function.GetRandomAction(state)</c> when the roll is
    /// below epsilon; otherwise the result of <c>function.GetBestValue(state)</c>.
    /// </returns>
    public QAction_Reward CheckPolicy(string state)
    {
        if (!ShouldExplore())
        {
            return function.GetBestValue(state);
        }

        return function.GetRandomAction(state);
    }

    // Draws exactly one random number per call and reports whether it is
    // strictly below epsilon. The strict comparison means an epsilon of 0
    // never selects the random branch.
    private bool ShouldExplore()
    {
        float roll = UnityEngine.Random.Range(0.0f, 1.0f);
        return roll < epsilon;
    }
}
