using System.Collections;
using System.Collections.Generic;
using UnityEngine;

/// <summary>
/// Evaluates actions for a state by combining rewards stored in a <see cref="QL_Table"/>
/// with a bounded recursive lookahead driven by a <see cref="QL_Policy"/> and a
/// simulation callback. <see cref="Init"/> must be called before any other method.
/// </summary>
public class QL_Function
{
    QL_Table q_table_ref;
    QL_Policy policy;
    QL_Manager manager;
    float baseDiscount;
    int iterations;

    /// <summary>
    /// Stores the collaborators and tuning values used by the other methods.
    /// </summary>
    /// <param name="q_table">Table queried for rewards and available options.</param>
    /// <param name="policy">Policy queried for the action to follow during lookahead.</param>
    /// <param name="manager">Owning manager; stored but not used by this class.</param>
    /// <param name="baseDiscount">Base of the discount factor applied to future rewards.</param>
    /// <param name="iterations">Lookahead depth used by <see cref="Q_Function_Calculate"/>.</param>
    public void Init(QL_Table q_table, QL_Policy policy, QL_Manager manager, float baseDiscount, int iterations)
    {
        q_table_ref = q_table;
        this.policy = policy;
        this.baseDiscount = baseDiscount;
        this.iterations = iterations;
        this.manager = manager;
    }

    /// <summary>
    /// Picks the action to take in <paramref name="state"/>.
    /// If <c>policy.CheckPolicyStart</c> yields a value, its action is returned directly;
    /// otherwise every available option is scored with <see cref="Q_Function_Calculate"/>
    /// and the highest scoring action wins (first one on ties).
    /// </summary>
    /// <param name="state">State to choose an action for.</param>
    /// <param name="simulator">Callback used to simulate the state reached after an action.</param>
    /// <returns>
    /// The chosen action, or <c>QLActions.Wait</c> when no option scores above
    /// <c>-float.MaxValue</c> (including when there are no options).
    /// </returns>
    public QLActions ChooseAction(string state, QSimulationCallBack simulator)
    {
        QAction_Reward? policyOverride = policy.CheckPolicyStart(state);
        if (policyOverride != null)
        {
            return ((QAction_Reward)policyOverride).action;
        }

        QLActions bestAction = QLActions.Wait;
        float bestScore = -float.MaxValue;
        QAction_Reward[] possibilities = q_table_ref.GetAvailableOptions(state);
        foreach (QAction_Reward candidate in possibilities)
        {
            float score = Q_Function_Calculate(state, candidate.action, 0, simulator);
            if (score > bestScore)
            {
                bestAction = candidate.action;
                bestScore = score;
            }
        }
        return bestAction;
    }

    /// <summary>
    /// Recursively estimates the value of taking <paramref name="action"/> in
    /// <paramref name="state"/>: the table reward for the pair, plus the discounted value
    /// of the action the policy picks in the simulated follow-up state. Recursion stops
    /// once <paramref name="currentIteration"/> + 1 reaches the configured iteration count,
    /// at which point only the table reward is returned.
    /// </summary>
    /// <param name="state">State being evaluated.</param>
    /// <param name="action">Action being evaluated in that state.</param>
    /// <param name="currentIteration">Zero-based recursion depth; callers start at 0.</param>
    /// <param name="simulator">Callback returning the state that results from the action.</param>
    /// <returns>The accumulated (discounted) reward estimate.</returns>
    public float Q_Function_Calculate(string state, QLActions action, int currentIteration, QSimulationCallBack simulator)
    {
        float total = 0;
        total += q_table_ref.GetReward(state, (int)action);

        // ">=" rather than "==" so that a non-positive iteration count terminates
        // immediately instead of recursing until the stack overflows.
        if (currentIteration + 1 >= iterations)
        {
            return total;
        }

        string newState = simulator(state, (int)action, currentIteration);
        QLActions newAction = policy.CheckPolicy(newState).action;

        // NOTE(review): the factor baseDiscount^currentIteration is applied at every level,
        // so it compounds across the recursion and the first lookahead step (depth 0) is
        // not discounted at all. Kept as-is to preserve existing behavior - confirm intent.
        float discount = Mathf.Pow(baseDiscount, currentIteration);
        total += discount * Q_Function_Calculate(newState, newAction, currentIteration + 1, simulator);
        return total;
    }


    /// <summary>
    /// Returns the available option with the highest stored reward for
    /// <paramref name="state"/> (the first one on ties).
    /// </summary>
    /// <param name="state">State whose options are inspected.</param>
    /// <returns>The option with the greatest <c>reward</c>.</returns>
    /// <remarks>
    /// Logs an error when the table or policy has not been set via <see cref="Init"/>.
    /// An empty option list is not handled: indexing the result will throw.
    /// </remarks>
    public QAction_Reward GetBestValue(string state)
    {
        if (q_table_ref == null || policy == null) Debug.LogError("You forgot to init the QL_Function!");
        int bestIndex = 0;
        float bestReward = -float.MaxValue;
        QAction_Reward[] options = q_table_ref.GetAvailableOptions(state);
        for (int i = 0; i < options.Length; i++)
        {
            QAction_Reward option = options[i];
            if (option.reward > bestReward)
            {
                bestIndex = i;
                // Track the running maximum; without this every option "beats" the
                // initial sentinel and the last option would always be returned.
                bestReward = option.reward;
            }
        }
        return options[bestIndex];
    }

    /// <summary>
    /// Returns one of the available options for <paramref name="state"/>, chosen with
    /// <c>UnityEngine.Random.Range</c> over the option indices.
    /// </summary>
    /// <param name="state">State whose options are sampled.</param>
    /// <returns>A randomly selected option.</returns>
    /// <remarks>An empty option list is not handled: indexing the result will throw.</remarks>
    public QAction_Reward GetRandomAction(string state)
    {
        QAction_Reward[] options = q_table_ref.GetAvailableOptions(state);
        return options[UnityEngine.Random.Range(0, options.Length)];
    }
}
