#ifndef __SHAREWIZ_NET_H__
#define __SHAREWIZ_NET_H__

#include <memory>
#include <vector>

// A Net class.
//
// Handles neural networks.
//
// There are several things to keep in mind when applying this agent in practice:
// 1. If the rewards are very sparse in the environment, the agent will have trouble learning.
//    Right now there is no prioritized sweeping support, but one might imagine oversampling experiences that
//    have high TD errors. It is not clear how this can be done in the most principled way.
//    Similarly, there are no eligibility traces right now, though they could be added with a few modifications
//    in future versions.
// 2. The exploration is rather naive, since a random action is taken once in a while.
//    If the environment requires longer sequences of precise actions to get a reward, the agent might have a
//    lot of difficulty finding these by chance, and then also learning from them sufficiently.
// 3. DQN only supports a set number of discrete actions, and it is not obvious how one can incorporate
//    (high-dimensional) continuous action spaces.

class Layer;
typedef std::shared_ptr<Layer> pLayerX;
typedef std::vector<pLayerX> pLayer;

class Neuron;
typedef std::shared_ptr<Neuron> pNeuronX;
typedef std::vector<pNeuronX> pNeuron;


class Net
{
private:
  //std::vector<double>& targetVals;

  double learning_rate;        // eta.
                               // Controls how much the weights are changed during a weight update.
                               // The larger the value, the more the weights are changed.
                               // This must be a real value between 0.0 and 1.0.
                               // These values are commonly set from 0.5 to 0.7.

  double max_error_tollerance;

  double alpha = 0.1;          // Learning rate.
                               // Set this by trial and error; that is pretty much the best approach we have.

  double gamma = 0.4;          // Discount factor (0 - 1).
                               // If Gamma is closer to 0, the agent will tend to consider only
                               // immediate rewards.
                               // If Gamma is closer to 1, the agent will consider future rewards
                               // with greater weight, and is willing to delay the reward.

  //double epsilon = 0.2;      // Initial epsilon for the epsilon-greedy policy (0 - 1).
                               // A high epsilon (up to 1) will cause the agent to take more random actions.
                               // It is a good idea to start with a high epsilon (e.g. 0.2 or even a bit higher)
                               // and decay it over time to be lower (e.g. 0.05).

  //double lambda = 0;         // Eligibility trace decay, [0, 1). 0 = no eligibility traces.

  double goal_amount;          // Used by DQN networks. The goal amount to try to obtain.

  pLayer layers;

public:
  Net();
  Net(const std::vector<int>& topology);

  double getLearningRate(void);
  void setLearningRate(const double& learning_rate);

  double getMaxErrorTollerance(void);
  void setMaxErrorTollerance(const double& max_error_tollerance);

  double getAlpha(void);
  void setAlpha(const double& _alpha_amount);

  double getGamma(void);
  void setGamma(const double& _gamma_amount);

  double getGoalAmount(void);
  void setGoalAmount(const double& _goal_amount);

  void setTarget(const std::vector<double>& targetVals);

  void setTest();

  void connect(const std::vector< std::vector<double> > connections);
  void connect(int layerFrom, int neuronFrom, int layerTo, int neuronTo, double _R, int connection_idx = 1);
  void connectAll();
  void connectForward();
  void connectForward2();
  void connectAllInLayer(const pLayerX& layer);

  void DQN(void);
  double getMaxQ(pNeuronX state);
  pNeuronX getPolicy(pNeuronX currentState);
  void showPolicy(void);

  void feedForward(const std::vector<double>& inputVals);
  void backPropagate(const std::vector<double>& targetVals);
  void backPropagate2(const std::vector<double>& targetVals);

  int randomBetween(int lowestNumber, int highestNumber);

  void printOutput(void);
  void printResult(void);
};

#endif
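
// Note on alpha, gamma, goal_amount and getMaxQ(): this header only declares the DQN interface,
// so the sketch below is the standard tabular Q-learning update that these parameters normally
// feed into, not necessarily what Net::DQN() implements:
//
//   Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a))
//
// where r is the immediate reward (e.g. goal_amount when the goal state is reached) and
// max_a' Q(s', a') corresponds to getMaxQ() evaluated on the successor state.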
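
// Minimal usage sketch for the supervised path. This is hypothetical and assumes the element
// types reconstructed above (an int topology, double-valued samples); connectForward() is
// assumed to wire successive layers front to back.
//
//   #include "sharewiz_net.h"
//
//   int main()
//   {
//     Net net(std::vector<int>{2, 3, 1});   // 2 inputs, 3 hidden neurons, 1 output.
//     net.setLearningRate(0.5);
//     net.connectForward();                 // assumed: connect layer n to layer n+1.
//
//     std::vector<double> input{1.0, 0.0};
//     std::vector<double> target{1.0};
//
//     for (int epoch = 0; epoch < 1000; ++epoch)
//     {
//       net.feedForward(input);             // forward pass.
//       net.backPropagate(target);          // weight update toward the target.
//     }
//     net.printOutput();
//     return 0;
//   }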