net.h

#ifndef __SHAREWIZ_NET_H__
#define __SHAREWIZ_NET_H__
 
#include <memory>
#include <vector>
 
// A Net class.
//
// To handle neural networks.
 
// There are several things to keep in mind when applying this agent in practice:
//   1. If the rewards are very sparse in the environment the agent will have trouble learning.
//      Right now there is no prioritized sweeping support, but one might imagine oversampling experiences that 
//      have high TD errors.  It is not clear how this can be done in the most principled way. 
//      Similarly, there are no eligibility traces right now, though these could be added with a few modifications 
//      in future versions.
//   2. The exploration is rather naive, since a random action is taken once in a while
//      (see the sketch after these notes).
//      If the environment requires longer sequences of precise actions to get a reward, the agent might have a 
//      lot of difficulty finding these by chance, and then also learning from them sufficiently.
//   3. DQN only supports a fixed number of discrete actions and it is not obvious how one could incorporate
//      (high-dimensional) continuous action spaces.
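//
// A minimal sketch of the epsilon-greedy action selection mentioned in point 2 above.
// The names `epsilon`, `numActions`, `bestAction()` and `rng` are illustrative and not
// part of this class:
//
//   std::uniform_real_distribution<double> coin(0.0, 1.0);
//   std::uniform_int_distribution<int>     pick(0, numActions - 1);
//   int action = (coin(rng) < epsilon) ? pick(rng)       // explore: random action
//                                      : bestAction();   // exploit: argmax_a Q(s, a)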
 
class Layer;
 
typedef std::shared_ptr<Layer> pLayerX;
typedef std::vector<pLayerX> pLayer;
 
 
class Neuron;
typedef std::shared_ptr<Neuron> pNeuronX;
typedef std::vector<pNeuronX> pNeuron;
 
 
class Net
{
private:
	//std::vector<double>& targetVals;
 
  double discount_factor = 0.4;        // Discount factor (0 - 1).
                                       // [gamma].
                                       // If the discount factor is close to 0, the agent tends to consider only 
                                       // immediate rewards.
                                       // If it is close to 1, the agent weights future rewards more heavily and 
                                       // is willing to delay the reward.
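
  // For reference, gamma enters the standard (tabular) Q-learning update, where alpha is
  // the learning rate declared below:
  //
  //   Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a))
  //
  // With gamma = 0 only the immediate reward r matters; as gamma approaches 1 the
  // estimated future return max_a' Q(s', a') dominates the update.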
 
  double global_bias = 1.0;            // A global bias value to add to every layer.
 
  double goal_amount = 100.0;          // Used by DQN networks.  The goal amount to try to obtain.
 
  double learning_rate = 0.5;          // Learning rate.
                                       // Set this by trial and error; that is pretty much the best approach available.
                                       // [eta] (backprop), [alpha] (dqn)
                                       // Controls how much the weights are changed during a weight update.
                                       // The larger the value, the more the weights are changed.
                                       // This must be a real value between 0.0 and 1.0.
                                       // These values are commonly set from 0.5 to 0.7.
 
	double max_error_tolerance;          // Maximum error tolerated during training.
 
  double weight_decay = 0.00000001;    // How much to reduce each weight every time it is updated.
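
  // A minimal sketch of how learning_rate and weight_decay are typically combined in a
  // single weight update (illustrative only; `delta` is the back-propagated error term
  // and `input` the incoming activation):
  //
  //   weight += learning_rate * delta * input;   // gradient step, scaled by eta
  //   weight -= weight_decay  * weight;          // shrink the weight slightly (L2-style decay)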
 
  bool initial_random_weight = true;   // If all weights start out equal they change in lockstep and never 
                                       // differentiate, so small random initial weights are used to break the symmetry.
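
  // One common choice for that initial value (illustrative, not necessarily what the
  // implementation does):
  //
  //   weight = ((double)rand() / RAND_MAX - 0.5) * 0.1;   // small value in roughly [-0.05, 0.05]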
 
 
  //double epsilon = 0.2;                // Initial epsilon for epsilon-greedy policy (0 - 1).
                                       // A high epsilon (up to 1) will cause the agent to take more random actions.
                                       // It is a good idea to start with a high epsilon (e.g. 0.2 or even a bit higher) 
                                       // and decay it over time to a lower value (e.g. 0.05).
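
  // A simple decay schedule for that (illustrative); applied after each episode:
  //
  //   epsilon = std::max(0.05, epsilon * 0.99);   // decay towards the 0.05 floor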
 
  //double lambda = 0;                   // eligibility trace decay, [0,1). 0 = no eligibility traces.
 
 
 
	pLayer layers;
 
public:
	Net();
	Net(const std::vector<unsigned int>& topology);
 
  double getDiscountFactor(void);
  void setDiscountFactor(const double& _discount_factor);
 
  double getGoalAmount(void);
  void setGoalAmount(const double& _goal_amount);
 
	double getLearningRate(void);
	void setLearningRate(const double& learning_rate);
 
	double getMaxErrorTolerance(void);
	void setMaxErrorTolerance(const double& max_error_tolerance);
 
 
	void setTarget(const std::vector<double>& targetVals);
 
	void setTest();
 
  void connect(const std::vector< std::vector<double> > connections);
  void connect(int layerFrom, int neuronFrom, int layerTo, int neuronTo, double _R, int connection_idx = 1);
 
  void connectAll();
  void connectForward();
  void connectForward2();
  void connectAllInLayer(const pLayerX& layer);
 
  void DQN(void);
  double getMaxQ(pNeuronX state);
 
  pNeuronX getPolicy(pNeuronX currentState);
  void showPolicy(void);
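
  // A hedged usage sketch of the DQN-related calls above (assuming DQN() runs the
  // training episodes and showPolicy() prints the greedy action per state; the exact
  // behaviour lives in the implementation):
  //
  //   Net net(topology);
  //   net.setGoalAmount(100.0);
  //   net.setDiscountFactor(0.8);
  //   net.DQN();          // train the Q-values
  //   net.showPolicy();   // display the learned policy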
 
	void feedForward(const std::vector<double>& inputVals);
	void backPropagate(const std::vector<double>& targetVals);
	void backPropagate2(const std::vector<double>& targetVals);
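
  // A hedged usage sketch of supervised training with the calls above (the parameter
  // meanings are inferred from the names; e.g. a 2-4-1 network learning one XOR pattern):
  //
  //   Net net(std::vector<unsigned int>{2, 4, 1});
  //   net.setLearningRate(0.5);
  //   for (int epoch = 0; epoch < 10000; ++epoch) {
  //     net.feedForward({1.0, 0.0});   // present one input pattern
  //     net.backPropagate({1.0});      // adjust weights towards the target
  //   }
  //   net.printOutput();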
 
 
  int randomBetween(int lowestNumber, int highestNumber);
 
	void printOutput(void);
	void printResult(void);
};
 
 
#endif