#ifndef __SHAREWIZ_NET_H__
#define __SHAREWIZ_NET_H__

#include <memory>
#include <vector>

// A Net class.
//
// Handles neural networks.
//
// There are several things to keep in mind when applying this agent in practice:
// 1. If the rewards are very sparse in the environment, the agent will have trouble learning.
//    Right now there is no prioritized sweeping support, but one might imagine oversampling experiences that
//    have high TD errors. It is not clear how this can be done in the most principled way.
//    Similarly, there are no eligibility traces right now, though they could be added with a few modifications
//    in future versions.
// 2. The exploration is rather naive, since a random action is taken once in a while.
//    If the environment requires longer sequences of precise actions to get a reward, the agent might have a
//    lot of difficulty finding these by chance, and then also learning from them sufficiently.
// 3. DQN only supports a set number of discrete actions, and it is not obvious how one can incorporate
//    (high-dimensional) continuous action spaces.

class Layer;
typedef std::shared_ptr<Layer> pLayerX;
typedef std::vector<pLayerX> pLayer;

class Neuron;
typedef std::shared_ptr<Neuron> pNeuronX;
typedef std::vector<pNeuronX> pNeuron;


class Net
{
private:
  //std::vector<double>& targetVals;

  double learning_rate;        // eta.
                               // Controls how much the weights are changed during a weight update.
                               // The larger the value, the more the weights are changed.
                               // This must be a real value between 0.0 and 1.0.
                               // These values are commonly set from 0.5 to 0.7.

  double max_error_tollerance;

  double alpha = 0.1;          // Learning rate.
                               // Set this by trial and error; that is pretty much the best approach we have.

  double gamma = 0.4;          // Discount factor (0 - 1).
                               // If Gamma is closer to 0, the agent will tend to consider only
                               // immediate rewards.
                               // If Gamma is closer to 1, the agent will consider future rewards
                               // with greater weight, and is willing to delay the reward.

  //double epsilon = 0.2;      // Initial epsilon for the epsilon-greedy policy (0 - 1).
                               // A high epsilon (up to 1) will cause the agent to take more random actions.
                               // It is a good idea to start with a high epsilon (e.g. 0.2 or even a bit higher)
                               // and decay it over time to be lower (e.g. 0.05).

  //double lambda = 0;         // Eligibility trace decay, [0, 1). 0 = no eligibility traces.

  double goal_amount;          // Used by DQN networks. The goal amount to try to obtain.

  pLayer layers;

public:
  Net();
  Net(const std::vector<int>& topology);

  double getLearningRate(void);
  void setLearningRate(const double& learning_rate);

  double getMaxErrorTollerance(void);
  void setMaxErrorTollerance(const double& max_error_tollerance);

  double getAlpha(void);
  void setAlpha(const double& _alpha_amount);

  double getGamma(void);
  void setGamma(const double& _gamma_amount);

  double getGoalAmount(void);
  void setGoalAmount(const double& _goal_amount);

  void setTarget(const std::vector<double>& targetVals);

  void setTest();

  void connect(const std::vector< std::vector<double> > connections);
  void connect(int layerFrom, int neuronFrom, int layerTo, int neuronTo, double _R, int connection_idx = 1);
  void connectAll();
  void connectForward();
  void connectForward2();
  void connectAllInLayer(const pLayerX& layer);

  void DQN(void);
  double getMaxQ(pNeuronX state);
  pNeuronX getPolicy(pNeuronX currentState);
  void showPolicy(void);

  void feedForward(const std::vector<double>& inputVals);
  void backPropagate(const std::vector<double>& targetVals);
  void backPropagate2(const std::vector<double>& targetVals);

  int randomBetween(int lowestNumber, int highestNumber);

  void printOutput(void);
  void printResult(void);
};

#endif
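
// Note on alpha, gamma, goal_amount and getMaxQ(): this header only declares the DQN interface,
// so the sketch below is the standard tabular Q-learning update that these parameters normally
// feed into, not necessarily what Net::DQN() implements:
//
//   Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a))
//
// where r is the immediate reward (e.g. goal_amount when the goal state is reached) and
// max_a' Q(s', a') corresponds to getMaxQ() evaluated on the successor state.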
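
// Minimal usage sketch for the supervised path. This is hypothetical and assumes the element
// types reconstructed above (an int topology, double-valued samples); connectForward() is
// assumed to wire successive layers front to back.
//
//   #include "sharewiz_net.h"
//
//   int main()
//   {
//     Net net(std::vector<int>{2, 3, 1});   // 2 inputs, 3 hidden neurons, 1 output.
//     net.setLearningRate(0.5);
//     net.connectForward();                 // assumed: connect layer n to layer n+1.
//
//     std::vector<double> input{1.0, 0.0};
//     std::vector<double> target{1.0};
//
//     for (int epoch = 0; epoch < 1000; ++epoch)
//     {
//       net.feedForward(input);             // forward pass.
//       net.backPropagate(target);          // weight update toward the target.
//     }
//     net.printOutput();
//     return 0;
//   }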