net.h
#ifndef __SHAREWIZ_NET_H__
#define __SHAREWIZ_NET_H__

#include <memory>
#include <vector>

// A Net class.
//
// To handle neural networks.
//
// There are several things to keep in mind when applying this agent in practice:
//
// 1. If the rewards are very sparse in the environment, the agent will have trouble learning.
//    Right now there is no priority sweeping support, but one might imagine oversampling
//    experiences that have high TD errors.  It is not clear how this can be done in the most
//    principled way.  Similarly, there are no eligibility traces right now, though these could
//    be added with a few modifications in future versions.
// 2. The exploration is rather naive, since a random action is taken once in a while.
//    If the environment requires longer sequences of precise actions to get a reward, the agent
//    might have a lot of difficulty finding these by chance, and then also learning from them
//    sufficiently.
// 3. DQN only supports a fixed number of discrete actions, and it is not obvious how one could
//    incorporate (high-dimensional) continuous action spaces.

class Layer;
typedef std::shared_ptr<Layer> pLayerX;
typedef std::vector<pLayerX> pLayer;

class Neuron;
typedef std::shared_ptr<Neuron> pNeuronX;
typedef std::vector<pNeuronX> pNeuron;


class Net
{
private:
  //std::vector<double>& targetVals;

  double discount_factor = 0.4;        // Discount factor (0 - 1).  [gamma]
                                       // If the discount factor is closer to 0, the agent will tend to
                                       // consider only immediate rewards.
                                       // If the discount factor is closer to 1, the agent will consider
                                       // future rewards with greater weight, willing to delay the reward.

  double global_bias = 1.0;            // A global bias value to add to every layer.

  double goal_amount = 100.0;          // Used by DQN networks.  The goal amount to try to obtain.

  double learning_rate = 0.5;          // Learning rate.  [eta] (backprop), [alpha] (DQN)
                                       // Set this by trial and error; that is pretty much the best
                                       // approach we have.
                                       // Controls how much the weights are changed during a weight update:
                                       // the larger the value, the more the weights are changed.
                                       // This must be a real value between 0.0 and 1.0.
                                       // These values are commonly set from 0.5 to 0.7.

  double max_error_tollerance;         // Maximum error tolerance.

  double weight_decay = 0.00000001;    // How much to reduce the weights each time they are updated.

  bool initial_random_weight = true;   // Start from small random weights; if all weights were equal they
                                       // would all receive the same update and would never change relative
                                       // to each other.

  //double epsilon = 0.2;              // Initial epsilon for the epsilon-greedy policy (0 - 1).
                                       // A high epsilon (up to 1) causes the agent to take more random actions.
                                       // It is a good idea to start with a high epsilon (e.g. 0.2 or even a
                                       // bit higher) and decay it over time to something lower (e.g. 0.05).

  //double lambda = 0;                 // Eligibility trace decay, [0, 1).  0 = no eligibility traces.
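  // For reference: discount_factor [gamma] and learning_rate [alpha] are the two constants of the
  // standard tabular Q-learning update, sketched below.  This is only an illustration of how such
  // parameters are normally used; the actual update lives in net.cpp, which is not shown here.
  //
  //   Q(s, a)  <-  Q(s, a) + alpha * ( reward + gamma * max_a' Q(s', a') - Q(s, a) )
  //
  // getMaxQ() in the public interface below presumably supplies the max_a' Q(s', a') term.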
  pLayer layers;

public:
  Net();
  Net(const std::vector<unsigned int>& topology);

  double getDiscountFactor(void);
  void setDiscountFactor(const double& _discount_factor);

  double getGoalAmount(void);
  void setGoalAmount(const double& _goal_amount);

  double getLearningRate(void);
  void setLearningRate(const double& learning_rate);

  double getMaxErrorTollerance(void);
  void setMaxErrorTollerance(const double& max_error_tollerance);

  void setTarget(const std::vector<double>& targetVals);
  void setTest();

  void connect(const std::vector< std::vector<double> > connections);
  void connect(int layerFrom, int neuronFrom, int layerTo, int neuronTo, double _R, int connection_idx = 1);
  void connectAll();
  void connectForward();
  void connectForward2();
  void connectAllInLayer(const pLayerX& layer);

  void DQN(void);
  double getMaxQ(pNeuronX state);
  pNeuronX getPolicy(pNeuronX currentState);
  void showPolicy(void);

  void feedForward(const std::vector<double>& inputVals);
  void backPropagate(const std::vector<double>& targetVals);
  void backPropagate2(const std::vector<double>& targetVals);

  int randomBetween(int lowestNumber, int highestNumber);

  void printOutput(void);
  void printResult(void);
};

#endif
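The matching net.cpp is not included on this page, so the snippet below is only a rough sketch of how the class above might be driven. The 2-3-1 topology, the single training pair, and the call to connectForward() are illustrative assumptions, not taken from the implementation.

#include <vector>

#include "net.h"

int main()
{
  // Hypothetical 2-3-1 topology: 2 inputs, 3 hidden neurons, 1 output.
  std::vector<unsigned int> topology{2, 3, 1};
  Net net(topology);

  // Wire each layer to the next.  Whether the topology constructor already
  // does this internally depends on net.cpp, so this call is an assumption.
  net.connectForward();

  net.setLearningRate(0.5);

  // A single made-up training pair, purely for illustration.
  std::vector<double> inputVals{1.0, 0.0};
  std::vector<double> targetVals{1.0};

  net.feedForward(inputVals);       // forward pass
  net.backPropagate(targetVals);    // weight update against the target
  net.printOutput();                // inspect the network's output

  return 0;
}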