// activation.cpp

#include <cmath>
 
// See https://en.wikipedia.org/wiki/Activation_function
 
#include "activation.h"
 
 
// Default constructor: selects the sigmoid activation.
Activation::Activation()
  : activation_type(ACTIVATION_SIGMOID)
{
}
 
 
// Constructs with the caller-supplied activation type.
Activation::Activation(Activation_Types _activation_type)
  : activation_type(_activation_type)
{
}
 
 
// Destructor.  The class owns no dynamically-allocated resources, so there
// is nothing to release.
Activation::~Activation()
{
}
 
 
 
double Activation::activate(const double& value, const bool derivative, const double& alpha)
{
  switch (activation_type)
  {
  case (ACTIVATION_ABS) :
    return (abs(value, derivative));
    break;
 
  case (ACTIVATION_ARCTAN) :
    return (arctan(value, derivative));
    break;
 
  case (ACTIVATION_BENT) :
    return (bent(value, derivative));
    break;
 
  case (ACTIVATION_BINARY_HALF_STEP) :
    return (binaryHalfStep(value, derivative));
    break;
 
  case (ACTIVATION_BINARY_STEP) :
    return (binaryStep(value, derivative));
    break;
 
  case (ACTIVATION_BOUNDED_RELU) :
    return (boundedRelu(value, derivative));
    break;
 
  case (ACTIVATION_ELU) :
    return (elu(value, derivative));
    break;
 
  case (ACTIVATION_GAUSSIAN) :
    return (gaussian(value, derivative));
    break;
 
  case (ACTIVATION_IDENTITY) :
    return (identity(value, derivative));
    break;
 
  case (ACTIVATION_LINEAR) :
    return (linear(value, derivative));
    break;
 
  case (ACTIVATION_LOG) :
    return (log(value, derivative));
    break;
 
  case (ACTIVATION_PRELU) :
    return (prelu(value, derivative));
    break;
 
  case (ACTIVATION_RELU) :
    return (relu(value, derivative));
    break;
 
  case (ACTIVATION_SCALED_TANH) :
    return (scaledTanh(value, derivative));
    break;
 
  case (ACTIVATION_SIGMOID) :
    return (sigmoid(value, derivative));
    break;
 
  case (ACTIVATION_SINC) :
    return (sinc(value, derivative));
    break;
 
  case (ACTIVATION_SINUSOID) :
    return (sinusoid(value, derivative));
    break;
 
  case (ACTIVATION_SOFT_EXPONENTIAL) :
    return (softExponential(value, alpha, derivative));
    break;
 
  case (ACTIVATION_SOFT_PLUS) :
    return (softPlus(value, derivative));
    break;
 
  case (ACTIVATION_SOFT_RELU) :
    return (softRelu(value, derivative));
    break;
 
  case (ACTIVATION_SOFT_SIGN) :
    return (softSign(value, derivative));
    break;
 
  case (ACTIVATION_SOFT_STEP) :
    return (softRelu(value, derivative));
    break;
 
  case (ACTIVATION_SQRT) :
    return (sqrt(value, derivative));
    break;
 
  case (ACTIVATION_SQUARE) :
    return (square(value, derivative));
    break;
 
  case (ACTIVATION_SQUASH) :
    return (squash(value, derivative));
    break;
 
  case (ACTIVATION_STEP) :
    return (step(value, derivative));
    break;
 
  case (ACTIVATION_TANH) :
    return (tanh(value, derivative));
    break;
 
  default:
    return (sigmoid(value, derivative));
    break;
  }
}
 
 
// Returns a value between ?
//
// f(x) = abs(x)
// derivative f(x) = todo
// Absolute-value activation.
//
// Returns a value in [0, +infinity).
//
// f(x) = |x|
// derivative f(x) = -1 for x < 0; +1 otherwise (the kink at x == 0 has no
// true derivative; +1 is returned there by convention)
double Activation::abs(const double& value, const bool derivative)
{
  if (!derivative)
    return std::abs(value);

  if (value < 0)
    return -1;
  return 1;
}
 
 
// Returns a value between -pi/2 and +pi/2.
//
// f(x) = tan^-1(x)
// derivative f(x) = 1/(x^2+1)
// Arctangent activation.
//
// Returns a value between -pi/2 and +pi/2.
//
// f(x) = atan(x)
// derivative f(x) = 1 / (x^2 + 1)
double Activation::arctan(const double& value, const bool derivative)
{
  if (derivative)
  {
    const double denom = (value * value) + 1;
    return 1 / denom;
  }
  return std::atan(value);
}
 
 
// Bent Identity.
//
// Returns a value between -infinity to +infinity.
//
// f(x) = ((sqrt(x^2 + 1) - 1)/2) + x
// derivative f(x) = ((x / 2 * sqrt(x^2+1)) + 1
// Bent Identity.
//
// Returns a value between -infinity and +infinity.
//
// f(x) = ((sqrt(x^2 + 1) - 1) / 2) + x
// derivative f(x) = (x / (2 * sqrt(x^2 + 1))) + 1
double Activation::bent(const double& value, const bool derivative)
{
  if (derivative)
    // Bug fix: was (value / 2 * std::sqrt(...)) + 1, which MULTIPLIES by the
    // square root (left-to-right evaluation) instead of dividing by
    // 2 * sqrt(x^2 + 1).
    return (value / (2 * std::sqrt((value * value) + 1))) + 1;
  else
    return ((std::sqrt((value * value) + 1) - 1) / 2) + value;
}
 
 
// Binary Half Step.
// See Binary Step.
//
// Returns a value between 0.0 and +1.0.
//
// f(x) = 0 for x<0; .5 for x==0; 1 for x>0
// derivative f(x) = 0 for x != 0; ? for x == 0
// Binary Half Step.
// See Binary Step.
//
// Returns a value between 0.0 and +1.0.
//
// f(x) = 0 for x < 0; 0.5 for x == 0; 1 for x > 0
// derivative f(x) = 0 for x != 0; undefined at x == 0 (0 returned by convention)
double Activation::binaryHalfStep(const double& value, const bool derivative)
{
  if (derivative)
    // The function is piecewise constant, so its derivative is 0 everywhere
    // it exists.  (Bug fix: a stray branch previously returned 0.5 for
    // 0 < x <= 0.5, which has no mathematical basis.)
    return 0;

  if (value < 0)
    return 0;
  if (value == 0)
    return 0.5;
  return 1; // value > 0
}
 
 
// Binary Step.
// Also known as Step.
// Also known as Heaviside step.
//
// Returns a value between 0.0 and +1.0.
//
// f(x) = 0 for x<0; 1 for x>=0
// derivative f(x) = 0 for x != 0; ? for x == 0
// Binary Step.
// Also known as Step.
// Also known as Heaviside step.
//
// Returns a value between 0.0 and +1.0.
//
// f(x) = 0 for x < 0; 1 for x >= 0
// derivative f(x) = 0 for x != 0; undefined at x == 0 (0 returned by convention)
double Activation::binaryStep(const double& value, const bool derivative)
{
  if (derivative)
    // Cleanup: the old expression (value != 0 ? 0 : value) always evaluated
    // to 0 anyway; the step function is flat wherever the derivative exists.
    return 0;
  else
    return value >= 0 ? 1 : 0;
}
 
 
// Constrains the value between 0 and 1, and favors 0 and 1 as local minimums during training.
//
// Returns a value between -1.0 and +1.0.
//
// f(x) = min(a, max(0, x))
// f(x) = min(max(x + 0.5, 0), 1)
// Bounded Rectified Linear Unit.
//
// Constrains the value between 0 and 1, and favors 0 and 1 as local minimums
// during training.
//
// Returns a value between 0.0 and +1.0.
//
// f(x) = min(1, max(0, x))   -- upper bound assumed to be 1.0; the original
//                               comment also mentioned min(max(x+0.5,0),1),
//                               TODO confirm which variant is intended.
// derivative f(x) = 1 for 0 < x < 1; 0 otherwise
double Activation::boundedRelu(const double& value, const bool derivative)
{
  // Was an unimplemented TODO stub that returned 0 for both branches.
  if (derivative)
    return (value > 0 && value < 1) ? 1.0 : 0.0;

  if (value < 0)
    return 0.0;
  if (value > 1)
    return 1.0;
  return value;
}
 
 
// Exponential Linear Unit.
//
// The results of models with relu are pretty impressive and it has become very quickly the standard.
//
// However, even if it is not possible for ReLUs to saturate, they can turn "dead" which means they are 
// never activated because the pre-activation value is always negative.  
// For such units, no gradient can flow through the net. 
//
// Since the output of relu is always non-negative, their mean activation is always positive.
// A positive mean introduces a bias for the next layer which can slow down the learning.
//
// A solution is to use the elu, which acts like relu if value is positive, but for negative values it is a 
// function bounded by a fixed value "-1", for alpha=1.  This behavior helps to push the mean activation of 
// neurons closer to zero which is beneficial for learning and it helps to learn representations that are more 
// robust to noise.
//
// See http://www.picalike.com/blog/2015/11/28/relu-was-yesterday-tomorrow-comes-elu/.
//
// Returns a value between -alpha and +infinity.
//
// f(x) = x * (x > 0) + (x < 0) * (alpha * (T.exp(x) - 1)) //todo check if this is same formula as next line.
// f(a,x) = alpha*((e^x) - 1) for x<0; x for x>=0;
// derivative f(a,x) = f(x) + alpha for x<0; 1 for x>=0;
// Exponential Linear Unit.
//
// Acts like relu for positive values; for negative values it is bounded below
// by -alpha, which pushes mean activations toward zero and avoids "dead"
// units.  See http://www.picalike.com/blog/2015/11/28/relu-was-yesterday-tomorrow-comes-elu/.
//
// Returns a value between -alpha and +infinity.
//
// f(a,x) = alpha * (e^x - 1) for x < 0; x for x >= 0
// derivative f(a,x) = alpha * e^x = f(a,x) + alpha for x < 0; 1 for x >= 0
double Activation::elu(const double& value, const double& alpha, const bool derivative)
{
  if (derivative)
    // Bug fix: the negative branch previously computed f(x) + 1; the correct
    // derivative is f(x) + alpha = alpha * e^x (the old code was only right
    // for alpha == 1).
    return value >= 0 ? 1.0 : alpha * std::exp(value);
  else
    return value >= 0 ? value : alpha * (std::exp(value) - 1);
}
 
 
// Returns a value between 0.0 and +1.0.
//
// f(x) = exp(-x*-x)
// derivative f(x) = -2x(exp(-x*-x))
// Gaussian activation.
//
// Returns a value between 0.0 and +1.0.
//
// f(x) = exp(-x^2)
// derivative f(x) = -2x * exp(-x^2)
double Activation::gaussian(const double& value, const bool derivative)
{
  // Bug fix: the exponent was written as (-value * -value), which equals
  // +x^2 and diverges; the Gaussian requires -(x^2).
  const double g = std::exp(-(value * value));
  return derivative ? -2 * value * g : g;
}
 
 
// Identity function.
//
// Returns a value between -infinity and +infinity.
//
// f(x) = x
// derivative f(x) = 1
// Identity function.
//
// Returns a value between -infinity and +infinity.
//
// f(x) = x
// derivative f(x) = 1
double Activation::identity(const double& value, const bool derivative)
{
  return derivative ? 1 : value;
}
 
 
 
// Identity function.
//
// Returns a value between -infinity and +infinity.
//
// f(x) = x
// derivative f(x) = 1
// Linear (identity) function.
//
// Returns a value between -infinity and +infinity.
//
// f(x) = x
// derivative f(x) = 1
double Activation::linear(const double& value, const bool derivative)
{
  if (!derivative)
    return value;
  return 1;
}
 
 
// Returns a value between .
//
// f(x) = 1 / (1 + e^-x)
// Logistic ("log") activation.
//
// Returns a value between 0.0 and 1.0.
//
// f(x) = 1 / (1 + e^-x)
// derivative f(x) = f(x) * (1 - f(x))
double Activation::log(const double& value, const bool derivative)
{
  const double s = 1.0 / (1.0 + std::exp(-value));
  if (derivative)
    // Was an unimplemented TODO that returned 0, which would zero out all
    // gradients for this activation.
    return s * (1.0 - s);
  else
    return s;

  /*
  if (value < -45.0)
    return 0.0;
  else
  if (value > 45.0)
    return 1.0;
  else
    return 1.0 / (1.0 + std::exp(-value));
  */
}
 
 
// Parameteric Rectified Linear Unit.
//
// Returns a value between -infinity and +infinity.
//
// f(a,x) = ax for x<0; x for x>=0;
// derivative f(a,x) = a for x<0; 1 for x>=0 
// Parametric Rectified Linear Unit.
//
// Returns a value between -infinity and +infinity.
//
// f(a,x) = a*x for x < 0; x for x >= 0
// derivative f(a,x) = a for x < 0; 1 for x >= 0
double Activation::prelu(const double& value, const double& alpha, const bool derivative)
{
  if (value >= 0)
    return derivative ? 1.0 : value;
  return derivative ? alpha : alpha * value;
}
 
 
// Rectified linear unit.
//
// Fast and non-saturating: max(x, 0). 
//
// The results of models with relu are pretty impressive and it has become very quickly the standard.
//
// However, even if it is not possible for ReLUs to saturate, they can turn "dead" which means they are 
// never activated because the pre-activation value is always negative.  
// For such units, no gradient can flow through the net. 
//
// Since the output of relu is always non-negative, their mean activation is always positive.
// A positive mean introduces a bias for the next layer which can slow down the learning.
//
// A solution is to use the elu, which acts like relu if value is positive, but for negative values it is a 
// function bounded by a fixed value "-1", for alpha=1.  This behavior helps to push the mean activation of 
// neurons closer to zero which is beneficial for learning and it helps to learn representations that are more 
// robust to noise.
//
// See http://www.picalike.com/blog/2015/11/28/relu-was-yesterday-tomorrow-comes-elu/.
//
// Returns a value between 0 and +infinity.
//
// f(x) = max(0, x)
// f(x) = 0 for x<0; x for x>=0
// derivative f(x) = 0 for x<0; 1 for x>=0 
// Rectified linear unit.
//
// Fast and non-saturating: max(x, 0).  Note that relu units can go "dead"
// when the pre-activation is always negative, and their non-negative mean
// output biases the next layer; see elu() for the usual remedy and
// http://www.picalike.com/blog/2015/11/28/relu-was-yesterday-tomorrow-comes-elu/.
//
// Returns a value between 0 and +infinity.
//
// f(x) = max(0, x)
// derivative f(x) = 0 for x < 0; 1 for x >= 0
double Activation::relu(const double& value, const bool derivative)
{
  if (value >= 0)
    return derivative ? 1.0 : value;
  return 0.0;
}
 
 
 
// Returns a value between -1.0 and +1.0.
//
// f(x) = 1.7159 * tanh(0.66667 * x)
// LeCun scaled tanh.
//
// Returns a value between -1.7159 and +1.7159.
//
// f(x) = 1.7159 * tanh(0.66667 * x)
// derivative f(x) = 0.66667 * (1.7159 - f(x)^2 / 1.7159)
//                 = 1.7159 * 0.66667 * (1 - tanh^2(0.66667 * x))
double Activation::scaledTanh(const double& value, const bool derivative)
{
  if (derivative)
  {
    // Bug fix: the old code plugged plain tanh(x) into the formula, but the
    // formula requires the full scaled output f(x) = 1.7159*tanh(0.66667*x).
    // Also dropped the stray float literals in this double-precision code.
    const double fx = 1.7159 * std::tanh(0.66667 * value);
    return 0.66667 * (1.7159 - (fx * fx) / 1.7159);
  }
  else
    return 1.7159 * std::tanh(0.66667 * value);
}
 
 
// Returns a value between 0.0 and 1.0.
// Sigmoid (logistic) activation.
//
// Returns a value between 0.0 and 1.0.
//
// f(x) = 1 / (1 + e^-x)
// derivative f(x) = f(x) * (1 - f(x))
double Activation::sigmoid(const double& value, const bool derivative)
{
  // Compute the logistic once; the old derivative branch evaluated
  // sigmoid(value) twice recursively, tripling the exp() calls.
  const double s = 1.0 / (1.0 + std::exp(-value));
  return derivative ? s * (1.0 - s) : s;
}
 
 
/*
// Returns a value between 0.0 and 1.0.
double Activation::sigmoid(const double& value)
{
	return 1.0 / double((1.0 + exp(-value)));
}
 
 
double Activation::sigmoid_derivative(const double& value)
{
	return sigmoid(value) * (1.0 - sigmoid(value));
}
*/
 
 
// Sigmoid with hard saturation limits: inputs below negative_limit clamp to
// 0.0 and inputs above positive_limit clamp to 1.0, avoiding needless exp()
// calls deep in the tails.
//
// Returns a value between 0.0 and 1.0.
double Activation::sigmoid_limit(double value, double positive_limit, double negative_limit)
{
	if (value < negative_limit)
		return 0.0;
	if (value > positive_limit)
		return 1.0;
	return 1.0 / (1.0 + std::exp(-value));
}
 
 
// Returns a value between ~-.217234 and 1.0.
//
// f(x) = 1 for x == 0; sin(x)/x for x != 0.
// derivative f(x) = 0 for x=0; (cos(x)/x) - (sin(x)/(x^2)) for x!= 0
// Cardinal sine (sinc) activation.
//
// Returns a value between ~-0.217234 and 1.0.
//
// f(x) = 1 for x == 0; sin(x)/x otherwise
// derivative f(x) = 0 for x == 0; cos(x)/x - sin(x)/x^2 otherwise
double Activation::sinc(const double& value, const bool derivative)
{
  if (value == 0)
    return derivative ? 0 : 1; // removable singularity: limits at x -> 0
  if (derivative)
    return (std::cos(value) / value) - (std::sin(value) / (value * value));
  return std::sin(value) / value;
}
 
 
// Sinusoid.
//
// Returns a value between -1.0 and 1.0.
//
// f(x) = sin(x)
// derivative f(x) = cos(x)
// Sinusoid.
//
// Returns a value between -1.0 and 1.0.
//
// f(x) = sin(x)
// derivative f(x) = cos(x)
double Activation::sinusoid(const double& value, const bool derivative)
{
  return derivative ? std::cos(value) : std::sin(value);
}
 
 
// Returns a value between -infinity and +infinity.
//
// f(a,x) = - (loge(1 - alpha * (x+alpha))) / alpha for alpha < 0
// f(a,x) = x for alpha == 0
// f(a,x) = ((exp(alpha*x) - 1) / alpha) + alpha for alpha > 0
// derivative f(x) = 1 / (1-alpha(alpha + x)) for alpha <0
// derivative f(x) = exp(alpha * x) for alpha >=0
// Soft exponential: a parameterized family that interpolates between
// logarithmic (alpha < 0), identity (alpha == 0) and exponential (alpha > 0).
//
// Returns a value between -infinity and +infinity.
//
// f(a,x) = -ln(1 - a*(x + a)) / a          for a < 0
// f(a,x) = x                               for a == 0
// f(a,x) = ((e^(a*x) - 1) / a) + a         for a > 0
// derivative f(a,x) = 1 / (1 - a*(a + x))  for a < 0
// derivative f(a,x) = e^(a*x)              for a >= 0
double Activation::softExponential(const double& value, const double& alpha, const bool derivative)
{
  if (derivative)
  {
    if (alpha >= 0)
      return std::exp(alpha * value);
    return 1 / (1 - alpha * (alpha + value));
  }

  if (alpha == 0)
    return value;
  if (alpha > 0)
    return ((std::exp(alpha * value) - 1) / alpha) + alpha;
  // alpha < 0; std::log is the natural logarithm required here.
  return -(std::log(1 - alpha * (value + alpha))) / alpha;
}
 
 
// Returns a value between 0 and +infinity.
//
// f(x) = log(1 + exp(x))
// derivative f(x) = 1 / (1 + exp(-x))
// SoftPlus: a smooth approximation of relu.
//
// Returns a value between 0 and +infinity.
//
// f(x) = ln(1 + e^x)
// derivative f(x) = 1 / (1 + e^-x)   (the logistic function)
double Activation::softPlus(const double& value, const bool derivative)
{
  if (!derivative)
    return std::log(1 + std::exp(value));
  return 1 / (1 + std::exp(-value));
}
 
 
 
// Returns a value between .
//
// f(x) = log(1 + e^x)
// Soft ReLU (softplus).
//
// Returns a value between 0 and +infinity.
//
// f(x) = ln(1 + e^x)
// derivative f(x) = 1 / (1 + e^-x)
double Activation::softRelu(const double& value, const bool derivative)
{
  // Was an unimplemented TODO stub returning 0 for both branches.
  // Implemented per this function's own documented formula (same as softPlus).
  if (derivative)
    return 1.0 / (1.0 + std::exp(-value));
  else
    return std::log(1.0 + std::exp(value));
}
 
 
// Returns a value between -1.0 and 1.0.
//
// f(x) = x/(1 + abs(x))
// derivative f(x) = 1/((1 + abs(x))*(1 + abs(x)))
// SoftSign.
//
// Returns a value between -1.0 and 1.0.
//
// f(x) = x / (1 + |x|)
// derivative f(x) = 1 / (1 + |x|)^2
double Activation::softSign(const double& value, const bool derivative)
{
  const double denom = 1 + std::abs(value);
  if (derivative)
    return 1 / (denom * denom);
  return value / denom;
}
 
 
// Soft Step aka Logistic.
//
// Returns a value between 0.0 and +1.0.
//
// f(x) = 1 / (1 + e^-x)
// derivative f(x) = (1 / (1 + e^-x)) * (1 - (1 / (1 + e^-x)))
// Soft Step, aka Logistic.
//
// Returns a value between 0.0 and +1.0.
//
// f(x) = 1 / (1 + e^-x)
// derivative f(x) = f(x) * (1 - f(x))
double Activation::softStep(const double& value, const bool derivative)
{
  const double s = 1 / (1 + std::exp(-value));
  return derivative ? s * (1 - s) : s;
}
 
 
// Returns a value between -1.0 and +1.0.
//
// f(x) = sqrt(x)
// Square-root activation.
//
// Returns a value between 0 and +infinity (NaN for negative input, as with
// std::sqrt).
//
// f(x) = sqrt(x)
// derivative f(x) = 1 / (2 * sqrt(x))   (+infinity at x == 0)
double Activation::sqrt(const double& value, const bool derivative)
{
  if (derivative)
    // Was an unimplemented TODO returning 0.
    return 1.0 / (2.0 * std::sqrt(value));
  else
    return std::sqrt(value);
}
 
 
// Returns a value between -1.0 and +1.0.
//
// f(x) = x^2
// Square activation.
//
// Returns a value between 0 and +infinity.
//
// f(x) = x^2
// derivative f(x) = 2x
double Activation::square(const double& value, const bool derivative)
{
  if (derivative)
    // Was an unimplemented TODO returning 0.
    return 2 * value;
  else
    return value * value;
}
 
 
// Returns a value between -1.0 and +1.0.
//
// f(x) = 
// Squash: identical to softSign.
//
// Returns a value between -1.0 and +1.0.
//
// f(x) = x / (1 + |x|)
// derivative f(x) = 1 / (1 + |x|)^2
double Activation::squash(const double& value, const bool derivative)
{
  if (derivative)
  {
    // Bug fix: the old branches returned value/(1 +- value), which is f(x)
    // itself, not the derivative.  The correct derivative (matching
    // softSign's) is 1 / (1 + |x|)^2.
    const double denom = 1 + std::abs(value);
    return 1 / (denom * denom);
  }
  else
    return value / (1 + std::abs(value));
}
 
 
// Binary Step.
//
// Returns a value between 0.0 and +1.0.
//
// f(x) = 0 for x<0; 1 for x>=0
//
// derivative f(x) = 0 for x != 0; ? for x == 0
// Binary (Heaviside) Step.
//
// Returns a value between 0.0 and +1.0.
//
// f(x) = 0 for x < 0; 1 for x >= 0
// derivative f(x) = 0 for x != 0; undefined at x == 0 (0 returned by convention)
double Activation::step(const double& value, const bool derivative)
{
  if (derivative)
    // Cleanup: the old expression (value != 0 ? 0 : value) always evaluated
    // to 0 anyway; the step function is flat wherever the derivative exists.
    return 0;
  else
    return value >= 0 ? 1 : 0;
}
 
 
 
// Returns a value between -1.0 and +1.0.
//
// f(x) = a*tanh(b*x)
//
// f(x) = tanh(x) = (2/(1+exp(-2*value))) - 1
// derivative f(x) = 1 - f(x) * f(x)
// Hyperbolic tangent activation.
//
// Returns a value between -1.0 and +1.0.
//
// f(x) = tanh(x) = (2 / (1 + e^(-2x))) - 1
// derivative f(x) = 1 - f(x)^2
double Activation::tanh(const double& value, const bool derivative)
{
  const double t = std::tanh(value);
  if (!derivative)
    return t;
  return 1.0 - t * t;
}
 
 
// Returns a value between -1.0 and +1.0.
// tanh with hard saturation limits: inputs below negative_limit clamp to
// -1.0 and inputs above positive_limit clamp to +1.0, skipping the tanh
// evaluation deep in the tails.
//
// NOTE(review): value is a non-const reference but is never modified here --
// presumably a historical signature; confirm before tightening to const.
//
// Returns a value between -1.0 and +1.0.
double Activation::tanh_limit(double& value, double positive_limit, double negative_limit)
{
	if (value < negative_limit)
		return -1.0;
	if (value > positive_limit)
		return 1.0;
	// Delegates to the member tanh() (not std::tanh).
	return tanh(value);
}
 
 
// Returns the currently selected activation type.
Activation_Types Activation::getActivationType()
{
  return activation_type;
}
 
 
// Selects the activation function used by activate().
void Activation::setActivationType(Activation_Types _activation_type)
{
  activation_type = _activation_type;
}
 
 
 
 
 
 
 
/*
public double SoftMax(double x, string layer)
{
  // Determine max
  double max = double.MinValue;
  if (layer == "ih")
    max = (ihSum0 > ihSum1) ? ihSum0 : ihSum1;
  else 
  if (layer == "ho")
    max = (hoSum0 > hoSum1) ? hoSum0 : hoSum1;
 
  // Compute scale
  double scale = 0.0;
  if (layer == "ih")
    scale = Math.Exp(ihSum0 - max) + Math.Exp(ihSum1 - max);
  else 
  if (layer == "ho")
    scale = Math.Exp(hoSum0 - max ) + Math.Exp(hoSum1 - max);
 
  return Math.Exp(x - max) / scale;
}
 
*/