ref: e1a0b0dff9876de990e7c7b86e8b6ec97b707017
parent: 8a81df27372881fa2f41e196c72a6c58aafd9e2a
author: glenda <glenda@kingship>
date: Sun Nov 23 10:03:33 EST 2025
refactoring: split ann/ann.c into per-concern source files

ann.c is gone; its pieces now live in annfns.c (activation and
gradient functions), annweights.c, annneurons.c, annlayers.c,
annrun.c (forward pass), anntrain.c (plain backprop), anntrainadam.c
(Adam and AdaMax trainers), and anncreate.c; the mkfile archives the
new objects, and the unused OpenMP pragmas are dropped. anncreate()
now collects its varargs and delegates to anncreatev(), so it, too,
gives the output layer a sigmoid activation. Along the way:
parenthesize the fmax macro, swap calloc's argument order in
anncreate(), correct RAND_MAX for Plan 9's rand() (0 <= x < 2^15),
and set the Adam epsilon to 1e-8.
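The public entry points keep their signatures, so callers should not
need changes. As orientation, a minimal usage sketch against the
prototypes visible in the hunks below (the layer sizes and training
data are illustrative only):

	int shape[] = {2, 4, 1};
	double in[2] = {0.0, 1.0}, want[1] = {1.0};
	double *res;
	Ann *net;

	net = anncreatev(3, shape);	/* 2 inputs, 4 hidden, 1 output */
	anntrain(net, in, want);	/* one backprop step; returns 0.5*sum(err^2) */
	res = annrun(net, in);		/* forward pass; result is calloc'd, caller frees */
	free(res);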
--- a/ann/ann.c
+++ /dev/null
@@ -1,521 +1,0 @@
-#include <u.h>
-#include <libc.h>
-
-#define RAND_MAX 0xFFFF
-#define fmax(a,b) (a > b? a: b)
-
-#include "ann.h"
-
-#define ACTIVATION activation_leaky_relu
-#define GRADIENT gradient_leaky_relu
-
-double
-activation_sigmoid(Neuron *in)
-{
-	return 1.0/(1.0+exp(-in->sum));
-}
-
-double
-gradient_sigmoid(Neuron *in)
-{
-	double y = in->value;
- return y * (1.0 - y);
-}
-
-double
-activation_tanh(Neuron *in)
-{
-	return tanh(in->sum);
-}
-
-double
-gradient_tanh(Neuron *in)
-{
-	return 1.0 - in->value*in->value;
-}
-
-double
-activation_leaky_relu(Neuron *in)
-{
-	if (in->sum > 0)
- return in->sum;
- return in->sum * 0.01;
-}
-
-double
-gradient_leaky_relu(Neuron *in)
-{
-	if (in->sum > 0)
- return 1.0;
- return 0.01;
-}
-
-double
-activation_piece(Neuron *in)
-{
-	if (in->sum < -0.5)
- return 0.0;
- else if (in->sum > 0.5)
- return 1.0;
- return (in->sum + 0.5);
-}
-
-double
-gradient_piece(Neuron *in)
-{
-	if (in->sum > -0.5 && in->sum < 0.5)
- return 1.0;
- return 0.01;
-}
-
-Weights*
-weightsinitdoubles(Weights *in, double *init)
-{
-	int i, o;
-
- for (i = 0; i <= in->inputs; i++)
- for (o = 0; o < in->outputs; o++)
- in->values[i][o] = init[o];
-
- return in;
-}
-
-Weights*
-weightsinitdouble(Weights *in, double init)
-{
-	int i, o;
-
- for (i = 0; i <= in->inputs; i++)
- for (o = 0; o < in->outputs; o++)
- in->values[i][o] = init;
-
- return in;
-}
-
-Weights*
-weightsinitrandscale(Weights *in, double scale)
-{
-	int i, o;
-
- srand(time(0));
- for (i = 0; i <= in->inputs; i++)
- for (o = 0; o < in->outputs; o++)
- in->values[i][o] = (((double)rand()/RAND_MAX) - 0.5) * scale;
-
- return in;
-}
-
-Weights*
-weightsinitrand(Weights *in)
-{
-	weightsinitrandscale(in, 2.0);
- return in;
-}
-
-Neuron*
-neuroninit(Neuron *in, double (*activation)(Neuron*), double (*gradient)(Neuron*), double steepness)
-{
-	in->activation = activation;
- in->gradient = gradient;
- in->steepness = steepness;
- in->value = 1.0;
- in->sum = 0;
- return in;
-}
-
-Neuron*
-neuroncreate(double (*activation)(Neuron*), double (*gradient)(Neuron*), double steepness)
-{
-	Neuron *ret = calloc(1, sizeof(Neuron));
- neuroninit(ret, activation, gradient, steepness);
- return ret;
-}
-
-Layer*
-layercreate(int num_neurons, double(*activation)(Neuron*), double(*gradient)(Neuron*))
-{
-	Layer *ret = calloc(1, sizeof(Layer));
- int i;
-
- ret->n = num_neurons;
- ret->neurons = calloc(num_neurons+1, sizeof(Neuron*));
-	for (i = 0; i <= ret->n; i++) {
-		ret->neurons[i] = neuroncreate(activation, gradient, 1.0);
- }
- return ret;
-}
-
-Weights*
-weightscreate(int inputs, int outputs, int initialize)
-{
-	int i;
- Weights *ret = calloc(1, sizeof(Weights));
- ret->inputs = inputs;
- ret->outputs = outputs;
- ret->values = calloc(inputs+1, sizeof(double*));
- for (i = 0; i <= inputs; i++)
- ret->values[i] = calloc(outputs, sizeof(double));
- if (initialize)
- weightsinitrand(ret);
- return ret;
-}
-
-Ann*
-anncreate(int num_layers, ...)
-{
-	Ann *ret = calloc(1, sizeof(Ann));
- va_list args;
- int arg;
- int i;
-
- va_start(args, num_layers);
- ret->n = num_layers;
- ret->rate = 0.7;
- ret->layers = calloc(num_layers, sizeof(Layer*));
- ret->weights = calloc(num_layers-1, sizeof(Weights*));
- ret->deltas = calloc(num_layers-1, sizeof(Weights*));
-
-	for (i = 0; i < num_layers; i++) {
-		arg = va_arg(args, int);
- if (arg < 0 || arg > 1000000)
- arg = 0;
- ret->layers[i] = layercreate(arg, ACTIVATION, GRADIENT);
-		if (i > 0) {
-			ret->weights[i-1] = weightscreate(ret->layers[i-1]->n, ret->layers[i]->n, 1);
- ret->deltas[i-1] = weightscreate(ret->layers[i-1]->n, ret->layers[i]->n, 0);
- }
- }
-
- va_end(args);
-
- return ret;
-}
-
-Ann*
-anncreatev(int num_layers, int *layers)
-{
-	Ann *ret = calloc(1, sizeof(Ann));
- int arg;
- int i;
-
- ret->n = num_layers;
- ret->rate = 0.7;
- ret->layers = calloc(num_layers, sizeof(Layer*));
- ret->weights = calloc(num_layers-1, sizeof(Weights*));
- ret->deltas = calloc(num_layers-1, sizeof(Weights*));
-
-	for (i = 0; i < num_layers; i++) {
-		arg = layers[i];
- if (arg < 0 || arg > 1000000)
- arg = 0;
- if (i < (num_layers-1))
- ret->layers[i] = layercreate(arg, ACTIVATION, GRADIENT);
- else
- ret->layers[i] = layercreate(arg, activation_sigmoid, gradient_sigmoid);
-		if (i > 0) {
-			ret->weights[i-1] = weightscreate(ret->layers[i-1]->n, ret->layers[i]->n, 1);
- ret->deltas[i-1] = weightscreate(ret->layers[i-1]->n, ret->layers[i]->n, 0);
- }
- }
-
- return ret;
-}
-
-double*
-annrun(Ann *ann, double *input)
-{
-	int l, i, o;
- int outputs = ann->layers[ann->n - 1]->n;
- double *ret = calloc(outputs, sizeof(double));
- Neuron *O;
- double sum;
-
- for (i = 0; i < ann->layers[0]->n; i++)
- ann->layers[0]->neurons[i]->value = input[i];
-
-	for (l = 1; l < ann->n; l++) {
-		for (o = 0; o < ann->layers[l]->n; o++) {
-			O = ann->layers[l]->neurons[o];
- O->sum = ann->weights[l-1]->values[ann->weights[l-1]->inputs][o]; // bias
- sum = O->sum;
- #pragma omp parallel for reduction (+:sum)
- for (i = 0; i < ann->layers[l-1]->n; i++)
- sum += ann->layers[l-1]->neurons[i]->value * ann->weights[l-1]->values[i][o];
- if (sum < -300.0)
- sum = -300.0;
- else if (sum > 300.0)
- sum = 300.0;
- O->sum = sum;
- O->value = O->activation(O);
- }
- }
-
- for (o = 0; o < outputs; o++)
- ret[o] = ann->layers[ann->n - 1]->neurons[o]->value;
-
- return ret;
-}
-
-double
-torque(double input)
-{
-	if (input < -.9999999)
- return -17.0;
-
- if (input > .9999999)
- return 17.0;
-
- return log((1.0 + input) / (1.0 - input));
-}
-
-double
-anntrain(Ann *ann, double *inputs, double *outputs)
-{
-	double *error = annrun(ann, inputs);
- double ret = 0.0;
- int noutputs = ann->layers[ann->n-1]->n;
- double acc, sum;
- int o, i, w, n;
- Neuron *O, *I;
- Weights *W, *D, *D2;
-
-	for (o = 0; o < noutputs; o++) {
-		// error = outputs[o] - result
- error[o] -= outputs[o];
- error[o] = -error[o];
- ret += pow(error[o], 2.0) * 0.5;
- error[o] = torque(error[o]);
- }
- D = ann->deltas[ann->n-2];
- weightsinitdoubles(D, error);
-	for (i = 0; i < (ann->n-2); i++) {
-		D = ann->deltas[i];
- weightsinitdouble(D, 1.0);
- }
-
- // backpropagate MSE
- D2 = ann->deltas[ann->n-2];
-	for (w = ann->n-2; w >= 0; w--) {
-		D = ann->deltas[w];
-
-		for (o = 0; o < ann->layers[w+1]->n; o++) {
-			O = ann->layers[w+1]->neurons[o];
- acc = O->gradient(O) * O->steepness;
- sum = 1.0;
-			if (D2 != D) {
-				W = ann->weights[w + 1];
- sum = 0.0;
- #pragma omp parallel for reduction (+:sum)
- for (n = 0; n < D2->outputs; n++)
- sum += D2->values[o][n] * W->values[o][n];
- }
-			for (i = 0; i <= ann->layers[w]->n; i++) {
-				D->values[i][o] *= acc * sum;
- }
- }
-
- D2 = D;
- }
-
- // update weights
-	for (w = 0; w < ann->n-1; w++) {
-		W = ann->weights[w];
- D = ann->deltas[w];
-
-		for (i = 0; i <= W->inputs; i++) {
-			I = ann->layers[w]->neurons[i];
-			for (o = 0; o < W->outputs; o++) {
-				W->values[i][o] += D->values[i][o] * ann->rate * I->value;
- }
- }
- }
-
- free(error);
- return ret;
-}
-
-Ann*
-adaminit(Ann *ann)
-{
-	int i;
- Adam *I = calloc(1, sizeof(Adam));
-
- I->rate = 0.001;
- I->beta1 = 0.9;
- I->beta2 = 0.999;
- I->epsilon = 10e-8;
- I->timestep = 0;
- I->first = calloc(ann->n-1, sizeof(Weights*));
- I->second = calloc(ann->n-1, sizeof(Weights*));
-
-	for (i = 0; i < (ann->n-1); i++) {
-		I->first[i] = weightscreate(ann->layers[i]->n, ann->layers[i+1]->n, 0);
- I->second[i] = weightscreate(ann->layers[i]->n, ann->layers[i+1]->n, 0);
- }
-
- ann->internal = I;
-
- return ann;
-}
-
-double
-anntrain_adam(Ann *ann, double *inputs, double *outputs)
-{
-	double *error = annrun(ann, inputs);
- double ret = 0.0;
- int noutputs = ann->layers[ann->n-1]->n;
- double acc, sum, m, v;
- int o, i, w, n;
- Neuron *O, *I;
- Weights *W, *D, *D2, *M, *V;
- Adam *annI;
-
- if (ann->internal == 0)
- adaminit(ann);
- annI = ann->internal;
- annI->timestep++;
-
-	for (o = 0; o < noutputs; o++) {
-		// error = outputs[o] - result
- error[o] -= outputs[o];
- error[o] = -error[o];
- ret += pow(error[o], 2.0) * 0.5;
- error[o] = torque(error[o]);
- }
- D = ann->deltas[ann->n-2];
- weightsinitdoubles(D, error);
-	for (i = 0; i < (ann->n-2); i++) {
-		D = ann->deltas[i];
- weightsinitdouble(D, 1.0);
- }
-
- // backpropagate MSE
- D2 = ann->deltas[ann->n-2];
-	for (w = ann->n-2; w >= 0; w--) {
-		D = ann->deltas[w];
- M = annI->first[w];
- V = annI->second[w];
-
-		for (o = 0; o < ann->layers[w+1]->n; o++) {
-			O = ann->layers[w+1]->neurons[o];
- acc = O->gradient(O) * O->steepness;
- sum = 1.0;
-			if (D2 != D) {
-				W = ann->weights[w+1];
- sum = 0.0;
- #pragma omp parallel for reduction (+:sum)
- for (n = 0; n < D2->outputs; n++)
- sum += D2->values[o][n] * W->values[o][n];
- }
-			for (i = 0; i <= ann->layers[w]->n; i++) {
-				I = ann->layers[w]->neurons[i];
- D->values[i][o] *= acc * sum;
- M->values[i][o] *= annI->beta1;
- M->values[i][o] += (1.0 - annI->beta1) * D->values[i][o] * I->value;
- V->values[i][o] *= annI->beta2;
- V->values[i][o] += (1.0 - annI->beta2) * D->values[i][o] * D->values[i][o] * I->value * I->value;
- }
- }
-
- D2 = D;
- }
-
- // update weights
-	for (w = 0; w < ann->n-1; w++) {
-		W = ann->weights[w];
- M = annI->first[w];
- V = annI->second[w];
-
-		for (i = 0; i <= W->inputs; i++) {
-			for (o = 0; o < W->outputs; o++) {
-				m = M->values[i][o] / (annI->timestep < 100? (1.0 - pow(annI->beta1, annI->timestep)): 1.0);
- v = V->values[i][o] / (annI->timestep < 10000? (1.0 - pow(annI->beta2, annI->timestep)): 1.0);
- W->values[i][o] += (m / (sqrt(v) + annI->epsilon)) * annI->rate;
- }
- }
- }
-
- free(error);
- return ret;
-}
-
-double
-anntrain_adamax(Ann *ann, double *inputs, double *outputs)
-{
-	double *error = annrun(ann, inputs);
- double ret = 0.0;
- int noutputs = ann->layers[ann->n-1]->n;
- double acc, sum, m, v;
- int o, i, w, n;
- Neuron *O, *I;
- Weights *W, *D, *D2, *M, *V;
- Adam *annI;
-
- if (ann->internal == 0)
- adaminit(ann);
- annI = ann->internal;
- annI->rate = 0.002;
- annI->timestep++;
-
-	for (o = 0; o < noutputs; o++) {
-		// error = outputs[o] - result
- error[o] -= outputs[o];
- error[o] = -error[o];
- ret += pow(error[o], 2.0) * 0.5;
- error[o] = torque(error[o]);
- }
- D = ann->deltas[ann->n-2];
- weightsinitdoubles(D, error);
-	for (i = 0; i < (ann->n-2); i++) {
-		D = ann->deltas[i];
- weightsinitdouble(D, 1.0);
- }
-
- // backpropagate MSE
- D2 = ann->deltas[ann->n-2];
-	for (w = ann->n-2; w >= 0; w--) {
-		D = ann->deltas[w];
- M = annI->first[w];
- V = annI->second[w];
-
-		for (o = 0; o < ann->layers[w+1]->n; o++) {
-			O = ann->layers[w+1]->neurons[o];
- acc = O->gradient(O) * O->steepness;
- sum = 1.0;
-			if (D2 != D) {
-				W = ann->weights[w+1];
- sum = 0.0;
- #pragma omp parallel for reduction (+:sum)
- for (n = 0; n < D2->outputs; n++)
- sum += D2->values[o][n] * W->values[o][n];
- }
-			for (i = 0; i <= ann->layers[w]->n; i++) {
-				I = ann->layers[w]->neurons[i];
- D->values[i][o] *= acc * sum;
- M->values[i][o] *= annI->beta1;
- M->values[i][o] += (1.0 - annI->beta1) * D->values[i][o] * I->value;
- V->values[i][o] = fmax(V->values[i][o] * annI->beta2, fabs(D->values[i][o] * I->value));
- }
- }
-
- D2 = D;
- }
-
- // update weights
-	for (w = 0; w < ann->n-1; w++) {
-		W = ann->weights[w];
- M = annI->first[w];
- V = annI->second[w];
-
-		for (i = 0; i <= W->inputs; i++) {
-			for (o = 0; o < W->outputs; o++) {
-				m = M->values[i][o];
- v = V->values[i][o];
- W->values[i][o] += (annI->rate/(1.0 - (annI->timestep < 100? pow(annI->beta1, annI->timestep): 0.0))) * (m/v);
- }
- }
- }
-
- free(error);
- return ret;
-}
--- /dev/null
+++ b/ann/anncreate.c
@@ -1,0 +1,61 @@
+#include <u.h>
+#include <libc.h>
+
+#include "ann.h"
+
+Ann*
+anncreatev(int num_layers, int *layers)
+{
+	Ann *ret = calloc(1, sizeof(Ann));
+ int arg;
+ int i;
+
+ ret->n = num_layers;
+ ret->rate = 0.7;
+ ret->layers = calloc(num_layers, sizeof(Layer*));
+ ret->weights = calloc(num_layers-1, sizeof(Weights*));
+ ret->deltas = calloc(num_layers-1, sizeof(Weights*));
+
+	for (i = 0; i < num_layers; i++) {
+		arg = layers[i];
+ if (arg < 0 || arg > 1000000)
+ arg = 0;
+ if (i < (num_layers-1))
+ ret->layers[i] = layercreate(arg, activation_leaky_relu, gradient_leaky_relu);
+ else
+ ret->layers[i] = layercreate(arg, activation_sigmoid, gradient_sigmoid);
+		if (i > 0) {
+			ret->weights[i-1] = weightscreate(ret->layers[i-1]->n, ret->layers[i]->n, 1);
+ ret->deltas[i-1] = weightscreate(ret->layers[i-1]->n, ret->layers[i]->n, 0);
+ }
+ }
+
+ return ret;
+}
+
+Ann*
+anncreate(int num_layers, ...)
+{
+	Ann *ret;
+ va_list args;
+ int arg;
+ int i;
+ int *layers;
+
+ va_start(args, num_layers);
+
+	layers = calloc(num_layers, sizeof(int));
+
+	for (i = 0; i < num_layers; i++) {
+		arg = va_arg(args, int);
+ layers[i] = arg;
+ }
+
+ va_end(args);
+
+ ret = anncreatev(num_layers, layers);
+
+ free(layers);
+
+ return ret;
+}
--- /dev/null
+++ b/ann/annfns.c
@@ -1,0 +1,63 @@
+#include <u.h>
+#include <libc.h>
+
+#include "ann.h"
+
+double
+activation_sigmoid(Neuron *in)
+{
+	return 1.0/(1.0+exp(-in->sum));
+}
+
+double
+gradient_sigmoid(Neuron *in)
+{
+	double y = in->value;
+ return y * (1.0 - y);
+}
+
+double
+activation_tanh(Neuron *in)
+{
+	return tanh(in->sum);
+}
+
+double
+gradient_tanh(Neuron *in)
+{
+	return 1.0 - in->value*in->value;
+}
+
+double
+activation_leaky_relu(Neuron *in)
+{
+	if (in->sum > 0)
+ return in->sum;
+ return in->sum * 0.01;
+}
+
+double
+gradient_leaky_relu(Neuron *in)
+{
+	if (in->sum > 0)
+ return 1.0;
+ return 0.01;
+}
+
+double
+activation_piece(Neuron *in)
+{
+	if (in->sum < -0.5)
+ return 0.0;
+ else if (in->sum > 0.5)
+ return 1.0;
+ return (in->sum + 0.5);
+}
+
+double
+gradient_piece(Neuron *in)
+{
+	if (in->sum > -0.5 && in->sum < 0.5)
+ return 1.0;
+ return 0.01;
+}
--- /dev/null
+++ b/ann/annlayers.c
@@ -1,0 +1,18 @@
+#include <u.h>
+#include <libc.h>
+
+#include "ann.h"
+
+Layer*
+layercreate(int num_neurons, double(*activation)(Neuron*), double(*gradient)(Neuron*))
+{
+	Layer *ret = calloc(1, sizeof(Layer));
+ int i;
+
+ ret->n = num_neurons;
+ ret->neurons = calloc(num_neurons+1, sizeof(Neuron*));
+	for (i = 0; i <= ret->n; i++) {
+		ret->neurons[i] = neuroncreate(activation, gradient, 1.0);
+ }
+ return ret;
+}
--- /dev/null
+++ b/ann/annneurons.c
@@ -1,0 +1,23 @@
+#include <u.h>
+#include <libc.h>
+
+#include "ann.h"
+
+Neuron*
+neuroninit(Neuron *in, double (*activation)(Neuron*), double (*gradient)(Neuron*), double steepness)
+{
+	in->activation = activation;
+ in->gradient = gradient;
+ in->steepness = steepness;
+ in->value = 1.0;
+ in->sum = 0;
+ return in;
+}
+
+Neuron*
+neuroncreate(double (*activation)(Neuron*), double (*gradient)(Neuron*), double steepness)
+{
+	Neuron *ret = calloc(1, sizeof(Neuron));
+ neuroninit(ret, activation, gradient, steepness);
+ return ret;
+}
--- /dev/null
+++ b/ann/annrun.c
@@ -1,0 +1,39 @@
+#include <u.h>
+#include <libc.h>
+
+#include "ann.h"
+
+double*
+annrun(Ann *ann, double *input)
+{
+	int l, i, o;
+ int outputs = ann->layers[ann->n - 1]->n;
+ double *ret = calloc(outputs, sizeof(double));
+ Neuron *O;
+ double sum;
+
+ for (i = 0; i < ann->layers[0]->n; i++)
+ ann->layers[0]->neurons[i]->value = input[i];
+
+	for (l = 1; l < ann->n; l++) {
+		for (o = 0; o < ann->layers[l]->n; o++) {
+			O = ann->layers[l]->neurons[o];
+ O->sum = ann->weights[l-1]->values[ann->weights[l-1]->inputs][o]; // bias
+ sum = O->sum;
+ for (i = 0; i < ann->layers[l-1]->n; i++)
+ sum += ann->layers[l-1]->neurons[i]->value * ann->weights[l-1]->values[i][o];
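+			// clamp the pre-activation so exp() in the activations cannot overflow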
+ if (sum < -300.0)
+ sum = -300.0;
+ else if (sum > 300.0)
+ sum = 300.0;
+ O->sum = sum;
+ O->value = O->activation(O);
+ }
+ }
+
+ for (o = 0; o < outputs; o++)
+ ret[o] = ann->layers[ann->n - 1]->neurons[o]->value;
+
+ return ret;
+}
--- /dev/null
+++ b/ann/anntrain.c
@@ -1,0 +1,83 @@
+#include <u.h>
+#include <libc.h>
+
+#include "ann.h"
+
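+// error-shaping transform: log((1+x)/(1-x)) = 2*atanh(x), clamped to
+// roughly ±17 so the result stays finite as |x| approaches 1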
+static double
+torque(double input)
+{
+	if (input < -.9999999)
+ return -17.0;
+
+ if (input > .9999999)
+ return 17.0;
+
+ return log((1.0 + input) / (1.0 - input));
+}
+
+double
+anntrain(Ann *ann, double *inputs, double *outputs)
+{
+	double *error = annrun(ann, inputs);
+ double ret = 0.0;
+ int noutputs = ann->layers[ann->n-1]->n;
+ double acc, sum;
+ int o, i, w, n;
+ Neuron *O, *I;
+ Weights *W, *D, *D2;
+
+	for (o = 0; o < noutputs; o++) {
+		// error = outputs[o] - result
+ error[o] -= outputs[o];
+ error[o] = -error[o];
+ ret += pow(error[o], 2.0) * 0.5;
+ error[o] = torque(error[o]);
+ }
+ D = ann->deltas[ann->n-2];
+ weightsinitdoubles(D, error);
+	for (i = 0; i < (ann->n-2); i++) {
+		D = ann->deltas[i];
+ weightsinitdouble(D, 1.0);
+ }
+
+ // backpropagate MSE
+ D2 = ann->deltas[ann->n-2];
+	for (w = ann->n-2; w >= 0; w--) {
+		D = ann->deltas[w];
+
+		for (o = 0; o < ann->layers[w+1]->n; o++) {
+			O = ann->layers[w+1]->neurons[o];
+ acc = O->gradient(O) * O->steepness;
+ sum = 1.0;
+			if (D2 != D) {
+				W = ann->weights[w + 1];
+ sum = 0.0;
+ for (n = 0; n < D2->outputs; n++)
+ sum += D2->values[o][n] * W->values[o][n];
+ }
+			for (i = 0; i <= ann->layers[w]->n; i++) {
+				D->values[i][o] *= acc * sum;
+ }
+ }
+
+ D2 = D;
+ }
+
+ // update weights
+	for (w = 0; w < ann->n-1; w++) {
+		W = ann->weights[w];
+ D = ann->deltas[w];
+
+		for (i = 0; i <= W->inputs; i++) {
+			I = ann->layers[w]->neurons[i];
+			for (o = 0; o < W->outputs; o++) {
+				W->values[i][o] += D->values[i][o] * ann->rate * I->value;
+ }
+ }
+ }
+
+ free(error);
+ return ret;
+}
--- /dev/null
+++ b/ann/anntrainadam.c
@@ -1,0 +1,206 @@
+#include <u.h>
+#include <libc.h>
+
+#define fmax(a,b) ((a) > (b)? (a): (b))
+
+#include "ann.h"
+
+static double
+torque(double input)
+{
+	if (input < -.9999999)
+ return -17.0;
+
+ if (input > .9999999)
+ return 17.0;
+
+ return log((1.0 + input) / (1.0 - input));
+}
+
+Ann*
+adaminit(Ann *ann)
+{
+	int i;
+ Adam *I = calloc(1, sizeof(Adam));
+
+ I->rate = 0.001;
+ I->beta1 = 0.9;
+ I->beta2 = 0.999;
+	I->epsilon = 1e-8;	/* 10^-8, per the Adam paper */
+ I->timestep = 0;
+ I->first = calloc(ann->n-1, sizeof(Weights*));
+ I->second = calloc(ann->n-1, sizeof(Weights*));
+
+	for (i = 0; i < (ann->n-1); i++) {
+		I->first[i] = weightscreate(ann->layers[i]->n, ann->layers[i+1]->n, 0);
+ I->second[i] = weightscreate(ann->layers[i]->n, ann->layers[i+1]->n, 0);
+ }
+
+ ann->internal = I;
+
+ return ann;
+}
+
+double
+anntrain_adam(Ann *ann, double *inputs, double *outputs)
+{
+	double *error = annrun(ann, inputs);
+ double ret = 0.0;
+ int noutputs = ann->layers[ann->n-1]->n;
+ double acc, sum, m, v;
+ int o, i, w, n;
+ Neuron *O, *I;
+ Weights *W, *D, *D2, *M, *V;
+ Adam *annI;
+
+ if (ann->internal == 0)
+ adaminit(ann);
+ annI = ann->internal;
+ annI->timestep++;
+
+	for (o = 0; o < noutputs; o++) {
+		// error = outputs[o] - result
+ error[o] -= outputs[o];
+ error[o] = -error[o];
+ ret += pow(error[o], 2.0) * 0.5;
+ error[o] = torque(error[o]);
+ }
+ D = ann->deltas[ann->n-2];
+ weightsinitdoubles(D, error);
+	for (i = 0; i < (ann->n-2); i++) {
+		D = ann->deltas[i];
+ weightsinitdouble(D, 1.0);
+ }
+
+ // backpropagate MSE
+ D2 = ann->deltas[ann->n-2];
+	for (w = ann->n-2; w >= 0; w--) {
+		D = ann->deltas[w];
+ M = annI->first[w];
+ V = annI->second[w];
+
+		for (o = 0; o < ann->layers[w+1]->n; o++) {
+			O = ann->layers[w+1]->neurons[o];
+ acc = O->gradient(O) * O->steepness;
+ sum = 1.0;
+			if (D2 != D) {
+				W = ann->weights[w+1];
+ sum = 0.0;
+ for (n = 0; n < D2->outputs; n++)
+ sum += D2->values[o][n] * W->values[o][n];
+ }
+			for (i = 0; i <= ann->layers[w]->n; i++) {
+				I = ann->layers[w]->neurons[i];
+ D->values[i][o] *= acc * sum;
+ M->values[i][o] *= annI->beta1;
+ M->values[i][o] += (1.0 - annI->beta1) * D->values[i][o] * I->value;
+ V->values[i][o] *= annI->beta2;
+ V->values[i][o] += (1.0 - annI->beta2) * D->values[i][o] * D->values[i][o] * I->value * I->value;
+ }
+ }
+
+ D2 = D;
+ }
+
+ // update weights
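+	// Adam step: w += rate * mhat/(sqrt(vhat) + epsilon), with mhat = m/(1 - beta1^t)
+	// and vhat = v/(1 - beta2^t); the bias correction is dropped once it is negligible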
+	for (w = 0; w < ann->n-1; w++) {
+		W = ann->weights[w];
+ M = annI->first[w];
+ V = annI->second[w];
+
+		for (i = 0; i <= W->inputs; i++) {
+			for (o = 0; o < W->outputs; o++) {
+				m = M->values[i][o] / (annI->timestep < 100? (1.0 - pow(annI->beta1, annI->timestep)): 1.0);
+ v = V->values[i][o] / (annI->timestep < 10000? (1.0 - pow(annI->beta2, annI->timestep)): 1.0);
+ W->values[i][o] += (m / (sqrt(v) + annI->epsilon)) * annI->rate;
+ }
+ }
+ }
+
+ free(error);
+ return ret;
+}
+
+double
+anntrain_adamax(Ann *ann, double *inputs, double *outputs)
+{
+	double *error = annrun(ann, inputs);
+ double ret = 0.0;
+ int noutputs = ann->layers[ann->n-1]->n;
+ double acc, sum, m, v;
+ int o, i, w, n;
+ Neuron *O, *I;
+ Weights *W, *D, *D2, *M, *V;
+ Adam *annI;
+
+ if (ann->internal == 0)
+ adaminit(ann);
+ annI = ann->internal;
+ annI->rate = 0.002;
+ annI->timestep++;
+
+	for (o = 0; o < noutputs; o++) {
+		// error = outputs[o] - result
+ error[o] -= outputs[o];
+ error[o] = -error[o];
+ ret += pow(error[o], 2.0) * 0.5;
+ error[o] = torque(error[o]);
+ }
+ D = ann->deltas[ann->n-2];
+ weightsinitdoubles(D, error);
+	for (i = 0; i < (ann->n-2); i++) {
+		D = ann->deltas[i];
+ weightsinitdouble(D, 1.0);
+ }
+
+ // backpropagate MSE
+ D2 = ann->deltas[ann->n-2];
+	for (w = ann->n-2; w >= 0; w--) {
+		D = ann->deltas[w];
+ M = annI->first[w];
+ V = annI->second[w];
+
+		for (o = 0; o < ann->layers[w+1]->n; o++) {
+			O = ann->layers[w+1]->neurons[o];
+ acc = O->gradient(O) * O->steepness;
+ sum = 1.0;
+			if (D2 != D) {
+				W = ann->weights[w+1];
+ sum = 0.0;
+ for (n = 0; n < D2->outputs; n++)
+ sum += D2->values[o][n] * W->values[o][n];
+ }
+			for (i = 0; i <= ann->layers[w]->n; i++) {
+				I = ann->layers[w]->neurons[i];
+ D->values[i][o] *= acc * sum;
+ M->values[i][o] *= annI->beta1;
+ M->values[i][o] += (1.0 - annI->beta1) * D->values[i][o] * I->value;
+ V->values[i][o] = fmax(V->values[i][o] * annI->beta2, fabs(D->values[i][o] * I->value));
+ }
+ }
+
+ D2 = D;
+ }
+
+ // update weights
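+	// AdaMax step: w += (rate/(1 - beta1^t)) * m/u, where u (stored in V) is
+	// the exponentially weighted infinity norm of the gradient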
+	for (w = 0; w < ann->n-1; w++) {
+		W = ann->weights[w];
+ M = annI->first[w];
+ V = annI->second[w];
+
+		for (i = 0; i <= W->inputs; i++) {
+			for (o = 0; o < W->outputs; o++) {
+				m = M->values[i][o];
+ v = V->values[i][o];
+ W->values[i][o] += (annI->rate/(1.0 - (annI->timestep < 100? pow(annI->beta1, annI->timestep): 0.0))) * (m/v);
+ }
+ }
+ }
+
+ free(error);
+ return ret;
+}
--- /dev/null
+++ b/ann/annweights.c
@@ -1,0 +1,65 @@
+#include <u.h>
+#include <libc.h>
+
+#define RAND_MAX 0x7FFF	/* Plan 9 rand() returns 0..2^15-1 */
+
+#include "ann.h"
+
+Weights*
+weightsinitdoubles(Weights *in, double *init)
+{
+	int i, o;
+
+ for (i = 0; i <= in->inputs; i++)
+ for (o = 0; o < in->outputs; o++)
+ in->values[i][o] = init[o];
+
+ return in;
+}
+
+Weights*
+weightsinitdouble(Weights *in, double init)
+{
+	int i, o;
+
+ for (i = 0; i <= in->inputs; i++)
+ for (o = 0; o < in->outputs; o++)
+ in->values[i][o] = init;
+
+ return in;
+}
+
+Weights*
+weightsinitrandscale(Weights *in, double scale)
+{
+	int i, o;
+
+ srand(time(0));
+ for (i = 0; i <= in->inputs; i++)
+ for (o = 0; o < in->outputs; o++)
+ in->values[i][o] = (((double)rand()/RAND_MAX) - 0.5) * scale;
+
+ return in;
+}
+
+Weights*
+weightsinitrand(Weights *in)
+{
+	weightsinitrandscale(in, 2.0);
+ return in;
+}
+
+Weights*
+weightscreate(int inputs, int outputs, int initialize)
+{
+	int i;
+ Weights *ret = calloc(1, sizeof(Weights));
+ ret->inputs = inputs;
+ ret->outputs = outputs;
+ ret->values = calloc(inputs+1, sizeof(double*));
+ for (i = 0; i <= inputs; i++)
+ ret->values[i] = calloc(outputs, sizeof(double));
+ if (initialize)
+ weightsinitrand(ret);
+ return ret;
+}
--- a/ann/mkfile
+++ b/ann/mkfile
@@ -11,8 +11,8 @@
%.$O: %.c
$CC $CFLAGS $stem.c
-ann.a$O: ann.$O annio.$O
- ar vu ann.a$O ann.$O annio.$O
+ann.a$O: annio.$O annfns.$O annweights.$O annneurons.$O annlayers.$O annrun.$O anntrain.$O anntrainadam.$O anncreate.$O
+ ar vu ann.a$O annio.$O annfns.$O annweights.$O annneurons.$O annlayers.$O annrun.$O anntrain.$O anntrainadam.$O anncreate.$O
clean:V:
rm -f *.[$OS] [$OS].* *.a[$OS]
--
⑨