//---------------------------------------------------------------------
// name: mlp-manual.ck
// desc: training (step-by-step) a multilayer perceptron (MLP):
//       (for a less manual example, see mlp.ck)
//
// version: need chuck version 1.5.0.0 or higher
// sorting: part of ChAI (ChucK for AI)
//
// uncomment for MLP API:
// MLP.help();
//
// author: Yikai Li
//         Ge Wang (https://ccrma.stanford.edu/~ge/)
// date: Winter 2023
//---------------------------------------------------------------------

// delay for each epoch (change for print speed)
// (or run with --silent for no print delay)
100::ms => dur T_PRINT;
// learning rate
0.01 => float learningRate;
// number of epochs
100 => int epochs;

// instantiate a multilayer perceptron (a basic neural network)
MLP mlp;
// neurons per layer: input, [hidden layer(s)], output
[3, 5, 5, 2] @=> int nodesPerLayer[];
// initialize with the # of neurons per layer
mlp.init( nodesPerLayer );

// input observations (one row per observation)
[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]] @=> float X[][];
// output observations (row j of Y is paired with row j of X below)
[[0.1, 0.2], [0.3, 0.4]] @=> float Y[][];

//---------------------------------------------------------------------
// train the neural network epoch by epoch
// this is a more "manual" alternative to simply calling MLP.train()
// intended for education and curiosity
//---------------------------------------------------------------------
for( int i; i < epochs; i++ )
{
    // shuffle the observations (X and Y are passed together)
    MLP.shuffle( X, Y );

    // train over all observations
    for( int j; j < X.size(); j++ )
    {
        // print weights and biases as they stand before this observation
        chout <= IO.newline() <= "--- WEIGHTS AND BIASES |" <= " EPOCH:" <= i+1 <= " OBSERVATION:" <= j+1 <= " ---" <= IO.newline();
        visWeightsBiases();

        // forward propagation, then print the per-layer activations
        chout <= IO.newline() <= "--- FORWARD-PROPAGATION |" <= " EPOCH:" <= i+1 <= " OBSERVATION:" <= j+1 <= " ---" <= IO.newline();
        mlp.forward( X[j] );
        visActivations();

        // backpropagation, then print the per-layer gradients
        chout <= IO.newline() <= "--- BACKPROPAGATION |" <= " EPOCH:" <= i+1 <= " OBSERVATION:" <= j+1 <= " ---" <= IO.newline();
        mlp.backprop( Y[j], learningRate );
        visGradients();
    }

    // print end-of-epoch banner
    chout <= IO.newline();
    chout <= "********************************************" <= IO.newline();
    chout <= "*** LEARNING-RATE:" <= learningRate <= " | EPOCH:" <= i+1 <= " DONE" <= IO.newline();
    chout <= "********************************************" <= IO.newline();

    // pause for print
    T_PRINT => now;
}

// print weights and biases after the last epoch
chout <= IO.newline() <= "--- FINAL WEIGHTS AND BIASES |" <= " EPOCH:" <= epochs <= " ---" <= IO.newline();
visWeightsBiases();
chout <= IO.newline();

// print closing banner
chout <= "********************************************" <= IO.newline();
chout <= "************* TRAINING DONE ****************" <= IO.newline();
chout <= "********************************************" <= IO.newline();
chout <= IO.newline();

//---------------------------------------------------------------------
// visualize activations
// for every entry k of nodesPerLayer: fetch that layer's values with
// mlp.getActivations() and print them on one line under a heading
//---------------------------------------------------------------------
fun void visActivations()
{
    // chout <= "[activations]" <= IO.newline();
    for ( 0 => int k; k < nodesPerLayer.size(); k++ )
    {
        // activations
        chout <= "layer " <= k <= " ACTIVATIONS" <= IO.newline();
        // destination array, one slot per neuron in layer k
        float a[nodesPerLayer[k]];
        mlp.getActivations(k, a);
        for ( 0 => int l; l < nodesPerLayer[k]; l++ )
        {
            chout <= a[l] <= " ";
        }
        chout <= IO.newline();
    }
}

//---------------------------------------------------------------------
// visualize weights and biases
// for every pair of adjacent layers (k, k+1): print the weights
// fetched with mlp.getWeights(), one row per neuron of layer k;
// then, after a "---" separator, print the biases fetched with
// mlp.getBiases(), one line per pair
//---------------------------------------------------------------------
fun void visWeightsBiases()
{
    // chout <= "[weights]" <= IO.newline();
    // size() - 1: one weight matrix per pair of adjacent layers
    for ( 0 => int k; k < nodesPerLayer.size() - 1; k++ )
    {
        // weights
        chout <= "layer " <= k <= " WEIGHTS" <= IO.newline();
        // rows: neurons in layer k; columns: neurons in layer k+1
        float w[nodesPerLayer[k]][nodesPerLayer[k+1]];
        mlp.getWeights(k, w);
        for ( 0 => int l; l < nodesPerLayer[k]; l++ )
        {
            for ( 0 => int m; m < nodesPerLayer[k+1]; m++ )
            {
                chout <= w[l][m] <= " ";
            }
            chout <= IO.newline();
        }
    }

    chout <= "---" <= IO.newline();

    // chout <= "[biases]" <= IO.newline();
    for ( 0 => int k; k < nodesPerLayer.size() - 1; k++ )
    {
        // biases
        chout <= "layer " <= k <= " BIASES" <= IO.newline();
        // one bias per neuron in layer k+1
        float b[nodesPerLayer[k+1]];
        mlp.getBiases(k, b);
        for ( 0 => int l; l < nodesPerLayer[k+1]; l++ )
        {
            chout <= b[l] <= " ";
        }
        chout <= IO.newline();
    }
}

//---------------------------------------------------------------------
// visualize gradients
// for every entry k of nodesPerLayer: fetch that layer's values with
// mlp.getGradients() and print them on one line under a heading
//---------------------------------------------------------------------
fun void visGradients()
{
    // chout <= "[gradients]" <= IO.newline();
    for ( 0 => int k; k < nodesPerLayer.size(); k++ )
    {
        // gradients
        chout <= "layer " <= k <= " GRADIENTS" <= IO.newline();
        // destination array, one slot per neuron in layer k
        float g[nodesPerLayer[k]];
        mlp.getGradients(k, g);
        for ( 0 => int l; l < nodesPerLayer[k]; l++ )
        {
            chout <= g[l] <= " ";
        }
        chout <= IO.newline();
    }
}