@realbardia
Last active February 5, 2025 11:58
Very simple Neural Network using C++
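The code below implements a small fully connected feed-forward network with tanh activations, trained one sample at a time by backpropagation with gradient descent. main() demonstrates it on two tasks: regressing sin(x) over [0, 2*pi) and learning XOR.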
#include <iostream>
#include <vector>
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <random>

using namespace std;

// Activation and its derivative.
// Since d/dx tanh(x) = 1 - tanh(x)^2, the derivative is written in terms of
// the already-activated output y = tanh(x): it receives y and returns 1 - y*y.
double tanh_activation(double x) { return tanh(x); }
double tanh_derivative(double y) { return 1 - y*y; }
class NeuralNetwork {
private:
    vector<vector<vector<double>>> weights; // Layer weights [layer][node][weight]
    vector<vector<double>> biases;          // Layer biases  [layer][node]
    double learning_rate;
    mt19937 gen;
    uniform_real_distribution<double> dist{-1.0, 1.0};

    // Allocate weights and biases for every pair of adjacent layers and
    // initialize them uniformly at random in [-1, 1]
    void initialize(const vector<int>& architecture) {
        weights.resize(architecture.size()-1);
        biases.resize(architecture.size()-1);
        for(size_t i=0; i<weights.size(); i++) {
            int inputs = architecture[i];
            int outputs = architecture[i+1];
            weights[i].resize(outputs);
            for(auto& w : weights[i]) {
                w.resize(inputs);
                for(auto& val : w) val = dist(gen);
            }
            biases[i].resize(outputs);
            for(auto& b : biases[i]) b = dist(gen);
        }
    }

public:
    NeuralNetwork(const vector<int>& architecture, double lr = 0.1) : learning_rate(lr) {
        gen.seed(time(0));
        initialize(architecture);
    }
    // Predict
    vector<double> predict(const vector<double>& inputs) {
        vector<double> current = inputs;
        for(size_t layer=0; layer<weights.size(); layer++) {
            vector<double> next(weights[layer].size());
            for(size_t node=0; node<weights[layer].size(); node++) {
                double sum = biases[layer][node];
                for(size_t w=0; w<weights[layer][node].size(); w++)
                    sum += weights[layer][node][w] * current[w];
                next[node] = tanh_activation(sum);
            }
            current = next;
        }
        return current;
    }
    // Train on a single sample: forward pass, then backpropagation with a
    // plain gradient-descent update
    void train(const vector<double>& inputs, const vector<double>& targets) {
        vector<vector<double>> layer_outputs;
        vector<double> current = inputs;
        // Store output of each layer for backpropagation
        layer_outputs.push_back(current);
        // Forward propagation
        for(size_t layer=0; layer<weights.size(); layer++) {
            vector<double> next(weights[layer].size());
            for(size_t node=0; node<weights[layer].size(); node++) {
                double sum = biases[layer][node];
                for(size_t w=0; w<weights[layer][node].size(); w++)
                    sum += weights[layer][node][w] * current[w];
                next[node] = tanh_activation(sum);
            }
            layer_outputs.push_back(next);
            current = next;
        }
        // Output-layer error: (target - output) scaled by the activation derivative
        vector<double> errors;
        for(size_t i=0; i<targets.size(); i++) {
            errors.push_back((targets[i] - current[i]) * tanh_derivative(current[i]));
        }
        // Backpropagation
        for(int layer=(int)weights.size()-1; layer >= 0; layer--) {
            vector<double> new_errors(layer_outputs[layer].size(), 0.0);
            for(size_t node=0; node<weights[layer].size(); node++) {
                // Update bias
                biases[layer][node] += learning_rate * errors[node];
                for(size_t w=0; w<weights[layer][node].size(); w++) {
                    // Propagate the error using the weight value from *before*
                    // the update, as standard backpropagation prescribes
                    new_errors[w] += weights[layer][node][w] * errors[node];
                    // Update weight
                    weights[layer][node][w] += learning_rate * errors[node] * layer_outputs[layer][w];
                }
            }
            // Calculate the previous layer's errors
            errors.clear();
            for(size_t w=0; w<new_errors.size(); w++) {
                errors.push_back(new_errors[w] * tanh_derivative(layer_outputs[layer][w]));
            }
        }
    }
};
int main() {
    srand(time(0)); // Seed rand() so each run draws different samples

    // Example 1: sin train and predict
    NeuralNetwork nn_sin({1, 10, 5, 1}, 0.05); // 1 input, 1 output

    // Train on random angles in [0, 2*pi)
    for(int i=0; i<50000; i++) {
        double angle = (rand() / double(RAND_MAX)) * 2 * M_PI;
        nn_sin.train({angle}, {sin(angle)});
    }

    // Predict
    cout << "Sin Predict:\n";
    cout << "Angle\t\tPredicted\tActual\t\tError\n";
    cout << "-------------------------------------------------\n";
    for(int i=0; i<10; i++) {
        double angle = (rand() / double(RAND_MAX)) * 2 * M_PI;
        double pred = nn_sin.predict({angle})[0];
        double actual = sin(angle);
        printf("%.4f\t\t%.4f\t\t%.4f\t\t%.4f\n",
               angle, pred, actual, fabs(pred-actual));
    }

    // Example 2: XOR train and predict
    NeuralNetwork nn_xor({2, 4, 1}, 0.1); // 2 inputs, 1 output
    vector<vector<double>> xor_inputs = {
        {0,0}, {0,1}, {1,0}, {1,1}
    };
    vector<vector<double>> xor_targets = {
        {0}, {1}, {1}, {0}
    };

    // Train on randomly chosen XOR patterns
    for(int i=0; i<10000; i++) {
        int idx = rand() % 4;
        nn_xor.train(xor_inputs[idx], xor_targets[idx]);
    }

    // Predict
    cout << "\nXOR Predict:\n";
    for(auto& input : xor_inputs) {
        auto res = nn_xor.predict(input);
        cout << input[0] << " XOR " << input[1] << " = " << round(res[0])
             << " (" << res[0] << ")\n";
    }
    return 0;
}
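To build and run (a sketch; the file name neural_network.cpp is an assumption, and any C++11-capable compiler should work):

g++ -std=c++11 -O2 neural_network.cpp -o nn
./nn

Note that M_PI is a POSIX extension rather than standard C++; on MSVC, define _USE_MATH_DEFINES before including <cmath> or provide the constant yourself.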
@m4oughi commented Feb 3, 2025

Excellent Bardia
