Derivation of "backpropagation of error" formula for a Multi-Layer-Perceptron, written in LaTeX
\documentclass[10pt, a4paper]{article}
\usepackage[ngerman,english]{babel}
\usepackage{amsmath, amssymb, wasysym}
\usepackage{textcomp}
\usepackage{graphicx} % Graphics package
%\usepackage{graphs} % c.f. http://www8.cs.umu.se/~drewes/graphs/
\usepackage{tikz}
\usepackage[T1]{fontenc}
% \usepackage[latin9]{inputenc} % Encoding
\usepackage[utf8]{inputenc} % Encoding
\usepackage{hyperref}
\usepackage{caption} % Package in order to change image captions
\usepackage{rotating} % Package for rotating elements
\usepackage{color}
\usepackage{verbatim}
\hypersetup{pdfborder={0 0 0}}
% Slightly increase the spacing below figure captions
%\addtolength{\belowcaptionskip}{0.2cm}
% Set the second-level list item symbol
\renewcommand{\labelitemii}{$\diamond$}
% Change the format of figure captions
\renewcommand{\captionfont}{\small\itshape}
% Set the default font family
\renewcommand{\familydefault}{\sfdefault}
\definecolor{red}{rgb}{0.4, 0.0, 0.0}
\definecolor{green}{rgb}{0.0, 0.4, 0.0}
\definecolor{blue}{rgb}{0.0, 0.0, 0.4}
\definecolor{magenta}{rgb}{0.4, 0.4, 0.0}
\definecolor{orange}{rgb}{0.2, 0.2, 0.0}
% Set the heading font
%\setkomafont{sectioning}{\bf\rmfamily}
% Set the page margins
\usepackage[top=1cm, left=0.5cm, right=0.5cm, bottom=1.2cm]{geometry}
%% And now it goes looooose :)
\begin{document}
\begin{center}
{\LARGE Derivation \& Operation of Backpropagation of Error}
\end{center}
\begin{itemize}
%\item[]
%\underline{First, the nomenclature of the network excerpt:}
\begin{comment}
\item[]
\underline{Further information:}
\\
This document describes how an artificial neural network of the multi-layer perceptron type is trained with backpropagation of error.
\\
Basic knowledge is assumed; interested readers from other fields are referred to:
\\
\url{http://dkriesel.com/science/neural_networks}.
\item[]
\underline{Network topology:}
\def\layersep{2.5cm}
\begin{tikzpicture}[shorten >=1pt,->,draw=black!50, node distance=\layersep]
\tikzstyle{every pin edge}=[<-,shorten <=1pt]
\tikzstyle{neuron}=[circle,fill=black!25,minimum size=17pt,inner sep=0pt]
\tikzstyle{input neuron}=[neuron, fill=green!50];
\tikzstyle{output neuron}=[neuron, fill=red!50];
\tikzstyle{hidden neuron}=[neuron, fill=blue!50];
\tikzstyle{annot} = [text width=4em, text centered]
% Draw the input layer nodes
\foreach \name / \y in {1,...,3}
% This is the same as writing \foreach \name / \y in {1/1,2/2,3/3}
\node[input neuron, pin=left:$x_\y$] (I-\name) at (0,-\y cm) {$\diagup$};
\node[] (I-4) at (0,-4 cm) {$\vdots$};
\node[input neuron, pin=left:$x_N$] (I-5) at (0,-5 cm) {$\diagup$};
% Draw the hidden layer nodes
\foreach \name / \y in {1,...,5}
\path[yshift=0.5cm]
node[hidden neuron] (H-\name) at (\layersep,-\y cm) {$\frac{S}{\sum}$};
% Draw the output layer nodes
\foreach \name / \y in {1,...,3}
% \path[yshift=1.0cm]
\node[output neuron,pin={[pin edge={->}]right:$y_\y$}] (Y-\name) at (2*\layersep,-\y cm) {};
\node[] (Y-4) at (2*\layersep,-4 cm) {$\vdots$};
\node[output neuron,pin={[pin edge={->}]right:$y_M$}] (Y-5) at (2*\layersep,-5 cm) {};
% Connect every node in the input layer with every node in the
% hidden layer.
\foreach \source in {1,...,3}
\foreach \dest in {1,...,5}
\path (I-\source) edge (H-\dest);
\foreach \source in {5,...,5}
\foreach \dest in {1,...,5}
\path (I-\source) edge (H-\dest);
% Connect every node in the hidden layer with the output layer
\foreach \source in {1,...,5}
\foreach \dest in {1,...,3,5}
\path (H-\source) edge (Y-\dest);
% Annotate the layers
\node[annot,above of=H-1, node distance=1cm] (hl) {Hidden layer};
\node[annot,left of=hl] {Input layer};
\node[annot,right of=hl] {Output layer};
\end{tikzpicture}
\end{comment}
\item[]
\underline{Error function:}
\begin{align*}
F &= \sum_{p \in P} \ ^pE \ \ \ \text{with $p$ being a pattern in the training set $P$} \\
^p E &= \frac{1}{2} \sum_{j=1}^{M} \left(\hat{y}_j - y_j \right)^2 \ \ \ \text{with $\hat{y}_j$ being the teacher (target) value of the $j$-th of $M$ outputs $y_j$}
\end{align*}
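As a small numerical illustration (values chosen purely for this example), consider a single pattern $p$, $M = 2$ outputs, teacher values $\hat{y} = (1, 0)$ and actual outputs $y = (0.8, 0.3)$:
\begin{align*}
^p E = \frac{1}{2} \left( (1 - 0.8)^2 + (0 - 0.3)^2 \right) = \frac{1}{2} \left( 0.04 + 0.09 \right) = 0.065
\end{align*}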
\item[]
\underline{Weight update rule:}
\begin{align*}
^p \vartriangle w_{hm} &\sim - \nabla_w \, ^p E \ \ \ \text{with $w_{hm}$ being the weight from neuron $h$ to neuron $m$} \\
\vartriangle w_{hm} &= - \eta \textcolor{red}{\frac{\partial E \left( w_{hm} \right) }{ \partial w_{hm}} } \ \ \ \text{with $\eta$ being the learning rate}
\end{align*}
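In online learning this update is applied after each pattern $p$; in batch learning the per-pattern changes are first accumulated over the whole training set:
\begin{align*}
\vartriangle w_{hm} = \sum_{p \in P} \ ^p\vartriangle w_{hm}
\end{align*}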
\item[]
\underline{For output neurons:}
\\
$net_m = \sum_{i=0}^H w_{im} \tilde{o}_i$ \ \ \ (with $H$ hidden neurons; the term $i = 0$ can serve as bias via $\tilde{o}_0 \equiv 1$)
\\
$o_m = y_m = f_m(net_m)$
\\
$ \textcolor{red}{\frac{\partial E \left( w_{hm} \right) }{ \partial w_{hm} }} = \textcolor{magenta}{\frac{\partial E }{ \partial net_m }} \cdot \textcolor{green}{\frac{ \partial net_m }{ \partial w_{hm} }}$
\begin{align*}
\textcolor{green}{\frac{\partial net_m }{ \partial w_{hm} }} &= \frac{ \partial }{ \partial w_{hm} } net_m \\
&= \frac{ \partial }{ \partial w_{hm} } \sum_{i=0}^H w_{im} \tilde{o}_i \\
&= \sum_{i=0}^H \frac{ \partial }{ \partial w_{hm} } w_{im} \tilde{o}_i \\
&= \frac{ \partial }{ \partial w_{hm} } \tilde{o}_h w_{hm} \ \ \ \text{(only the term with $i = h$ depends on $w_{hm}$)} \\
&= \text{\textcolor{green}{\framebox{$ \tilde{o}_h $}}} \\
\\
\textcolor{magenta}{\frac{\partial E }{ \partial net_m }} &= \frac{ \partial E }{ \partial y_m } \cdot \textcolor{orange}{\frac{ \partial y_m }{ \partial net_m }} \ \left( = \textcolor{magenta}{- \delta_m} \right) \\
&= \frac{ \partial E }{ \partial y_m } \cdot \frac{ \partial }{ \partial net_m } f_m(net_m) \\
&= \underbrace{\textcolor{blue}{\frac{ \partial E }{ \partial y_m }} \cdot \text{\textcolor{orange}{\framebox{$ f'_m (net_m) $}}}}_{=: -\delta_m} \\
\\
\textcolor{blue}{\frac{ \partial E }{ \partial y_m }} &= \frac{ \partial }{ \partial y_m } \frac{1}{2} \sum_{j=1}^M \left( \hat{y}_j - y_j \right)^2 \\
&= \text{\textcolor{blue}{\framebox{$ - (\hat{y}_m - y_m) $}}} \\
\end{align*}
Combining the three boxed factors:
\begin{align*}
\vartriangle w_{hm} &= - \eta \textcolor{red}{\frac{ \partial E }{ \partial w_{hm} }} \\
&= \textcolor{blue}{-} \eta \textcolor{blue}{\frac{ \partial E }{ \partial y_m }} \textcolor{orange}{\frac{ \partial y_m}{ \partial net_m}} \textcolor{green}{\frac{ \partial net_m}{ \partial w_{hm}}} \\
&= \eta \textcolor{blue}{\left( \hat{y}_m - y_m \right)} \textcolor{orange}{f'_m(net_m)} \textcolor{green}{\tilde{o}_h}
\end{align*}
\framebox{$\delta_m = \textcolor{blue}{\left( \hat{y}_m - y_m \right)} \cdot \textcolor{orange}{f'_m(net_m)}$}
\framebox{$ \vartriangle w_{hm} = \eta \cdot \delta_m \cdot \tilde{o}_h $}
Widrow-Hoff rule / $\delta$-rule
\\
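As a concrete illustration, assume (as an example) the logistic activation $f_m(net_m) = \frac{1}{1 + e^{-net_m}}$, whose derivative is $f'_m(net_m) = f_m(net_m) \left( 1 - f_m(net_m) \right) = y_m \left( 1 - y_m \right)$. The $\delta$-rule then becomes:
\begin{align*}
\delta_m &= \left( \hat{y}_m - y_m \right) \cdot y_m \left( 1 - y_m \right) \\
\vartriangle w_{hm} &= \eta \cdot \left( \hat{y}_m - y_m \right) \cdot y_m \left( 1 - y_m \right) \cdot \tilde{o}_h
\end{align*}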
\item[]
\underline{For hidden neurons:}
\\
$net_h = \sum_{k=0}^N w_{kh} \tilde{o}_k$ \ \ \ (with $N$ inputs feeding hidden neuron $h$)
\\
$ \textcolor{red}{\frac{\partial E }{ \partial w_{kh} }} = \textcolor{magenta}{\frac{\partial E }{ \partial net_h }} \cdot \textcolor{green}{\frac{ \partial net_h }{ \partial w_{kh} } }$
\\
The green factor is obtained exactly as for the output neurons, $\textcolor{green}{\frac{ \partial net_h }{ \partial w_{kh} }} = \tilde{o}_k$, so only the magenta factor remains to be derived:
\begin{align*}
\delta_h &= - \textcolor{magenta}{\frac{ \partial E }{ \partial net_h}} \\
&= \textcolor{blue}{- \frac{ \partial E }{ \partial o_h}} \cdot \textcolor{orange}{\frac{ \partial o_h }{ \partial net_h}} \\
\\
\textcolor{blue}{- \frac{ \partial E }{ \partial o_h}} &= - \frac{ \partial E \left( \underline{net}_{l=1} , \underline{net}_{l=2} , \hdots , \underline{net}_{l=L} \right) }{ \partial o_h} \ \ \ \text{with $\underline{net}_l$ the net inputs of the $L$ neurons of the subsequent layer} \\
&= \sum_{l=1}^L \left( - \frac{ \partial E}{ \partial \underline{net}_l } \right) \cdot \frac{ \partial \underline{net}_l }{ \partial o_h} \ \ \ \text{(multivariate chain rule)} \\
&= \sum_{l=1}^L \underline{\delta}_l \cdot \frac{ \partial }{ \partial o_h} \sum_{j=0}^H \underline{w}_{jl} \cdot o_j \\
&= \textcolor{blue}{\sum_{l=1}^L \underline{\delta}_l \cdot \underline{w}_{hl}}
\end{align*}
\framebox{$\delta_h = \textcolor{blue}{\sum_{l=1}^L \left( \underline{\delta}_l \cdot \underline{w}_{hl} \right)} \cdot \textcolor{orange}{f'_h\left( net_h \right)}$}
\framebox{$ \vartriangle w_{kh} = \eta \cdot \delta_h \cdot \tilde{o}_k $}
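\\
Taken together, one training step for a pattern $p$ can be sketched as follows (a summary of the formulas above, assuming online learning with one update per pattern):
\begin{itemize}
\item[] 1. Forward pass: compute $net_h$ and $o_h$ for all hidden neurons, then $net_m$ and $y_m$ for all output neurons.
\item[] 2. Compute $\delta_m = \left( \hat{y}_m - y_m \right) \cdot f'_m(net_m)$ for every output neuron.
\item[] 3. Backpropagate: $\delta_h = \sum_{l=1}^L \left( \underline{\delta}_l \cdot \underline{w}_{hl} \right) \cdot f'_h(net_h)$ for every hidden neuron.
\item[] 4. Update the weights: $\vartriangle w_{hm} = \eta \cdot \delta_m \cdot \tilde{o}_h$ and $\vartriangle w_{kh} = \eta \cdot \delta_h \cdot \tilde{o}_k$.
\end{itemize}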
\begin{comment}
\item[]
\underline{Contact:}
\\
[email protected]
\end{comment}
\end{itemize}
\end{document}