Derivation of "backpropagation of error" formula for a Multi-Layer-Perceptron, written in LaTeX
\documentclass[10pt, a4paper]{article}
\usepackage[ngerman,english]{babel}
\usepackage{amsmath, amssymb, wasysym}
\usepackage{textcomp}
\usepackage{graphicx} % Graphics package
%\usepackage{graphs} % c.f. http://www8.cs.umu.se/~drewes/graphs/
\usepackage{tikz}
\usepackage[T1]{fontenc}
% \usepackage[latin9]{inputenc} % Encoding
\usepackage[utf8]{inputenc} % Encoding
\usepackage{hyperref}
\usepackage{caption} % Package in order to change image captions
\usepackage{rotating} % Package for rotating elements
\usepackage{color}
\usepackage{verbatim}
\hypersetup{pdfborder={0 0 0}}
% Slightly increase the spacing below figure captions
%\addtolength{\belowcaptionskip}{0.2cm}
% Set the second-level list item symbol
\renewcommand{\labelitemii}{$\diamond$}
% Change the format of figure captions
\renewcommand{\captionfont}{\small\itshape}
% Set the default font family
\renewcommand{\familydefault}{\sfdefault}
\definecolor{red}{rgb}{0.4, 0.0, 0.0}
\definecolor{green}{rgb}{0.0, 0.4, 0.0}
\definecolor{blue}{rgb}{0.0, 0.0, 0.4}
\definecolor{magenta}{rgb}{0.4, 0.4, 0.0}
\definecolor{orange}{rgb}{0.2, 0.2, 0.0}
% Set the heading font
%\setkomafont{sectioning}{\bf\rmfamily}
% Set the page margins
\usepackage[top=1cm, left=0.5cm, right=0.5cm, bottom=1.2cm]{geometry}
%% And now it goes looooose :)
\begin{document}
\begin{center}
{\LARGE Derivation \& Operation of Backpropagation of Error}
\end{center}
\begin{itemize}
%\item[]
%\underline{First, the nomenclature of the network excerpt:}
\begin{comment}
\item[]
\underline{Further information:}
\\
This document describes how an artificial neural network of the multi-layer perceptron type is trained with backpropagation of error.
\\
Basic knowledge is assumed; interested readers from other fields are referred to:
\\
\url{http://dkriesel.com/science/neural_networks}.
\item[]
\underline{Network topology:}
\def\layersep{2.5cm}
\begin{tikzpicture}[shorten >=1pt,->,draw=black!50, node distance=\layersep]
\tikzstyle{every pin edge}=[<-,shorten <=1pt]
\tikzstyle{neuron}=[circle,fill=black!25,minimum size=17pt,inner sep=0pt]
\tikzstyle{input neuron}=[neuron, fill=green!50];
\tikzstyle{output neuron}=[neuron, fill=red!50];
\tikzstyle{hidden neuron}=[neuron, fill=blue!50];
\tikzstyle{annot} = [text width=4em, text centered]
% Draw the input layer nodes
\foreach \name / \y in {1,...,3}
% This is the same as writing \foreach \name / \y in {1/1,2/2,3/3}
\node[input neuron, pin=left:$x_\y$] (I-\name) at (0,-\y cm) {$\diagup$};
\node[] (I-4) at (0,-4 cm) {$\vdots$};
\node[input neuron, pin=left:$x_N$] (I-5) at (0,-5 cm) {$\diagup$};
% Draw the hidden layer nodes
\foreach \name / \y in {1,...,5}
\path[yshift=0.5cm]
node[hidden neuron] (H-\name) at (\layersep,-\y cm) {$\frac{S}{\sum}$};
% Draw the output layer nodes
\foreach \name / \y in {1,...,3}
% \path[yshift=1.0cm]
\node[output neuron,pin={[pin edge={->}]right:$y_\y$}] (Y-\name) at (2*\layersep,-\y cm) {};
\node[] (Y-4) at (2*\layersep,-4 cm) {$\vdots$};
\node[output neuron,pin={[pin edge={->}]right:$y_M$}] (Y-5) at (2*\layersep,-5 cm) {};
% Connect every node in the input layer with every node in the
% hidden layer.
\foreach \source in {1,...,3}
\foreach \dest in {1,...,5}
\path (I-\source) edge (H-\dest);
\foreach \source in {5,...,5}
\foreach \dest in {1,...,5}
\path (I-\source) edge (H-\dest);
% Connect every node in the hidden layer with the output layer
\foreach \source in {1,...,5}
\foreach \dest in {1,...,3,5}
\path (H-\source) edge (Y-\dest);
% Annotate the layers
\node[annot,above of=H-1, node distance=1cm] (hl) {Hidden layer};
\node[annot,left of=hl] {Input layer};
\node[annot,right of=hl] {Output layer};
\end{tikzpicture}
\end{comment}
\item[]
\underline{Error function:}
\begin{align*}
F &= \sum_{p \in P} \ ^pE \ \ \ \text{with $p$ being a pattern in the training set $P$} \\
^p E &= \frac{1}{2} \sum_{j=1}^{M} \left(\hat{y}_j - y_j \right)^2 \ \ \ \text{with $\hat{y}_j$ being the teacher (target) value of the $j$-th of $M$ outputs $y_j$}
\end{align*}
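As a small numerical illustration (values chosen purely for this example), consider a single pattern $p$, $M = 2$ outputs, teacher values $\hat{y} = (1, 0)$ and actual outputs $y = (0.8, 0.3)$:
\begin{align*}
^p E = \frac{1}{2} \left( (1 - 0.8)^2 + (0 - 0.3)^2 \right) = \frac{1}{2} \left( 0.04 + 0.09 \right) = 0.065
\end{align*}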
\item[]
\underline{Weight update rule:}
\begin{align*}
^p \vartriangle w_{hm} &\sim - \nabla_w \, ^p E \ \ \ \text{with $w_{hm}$ being the weight from neuron $h$ to neuron $m$} \\
\vartriangle w_{hm} &= - \eta \textcolor{red}{\frac{\partial E \left( w_{hm} \right) }{ \partial w_{hm}} } \ \ \ \text{with $\eta$ being the learning rate}
\end{align*}
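In online learning this update is applied after each pattern $p$; in batch learning the per-pattern changes are first accumulated over the whole training set:
\begin{align*}
\vartriangle w_{hm} = \sum_{p \in P} \ ^p\vartriangle w_{hm}
\end{align*}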
\item[]
\underline{For output neurons:}
\\
$net_m = \sum_{i=0}^H w_{im} \tilde{o}_i$ \ \ \ (with $H$ hidden neurons; the term $i = 0$ can serve as bias via $\tilde{o}_0 \equiv 1$)
\\
$o_m = y_m = f_m(net_m)$
\\
$ \textcolor{red}{\frac{\partial E \left( w_{hm} \right) }{ \partial w_{hm} }} = \textcolor{magenta}{\frac{\partial E }{ \partial net_m }} \cdot \textcolor{green}{\frac{ \partial net_m }{ \partial w_{hm} }}$
\begin{align*}
\textcolor{green}{\frac{\partial net_m }{ \partial w_{hm} }} &= \frac{ \partial }{ \partial w_{hm} } net_m \\
&= \frac{ \partial }{ \partial w_{hm} } \sum_{i=0}^H w_{im} \tilde{o}_i \\
&= \sum_{i=0}^H \frac{ \partial }{ \partial w_{hm} } w_{im} \tilde{o}_i \\
&= \frac{ \partial }{ \partial w_{hm} } \tilde{o}_h w_{hm} \ \ \ \text{(only the term with $i = h$ depends on $w_{hm}$)} \\
&= \text{\textcolor{green}{\framebox{$ \tilde{o}_h $}}} \\
\\
\textcolor{magenta}{\frac{\partial E }{ \partial net_m }} &= \frac{ \partial E }{ \partial y_m } \cdot \textcolor{orange}{\frac{ \partial y_m }{ \partial net_m }} \ \left( = \textcolor{magenta}{- \delta_m} \right) \\
&= \frac{ \partial E }{ \partial y_m } \cdot \frac{ \partial }{ \partial net_m } f_m(net_m) \\
&= \underbrace{\textcolor{blue}{\frac{ \partial E }{ \partial y_m }} \cdot \text{\textcolor{orange}{\framebox{$ f'_m (net_m) $}}}}_{=: -\delta_m} \\
\\
\textcolor{blue}{\frac{ \partial E }{ \partial y_m }} &= \frac{ \partial }{ \partial y_m } \frac{1}{2} \sum_{j=1}^M \left( \hat{y}_j - y_j \right)^2 \\
&= \text{\textcolor{blue}{\framebox{$ - (\hat{y}_m - y_m) $}}} \\
\end{align*}
Combining the three boxed factors:
\begin{align*}
\vartriangle w_{hm} &= - \eta \textcolor{red}{\frac{ \partial E }{ \partial w_{hm} }} \\
&= \textcolor{blue}{-} \eta \textcolor{blue}{\frac{ \partial E }{ \partial y_m }} \textcolor{orange}{\frac{ \partial y_m}{ \partial net_m}} \textcolor{green}{\frac{ \partial net_m}{ \partial w_{hm}}} \\
&= \eta \textcolor{blue}{\left( \hat{y}_m - y_m \right)} \textcolor{orange}{f'_m(net_m)} \textcolor{green}{\tilde{o}_h}
\end{align*}
\framebox{$\delta_m = \textcolor{blue}{\left( \hat{y}_m - y_m \right)} \cdot \textcolor{orange}{f'_m(net_m)}$}
\framebox{$ \vartriangle w_{hm} = \eta \cdot \delta_m \cdot \tilde{o}_h $}
Widrow-Hoff rule / $\delta$-rule
\\
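As a concrete illustration, assume (as an example) the logistic activation $f_m(net_m) = \frac{1}{1 + e^{-net_m}}$, whose derivative is $f'_m(net_m) = f_m(net_m) \left( 1 - f_m(net_m) \right) = y_m \left( 1 - y_m \right)$. The $\delta$-rule then becomes:
\begin{align*}
\delta_m &= \left( \hat{y}_m - y_m \right) \cdot y_m \left( 1 - y_m \right) \\
\vartriangle w_{hm} &= \eta \cdot \left( \hat{y}_m - y_m \right) \cdot y_m \left( 1 - y_m \right) \cdot \tilde{o}_h
\end{align*}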
\item[]
\underline{For hidden neurons:}
\\
$net_h = \sum_{k=0}^N w_{kh} \tilde{o}_k$ \ \ \ (with $N$ inputs feeding hidden neuron $h$)
\\
$ \textcolor{red}{\frac{\partial E }{ \partial w_{kh} }} = \textcolor{magenta}{\frac{\partial E }{ \partial net_h }} \cdot \textcolor{green}{\frac{ \partial net_h }{ \partial w_{kh} } }$
\\
The green factor is obtained exactly as for the output neurons, $\textcolor{green}{\frac{ \partial net_h }{ \partial w_{kh} }} = \tilde{o}_k$, so only the magenta factor remains to be derived:
\begin{align*}
\delta_h &= - \textcolor{magenta}{\frac{ \partial E }{ \partial net_h}} \\
&= \textcolor{blue}{- \frac{ \partial E }{ \partial o_h}} \cdot \textcolor{orange}{\frac{ \partial o_h }{ \partial net_h}} \\
\\
\textcolor{blue}{- \frac{ \partial E }{ \partial o_h}} &= - \frac{ \partial E \left( \underline{net}_{l=1} , \underline{net}_{l=2} , \hdots , \underline{net}_{l=L} \right) }{ \partial o_h} \ \ \ \text{with $\underline{net}_l$ the net inputs of the $L$ neurons of the subsequent layer} \\
&= \sum_{l=1}^L \left( - \frac{ \partial E}{ \partial \underline{net}_l } \right) \cdot \frac{ \partial \underline{net}_l }{ \partial o_h} \ \ \ \text{(multivariate chain rule)} \\
&= \sum_{l=1}^L \underline{\delta}_l \cdot \frac{ \partial }{ \partial o_h} \sum_{j=0}^H \underline{w}_{jl} \cdot o_j \\
&= \textcolor{blue}{\sum_{l=1}^L \underline{\delta}_l \cdot \underline{w}_{hl}}
\end{align*}
\framebox{$\delta_h = \textcolor{blue}{\sum_{l=1}^L \left( \underline{\delta}_l \cdot \underline{w}_{hl} \right)} \cdot \textcolor{orange}{f'_h\left( net_h \right)}$}
\framebox{$ \vartriangle w_{kh} = \eta \cdot \delta_h \cdot \tilde{o}_k $}
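\\
Taken together, one training step for a pattern $p$ can be sketched as follows (a summary of the formulas above, assuming online learning with one update per pattern):
\begin{itemize}
\item[] 1. Forward pass: compute $net_h$ and $o_h$ for all hidden neurons, then $net_m$ and $y_m$ for all output neurons.
\item[] 2. Compute $\delta_m = \left( \hat{y}_m - y_m \right) \cdot f'_m(net_m)$ for every output neuron.
\item[] 3. Backpropagate: $\delta_h = \sum_{l=1}^L \left( \underline{\delta}_l \cdot \underline{w}_{hl} \right) \cdot f'_h(net_h)$ for every hidden neuron.
\item[] 4. Update the weights: $\vartriangle w_{hm} = \eta \cdot \delta_m \cdot \tilde{o}_h$ and $\vartriangle w_{kh} = \eta \cdot \delta_h \cdot \tilde{o}_k$.
\end{itemize}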
\begin{comment}
\item[]
\underline{Contact:}
\\
[email protected]
\end{comment}
\end{itemize}
\end{document}