%--*- latex -*-----------------------------------------------------------------
%$Author: saulius $
%$Date: 2020-06-04 14:58:32 +0300 (Thu, 04 Jun 2020) $ 
%$Revision: 1524 $
%$URL: svn+ssh://saulius-grazulis.lt/home/saulius/svn-repositories/seminarai/2020-verifikacjos-seminarui/slides.tex $
%------------------------------------------------------------------------------

\documentclass[mathserif]{beamer}
\usetheme{Warwick}
\useoutertheme{infolines}
\setbeamertemplate{headline}{} % removes the headline the infolines inserts
%\setbeamertemplate{footline}[frame number]
\renewcommand\familydefault{\rmdefault}
% For XeLaTeX:
% https://tex.stackexchange.com/questions/452151/how-do-i-render-the-word-v%C7%ABlundarkvi%C3%B0a-with-bookman-and-xelatex
% "Use an OpenType clone of Bookman, for instance TeX Gyre Bonum":
\usepackage{fontspec}
\setmainfont{TeX Gyre Bonum}

\usepackage[style=authoryear,maxnames=1,doi=true,url=true,backend=biber]{biblatex}
%\addbibresource{bibliography/citations.bib}
%% \addbibresource{bibliography/Intel.bib}
%% \addbibresource{bibliography/Jorgensen.bib}
\addbibresource{bibliography/AMD.bib}
\addbibresource{bibliography/Zhmakin.bib}
\addbibresource{bibliography/Gustafson.bib}
\addbibresource{bibliography/Coleman.bib}
\addbibresource{bibliography/Engelen.bib}
\addbibresource{bibliography/Goldberg.bib}
\addbibresource{bibliography/IEEE.bib}
\addbibresource{bibliography/Cody.bib}
\newcommand{\mycite}{\parencite}


\usepackage{colordvi}
\usepackage{graphicx}
\usepackage{tikz}
\usetikzlibrary{snakes}
\usepackage{chemfig}
\usepackage{listings}
% https://tex.stackexchange.com/questions/212069/listings-cannot-load-requested-language
\lstset{defaultdialect=[x86masm]Assembler}

% https://en.wikibooks.org/wiki/LaTeX/Algorithms
% http://mirror.datacenter.by/pub/mirrors/CTAN/macros/latex/contrib/algorithmicx/algorithmicx.pdf
\usepackage{algpseudocode}
\usepackage{algorithm}
\usepackage{amssymb}

\include{commands}

\newcommand{\RCSid}[1]{\fontsize{7pt}{7pt}\selectfont $#1$ \today}


%%BEGIN LANGUAGE en
\title{Floating point numbers}
%%END LANGUAGE en


\author{Saulius Gražulis}

\date{Vilnius, \the\year}

% Define colors as in
% https://venngage.com/blog/color-blind-friendly-palette/ ``Retro''
\definecolor{Bluish}{HTML}{63ACBE}
\definecolor{Magentish}{HTML}{601A4A}
\definecolor{Orangish}{HTML}{EE442F}

\begin{document}

\colorlet{IdentifierColor}{red!40!black}
\colorlet{StringColor}{green!70!black}
\colorlet{KwdColor}{Bluish}
\colorlet{CommentColor}{Orangish}

\colorlet{SignColor}{Bluish}
\colorlet{ExponentColor}{Magentish}
\colorlet{SignificandColor}{Orangish}

\colorlet{SC}{SignColor}
\colorlet{EC}{ExponentColor}
\colorlet{FC}{SignificandColor} % a.k.a. ``Fraction Color''

%------------------------------------------------------------------------------

\begin{frame}
    \titlepage

\input{affiliation}
    
    \begin{center}
      \mbox{}
      \hfill\hfill\hfill
      \includegraphics[height=1.5cm]{images/sp_VU_zenklas.eps}
      \hfill
      \includegraphics[height=1.5cm]{images/2019-05-02_Melynas_MIF-zenklas242x244.png}
      \hfill\hfill\hfill
      \mbox{}
    \end{center}

    \vfill

    %% \tiny
    %% \RCSid{
    %%   $Id: slides.tex 1524 2020-06-04 11:58:32Z saulius $
    %% }
    \begin{flushright}
      \begin{minipage}[c]{0.67\textwidth}
        \tiny\raggedright
        %%BEGIN LANGUAGE en
        This set of slides may be copied and used as specified in the
        %%END LANGUAGE en
        \myhref{http://creativecommons.org/licenses/by-sa/4.0/}{Attribution-ShareAlike
          4.0 International}
license
      \end{minipage}
      %%
      \begin{minipage}[c]{1.5cm}
        \myhref{http://creativecommons.org/licenses/by-sa/4.0/}{
          \includegraphics[width=1.5cm]{images/CC-BY-SA.eps}
        }
      \end{minipage}
    \end{flushright}

\end{frame}

%==============================================================================

\begin{frame}
\frametitle{Scientific number notation}

  $$
  602\underbrace{00\dots0}_{21
\textrm{\ times}
  } = 6.02\times10^{23}
  $$

  $$
  \pm d_0 . d_1 d_2 \dots d_{p-1} \times \beta^e =  
  \sum_{i=0}^{p-1} d_i \beta^{-i} \times \beta^e, (0 \le d_i < \beta) 
  $$

  \begin{center}
    $
    \underbrace{602 \times 10^{21}}_{\text{unnormalised}} =
    \underbrace{6.02 \times 10^{23}}_{\text{normalised}} =
    \underbrace{0.602 \times 10^{24}}_{\text{unnormalised}}
    $
  \end{center}
  
\end{frame}

%------------------------------------------------------------------------------

%% \begin{frame}
%%   %%LANGUAGE en \frametitle{Fixed point numbers}
%%   %%LANGUAGE lt \frametitle{Fiksuoto kablelio skaičiai}
%%   %%LANGUAGE ru \frametitle{Числа с фиксированной запятой}
%% 
%% \end{frame}

%------------------------------------------------------------------------------

\begin{frame}
\frametitle{Floating point numbers}

  $$
  \textcolor{SC}{\pm} \textcolor{FC}{d_0 . d_1 d_2 \dots d_{p-1}} \times \beta^{\textcolor{EC}{e}} =  
  \textcolor{SC}{\pm} \sum_{i=0}^{p-1} \textcolor{FC}{d_i \beta^{-i}} \times \beta^{\textcolor{EC}{e}},
  (0 \le d_i < \beta) 
  $$

  $$
  \begin{array}{rl}
    \beta &= 2 \\
  \end{array}
  $$

  $$
  0.1_{10} \approx \textcolor{SC}{+} \textcolor{FC}{1.10011001100110011001101_2} \times 2^{\textcolor{EC}{-4}}
  $$
  
  \begin{itemize}
  \item
    \color{SC}
    %%BEGIN LANGUAGE en
    Sign (of the significand)
    %%END LANGUAGE en
  \item
    \color{EC}
    %%BEGIN LANGUAGE en
    Exponent
    %%END LANGUAGE en
  \item
    \color{FC}
    %%BEGIN LANGUAGE en
    Significand (mantissa, fraction)
    %%END LANGUAGE en
  \end{itemize}
  
\end{frame}

%------------------------------------------------------------------------------

\begin{frame}
\frametitle{IEEE 754 Standard}

  \begin{itemize}
  \item
    %%BEGIN LANGUAGE en
    Binary ($\beta = 2$, \mycite{IEEE1985a}) and decimal ($\beta = 10$,
    \mycite{IEEE2008}) formats
    %%END LANGUAGE en
  \item
    %%BEGIN LANGUAGE en
    Single, double, single-extended and double-extended precision
    \mycite{IEEE1985a}, half-precision (16 bits), quad precision (128
    bits) and octuple precision (256 bits) and longer interchange
    formats \mycite{IEEE2008}
    %%END LANGUAGE en
  \item
    %%BEGIN LANGUAGE en
    Special values: not-a-number(s) (NaN), infinities ($\pm\infty$),
    signed zeroes ($\pm0$)
    %%END LANGUAGE en
  \item
    %%BEGIN LANGUAGE en
    Denormalised numbers
    %%END LANGUAGE en
  \item
    %%BEGIN LANGUAGE en
    Roundoff control
    %%END LANGUAGE en
  \item
    %%BEGIN LANGUAGE en
    Masked exceptions
    %%END LANGUAGE en
  \item
    %%BEGIN LANGUAGE en
    Specifies precision of operations
    %%END LANGUAGE en
  \end{itemize}

\end{frame}

%------------------------------------------------------------------------------

\begin{frame}
\frametitle{IEEE 754 Standard encoding}

  \begin{itemize}
  \item
    %%BEGIN LANGUAGE en
    Significand: represented as \textit{signed magnitude}
    %%END LANGUAGE en
  \item
    %%BEGIN LANGUAGE en
    Exponent: uses \textit{biased} representation (excess $2^{n-1}-1$ for $n$
    exponent bits)
    %%END LANGUAGE en
  \item
Exponent range: $-(2^{n-1}-2) \dots +(2^{n-1}-1)$\newline{} (e.g.\ $-126 \dots +127$ for the 8-bit exponent)
  \item
    %%BEGIN LANGUAGE en
    Fraction (significand): hidden (assumed) bit used for normalised numbers
    %%END LANGUAGE en
  \end{itemize}

  \vspace{-\baselineskip}
  $$
  0.1_{10} \approx \textcolor{gray}{1}.10011001100110011001101_2 \times 2^{-4}
  $$

Example: $0.1$ in single precision FP:
  
  \vspace{0.5\baselineskip}
  p: $23+1$ bits,
  e: $-126 \dots +127$ (8 bits);
  bias = $2^{8-1}-1 = 128 - 1 = 127$
  {
    \raggedright
    \tt
    $f =$ \textcolor{gray}{1.}\textcolor{FC}{10011001100110011001101} \\
    $e =$ \textcolor{EC}{127 + (-4) = $\tt 123_{10}$ = $\tt 01111011_2$}
  }

  \begin{center}
    \tt
    \textcolor{SC}{0} \textcolor{EC}{01111011} \textcolor{FC}{10011001100110011001101}
  \end{center}

\end{frame}

%------------------------------------------------------------------------------

\begin{frame}
\frametitle{Normal(ised) numbers}

  $$
  0.15625_{10} = \underbrace{0.00101_2}_{\text{
not normalised
  }} = \textcolor{SC}{+}
  \textcolor{gray}{1.}\textcolor{FC}{01} \times 2^{\textcolor{EC}{-3}} 
  $$

  $$
  e = 127 + (-3) = 124_{10} = \textcolor{EC}{01111100}_2
  $$

Representation:

  Float 32 (float; single precision):

  \begin{center}
    \texttt{
      \textcolor{SC}{0} \textcolor{EC}{01111100}
      $
      \tt
      \textcolor{FC}{01\underbrace{\tt 00\dots0}_{\text{
21 zeros
      }}}
      $
    }
  \end{center}
  
\end{frame}

%------------------------------------------------------------------------------

\begin{frame}
\frametitle{Denormalised numbers}

  \begin{center}
    $$
    1.0_2 \times 2^{-130_{10}}
    $$
  \end{center}

For a single precision floating point,

  $$
  e_{\text{min}} = -126_{10} \Rightarrow \text{
Cannot normalise!
  }
  $$

Note that:

  $$
  e_{\text{min}} + \text{
bias
  } = 127_{10} + (-126_{10}) = 1 = \tt 0000\_0001_2
  $$
  
  \vspace{0.5\baselineskip}
  %%BEGIN LANGUAGE en
  For the \texttt{0000\_0000} biased exponent, interpretation is changed:
  %%END LANGUAGE en

  \vspace{-\baselineskip}
  $$
  \tt
  \textcolor{SC}{0}\ \textcolor{EC}{\tt 0000\,0000}\ %
  \textcolor{FC}{
    \tt
    0001\!\!\underbrace{\tt 00\dots0}_{\text{
19 zeros
    }}
  }
  =
  \textcolor{SC}{+} \textcolor{FC}{0.0001_2} \times
  2^{\textcolor{EC}{\mathbf{-126_{10}}}}
  = 
  \textcolor{SC}{+} \textcolor{FC}{0.0625_{10}} \times
  2^{\textcolor{EC}{\mathbf{-126_{10}}}}
  $$
  
  \vspace{0.5\baselineskip}
  %%BEGIN LANGUAGE en
  The exponent is $-126$, \textbf{not} $-127$!
  %%END LANGUAGE en
  \vspace{\baselineskip}

\end{frame}

%------------------------------------------------------------------------------

\begin{frame}[containsverbatim]
\frametitle{Why bother about denormalised numbers}

  %%BEGIN LANGUAGE en
  Gradual underflow
  %%END LANGUAGE en

  \begin{center}
    \includegraphics[width=13cm,trim=1cm 2.5cm 1cm 9.5cm,clip]{images/floating-point/2008_Engelen_1_p6.pdf}
  \end{center}

  \vspace{-0.5\baselineskip}
  \rightline{\mycite{Engelen2008}}

  \begin{lstlisting}[language=C,frame=trBL,basicstyle=\ttfamily]
    if (a != b) { x = a/(a-b); }
  \end{lstlisting}
  
\end{frame}

%------------------------------------------------------------------------------

\begin{frame}[containsverbatim]
\frametitle{Zeroes}

  $$
  \tt
  \textcolor{SC}{0}\ \textcolor{EC}{\tt 0000\,0000}\ %
  \textcolor{FC}{
    \tt
    \underbrace{\tt 00\dots0}_{\text{
23 zeros
    }}
  }
  = 0
  $$
  
  $$
  \tt
  \textcolor{SC}{1}\ \textcolor{EC}{\tt 0000\,0000}\ %
  \textcolor{FC}{
    \tt
    \underbrace{\tt 00\dots0}_{\text{
23 zeros
    }}
  }
  = -0
  $$  

  $$
  \frac{1}{0} = \infty
  $$

  $$
  \frac{1}{-0} = -\infty
  $$

  \vspace{\baselineskip}
  \begin{lstlisting}[language=C,frame=trBL,basicstyle=\ttfamily]
    if (a > b) { x = log(a-b); }
  \end{lstlisting}
  \rightline{\mycite{Engelen2008}}

\end{frame}

%------------------------------------------------------------------------------

\begin{frame}
\frametitle{Infinities}

  %%BEGIN LANGUAGE en
  The all-ones exponent, \texttt{1111\_1111}, has not been used so far
  %%END LANGUAGE en

  $$
  \tt
  \textcolor{SC}{0}\ \textcolor{EC}{\tt 1111\,1111}\ %
  \textcolor{FC}{
    \tt
    \underbrace{\tt 00\dots0}_{\text{
23 zeros
    }}
  }
  = \infty
  $$
  
  $$
  \tt
  \textcolor{SC}{1}\ \textcolor{EC}{\tt 1111\,1111}\ %
  \textcolor{FC}{
    \tt
    \underbrace{\tt 00\dots0}_{\text{
23 zeros
    }}
  }
  = -\infty
  $$  

  $$
  \frac{1}{0} = +\infty; \quad \frac{1}{+\infty} = +0
  $$

  $$
  \frac{1}{-0} = -\infty; \quad \frac{1}{-\infty} = -0
  $$

\end{frame}

%------------------------------------------------------------------------------

\begin{frame}
\frametitle{Not a number: NaN}

  $$
  \tt
  \textcolor{SC}{0}\ \textcolor{EC}{\tt 1111\,1111}\ %
  \textcolor{FC}{
    \tt
    \underbrace{\tt \textbf{1}1\dots0}_{\text{
not all 23 positions are zeros
    }}
  }
  = \text{qNaN}
  $$
  
  $$
  \tt
  \textcolor{SC}{0}\ \textcolor{EC}{\tt 1111\,1111}\ %
  \textcolor{FC}{
    \tt
    \underbrace{\tt \textbf{0}1\dots0}_{\text{
not all 23 positions are zeros
    }}
  }
  = \text{sNaN}
  $$

  \vspace{0.3\baselineskip}
Operations that produce NaN:

  \begin{center}
    \small
    \begin{tabular}{ll}
      \hline
Operation
      &
NaN produced by
      \\
      \hline
      $+$ & $\infty + (-\infty)$ \\
      $\times$ & $0 \times \infty$ \\
      $/$ & $0/0$, $\infty/\infty$ \\
      rem & $0\;\text{rem}\;0$, $\infty\;\text{rem}\;\infty$ \\
      $\sqrt{\mbox{}}$ & $\sqrt{x}\quad \forall x < 0$ \\
      \hline
    \end{tabular}
  \end{center}
  \rightline{\mycite{Goldberg1991}}
  
\end{frame}

%------------------------------------------------------------------------------

\begin{frame}
\frametitle{Comparison of NaNs}

  \begin{center}
    \begin{itemize}
    \item
      %%BEGIN LANGUAGE en
      Any comparison with NaN (except \texttt{!=}) returns false $\Rightarrow$ when
      $x<\text{NaN}$ fails, this does not imply $x\ge\text{NaN}$
      %%END LANGUAGE en
    \item
      \texttt{!(x < y)} $\not\Leftrightarrow$ \texttt{x >= y}
    \item
      \texttt{(x == y) == FALSE}
when
      \texttt{x == NaN}
    \item
      %%BEGIN LANGUAGE en
      Cannot sort an array of floats that contains NaNs
      %%END LANGUAGE en
    \end{itemize}
  \end{center}
  \rightline{\mycite{Engelen2008}}
  
\end{frame}

%------------------------------------------------------------------------------

\begin{frame}
\frametitle{Using NaNs}

For single precision FP, NaNs contain
  21
``free'' bits
  
For double precision FP, NaNs contain
  50
``free'' bits
  
  $$
  \raisebox{0.6pt}{
    \includegraphics[width=5cm,trim=0cm 0cm 7.67cm 0cm,clip]{images/floating-point/IEEE_754_Double_Float_Nan.eps}
  }
  \hspace{-5mm}
  \underbrace{
    \text{
      \includegraphics[width=5cm,trim=7.8cm 0cm 0cm 0cm,clip]{images/floating-point/IEEE_754_Double_Float_Nan.eps}
    }
  }_{\text{
      \parbox{0.4\linewidth}{
        \raggedright
at least 32 bits free! Enough for a 32-bit pointer...
      }
  }}
  $$

  \begin{itemize}
  \item
    %%BEGIN LANGUAGE en
    Dynamic languages (e.g. JavaScript) use ``boxed NaN values''
    %%END LANGUAGE en
  \item
sNaN is useful for catching uninitialised values
  \item
qNaN can represent unknown/unspecified values
  \end{itemize}
  
\end{frame}

%------------------------------------------------------------------------------

\begin{frame}
\frametitle{Summary: IEEE 754 special values}

  \begin{center}
    \begin{tabular}{lll}
      \hline
Exponent
      &
Fraction 
      &
Denotes
      \\
      \hline
      $e = e_{\text{min}} - 1$ & $f = 0$ & $\pm 0$ \\
      $e = e_{\text{min}} - 1$ & $f \ne 0$ & $\pm 0.f \times 2^{e_{\text{min}}}$ \\
      $e_{\text{min}} \le e \le e_{\text{max}}$ & any $f$ & $\pm 1.f \times 2^{e}$ \\
      $e = e_{\text{max}} + 1 $ & $f = 0$ & $\pm\infty$ \\
      $e = e_{\text{max}} + 1 $ & $f \ne 0$ & NaN \\
      \hline
    \end{tabular}
  \end{center}
  \rightline{\mycite{Goldberg1991}}
  
\end{frame}

%------------------------------------------------------------------------------

\begin{frame}
\frametitle{Single precision FP}

32-bit number
  
  \begin{center}
    \includegraphics[width=11cm]{images/floating-point/Float_example.eps}
  \end{center}
  
  \rightline{
    \scriptsize
    Vectorization:
    \myhref{https://commons.wikimedia.org/wiki/User:Stannered}{Stannered},
    \myhref{http://creativecommons.org/licenses/by-sa/3.0/}{CC BY-SA 3.0}
    via \myhref{https://commons.wikimedia.org/wiki/File:Float_example.svg}{Wikimedia Commons}
  }
  
\end{frame}

%------------------------------------------------------------------------------

\begin{frame}
\frametitle{Double precision FP}

64-bit number
  
  \begin{center}
    \includegraphics[width=11cm]{images/floating-point/IEEE_754_Double_Floating_Point_Format.eps}
  \end{center}

  \rightline{
    \scriptsize
    \mywebref{https://en.wikipedia.org/wiki/File:IEEE\_754\_Double\_Floating\_Point\_Format.svg}
  }
  
\end{frame}

%------------------------------------------------------------------------------

\begin{frame}
\frametitle{Intel 80 bit extended precision}

  \begin{center}
    \includegraphics[width=11cm]{images/floating-point/X86_Extended_Floating_Point_Format.eps}
  \end{center}
  
  \rightline{\tiny
    \myhref{https://commons.wikimedia.org/w/index.php?title=User:BillF4}{BillF4},
    \myhref{https://creativecommons.org/licenses/by-sa/3.0}{CC BY-SA
      3.0}, via \myhref{https://commons.wikimedia.org/wiki/File:X86_Extended_Floating_Point_Format.svg}{Wikimedia Commons}
  }

  \begin{itemize}
  \item
No hidden bit
  \item
Just enough precision to compute $x^y$
  \item
Intended for intermediate results only
  \end{itemize}

\end{frame}

%------------------------------------------------------------------------------

\begin{frame}
\frametitle{Intel x87 FP registers}

  \begin{center}
    \includegraphics[page=320,height=7cm,trim=2cm 16cm 3cm 4.3cm,clip]{images/AMD-dokumentacija/AMD64_Architecture_Programmers_Manual_Vol_1.pdf}
  \end{center}

  \mycite{AMD2017}
  
\end{frame}

%------------------------------------------------------------------------------

\begin{frame}
\frametitle{Floating point status flags}

  \begin{center}
    \includegraphics[page=322,height=7cm,trim=2cm 12.5cm 2cm 4.3cm,clip]{images/AMD-dokumentacija/AMD64_Architecture_Programmers_Manual_Vol_1.pdf}
  \end{center}

  \mycite{AMD2017}
  
\end{frame}

%------------------------------------------------------------------------------

\begin{frame}
\frametitle{Floating point control flags}

  \begin{center}
    \includegraphics[page=325,height=6cm,trim=2cm 16cm 2cm 4.3cm,clip]{images/AMD-dokumentacija/AMD64_Architecture_Programmers_Manual_Vol_1.pdf}
  \end{center}

  \mycite{AMD2017}

\end{frame}

%------------------------------------------------------------------------------

\begin{frame}
\frametitle{Floating point properties}

  \small
  \begin{itemize}
  \item
Guaranteed precision of single operations

    \begin{quote}
      Except where stated otherwise, every operation shall be
      performed as if it first produced an intermediate result correct
      to infinite precision and with unbounded range, and then rounded
      that result according to one of the attributes in this clause.
    \end{quote}
    \rightline{\scriptsize\mycite{IEEE2019}, sect. 4.3}
  \item
Each representable number has a single representation
  \item
FP numbers are ordered as signed magnitude integers!

    \begin{quote}
      All of the possible single-precision entities are well ordered
      in the natural lexicographic ordering of their machine
      representations interpreted as sign-magnitude binary integers
    \end{quote}
    \rightline{\scriptsize\mycite{Cody1981}}

  \end{itemize}
  
\end{frame}

%% %------------------------------------------------------------------------------
%% 
%% \begin{frame}
%%   %%LANGUAGE en \frametitle{Fused instructions}
%%   %%LANGUAGE lt \frametitle{Sulietos operacijos}
%%   %%LANGUAGE ru \frametitle{Составные операции}
%% 
%% \end{frame}

%------------------------------------------------------------------------------

\lstset{
  keywordstyle=\color{KwdColor},
  commentstyle=\color{CommentColor}\ttfamily,
  identifierstyle=\color{IdentifierColor},
  stringstyle=\color{StringColor},
  basicstyle=\ttfamily\tiny
}

\begin{frame}[containsverbatim]
\frametitle{Example: FP 16-bit code}

  \begin{center}
    \lstinputlisting[language=bash,basicstyle=\ttfamily\small]{examples/floating-point/compiled-examples/16-bit-single-precision/command.sh}
  \end{center}
  
  \hfill
  \begin{minipage}[c]{0.4\textwidth}
    \lstinputlisting[language=C,firstline=8,frame=trBL]{examples/floating-point/compiled-examples/16-bit-single-precision/single-precision.c}
  \end{minipage}
  \hspace{2em}
  \begin{minipage}[c]{0.4\textwidth}
    \lstinputlisting[language=assembler,linerange={7-17},frame=trBL,morekeywords={flds,faddp,fdivrp}]{examples/floating-point/compiled-examples/16-bit-single-precision/single-precision.asm}
  \end{minipage}
  \hfill
  \mbox{}
  
\end{frame}

%------------------------------------------------------------------------------

\lstset{
  keywordstyle=\color{KwdColor},
  commentstyle=\color{CommentColor}\ttfamily,
  identifierstyle=\color{IdentifierColor},
  stringstyle=\color{StringColor},
  basicstyle=\ttfamily\tiny
}

\begin{frame}[containsverbatim]
\frametitle{Example: FP 64-bit code}

  \begin{center}
    \lstinputlisting[language=bash,basicstyle=\ttfamily\small]{examples/floating-point/compiled-examples/64-bit-single-precision/command.sh}
  \end{center}

  \hfill
  \begin{minipage}[c]{0.4\textwidth}
    \lstinputlisting[language=C,firstline=8,frame=trBL]{examples/floating-point/compiled-examples/64-bit-single-precision/single-precision.c}
  \end{minipage}
  \hspace{2em}
  \begin{minipage}[c]{0.4\textwidth}
    \lstinputlisting[language=assembler,linerange={6-14},frame=trBL,morekeywords={movaps,addss,mulss,divss}]{examples/floating-point/compiled-examples/64-bit-single-precision/single-precision.asm}
  \end{minipage}
  \hfill
  \mbox{}
  
\end{frame}

%------------------------------------------------------------------------------

%% \begin{frame}
%%   %%LANGUAGE en \frametitle{Floating point comparisons}
%%   %%LANGUAGE lt \frametitle{Slankaus kablelio palyginimai}
%%   %%LANGUAGE ru \frametitle{Сравнения чисел с п.з.}
%% 
%% \end{frame}

%------------------------------------------------------------------------------

%% \begin{frame}
%%   %%LANGUAGE en \frametitle{Example: Kahan summation}
%%   %%LANGUAGE lt \frametitle{Pavyzdys: Kahano sumavimo algoritmas}
%%   %%LANGUAGE ru \frametitle{Пример: суммирование Кэхэна}
%% 
%% 
%%   {
%%     \scriptsize
%%     %%BEGIN LANGUAGE en
%%     \rightline{\mywebref{https://en.wikipedia.org/wiki/Kahan\_summation\_algorithm}}
%%     %%END LANGUAGE en
%%     %%BEGIN LANGUAGE lt
%%     \rightline{\mywebref{https://lt.wikipedia.org/wiki/Kahano\_sudėties\_algoritmas}}
%%     %%END LANGUAGE lt
%%     %%BEGIN LANGUAGE ru
%%     \rightline{\mywebref{https://ru.wikipedia.org/wiki/Алгоритм\_Кэхэна}}
%%     %%END LANGUAGE ru
%%   }
%% \end{frame}

%------------------------------------------------------------------------------

\begin{frame}
\frametitle{Floating point alternatives}

  \begin{itemize}
  \item
    %%BEGIN LANGUAGE en
    Rational arithmetic
    %%END LANGUAGE en


  \item
    %%BEGIN LANGUAGE en
    Tapered floating point
    %%END LANGUAGE en


  \item
    %%BEGIN LANGUAGE en
    J. Gustafson's Unum number system
    %%END LANGUAGE en
    \mycite{Gustafson2015}
  \item
    %%BEGIN LANGUAGE en
    Logarithmic number systems
    %%END LANGUAGE en
    \mycite{Coleman2008,Ismail2011}
  \end{itemize}
  
\end{frame}

%------------------------------------------------------------------------------

\begin{frame}
\frametitle{Take home messages}

  \begin{itemize}
  \item
    %%BEGIN LANGUAGE en
    Floating point numbers approximate mathematical real numbers
    %%END LANGUAGE en
  \item
    %%BEGIN LANGUAGE en
    Usually a normalised representation is used
    %%END LANGUAGE en
  \item
    %%BEGIN LANGUAGE en
    Special codes are used for denormalised numbers, infinities, NaNs,
    $\pm 0$
    %%END LANGUAGE en
  \item
    %%BEGIN LANGUAGE en
    Each IEEE 754 floating point (FP) entity has a unique bit representation, and
    each bit representation represents an FP entity
    %%END LANGUAGE en
  \item
    %%BEGIN LANGUAGE en
    Some FP objects (e.g. NaNs) have mathematical properties (e.g. in
    comparisons) different from those of regular real numbers
    %%END LANGUAGE en
  \item
    %%BEGIN LANGUAGE en
    Alternatives and further developments of FP arithmetic are being researched
    %%END LANGUAGE en
  \end{itemize}
  
\end{frame}

%------------------------------------------------------------------------------

\begin{frame}%%[allowframebreaks]
\frametitle{References}

  \setmainfont{Liberation Serif}
  \renewcommand{\bibfont}{\scriptsize}
  \printbibliography

\end{frame}

%------------------------------------------------------------------------------                                                                                                          
\end{document}
% 2021-11-29 10:13:37 EET
