software-engineering-basics.tex 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317
  1. \documentclass{beamer}
  2. \usetheme{Feather}
  3. % \usecolortheme{default}
  4. \usepackage{hyperref}
  5. \definecolor{links}{HTML}{2A1B81} % color applied to URLs via urlcolor below
  6. \definecolor{green}{HTML}{008000} % used for the \ding bullet on the "Project structure" frame
  7. \hypersetup{colorlinks,linkcolor=,urlcolor=links}
  8. \usepackage[utf8]{inputenc} % UTF-8 source encoding, so characters such as German umlauts can be typed directly
  9. \usepackage[english]{babel} % selects English as the document language
  10. \usepackage[T1]{fontenc} % this is needed for correct output of umlauts in pdf
  11. \usepackage{pifont} % provides \ding
  12. \usepackage{graphicx}
  13. \usepackage{minted} % needed for the inclusion of source code; NOTE(review): minted normally needs -shell-escape and Pygments -- confirm build setup
  14. \usepackage{csquotes} % provides \enquote
  15. \newcommand{\FolderOpen}[1][.8\ht\strutbox]{% Inline open-folder icon; optional argument is the icon height (default: 0.8 x the height of \strutbox)
  16. \includegraphics[height=#1]{graphics/FolderOpen}% image path is given without extension; the line-end % keeps a stray space out of the macro
  17. }
  18. \newcommand{\FileImage}[1][.8\ht\strutbox]{% Inline file icon; optional argument is the icon height (default: 0.8 x the height of \strutbox)
  19. \includegraphics[height=#1]{graphics/file}% image path is given without extension; the line-end % keeps a stray space out of the macro
  20. }
  21. \begin{document}
  22. \title{Software Engineering Basics}
  23. % \subtitle{A subtitle}
  24. \author{Martin Thoma}
  25. \date{14. August 2018}
  26. \subject{Software Engineering}
  27. \frame{\titlepage}
  28. \section{Intended Audience}
  29. \subsection{A subsection!}
  30. \begin{frame}{Intended Audience}
  31. Software is written by people with different backgrounds and strengths. Not
  32. everybody has a Software Engineering background. These slides should help
  33. you get the basics.
  34. \end{frame}
  35. \subsection{A subsection!}
  36. \begin{frame}{Alice and Bob}
  37. \begin{columns}
  38. \begin{column}{0.5\textwidth}
  39. \includegraphics[width=1.0\textwidth]{graphics/car.jpg}
  40. \end{column}
  41. \begin{column}{0.5\textwidth}
  42. \begin{itemize}
  43. \item Project: Build self-driving car
  44. \item Alice is in the US, Bob in Germany
  45. \end{itemize}
  46. \end{column}
  47. \end{columns}
  48. \end{frame}
  49. % \subsection{Version Control}
  50. \begin{frame}{Project structure}
  51. \begin{columns}
  52. \begin{column}{0.5\textwidth}
  53. \begin{itemize}
  54. \item[\FolderOpen] bin
  55. \item[\FolderOpen] docs
  56. \item[\FolderOpen] awesome\_project
  57. \item[\FolderOpen] tests
  58. \item[\FileImage] setup.py
  59. \item[\FileImage] tox.ini
  60. \end{itemize}
  61. \end{column}
  62. \begin{column}{0.5\textwidth}
  63. \begin{itemize}
  64. \item[\textcolor{green}{\ding{58}}] \texttt{\$ grep -rnIi foobar}
  65. \end{itemize}
  66. Details on \href{https://martin-thoma.com/python-projects/}{my blog}.
  67. \end{column}
  68. \end{columns}
  69. \end{frame}
  70. \begin{frame}{git workflow}
  71. \begin{enumerate}
  72. \item \texttt{\$ git clone repository.git}
  73. \item \texttt{\$ git add filename}
  74. \item \texttt{\$ git commit}
  75. \item \texttt{\$ git push}
  76. \end{enumerate}
  77. \uncover<2>{\includegraphics[width=1.0\textwidth,height=0.5\textheight,keepaspectratio]{graphics/git-workflow.png}}
  78. \end{frame}
  79. \begin{frame}{Version Control: git}
  80. \includegraphics[width=1.0\textwidth]{graphics/git-simple.png}
  81. \end{frame}
  82. \begin{frame}{git log}
  83. \includegraphics[width=0.9\textwidth]{graphics/git-log.png}
  84. \end{frame}
  85. \begin{frame}{git blame}
  86. \includegraphics[width=1.0\textwidth]{graphics/git-blame.png}
  87. \end{frame}
  88. \begin{frame}{git status}
  89. \includegraphics[width=1.0\textwidth]{graphics/git-status.png}
  90. \end{frame}
  91. \begin{frame}{}
  92. \huge We read code MUCH more often than we write it.
  93. \end{frame}
  94. \begin{frame}{git}
  95. Problem: Dirty commit history due to bugs / fixes
  96. \includegraphics[width=1.0\textwidth]{graphics/git-2-fixes.png}
  97. \end{frame}
  98. \begin{frame}{Auto formatting}
  99. \begin{itemize}[<+->]
  100. \item PRs should not be about simple code style
  101. \item Choose one \href{https://martin-thoma.com/python-style-guide/}{style guide}
  102. and stick to it
  103. \item Trailing spaces are just noise - make your editor remove them
  104. automatically.
  105. \end{itemize}
  106. \end{frame}
  107. \begin{frame}{The Zen of Python, by Tim Peters (1)}
  108. Beautiful is better than ugly.\\
  109. Explicit is better than implicit.\\
  110. Simple is better than complex.\\
  111. Complex is better than complicated.\\
  112. Flat is better than nested.\\
  113. Sparse is better than dense.\\
  114. Readability counts.\\
  115. Special cases aren't special enough to break the rules.\\
  116. Although practicality beats purity.\\
  117. Errors should never pass silently.\\
  118. Unless explicitly silenced.
  119. \end{frame}
  120. \begin{frame}{The Zen of Python, by Tim Peters (2)}
  121. In the face of ambiguity, refuse the temptation to guess.\\
  122. There should be one-- and preferably only one --obvious way to do it.\\
  123. Although that way may not be obvious at first unless you're Dutch.\\
  124. Now is better than never.\\
  125. Although never is often better than *right* now.\\
  126. If the implementation is hard to explain, it's a bad idea.\\
  127. If the implementation is easy to explain, it may be a good idea.\\
  128. Namespaces are one honking great idea -- let's do more of those!
  129. \end{frame}
  130. \begin{frame}{commit squashing}
  131. \begin{columns}
  132. \begin{column}{0.5\textwidth}
  133. \textbf{Commit squashing}
  134. Making multiple commits in a row become one
  135. \end{column}
  136. \begin{column}{0.5\textwidth}
  137. \includegraphics[height=0.5\textheight]{graphics/commit-squashing.png}
  138. {\small Image source: \href{https://stevenschwenke.de/GitToSquashOrNotToSquash}{stevenschwenke.de}}
  139. \end{column}
  140. \end{columns}
  141. \end{frame}
  142. \begin{frame}{git merge}
  143. \includegraphics[width=1.0\textwidth,height=0.5\textheight,keepaspectratio]{graphics/git-merge.png}
  144. \end{frame}
  145. \begin{frame}{git merge vs git rebase}
  146. \includegraphics[width=1.0\textwidth,height=0.8\textheight,keepaspectratio]{graphics/git-rebase.png}
  147. {\small Image source: \href{https://jeffkreeftmeijer.com/git-rebase/}{Jeff Kreeftmeijer}}
  148. \end{frame}
  149. \section{Bugs}
  150. \begin{frame}{Logic Bugs: Functions and McCabe}
  151. \inputminted[linenos,
  152. numbersep=7pt,
  153. fontsize=\footnotesize, tabsize=4]{python}{src/sieve-v1.py}
  154. \end{frame}
  155. \begin{frame}{Logic Bugs: Names}
  156. \inputminted[linenos,
  157. numbersep=7pt,
  158. fontsize=\footnotesize, tabsize=4,
  159. firstline=4]{python}{src/sieve-v2.py}
  160. \end{frame}
  161. \begin{frame}{Logic Bugs: Doctests!}
  162. \inputminted[linenos,
  163. numbersep=7pt,
  164. fontsize=\footnotesize, tabsize=4,
  165. firstline=8, lastline=24]{python}{src/sieve-v3.py}
  166. \end{frame}
  167. \section{Falsehoods Data Scientists Believe}
  168. \begin{frame}{Falsehoods Data Scientists Believe}
  169. \begin{centering}
  170. (1) Floating point numbers always look like this: 1.23456 or 0.000004577 or 12345.467765.
  171. \end{centering}
  172. \uncover<2->{
  173. \begin{itemize}
  174. \item<2-> Scientific notation: \texttt{4.577E-5} or \texttt{1.2345467765E4}
  175. \item<3-> German decimal format: \texttt{1,23456} or \texttt{0,000004577}
  176. \end{itemize}
  177. }
  178. \end{frame}
  179. \begin{frame}{Falsehoods Data Scientists Believe}
  180. \begin{centering}
  181. (2.1) Country names have a unique representation
  182. \end{centering}
  183. \uncover<2>{
  184. \enquote{Germany} vs \enquote{Deutschland}
  185. }
  186. \end{frame}
  187. \begin{frame}{Falsehoods Data Scientists Believe}
  188. \begin{centering}
  189. (2.2) Country names have a unique representation in English
  190. \end{centering}
  191. \uncover<2>{
  192. \enquote{United Kingdom} vs \enquote{UK}
  193. }
  194. \end{frame}
  195. \begin{frame}{Falsehoods Data Scientists Believe}
  196. \begin{centering}
  197. (2.3) Country names have a unique unabbreviated representation in English
  198. \end{centering}
  199. \uncover<2>{
  200. \enquote{United Kingdom} vs \enquote{Great Britain} vs \enquote{England}
  201. }
  202. \uncover<3>{Solution: Use/Demand \textbf{ISO 3166-1 alpha-3 country codes} everywhere}
  203. \end{frame}
  204. \begin{frame}{Falsehoods Data Scientists Believe}
  205. \begin{centering}
  206. (3) Data is clean
  207. \end{centering}
  208. \uncover<2->{
  209. No.
  210. \begin{itemize}
  211. \item<3-> User database: Birth date in the year 3.
  212. \item<4-> User database: Active user who is more than 90 years old.
  213. \item<5-> User database: User who is younger than 6.
  214. \end{itemize}
  215. }
  216. \end{frame}
  217. \begin{frame}{Falsehoods Data Scientists Believe}
  218. \begin{centering}
  219. (4) Time has no beginning and no end
  220. \end{centering}
  221. \uncover<2->{
  222. Unix Time Stamp: Seconds since 1st of January, 1970. Traditionally stored in a signed 32-bit int.
  223. }
  224. \end{frame}
  225. \begin{frame}{Falsehoods Data Scientists Believe}
  226. \begin{centering}
  227. (4) To avoid the Year-2038 problem, I can store \texttt{YYYY-mm-dd HH:MM:ss}
  228. \end{centering}
  229. \uncover<2->{
  230. \begin{itemize}
  231. \item<2-> \href{http://strftime.org/}{Python's strftime directives}
  232. \item<3-> Timezones
  233. \item<4-> Whenever possible, store the timezone and use \textbf{ISO 8601}:
  234. \texttt{2012-04-23T18:25:43.511+02:30} (\href{https://stackoverflow.com/a/15952652/562769}{reasons})
  235. \end{itemize}
  236. }
  237. \end{frame}
  238. \begin{frame}{Falsehoods Data Scientists Believe}
  239. \begin{centering}
  240. (5) The (physical) unit of a column / an API can be guessed.
  241. \end{centering}
  242. \uncover<2->{
  243. \begin{itemize}
  244. \item<2-> Clarify it
  245. \item<3-> See if the distribution / quantiles are reasonable
  246. \item<4-> Internally, use unit library \href{http://pint.readthedocs.io/en/latest/}{Pint}
  247. \end{itemize}
  248. }
  249. \end{frame}
  250. \begin{frame}{See also}
  251. \begin{itemize}
  252. \item git
  253. \begin{itemize}
  254. \item \href{http://meldmerge.org/}{meld}: Tool for diff and merge (\texttt{\$ git mergetool})
  255. \item \href{https://nvie.com/posts/a-successful-git-branching-model/}{A successful Git branching model}
  256. \end{itemize}
  257. \item \href{https://www.youtube.com/watch?v=8SNaW1nt6j0}{Debugging Python with ipdb and Spyder} - starting at 4:00
  258. \item \href{https://docs.python.org/3/library/profile.html}{cProfile}: Check where code improvements are effective
  259. \item David Goldberg: \href{https://www.itu.dk/~sestoft/bachelor/IEEE754_article.pdf}{What Every Computer Scientist Should Know About Floating-Point Arithmetic}
  260. \item \href{https://martin-thoma.com/testing-python-code/}{Testing with Python}
  261. \item \href{https://martin-thoma.com/logging-in-python/}{Logging with Python}
  262. \item UML: \href{https://www.websequencediagrams.com/}{Sequence diagrams}, Flow charts (e.g. \href{https://wiki.gnome.org/Apps/Dia/}{Dia} or \href{https://www.draw.io/}{draw.io})
  263. \item \href{https://balsamiq.com/}{Balsamiq}: Draft a UI
  264. \item Web: \href{https://gist.github.com/alexserver/2fcc26f7e1ebcfc9f6d8}{REST basics}
  265. \end{itemize}
  266. \end{frame}
  267. \end{document}
  268. A bare except is only correct when catching and re-raising exceptions; in all other cases, "except Exception" should be used so that KeyboardInterrupt is not caught.