PageRank.tex 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100
  1. \subsection{Idea}
  2. \begin{frame}{Basics of PageRank}
  3. We all know that:
  4. \begin{itemize}[<+->]
  5. \item humans know what is good for them
  6. \item[\xmark] machines don't know what's good for humans
  7. \item humans create websites
  8. \item humans will only \href{http://en.wikipedia.org/wiki/Hyperlink}{link} to websites they like
  9. \item[$\Rightarrow$] hyperlinks are a quality indicator
  10. \end{itemize}
  11. \end{frame}
  12. \begin{frame}{How Could We Use That?}
  13. \begin{itemize}[<+->]
  14. \item simply count number of links to a website
  15. \item[\xmark] 10,000 links from only one page
  16. \item count number of websites that link to a website
  17. \item[\xmark] quality of the linking website matters
  18. \item[\xmark] total number of links on the source page matters
  19. \end{itemize}
  20. \end{frame}
  21. \framedgraphic{A Brilliant Idea}{../images/BrinPage.jpg}
  22. \begin{frame}{Ideas of PageRank}
  23. \begin{itemize}[<+->]
  24. \item decisions of humans are complicated
  25. \item a lot of webpages get visited
  26. \item[$\Rightarrow$] modellize clicks on links as random behaviour
  27. \item links are important
  28. \begin{itemize}
  29. \item links of page A get less important, if A has many links
  30. \item links of page A get more important, if many link to A
  31. \end{itemize}
  32. \item[$\Rightarrow$] if B has a link from A, the rank of B increases by $\frac{Rank(A)}{Links(A)}$
  33. \end{itemize}
  34. \pause[\thebeamerpauses]
  35. \begin{algorithmic}
  36. \If{A links to B}
  37. \State $Rank(B)$ += $\frac{Rank(A)}{Links(A)}$
  38. \EndIf
  39. \end{algorithmic}
  40. \end{frame}
  41. \begin{frame}{What is PageRank?}
  42. The PageRank algorithm calculates the probability of a randomly
  43. clicking user ending up on a given page.
  44. \end{frame}
  45. \input{Animation}
  46. %\begin{frame}{Ants}
  47. % \begin{itemize}[<+->]
  48. % \item Websites = nodes = anthill
  49. % \item Links = edges = paths
  50. % \item You place ants on each node
  51. % \item They walk over the paths
  52. % \item[] (at random, they are ants!)
  53. % \item After some time, some anthills will have more ants than
  54. % others
  55. % \item Those hills are more attractive than others
  56. % \item \# ants is probability that a random user would end on
  57. % a website
  58. % \end{itemize}
  59. %\end{frame}
  60. \begin{frame}{Mathematics}
  61. Let $x$ be a web page. Then
  62. \begin{itemize}
  63. \item $L(x)$ is the set of websites that link to $x$
  64. \item $C(y)$ is the out-degree of page $y$
  65. \item $\alpha$ is probability of random jump
  66. \item $N$ is the total number of websites
  67. \end{itemize}
  68. \[\displaystyle PR(x) := \alpha \left ( \frac{1}{N} \right ) + (1-\alpha) \sum_{y\in L(x)} \frac{PR(y)}{C(y)}\]
  69. \end{frame}
  70. \begin{frame}{Pseudocode}
  71. \begin{algorithmic}
  72. \alertline<1> \Function{PageRank}{Graph $web$, double $q=0.15$, int $iterations$} %q is a damping factor
  73. %\alertline<2> \ForAll{$page \in web$}
  74. %\alertline<3> \State $page.pageRank = \frac{1}{|web|}$ \Comment{intial probability}
  75. %\alertline<2> \EndFor
  76. \alertline<2> \While{$iterations > 0$}
  77. \alertline<3> \ForAll{$page \in web$} \Comment{calculate pageRank of $page$}
  78. \alertline<4> \State $page.pageRank = q$
  79. \alertline<5> \ForAll{$y \in L(page)$}
  80. \alertline<6> \State $page.pageRank$ += $\frac{y.pageRank}{C(y)}$
  81. \alertline<5> \EndFor
  82. \alertline<3> \EndFor
  83. \alertline<2> \State $iterations$ -= $1$
  84. \alertline<2> \EndWhile
  85. \alertline<1> \EndFunction
  86. \end{algorithmic}
  87. \end{frame}