PageRank.tex 3.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889
  1. \subsection{Idea}
  2. \begin{frame}{Basics}
  3. \begin{itemize}[<+->]
  4. \item Humans know what is good for them
  5. \item Humans create Websites
  6. \item Humans will only \href{http://en.wikipedia.org/wiki/Hyperlink}{link} to Websites they like
  7. \item[$\Rightarrow$] Hyperlinks are a quality indicator
  8. \end{itemize}
  9. \end{frame}
  10. \begin{frame}{How could we use that?}
  11. \begin{itemize}[<+->]
  12. \item Simply count number of links to a Website
  13. \item[\xmark] 10,000 links from only one page
  14. \item Count numbers of Websites that link to a Website
  15. \item[\xmark] Quality of the page matters
  16. \item[\xmark] Total number of links on the source page matters
  17. \end{itemize}
  18. \end{frame}
  19. \framedgraphic{A brilliant idea}{../images/BrinPage.jpg}
  20. \begin{frame}{Ideas of PageRank}
  21. \begin{itemize}[<+->]
  22. \item Decisions of humans are complicated
  23. \item A lot of webpages get visited
  24. \item[$\Rightarrow$] modellize clicks on links as random behaviour
  25. \item Links are important
  26. \item Links of page A get less important, if A has many links
  27. \item Links of page A get more important, if many link to A
  28. \item[$\Rightarrow$] if B has a link from A, the rank of B increases by $\frac{Rank(A)}{Links(A)}$
  29. \end{itemize}
  30. \pause[\thebeamerpauses]
  31. \begin{algorithmic}
  32. \If{A links to B}
  33. \State $Rank(B)$ += $\frac{Rank(A)}{Links(A)}$
  34. \EndIf
  35. \end{algorithmic}
  36. \end{frame}
  37. \begin{frame}{Ants}
  38. \begin{itemize}[<+->]
  39. \item Websites = nodes = anthill
  40. \item Links = edges = paths
  41. \item You place ants on each node
  42. \item They walk over the paths
  43. \item[] (at random, they are ants!)
  44. \item After some time, some anthills will have more ants than
  45. others
  46. \item Those hills are more attractive than others
  47. \item \# ants is probability that a random user would end on
  48. a website
  49. \end{itemize}
  50. \end{frame}
  51. \begin{frame}{Mathematics}
  52. Let $x$ be a web page. Then
  53. \begin{itemize}
  54. \item $L(x)$ is the set of Websites that link to $x$
  55. \item $C(y)$ is the out-degree of page $y$
  56. \item $\alpha$ is probability of random jump
  57. \item $N$ is the total number of websites
  58. \end{itemize}
  59. \[\displaystyle PR(x) := \alpha \left ( \frac{1}{N} \right ) + (1-\alpha) \sum_{y\in L(x)} \frac{PR(y)}{C_{y}}\]
  60. \end{frame}
  61. \begin{frame}{Pseudocode}
  62. \begin{algorithmic}
  63. \alertline<1> \Function{PageRank}{Graph $web$, double $q=0.15$, int $iterations$} %q is a damping factor
  64. \alertline<2> \ForAll{$page \in G$}
  65. \alertline<3> \State $page.pageRank = \frac{1}{|G|}$ \Comment{intial probability}
  66. \alertline<2> \EndFor
  67. \alertline<4> \While{$iterations > 0$}
  68. \alertline<5> \ForAll{$page \in G$} \Comment{calculate pageRank of $page$}
  69. \alertline<6> \State $page.pageRank = q$
  70. \alertline<7> \ForAll{$y \in L(page)$}
  71. \alertline<8> \State $page.pageRank$ += $\frac{y.pageRank}{C(y)}$
  72. \alertline<7> \EndFor
  73. \alertline<5> \EndFor
  74. \alertline<4> \State $iterations$ -= $1$
  75. \alertline<4> \EndWhile
  76. \alertline<1> \EndFunction
  77. \end{algorithmic}
  78. \end{frame}