| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109 |
- \subsection{How can we use this massive amout of information?}
- \begin{frame}{How can we use this massive amout of information?}
- \begin{itemize}[<+->]
- \item 625.3 million websites
- \item Wikipedia is one website and has several millions of pages
- \item[$\Rightarrow$] we need to rank websites!
- \end{itemize}
- \end{frame}
- \subsection{Idea}
- \begin{frame}{Basics of PageRank}
- We all know that:
- \begin{itemize}[<+->]
- \item humans know what is good for them
- \item[\xmark] machines don't know what's good for humans
- \item humans create websites
- \item humans will only \href{http://en.wikipedia.org/wiki/Hyperlink}{link} to websites they like
- \item[$\Rightarrow$] hyperlinks are a quality indicator
- \end{itemize}
- \end{frame}
- \begin{frame}{How Could We Use That?}
- \begin{itemize}[<+->]
- \item simply count number of links to a website
- \item[\xmark] 10,000 links from only one page
- \item count number of websites that link to a website
- \item[\xmark] quality of the linking website matters
- \item[\xmark] total number of links on the source page matters
- \end{itemize}
- \end{frame}
- \framedgraphic{A Brilliant Idea}{../images/BrinPage.jpg}
- \begin{frame}{Ideas of PageRank}
- \begin{itemize}[<+->]
- \item decisions of humans are complicated
- \item a lot of webpages get visited
- \item[$\Rightarrow$] modellize clicks on links as random behaviour
- \item links are important
- \begin{itemize}
- \item links of page A get less important, if A has many links
- \item links of page A get more important, if many link to A
- \end{itemize}
- \item[$\Rightarrow$] if B has a link from A, the rank of B increases by $\frac{Rank(A)}{Links(A)}$
- \end{itemize}
- \pause[\thebeamerpauses]
- \begin{algorithmic}
- \If{A links to B}
- \State $Rank(B)$ += $\frac{Rank(A)}{Links(A)}$
- \EndIf
- \end{algorithmic}
- \end{frame}
- \begin{frame}{What is PageRank?}
- The PageRank algorithm calculates the probability of a randomly
- clicking user ending up on a given page.
- \end{frame}
- \input{Animation}
- %\begin{frame}{Ants}
- % \begin{itemize}[<+->]
- % \item Websites = nodes = anthill
- % \item Links = edges = paths
- % \item You place ants on each node
- % \item They walk over the paths
- % \item[] (at random, they are ants!)
- % \item After some time, some anthills will have more ants than
- % others
- % \item Those hills are more attractive than others
- % \item \# ants is probability that a random user would end on
- % a website
- % \end{itemize}
- %\end{frame}
- \begin{frame}{Mathematics}
- Let $x$ be a web page. Then
- \begin{itemize}
- \item $L(x)$ is the set of websites that link to $x$
- \item $C(y)$ is the out-degree of page $y$
- \item $\alpha$ is probability of random jump
- \item $N$ is the total number of websites
- \end{itemize}
- \[\displaystyle PR(x) := \alpha \left ( \frac{1}{N} \right ) + (1-\alpha) \sum_{y\in L(x)} \frac{PR(y)}{C(y)}\]
- \end{frame}
- \begin{frame}{Pseudocode}
- \begin{algorithmic}
- \alertline<1> \Function{PageRank}{Graph $web$, double $q=0.15$, int $iterations$} %q is a damping factor
- %\alertline<2> \ForAll{$page \in web$}
- %\alertline<3> \State $page.pageRank = \frac{1}{|web|}$ \Comment{intial probability}
- %\alertline<2> \EndFor
- \alertline<2> \While{$iterations > 0$}
- \alertline<3> \ForAll{$page \in web$} \Comment{calculate pageRank of $page$}
- \alertline<4> \State $page.pageRank = q$
- \alertline<5> \ForAll{$y \in L(page)$}
- \alertline<6> \State $page.pageRank$ += $\frac{y.pageRank}{C(y)}$
- \alertline<5> \EndFor
- \alertline<3> \EndFor
- \alertline<2> \State $iterations$ -= $1$
- \alertline<2> \EndWhile
- \alertline<1> \EndFunction
- \end{algorithmic}
- \end{frame}
|