12 tahun lalu · 04a45f295e
--- a/documents/DYCOS/Analyse.tex
+++ b/documents/DYCOS/Analyse.tex
@@ -0,0 +1,27 @@
 
				+Für den DYCOS-Algorithmus wurde in \cite{aggarwal2011} bewiesen,
			
 
				+dass sich nach Ausführung von DYCOS für einen unbeschrifteten
			
 
				+Knoten mit einer Wahrscheinlichkeit von höchstens
			
 
				+$(k-1)\cdot e^{-l \cdot b^2 / 2}$ eine Knotenbeschriftung ergibt, deren
			
 
				+relative Häufigkeit um $b$ geringer ist als die der häufigsten Beschriftung.
			
 
				+Dabei ist $k$ die Anzahl der Klassen und $l$ die Länge der 
			
 
				+Random-Walks.
			
 
				+
			
 
				+Außerdem wurde experimentell anhand des DBLP-Datensatzes\footnote{\url{http://dblp.uni-trier.de/}}
			
 
				+und des CORA-Datensatzes\footnote{\url{http://people.cs.umass.edu/~mccallum/data/cora-classify.tar.gz}}
			
 
				+gezeigt, dass die Klassifikationsgüte nicht wesentlich von der
			
 
				+maximalen Listenlänge $a$ und der Anzahl der Wörter mit
			
 
				+höchstem Gini-Koeffizienten $m$ abhängt. Obwohl es sich nicht sagen lässt,
			
 
				+wie genau die Ergebnisse aus \cite{aggarwal2011} zustande gekommen sind,
			
 
				+eignet sich das Kreuzvalidierungsverfahren zur Bestimmung der Klassifikationsgüte,
			
 
				+wie es in \cite{Lavesson,Stone1974} vorgestellt wird:
			
 
				+\begin{enumerate}
			
 
				+    \item Betrachte nur $V_{L,T}$.
			
 
				+    \item Unterteile $V_{L,T}$ zufällig in $k$ disjunkte Mengen $M_1, \dots, M_k$.
			
 
				+    \item \label{schritt3} Teste die Klassifikationsgüte, wenn die Knotenbeschriftungen
			
 
				+          aller Knoten in $M_i$ für DYCOS verborgen werden für $i=1,\dots, k$.
			
 
				+    \item Bilde den Durchschnitt der Klassifikationsgüten aus \cref{schritt3}.
			
 
				+\end{enumerate}
			
 
				+
			
 
				+Es wird $k=10$ vorgeschlagen.
			
 
				+
			
 
				+
			
--- a/documents/DYCOS/DYCOS.pdf
+++ b/documents/DYCOS/DYCOS.pdf
--- a/documents/DYCOS/DYCOS.tex
+++ b/documents/DYCOS/DYCOS.tex
@@ -63,6 +63,9 @@
 
				 \section{DYCOS}

			
 
				 \input{DYCOS-Algorithmus}

			
 
				 

			
 
				+\section{Analyse des DYCOS-Algorithmus}

			
 
				+\input{Analyse}

			
 
				+

			
 
				 \section{Probleme des DYCOS-Algorithmus}

			
 
				 \input{SchwaechenVerbesserungen}

			
 
				 

			
--- a/documents/DYCOS/literatur.bib
+++ b/documents/DYCOS/literatur.bib
@@ -97,6 +97,51 @@
 
				     address   = {New York, NY, USA},

			
 
				 } 

			
 
				 

			
 
				+@MASTERSTHESIS{heck,

			
 
				+  AUTHOR = {Heck, Michael},

			
 
				+  TITLE = {Unsupervised Acoustic Model Training for Simultaneous Lecture Translation in Incremental and Batch Mode},

			
 
				+  SCHOOL = {Karlsruhe Institute of Technology},

			
 
				+  TYPE = {Diploma Thesis},

			
 
				+  ADDRESS = {Germany},

			
 
				+  MONTH = DEC,

			
 
				+  YEAR = 2012,

			
 
				+  PDF = {http://isl.anthropomatik.kit.edu/cmu-kit/downloads/Diplomarbeit_Heck_Michael.pdf}

			
 
				+}

			
 
				+

			
 
				+@MASTERSTHESIS{Lavesson,

			
 
				+  AUTHOR = {Lavesson, Niklas},

			
 
				+  TITLE = {Evaluation and analysis of supervised learning algorithms and classifiers},

			
 
				+  SCHOOL = {Blekinge Institute of Technology},

			
 
				+  TYPE = {Licentiate Thesis},

			
 
				+  ADDRESS = {Sweden},

			
 
				+  MONTH = DEC,

			
 
				+  YEAR = 2006,

			
 
				+  PDF = {http://www.bth.se/fou/Forskinfo.nsf/Sok/c655a0b1f9f88d16c125714c00355e5d/$file/Lavesson_lic.pdf}

			
 
				+}

			
 
				+

			
 
				+@article{Stone1974,

			
 
				+    abstract = {{A generalized form of the cross-validation criterion is applied to the choice and assessment of prediction using the data-analytic concept of a

			
 
				+prescription. The examples used to illustrate the application are drawn from the problem areas of univariate estimation, linear regression and analysis of variance.}},

			
 
				+    author = {Stone, M.},

			
 
				+    citeulike-article-id = {6758792},

			
 
				+    citeulike-linkout-0 = {http://dx.doi.org/10.2307/2984809},

			
 
				+    citeulike-linkout-1 = {http://www.jstor.org/stable/2984809},

			
 
				+    doi = {10.2307/2984809},

			
 
				+    issn = {00359246},

			
 
				+    journal = {Journal of the Royal Statistical Society. Series B (Methodological)},

			
 
				+    keywords = {biomarker, cross\_validation},

			
 
				+    number = {2},

			
 
				+    pages = {111--147},

			
 
				+    posted-at = {2011-08-02 14:22:37},

			
 
				+    priority = {2},

			
 
				+    publisher = {Blackwell Publishing for the Royal Statistical Society},

			
 
				+    title = {{Cross-Validatory Choice and Assessment of Statistical Predictions}},

			
 
				+    url = {http://dx.doi.org/10.2307/2984809},

			
 
				+    volume = {36},

			
 
				+    year = {1974}

			
 
				+}

			
 
				+

			
 
				+

			
 
				 @incollection{porter,

			
 
				  author     = {Porter, M. F.},

			
 
				  chapter    = {An Algorithm for Suffix Stripping},

			
@@ -112,6 +157,8 @@
 
				  address    = {San Francisco, CA, USA},

			
 
				 } 

			
 
				 

			
 
				+

			
 
				+

			
 
				 @incollection{szummer,

			
 
				 title       = {Partially labeled classification with Markov random walks},

			
 
				 author      = {Martin Szummer and Jaakkola, Tommi},