12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324 |
- <?xml version="1.0" encoding="utf-8"?>
- <!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
- "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
- <book lang="en_US" xml:base="../">
- <title>Running HPCC in a Virtual Machine</title>
- <bookinfo>
- <title>HPCC in a Virtual Machine</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/redswooshWithLogo3.jpg" />
- </imageobject>
- </mediaobject>
- <author>
- <surname>Boca Raton Documentation Team</surname>
- </author>
- <legalnotice>
- <para>We welcome your comments and feedback about this document via
- email to <email>docfeedback@hpccsystems.com</email></para>
- <para>Please include <emphasis role="bold">Documentation
- Feedback</emphasis> in the subject line and reference the document name,
- page numbers, and current Version Number in the text of the
- message.</para>
- <para>LexisNexis and the Knowledge Burst logo are registered trademarks
- of Reed Elsevier Properties Inc., used under license.</para>
- <para>HPCC Systems is a registered trademark of LexisNexis Risk Data
- Management Inc.</para>
- <para>Other products, logos, and services may be trademarks or
- registered trademarks of their respective companies.</para>
- <para>All names and example data used in this manual are fictitious. Any
- similarity to actual persons, living or dead, is purely
- coincidental.</para>
- <para></para>
- </legalnotice>
- <xi:include href="common/Version.xml" xpointer="FooterInfo"
- xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="common/Version.xml" xpointer="DateVer"
- xmlns:xi="http://www.w3.org/2001/XInclude" />
- <corpname>HPCC Systems</corpname>
- <xi:include href="common/Version.xml" xpointer="Copyright"
- xmlns:xi="http://www.w3.org/2001/XInclude" />
- <mediaobject role="logo">
- <imageobject>
- <imagedata fileref="images/LN_Rightjustified.jpg" />
- </imageobject>
- </mediaobject>
- </bookinfo>
- <chapter>
- <title>Introduction</title>
- <para>These instructions will guide you through installing and running an
- HPCC<footnote>
- <para><emphasis role="bold">H</emphasis>igh <emphasis
- role="bold">P</emphasis>erformance <emphasis
- role="bold">C</emphasis>omputing <emphasis
- role="bold">C</emphasis>luster (HPCC) is a massively parallel
- processing computing platform that solves Big Data problems. See
- <ulink
- url="http://hpccsystems.com/Why-HPCC/How-it-works">http://hpccsystems.com/Why-HPCC/How-it-works</ulink>
- for more details.</para>
- </footnote> System on a single node inside a Linux virtual machine
- running on a Windows host.</para>
- <para>Packaged to run inside a virtual machine, this version provides a
- hands-on experience with an HPCC system. You can experiment with it and
- even create real-world data analytics applications—all on your desktop or
- laptop PC.</para>
- <para>This version includes the tools and functionality of an HPCC without
- the need for a physical cluster of servers. It provides enough for you to
- evaluate an HPCC system and learn to use ECL<footnote>
- <para><emphasis role="bold">E</emphasis>nterprise <emphasis
- role="bold">C</emphasis>ontrol <emphasis
- role="bold">L</emphasis>anguage (ECL) is a declarative, data-centric
- programming language used to manage all aspects of the massive data
- joins, sorts, and builds that truly differentiate HPCC (High
- Performance Computing Cluster) from other technologies in its ability
- to provide flexible data analysis on a massive scale.</para>
- </footnote>. Naturally, you do not get the power of parallel processing,
- but you can use this version as an evaluation, learning, and
- experimentation tool.</para>
- <para><informaltable colsep="1" frame="all" rowsep="1">
- <?dbfo keep-together="always"?>
- <tgroup cols="2">
- <colspec colwidth="49.50pt" />
- <colspec />
- <tbody>
- <row>
- <entry><inlinegraphic fileref="images/OSSgr3.png" /></entry>
- <entry>We recommend reading this document in its entirety before
- beginning. The steps in this document can take an hour or two,
- depending on your download speed.</entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable></para>
- <sect1>
- <title>System Requirements</title>
- <para>Running HPCC in a virtual machine requires (at minimum):</para>
- <itemizedlist>
- <listitem>
- <para>A personal computer running Windows XP, Windows Vista, or Windows 7
- (either 32- or 64-bit)</para>
- </listitem>
- <listitem>
- <para>A minimum of 2 GB of RAM, with at least 1.5 GB of free memory
- available. We recommend 4 GB or more.</para>
- </listitem>
- <listitem>
- <para>Intel Pentium D (or better) or AMD Athlon64/Opteron/Phenom
- processor</para>
- </listitem>
- <listitem>
- <para>Minimum 5 GB of available disk space; we recommend 20
- GB</para>
- </listitem>
- <listitem>
- <para>A virtualization software package:
- VMware<superscript>®</superscript> Player or Server (version 5.0 or
- later) or Oracle VM VirtualBox (version 4.0 or later).</para>
- </listitem>
- <listitem>
- <para>Internet Explorer<superscript>®</superscript> 8, Google Chrome
- 10, or Firefox™ 3.0 (or later)</para>
- </listitem>
- </itemizedlist>
- <para>Users should have familiarity with installing and running Windows
- applications.</para>
- </sect1>
- </chapter>
- <chapter>
- <title>Getting the Tools and the VM Image</title>
- <para>To run the virtual machine version of the HPCC System, you need
- virtualization software. These packages allow you to run virtual images
- inside a single host. There are several different vendors who make
- virtualization software. While any or all of these could work, we support
- the following products:</para>
- <itemizedlist>
- <listitem>
- <para>VMware's Virtual Machine</para>
- </listitem>
- <listitem>
- <para>Oracle's VM VirtualBox</para>
- </listitem>
- </itemizedlist>
- <sect1>
- <title>VMware Virtual Machine</title>
- <para>The VMware player from VMware<superscript>®</superscript> is a
- virtualization software you can use to run the HPCC virtual
- machine.</para>
- <para>In the following sections, you will:</para>
- <para><itemizedlist>
- <listitem>
- <para>Download and install the VMware Player</para>
- </listitem>
- <listitem>
- <para>Download the HPCC virtual machine image from HPCC
- Systems.</para>
- </listitem>
- <listitem>
- <para>Open and import the image in the VMware Player</para>
- </listitem>
- </itemizedlist>Once you have completed these steps, you can evaluate
- the HPCC Platform and learn how to use it.</para>
- <sect2>
- <title>Download and Install the VMware Player</title>
- <para>If you already have VMware Player installed, you can skip this
- section and go to <xref linkend="get_hpcc" />.</para>
- <orderedlist>
- <listitem>
- <para>Go to the VMware site: <ulink
- url="http://www.vmware.com/products/player/">http://www.vmware.com/products/player/</ulink>.</para>
- </listitem>
- <listitem>
- <para>Click on the download link, then follow the instructions to
- download the <emphasis>VMware Player for 32-bit and 64-bit
- Windows</emphasis>.</para>
- <para>Registration is required, but the player is free.</para>
- </listitem>
- <listitem>
- <para>Download the VMware Player (save to a folder on your
- machine).</para>
- </listitem>
- <listitem>
- <para>Follow VMware's on-screen instructions and install the
- VMware Player.</para>
- </listitem>
- </orderedlist>
- </sect2>
- <?hard-pagebreak ?>
- <sect2>
- <title id="get_hpcc">Get the latest HPCC Virtual Image File</title>
- <para><orderedlist>
- <listitem>
- <para>Download the latest HPCC virtual machine image file
- from:</para>
- <para><ulink
- url="http://HPCCsystems.com/download/hpcc-vm-image">http://hpccsystems.com/download/hpcc-vm-image</ulink></para>
- <para><variablelist>
- <varlistentry>
- <term>Note:</term>
- <listitem>
- <para>You may need to register to log in.</para>
- </listitem>
- </varlistentry>
- </variablelist></para>
- <para>Choose the VM Image file for the VMware player. The
- <emphasis>filename</emphasis>-<emphasis
- role="bold">vmx</emphasis>.ova file is appropriate for the
- VMware player.</para>
- </listitem>
- <listitem>
- <para>Save the file to a folder on your machine.</para>
- </listitem>
- <listitem>
- <para>Open the VMware Player.</para>
- </listitem>
- <listitem>
- <para>From the <emphasis role="bold">Player</emphasis> menu,
- select <emphasis role="bold">File</emphasis>, then <emphasis
- role="bold">Open...</emphasis></para>
- </listitem>
- <listitem>
- <para>Go to the folder where you saved the downloaded file and
- select it.</para>
- <para>For example (<emphasis
- role="bluebold">HPCCSystemsVM-<emphasis>n.n.n.n</emphasis>.ova</emphasis>
- , where n.n.n.n is the version number).</para>
- <para>The .ova file should open an <emphasis>Import Virtual
- Machine</emphasis> dialog window providing for the name of the
- new virtual machine, and the storage path of the virtual machine
- you selected. <figure id="ImportVm">
- <title xreflabel="welc">Import Virtual Machine</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/vmimg13.jpg" />
- </imageobject>
- </mediaobject>
- </figure></para>
- </listitem>
- <listitem>
- <para>Press the <emphasis role="bold">Import</emphasis> button
- to import the virtual machine into the VMware Player.</para>
- <para>Wait for the HPCC virtual machine to load to the desktop
- in the VMware player. This may take a few minutes.</para>
- <variablelist>
- <varlistentry>
- <term><emphasis role="bold">Note</emphasis>:</term>
- <listitem>
- <para>The first time you use the VM Player, you must
- accept the license agreement. You may also be prompted to
- install add-ons, but they are not necessary for the HPCC
- virtual machine.</para>
- </listitem>
- </varlistentry>
- </variablelist>
- </listitem>
- <listitem>
- <para>At the VMware Player main window the new virtual machine
- you just imported is now listed. Double-click on that virtual
- machine to start it.</para>
- </listitem>
- <listitem>
- <?dbfo keep-together="always"?>
- <para>Once the VM initialization completes, you will see a
- window similar to the following:</para>
- <figure id="welcometovm">
- <title xreflabel="welc">VM Welcome Screen</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/vmimg01.JPG" />
- </imageobject>
- </mediaobject>
- </figure>
- <para><informaltable colsep="1" frame="all" rowsep="1">
- <?dbfo keep-together="always"?>
- <tgroup cols="2">
- <colspec colwidth="49.50pt" />
- <colspec />
- <tbody>
- <row>
- <entry><inlinegraphic
- fileref="images/caution.png" /></entry>
- <entry>Your virtual IP address could be different from
- the ones provided in the example images. Please use
- the IP address provided by <emphasis
- role="bold">your</emphasis> installation.</entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable></para>
- <para>Do not resize this window; you will not interact with it.
- In addition, there is no need to log in.</para>
- <para><variablelist>
- <varlistentry>
- <term>TIP:</term>
- <listitem>
- <para>If you do click on the window, it will capture
- your keyboard/mouse. Press <emphasis
- role="bold">ctrl+alt</emphasis> to return control of
- your mouse.</para>
- </listitem>
- </varlistentry>
- </variablelist></para>
- </listitem>
- </orderedlist></para>
- <sect3>
- <title>Guest Additions</title>
- <para>The HPCC VM Images include Guest Additions. Guest Additions
- are device drivers and system applications that optimize the guest
- operating system for better performance and usability. If you would
- like to use your mouse pointer, and/or other desktop tools with your
- virtual machine, start the X windows display after you log in to
- your virtual machine.</para>
- <para>Log in with the credentials provided. (user: hpccdemo,
- password: hpccdemo) At the command prompt enter: <emphasis
- role="bold">startx</emphasis>.</para>
- </sect3>
- </sect2>
- </sect1>
- <sect1>
- <title>VM VirtualBox</title>
- <para>Oracle's virtualization software, VM VirtualBox, is supported for
- running the HPCC virtual machine image.</para>
- <para>In the following section(s), you will:</para>
- <itemizedlist>
- <listitem>
- <para>Download and install the VM VirtualBox</para>
- </listitem>
- <listitem>
- <para>Download the HPCC virtual machine image from HPCC
- Systems.</para>
- </listitem>
- <listitem>
- <para>Open and import the image in VM VirtualBox</para>
- </listitem>
- </itemizedlist>
- <sect2 id="Install_VM_VirtualBox">
- <title>Download and Install the VM VirtualBox</title>
- <para>If you already have the VM VirtualBox installed, skip to step
- number 6, and verify the network configuration settings.</para>
- <orderedlist>
- <listitem>
- <para>Go to the VirtualBox site: <ulink
- url="https://www.virtualbox.org/wiki/Downloads">https://www.virtualbox.org/wiki/Downloads</ulink>.</para>
- </listitem>
- <listitem>
- <para>Click on the appropriate link for your operating system. For
- example <emphasis>VirtualBox for Windows hosts</emphasis>.</para>
- </listitem>
- <listitem>
- <para>Download the VirtualBox Installation file. Save to a folder
- on your machine.</para>
- </listitem>
- <listitem>
- <para>Double-click on the installation file to install
- VirtualBox.</para>
- </listitem>
- <listitem>
- <para>Follow the on-screen instructions to complete the
- installation of VirtualBox.</para>
- </listitem>
- <listitem>
- <para>Start the VM VirtualBox application.</para>
- </listitem>
- <listitem>
- <para>From the <emphasis role="bold">File</emphasis> menu, select
- <emphasis role="bold">Preferences</emphasis>...</para>
- <para>The <emphasis>VirtualBox - Settings</emphasis> dialog
- displays.<figure id="VBoxSettings">
- <title xreflabel="welc">VirtualBox - Settings</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/vmimg14.jpg" />
- </imageobject>
- </mediaobject>
- </figure></para>
- </listitem>
- <listitem>
- <para>Select<emphasis role="bold"> Network</emphasis> from the
- list on the left side of the window.</para>
- </listitem>
- <listitem>
- <para>Double-click on the <emphasis>VirtualBox Host-Only Ethernet
- Adaptor</emphasis>.</para>
- </listitem>
- <listitem>
- <para>From the<emphasis> Network Details</emphasis> window, select
- the <emphasis role="bold">DHCP Server</emphasis> Tab and check the
- <emphasis role="bold">Enable Server</emphasis> box. <figure
- id="NetworkDetails">
- <title xreflabel="welc">Network Details</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/vmimg15.jpg" />
- </imageobject>
- </mediaobject>
- </figure><variablelist>
- <varlistentry>
- <term>Note:</term>
- <listitem>
- <para>You may need to add IP Address values to the Lower
- Address Bound and Upper Address Bound fields. The address
- the VM uses is in the private IP address range of
- 192.168.x.x</para>
- </listitem>
- </varlistentry>
- </variablelist></para>
- </listitem>
- <listitem>
- <para>Press the <emphasis role="bold">OK</emphasis> button on the
- <emphasis>Network Details</emphasis> dialog box, then press the
- <emphasis role="bold">OK</emphasis> button on the
- <emphasis>VirtualBox - Settings</emphasis> box.</para>
- </listitem>
- </orderedlist>
- </sect2>
- <sect2>
- <title id="get_HPCC">Import the HPCC Virtual Image File</title>
- <para><orderedlist>
- <listitem>
- <para>Download the latest HPCC virtual machine image file
- from:</para>
- <para><ulink
- url="http://HPCCsystems.com/download/hpcc-vm-image">http://hpccsystems.com/download/hpcc-vm-image</ulink></para>
- <para><variablelist>
- <varlistentry>
- <term>Note:</term>
- <listitem>
- <para>You may need to register to log in.</para>
- </listitem>
- </varlistentry>
- </variablelist></para>
- <para>Choose the VM Image file for VirtualBox. The
- <emphasis>filename</emphasis>.ova file is appropriate for the
- VirtualBox.</para>
- </listitem>
- <listitem>
- <para>Save the file to a folder on your machine.</para>
- </listitem>
- <listitem>
- <para>Open VM VirtualBox.</para>
- </listitem>
- <listitem>
- <para>From the <emphasis role="bold">File</emphasis> menu select
- <emphasis role="bold">Import Appliance... (Ctrl +
- I)</emphasis></para>
- </listitem>
- <listitem>
- <para>Press the <emphasis role="bold">Open appliance</emphasis>
- button from the dialog to select the appliance to import.</para>
- </listitem>
- <listitem>
- <para>Navigate to the folder where you saved the downloaded file
- and select it.</para>
- <para>For example (<emphasis
- role="bluebold">HPCCSystemsVM-<emphasis>n.n.n.n</emphasis>.ova</emphasis>
- , where n.n.n.n is the version number).</para>
- </listitem>
- <listitem>
- <para>Press the <emphasis role="bold">Open</emphasis> button to
- start the import. Follow the prompts in the import process
- pressing <emphasis role="bold">Next</emphasis> as appropriate,
- then press <emphasis role="bold">Import</emphasis>.</para>
- <para>Wait for the HPCC virtual machine to import. This may take
- a few minutes.</para>
- </listitem>
- <listitem>
- <para>At the main window, the newly imported virtual machine is
- now listed.</para>
- </listitem>
- <listitem>
- <para>Select the new virtual machine.</para>
- </listitem>
- <listitem>
- <para>Click on the <emphasis role="bold">Settings</emphasis>
- icon. This will open up the settings dialog window. <figure
- id="VBoxSettingIcon">
- <title xreflabel="welc">VirtualBox Settings Icon</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/vmimg16.jpg" />
- </imageobject>
- </mediaobject>
- </figure></para>
- </listitem>
- <listitem>
- <para>Select the <emphasis role="bold">Network</emphasis> menu
- option. On the <emphasis>Adapter 1</emphasis> tab check the
- <emphasis>Enable Network Adapter</emphasis> box, and set the
- <emphasis>Attached to:</emphasis> option to <emphasis
- role="bold">NAT</emphasis>. <figure id="VBoxNet1">
- <title xreflabel="welc">VM VirtualBox Network Adapter
- 1</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/vmimg17.jpg" />
- </imageobject>
- </mediaobject>
- </figure></para>
- <variablelist>
- <varlistentry>
- <term>Note:</term>
- <listitem>
- <para>These settings may be set as required by default. If
- so just verify that they are correct.</para>
- </listitem>
- </varlistentry>
- </variablelist>
- </listitem>
- <listitem>
- <para>From the same <emphasis>Network - Settings</emphasis>
- window, select the tab for<emphasis> Adapter 2</emphasis>. Check
- the <emphasis>Enable Network Adapter</emphasis> box, and set the
- <emphasis>Attached to:</emphasis> option to <emphasis
- role="bold">Host-only Adapter</emphasis>.</para>
- </listitem>
- <listitem>
- <para>Press the <emphasis role="bold">OK</emphasis>
- button.</para>
- </listitem>
- <listitem>
- <para>Double-click on that virtual machine to start it.
- <variablelist>
- <varlistentry>
- <term>TIP:</term>
- <listitem>
- <para>If you get any Network Error messages, please
- review the steps in <xref
- linkend="Install_VM_VirtualBox" /> and verify that all
- the network settings are set appropriately.</para>
- </listitem>
- </varlistentry>
- </variablelist></para>
- </listitem>
- <listitem>
- <?dbfo keep-together="always"?>
- <para>Once the VM initialization completes, you will see a
- window similar to the following:</para>
- <figure id="vbox_welcome">
- <title xreflabel="welc">VirtualBox Welcome Screen</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/vmimg01b.JPG" />
- </imageobject>
- </mediaobject>
- </figure>
- <para><informaltable colsep="1" frame="all" rowsep="1">
- <?dbfo keep-together="always"?>
- <tgroup cols="2">
- <colspec colwidth="49.50pt" />
- <colspec />
- <tbody>
- <row>
- <entry><inlinegraphic
- fileref="images/caution.png" /></entry>
- <entry>Your virtual IP address could be different from
- the ones provided in the example images. Please use
- the IP address provided by <emphasis
- role="bold">your</emphasis> installation.</entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable></para>
- <para>Do not resize this window; you will not interact with it.
- In addition, there is no need to log in.</para>
- <para><variablelist>
- <varlistentry>
- <term>TIP:</term>
- <listitem>
- <para>If you click on the window, it can capture your
- keyboard/mouse. Press the <emphasis role="bold">Right
- Ctrl</emphasis> to regain control of your mouse.</para>
- </listitem>
- </varlistentry>
- </variablelist></para>
- </listitem>
- </orderedlist></para>
- <sect3>
- <title>Guest Additions</title>
- <para>The HPCC VM Images include Guest Additions. Guest Additions
- are device drivers and system applications that optimize the guest
- operating system for better performance and usability. If you would
- like to use your mouse pointer, and/or other desktop tools with your
- virtual machine, start the X windows display after you log in to
- your virtual machine.</para>
- <para>Log in with the credentials provided. (user: hpccdemo,
- password: hpccdemo) At the command prompt enter: <emphasis
- role="bold">startx</emphasis>.</para>
- </sect3>
- </sect2>
- </sect1>
- </chapter>
- <chapter>
- <title>Running the HPCC VM</title>
- <para>In this section, we will access the HPCC using the web-based
- interface: ECL Watch<footnote>
- <para>ECL Watch is a Web-based interface to your HPCC system. It
- enables you to examine and manage many aspects of the HPCC and allows
- you to see information about jobs you run, data files, and system
- metrics.</para>
- </footnote>. From ECL Watch, we will download the ECL IDE<footnote>
- <para>The ECL IDE (Integrated Development Environment) is the tool
- used to create queries into your data and ECL files with which to
- build your queries. This is a Windows application.</para>
- </footnote>. If you already have the ECL IDE installed, you can skip
- this section and continue at <emphasis>Running the HPCC ECL IDE when you
- had a previous version installed</emphasis>.</para>
- <para><orderedlist>
- <listitem>
- <para>In your browser, go to the <emphasis role="bold">ECL
- Watch</emphasis> URL displayed (circled in red) in Figure 1,
- <emphasis>VM Welcome Screen</emphasis>. For example,
- http://nnn.nnn.nnn.nnn:8010, where nnn.nnn.nnn.nnn is your Virtual
- Machine's IP address.</para>
- <para><informaltable colsep="1" frame="all" rowsep="1">
- <?dbfo keep-together="always"?>
- <tgroup cols="2">
- <colspec colwidth="49.50pt" />
- <colspec />
- <tbody>
- <row>
- <entry><inlinegraphic
- fileref="images/caution.png" /></entry>
- <entry>Your virtual IP address could be different from the
- ones provided in the example images. Please use the IP
- address provided by <emphasis role="bold">your</emphasis>
- installation.</entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable></para>
- </listitem>
- <listitem>
- <?dbfo keep-together="always"?>
- <para>From the ECL Watch Advanced menu, select the <emphasis
- role="bold">Additional Resources </emphasis>link.</para>
- <para><figure>
- <title>ECL Watch Resource Page</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/GS_1311.jpg" vendor="eclwatchSS" />
- </imageobject>
- </mediaobject>
- </figure></para>
- <para>Follow the link to the HPCC Systems portal download
- page.</para>
- </listitem>
- <listitem>
- <para>Click on the <emphasis role="bold">ECL IDE </emphasis>link.
- (on the right hand side in the Download column, under the Free
- Community Edition heading)</para>
- </listitem>
- <listitem>
- <para>Follow the instructions on the web page to install the ECL
- IDE.</para>
- </listitem>
- <listitem>
- <para>Install the ECL IDE, following the prompts in the installation
- program. Once the ECL IDE is installed successfully, you can
- proceed.</para>
- </listitem>
- </orderedlist></para>
- <sect1>
- <title>Running the ECL IDE for the first time</title>
- <para>In this section, we will configure the ECL IDE.</para>
- <orderedlist>
- <listitem>
- <para>Open the ECL IDE, from your start menu. (Start <emphasis
- role="bold">>></emphasis> All Programs <emphasis
- role="bold">>></emphasis> HPCCSystems <emphasis
- role="bold">>></emphasis> ECL IDE).</para>
- <informaltable colsep="1" frame="all" rowsep="1">
- <?dbfo keep-together="always"?>
- <tgroup cols="2">
- <colspec colwidth="49.50pt" />
- <colspec />
- <tbody>
- <row>
- <entry><inlinegraphic fileref="images/OSSgr3.png" /></entry>
- <entry>You can create a shortcut on your desktop to provide
- quick access to the ECL IDE.</entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable>
- </listitem>
- <listitem>
- <para>Enter the IP Address shown in <xref linkend="welcometovm" />
- for the server in the <emphasis role="bold">Server </emphasis>box
- (as shown in <xref linkend="Preferences" />) and press the <emphasis
- role="bold">OK</emphasis> button.</para>
- <para><figure id="Preferences">
- <title>ECL IDE Preferences</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/vmimg06.JPG" />
- </imageobject>
- </mediaobject>
- </figure></para>
- </listitem>
- <listitem>
- <?dbfo keep-together="always"?>
- <para>Enter the <emphasis role="bold">Login ID</emphasis> and
- <emphasis role="bold">Password</emphasis> provided in the Login
- dialog.</para>
- <informaltable colsep="1" rowsep="1">
- <tgroup cols="2">
- <colspec colwidth="80pt" />
- <colspec colwidth="100pt" />
- <tbody>
- <row>
- <entry>Login ID</entry>
- <entry>hpccdemo</entry>
- </row>
- <row>
- <entry>Password</entry>
- <entry>hpccdemo</entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable>
- <figure>
- <title>Login Window</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/vmimg06a.JPG" />
- </imageobject>
- </mediaobject>
- </figure>
- </listitem>
- <listitem>
- <para>Press the <emphasis role="bold">OK</emphasis> button.</para>
- <para>At this point you are now connected and ready to work with the
- HPCC!</para>
- </listitem>
- </orderedlist>
- </sect1>
- <sect1>
- <title>Running the HPCC ECL IDE when you had a previous version
- installed</title>
- <orderedlist>
- <listitem>
- <para>Open the ECL IDE, from your Start menu. (Start <emphasis
- role="bold">>></emphasis> All Programs <emphasis
- role="bold">>></emphasis> HPCCSystems <emphasis
- role="bold">>></emphasis> ECL IDE <emphasis
- role="bold">>></emphasis> ECL IDE)</para>
- </listitem>
- <listitem>
- <para>Press the <emphasis role="bold">Preferences</emphasis> button
- in the Login dialog that displays upon start up.</para>
- <para></para>
- <para><figure>
- <title>Login Window</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/vmimg05.JPG" />
- </imageobject>
- </mediaobject>
- </figure></para>
- </listitem>
- <listitem>
- <?dbfo keep-together="always"?>
- <para>Enter the IP Address shown in <xref linkend="welcometovm" />
- for the server in the <emphasis role="bold">Server</emphasis> box
- (as shown in <xref linkend="Preferences2" />) and press the
- <emphasis role="bold">OK</emphasis> button.</para>
- <para><figure id="Preferences2">
- <title>ECL IDE Preferences</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/vmimg06.JPG" />
- </imageobject>
- </mediaobject>
- </figure></para>
- </listitem>
- <listitem>
- <?dbfo keep-together="always"?>
- <para>Enter the <emphasis role="bold">Login ID</emphasis> and
- <emphasis role="bold">Password</emphasis> provided in the Login
- dialog.</para>
- <informaltable colsep="1" rowsep="1">
- <tgroup cols="2">
- <colspec colwidth="80pt" />
- <colspec colwidth="100pt" />
- <tbody>
- <row>
- <entry>Login ID</entry>
- <entry>hpccdemo</entry>
- </row>
- <row>
- <entry>Password</entry>
- <entry>hpccdemo</entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable>
- <figure>
- <title>Login Window</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/vmimg06a.JPG" />
- </imageobject>
- </mediaobject>
- </figure>
- </listitem>
- <listitem>
- <para>Press the <emphasis role="bold">OK</emphasis> button.</para>
- <para>You are now connected and ready to work with the HPCC!</para>
- </listitem>
- </orderedlist>
- </sect1>
- <sect1>
- <title>Write some ECL</title>
- <para>Let's write, compile, and execute a simple "Hello World" program
- on our HPCC.</para>
- <orderedlist>
- <listitem>
- <para>Open ECL IDE, from your Start menu. (Start <emphasis
- role="bold">>></emphasis> All Programs <emphasis
- role="bold">>></emphasis> HPCCSystems <emphasis
- role="bold">>></emphasis> ECL IDE or use the desktop shortcut,
- if you have created one.)</para>
- <para>The Login Window displays.</para>
- <para><figure>
- <title>Login Window</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/vmimg06a.JPG" />
- </imageobject>
- </mediaobject>
- </figure></para>
- </listitem>
- <listitem>
- <para>Provide your credentials (hpccdemo) then press the <emphasis
- role="bold">OK</emphasis> button.</para>
- <para></para>
- </listitem>
- <listitem>
- <para>Open a new <emphasis role="bold">Builder Window</emphasis>
- (CTRL+N) and write the following code:<programlisting>OUTPUT('Hello World');
- </programlisting></para>
- <para>This could also be written as:</para>
- <para><programlisting>'Hello World';
- </programlisting>In the second program listing, the OUTPUT keyword is omitted.
- This is possible because the language is declarative and the OUTPUT
- action is implicit.</para>
- </listitem>
- <listitem>
- <?dbfo keep-together="always"?>
- <para>Select <emphasis role="bold">thor</emphasis> as your target
- cluster.</para>
- <para><emphasis role="bold">Thor</emphasis> is the Data Refinery
- component of your HPCC. It is a massively parallel computer cluster,
- optimized for sorting, manipulating, and transforming massive data.
- This process is also known as ETL (Extract, Transform, and
- Load).</para>
- <para><figure>
- <title>Select target</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/vmimg10.JPG" />
- </imageobject>
- </mediaobject>
- </figure></para>
- </listitem>
- <listitem>
- <?dbfo keep-together="always"?>
- <para>Press the syntax check button on the main toolbar (or press
- F7).</para>
- <para><figure>
- <title>Syntax Check</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/vmimg09.JPG" />
- </imageobject>
- </mediaobject>
- </figure>A successful syntax check displays the "No Errors"
- message.</para>
- </listitem>
- <listitem>
- <?dbfo keep-together="always"?>
- <para>Press the <emphasis role="bold">Submit</emphasis> button (or
- press CTRL+ENTER).</para>
- <para><figure>
- <title>Completed job</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/vmimg11.JPG" />
- </imageobject>
- </mediaobject>
- </figure>The green check mark indicates successful
- completion.</para>
- </listitem>
- <listitem>
- <?dbfo keep-together="always"?>
- <para>Click on the workunit number tab and then on the Result 1 tab
- to see the output.</para>
- <para><figure>
- <title>Completed job output</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/vmimg12.JPG" />
- </imageobject>
- </mediaobject>
- </figure></para>
- </listitem>
- </orderedlist>
- </sect1>
- <sect1>
- <title>Working with ECL</title>
- <para>Now that you have submitted some ECL code, it's time to try some
- more complex operations.</para>
- <para>The following examples are provided to get you started.</para>
- <sect2>
- <title>ECL Example: Anagram1</title>
- <para>This example takes a STRING and produces every possible anagram
- from it. This code is the basis for a second example which evaluates
- which of these are actual words using a word list data file.</para>
- <para><orderedlist>
- <listitem>
- <para>Open a new <emphasis role="bold">Builder Window</emphasis>
- (CTRL+N) and write the following code:<programlisting>STRING Word := 'FRED' :STORED('Word');
- R := RECORD
- STRING SoFar {MAXLENGTH(200)};
- STRING Rest {MAXLENGTH(200)};
- END;
- Init := DATASET([{'',Word}],R);
- R Pluck1(DATASET(R) infile) := FUNCTION
- R TakeOne(R le, UNSIGNED1 c) := TRANSFORM
- SELF.SoFar := le.SoFar + le.Rest[c];
- SELF.Rest := le.Rest[..c-1]+le.Rest[c+1..];
- // Boundary Conditions handled automatically
- END;
- RETURN NORMALIZE(infile,LENGTH(LEFT.Rest),TakeOne(LEFT,COUNTER));
- END;
- L := LOOP(Init,LENGTH(TRIM(Word)),Pluck1(ROWS(LEFT)));
- OUTPUT(L);</programlisting></para>
- </listitem>
- <listitem>
- <para>Select <emphasis role="bold">thor</emphasis> as your
- target cluster.</para>
- <para></para>
- </listitem>
- <listitem>
- <para>Press the syntax check button on the main toolbar (or
- press F7)</para>
- <para></para>
- </listitem>
- <listitem>
- <?dbfo keep-together="always"?>
- <para>Press the <emphasis role="bold">Submit</emphasis> button
- (or press CTRL+ENTER).</para>
- <para><figure>
- <title>Completed job</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/vmimg11a.JPG" />
- </imageobject>
- </mediaobject>
- </figure>The green check mark indicates successful
- completion.</para>
- <para></para>
- </listitem>
- <listitem>
- <?dbfo keep-together="always"?>
- <para>Click on the workunit number tab and then on the Result 1
- tab to see the output.</para>
- <para><figure>
- <title>Completed job output</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/vmimg11b.JPG" />
- </imageobject>
- </mediaobject>
- </figure></para>
- </listitem>
- </orderedlist></para>
- <para></para>
- </sect2>
- <?hard-pagebreak ?>
- <sect2>
- <title>Roxie Example: Anagram2</title>
- <para>In this example, we will download an open source data file of
- dictionary words, spray that file to our Thor cluster, then validate
- our anagrams against that file so that we can determine which are valid
- words. The validation step uses a JOIN of the anagram list to the
- dictionary file. Using an index and a keyed join would be more
- efficient, but this serves as a simple example.</para>
- <sect3>
- <title>Download the word list</title>
- <para>We will download the word list from <ulink
- url="http://wordlist.sourceforge.net/">http://wordlist.sourceforge.net/</ulink></para>
- <para><orderedlist>
- <listitem>
- <para>Download the <emphasis>Official 12 Dicts
- </emphasis>Package</para>
- </listitem>
- <listitem>
- <para>Extract the <emphasis role="bold">2of12.txt</emphasis>
- file to a folder on your local machine. The files are
- available in tar.gz or ZIP format.</para>
- </listitem>
- </orderedlist></para>
- </sect3>
- <sect3 id="Load_the_Incoming_Data">
- <title>Load the Dictionary File to your Landing Zone</title>
- <para>In this step, you will copy the data file to a location from
- which it can be sprayed to your HPCC cluster. A Landing Zone is a
- storage location attached to your HPCC. It has a utility running to
- facilitate file spraying to a cluster.</para>
- <para>For smaller data files (up to 2GB), you can use the
- upload/download file utility in ECL Watch. This data file is only
- ~400 KB.</para>
- <para>Next you will distribute (or Spray) the dataset to all the
- nodes in the HPCC cluster. The power of the HPCC comes from its
- ability to assign multiple processors to work on different portions
- of the data file in parallel. Even though the VM Edition only has a
- single node, the data must be sprayed to the cluster.</para>
- <orderedlist>
- <listitem>
- <para>In your browser, go to the <emphasis role="bold">ECL
- Watch</emphasis> URL. For example, http://nnn.nnn.nnn.nnn:8010,
- where nnn.nnn.nnn.nnn is your ESP Server's IP address.</para>
- <para><informaltable colsep="1" frame="all" rowsep="1">
- <?dbfo keep-together="always"?>
- <tgroup cols="2">
- <colspec colwidth="49.50pt" />
- <colspec />
- <tbody>
- <row>
- <entry><inlinegraphic
- fileref="images/caution.png" /></entry>
- <entry>Your IP address could be different from the
- ones provided in the example images. Please use the IP
- address provided by <emphasis
- role="bold">your</emphasis> installation.</entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable></para>
- </listitem>
- <listitem>
- <?dbfo keep-together="always"?>
- <para>From ECL Watch click on the <emphasis
- role="bold">Files</emphasis> icon, then click the <emphasis
- role="bold">Landing Zones</emphasis> link from the navigation
- sub-menu.</para>
- <para>Press the <emphasis role="bold">Upload </emphasis>action
- button.</para>
- <para><figure>
- <title>Upload</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/LZimg03-1.jpg"
- vendor="eclwatchSS" />
- </imageobject>
- </mediaobject>
- </figure></para>
- </listitem>
- <listitem>
- <para>A dialog opens. <emphasis role="bold">Browse</emphasis>
- your local machine, select the file to upload, and then press the
- <emphasis role="bold">Open</emphasis> button. <figure>
- <title>File Uploader</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/LZimg04a.jpg"
- vendor="eclwatchSS" />
- </imageobject>
- </mediaobject>
- </figure></para>
- <para>The file you selected should appear in the <emphasis
- role="bold">File Name</emphasis> field. The data file is named:
- <emphasis role="bold">2of12.txt</emphasis>.</para>
- </listitem>
- <listitem>
- <para>Press on <emphasis role="bold">Upload Now</emphasis> to
- complete the file upload.</para>
- </listitem>
- </orderedlist>
- </sect3>
- <sect3 id="Spray_the_Data_to_THOR">
- <title>Spray the Data File to your <emphasis>Data Refinery (Thor)
- Cluster</emphasis></title>
- <para>To use the data file in our HPCC system, we must “spray” it to
- all the nodes. A <emphasis>spray</emphasis> or
- <emphasis>import</emphasis> is the relocation of a data file from
- one location (such as a Landing Zone) to multiple file parts on
- nodes in a cluster.</para>
- <para>The distributed or sprayed file is given a
- <emphasis>logical-file-name</emphasis> as follows: <emphasis
- role="bold">thor::word_list_csv</emphasis>. The system maintains a
- list of logical files and the corresponding physical file locations
- of the file parts.</para>
- <para></para>
- <orderedlist>
- <listitem>
- <para>Open ECL Watch using the following URL:</para>
- <para><emphasis role="bold">http://nnn.nnn.nnn.nnn:pppp (where
- nnn.nnn.nnn.nnn is your ESP Server’s IP Address and pppp is the
- port. The default port is 8010)</emphasis></para>
- </listitem>
- <listitem>
- <para>Click on the <emphasis role="bold">Files</emphasis> icon,
- then click the <emphasis role="bold">Landing Zones</emphasis>
- link from the navigation sub-menu. Select the appropriate
- landing zone (if there is more than one landing zone). Click
- the arrow to the left of your landing zone to expand it.</para>
- </listitem>
- <listitem>
- <para>Select the file from your drop zone by checking the box
- next to it.</para>
- </listitem>
- <listitem>
- <?dbfo keep-together="always"?>
- <para>Check the box next to 2of12.txt, then press the <emphasis
- role="bold">Delimited</emphasis> button.</para>
- <para><figure>
- <title>Spray Delimited</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/GSSprayDict.jpg"
- vendor="eclwatchSS" />
- </imageobject>
- </mediaobject>
- </figure></para>
- <para>The <emphasis role="bold">DFU Spray Delimited</emphasis>
- page displays.</para>
- </listitem>
- <listitem>
- <para>Select mythor in the Target Group drop list.</para>
- </listitem>
- <listitem>
- <para>Complete the Name Prefix as<emphasis>
- thor</emphasis>.</para>
- </listitem>
- <listitem>
- <para>Fill in the rest of the parameters (if they are not filled
- in already).</para>
- <para><itemizedlist>
- <listitem>
- <para>Max Record Length 8192</para>
- </listitem>
- <listitem>
- <para>Separator \,</para>
- </listitem>
- <listitem>
- <para>Line Terminator \n,\r\n</para>
- </listitem>
- <listitem>
- <para>Quote: '</para>
- </listitem>
- </itemizedlist></para>
- </listitem>
- <listitem>
- <para>Fill in the Target Name using the rest of the Logical File
- name desired: word_list_csv<emphasis role="bold">
- </emphasis></para>
- </listitem>
- <listitem>
- <?dbfo keep-together="always"?>
- <para>Make sure the <emphasis role="bold">Overwrite</emphasis>
- and <emphasis role="bold">Replicate</emphasis><emphasis
- role="bold"> </emphasis>boxes are checked.</para>
- <para><emphasis role="bold">Note:</emphasis> The <emphasis
- role="bold">Replicate</emphasis> option is only available on
- systems where replication has been enabled.</para>
- </listitem>
- <listitem>
- <?dbfo keep-together="always"?>
- <para>Press the <emphasis role="bold">Spray</emphasis><emphasis
- role="bold"> </emphasis>button.</para>
- <para>A tab displays the DFU Workunit where you can see the
- progress of the spray.</para>
- </listitem>
- </orderedlist>
- </sect3>
- <sect3>
- <title>Run the query on Thor<parameter></parameter></title>
- <para><orderedlist>
- <listitem>
- <para>Open a new <emphasis role="bold">Builder
- Window</emphasis> (CTRL+N) and write the following
- code:<programlisting>IMPORT Std;
- layout_word_list := record
- string word;
- end;
- File_Word_List := dataset('~thor::word_list_csv', layout_word_list,
- CSV(heading(1),separator(','),quote('')));
- STRING Word := 'teacher' :STORED('Word');
- STRING SortString(STRING input) := FUNCTION
- OneChar := RECORD
- STRING c;
- END;
- OneChar MakeSingle(OneChar L, unsigned pos) := TRANSFORM
- SELF.c := L.c[pos];
- END;
- Split := NORMALIZE(DATASET([input],OneChar), LENGTH(input),
- MakeSingle(LEFT,COUNTER));
- SortedSplit := SORT(Split, c);
- OneChar Recombine(OneChar L, OneChar R) := TRANSFORM
- SELF.c := L.c+R.c;
- END;
- Recombined := ROLLUP(SortedSplit, Recombine(LEFT, RIGHT),ALL);
- RETURN Recombined[1].c;
- END;
- STRING CleanedWord := SortString(TRIM(Std.Str.ToUpperCase(Word)));
- R := RECORD
- STRING SoFar {MAXLENGTH(200)};
- STRING Rest {MAXLENGTH(200)};
- END;
- Init := DATASET([{'',CleanedWord}],R);
- R Pluck1(DATASET(R) infile) := FUNCTION
- R TakeOne(R le, UNSIGNED1 c) := TRANSFORM
- SELF.SoFar := le.SoFar + le.Rest[c];
- SELF.Rest := le.Rest[..c-1]+le.Rest[c+1..];
- // Boundary Conditions
- // handled automatically
- END;
- RETURN DEDUP(NORMALIZE(infile,LENGTH(LEFT.Rest),TakeOne(LEFT,COUNTER)));
- END;
- L := LOOP(Init,LENGTH(CleanedWord),Pluck1(ROWS(LEFT)));
- ValidWords := JOIN(L,File_Word_List,
- LEFT.SoFar=Std.Str.ToUpperCase(RIGHT.Word),TRANSFORM(LEFT));
- OUTPUT(CleanedWord);
- COUNT(ValidWords);
- OUTPUT(ValidWords)
- </programlisting></para>
- </listitem>
- <listitem>
- <para>Select <emphasis role="bold">thor</emphasis> as your
- target cluster.</para>
- </listitem>
- <listitem>
- <para>Press the syntax check button on the main toolbar (or
- press F7)</para>
- </listitem>
- <listitem>
- <para>Press the <emphasis
- role="bold">Submit</emphasis><emphasis role="bold">
- </emphasis>button.</para>
- </listitem>
- <listitem>
- <para>When it completes, select the Workunit<emphasis
- role="bold"> </emphasis>tab, then select the Result
- tab.</para>
- </listitem>
- <listitem>
- <para>Examine the result.</para>
- </listitem>
- </orderedlist></para>
- </sect3>
- <?hard-pagebreak ?>
- <sect3>
- <title>Compile and Publish the query to
- Roxie<parameter></parameter></title>
- <para><orderedlist>
- <listitem>
- <?dbfo keep-together="always"?>
- <para>Right-click on the <emphasis role="bold">MyFiles</emphasis>
- folder in the Repository<emphasis role="bold"></emphasis>
- window, and select <emphasis role="bold">Insert
- Folder</emphasis> from the pop-up menu.</para>
- <para><figure>
- <title>Insert Folder</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/DTimg04.jpg" />
- </imageobject>
- </mediaobject>
- </figure></para>
- <para></para>
- </listitem>
- <listitem>
- <para>Enter <emphasis role="bold">Anagram</emphasis> for the
- label, then press the OK button.</para>
- <para></para>
- <para><figure>
- <title>Enter Folder Label</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/VM1005.jpg" />
- </imageobject>
- </mediaobject>
- </figure></para>
- <para></para>
- </listitem>
- <listitem>
- <para>Right-click on the <emphasis role="bold">Anagram
- </emphasis>Folder, and select <emphasis role="bold">Insert
- File</emphasis> from the pop-up menu.</para>
- </listitem>
- <listitem>
- <?dbfo keep-together="always"?>
- <para>Enter <emphasis role="bold">ValidateAnagrams</emphasis>
- for the label, then press the OK button.</para>
- <para>A Builder Window opens.</para>
- <para><figure>
- <title>Builder Window</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/VM1006.jpg" />
- </imageobject>
- </mediaobject>
- </figure></para>
- </listitem>
- <listitem>
- <para>Write the following code (you can copy the code from the
- other builder window):<programlisting>IMPORT Std;
- layout_word_list := record
- string word;
- end;
- File_Word_List := dataset('~thor::word_list_csv', layout_word_list,
- CSV(heading(1),separator(','),quote('')));
- STRING Word := 'teacher' :STORED('Word');
- STRING SortString(STRING input) := FUNCTION
- OneChar := RECORD
- STRING c;
- END;
- OneChar MakeSingle(OneChar L, unsigned pos) := TRANSFORM
- SELF.c := L.c[pos];
- END;
- Split := NORMALIZE(DATASET([input],OneChar), LENGTH(input),
- MakeSingle(LEFT,COUNTER));
- SortedSplit := SORT(Split, c);
- OneChar Recombine(OneChar L, OneChar R) := TRANSFORM
- SELF.c := L.c+R.c;
- END;
- Recombined := ROLLUP(SortedSplit, Recombine(LEFT, RIGHT),ALL);
- RETURN Recombined[1].c;
- END;
- STRING CleanedWord := SortString(TRIM(Std.Str.ToUpperCase(Word)));
- R := RECORD
- STRING SoFar {MAXLENGTH(200)};
- STRING Rest {MAXLENGTH(200)};
- END;
- Init := DATASET([{'',CleanedWord}],R);
- R Pluck1(DATASET(R) infile) := FUNCTION
- R TakeOne(R le, UNSIGNED1 c) := TRANSFORM
- SELF.SoFar := le.SoFar + le.Rest[c];
- SELF.Rest := le.Rest[..c-1]+le.Rest[c+1..];
- // Boundary Conditions
- // handled automatically
- END;
- RETURN DEDUP(NORMALIZE(infile,LENGTH(LEFT.Rest),TakeOne(LEFT,COUNTER)));
- END;
- L := LOOP(Init,LENGTH(CleanedWord),Pluck1(ROWS(LEFT)));
- ValidWords := JOIN(L,File_Word_List,
- LEFT.SoFar=Std.Str.ToUpperCase(RIGHT.Word),TRANSFORM(LEFT));
- OUTPUT(CleanedWord);
- COUNT(ValidWords);
- OUTPUT(ValidWords)
- </programlisting></para>
- </listitem>
- <listitem>
- <para>Select <emphasis role="bold">Roxie</emphasis> as your
- target cluster.</para>
- </listitem>
- <listitem>
- <para>Press the syntax check button on the main toolbar (or
- press F7)</para>
- </listitem>
- <listitem>
- <?dbfo keep-together="always"?>
- <para>In the Builder window, in the upper left corner the
- <emphasis role="bold">Submit</emphasis> button has a drop down
- arrow next to it. Select the arrow to expose the <emphasis
- role="bold">Compile</emphasis> option.</para>
- <figure>
- <title>Compile</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/DTimg17.jpg" />
- </imageobject>
- </mediaobject>
- </figure>
- </listitem>
- <listitem>
- <para>Select <emphasis role="bold">Compile</emphasis></para>
- </listitem>
- <listitem>
- <para>When it completes, select the Workunit<emphasis
- role="bold"> </emphasis>tab, then select the Result
- tab.</para>
- </listitem>
- <listitem>
- <?dbfo keep-together="always"?>
- <para>When the workunit finishes, it will display a green
- circle indicating it has compiled.</para>
- <para><figure>
- <title>Compiled</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/VM1007.jpg" />
- </imageobject>
- </mediaobject>
- </figure></para>
- </listitem>
- </orderedlist></para>
- </sect3>
- <sect3 id="Deploy_the_Query_to_Roxie">
- <title>Publish the Roxie query</title>
- <para>Next we will publish the query to a Roxie Cluster.</para>
- <orderedlist>
- <listitem>
- <para>Select the workunit tab for the ValidateAnagrams that you
- just compiled.</para>
- </listitem>
- <listitem>
- <para>Select the ECL Watch tab.</para>
- </listitem>
- <listitem>
- <?dbfo keep-together="always"?>
- <para>Press the <emphasis role="bold">Publish</emphasis> button,
- complete the dialog, and press <emphasis
- role="bold">Submit</emphasis>.</para>
- <para><figure>
- <title>Publish Query</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/VM1008.jpg" />
- </imageobject>
- </mediaobject>
- </figure>When it successfully publishes, a confirmation
- message displays.</para>
- </listitem>
- </orderedlist>
- </sect3>
- <sect3 id="Run_the_Roxie_Query">
- <title>Run the Roxie Query in WsECL</title>
- <para>Now that the query is published to a Roxie cluster, we can run
- it using the WsECL service at the following URL:</para>
- <para><emphasis role="bold">http://nnn.nnn.nnn.nnn:pppp (where
- nnn.nnn.nnn.nnn is your ESP Server’s IP address and pppp is the
- port. The default port is 8002)</emphasis></para>
- <orderedlist>
- <listitem>
- <para>Click on the + sign next to <emphasis
- role="bold">myroxie</emphasis> to expand the tree.</para>
- </listitem>
- <listitem>
- <?dbfo keep-together="always"?>
- <para>Click on the <emphasis
- role="bold">ValidateAnagrams.1</emphasis> hyperlink.</para>
- <para>The form for the service displays.</para>
- <para><figure>
- <title>RoxieECL</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/VM1009.jpg" />
- </imageobject>
- </mediaobject>
- </figure></para>
- </listitem>
- <listitem>
- <para>Select Output Tables in the drop list.</para>
- </listitem>
- <listitem>
- <?dbfo keep-together="always"?>
- <para>Provide a word to make anagrams from (e.g., TEACHER), then
- press the Submit button.</para>
- <para>The results display.</para>
- <para><figure>
- <title>RoxieResults</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/VM1010.jpg" />
- </imageobject>
- </mediaobject>
- </figure></para>
- </listitem>
- </orderedlist>
- </sect3>
- </sect2>
- </sect1>
- <sect1 id="Working_with_a_data_file">
- <title>Working with data files</title>
- <para>Once you start working with your HPCC system, you will want to
- process some real data. This section shows you how to load data to your
- HPCC system.</para>
- <sect2 id="Cautions_and_Warnings">
- <title>Before you begin</title>
- <para>A typical production HPCC system would have much more data
- capacity than a virtual system used for testing purposes. The size of
- the file you wish to work with is limited by the size of your virtual
- machine.</para>
- <para><itemizedlist>
- <listitem>
- <para>The virtual machine has a limit of 20GB.</para>
- </listitem>
- <listitem>
- <para>The size of the file(s) you can work with in your virtual
- machine is also limited by your machine's available disk space.
- Make sure you have adequate disk space available.</para>
- </listitem>
- </itemizedlist></para>
- <informaltable colsep="1" frame="all" rowsep="1">
- <?dbfo keep-together="always"?>
- <tgroup cols="2">
- <colspec colwidth="49.50pt" />
- <colspec />
- <tbody>
- <row>
- <entry><inlinegraphic fileref="images/caution.png" /></entry>
- <entry>If you exceed the file size limits, your VM state will
- not be saved and you risk losing your work. Make sure that you
- have ample space to store not only your data, but also your
- virtual machine's "state", so that you can save your work.</entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable>
- <para></para>
- </sect2>
- <?hard-pagebreak ?>
- <sect2 id="Uploading_a_file">
- <title>Uploading a file</title>
- <para>For smaller data files (up to 2GB), you can use the
- upload/download file utility in ECL Watch.</para>
- <orderedlist>
- <listitem>
- <para>In your browser, go to the <emphasis role="bold">ECL
- Watch</emphasis> URL displayed (circled in red) in <xref
- linkend="welcometovm" />. For example,
- http://nnn.nnn.nnn.nnn:8010, where nnn.nnn.nnn.nnn is your Virtual
- Machine's IP address.</para>
- <para><informaltable colsep="1" frame="all" rowsep="1">
- <?dbfo keep-together="always"?>
- <tgroup cols="2">
- <colspec colwidth="49.50pt" />
- <colspec />
- <tbody>
- <row>
- <entry><inlinegraphic
- fileref="images/caution.png" /></entry>
- <entry>Your virtual IP address could be different from
- the ones provided in the example images. Please use the
- IP address provided by <emphasis
- role="bold">your</emphasis> installation.</entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable></para>
- </listitem>
- <listitem>
- <?dbfo keep-together="always"?>
- <para>From the ECL Watch page, click on the <emphasis
- role="bold">Upload/download File </emphasis> link in the menu on
- the left side.</para>
- <para><figure>
- <title>Upload/download</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/LZimg03-1.jpg" />
- </imageobject>
- </mediaobject>
- </figure></para>
- <para><phrase> </phrase>Once you click on the Upload/download file
- link, it will take you to the dropzones and files page, where you
- can choose to <emphasis role="bold">Browse</emphasis> your machine
- for a file to upload:</para>
- <para><figure>
- <title>Dropzones</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/LZimg04.jpg" scale="90" />
- </imageobject>
- </mediaobject>
- </figure></para>
- </listitem>
- <listitem>
- <para>Press the <emphasis role="bold">Browse</emphasis> button to
- browse the files on your local machine, select the file to upload,
- and then click the <emphasis role="bold">Open</emphasis>
- button.</para>
- <para>The file you selected should appear in the <emphasis
- role="bold">Select a file to upload</emphasis> field.</para>
- </listitem>
- <listitem>
- <para>Press on <emphasis role="bold">Upload Now</emphasis> to
- complete the file upload.</para>
- </listitem>
- <listitem>
- <para>Now that the file is on your Landing Zone, you can spray the
- file to your cluster and write ECL code to process it.</para>
- </listitem>
- </orderedlist>
- <para></para>
- <para></para>
- </sect2>
- <sect2 id="Uploading_files_w_secure_client">
- <title>Uploading files with a Secure Copy Client</title>
- <para>To upload a large file for processing to your virtual machine,
- you will need a tool that supports the secure copy protocol. In this
- section, we discuss using WinSCP. There are other tools available, but
- the steps are similar.</para>
- <para><orderedlist>
- <listitem>
- <para>Open the WinSCP tool, and login to your Virtual Machine's
- IP address using the username and password given.</para>
- <para><informaltable colsep="1" rowsep="1">
- <tgroup cols="2">
- <colspec colwidth="80pt" />
- <colspec colwidth="100pt" />
- <tbody>
- <row>
- <entry>Login ID:</entry>
- <entry>hpccdemo</entry>
- </row>
- <row>
- <entry>Password:</entry>
- <entry>hpccdemo</entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable></para>
- </listitem>
- <listitem>
- <para>Once logged in, it should navigate automatically to the
- landing zone folder (/var/lib/HPCCSystems/mydropzone).</para>
- </listitem>
- <listitem>
- <?dbfo keep-together="always"?>
- <para>Navigate to where your local file is in the left part of
- the window.</para>
- <para><figure>
- <title>WinSCP</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/LZimg05.jpg" />
- </imageobject>
- </mediaobject>
- </figure></para>
- </listitem>
- <listitem>
- <para>Select the data file to send and copy it to the landing
- zone, using drag-and-drop.</para>
- </listitem>
- <listitem>
- <para>Now that the file is on your Landing Zone, you can spray
- the file to your cluster and write ECL code to process
- it.</para>
- </listitem>
- </orderedlist></para>
- </sect2>
- </sect1>
- <sect1>
- <title>Next Steps</title>
- <para>Available from the menu in ECL Watch are several documents which
- provide details on various aspects of the HPCC.</para>
- <figure>
- <title>ECL Watch Resource Page</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/vmimg07a.jpg" />
- </imageobject>
- </mediaobject>
- </figure>
- <para></para>
- <para>You can also access them from the help menu: Help <emphasis
- role="bold">>></emphasis> Download Resources.</para>
- <para><figure>
- <title>Help Menu</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/vmimg07.JPG" />
- </imageobject>
- </mediaobject>
- </figure></para>
- <para>To familiarize yourself with what your system can do, we
- recommend:</para>
- <para><itemizedlist>
- <listitem>
- <para>The <emphasis role="bold">HPCC Data
- Tutorial</emphasis></para>
- <para>This is a simple, step-by-step tutorial that shows the
- end-to-end process from receiving a raw data file to
- publishing a web-based query to search the data. Along the way,
- you will learn how to process the data, index it, then write and
- publish a query to search the data. The self-led tutorial and
- accompanying data file are available on the ECL Watch Resource
- Page.</para>
- </listitem>
- <listitem>
- <para>The <emphasis role="bold">Six Degrees of Kevin Bacon
- Example</emphasis></para>
- <para>This is a more complex example (also available on the ECL
- Watch Resource Page) that uses a database of movie data to find
- the degree of separation between actors in films.</para>
- </listitem>
- <listitem>
- <para>The HPCC Systems Portal (<ulink
- url="http://hpccsystems.com">http://hpccsystems.com</ulink>) is
- another valuable resource for more information including:</para>
- <itemizedlist>
- <listitem>
- <para>Video Tutorials</para>
- </listitem>
- <listitem>
- <para>Additional examples</para>
- </listitem>
- <listitem>
- <para>White Papers</para>
- </listitem>
- <listitem>
- <para>Documentation</para>
- </listitem>
- <listitem>
- <para>Support Forums</para>
- </listitem>
- </itemizedlist>
- </listitem>
- </itemizedlist></para>
- </sect1>
- </chapter>
- <chapter>
- <title>Frequently Asked Questions</title>
- <para></para>
- <qandaset defaultlabel="number">
- <qandadiv>
- <qandaentry>
- <question>
- <para>Can I run the VM while connected to my network using a
- Virtual Private Network (VPN)?</para>
- </question>
- <answer>
- <para>No. Most VPN clients take control of your network device and
- routing and do not allow split tunnels.</para>
- </answer>
- </qandaentry>
- <qandaentry>
- <question>
- <para>Can I install this virtual machine on multiple nodes?</para>
- </question>
- <answer>
- <para>No. If you want to evaluate a multi-node system, you should
- use the Community version available from the HPCC Systems Portal
- at <ulink
- url="http://hpccsystems.com">http://hpccsystems.com</ulink>.</para>
- </answer>
- </qandaentry>
- <qandaentry>
- <question>
- <para>What are the limits of this version?</para>
- </question>
- <answer>
- <para>The HPCC VM Edition runs on a single node, has a limit of 20
- GB in its workspace, and doesn't support custom
- configurations.</para>
- </answer>
- </qandaentry>
- <qandaentry>
- <question>
- <para>Can I run the VM on my Linux machine?</para>
- </question>
- <answer>
- <para>You can run the HPCC VM using the Linux VM Player. The HPCC
- ECL IDE is a Windows application, but can run under WINE. See the
- Client Tools Manual for details.</para>
- </answer>
- </qandaentry>
- <qandaentry>
- <question>
- <para>Can I run the VM on my Mac?</para>
- </question>
- <answer>
- <para>Not at this time. There is no Mac version of the VM
- Player.</para>
- </answer>
- </qandaentry>
- <qandaentry>
- <question>
- <para>Can I run the VM on my Windows Server?</para>
- </question>
- <answer>
- <para>Yes. You can run on Windows Server 2003 or 2008, provided
- you have access to it using Remote Desktop Protocol (RDP).</para>
- </answer>
- </qandaentry>
- <qandaentry>
- <question>
- <para>Do I need a 64-bit processor to run the VM Edition?</para>
- </question>
- <answer>
- <para>No. The VM Player runs in either 32- or 64-bit environments
- and does not require a 64-bit processor.</para>
- </answer>
- </qandaentry>
- <qandaentry>
- <question>
- <para>Do I need a 64-bit processor to run the Community
- Edition?</para>
- </question>
- <answer>
- <para>Yes. Community Edition binaries run natively on 64-bit Linux
- server(s). You can access that HPCC from any Windows workstation
- (32- or 64-bit) that can run the ECL IDE and a supported
- browser.</para>
- </answer>
- </qandaentry>
- <qandaentry>
- <question>
- <para>What happens to my work when I close the virtual
- machine?</para>
- </question>
- <answer>
- <para>The VM Player saves the state of your system when it closes.
- It saves all of your workunit information, data files, and
- published query sets as long as there is available space. The
- maximum size of the saved session is 20 GB.</para>
- </answer>
- </qandaentry>
- <qandaentry>
- <question>
- <para>Why won't my VM allow access to my network interface?</para>
- </question>
- <answer>
- <para>Check your Firewall settings. You may need to disable the
- Firewall for your VM's network interface.</para>
- </answer>
- </qandaentry>
- <qandaentry>
- <question>
- <para>Will this version utilize my multi-core processor?</para>
- </question>
- <answer>
- <para>This VM is designed to utilize a single core.</para>
- </answer>
- </qandaentry>
- <qandaentry>
- <question>
- <para>The VM fails to start on Mac OS X after upgrading to OS X 10.9
- Mavericks. How can I correct this?</para>
- </question>
- <answer>
- <para>To correct this issue run the following command:
- <programlisting>sudo launchctl load
- /Library/LaunchDaemons/org.virtualbox.startup.plist</programlisting></para>
- </answer>
- </qandaentry>
- <qandaentry>
- <question>
- <para>Where can I find more information?</para>
- </question>
- <answer>
- <para>Visit the HPCC Systems Portal at <ulink
- url="http://hpccsystems.com">http://hpccsystems.com</ulink>.</para>
- </answer>
- </qandaentry>
- </qandadiv>
- </qandaset>
- </chapter>
- </book>
|