
Merge branch 'candidate-5.2.0'

Signed-off-by: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 10 years ago
parent
commit
0b258b2209

+ 226 - 162
docs/HPCCSystemAdmin/HPCCSystemAdministratorsGuide.xml

@@ -29,8 +29,8 @@
      <para>LexisNexis and the Knowledge Burst logo are registered trademarks
      of Reed Elsevier Properties Inc., used under license.</para>

-      <para>HPCC Systems is a registered trademark of LexisNexis Risk Data
-      Management Inc.</para>
+      <para>HPCC Systems<superscript>®</superscript> is a registered trademark
+      of LexisNexis Risk Data Management Inc.</para>

      <para>Other products, logos, and services may be trademarks or
      registered trademarks of their respective companies.</para>
@@ -48,7 +48,7 @@
     <xi:include href="common/Version.xml" xpointer="DateVer"
     <xi:include href="common/Version.xml" xpointer="DateVer"
                 xmlns:xi="http://www.w3.org/2001/XInclude" />
                 xmlns:xi="http://www.w3.org/2001/XInclude" />
 
 
-    <corpname>HPCC Systems</corpname>
+    <corpname>HPCC Systems<superscript>®</superscript></corpname>
 
 
     <xi:include href="common/Version.xml" xpointer="Copyright"
     <xi:include href="common/Version.xml" xpointer="Copyright"
                 xmlns:xi="http://www.w3.org/2001/XInclude" />
                 xmlns:xi="http://www.w3.org/2001/XInclude" />
@@ -61,7 +61,8 @@
  </bookinfo>

  <chapter>
-    <title>Introducing HPCC Systems Administraton</title>
+    <title>Introducing HPCC Systems<superscript>®</superscript>
+    Administration</title>

    <sect1 id="HPCC_SysAdminIntro" role="nobrk">
      <title>Introduction</title>
@@ -81,9 +82,9 @@
     <sect1 id="HPCC_Architectural_Overview">
     <sect1 id="HPCC_Architectural_Overview">
       <title>Architectural Overview</title>
       <title>Architectural Overview</title>
 
 
-      <para>An HPCC Systems Platform consists of the following components:
-      Thor, Roxie, ESP Server, Dali, Sasha, DFU Server, and ECLCC Server. LDAP
-      security is optionally available.</para>
+      <para>An HPCC Systems<superscript>®</superscript> Platform consists of
+      the following components: Thor, Roxie, ESP Server, Dali, Sasha, DFU
+      Server, and ECLCC Server. LDAP security is optionally available.</para>
 
 
       <para><figure>
       <para><figure>
           <title>HPCC Architectural Diagram</title>
           <title>HPCC Architectural Diagram</title>
@@ -153,11 +154,12 @@
        <sect3>
          <title>ECL Agent</title>

-          <para>The ECL Agent can act as a single-node cluster. That is called
-          spawning an hThor cluster. hThor is used to process simple jobs that
-          would otherwise be an inefficient use of Thor. For simple tasks, the
-          ECL Agent will make a determination and perform the execution itself
-          by acting as an hThor cluster. <figure>
+          <para>The ECL Agent's primary function is to send the job to execute
+          on the appropriate cluster. The ECL Agent can act as a single-node
+          cluster. That is called spawning an hThor cluster. hThor is used to
+          process simple jobs that would otherwise be an inefficient use of
+          Thor. For simple tasks, the ECL Agent will make a determination and
+          perform the execution itself by acting as an hThor cluster. <figure>
              <title>Clusters</title>

              <mediaobject>
@@ -236,7 +238,7 @@
          <para>When you submit workunits for execution on Thor, they are
          first converted to executable code by the ECLCC Server.</para>

-          <para>When you submit a Workunit to Roxie, code is compiled and
+          <para>When you submit a workunit to Roxie, code is compiled and
          later published to the Roxie cluster, where it is available to
          execute multiple times.</para>

@@ -284,8 +286,9 @@
              </listitem>
            </itemizedlist></para>

-          <para>Examples of protocols supported by the ESP Server framework
-          include: HTTP, HTTPS, SOAP, and JSON.</para>
+          <para>The ESP Server supports both XML and JSON formats.</para>
+
+          <!--formerly : protocols - HTTP, HTTPS, SOAP, and JSON - -->
        </sect3>

        <sect3>
@@ -293,7 +296,7 @@

          <para>You can incorporate a Lightweight Directory Access Protocol
          (LDAP) server to work with Dali to enforce the security restrictions
-          for data, file, workunit scopes, and feature access.</para>
+          for file scopes, workunit scopes, and feature access.</para>

          <para>When LDAP is configured, you need to authenticate when
          accessing ECL Watch, WsECL, ECL IDE, or any other client tools.
@@ -330,16 +333,16 @@
          to create and execute queries into your data on an HPCC platform
          using Enterprise Control Language (ECL). Eclipse is open-source, and
          multi-platform and it can be used to interface with your data and
-          workunits on HPCC. The ECL plug-in for Eclipse is also open
-          source.</para>
+          workunits on HPCC. The ECL plug-in for Eclipse is also
+          open-source.</para>
        </sect3>

        <sect3>
          <title>ECL IDE</title>

-          <para>ECL IDE is a full-featured GUI for ECL development providing
-          access to the ECL repository and many of the ECL Watch capabilities.
-          ECL IDE uses various ESP services via SOAP.</para>
+          <para>ECL IDE is a full-featured GUI providing access to your ECL
+          code for ECL development. ECL IDE uses various ESP services via
+          SOAP.</para>

          <para>The ECL IDE provides access to ECL Definitions to build your
          queries. These definitions are created by coding an expression that
@@ -370,7 +373,7 @@
              </listitem>

              <listitem>
-                <para>See status of all system servers.</para>
+                <para>See the status of all system servers.</para>
              </listitem>

              <listitem>
@@ -394,6 +397,9 @@
          provide command line access to functionality provided by the ECL
          Watch web pages. They work by communicating with the corresponding
          ESP service via SOAP.</para>
+
+          <para>See the <emphasis>Client Tools</emphasis> Manual for more
+          details.</para>
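+
+          <para>As an illustration (exact options vary by version; see the
+          <emphasis>Client Tools</emphasis> Manual), a workunit could be
+          submitted from the command line like this:</para>
+
+          <programlisting># Sketch only: the server address and file name are illustrative
ecl run thor hello.ecl --server=192.168.1.10 --port=8010</programlisting>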
        </sect3>
      </sect2>
    </sect1>
@@ -401,12 +407,159 @@
    <!--Inclusion-from-ClientTool-As-Sect1: REMOVED-->
  </chapter>

+  <chapter>
+    <title>Hardware and Software Requirements</title>
+
+    <para>This chapter describes the hardware and software environments on
+    which HPCC runs well. HPCC is designed to run on commodity hardware,
+    which makes building and maintaining large scale (petabytes) clusters
+    economically feasible. When planning your cluster hardware, you will need
+    to balance a number of considerations.</para>
+
+    <para>This section provides some insight into the sort of hardware and
+    infrastructure on which HPCC works best. It is neither an exhaustive set
+    of instructions nor a mandate on what hardware you must have. Consider it
+    a guide to use when looking to implement or scale your HPCC system, and
+    weigh these suggestions against your specific enterprise needs.</para>
+
+    <xi:include href="Installing_and_RunningTheHPCCPlatform/Inst-Mods/Hardware.xml"
+                xpointer="HW-Switch"
+                xmlns:xi="http://www.w3.org/2001/XInclude" />
+
+    <xi:include href="Installing_and_RunningTheHPCCPlatform/Inst-Mods/Hardware.xml"
+                xpointer="HW-LoadBalancer"
+                xmlns:xi="http://www.w3.org/2001/XInclude" />
+
+    <xi:include href="Installing_and_RunningTheHPCCPlatform/Inst-Mods/Hardware.xml"
+                xpointer="Nodes-Hardware"
+                xmlns:xi="http://www.w3.org/2001/XInclude" />
+
+    <xi:include href="HPCCSystemAdmin/SA-Mods/SysAdminConfigMod.xml"
+                xpointer="System_sizings"
+                xmlns:xi="http://www.w3.org/2001/XInclude" />
+
+    <xi:include href="Installing_and_RunningTheHPCCPlatform/Inst-Mods/Hardware.xml"
+                xpointer="Nodes-Software"
+                xmlns:xi="http://www.w3.org/2001/XInclude" />
+
+    <xi:include href="Installing_and_RunningTheHPCCPlatform/Inst-Mods/Hardware.xml"
+                xpointer="workstation-requirements"
+                xmlns:xi="http://www.w3.org/2001/XInclude" />
+  </chapter>
+
+  <chapter>
+    <title>Hardware and Component Sizing</title>
+
+    <para>This section provides some insight into the sort of hardware and
+    infrastructure on which HPCC works best. It is neither an exhaustive set
+    of instructions nor a mandate on what hardware you must have. Consider it
+    a guide to use when looking to implement or scale your HPCC system, and
+    weigh these suggestions against your specific enterprise needs.</para>
+
+    <para>HPCC is designed to run on commodity hardware, which makes building
+    and maintaining large scale (petabytes) clusters economically feasible.
+    When planning your cluster hardware, you will need to balance a number of
+    considerations, including fail-over domains and potential performance
+    issues. Hardware planning should include distributing HPCC across multiple
+    physical hosts, such as a cluster. Generally, a best practice is to run
+    HPCC processes of a particular type, for example Thor, Roxie, or Dali,
+    on a host configured specifically for that type of process.</para>
+
+    <sect1>
+      <title>Thor Hardware</title>
+
+      <para>Thor slave nodes require a proper balance of CPU, RAM, network,
+      and disk I/O in order to operate most efficiently. A single Thor slave
+      node works optimally when allocated 4 CPU cores, 8GB RAM, 1Gb/sec
+      network and 200MB/sec sequential read/write disk I/O.</para>
+
+      <para>Some hardware architectures can provide more capacity within a
+      single physical server. In such cases you can use multi-slave to
+      configure your larger physical servers to run multiple Thor slave
+      nodes per physical server.</para>
+
+      <para>It is important to note that HPCC is by nature a parallel
+      processing system and all Thor slave nodes will be exercising at
+      precisely the same time. So when allocating more than one HPCC Thor
+      slave per physical machine, ensure that each slave meets the
+      recommended requirements.</para>
+
+      <para>For instance, 1 physical server with 48 cores, 96GB RAM, 10Gb/sec
+      network and 2GB/sec sequential I/O would be capable of running ten (10)
+      HPCC Thor slaves at optimal efficiency. The order of optimization for
+      resource usage in a Thor slave node is disk I/O 60%, network 30%, and
+      CPU 10%. Any increase in sequential I/O will have the most impact on
+      speed, followed by improvements in network, followed by improvements in
+      CPU.</para>
+
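+      <para>As a rough worked sketch (the figures simply restate the
+      guidelines above), the slave count for a server can be estimated by
+      dividing each resource by the per-slave recommendation and taking the
+      minimum:</para>
+
+      <programlisting># Illustrative only: per-slave guideline is 4 cores, 8GB RAM,
# 1Gb/sec network, and 200MB/sec sequential disk I/O.
CORES=48; RAM_GB=96; NET_GBPS=10; SEQIO_MBPS=2000
BY_CPU=$((CORES / 4)); BY_RAM=$((RAM_GB / 8))
BY_NET=$((NET_GBPS / 1)); BY_IO=$((SEQIO_MBPS / 200))
# The binding constraint is the smallest of the four ratios
printf '%s\n' $BY_CPU $BY_RAM $BY_NET $BY_IO | sort -n | head -n 1
# prints 10 for this example server</programlisting>
+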
+      <para>Network architecture is also an important consideration. HPCC Thor
+      nodes work optimally in a streamlined network architecture between all
+      Thor slave processes.</para>
+
+      <para>RAID is recommended and all RAID levels suitable for sequential
+      read/write operations and high availability are acceptable. For example,
+      RAID1, RAID10, RAID5 (preferred), and RAID6.</para>
+    </sect1>
+
+    <sect1>
+      <title>Roxie Hardware Configurations</title>
+
+      <para>HPCC Roxie processes require a proper, yet different (from
+      Thor), balance of CPU, RAM, network, and disk I/O in order to ensure
+      efficient operations. A single HPCC Roxie node works optimally when
+      allocated 6 or more CPU cores, 24GB RAM, a 1Gb/sec network backbone,
+      and 400 4k random read IOPS.</para>
+
+      <para>Each HPCC Roxie node is presented two hard drives, each capable
+      of 200 4k random seek IOPS. Hard drive recommendations for Roxie
+      efficiency are 15K SAS or SSD. A good rule of thumb: the more random
+      read IOPS, the better and faster your Roxie will perform.</para>
+
+      <para>Running multiple HPCC Roxie nodes on a single physical server is
+      not recommended, except in the cases of virtualization or
+      containers.</para>
+    </sect1>
+
+    <sect1>
+      <title>Dali and Sasha Hardware Configurations</title>
+
+      <para>HPCC Dali processes store cluster metadata in RAM. For optimal
+      efficiency, provide at least 48GB of RAM, 6 or more CPU cores, 1Gb/sec
+      network interface and a high availability disk for a single HPCC Dali.
+      HPCC's Dali processes are one of the few active/passive components.
+      Using standard “swinging disk” clustering is recommended for a high
+      availability setup. For a single HPCC Dali process, any suitable High
+      Availability (HA) RAID level is fine.</para>
+
+      <para>Sasha does not store any data. Sasha reads data from Dali then
+      processes it. Sasha does store archived workunits (WUs) on a disk.
+      Allocating a larger disk for Sasha reduces the amount of housekeeping
+      needed. Since Sasha assists Dali by performing housekeeping, it works
+      best when on its own node. You should avoid putting Sasha and Dali on
+      the same node.</para>
+    </sect1>
+
+    <sect1>
+      <title>Other HPCC Components</title>
+
+      <para>ECL Agent, ECLCC Server, DFU Server, the Thor master, and ECL
+      Watch are administrative processes that support the components of the
+      main clusters.</para>
+
+      <para>For maximum efficiency you should provide 24GB RAM, 6+ CPU cores,
+      1Gb/sec network and high availability disk(s). These components can be
+      made highly available in an active/active fashion.</para>
+    </sect1>
+  </chapter>
+
   <chapter id="Routine_Maintenance">
   <chapter id="Routine_Maintenance">
     <title>Routine Maintenance</title>
     <title>Routine Maintenance</title>
 
 
-    <para>There is some care required to ensure that your HPCC system keeps
-    operating optimally. The following sections address the routine
-    maintenance tasks for your HPCC system.</para>
+    <para>In order to ensure that your HPCC system keeps running optimally,
+    some care and maintenance is required. The following sections address
+    routine maintenance tasks for your HPCC system.</para>
 
 
     <!--***SYSTEM HEALTH CHECK UP***TO COME***-->
     <!--***SYSTEM HEALTH CHECK UP***TO COME***-->
 
 
@@ -417,16 +570,16 @@
      essential data. Devise a back up strategy to meet the needs of your
      organization. This section is not meant to replace your current back up
      strategy, instead this section supplements it by outlining special
-      considerations for HPCC Systems.</para>
+      considerations for HPCC Systems<superscript>®</superscript>.</para>

      <sect2>
        <title>Back Up Considerations</title>

        <para>You probably already have some sort of a back up strategy in
-        place, by adding HPCC Systems into your operating environment there
-        are some additional considerations to be aware of. The following
-        sections discuss back up considerations for the individual HPCC system
-        components.</para>
+        place. By adding HPCC Systems<superscript>®</superscript> into your
+        operating environment, there are some additional considerations to be
+        aware of. The following sections discuss back up considerations for
+        the individual HPCC system components.</para>

        <sect3>
          <title>Dali</title>
@@ -499,10 +652,10 @@
          <title>Thor</title>

          <para>Thor, the data refinery, as one of the critical components of
-          HPCC Systems needs to be backed up. Back up Thor by configuring
-          replication and setting up a nightly back up cron task. Back up Thor
-          on demand before and/or after any node swap or drive swap if you do
-          not have a RAID configured.</para>
+          HPCC Systems<superscript>®</superscript>, needs to be backed up.
+          Back up Thor by configuring replication and setting up a nightly
+          back up cron task. Back up Thor on demand before and/or after any
+          node swap or drive swap if you do not have a RAID configured.</para>

          <para>A very important part of administering Thor is to check the
          logs to ensure the previous back ups completed successfully.</para>
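+
+          <para>For example, a nightly Thor backup could be scheduled with a
+          cron entry along these lines (the script path, cluster name, and
+          time are illustrative; substitute the backup command used at your
+          site):</para>
+
+          <programlisting># Run a backup of the mythor cluster at 1 AM every night
0 1 * * * /opt/HPCCSystems/bin/start_backupnode mythor</programlisting>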
@@ -934,9 +1087,9 @@ lock=/var/lock/HPCCSystems</programlisting>
      servicing many, diverse applications has been less than optimal.</para>

      <para>HPCC makes setting up your Active Directory OU's relatively easy.
-      ESP creates all the OU's for you when it comes up, based on the settings
-      you defined in Configuration Manager. You can then start Dali/ESP and
-      use ECLWatch to add or modify users or groups.</para>
+      ESP creates all the OU's for you when it starts up, based on the
+      settings you defined in Configuration Manager. You can then start
+      Dali/ESP and use ECLWatch to add or modify users or groups.</para>

      <para>You can assign permissions to each user individually, however it
      is more manageable to assign these permissions to groups, and then add
@@ -950,8 +1103,9 @@ lock=/var/lock/HPCCSystems</programlisting>
      <sect2>
        <title>Active Directory, and LDAP Commonality</title>

-        <para>There are a few relevant notable terms, that may need some
-        further explanation. <variablelist>
+        <para>There are components that are common to both Active Directory
+        and LDAP. There are a few relevant terms that may need some further
+        explanation. <variablelist>
            <varlistentry>
              <term>filesBasedn</term>

@@ -1107,8 +1261,8 @@ lock=/var/lock/HPCCSystems</programlisting>

        <para>You can run multiple active instances of the ECLCC Server for
        redundancy. There is no need for a load balancer or VIP for this
-        either. Will routinely check for workunits. Should one fail, the
-        other(s) will continue to compile.</para>
+        either. Each instance will routinely check for workunits. Should one
+        fail, the other(s) will continue to compile.</para>
      </sect2>

      <sect2>
@@ -1167,10 +1321,6 @@ lock=/var/lock/HPCCSystems</programlisting>
        server. One primary, or active, and the other passive. No load
        balancer needed. If the active instance fails, then you can fail over
        to the passive.</para>
-
-        <para>Make sure you give significant resources to your key components.
-        Dali is RAM intensive. Eclagent and Eclserver are processor dependent.
-        Thor should have a minimum of 4GB RAM per node.</para>
      </sect2>
    </sect1>

@@ -1243,16 +1393,15 @@ lock=/var/lock/HPCCSystems</programlisting>
          <para>Thor has the ability to do a “Thor copy” which copies data
          from one cluster to another. You can also do this through ECL code.
          Additionally, you may decide you don’t want, or need to have a “hot”
-          DR Thor. In that case, the most common disasters [minor] (major
-          switch outage, total power down, multiple fiber cuts) cause only a
+          DR Thor. In that case, the most common minor disasters cause only a
          relatively brief, less than 1 day disaster. Since Thor is
          responsible for creating data updates it can take a day or a few to
          recover. The data just is not quite as fresh but as long as the
          Roxies are replicated the data is still flowing. In the case of a
-          major disaster (a major earthquake, or a tidal wave), the likelihood
-          of that occurring does not justify the cost of preventing against
-          it. It could also take between 7 to 14 days to recover by building
-          out a whole new Thor cluster.</para>
+          major disaster, such as a major earthquake, a tidal wave, extended
+          total power loss, or multiple fiber cuts, the systems may be out
+          for a day or more; the likelihood of that occurring may not justify
+          the costs of preventing against it.</para>
        </sect3>

        <sect3>
@@ -1336,115 +1485,9 @@ lock=/var/lock/HPCCSystems</programlisting>
      </sect2>
    </sect1>

-    <sect1 id="Sample_sizings">
-      <title>System Sizings</title>
-
-      <para>This section provides some guidance in determining the sizing
-      requirements for an initial installation. The following are some
-      suggested sample configuration guides that can be helpful when planning
-      your system.</para>
-
-      <sect2>
-        <title>Sample Sizing for High Data volume (Typical)</title>
-
-        <para>The most typical scenario for HPCC is utilizing it with a high
-        volume of data. This suggested sample sizing would be appropriate for
-        a site with large volumes of data. A good policy is to set the Thor
-        size to 4 times the source data on your HPCC. Typically, Roxie would
-        be about ¼ the size of Thor. This is because the data is compressed
-        and the system does not hold any transient data in Roxie.</para>
-
-        <sect3>
-          <title>High Data Thor sizing considerations</title>
-
-          <para>Each Thor node can hold about 2.5 TB of data (MAX), so plan
-          for the number of Thor nodes accordingly for your data.</para>
-
-          <para>If possible, SAS drives for both Thor and Roxie as they almost
-          equal to SATA drives now. If not for both, get SAS drives at least
-          for your Roxie cluster.</para>
-
-          <para>Thor replicates data, typically configured for 2
-          copies.</para>
-        </sect3>
-
-        <sect3>
-          <title>High Data Roxie sizing considerations</title>
-
-          <para>Roxie keeps most of its data in memory, so you should allocate
-          plenty of memory for Roxie. Calculate the approximate size of your
-          data, and allocate appropriately. You should either increase the
-          number of nodes, or increase the amount of memory.</para>
-
-          <para>A good practice is to allocate a Dali for every Roxie
-          cluster.</para>
-
-          <para>Roxie+Dali needs to have a mirror. This is because, when you
-          need to update indexes, you update the mirror and make that primary
-          and bring the other one down. This is not really a necessity except
-          for high availability and performance requirements.</para>
-        </sect3>
-      </sect2>
-
-      <sect2>
-        <title>Sample Sizing for Heavy Processing on Low Data Volume</title>
-
-        <para>The following section provides some sample sizing for heavy
-        processing with approximately the amount of data indicated.</para>
-
-        <sect3>
-          <title>750 GB of Raw Data</title>
-
-          <para>Thor = 3 (slaves) + 2 (management) = 5 Nodes</para>
-
-          <para>Roxie = 3 (agents) + 1 (Dali) = 4 Nodes (This will mean that
-          the environment will be down during query deployment)</para>
-
-          <para>Spares = 2</para>
-
-          <para>Total = 13 nodes</para>
-        </sect3>
-
-        <sect3>
-          <title>1250 GB of Raw Data</title>
-
-          <para>Thor = 6 (slaves) + 2 (management) = 8 Nodes</para>
-
-          <para>Roxie = 4 (agents) + 1 (Dali) = 5 Nodes (This will mean that
-          the environment will be down during query deployment)</para>
-
-          <para>Spares = 2</para>
-
-          <para>Total = 17 nodes</para>
-        </sect3>
-
-        <sect3>
-          <title>2000 GB of Raw Data</title>
-
-          <para>Thor = 8 (slaves) + 3 (management) = 11 Nodes</para>
-
-          <para>Roxie = 4 (agents) + 1 (Dali) = 5 Nodes (This will mean that
-          the environment will be down during query deployment)</para>
-
-          <para>Spares = 2</para>
-
-          <para>Total = 20 nodes</para>
-        </sect3>
-
-        <sect3>
-          <title>3500 GB of Raw Data</title>
-
-          <para>Thor = 12 (slaves) + 5 (management) = 17 Nodes</para>
-
-          <para>Roxie = 6 (agents) + 1 (Dali) = 7 Nodes (This will mean that
-          the environment will be down during query deployment)</para>
-
-          <para>Spares = 2</para>
-
-          <para>Total = 28 nodes</para>
-        </sect3>
-      </sect2>
-    </sect1>
+    <xi:include href="HPCCSystemAdmin/SA-Mods/SysAdminConfigMod.xml"
+                xpointer="Sample_Sizings"
+                xmlns:xi="http://www.w3.org/2001/XInclude" />
  </chapter>

  <chapter id="Resources">
@@ -1456,8 +1499,9 @@ lock=/var/lock/HPCCSystems</programlisting>
      <title>HPCC Resources</title>

      <para>The resources link can be found under the Operations Icon link.
-      The resources link in ECL Watch provides a link to the HPCC Systems web
-      portal. Visit the HPCC Systems Web Portal at <ulink
+      The resources link in ECL Watch provides a link to the HPCC
+      Systems<superscript>®</superscript> web portal. Visit the HPCC
+      Systems<superscript>®</superscript> Web Portal at <ulink
      url="http://hpccsystems.com/">http://hpccsystems.com/</ulink> for
      software updates, plug-ins, support, documentation, and more. This is
      where you can find resources useful for running and maintaining HPCC on
@@ -1469,5 +1513,25 @@ lock=/var/lock/HPCCSystems</programlisting>
      This is the page where you can download Installation packages, virtual
      images, source code, documentation, and tutorials.</para>
    </sect1>
+
+    <sect1>
+      <title>Additional Resources</title>
+
+      <para>Additional help for learning ECL is also available in the form
+      of online courses.</para>
+
+      <para><ulink
+      url="https://learn.lexisnexis.com/lexisnexis/resources/courses">https://learn.lexisnexis.com/lexisnexis/resources/courses
+      </ulink></para>
+
+      <para>There are training videos online.</para>
+
+      <para><ulink
+      url="https://learn.lexisnexis.com/lexisnexis/resources/courses/HPCC/Summit2014/NewECLWatch50Features/NewECLWatch50Features.html">Legacy
+      ECL Watch and New 5.0 ECL Watch</ulink></para>
+
+      <para>This video provides a quick summary of the differences in the
+      interface and goes into particular detail; it is helpful for learning
+      how to deploy Roxies.</para>
+    </sect1>
  </chapter>
</book>

+ 275 - 0
docs/HPCCSystemAdmin/SA-Mods/SysAdminConfigMod.xml

@@ -370,4 +370,279 @@ sudo -u hpcc cp /etc/HPCCSystems/source/NewEnvironment.xml /etc/HPCCSystems/envi
      </sect2>
    </sect1>
  </chapter>
+
+  <chapter id="Advance-SysAdmin-Topic-Chapter">
+    <title>Advanced Systems Administrator Topics</title>
+
+    <para>This chapter contains information about certain advanced HPCC
+    Systems<superscript>®</superscript> Administrators topics.</para>
+
+    <sect1 id="Admin-System-Topic">
+      <title>Admin System Topics</title>
+
+      <para>This is a Systems Administration topic designed to provide some
+      insight into an aspect of administering your HPCC
+      System.</para>
+    </sect1>
+
+    <sect1 id="System_sizings">
+      <title>System Sizings</title>
+
+      <para>This section provides some guidance in determining the sizing
+      requirements for an initial installation of HPCC. The following are some
+      suggested configuration guides that can be helpful when planning your
+      system.</para>
+
+      <sect2 role="nobrk">
+        <title>Minimum Suggested Hardware</title>
+
+        <para>HPCC was designed to run on common commodity hardware, and could
+        function on even lesser hardware. The following list is the suggested
+        minimum hardware specifications. At the very minimum you should
+        consider the following hardware components for your HPCC system. These
+        guidelines were put together based on real-world usage of
+        mission-critical (high-uptime) systems with high-volume data.
+        <informaltable border="all"
+            colsep="1" rowsep="1">
+            <tgroup cols="3">
+              <colspec colwidth="94.50pt" />
+
+              <colspec colwidth="84.50pt" />
+
+              <tbody>
+                <row>
+                  <entry><emphasis role="bold">Thor slave</emphasis></entry>
+
+                  <entry>Processor</entry>
+
+                  <entry>4 x 64-bit Intel Processors per node</entry>
+                </row>
+
+                <row>
+                  <entry></entry>
+
+                  <entry>RAM</entry>
+
+                  <entry>8GB per daemon</entry>
+                </row>
+
+                <row>
+                  <entry></entry>
+
+                  <entry>Storage</entry>
+
+                  <entry>RAID - 200MB/sec Sequential Read/Write per
+                  node</entry>
+                </row>
+
+                <row>
+                  <entry></entry>
+
+                  <entry>Network</entry>
+
+                  <entry>1 Gb/sec bandwidth</entry>
+                </row>
+
+                <row>
+                  <entry><emphasis role="bold">Roxie</emphasis></entry>
+
+                  <entry>Processor</entry>
+
+                  <entry>4 x 64-bit Intel Processor</entry>
+                </row>
+
+                <row>
+                  <entry></entry>
+
+                  <entry>RAM</entry>
+
+                  <entry>12GB per Roxie</entry>
+                </row>
+
+                <row>
+                  <entry></entry>
+
+                  <entry>Storage</entry>
+
+                  <entry>400 IOPS &amp; 2 Volumes per (RAID optional)</entry>
+                </row>
+
+                <row>
+                  <entry></entry>
+
+                  <entry>Network</entry>
+
+                  <entry>1 Gb/sec bandwidth</entry>
+                </row>
+
+                <row>
+                  <entry><emphasis role="bold">Dali</emphasis></entry>
+
+                  <entry>Processor</entry>
+
+                  <entry>4 x 64-bit Intel Processor each</entry>
+                </row>
+
+                <row>
+                  <entry></entry>
+
+                  <entry>RAM</entry>
+
+                  <entry>24GB per Dali</entry>
+                </row>
+
+                <row>
+                  <entry></entry>
+
+                  <entry>Storage</entry>
+
+                  <entry>RAID 1, 5, 6, 10 Volume 200GB</entry>
+                </row>
+
+                <row>
+                  <entry><emphasis role="bold">Other</emphasis></entry>
+
+                  <entry>Processor</entry>
+
+                  <entry>4 x 64-bit Intel Processor</entry>
+                </row>
+
+                <row>
+                  <entry></entry>
+
+                  <entry>RAM</entry>
+
+                  <entry>12GB</entry>
+                </row>
+
+                <row>
+                  <entry></entry>
+
+                  <entry>Storage</entry>
+
+                  <entry>RAID 1, 5, 6, 10 Volume 200GB</entry>
+                </row>
+
+                <row>
+                  <entry></entry>
+
+                  <entry>Network</entry>
+
+                  <entry>1 Gb/sec bandwidth</entry>
+                </row>
+              </tbody>
+            </tgroup>
+          </informaltable></para>
+      </sect2>
+    </sect1>
+
+    <sect1 id="Sample_Sizings">
+      <title>Sample Sizings</title>
+
+      <para>This section illustrates sample system sizings for various work
+      environments. Unlike system requirements, the following samples are
+      suggestions for setting up your system for various operating
+      conditions.</para>
+
+      <sect2 id="Sample-Size-HighDataVolume">
+        <title>Sample Sizing for High Data volume (Typical)</title>
+
+        <para>The most typical scenario for HPCC is utilizing it with a high
+        volume of data. This suggested sample sizing would be appropriate for
+        a site with large volumes of data. A good policy is to set the Thor
+        size to 4 times the source data on your HPCC. Typically, Roxie would
+        be about ¼ the size of Thor. This is because the data is compressed
+        and the system does not hold any transient data in Roxie.</para>
+
+        <sect3>
+          <title>High Data Thor sizing considerations</title>
+
+          <para>Each Thor node can hold about 2.5 TB of data (MAX), so plan
+          for the number of Thor nodes accordingly for your data.</para>
+
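+          <para>As a rough sketch of the sizing rules above (numbers are
+          illustrative), the Thor footprint, node count, and Roxie size can
+          be estimated from the raw data volume:</para>
+
+          <programlisting># Sketch only: 4 x source data for Thor, ~1/4 of Thor for Roxie,
# and about 2.5 TB of data per Thor node at most.
SOURCE_TB=10
THOR_TB=$((SOURCE_TB * 4))    # Thor sized at 4 times the source data
ROXIE_TB=$((THOR_TB / 4))     # Roxie at roughly 1/4 the size of Thor
# Round the node count up to whole nodes (2.5 TB max per node)
THOR_NODES=$(awk -v t="$THOR_TB" 'BEGIN { n = t / 2.5; printf "%d\n", (n == int(n)) ? n : int(n) + 1 }')
echo "Thor: ${THOR_TB}TB on ${THOR_NODES} nodes; Roxie: ${ROXIE_TB}TB"</programlisting>
+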
+          <para>If possible, use SAS drives for both Thor and Roxie, as they
+          are now almost on par with SATA drives. If not for both, get SAS
+          drives at least for your Roxie cluster.</para>
+
+          <para>Thor replicates data and is typically configured for two
+          copies.</para>
+        </sect3>
+
+        <sect3>
+          <title>High Data Roxie sizing considerations</title>
+
+          <para>Roxie keeps most of its data in memory, so you should allocate
+          plenty of memory for Roxie. Calculate the approximate size of your
+          data, and allocate appropriately. You should either increase the
+          number of nodes, or increase the amount of memory.</para>
+
+          <para>A good practice is to allocate a Dali for every Roxie
+          cluster.</para>
+
+          <para>Roxie should have a mirror. This is useful when you need to
+          update data: you update the mirror, then make that primary and
+          bring the other one down. This is a good practice but not really a
+          necessity except in the case of high availability.</para>
+        </sect3>
+      </sect2>
+
+      <sect2>
+        <title>Sample Sizing for Heavy Processing on Low Data Volume</title>
+
+        <para>The following section provides some sample sizing for heavy
+        processing with approximately the amount of data indicated.</para>
+
+        <sect3>
+          <title>750 GB of Raw Data</title>
+
+          <para>Thor = 3 (slaves) + 2 (management) = 5 Nodes</para>
+
+          <para>Roxie = 3 (agents) + 1 (Dali) = 4 Nodes (This will mean that
+          the environment will be down during query deployment)</para>
+
+          <para>Spares = 2</para>
+
+          <para>Total = 13 nodes</para>
+        </sect3>
+
+        <sect3>
+          <title>1250 GB of Raw Data</title>
+
+          <para>Thor = 6 (slaves) + 2 (management) = 8 Nodes</para>
+
+          <para>Roxie = 4 (agents) + 1 (Dali) = 5 Nodes (This will mean that
+          the environment will be down during query deployment)</para>
+
+          <para>Spares = 2</para>
+
+          <para>Total = 17 nodes</para>
+        </sect3>
+
+        <sect3>
+          <title>2000 GB of Raw Data</title>
+
+          <para>Thor = 8 (slaves) + 3 (management) = 11 Nodes</para>
+
+          <para>Roxie = 4 (agents) + 1 (Dali) = 5 Nodes (This will mean that
+          the environment will be down during query deployment)</para>
+
+          <para>Spares = 2</para>
+
+          <para>Total = 20 nodes</para>
+        </sect3>
+
+        <sect3>
+          <title>3500 GB of Raw Data</title>
+
+          <para>Thor = 12 (slaves) + 5 (management) = 17 Nodes</para>
+
+          <para>Roxie = 6 (agents) + 1 (Dali) = 7 Nodes (This will mean that
+          the environment will be down during query deployment)</para>
+
+          <para>Spares = 2</para>
+
+          <para>Total = 28 nodes</para>
+        </sect3>
+      </sect2>
+    </sect1>
+  </chapter>
</book>

+ 569 - 0
docs/Installing_and_RunningTheHPCCPlatform/Inst-Mods/Hardware.xml

@@ -0,0 +1,569 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
+"http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+<book>
+  <bookinfo>
+    <title>Installing the HPCC Platform: Hardware Module</title>
+
+    <mediaobject>
+      <imageobject>
+        <imagedata fileref="../../images/redswooshWithLogo3.jpg" />
+      </imageobject>
+    </mediaobject>
+
+    <author>
+      <surname>Boca Raton Documentation Team</surname>
+    </author>
+
+    <legalnotice>
+      <para>We welcome your comments and feedback about this document via
+      email to <email>docfeedback@hpccsystems.com</email></para>
+
+      <para>Please include <emphasis role="bold">Documentation
+      Feedback</emphasis> in the subject line and reference the document name,
+      page numbers, and current Version Number in the text of the
+      message.</para>
+
+      <para>LexisNexis and the Knowledge Burst logo are registered trademarks
+      of Reed Elsevier Properties Inc., used under license.</para>
+
+      <para>HPCC Systems is a registered trademark of LexisNexis Risk Data
+      Management Inc.</para>
+
+      <para>Other products, logos, and services may be trademarks or
+      registered trademarks of their respective companies. All names and
+      example data used in this manual are fictitious. Any similarity to
+      actual persons, living or dead, is purely coincidental.</para>
+
+      <para></para>
+    </legalnotice>
+
+    <xi:include href="../../common/Version.xml" xpointer="FooterInfo"
+                xmlns:xi="http://www.w3.org/2001/XInclude" />
+
+    <xi:include href="../../common/Version.xml" xpointer="DateVer"
+                xmlns:xi="http://www.w3.org/2001/XInclude" />
+
+    <corpname>HPCC Systems</corpname>
+
+    <xi:include href="../../common/Version.xml" xpointer="Copyright"
+                xmlns:xi="http://www.w3.org/2001/XInclude" />
+
+    <mediaobject role="logo">
+      <imageobject>
+        <imagedata fileref="images/LN_Rightjustified.jpg" />
+      </imageobject>
+    </mediaobject>
+  </bookinfo>
+
+  <chapter id="Hardware-and-Software-Chapter">
+    <title>Hardware and Software Requirements</title>
+
+    <para>The following section describes the various hardware and software
+    required in order to run the HPCC.</para>
+
+    <sect1 id="HW-Switch" role="nobrk">
+      <title>Network Switch</title>
+
+      <para>A significant component of HPCC is the infrastructure it runs on,
+      specifically the switch.</para>
+
+      <sect2 id="Switch-Requirements">
+        <title>Switch requirements</title>
+
+        <itemizedlist spacing="compact">
+          <listitem>
+            <para>Sufficient number of ports to allow all nodes to be
+            connected directly to it;</para>
+          </listitem>
+
+          <listitem>
+            <para>IGMP v.2 support </para>
+          </listitem>
+
+          <listitem>
+            <para>IGMP snooping support</para>
+          </listitem>
+        </itemizedlist>
+
+        <para><emphasis role="bold">Small:</emphasis> For a very small test
+        system, almost any gigabit switch will suffice. These are inexpensive
+        and readily available in six to 20-port models.</para>
+
+        <para><figure>
+            <title>1 GigE 8-port Switch</title>
+
+            <mediaobject>
+              <imageobject>
+                <imagedata fileref="../../images/DHSMC8508T.jpg"
+                           vendor="hardwareSS" />
+              </imageobject>
+            </mediaobject>
+          </figure></para>
+
+        <para><emphasis role="bold">Medium</emphasis>: For medium sized (10-48
+        node) systems, we recommend using a Force10 s25, s50, s55, or s60
+        switch.</para>
+
+        <para><figure>
+            <title>Force10 S55 48-port Network Switch</title>
+
+            <mediaobject>
+              <imageobject>
+                <imagedata fileref="../../images/s55.jpg"
+                           vendor="hardwareSS,force10SS" />
+              </imageobject>
+            </mediaobject>
+          </figure></para>
+
+        <para><?hard-pagebreak ?><emphasis role="bold">Large</emphasis>: For
+        large (48-350 node) system, the Force10 c150 or c300 are good
+        choices.</para>
+
+        <para><figure>
+            <title>Force 10 c150</title>
+
+            <mediaobject>
+              <imageobject>
+                <imagedata fileref="../../images/c150-lg.jpg"
+                           vendor="hardwareSS,force10SS" />
+              </imageobject>
+            </mediaobject>
+          </figure></para>
+
+        <para><?hard-pagebreak ?><emphasis role="bold">Very Large</emphasis>:
+        For very large (more than 300 nodes) system, the Force10 e600 or e1200
+        are good choices.</para>
+
+        <para><figure>
+            <title>Force 10 e600 and e1200</title>
+
+            <mediaobject>
+              <imageobject>
+                <imagedata fileref="../../images/Force10_ExaScaleE6001200.jpg"
+                           vendor="hardwareSS,force10SS" />
+              </imageobject>
+            </mediaobject>
+          </figure></para>
+      </sect2>
+
+      <sect2 id="Switch-additional-recommend">
+        <title>Switch additional recommended features</title>
+
+        <para><itemizedlist mark="square" spacing="compact">
+            <listitem>
+              <para>Non-blocking backplane</para>
+            </listitem>
+
+            <listitem>
+              <para>Low latency (under 35usec)</para>
+            </listitem>
+
+            <listitem>
+              <para>Layer 3 switching</para>
+            </listitem>
+
+            <listitem>
+              <para>Managed and monitored (SNMP is a plus)</para>
+            </listitem>
+
+            <listitem>
+              <para>Port channel (port bundling) support</para>
+            </listitem>
+          </itemizedlist></para>
+      </sect2>
+    </sect1>
+
+    <sect1 id="HW-LoadBalancer">
+      <title>Load Balancer</title>
+
+      <para>In order to take full advantage of a Roxie cluster, a load
+      balancer is required. Each Roxie Node is capable of receiving requests
+      and returning results. Therefore, a load balancer distributes the load
+      in an efficient manner to get the best performance and avoid a potential
+      bottleneck.</para>
+
+      <para>We recommend the Web Accelerator product line from F5 Networks.
+      See <ulink
+      url="http://www.f5.com/pdf/products/big-ip-webaccelerator-ds.pdf">http://www.f5.com/pdf/products/big-ip-webaccelerator-ds.pdf
+      </ulink> for more information.</para>
+
+      <para><figure>
+          <title>F5 Load Balancers</title>
+
+          <mediaobject>
+            <imageobject>
+              <imagedata fileref="../../images/IR-009a.jpg"
+                         vendor="hardwareSS,F5SS" />
+            </imageobject>
+          </mediaobject>
+        </figure></para>
+
+      <sect2>
+        <title>Load Balancer Requirements</title>
+
+        <sect3>
+          <title>Minimum requirements</title>
+
+          <para><itemizedlist spacing="compact">
+              <listitem>
+                <para>Throughput: 1Gbps Gigabit</para>
+              </listitem>
+
+              <listitem>
+                <para>Ethernet ports: 2</para>
+              </listitem>
+
+              <listitem>
+                <para>Balancing Strategy: Round Robin</para>
+              </listitem>
+            </itemizedlist></para>
+        </sect3>
+
+        <sect3>
+          <title>Standard requirements</title>
+
+          <para><itemizedlist spacing="compact">
+              <listitem>
+                <para>Throughput: 8Gbps</para>
+              </listitem>
+
+              <listitem>
+                <para>Gigabit Ethernet ports: 4</para>
+              </listitem>
+
+              <listitem>
+                <para>Balancing Strategy: Flexible (F5 iRules or
+                equivalent)</para>
+              </listitem>
+            </itemizedlist></para>
+        </sect3>
+
+        <sect3 role="brk">
+          <title>Recommended capabilities</title>
+
+          <para><itemizedlist spacing="compact">
+              <listitem>
+                <para>Ability to provide cyclic load rotation (not load
+                balancing).</para>
+              </listitem>
+
+              <listitem>
+                <para>Ability to forward SOAP/HTTP traffic</para>
+              </listitem>
+
+              <listitem>
+                <para>Ability to provide triangulation/n-path routing (traffic
+                incoming through the load balancer to the node, replies sent
+                out the via the switch).</para>
+              </listitem>
+
+              <listitem>
+                <para>Ability to treat a cluster of nodes as a single entity
+                (for load balancing clusters not nodes)</para>
+
+                <para>or</para>
+              </listitem>
+
+              <listitem>
+                <para>Ability to stack or tier the load balancers in
+                multiple levels if the above is not available.</para>
+              </listitem>
+            </itemizedlist></para>
+        </sect3>
+      </sect2>
+    </sect1>
+
+    <sect1 id="Nodes-Hardware">
+      <title>Nodes-Hardware</title>
+
+      <para>The HPCC can run as a single-node system or a multi-node
+      system.</para>
+
+      <para>These hardware recommendations are intended for a multi-node
+      production system. A test system can use less stringent specifications.
+      Also, while it is easier to manage a system where all nodes are
+      identical, this is not required. However, it is important to note that
+      your system will only run as fast as its slowest node.</para>
+
+      <sect2 id="Node-Min-requirements">
+        <title>Node minimum requirements</title>
+
+        <itemizedlist mark="square" spacing="compact">
+          <listitem>
+            <para>Pentium 4 or newer CPU</para>
+          </listitem>
+
+          <listitem>
+            <para>32-bit</para>
+          </listitem>
+
+          <listitem>
+            <para>1GB RAM per slave</para>
+
+            <para>(Note: If you configure more than 1 slave per node, memory
+            is shared. For example, if you want 2 slaves per node with each
+            having 4 GB of memory, the server would need 8 GB total.)</para>
+          </listitem>
+
+          <listitem>
+            <para>One Hard Drive (with sufficient free space to handle the
+            size of the data you plan to process) or Network Attached
+            Storage.</para>
+          </listitem>
+
+          <listitem>
+            <para>1 GigE network interface</para>
+          </listitem>
+        </itemizedlist>
+      </sect2>
+
+      <sect2 id="Node-recommended-specifications">
+        <title>Node recommended specifications</title>
+
+        <para><itemizedlist mark="square" spacing="compact">
+            <listitem>
+              <para>Nehalem Core i7 CPU</para>
+            </listitem>
+
+            <listitem>
+              <para>64-bit</para>
+            </listitem>
+
+            <listitem>
+              <para>4 GB RAM (or more) per slave</para>
+            </listitem>
+
+            <listitem>
+              <para>1 GigE network interface</para>
+            </listitem>
+
+            <listitem>
+              <para>PXE boot support in BIOS</para>
+
+              <para>PXE boot support is recommended so you can manage OS,
+              packages, and other settings when you have a large system</para>
+            </listitem>
+
+            <listitem>
+              <para>Optionally IPMI and KVM over IP support</para>
+
+              <para><emphasis role="bold">For Roxie nodes:</emphasis></para>
+            </listitem>
+
+            <listitem>
+              <para>Two 10K RPM (or faster) SAS Hard Drives</para>
+
+              <para>Typically, drive speed is the priority for Roxie
+              nodes</para>
+
+              <para><emphasis role="bold">For Thor nodes:</emphasis></para>
+            </listitem>
+
+            <listitem>
+              <para>Two 7200 RPM (or faster) SATA Hard Drives (Thor)</para>
+            </listitem>
+
+            <listitem>
+              <para>Optionally 3 or more hard drives can be configured in a
+              RAID 5 container for increased performance and
+              availability</para>
+
+              <para>Typically, drive capacity is the priority for Thor
+              nodes</para>
+            </listitem>
+          </itemizedlist></para>
+      </sect2>
+    </sect1>
+
+    <sect1 id="Nodes-Software">
+      <title>Nodes-Software</title>
+
+      <para>All nodes must have identical operating systems. We recommend
+      all nodes have identical BIOS settings, and packages installed. This
+      significantly reduces variables when troubleshooting. It is easier to
+      manage a system where all nodes are identical, but this is not
+      required.</para>
+
+      <sect2 id="Operating-System-Requirements">
+        <title>Operating System Requirements</title>
+
+        <para>Binary packages are available for the following:</para>
+
+        <para><itemizedlist mark="square" spacing="compact">
+            <listitem>
+              <para>64-bit CentOS 5</para>
+            </listitem>
+
+            <listitem>
+              <para>64-bit CentOS 6</para>
+            </listitem>
+
+            <listitem>
+              <para>64-bit RedHat Enterprise 5</para>
+            </listitem>
+
+            <listitem>
+              <para>64-bit RedHat Enterprise 6</para>
+            </listitem>
+
+            <listitem>
+              <para>64-bit Ubuntu 12.04 (LTS)</para>
+            </listitem>
+
+            <listitem>
+              <para>64-bit Ubuntu 13.10</para>
+            </listitem>
+
+            <listitem>
+              <para>64-bit Ubuntu 14.04 (LTS)</para>
+            </listitem>
+          </itemizedlist></para>
+      </sect2>
+
+      <sect2 id="configuration-manager">
+        <title>Dependencies</title>
+
+        <para>Installing HPCC on your system depends on having required
+        component packages installed on the system. The required dependencies
+        can vary depending on your platform. In some cases the dependencies
+        are included in the installation packages. In other instances the
+        installation may fail, and the package management utility will prompt
+        you for the required packages. Installation of these packages can vary
+        depending on your platform. For details of the specific installation
+        commands for obtaining and installing these packages, see the commands
+        specific to your Operating System. <variablelist>
+            <varlistentry>
+              <term>Note:</term>
+
+              <listitem>
+                <para>For CentOS installations, the Fedora EPEL repository is
+                required.</para>
+              </listitem>
+            </varlistentry>
+          </variablelist></para>
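+
+        <para>For example, on a CentOS node the EPEL repository can typically
+        be enabled before installing the platform package (the package file
+        name below is illustrative; use the package you downloaded):</para>
+
+        <programlisting># Enable the Fedora EPEL repository on CentOS
sudo yum install epel-release
# Install the platform package; yum resolves the remaining dependencies
sudo yum install hpccsystems-platform-community-5.2.0-1.el6.x86_64.rpm</programlisting>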
+      </sect2>
+
+      <sect2 id="SSH_Keys" role="brk">
+        <title>SSH Keys</title>
+
+        <para>The HPCC components use ssh keys to authenticate each other.
+        This is required for communication between nodes. A script to generate
+        keys has been provided .You should run that script and distribute the
+        public and private keys to all nodes after you have installed the
+        packages on all nodes, but before you configure a multi-node
+        HPCC.</para>
+
+        <para><itemizedlist spacing="compact">
+            <listitem>
+              <para>As root (or sudo as shown below), generate a new key using
+              this command:</para>
+
+              <para><programlisting>sudo /opt/HPCCSystems/sbin/keygen.sh</programlisting></para>
+            </listitem>
+
+            <listitem>
+              <para>Distribute the keys to all nodes. From the <emphasis
+              role="bold">/home/hpcc/.ssh</emphasis> directory, copy these
+              three files to the same directory (<emphasis
+              role="bold">/home/hpcc/.ssh</emphasis>) on each node:</para>
+
+              <itemizedlist spacing="compact">
+                <listitem>
+                  <para><emphasis role="bold">id_rsa</emphasis></para>
+                </listitem>
+
+                <listitem>
+                  <para><emphasis role="bold">id_rsa.pub</emphasis></para>
+                </listitem>
+
+                <listitem>
+                  <para><emphasis
+                  role="bold">authorized_keys</emphasis></para>
+                </listitem>
+              </itemizedlist>
+
+              <para>Make sure the files retain their permissions when they
+              are distributed. These keys must be owned by the user
+              "<emphasis role="bold">hpcc</emphasis>"; a distribution
+              sketch follows this list.</para>
+            </listitem>
+          </itemizedlist></para>
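+
+        <para>A minimal sketch of one way to push the keys out, assuming a
+        nodelist.txt file with one hostname or IP address per line and an
+        existing hpcc user on every node (adapt to your own
+        environment):</para>
+
+        <programlisting>for node in $(cat nodelist.txt); do
+    scp /home/hpcc/.ssh/id_rsa /home/hpcc/.ssh/id_rsa.pub \
+        /home/hpcc/.ssh/authorized_keys ${node}:/home/hpcc/.ssh/
+    ssh ${node} "chown -R hpcc:hpcc /home/hpcc/.ssh; chmod 600 /home/hpcc/.ssh/id_rsa"
+done</programlisting>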
+      </sect2>
+    </sect1>
+
+    <sect1 id="workstation-requirements">
+      <title>User Workstation Requirements</title>
+
+      <itemizedlist spacing="compact">
+        <listitem>
+          <para>Running the HPCC platform requires a user workstation with
+          a Web browser that can communicate with the HPCC. You will use
+          the browser to access ECL Watch, a Web-based interface to your
+          HPCC system. ECL Watch enables you to examine and manage many
+          aspects of the HPCC and allows you to see information about jobs
+          you run, data files, and system metrics.</para>
+
+          <para>Use one of the supported web browsers with Javascript
+          enabled.</para>
+
+          <itemizedlist spacing="compact">
+            <listitem>
+              <para>Internet Explorer® 9 (or later)</para>
+            </listitem>
+
+            <listitem>
+              <para>Firefox™ 3.0 (or later)</para>
+
+              <!--***Add additional browsers when approved-->
+            </listitem>
+
+            <listitem>
+              <para>Google Chrome 10 (or later)</para>
+            </listitem>
+          </itemizedlist>
+
+          <para>If browser security is set to <emphasis
+          role="bold">High</emphasis>, you should add ECL Watch as a
+          Trusted Site to allow Javascript execution.</para>
+
+          <!--note: window users may want to use the 32 bit graph control***-->
+        </listitem>
+
+        <listitem>
+          <para>Install the ECL IDE</para>
+
+          <para>The ECL IDE (Integrated Development Environment) is the
+          tool used to create queries into your data, and the ECL files
+          with which to build those queries.</para>
+
+          <para>Download the ECL IDE from the HPCC Systems web portal at
+          http://hpccsystems.com</para>
+
+          <para>You can find the ECL IDE and Client Tools on that site
+          using the following URL:</para>
+
+          <para><ulink
+          url="http://hpccsystems.com/download/free-community-edition/ecl-ide">http://hpccsystems.com/download/free-community-edition/ecl-ide</ulink></para>
+
+          <para>The ECL IDE was designed to run on Windows machines. See the
+          appendix for instructions on running on Linux workstations using
+          Wine.</para>
+        </listitem>
+
+        <listitem>
+          <para>Microsoft VS 2008 C++ compiler (either Express or
+          Professional edition). This is needed if you are running Windows
+          and want to compile and run ECL code locally on your Windows
+          workstation.</para>
+        </listitem>
+
+        <listitem>
+          <para>GCC. This is needed if you are running under Linux and
+          want to compile queries locally on a standalone Linux machine
+          (although it may already be available to you, since it usually
+          comes with the operating system). A minimal local-compile sketch
+          follows this list.</para>
+        </listitem>
+      </itemizedlist>
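+
+      <para>For instance, once the Client Tools and a local compiler are
+      installed, a trivial query can typically be compiled and run
+      standalone along these lines (a sketch; the file name is an
+      example):</para>
+
+      <programlisting>echo "OUTPUT('Hello world');" > hello.ecl
+eclcc hello.ecl -o hello   # compile the ECL locally
+./hello                    # run the generated executable</programlisting>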
+    </sect1>
+  </chapter>
+</book>

+ 23 - 527
docs/Installing_and_RunningTheHPCCPlatform/Installing_and_RunningTheHPCCPlatform.xml

@@ -29,17 +29,15 @@
      <para>LexisNexis and the Knowledge Burst logo are registered trademarks
      of Reed Elsevier Properties Inc., used under license.</para>
 
-      <para>HPCC Systems is a registered trademark of LexisNexis Risk Data
-      Management Inc.</para>
+      <para>HPCC Systems<superscript>®</superscript> is a registered trademark
+      of LexisNexis Risk Data Management Inc.</para>
 
 
      <para>Other products, logos, and services may be trademarks or
-      registered trademarks of their respective companies. A</para>
+      registered trademarks of their respective companies. All names and
+      example data used in this manual are fictitious. Any similarity to
+      actual persons, living or dead, is purely coincidental.</para>
 
 
-      <para>ll names and example data used in this manual are fictitious. Any
-      similarity to actual persons, living or dead, is purely
-      coincidental.</para>
-
-      <para></para>
+      <para> </para>
    </legalnotice>
 
    <xi:include href="common/Version.xml" xpointer="FooterInfo"
@@ -48,7 +46,7 @@
    <xi:include href="common/Version.xml" xpointer="DateVer"
                xmlns:xi="http://www.w3.org/2001/XInclude" />
 
-    <corpname>HPCC Systems</corpname>
+    <corpname>HPCC Systems<superscript>®</superscript></corpname>
 
 
    <xi:include href="common/Version.xml" xpointer="Copyright"
                xmlns:xi="http://www.w3.org/2001/XInclude" />
@@ -211,512 +209,9 @@
    </sect1>
  </chapter>
 
-  <chapter id="Hardware-and-Software-Requirements">
-    <title>Hardware and Software Requirements</title>
-
-    <para>The following section describes the various hardware and software
-    required in order to run the HPCC.</para>
-
-    <sect1 id="Switch" role="nobrk">
-      <title>Network Switch</title>
-
-      <para>A significant component of HPCC is the infrastructure it runs on,
-      specifically the switch.</para>
-
-      <sect2 id="Switch-Requirements">
-        <title>Switch requirements</title>
-
-        <itemizedlist spacing="compact">
-          <listitem>
-            <para>Sufficient number of ports to allow all nodes to be
-            connected directly to it;</para>
-          </listitem>
-
-          <listitem>
-            <para>IGMP v.2 support </para>
-          </listitem>
-
-          <listitem>
-            <para>IGMP snooping support</para>
-          </listitem>
-        </itemizedlist>
-
-        <para><emphasis role="bold">Small:</emphasis> For a very small test
-        system, almost any gigabit switch will suffice. These are inexpensive
-        and readily available in six to 20-port models.</para>
-
-        <para><figure>
-            <title>1 GigE 8-port Switch</title>
-
-            <mediaobject>
-              <imageobject>
-                <imagedata fileref="images/DHSMC8508T.jpg" vendor="hardwareSS" />
-              </imageobject>
-            </mediaobject>
-          </figure></para>
-
-        <para><emphasis role="bold">Medium</emphasis>: For medium sized (10-48
-        node) systems, we recommend using a Force10 s25, s50, s55, or s60
-        switch</para>
-
-        <para><figure>
-            <title>Force10 S55 48-port Network Switch</title>
-
-            <mediaobject>
-              <imageobject>
-                <imagedata fileref="images/s55.jpg"
-                           vendor="hardwareSS,force10SS" />
-              </imageobject>
-            </mediaobject>
-          </figure></para>
-
-        <para><?hard-pagebreak ?><emphasis role="bold">Large</emphasis>: For
-        large (48-350 node) system, the Force10 c150 or c300 are good
-        choices.</para>
-
-        <para><figure>
-            <title>Force 10 c150</title>
-
-            <mediaobject>
-              <imageobject>
-                <imagedata fileref="images/c150-lg.jpg"
-                           vendor="hardwareSS,force10SS" />
-              </imageobject>
-            </mediaobject>
-          </figure></para>
-
-        <para><?hard-pagebreak ?><emphasis role="bold">Very Large</emphasis>:
-        For very large (more than 300 nodes) system, the Force10 e600 or e1200
-        are good choices.</para>
-
-        <para><figure>
-            <title>Force 10 e600 and e1200</title>
-
-            <mediaobject>
-              <imageobject>
-                <imagedata fileref="images/Force10_ExaScaleE6001200.jpg"
-                           vendor="hardwareSS,force10SS" />
-              </imageobject>
-            </mediaobject>
-          </figure></para>
-      </sect2>
-
-      <sect2 id="Switch-additional-recommend">
-        <title>Switch additional recommended features</title>
-
-        <para><itemizedlist mark="square" spacing="compact">
-            <listitem>
-              <para>Non-blocking backplane</para>
-            </listitem>
-
-            <listitem>
-              <para>Low latency (under 35usec)</para>
-            </listitem>
-
-            <listitem>
-              <para>Layer 3 switching</para>
-            </listitem>
-
-            <listitem>
-              <para>Managed and monitored (SNMP is a plus)</para>
-            </listitem>
-
-            <listitem>
-              <para>Port channel (port bundling) support</para>
-            </listitem>
-          </itemizedlist></para>
-      </sect2>
-    </sect1>
-
-    <sect1>
-      <title>Load Balancer</title>
-
-      <para>In order to take full advantage of a Roxie cluster, a load
-      balancer is required. Each Roxie Node is capable of receiving requests
-      and returning results. Therefore, a load balancer distributes the load
-      in an efficient manner to get the best performance and avoid a potential
-      bottleneck.</para>
-
-      <para>We recommend the Web Accelerator product line from F5 Networks.
-      See <ulink
-      url="http://www.f5.com/pdf/products/big-ip-webaccelerator-ds.pdf">http://www.f5.com/pdf/products/big-ip-webaccelerator-ds.pdf
-      </ulink> for more information<phrase></phrase>.</para>
-
-      <para><figure>
-          <title>F5 Load Balancers</title>
-
-          <mediaobject>
-            <imageobject>
-              <imagedata fileref="images/IR-009a.jpg" vendor="hardwareSS,F5SS" />
-            </imageobject>
-          </mediaobject>
-        </figure></para>
-
-      <sect2>
-        <title>Load Balancer Requirements</title>
-
-        <sect3>
-          <title>Minimum requirements</title>
-
-          <para><itemizedlist spacing="compact">
-              <listitem>
-                <para>Throughput: 1Gbps Gigabit</para>
-              </listitem>
-
-              <listitem>
-                <para>Ethernet ports: 2</para>
-              </listitem>
-
-              <listitem>
-                <para>Balancing Strategy: Round Robin</para>
-              </listitem>
-            </itemizedlist></para>
-        </sect3>
-
-        <sect3>
-          <title>Standard requirements</title>
-
-          <para><itemizedlist spacing="compact">
-              <listitem>
-                <para>Throughput: 8Gbps</para>
-              </listitem>
-
-              <listitem>
-                <para>Gigabit Ethernet ports: 4</para>
-              </listitem>
-
-              <listitem>
-                <para>Balancing Strategy: Flexible (F5 iRules or
-                equivalent)</para>
-              </listitem>
-            </itemizedlist></para>
-        </sect3>
-
-        <sect3 role="brk">
-          <title>Recommended capabilities</title>
-
-          <para><itemizedlist spacing="compact">
-              <listitem>
-                <para>Ability to provide cyclic load rotation (not load
-                balancing).</para>
-              </listitem>
-
-              <listitem>
-                <para>Ability to forward SOAP/HTTP traffic</para>
-              </listitem>
-
-              <listitem>
-                <para>Ability to provide triangulation/n-path routing (traffic
-                incoming through the load balancer to the node, replies sent
-                out the via the switch).</para>
-              </listitem>
-
-              <listitem>
-                <para>Ability to treat a cluster of nodes as a single entity
-                (for load balancing clusters not nodes)</para>
-
-                <para>or</para>
-              </listitem>
-
-              <listitem>
-                <para>Ability to stack or tier the load balancers for multiple
-                levels if not.</para>
-              </listitem>
-            </itemizedlist></para>
-        </sect3>
-      </sect2>
-    </sect1>
-
-    <sect1 id="Nodes-Hardware">
-      <title>Nodes-Hardware</title>
-
-      <para>The HPCC can run as a single node system or a multi node
-      system.</para>
-
-      <para>These hardware recommendations are intended for a multi-node
-      production system. A test system can use less stringent specifications.
-      Also, while it is easier to manage a system where all nodes are
-      identical, this is not required. However, it is important to note that
-      your system will only run as fast as its slowest node.</para>
-
-      <sect2 id="Node-Min-requirements">
-        <title>Node minimum requirements</title>
-
-        <itemizedlist mark="square" spacing="compact">
-          <listitem>
-            <para>Pentium 4 or newer CPU</para>
-          </listitem>
-
-          <listitem>
-            <para>32-bit</para>
-          </listitem>
-
-          <listitem>
-            <para>1GB RAM per slave</para>
-
-            <para>(Note: If you configure more than 1 slave per node, memory
-            is shared. For example, if you want 2 slaves per node with each
-            having 4 GB of memory, the server would need 8 GB total.)</para>
-          </listitem>
-
-          <listitem>
-            <para>One Hard Drive (with sufficient free space to handle the
-            size of the data you plan to process) or Network Attached
-            Storage.</para>
-          </listitem>
-
-          <listitem>
-            <para>1 GigE network interface</para>
-          </listitem>
-        </itemizedlist>
-      </sect2>
-
-      <sect2 id="Node-recommended-specifications">
-        <title>Node recommended specifications</title>
-
-        <para><itemizedlist mark="square" spacing="compact">
-            <listitem>
-              <para>Nehalem Core i7 CPU</para>
-            </listitem>
-
-            <listitem>
-              <para>64-bit</para>
-            </listitem>
-
-            <listitem>
-              <para>4 GB RAM (or more) per slave</para>
-            </listitem>
-
-            <listitem>
-              <para>1 GigE network interface</para>
-            </listitem>
-
-            <listitem>
-              <para>PXE boot support in BIOS</para>
-
-              <para>PXE boot support is recommended so you can manage OS,
-              packages, and other settings when you have a large system</para>
-            </listitem>
-
-            <listitem>
-              <para>Optionally IPMI and KVM over IP support</para>
-
-              <para><emphasis role="bold">For Roxie nodes:</emphasis></para>
-            </listitem>
-
-            <listitem>
-              <para>Two 10K RPM (or faster) SAS Hard Drives</para>
-
-              <para>Typically, drive speed is the priority for Roxie
-              nodes</para>
-
-              <para><emphasis role="bold">For Thor nodes:</emphasis></para>
-            </listitem>
-
-            <listitem>
-              <para>Two 7200K RPM (or faster) SATA Hard Drives (Thor)</para>
-            </listitem>
-
-            <listitem>
-              <para>Optionally 3 or more hard drives can be configured in a
-              RAID 5 container for increased performance and
-              availability</para>
-
-              <para>Typically, drive capacity is the priority for Thor
-              nodes</para>
-            </listitem>
-          </itemizedlist></para>
-      </sect2>
-    </sect1>
-
-    <sect1 id="Nodes-Software">
-      <title>Nodes-Software</title>
-
-      <para>All nodes must have the identical operating systems. We recommend
-      all nodes have identical BIOS settings, and packages installed. This
-      significantly reduces variables when troubleshooting. It is easier to
-      manage a system where all nodes are identical, but this is not
-      required.</para>
-
-      <sect2 id="Operating-System-Requirements">
-        <title>Operating System Requirements</title>
-
-        <para>Binary packages are available for the following:</para>
-
-        <para><itemizedlist mark="square" spacing="compact">
-            <listitem>
-              <para>64-bit CentOS 5</para>
-            </listitem>
-
-            <listitem>
-              <para>64-bit CentOS 6</para>
-            </listitem>
-
-            <listitem>
-              <para>64-bit RedHat Enterprise 5</para>
-            </listitem>
-
-            <listitem>
-              <para>64-bit RedHat Enterprise 6</para>
-            </listitem>
-
-            <listitem>
-              <para>64-bit Ubuntu 12.04 (LTS)</para>
-            </listitem>
-
-            <listitem>
-              <para>64-bit Ubuntu 13.10</para>
-            </listitem>
-
-            <listitem>
-              <para>64-bit Ubuntu 14.04 (LTS)</para>
-            </listitem>
-          </itemizedlist></para>
-      </sect2>
-
-      <sect2 id="configuration-manager">
-        <title>Dependencies</title>
-
-        <para>Installing HPCC on your system depends on having required
-        component packages installed on the system. The required dependencies
-        can vary depending on your platform. In some cases the dependencies
-        are included in the installation packages. In other instances the
-        installation may fail, and the package management utility will prompt
-        you for the required packages. Installation of these packages can vary
-        depending on your platform. For details of the specific installation
-        commands for obtaining and installing these packages, see the commands
-        specific to your Operating System. <variablelist>
-            <varlistentry>
-              <term>Note:</term>
-
-              <listitem>
-                <para>For CentOS installations, the Fedora EPEL repository is
-                required.</para>
-              </listitem>
-            </varlistentry>
-          </variablelist></para>
-      </sect2>
-
-      <sect2 id="SSH_Keys" role="brk">
-        <title>SSH Keys</title>
-
-        <para>The HPCC components use ssh keys to authenticate each other.
-        This is required for communication between nodes. A script to generate
-        keys has been provided .You should run that script and distribute the
-        public and private keys to all nodes after you have installed the
-        packages on all nodes, but before you configure a multi-node
-        HPCC.</para>
-
-        <para><itemizedlist spacing="compact">
-            <listitem>
-              <para>As root (or sudo as shown below), generate a new key using
-              this command:</para>
-
-              <para><programlisting>sudo /opt/HPCCSystems/sbin/keygen.sh</programlisting></para>
-            </listitem>
-
-            <listitem>
-              <para>Distribute the keys to all nodes. From the <emphasis
-              role="bold">/home/hpcc/.ssh</emphasis> directory, copy these
-              three files to the same directory (<emphasis
-              role="bold">/home/hpcc/.ssh</emphasis>) on each node:</para>
-
-              <itemizedlist spacing="compact">
-                <listitem>
-                  <para><emphasis role="bold">id_rsa</emphasis></para>
-                </listitem>
-
-                <listitem>
-                  <para><emphasis role="bold">id_rsa.pub</emphasis></para>
-                </listitem>
-
-                <listitem>
-                  <para><emphasis
-                  role="bold">authorized_keys</emphasis></para>
-                </listitem>
-              </itemizedlist>
-
-              <para>Make sure that files retain permissions when they are
-              distributed. These keys need to be owned by the user "<emphasis
-              role="bold">hpcc</emphasis>".</para>
-            </listitem>
-          </itemizedlist></para>
-      </sect2>
-    </sect1>
-
-    <sect1 id="workstation-requirements">
-      <title>User Workstation Requirements</title>
-
-      <itemizedlist spacing="compact">
-        <listitem>
-          <para>Running the HPCC platform requires communication from your
-          user workstation with a browser to the HPCC. You will use it to
-          access ECL Watch—a Web-based interface to your HPCC system. ECL
-          Watch enables you to examine and manage many aspects of the HPCC and
-          allows you to see information about jobs you run, data files, and
-          system metrics.</para>
-
-          <para>Use one of the supported web browsers with Javascript
-          enabled.</para>
-
-          <itemizedlist spacing="compact">
-            <listitem>
-              <para>Internet Explorer® 8 (or later)</para>
-            </listitem>
-
-            <listitem>
-              <para>Firefox™ 3.0 (or later.)</para>
-
-              <!--***Add additional browsers when approved-->
-            </listitem>
-
-            <listitem>
-              <para>Google Chrome 10 (or later)</para>
-            </listitem>
-          </itemizedlist>
-
-          <para>If browser security is set to <emphasis
-          role="bold">High</emphasis>, you should add ECLWatch as a Trusted
-          Site to allow Javascript execution.</para>
-        </listitem>
-
-        <listitem>
-          <para>Install the ECL IDE</para>
-
-          <para>The ECL IDE (Integrated Development Environment) is the tool
-          used to create queries into your data and ECL files with which to
-          build your queries.</para>
-
-          <para>From the ECLWatch web page, download the Windows install set.
-          If the link is not visible, either follow the link to the HPCC
-          System's portal or install the Optional Packages.</para>
-
-          <para>You can reach this page using the following URL:</para>
-
-          <para>http://nnn.nnn.nnn.nnn:8010, where nnn.nnn.nnn.nnn is your
-          node's IP address.</para>
-
-          <para>The ECL IDE was designed to run on Windows machines. See the
-          appendix for instructions on running on Linux workstations using
-          Wine.</para>
-        </listitem>
-
-        <listitem>
-          <para>Microsoft VS 2008 C++ compiler (either Express or Professional
-          edition). This is needed if you are running Windows and want to
-          compile queries locally. This allows you to compile and run ECL code
-          on your Windows workstation.</para>
-        </listitem>
-
-        <listitem>
-          <para>GCC. This is needed if you are running under Linux and want to
-          compile queries locally on a standalone Linux machine, (although it
-          may already be available to you since it usually comes with the
-          operating system).</para>
-        </listitem>
-      </itemizedlist>
-    </sect1>
-  </chapter>
+  <xi:include href="Installing_and_RunningTheHPCCPlatform/Inst-Mods/Hardware.xml"
+              xpointer="Hardware-and-Software-Chapter"
+              xmlns:xi="http://www.w3.org/2001/XInclude" />
 
 
  <chapter id="HPCC-installation-and-startup">
    <title>HPCC Installation and Startup</title>
@@ -767,7 +262,7 @@
        packages will fail to install if their dependencies are missing from
        the target system.</para>
 
-        <para>Packages are available from the HPCC Systems website: <ulink
+        <para>Packages are available from the HPCC Systems<superscript>®</superscript> website: <ulink
        url="http://hpccsystems.com/download/free-community-edition">http://hpccsystems.com/download/free-community-edition</ulink></para>
 
        <para>To install the package, follow the appropriate installation
@@ -1278,7 +773,7 @@
 
 
        <para>This section details reconfiguring a system to use multiple
        nodes. Before you start this section, you must have already downloaded
-        the correct packages for your distro from the HPCC Systems website:
+        the correct packages for your distro from the HPCC Systems<superscript>®</superscript> website:
        <ulink
        url="http://hpccsystems.com/download/free-community-edition">http://hpccsystems.com/download/free-community-edition</ulink>.</para>
 
@@ -2573,7 +2068,7 @@ OUTPUT(ValidWords)
        </listitem>
      </itemizedlist></para>
 
-    <para>The HPCC Systems Portal is also a valuable resource for more
+    <para>The HPCC Systems<superscript>®</superscript> Portal is also a valuable resource for more
    information including:</para>
 
    <itemizedlist spacing="compact">
@@ -3372,8 +2867,9 @@ sudo /sbin/service hpcc-init -c esp start
          </listitem>
 
          <listitem>
-            <para>Start the HPCC Systems platform (restart if it is already
-            running) in order to read the new configuration.</para>
+            <para>Start the HPCC Systems<superscript>®</superscript> platform
+            (restart if it is already running) in order to read the new
+            configuration.</para>
 
 
            <para>For example :</para>
 
@@ -3394,9 +2890,9 @@ sudo /sbin/service hpcc-init -c esp start
 
 
            <para>Test the Java integration.</para>
 
-            <para>The HPCC Systems platform comes with a Java example class.
-            You can execute some Java code either in your ECL IDE or the ECL
-            Playground.</para>
+            <para>The HPCC Systems<superscript>®</superscript> platform comes
+            with a Java example class. You can execute some Java code either
+            in your ECL IDE or the ECL Playground.</para>
 
 
            <para>For example:</para>
 
@@ -3425,7 +2921,7 @@ add1(10);
      <sect2 id="Add_On_Javascript" role="brk">
        <title>JavaScript</title>
 
-        <para>To enable JavaScript support within the HPCC Systems
+        <para>To enable JavaScript support within the HPCC Systems<superscript>®</superscript>
        Platform:</para>
 
        <orderedlist>
@@ -3490,8 +2986,8 @@ add1(10);
      <sect2 id="Add_Python_support" role="brk">
        <title>Python</title>
 
-        <para>To enable Python support within the HPCC Systems
-        Platform:</para>
+        <para>To enable Python support within the HPCC
+        Systems<superscript>®</superscript> Platform:</para>
 
 
        <orderedlist>
          <listitem>
@@ -3548,7 +3044,7 @@ split_words('Once upon a time');
      <sect2 id="R" role="brk">
        <title>R</title>
 
-        <para>To enable R support within The HPCC Systems Platform:</para>
+        <para>To enable R support within The HPCC Systems<superscript>®</superscript> Platform:</para>
 
 
        <orderedlist>
          <listitem>

+ 1 - 1
esp/src/eclwatch/templates/DFUWUDetailsWidget.html

@@ -69,7 +69,7 @@
                                </li>
                                <li>
                                    <label for="${id}PercentDone">${i18n.PercentDone}:</label>
-                                    <div id="${id}PercentDone" style="width: 190px" data-dojo-props="maximum:10" data-dojo-type="dijit.ProgressBar"></div>
+                                    <div id="${id}PercentDone" style="width: 190px" data-dojo-props="maximum:100" data-dojo-type="dijit.ProgressBar"></div>
                                </li>
                                <li>
                                    <label for="${id}ProgressMessage">${i18n.ProgressMessage}:</label>

+ 48 - 6
initfiles/sbin/complete-uninstall.sh.in

@@ -39,6 +39,35 @@ message() {
MESSAGE_MARKER
}
 
+canonicalize_path() {
+    # canonicalize path argument by removing trailing slashes
+    # for test -h and readlink to work properly
+    local dir=${1}
+    if [ -z "${dir}" ] ; then
+        echo "${dir}"
+        return 0
+    fi
+    echo "${dir}" | sed 's/\/*$//'
+    return 0
+}
+
+removedir() {
+    local dir=$(canonicalize_path ${1})
+    # echo "canonicalized dir = ${dir}"
+    if [ -z "${dir}" ] ; then
+        return 0
+    fi
+    if [ ! -d "${dir}" ] ; then
+        return 0
+    fi
+    if [ -h "${dir}" ] ; then
+        # echo "${dir} is a soft link"
+        find ${dir}/ -depth -mindepth 1 -exec rm -rf {} \;
+    else
+        # echo "${dir} is not a soft link"
+        rm -rf ${dir}
+    fi
+}
 
 
force=0
leaveenv=0
@@ -62,6 +91,13 @@ done
 
 
set_environmentvars
 
+mklink=""
+lpath=$(canonicalize_path ${path})
+if [ -n "${lpath}" -a -h "${lpath}" ] ; then
+    mklink=$(readlink "${lpath}")
+    # echo "\"${lpath}\" is a soft-link to \"${mklink}\""
+fi
+
if [ -e /etc/debian_version ]; then
    echo "Removing DEB"
    if [ $force -eq 0 ]; then
@@ -88,24 +124,30 @@ elif [ -e /etc/redhat-release -o -e /etc/SuSE-release ]; then
fi
 
echo "Removing Directory - ${path}"
-rm -rf ${path}
+removedir ${path}
+
+if [ -n "${mklink}" -a -n "${lpath}" ] ; then
+    # echo "recreating soft-link"
+    ln -s "${mklink}" "${lpath}"
+    removedir ${lpath}
+fi
 
 
if [ $leaveenv -eq 0 ]; then
    echo "Removing Directory - ${configs}"
-    rm -rf ${configs}
+    removedir ${configs}
fi
 
echo "Removing Directory - ${lock}"
-rm -rf ${lock}
+removedir ${lock}
 
 
echo "Removing Directory - ${log}"
-rm -rf ${log}
+removedir ${log}
 
 
echo "Removing Directory - ${pid}"
-rm -rf ${pid}
+removedir ${pid}
 
 
echo "Removing Directory - ${runtime}"
-rm -rf ${runtime}
+removedir ${runtime}
 
 
echo "Removing user - ${user}"
if [ -e /usr/sbin/userdel ]; then

+ 34 - 23
plugins/fileservices/fileservices.cpp

@@ -1030,6 +1030,28 @@ ReplaceSuperFile(const varstring lsuperfn,const varstring lfn,const varstring by
FinishSuperFileTransaction(boolean rollback=false);
*/
 
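+// Scope guard: if no super-file transaction is already active, start one
+// implicitly and commit it when this object goes out of scope.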
+class CImplicitSuperTransaction
+{
+    IDistributedFileTransaction *transaction;
+public:
+    CImplicitSuperTransaction(IDistributedFileTransaction *_transaction)
+    {
+        if (!_transaction->active()) // then created implicitly
+        {
+            transaction = _transaction;
+            transaction->start();
+        }
+        else
+            transaction = NULL;
+    }
+    ~CImplicitSuperTransaction()
+    {
+        if (transaction)
+            transaction->commit();
+    }
+};
+
+
static bool lookupSuperFile(ICodeContext *ctx, const char *lsuperfn, Owned<IDistributedSuperFile> &file, bool throwerr, StringBuffer &lsfn, bool allowforeign, bool cacheFiles=false)
{
    lsfn.clear();
@@ -1122,6 +1144,7 @@ FILESERVICES_API unsigned FILESERVICES_CALL fsGetSuperFileSubCount(ICodeContext
    Owned<ISimpleSuperFileEnquiry> enq = getSimpleSuperFileEnquiry(ctx, lsuperfn);
    if (enq)
        return enq->numSubFiles();
+    CImplicitSuperTransaction implicitTransaction(ctx->querySuperFileTransaction());
    Owned<IDistributedSuperFile> file;
    StringBuffer lsfn;
    lookupSuperFile(ctx, lsuperfn, file, true, lsfn, true);
@@ -1139,6 +1162,7 @@ FILESERVICES_API char *  FILESERVICES_CALL fsGetSuperFileSubName(ICodeContext *c
            return CTXSTRDUP(parentCtx, "");
        return ret.detach();
    }
+    CImplicitSuperTransaction implicitTransaction(ctx->querySuperFileTransaction());
    Owned<IDistributedSuperFile> file;
    StringBuffer lsfn;
    lookupSuperFile(ctx, lsuperfn, file, true, lsfn, true);
@@ -1157,6 +1181,7 @@ FILESERVICES_API unsigned FILESERVICES_CALL fsFindSuperFileSubName(ICodeContext
        unsigned n = enq->findSubName(lfn.str());
        return (n==NotFound)?0:n+1;
    }
+    CImplicitSuperTransaction implicitTransaction(ctx->querySuperFileTransaction());
    Owned<IDistributedSuperFile> file;
    StringBuffer lsfn;
    lookupSuperFile(ctx, lsuperfn, file, true, lsfn, true);
@@ -1190,27 +1215,6 @@ FILESERVICES_API void FILESERVICES_CALL fsAddSuperFile(IGlobalCodeContext *gctx,
}
 
 
-class CImplicitSuperTransaction
-{
-    IDistributedFileTransaction *transaction;
-public:
-    CImplicitSuperTransaction(IDistributedFileTransaction *_transaction)
-    {
-        if (!_transaction->active()) // then created implicitly
-        {
-            transaction = _transaction;
-            transaction->start();
-        }
-        else
-            transaction = NULL;
-    }
-    ~CImplicitSuperTransaction()
-    {
-        if (transaction)
-            transaction->commit();
-    }
-};
-
FILESERVICES_API void FILESERVICES_CALL fslAddSuperFile(ICodeContext *ctx, const char *lsuperfn,const char *_lfn,unsigned atpos,bool addcontents, bool strict)
{
    Owned<IDistributedSuperFile> file;
@@ -1708,6 +1712,7 @@ FILESERVICES_API void FILESERVICES_CALL fsSuperFileContents(ICodeContext *ctx, s
        }
    }
    else {
+        CImplicitSuperTransaction implicitTransaction(ctx->querySuperFileTransaction());
        Owned<IDistributedSuperFile> file;
        StringBuffer lsfn;
        lookupSuperFile(ctx, lsuperfn, file, true, lsfn, true);
@@ -2167,12 +2172,18 @@ FILESERVICES_API void  FILESERVICES_CALL fsDeleteExternalFile(ICodeContext * ctx
    AuditMessage(ctx,"DeleteExternalFile",path);
}
 
 
-FILESERVICES_API void  FILESERVICES_CALL fsCreateExternalDirectory(ICodeContext * ctx,const char *location,const char *path)
+FILESERVICES_API void  FILESERVICES_CALL fsCreateExternalDirectory(ICodeContext * ctx,const char *location,const char *_path)
{
    SocketEndpoint ep(location);
    if (ep.isNull())
-        throw MakeStringException(-1,"fsCreateExternalDirectory: Cannot resolve location %s",location);
+        throw MakeStringException(-1, "fsCreateExternalDirectory: Cannot resolve location %s",location);
    CDfsLogicalFileName lfn;
+    StringBuffer path(_path);
+    if (0 == path.length())
+        throw MakeStringException(-1, "fsCreateExternalDirectory: empty directory");
+    // remove trailing path separator if present to make it look like a regular LFN after lfn.setExternal
+    if (isPathSepChar(path.charAt(path.length()-1)))
+        path.remove(path.length()-1, 1);
    lfn.setExternal(location,path);
    checkExternalFileRights(ctx,lfn,false,true);
     RemoteFilename rfn;
     RemoteFilename rfn;