
Merge branch 'candidate-5.2.0'

Signed-off-by: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 10 years ago
Commit
0b258b2209

+ 226 - 162
docs/HPCCSystemAdmin/HPCCSystemAdministratorsGuide.xml

@@ -29,8 +29,8 @@
       <para>LexisNexis and the Knowledge Burst logo are registered trademarks
       of Reed Elsevier Properties Inc., used under license.</para>
 
-      <para>HPCC Systems is a registered trademark of LexisNexis Risk Data
-      Management Inc.</para>
+      <para>HPCC Systems<superscript>®</superscript> is a registered trademark
+      of LexisNexis Risk Data Management Inc.</para>
 
       <para>Other products, logos, and services may be trademarks or
       registered trademarks of their respective companies.</para>
@@ -48,7 +48,7 @@
     <xi:include href="common/Version.xml" xpointer="DateVer"
                 xmlns:xi="http://www.w3.org/2001/XInclude" />
 
-    <corpname>HPCC Systems</corpname>
+    <corpname>HPCC Systems<superscript>®</superscript></corpname>
 
     <xi:include href="common/Version.xml" xpointer="Copyright"
                 xmlns:xi="http://www.w3.org/2001/XInclude" />
@@ -61,7 +61,8 @@
   </bookinfo>
 
   <chapter>
-    <title>Introducing HPCC Systems Administraton</title>
+    <title>Introducing HPCC Systems<superscript>®</superscript>
+    Administration</title>
 
     <sect1 id="HPCC_SysAdminIntro" role="nobrk">
       <title>Introduction</title>
@@ -81,9 +82,9 @@
     <sect1 id="HPCC_Architectural_Overview">
       <title>Architectural Overview</title>
 
-      <para>An HPCC Systems Platform consists of the following components:
-      Thor, Roxie, ESP Server, Dali, Sasha, DFU Server, and ECLCC Server. LDAP
-      security is optionally available.</para>
+      <para>An HPCC Systems<superscript>®</superscript> Platform consists of
+      the following components: Thor, Roxie, ESP Server, Dali, Sasha, DFU
+      Server, and ECLCC Server. LDAP security is optionally available.</para>
 
       <para><figure>
           <title>HPCC Architectural Diagram</title>
@@ -153,11 +154,12 @@
         <sect3>
           <title>ECL Agent</title>
 
-          <para>The ECL Agent can act as a single-node cluster. That is called
-          spawning an hThor cluster. hThor is used to process simple jobs that
-          would otherwise be an inefficient use of Thor. For simple tasks, the
-          ECL Agent will make a determination and perform the execution itself
-          by acting as an hThor cluster. <figure>
+          <para>The ECL Agent's primary function is to send the job to execute
+          on the appropriate cluster. The ECL Agent can act as a single-node
+          cluster. That is called spawning an hThor cluster. hThor is used to
+          process simple jobs that would otherwise be an inefficient use of
+          Thor. For simple tasks, the ECL Agent will make a determination and
+          perform the execution itself by acting as an hThor cluster. <figure>
               <title>Clusters</title>
 
               <mediaobject>
@@ -236,7 +238,7 @@
           <para>When you submit workunits for execution on Thor, they are
           first converted to executable code by the ECLCC Server.</para>
 
-          <para>When you submit a Workunit to Roxie, code is compiled and
+          <para>When you submit a workunit to Roxie, code is compiled and
           later published to the Roxie cluster, where it is available to
           execute multiple times.</para>
 
@@ -284,8 +286,9 @@
               </listitem>
             </itemizedlist></para>
 
-          <para>Examples of protocols supported by the ESP Server framework
-          include: HTTP, HTTPS, SOAP, and JSON.</para>
+          <para>The ESP Server supports both XML and JSON formats.</para>
+
+          <!--formerly : protocols - HTTP, HTTPS, SOAP, and JSON - -->
         </sect3>
 
         <sect3>
@@ -293,7 +296,7 @@
 
           <para>You can incorporate a Lightweight Directory Access Protocol
           (LDAP) server to work with Dali to enforce the security restrictions
-          for data, file, workunit scopes, and feature access.</para>
+          for file scopes, workunit scopes, and feature access.</para>
 
           <para>When LDAP is configured, you need to authenticate when
           accessing ECL Watch, WsECL, ECL IDE, or any other client tools.
@@ -330,16 +333,16 @@
           to create and execute queries into your data on an HPCC platform
           using Enterprise Control Language (ECL). Eclipse is open-source, and
           multi-platform and it can be used to interface with your data and
-          workunits on HPCC. The ECL plug-in for Eclipse is also open
-          source.</para>
+          workunits on HPCC. The ECL plug-in for Eclipse is also
+          open-source.</para>
         </sect3>
 
         <sect3>
           <title>ECL IDE</title>
 
-          <para>ECL IDE is a full-featured GUI for ECL development providing
-          access to the ECL repository and many of the ECL Watch capabilities.
-          ECL IDE uses various ESP services via SOAP.</para>
+          <para>ECL IDE is a full-featured GUI providing access to your ECL
+          code for ECL development. ECL IDE uses various ESP services via
+          SOAP.</para>
 
           <para>The ECL IDE provides access to ECL Definitions to build your
           queries. These definitions are created by coding an expression that
@@ -370,7 +373,7 @@
               </listitem>
 
               <listitem>
-                <para>See status of all system servers.</para>
+                <para>See the status of all system servers.</para>
               </listitem>
 
               <listitem>
@@ -394,6 +397,9 @@
           provide command line access to functionality provided by the ECL
           Watch web pages. They work by communicating with the corresponding
           ESP service via SOAP.</para>
+
+          <para>See the <emphasis>Client Tools</emphasis> Manual for more
+          details.</para>
         </sect3>
       </sect2>
     </sect1>
@@ -401,12 +407,159 @@
     <!--Inclusion-from-ClientTool-As-Sect1: REMOVED-->
   </chapter>
 
+  <chapter>
+    <title>Hardware and Software Requirements</title>
+
+    <para>This chapter describes the hardware and software requirements for
+    running HPCC. HPCC is designed to run on commodity hardware, which makes
+    building and maintaining large-scale (petabyte) clusters economically
+    feasible. When planning your cluster hardware, you will need to balance a
+    number of considerations.</para>
+
+    <para>This section provides some insight into the sort of hardware and
+    infrastructure on which HPCC works best. It is not an exhaustive set of
+    instructions, nor a mandate on what hardware you must have. Consider it a
+    guide to use when looking to implement or scale your HPCC system, and
+    weigh these suggestions against your specific enterprise needs.</para>
+
+    <xi:include href="Installing_and_RunningTheHPCCPlatform/Inst-Mods/Hardware.xml"
+                xpointer="HW-Switch"
+                xmlns:xi="http://www.w3.org/2001/XInclude" />
+
+    <xi:include href="Installing_and_RunningTheHPCCPlatform/Inst-Mods/Hardware.xml"
+                xpointer="HW-LoadBalancer"
+                xmlns:xi="http://www.w3.org/2001/XInclude" />
+
+    <xi:include href="Installing_and_RunningTheHPCCPlatform/Inst-Mods/Hardware.xml"
+                xpointer="Nodes-Hardware"
+                xmlns:xi="http://www.w3.org/2001/XInclude" />
+
+    <xi:include href="HPCCSystemAdmin/SA-Mods/SysAdminConfigMod.xml"
+                xpointer="System_sizings"
+                xmlns:xi="http://www.w3.org/2001/XInclude" />
+
+    <xi:include href="Installing_and_RunningTheHPCCPlatform/Inst-Mods/Hardware.xml"
+                xpointer="Nodes-Software"
+                xmlns:xi="http://www.w3.org/2001/XInclude" />
+
+    <xi:include href="Installing_and_RunningTheHPCCPlatform/Inst-Mods/Hardware.xml"
+                xpointer="workstation-requirements"
+                xmlns:xi="http://www.w3.org/2001/XInclude" />
+  </chapter>
+
+  <chapter>
+    <title>Hardware and Component Sizing</title>
+
+    <para>This section provides some insight into the sort of hardware and
+    infrastructure on which HPCC works best. It is not an exhaustive set of
+    instructions, nor a mandate on what hardware you must have. Consider it a
+    guide to use when looking to implement or scale your HPCC system, and
+    weigh these suggestions against your specific enterprise needs.</para>
+
+    <para>HPCC is designed to run on commodity hardware, which makes building
+    and maintaining large scale (petabytes) clusters economically feasible.
+    When planning your cluster hardware, you will need to balance a number of
+    considerations, including fail-over domains and potential performance
+    issues. Hardware planning should include distributing HPCC across multiple
+    physical hosts, such as a cluster. Generally, it is best practice to run
+    HPCC processes of a particular type (for example, Thor, Roxie, or Dali) on
+    a host configured specifically for that type of process.</para>
+
+    <sect1>
+      <title>Thor Hardware</title>
+
+      <para>Thor slave nodes require a proper balance of CPU, RAM, network,
+      and disk I/O in order to operate most efficiently. A single Thor slave
+      node works optimally when allocated 4 CPU cores, 8GB RAM, 1Gb/sec
+      network and 200MB/sec sequential read/write disk I/O.</para>
+
+      <para>Some hardware architectures provide more capacity within a single
+      physical server than one Thor slave needs. In such cases, you can use a
+      multi-slave setup to configure your larger physical servers to run
+      multiple Thor slave nodes per physical server.</para>
+
+      <para>It is important to note that HPCC by nature is a parallel
+      processing system and all Thor slave nodes will be exercising at
+      precisely the same time. So when allocating more than one HPCC Thor
+      slave per physical machine, ensure that each slave meets the recommended
+      requirements.</para>
+
+      <para>For instance, 1 physical server with 48 cores, 96GB RAM, 10Gb/sec
+      network and 2GB/sec sequential I/O would be capable of running ten (10)
+      HPCC Thor slaves at optimal efficiency. The order of optimization for
+      resource usage in a Thor slave node is disk I/O 60%, network 30%, and
+      CPU 10%. Any increase in sequential I/O will have the most impact on
+      speed, followed by improvements in network, followed by improvements in
+      CPU.</para>
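A minimal sketch of the slave-density arithmetic above, assuming the per-slave
recommendations quoted in this section and the example server's specifications
(the variable names are illustrative only, not part of any HPCC tooling):

  # per-slave recommendations quoted in this section
  CORES_PER_SLAVE=4; RAM_PER_SLAVE_GB=8; IO_PER_SLAVE_MBS=200
  # the example physical server described above
  SERVER_CORES=48; SERVER_RAM_GB=96; SERVER_IO_MBS=2000
  by_cpu=$(( SERVER_CORES / CORES_PER_SLAVE ))    # 12
  by_ram=$(( SERVER_RAM_GB / RAM_PER_SLAVE_GB ))  # 12
  by_io=$(( SERVER_IO_MBS / IO_PER_SLAVE_MBS ))   # 10 -- disk I/O is the limiting factor
  slaves=$by_cpu
  for n in $by_ram $by_io; do [ "$n" -lt "$slaves" ] && slaves=$n; done
  echo "Thor slaves this server can host at optimal efficiency: $slaves"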
+
+      <para>Network architecture is also an important consideration. HPCC Thor
+      nodes work optimally in a streamlined network architecture between all
+      Thor slave processes.</para>
+
+      <para>RAID is recommended and all RAID levels suitable for sequential
+      read/write operations and high availability are acceptable. For example,
+      RAID1, RAID10, RAID5 (preferred), and RAID6.</para>
+    </sect1>
+
+    <sect1>
+      <title>Roxie Hardware Configurations</title>
+
+      <para>HPCC Roxie processes require a proper, yet different (from Thor),
+      balance of CPU, RAM, network, and disk I/O in order to ensure efficient
+      operation. A single HPCC Roxie node works optimally when allocated 6 or
+      more CPU cores, 24GB RAM, a 1Gb/sec network backbone, and 400 4k random
+      read IOPS.</para>
+
+      <para>Each HPCC Roxie node is presented two hard drives, each capable of
+      200 4k random seek IOPS. Hard drive recommendations for Roxie efficiency
+      are 15K SAS or SSD. A good rule of thumb is that the more random read
+      IOPS available, the better and faster your Roxie will perform.</para>
+
+      <para>Running multiple HPCC Roxie nodes on a single physical server is
+      not recommended, except in the cases of virtualization or
+      containers.</para>
+    </sect1>
+
+    <sect1>
+      <title>Dali and Sasha Hardware Configurations</title>
+
+      <para>HPCC Dali processes store cluster metadata in RAM. For optimal
+      efficiency, provide at least 48GB of RAM, 6 or more CPU cores, 1Gb/sec
+      network interface and a high availability disk for a single HPCC Dali.
+      HPCC's Dali processes are one of the few active/passive components.
+      Using standard “swinging disk” clustering is recommended for a high
+      availability setup. For a single HPCC Dali process, any suitable High
+      Availability (HA) RAID level is fine.</para>
+
+      <para>Sasha does not store any data. Sasha reads data from Dali then
+      processes it. Sasha does store archived workunits (WUs) on a disk.
+      Allocating a larger disk for Sasha reduces the amount of housekeeping
+      needed. Since Sasha assists Dali by performing housekeeping, it works
+      best when on its own node. You should avoid putting Sasha and Dali on
+      the same node.</para>
+    </sect1>
+
+    <sect1>
+      <title>Other HPCC Components</title>
+
+      <para>ECL Agent, ECLCC Server, DFU Server, the Thor master, and ECL
+      Watch are administrative processes that support the main
+      clusters.</para>
+
+      <para>For maximum efficiency you should provide 24GB RAM, 6+ CPU cores,
+      1Gb/sec network and high availability disk(s). These components can be
+      made highly available in an active/active fashion.</para>
+    </sect1>
+  </chapter>
+
   <chapter id="Routine_Maintenance">
     <title>Routine Maintenance</title>
 
-    <para>There is some care required to ensure that your HPCC system keeps
-    operating optimally. The following sections address the routine
-    maintenance tasks for your HPCC system.</para>
+    <para>In order to ensure that your HPCC system keeps running optimally,
+    some care and maintenance is required. The following sections address
+    routine maintenance tasks for your HPCC system.</para>
 
     <!--***SYSTEM HEALTH CHECK UP***TO COME***-->
 
@@ -417,16 +570,16 @@
       essential data. Devise a back up strategy to meet the needs of your
       organization. This section is not meant to replace your current back up
       strategy, instead this section supplements it by outlining special
-      considerations for HPCC Systems.</para>
+      considerations for HPCC Systems<superscript>®</superscript>.</para>
 
       <sect2>
         <title>Back Up Considerations</title>
 
         <para>You probably already have some sort of a back up strategy in
-        place, by adding HPCC Systems into your operating environment there
-        are some additional considerations to be aware of. The following
-        sections discuss back up considerations for the individual HPCC system
-        components.</para>
+        place. When you add HPCC Systems<superscript>®</superscript> to your
+        operating environment, there are some additional considerations to be
+        aware of. The following sections discuss back up considerations for
+        the individual HPCC system components.</para>
 
         <sect3>
           <title>Dali</title>
@@ -499,10 +652,10 @@
           <title>Thor</title>
 
           <para>Thor, the data refinery, as one of the critical components of
-          HPCC Systems needs to be backed up. Back up Thor by configuring
-          replication and setting up a nightly back up cron task. Back up Thor
-          on demand before and/or after any node swap or drive swap if you do
-          not have a RAID configured.</para>
+          HPCC Systems<superscript>®</superscript> needs to be backed up. Back
+          up Thor by configuring replication and setting up a nightly back up
+          cron task. Back up Thor on demand before and/or after any node swap
+          or drive swap if you do not have a RAID configured.</para>
 
           <para>A very important part of administering Thor is to check the
           logs to ensure the previous back ups completed successfully.</para>
@@ -934,9 +1087,9 @@ lock=/var/lock/HPCCSystems</programlisting>
       servicing many, diverse applications has been less than optimal.</para>
 
       <para>HPCC makes setting up your Active Directory OU's relatively easy.
-      ESP creates all the OU's for you when it comes up, based on the settings
-      you defined in Configuration Manager. You can then start Dali/ESP and
-      use ECLWatch to add or modify users or groups.</para>
+      ESP creates all the OU's for you when it starts up, based on the
+      settings you defined in Configuration Manager. You can then start
+      Dali/ESP and use ECLWatch to add or modify users or groups.</para>
 
       <para>You can assign permissions to each user individually, however it
       is more manageable to assign these permissions to groups, and then add
@@ -950,8 +1103,9 @@ lock=/var/lock/HPCCSystems</programlisting>
       <sect2>
         <title>Active Directory, and LDAP Commonality</title>
 
-        <para>There are a few relevant notable terms, that may need some
-        further explanation. <variablelist>
+        <para>There are components that are common to both Active Directory
+        and LDAP. There are a few relevant terms that may need some further
+        explanation. <variablelist>
             <varlistentry>
               <term>filesBasedn</term>
 
@@ -1107,8 +1261,8 @@ lock=/var/lock/HPCCSystems</programlisting>
 
         <para>You can run multiple active instances of the ECLCC Server for
         redundancy. There is no need for a load balancer or VIP for this
-        either. Will routinely check for workunits. Should one fail, the
-        other(s) will continue to compile.</para>
+        either. Each instance will routinely check for workunits. Should one
+        fail, the other(s) will continue to compile.</para>
       </sect2>
 
       <sect2>
@@ -1167,10 +1321,6 @@ lock=/var/lock/HPCCSystems</programlisting>
         server. One primary, or active, and the other passive. No load
         balancer needed. If the active instance fails, then you can fail over
         to the passive.</para>
-
-        <para>Make sure you give significant resources to your key components.
-        Dali is RAM intensive. Eclagent and Eclserver are processor dependent.
-        Thor should have a minimum of 4GB RAM per node.</para>
       </sect2>
     </sect1>
 
@@ -1243,16 +1393,15 @@ lock=/var/lock/HPCCSystems</programlisting>
           <para>Thor has the ability to do a “Thor copy” which copies data
           from one cluster to another. You can also do this through ECL code.
           Additionally, you may decide you don’t want, or need to have a “hot”
-          DR Thor. In that case, the most common disasters [minor] (major
-          switch outage, total power down, multiple fiber cuts) cause only a
+          DR Thor. In that case, the most common minor disasters cause only a
           relatively brief, less than 1 day disaster. Since Thor is
           responsible for creating data updates it can take a day or a few to
           recover. The data just is not quite as fresh but as long as the
           Roxies are replicated the data is still flowing. In the case of a
-          major disaster (a major earthquake, or a tidal wave), the likelihood
-          of that occurring does not justify the cost of preventing against
-          it. It could also take between 7 to 14 days to recover by building
-          out a whole new Thor cluster.</para>
+          major disaster, such as a major earthquake, a tidal wave, extended
+          total power loss, or multiple fiber cuts, the systems could be out
+          for a day or more. The likelihood of such an event may not justify
+          the cost of protecting against it.</para>
         </sect3>
 
         <sect3>
@@ -1336,115 +1485,9 @@ lock=/var/lock/HPCCSystems</programlisting>
       </sect2>
     </sect1>
 
-    <sect1 id="Sample_sizings">
-      <title>System Sizings</title>
-
-      <para>This section provides some guidance in determining the sizing
-      requirements for an initial installation. The following are some
-      suggested sample configuration guides that can be helpful when planning
-      your system.</para>
-
-      <sect2>
-        <title>Sample Sizing for High Data volume (Typical)</title>
-
-        <para>The most typical scenario for HPCC is utilizing it with a high
-        volume of data. This suggested sample sizing would be appropriate for
-        a site with large volumes of data. A good policy is to set the Thor
-        size to 4 times the source data on your HPCC. Typically, Roxie would
-        be about ¼ the size of Thor. This is because the data is compressed
-        and the system does not hold any transient data in Roxie.</para>
-
-        <sect3>
-          <title>High Data Thor sizing considerations</title>
-
-          <para>Each Thor node can hold about 2.5 TB of data (MAX), so plan
-          for the number of Thor nodes accordingly for your data.</para>
-
-          <para>If possible, SAS drives for both Thor and Roxie as they almost
-          equal to SATA drives now. If not for both, get SAS drives at least
-          for your Roxie cluster.</para>
-
-          <para>Thor replicates data, typically configured for 2
-          copies.</para>
-        </sect3>
-
-        <sect3>
-          <title>High Data Roxie sizing considerations</title>
-
-          <para>Roxie keeps most of its data in memory, so you should allocate
-          plenty of memory for Roxie. Calculate the approximate size of your
-          data, and allocate appropriately. You should either increase the
-          number of nodes, or increase the amount of memory.</para>
-
-          <para>A good practice is to allocate a Dali for every Roxie
-          cluster.</para>
-
-          <para>Roxie+Dali needs to have a mirror. This is because, when you
-          need to update indexes, you update the mirror and make that primary
-          and bring the other one down. This is not really a necessity except
-          for high availability and performance requirements.</para>
-        </sect3>
-      </sect2>
-
-      <sect2>
-        <title>Sample Sizing for Heavy Processing on Low Data Volume</title>
-
-        <para>The following section provides some sample sizing for heavy
-        processing with approximately the amount of data indicated.</para>
-
-        <sect3>
-          <title>750 GB of Raw Data</title>
-
-          <para>Thor = 3 (slaves) + 2 (management) = 5 Nodes</para>
-
-          <para>Roxie = 3 (agents) + 1 (Dali) = 4 Nodes (This will mean that
-          the environment will be down during query deployment)</para>
-
-          <para>Spares = 2</para>
-
-          <para>Total = 13 nodes</para>
-        </sect3>
-
-        <sect3>
-          <title>1250 GB of Raw Data</title>
-
-          <para>Thor = 6 (slaves) + 2 (management) = 8 Nodes</para>
-
-          <para>Roxie = 4 (agents) + 1 (Dali) = 5 Nodes (This will mean that
-          the environment will be down during query deployment)</para>
-
-          <para>Spares = 2</para>
-
-          <para>Total = 17 nodes</para>
-        </sect3>
-
-        <sect3>
-          <title>2000 GB of Raw Data</title>
-
-          <para>Thor = 8 (slaves) + 3 (management) = 11 Nodes</para>
-
-          <para>Roxie = 4 (agents) + 1 (Dali) = 5 Nodes (This will mean that
-          the environment will be down during query deployment)</para>
-
-          <para>Spares = 2</para>
-
-          <para>Total = 20 nodes</para>
-        </sect3>
-
-        <sect3>
-          <title>3500 GB of Raw Data</title>
-
-          <para>Thor = 12 (slaves) + 5 (management) = 17 Nodes</para>
-
-          <para>Roxie = 6 (agents) + 1 (Dali) = 7 Nodes (This will mean that
-          the environment will be down during query deployment)</para>
-
-          <para>Spares = 2</para>
-
-          <para>Total = 28 nodes</para>
-        </sect3>
-      </sect2>
-    </sect1>
+    <xi:include href="HPCCSystemAdmin/SA-Mods/SysAdminConfigMod.xml"
+                xpointer="Sample_Sizings"
+                xmlns:xi="http://www.w3.org/2001/XInclude" />
   </chapter>
 
   <chapter id="Resources">
@@ -1456,8 +1499,9 @@ lock=/var/lock/HPCCSystems</programlisting>
       <title>HPCC Resources</title>
 
       <para>The resources link can be found under the Operations Icon link.
-      The resources link in ECL Watch provides a link to the HPCC Systems web
-      portal. Visit the HPCC Systems Web Portal at <ulink
+      The resources link in ECL Watch provides a link to the HPCC
+      Systems<superscript>®</superscript> web portal. Visit the HPCC
+      Systems<superscript>®</superscript> Web Portal at <ulink
       url="http://hpccsystems.com/">http://hpccsystems.com/</ulink> for
       software updates, plug-ins, support, documentation, and more. This is
       where you can find resources useful for running and maintaining HPCC on
@@ -1469,5 +1513,25 @@ lock=/var/lock/HPCCSystems</programlisting>
       This is the page where you can download Installation packages, virtual
       images, source code, documentation, and tutorials.</para>
     </sect1>
+
+    <sect1>
+      <title>Additional Resources</title>
+
+      <para>Additional help for learning ECL is also available in the form of
+      online courses.</para>
+
+      <para><ulink
+      url="https://learn.lexisnexis.com/lexisnexis/resources/courses">https://learn.lexisnexis.com/lexisnexis/resources/courses
+      </ulink></para>
+
+      <para>Training videos are also available online.</para>
+
+      <para><ulink
+      url="https://learn.lexisnexis.com/lexisnexis/resources/courses/HPCC/Summit2014/NewECLWatch50Features/NewECLWatch50Features.html">Legacy
+      ECL Watch and New 5.0 ECL Watch</ulink></para>
+
+      <para>This video provides a quick summary of the differences in the
+      interface and goes into particular detail that is helpful for learning
+      how to deploy Roxies.</para>
+    </sect1>
   </chapter>
 </book>

+ 275 - 0
docs/HPCCSystemAdmin/SA-Mods/SysAdminConfigMod.xml

@@ -370,4 +370,279 @@ sudo -u hpcc cp /etc/HPCCSystems/source/NewEnvironment.xml /etc/HPCCSystems/envi
       </sect2>
     </sect1>
   </chapter>
+
+  <chapter id="Advance-SysAdmin-Topic-Chapter">
+    <title>Advanced Systems Administrator Topics</title>
+
+    <para>This chapter contains information about certain advanced HPCC
+    Systems<superscript>®</superscript> Administrator topics.</para>
+
+    <sect1 id="Admin-System-Topic">
+      <title>Admin System Topics</title>
+
+      <para>This is a System Administration topic designed to provide some
+      insight into an aspect of System Administration for your HPCC
+      System.</para>
+    </sect1>
+
+    <sect1 id="System_sizings">
+      <title>System Sizings</title>
+
+      <para>This section provides some guidance in determining the sizing
+      requirements for an initial installation of HPCC. The following are some
+      suggested configuration guides that can be helpful when planning your
+      system.</para>
+
+      <sect2 role="nobrk">
+        <title>Minimum Suggested Hardware</title>
+
+        <para>HPCC was designed to run on common commodity hardware, and could
+        function on even lesser hardware. The following list gives the
+        suggested minimum hardware specifications you should consider for your
+        HPCC system. These guidelines were put together based on real-world
+        usage of mission-critical, high-uptime systems with high volumes of
+        data. <informaltable border="all"
+            colsep="1" rowsep="1">
+            <tgroup cols="3">
+              <colspec colwidth="94.50pt" />
+
+              <colspec colwidth="84.50pt" />
+
+              <tbody>
+                <row>
+                  <entry><emphasis role="bold">Thor slave</emphasis></entry>
+
+                  <entry>Processor</entry>
+
+                  <entry>4 x 64-bit Intel Processor per</entry>
+                </row>
+
+                <row>
+                  <entry></entry>
+
+                  <entry>RAM</entry>
+
+                  <entry>8GB per daemon</entry>
+                </row>
+
+                <row>
+                  <entry></entry>
+
+                  <entry>Storage</entry>
+
+                  <entry>RAID - 200MB/sec Sequential Read/Write per
+                  node</entry>
+                </row>
+
+                <row>
+                  <entry></entry>
+
+                  <entry>Network</entry>
+
+                  <entry>1 Gb/sec bandwidth</entry>
+                </row>
+
+                <row>
+                  <entry><emphasis role="bold">Roxie</emphasis></entry>
+
+                  <entry>Processor</entry>
+
+                  <entry>4 x 64-bit Intel Processor</entry>
+                </row>
+
+                <row>
+                  <entry></entry>
+
+                  <entry>RAM</entry>
+
+                  <entry>12GB per Roxie</entry>
+                </row>
+
+                <row>
+                  <entry></entry>
+
+                  <entry>Storage</entry>
+
+                  <entry>400 IOPS &amp; 2 Volumes per (RAID optional)</entry>
+                </row>
+
+                <row>
+                  <entry></entry>
+
+                  <entry>Network</entry>
+
+                  <entry>1 Gb/sec bandwidth</entry>
+                </row>
+
+                <row>
+                  <entry><emphasis role="bold">Dali</emphasis></entry>
+
+                  <entry>Processor</entry>
+
+                  <entry>4 x 64-bit Intel Processor each</entry>
+                </row>
+
+                <row>
+                  <entry></entry>
+
+                  <entry>RAM</entry>
+
+                  <entry>24GB per Dali</entry>
+                </row>
+
+                <row>
+                  <entry></entry>
+
+                  <entry>Storage</entry>
+
+                  <entry>RAID 1, 5, 6, 10 Volume 200GB</entry>
+                </row>
+
+                <row>
+                  <entry><emphasis role="bold">Other</emphasis></entry>
+
+                  <entry>Processor</entry>
+
+                  <entry>4 x 64-bit Intel Processor</entry>
+                </row>
+
+                <row>
+                  <entry></entry>
+
+                  <entry>RAM</entry>
+
+                  <entry>12GB</entry>
+                </row>
+
+                <row>
+                  <entry></entry>
+
+                  <entry>Storage</entry>
+
+                  <entry>RAID 1, 5, 6, 10 Volume 200GB</entry>
+                </row>
+
+                <row>
+                  <entry></entry>
+
+                  <entry>Network</entry>
+
+                  <entry>1 Gb/sec bandwidth</entry>
+                </row>
+              </tbody>
+            </tgroup>
+          </informaltable></para>
+      </sect2>
+    </sect1>
+
+    <sect1 id="Sample_Sizings">
+      <title>Sample Sizings</title>
+
+      <para>This section illustrates sample system sizings for various work
+      environments. Unlike system requirements, the following samples are
+      suggestions for setting up your system for various operating
+      conditions.</para>
+
+      <sect2 id="Sample-Size-HighDataVolume">
+        <title>Sample Sizing for High Data volume (Typical)</title>
+
+        <para>The most typical scenario for HPCC is utilizing it with a high
+        volume of data. This suggested sample sizing would be appropriate for
+        a site with large volumes of data. A good policy is to set the Thor
+        size to 4 times the source data on your HPCC. Typically, Roxie would
+        be about ¼ the size of Thor. This is because the data is compressed
+        and the system does not hold any transient data in Roxie.</para>
+
+        <sect3>
+          <title>High Data Thor sizing considerations</title>
+
+          <para>Each Thor node can hold about 2.5 TB of data (MAX), so plan
+          for the number of Thor nodes accordingly for your data.</para>
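A minimal sketch of the sizing rules of thumb above (Thor space at 4 times the
source data, Roxie at roughly 1/4 of Thor, about 2.5 TB per Thor node); the
raw-data figure is only an illustrative assumption:

  RAW_GB=750                    # illustrative raw source data size
  THOR_GB=$(( RAW_GB * 4 ))     # Thor space = 4 x source data
  ROXIE_GB=$(( THOR_GB / 4 ))   # Roxie is typically about 1/4 the size of Thor
  PER_NODE_GB=2500              # ~2.5 TB of data per Thor node (MAX)
  THOR_NODES=$(( (THOR_GB + PER_NODE_GB - 1) / PER_NODE_GB ))   # ceiling division
  echo "Thor: ${THOR_GB} GB across ${THOR_NODES} node(s); Roxie: ~${ROXIE_GB} GB"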
+
+          <para>If possible, use SAS drives for both Thor and Roxie, as they
+          are now nearly on par with SATA drives. If not for both, get SAS
+          drives at least for your Roxie cluster.</para>
+
+          <para>Thor replicates data and is typically configured for two
+          copies.</para>
+        </sect3>
+
+        <sect3>
+          <title>High Data Roxie sizing considerations</title>
+
+          <para>Roxie keeps most of its data in memory, so you should allocate
+          plenty of memory for Roxie. Calculate the approximate size of your
+          data, and allocate appropriately. You should either increase the
+          number of nodes, or increase the amount of memory.</para>
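A minimal sketch of that trade-off, assuming the 24GB-per-node figure
recommended earlier in this chapter; the data size is only an illustrative
assumption:

  ROXIE_DATA_GB=240      # illustrative size of the data Roxie must keep in memory
  RAM_PER_NODE_GB=24     # RAM per Roxie node recommended earlier in this chapter
  NODES=$(( (ROXIE_DATA_GB + RAM_PER_NODE_GB - 1) / RAM_PER_NODE_GB ))   # ceiling division
  echo "Provision roughly ${NODES} Roxie nodes, or add RAM per node to use fewer nodes"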
+
+          <para>A good practice is to allocate a Dali for every Roxie
+          cluster.</para>
+
+          <para>Roxie should have a mirror. This is useful when you need to
+          update data: you update the mirror, make it the primary, and bring
+          the other one down. This is a good practice, but not really a
+          necessity except in the case of high availability.</para>
+        </sect3>
+      </sect2>
+
+      <sect2>
+        <title>Sample Sizing for Heavy Processing on Low Data Volume</title>
+
+        <para>The following section provides some sample sizing for heavy
+        processing with approximately the amount of data indicated.</para>
+
+        <sect3>
+          <title>750 GB of Raw Data</title>
+
+          <para>Thor = 3 (slaves) + 2 (management) = 5 Nodes</para>
+
+          <para>Roxie = 3 (agents) + 1 (Dali) = 4 Nodes (This will mean that
+          the environment will be down during query deployment)</para>
+
+          <para>Spares = 2</para>
+
+          <para>Total = 13 nodes</para>
+        </sect3>
+
+        <sect3>
+          <title>1250 GB of Raw Data</title>
+
+          <para>Thor = 6 (slaves) + 2 (management) = 8 Nodes</para>
+
+          <para>Roxie = 4 (agents) + 1 (Dali) = 5 Nodes (This will mean that
+          the environment will be down during query deployment)</para>
+
+          <para>Spares = 2</para>
+
+          <para>Total = 17 nodes</para>
+        </sect3>
+
+        <sect3>
+          <title>2000 GB of Raw Data</title>
+
+          <para>Thor = 8 (slaves) + 3 (management) = 11 Nodes</para>
+
+          <para>Roxie = 4 (agents) + 1 (Dali) = 5 Nodes (This will mean that
+          the environment will be down during query deployment)</para>
+
+          <para>Spares = 2</para>
+
+          <para>Total = 20 nodes</para>
+        </sect3>
+
+        <sect3>
+          <title>3500 GB of Raw Data</title>
+
+          <para>Thor = 12 (slaves) + 5 (management) = 17 Nodes</para>
+
+          <para>Roxie = 6 (agents) + 1 (Dali) = 7 Nodes (This will mean that
+          the environment will be down during query deployment)</para>
+
+          <para>Spares = 2</para>
+
+          <para>Total = 28 nodes</para>
+        </sect3>
+      </sect2>
+    </sect1>
+  </chapter>
 </book>

+ 569 - 0
docs/Installing_and_RunningTheHPCCPlatform/Inst-Mods/Hardware.xml

@@ -0,0 +1,569 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
+"http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+<book>
+  <bookinfo>
+    <title>Installing the HPCC Platform: Hardware Module</title>
+
+    <mediaobject>
+      <imageobject>
+        <imagedata fileref="../../images/redswooshWithLogo3.jpg" />
+      </imageobject>
+    </mediaobject>
+
+    <author>
+      <surname>Boca Raton Documentation Team</surname>
+    </author>
+
+    <legalnotice>
+      <para>We welcome your comments and feedback about this document via
+      email to <email>docfeedback@hpccsystems.com</email></para>
+
+      <para>Please include <emphasis role="bold">Documentation
+      Feedback</emphasis> in the subject line and reference the document name,
+      page numbers, and current Version Number in the text of the
+      message.</para>
+
+      <para>LexisNexis and the Knowledge Burst logo are registered trademarks
+      of Reed Elsevier Properties Inc., used under license.</para>
+
+      <para>HPCC Systems is a registered trademark of LexisNexis Risk Data
+      Management Inc.</para>
+
+      <para>Other products, logos, and services may be trademarks or
+      registered trademarks of their respective companies. All names and
+      example data used in this manual are fictitious. Any similarity to
+      actual persons, living or dead, is purely coincidental.</para>
+
+      <para></para>
+    </legalnotice>
+
+    <xi:include href="../../common/Version.xml" xpointer="FooterInfo"
+                xmlns:xi="http://www.w3.org/2001/XInclude" />
+
+    <xi:include href="../../common/Version.xml" xpointer="DateVer"
+                xmlns:xi="http://www.w3.org/2001/XInclude" />
+
+    <corpname>HPCC Systems</corpname>
+
+    <xi:include href="../../common/Version.xml" xpointer="Copyright"
+                xmlns:xi="http://www.w3.org/2001/XInclude" />
+
+    <mediaobject role="logo">
+      <imageobject>
+        <imagedata fileref="images/LN_Rightjustified.jpg" />
+      </imageobject>
+    </mediaobject>
+  </bookinfo>
+
+  <chapter id="Hardware-and-Software-Chapter">
+    <title>Hardware and Software Requirements</title>
+
+    <para>The following section describes the various hardware and software
+    required in order to run the HPCC.</para>
+
+    <sect1 id="HW-Switch" role="nobrk">
+      <title>Network Switch</title>
+
+      <para>A significant component of HPCC is the infrastructure it runs on,
+      specifically the switch.</para>
+
+      <sect2 id="Switch-Requirements">
+        <title>Switch requirements</title>
+
+        <itemizedlist spacing="compact">
+          <listitem>
+            <para>Sufficient number of ports to allow all nodes to be
+            connected directly to it;</para>
+          </listitem>
+
+          <listitem>
+            <para>IGMP v.2 support </para>
+          </listitem>
+
+          <listitem>
+            <para>IGMP snooping support</para>
+          </listitem>
+        </itemizedlist>
+
+        <para><emphasis role="bold">Small:</emphasis> For a very small test
+        system, almost any gigabit switch will suffice. These are inexpensive
+        and readily available in six to 20-port models.</para>
+
+        <para><figure>
+            <title>1 GigE 8-port Switch</title>
+
+            <mediaobject>
+              <imageobject>
+                <imagedata fileref="../../images/DHSMC8508T.jpg"
+                           vendor="hardwareSS" />
+              </imageobject>
+            </mediaobject>
+          </figure></para>
+
+        <para><emphasis role="bold">Medium</emphasis>: For medium sized (10-48
+        node) systems, we recommend using a Force10 s25, s50, s55, or s60
+        switch.</para>
+
+        <para><figure>
+            <title>Force10 S55 48-port Network Switch</title>
+
+            <mediaobject>
+              <imageobject>
+                <imagedata fileref="../../images/s55.jpg"
+                           vendor="hardwareSS,force10SS" />
+              </imageobject>
+            </mediaobject>
+          </figure></para>
+
+        <para><?hard-pagebreak ?><emphasis role="bold">Large</emphasis>: For
+        large (48-350 node) systems, the Force10 c150 or c300 are good
+        choices.</para>
+
+        <para><figure>
+            <title>Force 10 c150</title>
+
+            <mediaobject>
+              <imageobject>
+                <imagedata fileref="../../images/c150-lg.jpg"
+                           vendor="hardwareSS,force10SS" />
+              </imageobject>
+            </mediaobject>
+          </figure></para>
+
+        <para><?hard-pagebreak ?><emphasis role="bold">Very Large</emphasis>:
+        For very large (more than 300 nodes) systems, the Force10 e600 or e1200
+        are good choices.</para>
+
+        <para><figure>
+            <title>Force 10 e600 and e1200</title>
+
+            <mediaobject>
+              <imageobject>
+                <imagedata fileref="../../images/Force10_ExaScaleE6001200.jpg"
+                           vendor="hardwareSS,force10SS" />
+              </imageobject>
+            </mediaobject>
+          </figure></para>
+      </sect2>
+
+      <sect2 id="Switch-additional-recommend">
+        <title>Switch additional recommended features</title>
+
+        <para><itemizedlist mark="square" spacing="compact">
+            <listitem>
+              <para>Non-blocking backplane</para>
+            </listitem>
+
+            <listitem>
+              <para>Low latency (under 35usec)</para>
+            </listitem>
+
+            <listitem>
+              <para>Layer 3 switching</para>
+            </listitem>
+
+            <listitem>
+              <para>Managed and monitored (SNMP is a plus)</para>
+            </listitem>
+
+            <listitem>
+              <para>Port channel (port bundling) support</para>
+            </listitem>
+          </itemizedlist></para>
+      </sect2>
+    </sect1>
+
+    <sect1 id="HW-LoadBalancer">
+      <title>Load Balancer</title>
+
+      <para>In order to take full advantage of a Roxie cluster, a load
+      balancer is required. Each Roxie Node is capable of receiving requests
+      and returning results. Therefore, a load balancer distributes the load
+      in an efficient manner to get the best performance and avoid a potential
+      bottleneck.</para>
+
+      <para>We recommend the Web Accelerator product line from F5 Networks.
+      See <ulink
+      url="http://www.f5.com/pdf/products/big-ip-webaccelerator-ds.pdf">http://www.f5.com/pdf/products/big-ip-webaccelerator-ds.pdf
+      </ulink> for more information<phrase></phrase>.</para>
+
+      <para><figure>
+          <title>F5 Load Balancers</title>
+
+          <mediaobject>
+            <imageobject>
+              <imagedata fileref="../../images/IR-009a.jpg"
+                         vendor="hardwareSS,F5SS" />
+            </imageobject>
+          </mediaobject>
+        </figure></para>
+
+      <sect2>
+        <title>Load Balancer Requirements</title>
+
+        <sect3>
+          <title>Minimum requirements</title>
+
+          <para><itemizedlist spacing="compact">
+              <listitem>
+                <para>Throughput: 1Gbps Gigabit</para>
+              </listitem>
+
+              <listitem>
+                <para>Ethernet ports: 2</para>
+              </listitem>
+
+              <listitem>
+                <para>Balancing Strategy: Round Robin</para>
+              </listitem>
+            </itemizedlist></para>
+        </sect3>
+
+        <sect3>
+          <title>Standard requirements</title>
+
+          <para><itemizedlist spacing="compact">
+              <listitem>
+                <para>Throughput: 8Gbps</para>
+              </listitem>
+
+              <listitem>
+                <para>Gigabit Ethernet ports: 4</para>
+              </listitem>
+
+              <listitem>
+                <para>Balancing Strategy: Flexible (F5 iRules or
+                equivalent)</para>
+              </listitem>
+            </itemizedlist></para>
+        </sect3>
+
+        <sect3 role="brk">
+          <title>Recommended capabilities</title>
+
+          <para><itemizedlist spacing="compact">
+              <listitem>
+                <para>Ability to provide cyclic load rotation (not load
+                balancing).</para>
+              </listitem>
+
+              <listitem>
+                <para>Ability to forward SOAP/HTTP traffic</para>
+              </listitem>
+
+              <listitem>
+                <para>Ability to provide triangulation/n-path routing (traffic
+                incoming through the load balancer to the node, replies sent
+                out via the switch).</para>
+              </listitem>
+
+              <listitem>
+                <para>Ability to treat a cluster of nodes as a single entity
+                (for load balancing clusters not nodes)</para>
+
+                <para>or</para>
+              </listitem>
+
+              <listitem>
+                <para>Ability to stack or tier the load balancers for multiple
+                levels if not.</para>
+              </listitem>
+            </itemizedlist></para>
+        </sect3>
+      </sect2>
+    </sect1>
+
+    <sect1 id="Nodes-Hardware">
+      <title>Nodes-Hardware</title>
+
+      <para>The HPCC can run as a single node system or a multi node
+      system.</para>
+
+      <para>These hardware recommendations are intended for a multi-node
+      production system. A test system can use less stringent specifications.
+      Also, while it is easier to manage a system where all nodes are
+      identical, this is not required. However, it is important to note that
+      your system will only run as fast as its slowest node.</para>
+
+      <sect2 id="Node-Min-requirements">
+        <title>Node minimum requirements</title>
+
+        <itemizedlist mark="square" spacing="compact">
+          <listitem>
+            <para>Pentium 4 or newer CPU</para>
+          </listitem>
+
+          <listitem>
+            <para>32-bit</para>
+          </listitem>
+
+          <listitem>
+            <para>1GB RAM per slave</para>
+
+            <para>(Note: If you configure more than 1 slave per node, memory
+            is shared. For example, if you want 2 slaves per node with each
+            having 4 GB of memory, the server would need 8 GB total.)</para>
+          </listitem>
+
+          <listitem>
+            <para>One Hard Drive (with sufficient free space to handle the
+            size of the data you plan to process) or Network Attached
+            Storage.</para>
+          </listitem>
+
+          <listitem>
+            <para>1 GigE network interface</para>
+          </listitem>
+        </itemizedlist>
+      </sect2>
+
+      <sect2 id="Node-recommended-specifications">
+        <title>Node recommended specifications</title>
+
+        <para><itemizedlist mark="square" spacing="compact">
+            <listitem>
+              <para>Nehalem Core i7 CPU</para>
+            </listitem>
+
+            <listitem>
+              <para>64-bit</para>
+            </listitem>
+
+            <listitem>
+              <para>4 GB RAM (or more) per slave</para>
+            </listitem>
+
+            <listitem>
+              <para>1 GigE network interface</para>
+            </listitem>
+
+            <listitem>
+              <para>PXE boot support in BIOS</para>
+
+              <para>PXE boot support is recommended so you can manage OS,
+              packages, and other settings when you have a large system</para>
+            </listitem>
+
+            <listitem>
+              <para>Optionally IPMI and KVM over IP support</para>
+
+              <para><emphasis role="bold">For Roxie nodes:</emphasis></para>
+            </listitem>
+
+            <listitem>
+              <para>Two 10K RPM (or faster) SAS Hard Drives</para>
+
+              <para>Typically, drive speed is the priority for Roxie
+              nodes</para>
+
+              <para><emphasis role="bold">For Thor nodes:</emphasis></para>
+            </listitem>
+
+            <listitem>
+              <para>Two 7200K RPM (or faster) SATA Hard Drives (Thor)</para>
+            </listitem>
+
+            <listitem>
+              <para>Optionally 3 or more hard drives can be configured in a
+              RAID 5 container for increased performance and
+              availability</para>
+
+              <para>Typically, drive capacity is the priority for Thor
+              nodes</para>
+            </listitem>
+          </itemizedlist></para>
+      </sect2>
+    </sect1>
+
+    <sect1 id="Nodes-Software">
+      <title>Nodes-Software</title>
+
+      <para>All nodes must have identical operating systems. We recommend
+      that all nodes have identical BIOS settings and packages installed. This
+      significantly reduces variables when troubleshooting. It is easier to
+      manage a system where all nodes are identical, but this is not
+      required.</para>
+
+      <sect2 id="Operating-System-Requirements">
+        <title>Operating System Requirements</title>
+
+        <para>Binary packages are available for the following:</para>
+
+        <para><itemizedlist mark="square" spacing="compact">
+            <listitem>
+              <para>64-bit CentOS 5</para>
+            </listitem>
+
+            <listitem>
+              <para>64-bit CentOS 6</para>
+            </listitem>
+
+            <listitem>
+              <para>64-bit RedHat Enterprise 5</para>
+            </listitem>
+
+            <listitem>
+              <para>64-bit RedHat Enterprise 6</para>
+            </listitem>
+
+            <listitem>
+              <para>64-bit Ubuntu 12.04 (LTS)</para>
+            </listitem>
+
+            <listitem>
+              <para>64-bit Ubuntu 13.10</para>
+            </listitem>
+
+            <listitem>
+              <para>64-bit Ubuntu 14.04 (LTS)</para>
+            </listitem>
+          </itemizedlist></para>
+      </sect2>
+
+      <sect2 id="configuration-manager">
+        <title>Dependencies</title>
+
+        <para>Installing HPCC on your system depends on having required
+        component packages installed on the system. The required dependencies
+        can vary depending on your platform. In some cases the dependencies
+        are included in the installation packages. In other instances the
+        installation may fail, and the package management utility will prompt
+        you for the required packages. Installation of these packages can vary
+        depending on your platform. For details of the specific installation
+        commands for obtaining and installing these packages, see the commands
+        specific to your Operating System. <variablelist>
+            <varlistentry>
+              <term>Note:</term>
+
+              <listitem>
+                <para>For CentOS installations, the Fedora EPEL repository is
+                required.</para>
+              </listitem>
+            </varlistentry>
+          </variablelist></para>
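For the CentOS note above, a hedged sketch of the usual way to enable EPEL
(confirm the approach used in your environment before relying on it):

  # enable the Fedora EPEL repository on CentOS before installing the HPCC package
  sudo yum -y install epel-release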
+      </sect2>
+
+      <sect2 id="SSH_Keys" role="brk">
+        <title>SSH Keys</title>
+
+        <para>The HPCC components use ssh keys to authenticate each other.
+        This is required for communication between nodes. A script to generate
+        keys has been provided. You should run that script and distribute the
+        public and private keys to all nodes after you have installed the
+        packages on all nodes, but before you configure a multi-node
+        HPCC.</para>
+
+        <para><itemizedlist spacing="compact">
+            <listitem>
+              <para>As root (or sudo as shown below), generate a new key using
+              this command:</para>
+
+              <para><programlisting>sudo /opt/HPCCSystems/sbin/keygen.sh</programlisting></para>
+            </listitem>
+
+            <listitem>
+              <para>Distribute the keys to all nodes. From the <emphasis
+              role="bold">/home/hpcc/.ssh</emphasis> directory, copy these
+              three files to the same directory (<emphasis
+              role="bold">/home/hpcc/.ssh</emphasis>) on each node:</para>
+
+              <itemizedlist spacing="compact">
+                <listitem>
+                  <para><emphasis role="bold">id_rsa</emphasis></para>
+                </listitem>
+
+                <listitem>
+                  <para><emphasis role="bold">id_rsa.pub</emphasis></para>
+                </listitem>
+
+                <listitem>
+                  <para><emphasis
+                  role="bold">authorized_keys</emphasis></para>
+                </listitem>
+              </itemizedlist>
+
+              <para>Make sure that the files retain their permissions when they are
+              distributed. These keys need to be owned by the user "<emphasis
+              role="bold">hpcc</emphasis>".</para>
+            </listitem>
+          </itemizedlist></para>
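A minimal sketch of the distribution step, assuming the other nodes are
reachable over ssh as the hpcc user and that their hostnames are listed in a
hypothetical node_list.txt file; adapt it to however you normally push files
to your nodes:

  for node in $(cat node_list.txt); do
    # -p preserves modes and timestamps; copying as hpcc keeps the required ownership
    scp -p /home/hpcc/.ssh/{id_rsa,id_rsa.pub,authorized_keys} hpcc@${node}:/home/hpcc/.ssh/
    ssh hpcc@${node} 'chmod 600 /home/hpcc/.ssh/id_rsa /home/hpcc/.ssh/authorized_keys'
  done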
+      </sect2>
+    </sect1>
+
+    <sect1 id="workstation-requirements">
+      <title>User Workstation Requirements</title>
+
+      <itemizedlist spacing="compact">
+        <listitem>
+          <para>Running the HPCC platform requires a browser on your user
+          workstation to communicate with the HPCC. You will use it to
+          access ECL Watch—a Web-based interface to your HPCC system. ECL
+          Watch enables you to examine and manage many aspects of the HPCC and
+          allows you to see information about jobs you run, data files, and
+          system metrics.</para>
+
+          <para>Use one of the supported web browsers with Javascript
+          enabled.</para>
+
+          <itemizedlist spacing="compact">
+            <listitem>
+              <para>Internet Explorer® 9 (or later)</para>
+            </listitem>
+
+            <listitem>
+              <para>Firefox™ 3.0 (or later)</para>
+
+              <!--***Add additional browsers when approved-->
+            </listitem>
+
+            <listitem>
+              <para>Google Chrome 10 (or later)</para>
+            </listitem>
+          </itemizedlist>
+
+          <para>If browser security is set to <emphasis
+          role="bold">High</emphasis>, you should add ECLWatch as a Trusted
+          Site to allow Javascript execution.</para>
+
+          <!--note: window users may want to use the 32 bit graph control***-->
+        </listitem>
+
+        <listitem>
+          <para>Install the ECL IDE</para>
+
+          <para>The ECL IDE (Integrated Development Environment) is the tool
+          used to create queries into your data and the ECL files with which
+          to build those queries.</para>
+
+          <para>Download the ECL IDE from the HPCC Systems web portal.
+          http://hpccsystems.com</para>
+
+          <para>You can find the ECL IDE and Client Tools on this page using
+          the following URL:</para>
+
+          <para><ulink
+          url="http://hpccsystems.com/download/free-community-edition/ecl-ide">http://hpccsystems.com/download/free-community-edition/ecl-ide</ulink></para>
+
+          <para>The ECL IDE was designed to run on Windows machines. See the
+          appendix for instructions on running on Linux workstations using
+          Wine.</para>
+        </listitem>
+
+        <listitem>
+          <para>Microsoft VS 2008 C++ compiler (either Express or Professional
+          edition). This is needed if you are running Windows and want to
+          compile queries locally. This allows you to compile and run ECL code
+          on your Windows workstation.</para>
+        </listitem>
+
+        <listitem>
+          <para>GCC. This is needed if you are running under Linux and want to
+          compile queries locally on a standalone Linux machine (although it
+          may already be available to you since it usually comes with the
+          operating system).</para>
+        </listitem>
+      </itemizedlist>
+    </sect1>
+  </chapter>
+</book>

+ 23 - 527
docs/Installing_and_RunningTheHPCCPlatform/Installing_and_RunningTheHPCCPlatform.xml

@@ -29,17 +29,15 @@
       <para>LexisNexis and the Knowledge Burst logo are registered trademarks
       of Reed Elsevier Properties Inc., used under license.</para>
 
-      <para>HPCC Systems is a registered trademark of LexisNexis Risk Data
-      Management Inc.</para>
+      <para>HPCC Systems<superscript>®</superscript> is a registered trademark
+      of LexisNexis Risk Data Management Inc.</para>
 
       <para>Other products, logos, and services may be trademarks or
-      registered trademarks of their respective companies. A</para>
+      registered trademarks of their respective companies. All names and
+      example data used in this manual are fictitious. Any similarity to
+      actual persons, living or dead, is purely coincidental.</para>
 
-      <para>ll names and example data used in this manual are fictitious. Any
-      similarity to actual persons, living or dead, is purely
-      coincidental.</para>
-
-      <para></para>
+      <para> </para>
     </legalnotice>
 
     <xi:include href="common/Version.xml" xpointer="FooterInfo"
@@ -48,7 +46,7 @@
     <xi:include href="common/Version.xml" xpointer="DateVer"
                 xmlns:xi="http://www.w3.org/2001/XInclude" />
 
-    <corpname>HPCC Systems</corpname>
+    <corpname>HPCC Systems<superscript>®</superscript></corpname>
 
     <xi:include href="common/Version.xml" xpointer="Copyright"
                 xmlns:xi="http://www.w3.org/2001/XInclude" />
@@ -211,512 +209,9 @@
     </sect1>
   </chapter>
 
-  <chapter id="Hardware-and-Software-Requirements">
-    <title>Hardware and Software Requirements</title>
-
-    <para>The following section describes the various hardware and software
-    required in order to run the HPCC.</para>
-
-    <sect1 id="Switch" role="nobrk">
-      <title>Network Switch</title>
-
-      <para>A significant component of HPCC is the infrastructure it runs on,
-      specifically the switch.</para>
-
-      <sect2 id="Switch-Requirements">
-        <title>Switch requirements</title>
-
-        <itemizedlist spacing="compact">
-          <listitem>
-            <para>Sufficient number of ports to allow all nodes to be
-            connected directly to it;</para>
-          </listitem>
-
-          <listitem>
-            <para>IGMP v.2 support </para>
-          </listitem>
-
-          <listitem>
-            <para>IGMP snooping support</para>
-          </listitem>
-        </itemizedlist>
-
-        <para><emphasis role="bold">Small:</emphasis> For a very small test
-        system, almost any gigabit switch will suffice. These are inexpensive
-        and readily available in six to 20-port models.</para>
-
-        <para><figure>
-            <title>1 GigE 8-port Switch</title>
-
-            <mediaobject>
-              <imageobject>
-                <imagedata fileref="images/DHSMC8508T.jpg" vendor="hardwareSS" />
-              </imageobject>
-            </mediaobject>
-          </figure></para>
-
-        <para><emphasis role="bold">Medium</emphasis>: For medium sized (10-48
-        node) systems, we recommend using a Force10 s25, s50, s55, or s60
-        switch</para>
-
-        <para><figure>
-            <title>Force10 S55 48-port Network Switch</title>
-
-            <mediaobject>
-              <imageobject>
-                <imagedata fileref="images/s55.jpg"
-                           vendor="hardwareSS,force10SS" />
-              </imageobject>
-            </mediaobject>
-          </figure></para>
-
-        <para><?hard-pagebreak ?><emphasis role="bold">Large</emphasis>: For
-        large (48-350 node) system, the Force10 c150 or c300 are good
-        choices.</para>
-
-        <para><figure>
-            <title>Force 10 c150</title>
-
-            <mediaobject>
-              <imageobject>
-                <imagedata fileref="images/c150-lg.jpg"
-                           vendor="hardwareSS,force10SS" />
-              </imageobject>
-            </mediaobject>
-          </figure></para>
-
-        <para><?hard-pagebreak ?><emphasis role="bold">Very Large</emphasis>:
-        For very large (more than 300 nodes) system, the Force10 e600 or e1200
-        are good choices.</para>
-
-        <para><figure>
-            <title>Force 10 e600 and e1200</title>
-
-            <mediaobject>
-              <imageobject>
-                <imagedata fileref="images/Force10_ExaScaleE6001200.jpg"
-                           vendor="hardwareSS,force10SS" />
-              </imageobject>
-            </mediaobject>
-          </figure></para>
-      </sect2>
-
-      <sect2 id="Switch-additional-recommend">
-        <title>Switch additional recommended features</title>
-
-        <para><itemizedlist mark="square" spacing="compact">
-            <listitem>
-              <para>Non-blocking backplane</para>
-            </listitem>
-
-            <listitem>
-              <para>Low latency (under 35usec)</para>
-            </listitem>
-
-            <listitem>
-              <para>Layer 3 switching</para>
-            </listitem>
-
-            <listitem>
-              <para>Managed and monitored (SNMP is a plus)</para>
-            </listitem>
-
-            <listitem>
-              <para>Port channel (port bundling) support</para>
-            </listitem>
-          </itemizedlist></para>
-      </sect2>
-    </sect1>
-
-    <sect1>
-      <title>Load Balancer</title>
-
-      <para>In order to take full advantage of a Roxie cluster, a load
-      balancer is required. Each Roxie Node is capable of receiving requests
-      and returning results. Therefore, a load balancer distributes the load
-      in an efficient manner to get the best performance and avoid a potential
-      bottleneck.</para>
-
-      <para>We recommend the Web Accelerator product line from F5 Networks.
-      See <ulink
-      url="http://www.f5.com/pdf/products/big-ip-webaccelerator-ds.pdf">http://www.f5.com/pdf/products/big-ip-webaccelerator-ds.pdf
-      </ulink> for more information<phrase></phrase>.</para>
-
-      <para><figure>
-          <title>F5 Load Balancers</title>
-
-          <mediaobject>
-            <imageobject>
-              <imagedata fileref="images/IR-009a.jpg" vendor="hardwareSS,F5SS" />
-            </imageobject>
-          </mediaobject>
-        </figure></para>
-
-      <sect2>
-        <title>Load Balancer Requirements</title>
-
-        <sect3>
-          <title>Minimum requirements</title>
-
-          <para><itemizedlist spacing="compact">
-              <listitem>
-                <para>Throughput: 1Gbps Gigabit</para>
-              </listitem>
-
-              <listitem>
-                <para>Ethernet ports: 2</para>
-              </listitem>
-
-              <listitem>
-                <para>Balancing Strategy: Round Robin</para>
-              </listitem>
-            </itemizedlist></para>
-        </sect3>
-
-        <sect3>
-          <title>Standard requirements</title>
-
-          <para><itemizedlist spacing="compact">
-              <listitem>
-                <para>Throughput: 8Gbps</para>
-              </listitem>
-
-              <listitem>
-                <para>Gigabit Ethernet ports: 4</para>
-              </listitem>
-
-              <listitem>
-                <para>Balancing Strategy: Flexible (F5 iRules or
-                equivalent)</para>
-              </listitem>
-            </itemizedlist></para>
-        </sect3>
-
-        <sect3 role="brk">
-          <title>Recommended capabilities</title>
-
-          <para><itemizedlist spacing="compact">
-              <listitem>
-                <para>Ability to provide cyclic load rotation (not load
-                balancing).</para>
-              </listitem>
-
-              <listitem>
-                <para>Ability to forward SOAP/HTTP traffic</para>
-              </listitem>
-
-              <listitem>
-                <para>Ability to provide triangulation/n-path routing (traffic
-                incoming through the load balancer to the node, replies sent
-                out the via the switch).</para>
-              </listitem>
-
-              <listitem>
-                <para>Ability to treat a cluster of nodes as a single entity
-                (for load balancing clusters not nodes)</para>
-
-                <para>or</para>
-              </listitem>
-
-              <listitem>
-                <para>Ability to stack or tier the load balancers for multiple
-                levels if not.</para>
-              </listitem>
-            </itemizedlist></para>
-        </sect3>
-      </sect2>
-    </sect1>
-
-    <sect1 id="Nodes-Hardware">
-      <title>Nodes-Hardware</title>
-
-      <para>The HPCC can run as a single node system or a multi node
-      system.</para>
-
-      <para>These hardware recommendations are intended for a multi-node
-      production system. A test system can use less stringent specifications.
-      Also, while it is easier to manage a system where all nodes are
-      identical, this is not required. However, it is important to note that
-      your system will only run as fast as its slowest node.</para>
-
-      <sect2 id="Node-Min-requirements">
-        <title>Node minimum requirements</title>
-
-        <itemizedlist mark="square" spacing="compact">
-          <listitem>
-            <para>Pentium 4 or newer CPU</para>
-          </listitem>
-
-          <listitem>
-            <para>32-bit</para>
-          </listitem>
-
-          <listitem>
-            <para>1GB RAM per slave</para>
-
-            <para>(Note: If you configure more than 1 slave per node, memory
-            is shared. For example, if you want 2 slaves per node with each
-            having 4 GB of memory, the server would need 8 GB total.)</para>
-          </listitem>
-
-          <listitem>
-            <para>One Hard Drive (with sufficient free space to handle the
-            size of the data you plan to process) or Network Attached
-            Storage.</para>
-          </listitem>
-
-          <listitem>
-            <para>1 GigE network interface</para>
-          </listitem>
-        </itemizedlist>
-      </sect2>
-
-      <sect2 id="Node-recommended-specifications">
-        <title>Node recommended specifications</title>
-
-        <para><itemizedlist mark="square" spacing="compact">
-            <listitem>
-              <para>Nehalem Core i7 CPU</para>
-            </listitem>
-
-            <listitem>
-              <para>64-bit</para>
-            </listitem>
-
-            <listitem>
-              <para>4 GB RAM (or more) per slave</para>
-            </listitem>
-
-            <listitem>
-              <para>1 GigE network interface</para>
-            </listitem>
-
-            <listitem>
-              <para>PXE boot support in BIOS</para>
-
-              <para>PXE boot support is recommended so you can manage OS,
-              packages, and other settings when you have a large system</para>
-            </listitem>
-
-            <listitem>
-              <para>Optionally IPMI and KVM over IP support</para>
-
-              <para><emphasis role="bold">For Roxie nodes:</emphasis></para>
-            </listitem>
-
-            <listitem>
-              <para>Two 10K RPM (or faster) SAS Hard Drives</para>
-
-              <para>Typically, drive speed is the priority for Roxie
-              nodes</para>
-
-              <para><emphasis role="bold">For Thor nodes:</emphasis></para>
-            </listitem>
-
-            <listitem>
-              <para>Two 7200K RPM (or faster) SATA Hard Drives (Thor)</para>
-            </listitem>
-
-            <listitem>
-              <para>Optionally 3 or more hard drives can be configured in a
-              RAID 5 container for increased performance and
-              availability</para>
-
-              <para>Typically, drive capacity is the priority for Thor
-              nodes</para>
-            </listitem>
-          </itemizedlist></para>
-      </sect2>
-    </sect1>
-
-    <sect1 id="Nodes-Software">
-      <title>Nodes-Software</title>
-
-      <para>All nodes must have the identical operating systems. We recommend
-      all nodes have identical BIOS settings, and packages installed. This
-      significantly reduces variables when troubleshooting. It is easier to
-      manage a system where all nodes are identical, but this is not
-      required.</para>
-
-      <sect2 id="Operating-System-Requirements">
-        <title>Operating System Requirements</title>
-
-        <para>Binary packages are available for the following:</para>
-
-        <para><itemizedlist mark="square" spacing="compact">
-            <listitem>
-              <para>64-bit CentOS 5</para>
-            </listitem>
-
-            <listitem>
-              <para>64-bit CentOS 6</para>
-            </listitem>
-
-            <listitem>
-              <para>64-bit RedHat Enterprise 5</para>
-            </listitem>
-
-            <listitem>
-              <para>64-bit RedHat Enterprise 6</para>
-            </listitem>
-
-            <listitem>
-              <para>64-bit Ubuntu 12.04 (LTS)</para>
-            </listitem>
-
-            <listitem>
-              <para>64-bit Ubuntu 13.10</para>
-            </listitem>
-
-            <listitem>
-              <para>64-bit Ubuntu 14.04 (LTS)</para>
-            </listitem>
-          </itemizedlist></para>
-      </sect2>
-
-      <sect2 id="configuration-manager">
-        <title>Dependencies</title>
-
-        <para>Installing HPCC on your system depends on having required
-        component packages installed on the system. The required dependencies
-        can vary depending on your platform. In some cases the dependencies
-        are included in the installation packages. In other instances the
-        installation may fail, and the package management utility will prompt
-        you for the required packages. Installation of these packages can vary
-        depending on your platform. For details of the specific installation
-        commands for obtaining and installing these packages, see the commands
-        specific to your Operating System. <variablelist>
-            <varlistentry>
-              <term>Note:</term>
-
-              <listitem>
-                <para>For CentOS installations, the Fedora EPEL repository is
-                required.</para>
-              </listitem>
-            </varlistentry>
-          </variablelist></para>
-      </sect2>
-
-      <sect2 id="SSH_Keys" role="brk">
-        <title>SSH Keys</title>
-
-        <para>The HPCC components use ssh keys to authenticate each other.
-        This is required for communication between nodes. A script to generate
-        keys has been provided .You should run that script and distribute the
-        public and private keys to all nodes after you have installed the
-        packages on all nodes, but before you configure a multi-node
-        HPCC.</para>
-
-        <para><itemizedlist spacing="compact">
-            <listitem>
-              <para>As root (or sudo as shown below), generate a new key using
-              this command:</para>
-
-              <para><programlisting>sudo /opt/HPCCSystems/sbin/keygen.sh</programlisting></para>
-            </listitem>
-
-            <listitem>
-              <para>Distribute the keys to all nodes. From the <emphasis
-              role="bold">/home/hpcc/.ssh</emphasis> directory, copy these
-              three files to the same directory (<emphasis
-              role="bold">/home/hpcc/.ssh</emphasis>) on each node:</para>
-
-              <itemizedlist spacing="compact">
-                <listitem>
-                  <para><emphasis role="bold">id_rsa</emphasis></para>
-                </listitem>
-
-                <listitem>
-                  <para><emphasis role="bold">id_rsa.pub</emphasis></para>
-                </listitem>
-
-                <listitem>
-                  <para><emphasis
-                  role="bold">authorized_keys</emphasis></para>
-                </listitem>
-              </itemizedlist>
-
-              <para>Make sure that files retain permissions when they are
-              distributed. These keys need to be owned by the user "<emphasis
-              role="bold">hpcc</emphasis>".</para>
-            </listitem>
-          </itemizedlist></para>
-      </sect2>
-    </sect1>
-
-    <sect1 id="workstation-requirements">
-      <title>User Workstation Requirements</title>
-
-      <itemizedlist spacing="compact">
-        <listitem>
-          <para>Running the HPCC platform requires communication from your
-          user workstation with a browser to the HPCC. You will use it to
-          access ECL Watch—a Web-based interface to your HPCC system. ECL
-          Watch enables you to examine and manage many aspects of the HPCC and
-          allows you to see information about jobs you run, data files, and
-          system metrics.</para>
-
-          <para>Use one of the supported web browsers with Javascript
-          enabled.</para>
-
-          <itemizedlist spacing="compact">
-            <listitem>
-              <para>Internet Explorer® 8 (or later)</para>
-            </listitem>
-
-            <listitem>
-              <para>Firefox™ 3.0 (or later.)</para>
-
-              <!--***Add additional browsers when approved-->
-            </listitem>
-
-            <listitem>
-              <para>Google Chrome 10 (or later)</para>
-            </listitem>
-          </itemizedlist>
-
-          <para>If browser security is set to <emphasis
-          role="bold">High</emphasis>, you should add ECLWatch as a Trusted
-          Site to allow Javascript execution.</para>
-        </listitem>
-
-        <listitem>
-          <para>Install the ECL IDE</para>
-
-          <para>The ECL IDE (Integrated Development Environment) is the tool
-          used to create queries into your data and ECL files with which to
-          build your queries.</para>
-
-          <para>From the ECLWatch web page, download the Windows install set.
-          If the link is not visible, either follow the link to the HPCC
-          System's portal or install the Optional Packages.</para>
-
-          <para>You can reach this page using the following URL:</para>
-
-          <para>http://nnn.nnn.nnn.nnn:8010, where nnn.nnn.nnn.nnn is your
-          node's IP address.</para>
-
-          <para>The ECL IDE was designed to run on Windows machines. See the
-          appendix for instructions on running on Linux workstations using
-          Wine.</para>
-        </listitem>
-
-        <listitem>
-          <para>Microsoft VS 2008 C++ compiler (either Express or Professional
-          edition). This is needed if you are running Windows and want to
-          compile queries locally. This allows you to compile and run ECL code
-          on your Windows workstation.</para>
-        </listitem>
-
-        <listitem>
-          <para>GCC. This is needed if you are running under Linux and want to
-          compile queries locally on a standalone Linux machine, (although it
-          may already be available to you since it usually comes with the
-          operating system).</para>
-        </listitem>
-      </itemizedlist>
-    </sect1>
-  </chapter>
+  <xi:include href="Installing_and_RunningTheHPCCPlatform/Inst-Mods/Hardware.xml"
+              xpointer="Hardware-and-Software-Chapter"
+              xmlns:xi="http://www.w3.org/2001/XInclude" />
 
   <chapter id="HPCC-installation-and-startup">
     <title>HPCC Installation and Startup</title>
@@ -767,7 +262,7 @@
         packages will fail to install if their dependencies are missing from
         the target system.</para>
 
-        <para>Packages are available from the HPCC Systems website: <ulink
+        <para>Packages are available from the HPCC Systems<superscript>®</superscript> website: <ulink
         url="http://hpccsystems.com/download/free-community-edition">http://hpccsystems.com/download/free-community-edition</ulink></para>
 
         <para>To install the package, follow the appropriate installation
@@ -1278,7 +773,7 @@
 
         <para>This section details reconfiguring a system to use multiple
         nodes. Before you start this section, you must have already downloaded
-        the correct packages for your distro from the HPCC Systems website:
+        the correct packages for your distro from the HPCC Systems<superscript>®</superscript> website:
         <ulink
         url="http://hpccsystems.com/download/free-community-edition">http://hpccsystems.com/download/free-community-edition</ulink>.</para>
 
@@ -2573,7 +2068,7 @@ OUTPUT(ValidWords)
         </listitem>
       </itemizedlist></para>
 
-    <para>The HPCC Systems Portal is also a valuable resource for more
+    <para>The HPCC Systems<superscript>®</superscript> Portal is also a valuable resource for more
     information including:</para>
 
     <itemizedlist spacing="compact">
@@ -3372,8 +2867,9 @@ sudo /sbin/service hpcc-init -c esp start
           </listitem>
 
           <listitem>
-            <para>Start the HPCC Systems platform (restart if it is already
-            running) in order to read the new configuration.</para>
+            <para>Start the HPCC Systems<superscript>®</superscript> platform
+            (restart if it is already running) in order to read the new
+            configuration.</para>
 
             <para>For example :</para>
 
@@ -3394,9 +2890,9 @@ sudo /sbin/service hpcc-init -c esp start
 
             <para>Test the Java integration.</para>
 
-            <para>The HPCC Systems platform comes with a Java example class.
-            You can execute some Java code either in your ECL IDE or the ECL
-            Playground.</para>
+            <para>The HPCC Systems<superscript>®</superscript> platform comes
+            with a Java example class. You can execute some Java code either
+            in your ECL IDE or the ECL Playground.</para>
 
             <para>For example:</para>
 
@@ -3425,7 +2921,7 @@ add1(10);
       <sect2 id="Add_On_Javascript" role="brk">
         <title>JavaScript</title>
 
-        <para>To enable JavaScript support within the HPCC Systems
+        <para>To enable JavaScript support within the HPCC Systems<superscript>®</superscript>
         Platform:</para>
 
         <orderedlist>
@@ -3490,8 +2986,8 @@ add1(10);
       <sect2 id="Add_Python_support" role="brk">
         <title>Python</title>
 
-        <para>To enable Python support within the HPCC Systems
-        Platform:</para>
+        <para>To enable Python support within the HPCC
+        Systems<superscript>®</superscript> Platform:</para>
 
         <orderedlist>
           <listitem>
@@ -3548,7 +3044,7 @@ split_words('Once upon a time');
       <sect2 id="R" role="brk">
         <title>R</title>
 
-        <para>To enable R support within The HPCC Systems Platform:</para>
+        <para>To enable R support within the HPCC Systems<superscript>®</superscript> Platform:</para>
 
         <orderedlist>
           <listitem>

+ 1 - 1
esp/src/eclwatch/templates/DFUWUDetailsWidget.html

@@ -69,7 +69,7 @@
                                 </li>
                                 <li>
                                     <label for="${id}PercentDone">${i18n.PercentDone}:</label>
-                                    <div id="${id}PercentDone" style="width: 190px" data-dojo-props="maximum:10" data-dojo-type="dijit.ProgressBar"></div>
+                                    <div id="${id}PercentDone" style="width: 190px" data-dojo-props="maximum:100" data-dojo-type="dijit.ProgressBar"></div>
                                 </li>
                                 <li>
                                     <label for="${id}ProgressMessage">${i18n.ProgressMessage}:</label>

+ 48 - 6
initfiles/sbin/complete-uninstall.sh.in

@@ -39,6 +39,35 @@ message() {
 MESSAGE_MARKER
 }
 
+canonicalize_path() {
+    # canonicalize path argument by removing trailing slashes
+    # for test -h and readlink to work properly
+    local dir=${1}
+    if [ -z "${dir}" ] ; then
+        echo "${dir}"
+        return 0
+    fi
+    echo "${dir}" | sed 's/\/*$//'
+    return 0
+}
+
+removedir() {
+    local dir=$(canonicalize_path ${1})
+    # echo "canonicalized dir = ${dir}"
+    if [ -z "${dir}" ] ; then
+        return 0
+    fi
+    if [ ! -d "${dir}" ] ; then
+        return 0
+    fi
+    if [ -h "${dir}" ] ; then
+        # echo "${dir} is a soft link"
+        find ${dir}/ -depth -mindepth 1 -exec rm -rf {} \;
+    else
+        # echo "${dir} is not a soft link"
+        rm -rf ${dir}
+    fi
+}
 
 force=0
 leaveenv=0
@@ -62,6 +91,13 @@ done
 
 set_environmentvars
 
+mklink=""
+lpath=$(canonicalize_path ${path})
+if [ -n "${lpath}" -a -h "${lpath}" ] ; then
+    mklink=$(readlink "${lpath}")
+    # echo "\"${lpath}\" is a soft-link to \"${mklink}\""
+fi
+
 if [ -e /etc/debian_version ]; then
     echo "Removing DEB"
     if [ $force -eq 0 ]; then
@@ -88,24 +124,30 @@ elif [ -e /etc/redhat-release -o -e /etc/SuSE-release ]; then
 fi
 
 echo "Removing Directory - ${path}"
-rm -rf ${path}
+removedir ${path}
+
+if [ -n "${mklink}" -a -n "${lpath}" ] ; then
+    # echo "recreating soft-link"
+    ln -s "${mklink}" "${lpath}"
+    removedir ${lpath}
+fi
 
 if [ $leaveenv -eq 0 ]; then
     echo "Removing Directory - ${configs}"
-    rm -rf ${configs}
+    removedir ${configs}
 fi
 
 echo "Removing Directory - ${lock}"
-rm -rf ${lock}
+removedir ${lock}
 
 echo "Removing Directory - ${log}"
-rm -rf ${log}
+removedir ${log}
 
 echo "Removing Directory - ${pid}"
-rm -rf ${pid}
+removedir ${pid}
 
 echo "Removing Directory - ${runtime}"
-rm -rf ${runtime}
+removedir ${runtime}
 
 echo "Removing user - ${user}"
 if [ -e /usr/sbin/userdel ]; then
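Editor's note: the new removedir() helper above empties the directory behind a soft link (keeping the link itself) instead of running rm -rf on it, and still removes an ordinary directory tree outright; the second hunk records the link target before the package is removed and re-creates the link afterwards. As an illustration only (not part of this commit), the same decision could be sketched in C++17 with std::filesystem; removeDir and stripTrailingSlashes are hypothetical names:

// Illustration only (not part of this commit): a C++17 analogue of the
// removedir() shell helper above, using std::filesystem.
#include <filesystem>
#include <string>
#include <system_error>

namespace fs = std::filesystem;

// Equivalent of canonicalize_path: drop trailing slashes so the symlink
// test below looks at the link itself rather than its target.
static std::string stripTrailingSlashes(std::string dir)
{
    while (dir.size() > 1 && dir.back() == '/')
        dir.pop_back();
    return dir;
}

static void removeDir(const std::string &rawPath)
{
    if (rawPath.empty())
        return;
    const fs::path dir = stripTrailingSlashes(rawPath);
    std::error_code ec;
    if (!fs::is_directory(dir, ec))        // follows links, like "test -d"
        return;
    if (fs::is_symlink(dir))
    {
        // Soft link: empty the directory it points to but keep the link,
        // mirroring: find ${dir}/ -depth -mindepth 1 -exec rm -rf {} \;
        for (const auto &entry : fs::directory_iterator(dir))
            fs::remove_all(entry.path());
    }
    else
    {
        fs::remove_all(dir);               // plain directory: remove the tree
    }
}

int main(int argc, char *argv[])
{
    if (argc > 1)
        removeDir(argv[1]);
    return 0;
}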

+ 34 - 23
plugins/fileservices/fileservices.cpp

@@ -1030,6 +1030,28 @@ ReplaceSuperFile(const varstring lsuperfn,const varstring lfn,const varstring by
 FinishSuperFileTransaction(boolean rollback=false);
 */
 
+class CImplicitSuperTransaction
+{
+    IDistributedFileTransaction *transaction;
+public:
+    CImplicitSuperTransaction(IDistributedFileTransaction *_transaction)
+    {
+        if (!_transaction->active()) // then created implicitly
+        {
+            transaction = _transaction;
+            transaction->start();
+        }
+        else
+            transaction = NULL;
+    }
+    ~CImplicitSuperTransaction()
+    {
+        if (transaction)
+            transaction->commit();
+    }
+};
+
+
 static bool lookupSuperFile(ICodeContext *ctx, const char *lsuperfn, Owned<IDistributedSuperFile> &file, bool throwerr, StringBuffer &lsfn, bool allowforeign, bool cacheFiles=false)
 {
     lsfn.clear();
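Editor's note: this hunk moves the CImplicitSuperTransaction helper ahead of lookupSuperFile() so that the superfile query functions changed later in this diff (sub count, sub name, find, contents) can construct it around their lookups when no explicit transaction is open. Below is a minimal, self-contained sketch of the RAII pattern involved; ITxn, DummyTxn, and ImplicitTxnGuard are hypothetical stand-ins for the HPCC interfaces, not part of the commit:

// Sketch only: the RAII pattern that CImplicitSuperTransaction implements.
// ITxn is a hypothetical stand-in for IDistributedFileTransaction, reduced
// to the three calls the guard actually needs.
#include <iostream>

struct ITxn
{
    virtual bool active() const = 0;
    virtual void start() = 0;
    virtual void commit() = 0;
    virtual ~ITxn() = default;
};

class ImplicitTxnGuard
{
    ITxn *txn = nullptr;            // non-null only if this guard started it
public:
    explicit ImplicitTxnGuard(ITxn *t)
    {
        if (!t->active())           // no explicit transaction in progress
        {
            txn = t;
            txn->start();           // open an implicit one for this scope
        }
    }
    ~ImplicitTxnGuard()
    {
        if (txn)
            txn->commit();          // close it when the scope ends
    }
};

struct DummyTxn : ITxn
{
    bool isActive = false;
    bool active() const override { return isActive; }
    void start() override  { isActive = true;  std::cout << "start\n"; }
    void commit() override { isActive = false; std::cout << "commit\n"; }
};

int main()
{
    DummyTxn txn;
    {
        ImplicitTxnGuard guard(&txn);   // starts an implicit transaction
        // ... resolve a superfile and read its subfile list here ...
    }                                   // commits on scope exit
    {
        txn.isActive = true;            // caller already has a transaction
        ImplicitTxnGuard guard(&txn);   // does nothing; caller stays in charge
    }
}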
@@ -1122,6 +1144,7 @@ FILESERVICES_API unsigned FILESERVICES_CALL fsGetSuperFileSubCount(ICodeContext
     Owned<ISimpleSuperFileEnquiry> enq = getSimpleSuperFileEnquiry(ctx, lsuperfn);
     if (enq)
         return enq->numSubFiles();
+    CImplicitSuperTransaction implicitTransaction(ctx->querySuperFileTransaction());
     Owned<IDistributedSuperFile> file;
     StringBuffer lsfn;
     lookupSuperFile(ctx, lsuperfn, file, true, lsfn, true);
@@ -1139,6 +1162,7 @@ FILESERVICES_API char *  FILESERVICES_CALL fsGetSuperFileSubName(ICodeContext *c
             return CTXSTRDUP(parentCtx, "");
         return ret.detach();
     }
+    CImplicitSuperTransaction implicitTransaction(ctx->querySuperFileTransaction());
     Owned<IDistributedSuperFile> file;
     StringBuffer lsfn;
     lookupSuperFile(ctx, lsuperfn, file, true, lsfn, true);
@@ -1157,6 +1181,7 @@ FILESERVICES_API unsigned FILESERVICES_CALL fsFindSuperFileSubName(ICodeContext
         unsigned n = enq->findSubName(lfn.str());
         return (n==NotFound)?0:n+1;
     }
+    CImplicitSuperTransaction implicitTransaction(ctx->querySuperFileTransaction());
     Owned<IDistributedSuperFile> file;
     StringBuffer lsfn;
     lookupSuperFile(ctx, lsuperfn, file, true, lsfn, true);
@@ -1190,27 +1215,6 @@ FILESERVICES_API void FILESERVICES_CALL fsAddSuperFile(IGlobalCodeContext *gctx,
 }
 
 
-class CImplicitSuperTransaction
-{
-    IDistributedFileTransaction *transaction;
-public:
-    CImplicitSuperTransaction(IDistributedFileTransaction *_transaction)
-    {
-        if (!_transaction->active()) // then created implicitly
-        {
-            transaction = _transaction;
-            transaction->start();
-        }
-        else
-            transaction = NULL;
-    }
-    ~CImplicitSuperTransaction()
-    {
-        if (transaction)
-            transaction->commit();
-    }
-};
-
 FILESERVICES_API void FILESERVICES_CALL fslAddSuperFile(ICodeContext *ctx, const char *lsuperfn,const char *_lfn,unsigned atpos,bool addcontents, bool strict)
 {
     Owned<IDistributedSuperFile> file;
@@ -1708,6 +1712,7 @@ FILESERVICES_API void FILESERVICES_CALL fsSuperFileContents(ICodeContext *ctx, s
         }
     }
     else {
+        CImplicitSuperTransaction implicitTransaction(ctx->querySuperFileTransaction());
         Owned<IDistributedSuperFile> file;
         StringBuffer lsfn;
         lookupSuperFile(ctx, lsuperfn, file, true, lsfn, true);
@@ -2167,12 +2172,18 @@ FILESERVICES_API void  FILESERVICES_CALL fsDeleteExternalFile(ICodeContext * ctx
     AuditMessage(ctx,"DeleteExternalFile",path);
 }
 
-FILESERVICES_API void  FILESERVICES_CALL fsCreateExternalDirectory(ICodeContext * ctx,const char *location,const char *path)
+FILESERVICES_API void  FILESERVICES_CALL fsCreateExternalDirectory(ICodeContext * ctx,const char *location,const char *_path)
 {
     SocketEndpoint ep(location);
     if (ep.isNull())
-        throw MakeStringException(-1,"fsCreateExternalDirectory: Cannot resolve location %s",location);
+        throw MakeStringException(-1, "fsCreateExternalDirectory: Cannot resolve location %s",location);
     CDfsLogicalFileName lfn;
+    StringBuffer path(_path);
+    if (0 == path.length())
+        throw MakeStringException(-1, "fsCreateExternalDirectory: empty directory");
+    // remove trailing path separator if present to make it look like a regular LFN after lfn.setExternal
+    if (isPathSepChar(path.charAt(path.length()-1)))
+        path.remove(path.length()-1, 1);
     lfn.setExternal(location,path);
     checkExternalFileRights(ctx,lfn,false,true);
     RemoteFilename rfn;