
Merge branch 'candidate-3.6.x' into candidate-3.8.x

Recent changes on 3.6.x branch (H2H-related) merged to 3.8

Signed-off-by: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 13 years ago
parent
commit
719005e535

+ 25 - 16
docs/HDFS_Stream/HDFS_Mods/HDFS_Install.xml

@@ -31,14 +31,21 @@
 
         <programlisting>sudo rpm -Uvh &lt;rpm file name&gt;</programlisting>
 
-        <blockquote>
-          <para><emphasis role="bold">NOTE :</emphasis> For ANY version of
-          SuSe you must set a password for the hpcc user on all nodes. One way
-          to do do this is to issue the following command:</para>
+        <variablelist>
+          <varlistentry>
+            <term>Note:</term>
 
-          <para><programlisting>sudo passwd hpcc</programlisting>Be sure to
-          set the password on ALL nodes.</para>
-        </blockquote>
+            <listitem>
+              <para>For ANY version of SuSe you must set a password for the
+              <emphasis role="bluebold">hpcc</emphasis> user on all nodes. One
+              way to do this is to issue the following command:</para>
+
+              <programlisting>sudo passwd hpcc</programlisting>
+
+              <para>Be sure to set the password on ALL nodes.</para>
+            </listitem>
+          </varlistentry>
+        </variablelist>
 
         <para><emphasis role="bold">Ubuntu/Debian </emphasis></para>
 
@@ -54,7 +61,7 @@
     <title>Editing and distributing the Configuration file</title>
 
     <para>After you install the HDFS to HPCC Connector package, you must edit
-    the configuration file and push it out to all nodes. </para>
+    the configuration file and push it out to all nodes.</para>
 
     <orderedlist>
       <listitem>
@@ -63,7 +70,7 @@
 
       <listitem>
         <para>Edit the configuration file <emphasis
-        role="bluebold">/opt/HPCCSystems/etc/HPCCSystems/hdfsstream.conf</emphasis>.</para>
+        role="bluebold">/opt/HPCCSystems/etc/HPCCSystems/hdfsconnector.conf</emphasis>.</para>
 
         <para>The configuration file contains one line:</para>
 
@@ -78,18 +85,18 @@
         push.sh script:</para>
 
         <para><programlisting>sudo -u hpcc /opt/HPCCSystems/sbin/hpcc-push.sh /
-             /opt/HPCCSystems/etc/HPCCSystems/hdfsstream.conf /
-             /opt/HPCCSystems/etc/HPCCSystems/hdfsstream.conf</programlisting></para>
+             /opt/HPCCSystems/etc/HPCCSystems/hdfsconnector.conf /
+             /opt/HPCCSystems/etc/HPCCSystems/hdfsconnector.conf</programlisting></para>
       </listitem>
     </orderedlist>
 
     <para></para>
   </sect2>
 
-  <sect2>
+  <sect2 role="brk">
     <title>Installing the ECL library to your ECL IDE source folder</title>
 
-    <para>The HDSF-to-HPCC Connector library is a single ECL file containing
+    <para>The HDFS to HPCC Connector library is a single ECL file containing
     three MACROs. These steps explain how to install to your ECL source
     repository.</para>
 
@@ -100,8 +107,8 @@
 
         <listitem>
           <para>Extract the contents of the zip file to the ECL IDE source
-          folder. Make sure to enable the option to keep the folder structure
-          within Zip file.</para>
+          folder. Make sure to select the option to use the folder names from
+          the Zip file.</para>
 
           <para>The ECL Source folder is typically located at <emphasis
           role="bluebold">C:\Users\Public\Documents\HPCC Systems\ECL\My
@@ -112,7 +119,9 @@
           Compiler &gt;&gt; ECL Folders</emphasis>.</para>
 
           <para>When you are finished, the library will be in a repository
-          folder named <emphasis role="bluebold">DataStream</emphasis>.</para>
+          folder named <emphasis role="bluebold">DataConnectors</emphasis>. It
+          will contain one file named
+          <emphasis>HDFSConnector.ecl</emphasis>.</para>
         </listitem>
       </orderedlist></para>
   </sect2>

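A quick way to exercise the install steps documented above is the following shell sketch (the one-line hpcc-push.sh form is an assumption; the XML above splits the source and destination paths across lines):

    # SuSE only: set a password for the hpcc user on every node
    sudo passwd hpcc

    # After editing the configuration file, push it out to all nodes
    # (assumed single-line form of the command shown in the documentation)
    sudo -u hpcc /opt/HPCCSystems/sbin/hpcc-push.sh \
        /opt/HPCCSystems/etc/HPCCSystems/hdfsconnector.conf \
        /opt/HPCCSystems/etc/HPCCSystems/hdfsconnector.conf
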
+ 93 - 0
docs/HDFSConnector/HDFS_Mods/HDFS_Intro.xml

@@ -0,0 +1,93 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE sect1 PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
+"http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+<sect1 id="IntroHDFS" role="nobrk">
+  <title>Introduction</title>
+
+  <para>The HDFS to HPCC Connector provides a means to import data from
+  Hadoop's HDFS into an HPCC Systems Thor platform. It also supports exporting
+  the data back to HDFS or exporting and merging it. This allows you to use an
+  HPCC cluster in conjunction with your Hadoop-based cluster.</para>
+
+  <para>The H2H Connector is an add-on to an HPCC Cluster and consists of
+  server-side components and ECL Macros that invoke them.</para>
+
+  <para><itemizedlist>
+      <listitem>
+        <para><emphasis role="bold">Server-side
+        components:</emphasis><itemizedlist>
+            <listitem>
+              <para>The executable ( /opt/HPCCSystems/bin/hdfsconnector
+              )</para>
+            </listitem>
+
+            <listitem>
+              <para>The shell script (/opt/HPCCSystems/bin/hdfspipe)</para>
+            </listitem>
+
+            <listitem>
+              <para>The configuration file
+              (/opt/HPCCSystems/etc/HPCCSystems/hdfsconnector.conf)</para>
+
+              <para>The configuration file contains the location where Hadoop
+              is installed, as shown in the example below: </para>
+
+              <programlisting>HADOOP_LOCATION=/usr/local/hadoop</programlisting>
+
+              <para>This allows access to the libhdfs (API) library.</para>
+
+              <variablelist>
+                <varlistentry>
+                  <term>Note:</term>
+
+                  <listitem>
+                    <para>The HDFS Connector writes log files to a folder
+                    named <emphasis
+                    role="bluebold">mydataconnectors</emphasis> in the
+                    HPCC log directory (the HPCC log location can be set using
+                    Configuration Manager). </para>
+
+                    <para>The default location is:<programlisting>/var/log/HPCCSystems/mydataconnectors/</programlisting></para>
+
+                    <para>The log files are written using the following
+                    pattern:<programlisting>HDFSCONNECTOR.&lt;nodeid&gt;.&lt;PID&gt;.log</programlisting></para>
+                  </listitem>
+                </varlistentry>
+              </variablelist>
+            </listitem>
+          </itemizedlist></para>
+      </listitem>
+
+      <listitem>
+        <para><emphasis role="bold">ECL Macros
+        (HDFSConnector.ecl)</emphasis></para>
+
+        <itemizedlist>
+          <listitem>
+            <para>HDFSConnector.PipeIn</para>
+
+            <para>Imports data from Hadoop's file system (HDFS) to a Thor
+            Cluster.</para>
+          </listitem>
+
+          <listitem>
+            <para>HDFSConnector.PipeOut</para>
+
+            <para>Exports data from a Thor Cluster to Hadoop's file system
+            (HDFS).</para>
+          </listitem>
+
+          <listitem>
+            <para>HDFSConnector.PipeOutAndMerge</para>
+
+            <para>Exports data from a Thor Cluster to Hadoop's file system
+            (HDFS) and merges the data.</para>
+          </listitem>
+        </itemizedlist>
+      </listitem>
+
+      <listitem>
+        <para>The HDFS to HPCC Connector User's Guide</para>
+      </listitem>
+    </itemizedlist></para>
+</sect1>

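The new Introduction above documents where the connector writes its logs; a hedged shell sketch for checking them on a node (the node id and PID in each file name vary per run):

    # Default log location named in the text above
    ls /var/log/HPCCSystems/mydataconnectors/
    # Files follow the pattern HDFSCONNECTOR.<nodeid>.<PID>.log; tail the newest one
    tail -n 50 "$(ls -t /var/log/HPCCSystems/mydataconnectors/HDFSCONNECTOR.*.log | head -1)"
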
+ 19 - 18
docs/HDFS_Stream/HDFS_Mods/HDFS_PipeIn.xml

@@ -2,10 +2,10 @@
 <!DOCTYPE sect1 PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
 "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
 <sect1 id="PIPEIN" role="nobrk">
-  <title>HDFSPipe.PipeIn</title>
+  <title>HDFSConnector.PipeIn</title>
 
-  <para><emphasis role="bold">HDFSPipe.PipeIn </emphasis><emphasis>( ECL_RS,
-  HadoopFileName, Layout, HadoopFileFormat, HDFSHost, HDFSPort
+  <para><emphasis role="bold">HDFSConnector.PipeIn </emphasis><emphasis>(
+  ECL_RS, HadoopFileName, Layout, HadoopFileFormat, HDFSHost, HDFSPort
   )</emphasis></para>
 
   <para><informaltable colsep="0" frame="none" rowsep="0">
@@ -49,20 +49,21 @@
           <row>
             <entry><emphasis>HDFSPort</emphasis></entry>
 
-            <entry>The Hadoop DFS port number.</entry>
+            <entry>The Hadoop NameNode port number.</entry>
           </row>
         </tbody>
       </tgroup>
     </informaltable></para>
 
-  <para>The <emphasis role="bold">HDFSPipe.PipeIn </emphasis>macro is called
-  to pipe in data from the Hadoop file system (HDFS) to a Thor Cluster.</para>
+  <para>The <emphasis role="bold">HDFSConnector.PipeIn </emphasis>macro is
+  called to pipe in data from the Hadoop file system (HDFS) to a Thor
+  Cluster.</para>
 
   <para>Example:</para>
 
   <programlisting>#OPTION('pickBestEngine', 0);
 IMPORT std;
-IMPORT DataStream;
+IMPORT DataConnectors;
 Layout_Flat := RECORD
   STRING10  fname;
   STRING10  lname;
@@ -75,19 +76,19 @@ Layout_Flat := RECORD
   UNSIGNED1 one;
   UNSIGNED8 id;
 END;
-DataStream.HDFSPipe.PipeIn(certrecords, 
-                           '/user/hadoop/test/cert1', 
-                           Layout_Flat, 
-                           FLAT, 
+DataConnectors.HDFSConnector.PipeIn(MyDataFile, 
+                           '/user/hadoop/test/MyData1', 
+                           Layout_Flat, FLAT, 
                            '192.168.56.120', 
-                           54310)
+                           54310);
+OUTPUT(MyDataFile);
 </programlisting>
 
   <?hard-pagebreak ?>
 
   <programlisting>#OPTION('pickBestEngine', 0);
 IMPORT std;
-IMPORT DataStream;
+IMPORT DataConnectors;
 Layout_CSV := RECORD
   STRING10 fname;
   STRING10 lname;
@@ -100,12 +101,12 @@ Layout_CSV := RECORD
   STRING3  one;
   STRING20 id;
 END;
-DataStream.HDFSPipe.PipeIn(certrecords, 
-                           '/user/Administrator/test/cert1', 
-                           Layout_CSV, 
-                           CSV(SEPARATOR('|')), 
+DataConnectors.HDFSConnector.PipeIn(MyDataFile, 
+                           '/user/Administrator/test/MyData1', 
+                           Layout_CSV, CSV(SEPARATOR('|')), 
                            '192.168.56.120', 
-                           54310)
+                           54310);
+OUTPUT(MyDataFile);
 </programlisting>
 
   <para></para>

+ 26 - 25
docs/HDFS_Stream/HDFS_Mods/HDFS_PipeOut.xml

@@ -2,11 +2,11 @@
 <!DOCTYPE sect1 PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
 "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
 <sect1 id="PIPEOUT">
-  <title>HDFSPipe.PipeOut</title>
+  <title>HDFSConnector.PipeOut</title>
 
-  <para><emphasis role="bold">HDFSPipe.PipeOut </emphasis><emphasis> (ECL_RS,
-  HadoopFileName, Layout, HadoopFileFormat, HDFSHost, HDFSPort, HDFSUser
-  )</emphasis></para>
+  <para><emphasis role="bold">HDFSConnector.PipeOut </emphasis><emphasis>
+  (ECL_RS, HadoopFileName, Layout, HadoopFileFormat, HDFSHost, HDFSPort,
+  HDFSUser )</emphasis></para>
 
   <para><informaltable colsep="0" frame="none" rowsep="0">
       <tgroup cols="2">
@@ -18,7 +18,7 @@
           <row>
             <entry><emphasis>ECL_RS</emphasis></entry>
 
-            <entry>The ECL recordset to stream out.</entry>
+            <entry>The ECL recordset to export.</entry>
           </row>
 
           <row>
@@ -48,7 +48,7 @@
           <row>
             <entry><emphasis>HDFSPort</emphasis></entry>
 
-            <entry>The Hadoop DFS port number.</entry>
+            <entry>The Hadoop NameNode port number.</entry>
           </row>
 
           <row>
@@ -62,17 +62,18 @@
       </tgroup>
     </informaltable></para>
 
-  <para>The <emphasis role="bold">HDFSPipe.Pipeout </emphasis>macro writes the
-  given <emphasis>ECL_RS</emphasis> recordset to the target HDFS system in
-  file parts -- one file part for each HPCC Thor node. You can then use other
-  means to merge the file parts or you can use <emphasis
-  role="bold">HDFSPipe.PipeOutAndMerge</emphasis> to do both tasks.</para>
+  <para>The <emphasis role="bold">HDFSConnector.Pipeout </emphasis>macro
+  writes the given <emphasis>ECL_RS</emphasis> recordset to the target HDFS
+  system in file parts -- one file part for each HPCC Thor node. You can then
+  use other means to merge the file parts or you can use <emphasis
+  role="bold">HDFSConnector.PipeOutAndMerge</emphasis> to do both
+  tasks.</para>
 
   <para>Examples:</para>
 
   <programlisting>#OPTION('pickBestEngine', 0);  
 IMPORT std;
-IMPORT DataStream;
+IMPORT DataConnectors;
 Layout_Flat :=RECORD
   STRING10 fname;
   STRING10 lname;
@@ -85,21 +86,21 @@ Layout_Flat :=RECORD
   UNSIGNED1 one;
   UNSIGNED8 id;
 END;
-hpcccertrecords := DATASET('~certification::full_test_distributed',Layout_Flat, FLAT);
+MyDataFile := DATASET('~certification::full_test_distributed',Layout_Flat, FLAT);
 //piping out hpcccertrecords to a flat file in HDFS called /user/hadoop/test/cert1,
-DataStream.HDFSPipe.PipeOut(hpcccertrecords, 
-                            '/user/hadoop/test/cert1',
-                            Layout_Flat, 
-                            FLAT, 
+DataConnectors.HDFSConnector.PipeOut(MyDataFile, 
+                            '/user/hadoop/test/MyData1',
+                            Layout_Flat, FLAT, 
                             '192.168.56.120', 
                             54310 
-                            'hadoop' )</programlisting>
+                            'hadoopusername' );
+</programlisting>
 
   <?hard-pagebreak ?>
 
   <programlisting>#OPTION('pickBestEngine', 0);  
 IMPORT std;
-IMPORT DataStream;
+IMPORT DataConnectors;
 Layout_CSV := RECORD
   STRING10 fname;
   STRING10 lname;
@@ -112,15 +113,15 @@ Layout_CSV := RECORD
   STRING3  one;
   STRING20 id;
 END;
-hpcccertrecords := DATASET('~certification::full_test_distributed',Layout_CSV, CSV);
+MyDataFile := DATASET('~certification::full_test_distributed',Layout_CSV, CSV);
 //piping out hpcccertrecords to a CSV file in HDFS called /user/hadoop/test/cert1,
-DataStream.HDFSPipe.PipeOut(hpcccertrecords, 
-                            '/user/hadoop/test/cert1', 
-                            Layout_CSV, 
-                            CSV, 
+DataConnectors.HDFSConnector.PipeOut(MyDataFile, 
+                            '/user/hadoop/test/MyData1', 
+                            Layout_CSV, CSV, 
                             '192.168.56.120', 
                             54310 
-                            'hadoop' )</programlisting>
+                            'hadoopusername' );
+</programlisting>
 
   <para></para>
 </sect1>

+ 23 - 24
docs/HDFS_Stream/HDFS_Mods/HDFS_PipeOutandMerge.xml

@@ -2,11 +2,12 @@
 <!DOCTYPE sect1 PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
 "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
 <sect1 id="PIPEOUTANDMERGE">
-  <title>HDFSPipe.PipeOutAndMerge</title>
+  <title>HDFSConnector.PipeOutAndMerge</title>
 
-  <para><emphasis role="bold">HDFSPipe.PipeOutAndMerge</emphasis><emphasis>
-  (ECL_RS, HadoopFileName, Layout, HadoopFileFormat, HDFSHost, HDFSPort,
-  HDFSUser )</emphasis></para>
+  <para><emphasis
+  role="bold">HDFSConnector.PipeOutAndMerge</emphasis><emphasis> (ECL_RS,
+  HadoopFileName, Layout, HadoopFileFormat, HDFSHost, HDFSPort, HDFSUser
+  )</emphasis></para>
 
   <para><informaltable colsep="0" frame="none" rowsep="0">
       <tgroup cols="2">
@@ -18,7 +19,7 @@
           <row>
             <entry><emphasis>ECL_RS</emphasis></entry>
 
-            <entry>The ECL recordset to stream out.</entry>
+            <entry>The ECL recordset to export.</entry>
           </row>
 
           <row>
@@ -48,7 +49,7 @@
           <row>
             <entry><emphasis>HDFSPort</emphasis></entry>
 
-            <entry>The Hadoop DFS port number.</entry>
+            <entry>The Hadoop NameNode port number.</entry>
           </row>
 
           <row>
@@ -62,16 +63,16 @@
       </tgroup>
     </informaltable></para>
 
-  <para>The <emphasis role="bold">HDFSPipe.PipeOutAndMerge </emphasis>macro
-  writes the given <emphasis>ECL_RS</emphasis> recordset to the target HDFS
-  system in file parts and merges them together to form a single target file
-  on the HDFS system.</para>
+  <para>The <emphasis role="bold">HDFSConnector.PipeOutAndMerge
+  </emphasis>macro writes the given <emphasis>ECL_RS</emphasis> recordset to
+  the target HDFS system in file parts and merges them together to form a
+  single target file on the HDFS system.</para>
 
   <para>Example:</para>
 
   <programlisting>#OPTION('pickBestEngine', 0);  
 IMPORT std;
-IMPORT DataStream;
+IMPORT DataConnectors;
 Layout_Flat :=RECORD
   STRING10  fname;
   STRING10  lname;
@@ -84,21 +85,20 @@ Layout_Flat :=RECORD
   UNSIGNED1 one;
   UNSIGNED8 id;
 END;
-hpcccertrecords := DATASET('~certification::full_test_distributed',Layout_Flat, FLAT);
-DataStream.HDFSPipe.PipeOutAndMerge(hpcccertrecords, 
-                                    '/user/hadoop/test/cert1', 
-                                    Layout_Flat, 
-                                    FLAT, 
+MyDataFile := DATASET('~certification::full_test_distributed',Layout_Flat, FLAT);
+DataConnectors.HDFSConnector.PipeOutAndMerge(MyDataFile, 
+                                    '/user/hadoop/test/MyData1', 
+                                    Layout_Flat, FLAT, 
                                     '192.168.56.120', 
                                     54310, 
-                                    'hadoop' )
+                                    'hadoopusername' );
 </programlisting>
 
   <?hard-pagebreak ?>
 
   <programlisting>#OPTION('pickBestEngine', 0);  
 IMPORT std;
-IMPORT DataStream;
+IMPORT DataConnectors;
 Layout_CSV := RECORD
   STRING10 fname;
   STRING10 lname;
@@ -111,14 +111,13 @@ Layout_CSV := RECORD
   STRING3  one;
   STRING20 id;
 END;
-hpcccertrecords := DATASET('~certification::full_test_distributed',Layout_CSV, CSV);
-DataStream.HDFSPipe.PipeOutAndMerge(hpcccertrecords, 
-                                    '/user/hadoop/test/cert1', 
-                                    Layout_CSV, 
-                                    CSV, 
+MyDataFile := DATASET('~certification::full_test_distributed',Layout_CSV, CSV);
+DataConnectors.HDFSConnector.PipeOutAndMerge(MyDataFile, 
+                                    '/user/hadoop/test/MyData1', 
+                                    Layout_CSV, CSV, 
                                     '192.168.56.120', 
                                     54310, 
-                                    'hadoop' )
+                                    'hadoopusername' );
 </programlisting>
 
   <para></para>

+ 5 - 5
docs/HDFS_Stream/HDFS_to_HPCC_ConnectorIncluder.xml

@@ -53,11 +53,11 @@
   <chapter>
     <title>HDFS to HPCC Connector</title>
 
-    <xi:include href="HDFS_Stream/HDFS_Mods/HDFS_Intro.xml"
+    <xi:include href="HDFSConnector/HDFS_Mods/HDFS_Intro.xml"
                 xpointer="element(/1)"
                 xmlns:xi="http://www.w3.org/2001/XInclude" />
     
-    <xi:include href="HDFS_Stream/HDFS_Mods/HDFS_Install.xml"
+    <xi:include href="HDFSConnector/HDFS_Mods/HDFS_Install.xml"
                 xpointer="element(/1)"
                 xmlns:xi="http://www.w3.org/2001/XInclude" />                
   </chapter>
@@ -65,15 +65,15 @@
   <chapter>
     <title>ECL Macros</title>
 
-    <xi:include href="HDFS_Stream/HDFS_Mods/HDFS_PipeIn.xml"
+    <xi:include href="HDFSConnector/HDFS_Mods/HDFS_PipeIn.xml"
                 xpointer="element(/1)"
                 xmlns:xi="http://www.w3.org/2001/XInclude" />
 
-    <xi:include href="HDFS_Stream/HDFS_Mods/HDFS_PipeOut.xml"
+    <xi:include href="HDFSConnector/HDFS_Mods/HDFS_PipeOut.xml"
                 xpointer="element(/1)"
                 xmlns:xi="http://www.w3.org/2001/XInclude" />
 
-    <xi:include href="HDFS_Stream/HDFS_Mods/HDFS_PipeOutandMerge.xml"
+    <xi:include href="HDFSConnector/HDFS_Mods/HDFS_PipeOutandMerge.xml"
                 xpointer="element(/1)"
                 xmlns:xi="http://www.w3.org/2001/XInclude" />
   </chapter>

+ 0 - 74
docs/HDFS_Stream/HDFS_Mods/HDFS_Intro.xml

@@ -1,74 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE sect1 PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
-"http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
-<sect1 id="IntroHDFS" role="nobrk">
-  <title>Introduction</title>
-
-  <para>The HDFS to HPCC Connector provides a means to import data from a
-  Hadoop-based HDFS into an HPCC Systems Thor platform. It also supports
-  exporting the data back to the HDFS or exporting and merging it.</para>
-
-  <para>This allows you to use an HPCC cluster in conjunction with your Hadoop
-  based cluster.</para>
-
-  <para>The H2H Connector is an add-on to an HPCC Cluster and consists
-  of:</para>
-
-  <para><itemizedlist>
-      <listitem>
-        <para>The Server-side components:<itemizedlist>
-            <listitem>
-              <para>The executable ( /opt/HPCCSystems/bin/hdfsstream )</para>
-            </listitem>
-
-            <listitem>
-              <para>The shell script (/opt/HPCCSystems/bin/hdfspipe)</para>
-            </listitem>
-
-            <listitem>
-              <para>The configuration file
-              (/opt/HPCCSystems/etc/HPCCSystems/hdfsstream.conf)</para>
-
-              <para>The configuration file contains one line:</para>
-
-              <programlisting>HADOOP_LOCATION=/usr/local/hadoop</programlisting>
-
-              <para>where the value is set to the location where Hadoop is
-              installed. This allows access to the libhdfs (API)
-              library.</para>
-            </listitem>
-          </itemizedlist></para>
-      </listitem>
-
-      <listitem>
-        <para>The ECL Macros (HDFSPipe.ecl)</para>
-
-        <itemizedlist>
-          <listitem>
-            <para>HDFSPipe.PipeIn</para>
-
-            <para>Imports data from the Hadoop file system (HDFS) to a Thor
-            Cluster.</para>
-          </listitem>
-
-          <listitem>
-            <para>HDFSPipe.PipeOut</para>
-
-            <para>Exports data from a Thor Cluster to a Hadoop file system
-            (HDFS).</para>
-          </listitem>
-
-          <listitem>
-            <para>HDFSPipe.PipeOutAndMerge</para>
-
-            <para>Exports data from a Thor Cluster to a Hadoop file system
-            (HDFS) and merges the data.</para>
-          </listitem>
-        </itemizedlist>
-      </listitem>
-
-      <listitem>
-        <para>The HDFS to HPCC Connector User's Guide</para>
-      </listitem>
-    </itemizedlist></para>
-</sect1>

+ 2 - 2
plugins/CMakeLists.txt

@@ -24,6 +24,6 @@ add_subdirectory (parselib)
 add_subdirectory (stringlib)
 add_subdirectory (unicodelib)
 add_subdirectory (workunitservices)
-if (USE_HDFSSTREAM)
-  add_subdirectory (datastream)
+if (USE_HDFSCONNECTOR)
+  add_subdirectory (dataconnectors)
 endif()

+ 5 - 5
plugins/datastream/CMakeLists.txt

@@ -1,8 +1,8 @@
-project (hpccsystems-datastream)
+project (hpccsystems-dataconnector)
 cmake_minimum_required (VERSION 2.6)
 
-set ( HPCC_DATASTREAM_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
-set ( HPCC_SOURCE_DIR ${HPCC_DATASTREAM_SOURCE_DIR}/../../)
+set ( HPCC_DATACONNECTOR_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
+set ( HPCC_SOURCE_DIR ${HPCC_DATACONNECTOR_SOURCE_DIR}/../../)
 include(${HPCC_SOURCE_DIR}/version.cmake)
 
 set ( CMAKE_MODULE_PATH "${HPCC_SOURCE_DIR}/cmake_modules")
@@ -55,7 +55,7 @@ if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
     set(CPACK_STRIP_FILES TRUE)
 endif()
 
-set ( CPACK_INSTALL_CMAKE_PROJECTS "${CMAKE_CURRENT_BINARY_DIR};hdfsstream;ALL;/")
+set ( CPACK_INSTALL_CMAKE_PROJECTS "${CMAKE_CURRENT_BINARY_DIR};hdfsconnector;ALL;/")
 
 if ( CMAKE_SYSTEM MATCHES Linux )
     EXECUTE_PROCESS (
@@ -137,6 +137,6 @@ else()
     message("WARNING: CMAKE 2.8.1 or later required to create RPMs from this project")
 endif()
 
-add_subdirectory (hdfsstream)
+add_subdirectory (hdfsconnector)
 
 INCLUDE(CPack)

+ 12 - 10
plugins/datastream/hdfsstream/CMakeLists.txt

@@ -1,7 +1,7 @@
-project(hdfsstream)
+project(hdfsconnector)
 
-option(USE_HDFSSTREAM "Configure use of hdstream plugin" OFF)
-if ( USE_HDFSSTREAM )
+option(USE_HDFSCONNECTOR "Configure use of hdfs data connector" OFF)
+if ( USE_HDFSCONNECTOR )
 add_subdirectory (ecl)
 	option(HADOOP_PATH "Set the Hadoop path.")
 	if( NOT HADOOP_PATH )
@@ -12,15 +12,17 @@ add_subdirectory (ecl)
 	#generate config for script.
 	#add script processor for vars.
 
-	configure_file("hdfsstream.conf.in" "hdfsstream.conf")
+	set(HPCC_ETC_DIR "${CMAKE_INSTALL_PREFIX}/${OSSDIR}/etc")
+	set(HPCC_CONF_DIR "${CMAKE_INSTALL_PREFIX}/${OSSDIR}${CONFIG_DIR}")
+	set(HDFSCONN_CONF_FILE "hdfsconnector.conf")
 
-	set(HDFSCONFIG "${CMAKE_INSTALL_PREFIX}/${OSSDIR}${CONFIG_DIR}")
+	configure_file("hdfsconnector.conf.in" "hdfsconnector.conf")
 	configure_file("hdfspipe.in" "hdfspipe" @ONLY )
 
 	find_package(JNI REQUIRED)
 	find_package(LIBHDFS REQUIRED)
 
-	set( SRC hdfsstream.cpp )
+	set( SRC hdfsconnector.cpp )
 
 	include_directories (
 					${CMAKE_BINARY_DIR}
@@ -30,13 +32,13 @@ add_subdirectory (ecl)
 					${JAVA_INCLUDE_PATH2}
 					${LIBHDFS_INCLUDE_DIR}	)
 
-	add_executable( hdfsstream ${SRC} )
+	add_executable( hdfsconnector ${SRC} )
 
 	set ( INSTALLDIR "${OSSDIR}/bin")
-	Install ( TARGETS hdfsstream DESTINATION ${INSTALLDIR} COMPONENT Runtime)
+	Install ( TARGETS hdfsconnector DESTINATION ${INSTALLDIR} COMPONENT Runtime)
 	Install ( PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/hdfspipe DESTINATION ${INSTALLDIR} COMPONENT Runtime )
-	Install ( FILES ${CMAKE_CURRENT_BINARY_DIR}/hdfsstream.conf DESTINATION ${HDFSCONFIG} COMPONENT Runtime )
-	target_link_libraries ( hdfsstream
+	Install ( FILES ${CMAKE_CURRENT_BINARY_DIR}/hdfsconnector.conf DESTINATION ${HPCC_CONF_DIR} COMPONENT Runtime )
+	target_link_libraries ( hdfsconnector
 					${JAVA_JVM_LIBRARY}
 					${LIBHDFS_LIBRARIES})
 endif()

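For anyone building the renamed plugin, a minimal configure-and-build sketch using the options this CMakeLists defines (the checkout path and Hadoop location are placeholders):

    # USE_HDFSCONNECTOR and HADOOP_PATH are the options declared above;
    # ~/HPCC-Platform and /usr/local/hadoop are illustrative paths only
    cmake ~/HPCC-Platform -DUSE_HDFSCONNECTOR=ON -DHADOOP_PATH=/usr/local/hadoop
    make hdfsconnector
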
+ 1 - 0
plugins/dataconnectors/hdfsconnector/ecl/CMakeLists.txt

@@ -0,0 +1 @@
+Install ( FILES HDFSConnector.ecl DESTINATION "${OSSDIR}/share/DataConnectors" COMPONENT Runtime )

+ 14 - 14
plugins/datastream/hdfsstream/ecl/HDFSPipe.ecl

@@ -1,4 +1,4 @@
-/* HDFSPipe
+/* HDFSConnector
 Pipe data to and from Hadoop
 
 It is necessary to add this option to your workunit:
@@ -19,12 +19,12 @@ enable it add this to hdfs-site.xml:
 
 import std;
 
-EXPORT HDFSPipe := MODULE
+EXPORT HDFSConnector := MODULE
 
     /*
-   * HDFSPipe.PipeIn - this macro to be called by the user to pipe in data from the Hadoop file system (HDFS).
+   * HDFSConnector.PipeIn - this macro to be called by the user to pipe in data from the Hadoop file system (HDFS).
      *
-     * @param ECL_RS            The ECL recordset to stream out.
+     * @param ECL_RS            The ECL recordset to pipe into.
      * @param HadoopFileName    The fully qualified target HDFS file name.
      * @param Layout            The structure which describes the ECL_RS recordset.
      * @param HadoopFileFormat  The Hadoop data file format : FLAT | CSV.
@@ -84,7 +84,7 @@ EXPORT HDFSPipe := MODULE
 				ECL_RS:= PIPE('hdfspipe -si '
 				+ ' -nodeid ' + STD.system.Thorlib.node()
 				+ ' -clustercount ' + STD.system.Thorlib.nodes()
-				+ ' -reclen ' + sizeof(Layout)
+				+ ' -maxlen ' + sizeof(Layout, MAX)
 				+ ' -filename ' + HadoopFileName
 				+ ' -format '	+  %formatstr%[1..3]
 				+ ' -terminator ' + %termcont2%
@@ -95,7 +95,7 @@ EXPORT HDFSPipe := MODULE
 				ECL_RS:= PIPE('hdfspipe -si '
 				+ ' -nodeid ' + STD.system.Thorlib.node()
 				+ ' -clustercount ' + STD.system.Thorlib.nodes()
-				+ ' -reclen ' + sizeof(Layout)
+				+ ' -maxlen ' + sizeof(Layout, MAX)
 				+ ' -filename ' + HadoopFileName
 				+ ' -format '	+  %formatstr%[1..3]
 				+ ' -host ' + HDFSHost	+ ' -port ' + HDSFPort,
@@ -114,10 +114,10 @@ EXPORT HDFSPipe := MODULE
 	ENDMACRO;
 
     /*
-    HadoopPipe.PipeOut - writes the given recordset 'ECL_RS' to the target HDFS system in
+    HDFSConnector.PipeOut - writes the given recordset 'ECL_RS' to the target HDFS system in
                                                 file parts. One file part for each HPCC node.
 
-    ECL_RS              - The ECL recordset to stream out.
+    ECL_RS              - The ECL recordset to pipe out.
     HadoopFileName      - The fully qualified target HDFS file name.
     Layout              - The structure which describes the ECL_RS recordset.
     HadoopFileFormat    - The Hadoop data file format : FLAT | CSV
@@ -128,8 +128,8 @@ EXPORT HDFSPipe := MODULE
 
     Example:
 
-    HadoopPipe.PipeOut(sue, '/user/hadoop/HDFSAccounts', Layout_CSV_Accounts, CSV, '192.168.56.102', '54310', 'hadoop');
-    HadoopPipe.PipeOut(sue, '/user/hadoop/HDFSPersons', Layout_Flat_Persons, FLAT, '192.168.56.102', '54310', 'hadoop');
+    HDFSConnector.PipeOut(sue, '/user/hadoop/HDFSAccounts', Layout_CSV_Accounts, CSV, '192.168.56.102', '54310', 'hadoop');
+    HDFSConnector.PipeOut(sue, '/user/hadoop/HDFSPersons', Layout_Flat_Persons, FLAT, '192.168.56.102', '54310', 'hadoop');
     */
 
 	export PipeOut(ECL_RS, HadoopFileName, Layout, HadoopFileFormat, HDFSHost, HDSFPort, HDFSUser) := MACRO
@@ -159,11 +159,11 @@ EXPORT HDFSPipe := MODULE
 	ENDMACRO;
 
     /*
-    HadoopPipe.PipeOutAndMerge - writes the given recordset 'ECL_RS' to the target HDFS system
+    HDFSConnector.PipeOutAndMerge - writes the given recordset 'ECL_RS' to the target HDFS system
                                                              in file parts and merges them together to form a single target file
                                                              on the HDFS system.
 
-    ECL_RS          - The ECL recordset to stream out.
+    ECL_RS          - The ECL recordset to pipe out.
     HadoopFileName  - The fully qualified target HDFS file name.
     Layout          - The structure which describes the ECL_RS recordset
     HadoopFileFormat- The Hadoop data file format : FLAT | CSV
@@ -174,8 +174,8 @@ EXPORT HDFSPipe := MODULE
 
     Example:
 
-    HadoopPipe.PipeOut(sue, '/user/hadoop/HDFSAccounts', Layout_CSV_Accounts, CSV, '192.168.56.102', '54310', 'hadoop');
-    HadoopPipe.PipeOut(sue, '/user/hadoop/HDFSPersons', Layout_Flat_Persons, FLAT, '192.168.56.102', '54310', 'hadoop');
+    HDFSConnector.PipeOut(sue, '/user/hadoop/HDFSAccounts', Layout_CSV_Accounts, CSV, '192.168.56.102', '54310', 'hadoop');
+    HDFSConnector.PipeOut(sue, '/user/hadoop/HDFSPersons', Layout_Flat_Persons, FLAT, '192.168.56.102', '54310', 'hadoop');
     */
 
 	export PipeOutAndMerge(ECL_RS, HadoopFileName, Layout, HadoopFileFormat, HDFSHost, HDSFPort, HDFSUser) := MACRO

+ 1 - 0
plugins/datastream/hdfsstream/hdfsstream.conf.in

@@ -1 +1,2 @@
 HADOOP_LOCATION=${HADOOP_PATH}
+LOGS_LOCATION=$log

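Once configure_file has substituted HADOOP_PATH, the generated hdfsconnector.conf is a small shell-style file that hdfspipe sources; an illustrative rendering (the Hadoop path is the example value used in the docs, and LOGS_LOCATION is resolved from the platform's log setting at run time):

    HADOOP_LOCATION=/usr/local/hadoop
    LOGS_LOCATION=$log
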
+ 11 - 14
plugins/datastream/hdfsstream/hdfsstream.cpp

@@ -10,7 +10,6 @@ using namespace std;
 using std::string;
 using std::vector;
 
-//#define EOL "\n\r"
 #define EOL "\n"
 
 tOffset getBlockSize(hdfsFS * filefs, const char * filename)
@@ -75,7 +74,7 @@ long getRecordCount(long fsize, int clustersize, int reclen, int nodeid)
 	if ((fsize / reclen) % clustersize >= nodeid + 1)
 	{
 		readSize += 1;
-		fprintf(stderr, "\nThis node will stream one extra rec\n");
+		fprintf(stderr, "\nThis node will pipe one extra rec\n");
 	}
 	return readSize;
 }
@@ -325,7 +324,7 @@ int readXMLOffset(hdfsFS * fs, const char * filename,
 					firstRowfound = strcmp(currentTag.c_str(),
 							openRowTag.c_str()) == 0;
 					if (firstRowfound)
-						fprintf(stderr, "--start streaming tag %s at %lu--\n",
+						fprintf(stderr, "--start piping tag %s at %lu--\n",
 								currentTag.c_str(), currentPos);
 				}
 
@@ -360,7 +359,7 @@ int readXMLOffset(hdfsFS * fs, const char * filename,
 						&& strcmp(currentTag.c_str(), closeRowTag.c_str()) == 0)
 				{
 					fprintf(stdout, "%s", currentTag.c_str());
-					fprintf(stderr, "--stop streaming at %s %lu--\n",
+					fprintf(stderr, "--stop piping at %s %lu--\n",
 							currentTag.c_str(), currentPos);
 					bytesLeft = 0;
 					break;
@@ -412,7 +411,7 @@ int readXMLOffset(hdfsFS * fs, const char * filename,
 
 int readCSVOffset(hdfsFS * fs, const char * filename, unsigned long seekPos,
 		unsigned long readlen, const char * eolseq, unsigned long bufferSize, bool outputTerminator,
-		unsigned long recLen, unsigned long maxlen, const char * quote)
+		unsigned long recLen, unsigned long maxLen, const char * quote)
 {
 	fprintf(stderr, "CSV terminator: \'%s\' and quote: \'%c\'\n", eolseq, quote[0]);
 	unsigned long recsFound = 0;
@@ -514,7 +513,7 @@ int readCSVOffset(hdfsFS * fs, const char * filename, unsigned long seekPos,
 						currentPos = currentPos + eolseqlen - 1;
 						bytesLeft = bytesLeft - eolseqlen;
 
-						fprintf(stderr, "\n--Start streaming: %ld--\n", currentPos);
+						fprintf(stderr, "\n--Start reading: %ld--\n", currentPos);
 
 						firstEOLfound = true;
 						continue;
@@ -534,7 +533,7 @@ int readCSVOffset(hdfsFS * fs, const char * filename, unsigned long seekPos,
 					//fprintf(stderr, "\nrecsfound: %ld", recsFound);
 					if (stopAtNextEOL)
 					{
-						fprintf(stderr, "\n--Stop streaming: %ld--\n", currentPos);
+						fprintf(stderr, "\n--Stop piping: %ld--\n", currentPos);
 						//fprintf(stdout, "%s", eolseq);
 						bytesLeft = 0;
 						break;
@@ -555,7 +554,7 @@ int readCSVOffset(hdfsFS * fs, const char * filename, unsigned long seekPos,
 				}
 			}
 
-			//don't stream until we're beyond the first EOL (if offset = 0 start streaming ASAP)
+			//don't pipe until we're beyond the first EOL (if offset = 0 start piping ASAP)
 			if (firstEOLfound)
 			{
 				fprintf(stdout, "%c", currChar);
@@ -565,7 +564,7 @@ int readCSVOffset(hdfsFS * fs, const char * filename, unsigned long seekPos,
 			{
 				fprintf(stderr, "%c", currChar);
 				bytesLeft--;
-				if(recLen > 0 && currentPos-seekPos > recLen * 100)
+				if(maxLen > 0 && currentPos-seekPos > maxLen * 10)
 				{
 					fprintf(stderr, "\nFirst EOL was not found within the first %lu bytes", currentPos-seekPos);
 					exit(-1);
@@ -575,7 +574,7 @@ int readCSVOffset(hdfsFS * fs, const char * filename, unsigned long seekPos,
 			if (stopAtNextEOL)
 				fprintf(stderr, "%c", currChar);
 
-			// ok, so if bytesLeft <= 0 at this point, we need to keep reading
+			// ok, so if bytesLeft <= 0 at this point, we need to keep piping
 			// IF the last char read was not an EOL char
 			if (bytesLeft <= 0	&& currChar != eolseq[0])
 			{
@@ -623,7 +622,7 @@ int readFileOffset(hdfsFS * fs, const char * filename, tOffset seekPos,
 
 	unsigned long currentPos = seekPos;
 
-	fprintf(stderr, "\n--Start streaming: %ld--\n", currentPos);
+	fprintf(stderr, "\n--Start piping: %ld--\n", currentPos);
 
 	unsigned long bytesLeft = readlen;
 	while(hdfsAvailable(*fs, readFile) && bytesLeft >0)
@@ -821,7 +820,7 @@ int writeFlatOffset(hdfsFS * fs, const char * filename, unsigned nodeid, unsigne
 	size_t totalbytesread = 0;
 	size_t totalbyteswritten = 0;
 
-	fprintf(stderr, "Writing %s to HDFS [.", filepartname);
+	fprintf(stderr, "Writing %s to HDFS.", filepartname);
  	while(!in.eof())
  	{
  		memset(&char_ptr[0], 0, sizeof(char_ptr));
@@ -831,7 +830,6 @@ int writeFlatOffset(hdfsFS * fs, const char * filename, unsigned nodeid, unsigne
  		tSize num_written_bytes = hdfsWrite(*fs, writeFile, (void*)char_ptr, bytesread);
  		totalbyteswritten += num_written_bytes;
 
- 		fprintf(stderr, ".");
  		//Need to figure out how often this should be done
  		//if(totalbyteswritten % )
 
@@ -850,7 +848,6 @@ int writeFlatOffset(hdfsFS * fs, const char * filename, unsigned nodeid, unsigne
  		fprintf(stderr, "Failed to 'flush' %s\n", filepartname);
 		exit(-1);
 	}
- 	fprintf(stderr, "]");
 
 	fprintf(stderr,"\n total read: %lu, total written: %lu\n", totalbytesread, totalbyteswritten);
 

+ 20 - 9
plugins/datastream/hdfsstream/hdfspipe.in

@@ -1,7 +1,9 @@
 #!/bin/bash
 
+source @HPCC_ETC_DIR@/init.d/hpcc_common
+set_environmentvars
 
-source @HDFSCONFIG@/hdfsstream.conf 
+source @HPCC_CONF_DIR@/@HDFSCONN_CONF_FILE@ 
 
 CLASSPATH=$CLASSPATH:$HADOOP_LOCATION/conf
 
@@ -22,7 +24,6 @@ nodeid=0;
 
 for p in $*;
  do
-#   echo "[$p]" >> $LOG;
    if [ "$idfound" = "1" ];
    then
         nodeid=$p;
@@ -33,7 +34,17 @@ for p in $*;
    fi
 done;
 
-LOG=/tmp/HPCC-HadoopStream.log.$nodeid.$PID
+#the log variable is read from the HPCC Platform config
+LOGS_LOCATION=$log
+HDFSCONNLOGLOC=$LOGS_LOCATION/mydataconnectors
+LOG=$HDFSCONNLOGLOC/HDFSCONNECTOR.$nodeid.$PID.log
+
+if [ -e $HDFSCONNLOGLOC ]
+  then
+    echo "log file found"	>> $LOG
+  else
+    mkdir $HDFSCONNLOGLOC
+fi
 
 echo "Script starting"		>> $LOG
 echo "Running as user: $USER"   >> $LOG
@@ -47,10 +58,10 @@ then
 	exit 1;
 elif [ $1 = "-mf" ];
 then
-	/opt/HPCCSystems/bin/hdfsstream "${@}" 2>> $LOG;
+	/opt/HPCCSystems/bin/hdfsconnector "${@}" 2>> $LOG;
 elif [ $1 = "-si" ];
 then
-	/opt/HPCCSystems/bin/hdfsstream  "${@}" 2>> $LOG;
+	/opt/HPCCSystems/bin/hdfsconnector  "${@}" 2>> $LOG;
 elif [ $1 = "-so" ];
 then
 
@@ -68,9 +79,9 @@ then
 
 	ls -l "$HPCCTMPFILE" 				>> $LOG
 
-	echo "calling hdfsstream..." 		>> $LOG
+	echo "calling hdfsconnector..." 		>> $LOG
 
-	/opt/HPCCSystems/bin/hdfsstream "${@}" -pipepath $HPCCTMPFILE  	2>> $LOG
+	/opt/HPCCSystems/bin/hdfsconnector "${@}" -pipepath $HPCCTMPFILE  	2>> $LOG
 
 	echo "write exited with: $?" 			>> $LOG
 elif [ $1 = "-sop" ];
@@ -85,9 +96,9 @@ then
 	then
 		rm -f /tmp/HPCC-FIFO.err.$PID 2> /dev/null
 	else
-		echo "  WARNING (hdfsstream mkfifo) error registered in file: /tmp/HPCC-FIFO.err.$PID " >> $LOG
+		echo "  WARNING (hdfsconnector mkfifo) error registered in file: /tmp/HPCC-FIFO.err.$PID " >> $LOG
 	fi
-	/opt/HPCCSystems/bin/hdfsstream  "${@}" -pipepath $pipepath	2>> $LOG &
+	/opt/HPCCSystems/bin/hdfsconnector  "${@}" -pipepath $pipepath	2>> $LOG &
 	echo "redirecting stdin to named pipe ... " 	>> $LOG
 	cat < /dev/stdin > "$pipepath"			2>> $LOG
 

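To make the wrapper's role concrete, here is an illustrative read (-si) invocation of the kind the HDFSConnector.PipeIn macro assembles through PIPE; every value is a placeholder drawn from the examples elsewhere in this commit:

    # Flags mirror those built by the PipeIn macro for a FLAT read; node id,
    # cluster size, record length, file name, host and port are examples only
    /opt/HPCCSystems/bin/hdfspipe -si \
        -nodeid 0 \
        -clustercount 3 \
        -maxlen 70 \
        -filename /user/hadoop/test/MyData1 \
        -format FLA \
        -host 192.168.56.120 -port 54310
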
+ 0 - 1
plugins/datastream/hdfsstream/ecl/CMakeLists.txt

@@ -1 +0,0 @@
-Install ( FILES HDFSPipe.ecl DESTINATION "${OSSDIR}/share/DataStream" COMPONENT Runtime )