Browse Source

HPCC-17133 Document LOOKUP attribute

Signed-off-by: Jim DeFabia <jamesdefabia@lexisnexis.com>
Jim DeFabia 8 years ago
parent
commit
dba55c4bfd

+ 11 - 2
docs/ECLLanguageReference/ECLR_mods/Recrd-DATASET.xml

@@ -8,11 +8,11 @@
 
   <para><emphasis>attr</emphasis><emphasis role="bold"> :=
   DATASET(</emphasis><emphasis> file, struct, filetype </emphasis><emphasis
-  role="bold">);</emphasis></para>
+  role="bold">[,LOOKUP]);</emphasis></para>
 
   <para><emphasis>attr</emphasis><emphasis role="bold"> :=
   DATASET(</emphasis><emphasis> dataset, file, filetype </emphasis><emphasis
-  role="bold">);</emphasis></para>
+  role="bold">[,LOOKUP]);</emphasis></para>
 
   <para><emphasis>attr</emphasis><emphasis role="bold"> :=
   DATASET(</emphasis><emphasis> </emphasis><emphasis
@@ -109,6 +109,15 @@
         </row>
 
         <row>
+          <entry><emphasis role="bold">LOOKUP</emphasis></entry>
+
+          <entry>Optional. Specifies that the file layout should be looked up
+          at compile time. See <emphasis>File Layout Resolution at Compile
+          Time</emphasis> in the <emphasis>Programmer's Guide</emphasis> for
+          more details.</entry>
+        </row>
+
+        <row>
           <entry><emphasis role="bold">WORKUNIT</emphasis></entry>
 
           <entry>Specifies the DATASET is the result of an OUTPUT with the

+ 11 - 0
docs/ECLLanguageReference/ECLR_mods/Templ-OPTION.xml

@@ -606,6 +606,17 @@
                   <primary>processor affinity</primary>
                 </indexterm></entry>
             </row>
+
+            <row>
+              <entry><emphasis>translateDFSlayouts</emphasis></entry>
+
+              <entry>Default = 0</entry>
+
+              <entry>Specifies that file layouts should be looked up at
+              compile time. See <emphasis>File Layout Resolution at Compile
+              Time</emphasis> in the <emphasis>Programmer's Guide</emphasis>
+              for more details.</entry>
+            </row>
           </tbody>
         </tgroup>
       </informaltable></para>

+ 13 - 3
docs/ECLLanguageReference/ECLR_mods/Value-RecordOf.xml

@@ -8,8 +8,8 @@
       <primary>RECORDOF</primary>
     </indexterm><indexterm>
       <primary>RECORDOF datatype</primary>
-    </indexterm>(</emphasis><emphasis> recordset </emphasis><emphasis
-  role="bold">)</emphasis></para>
+    </indexterm>(</emphasis><emphasis> recordset , </emphasis><emphasis
+  role="bold">[LOOKUP])</emphasis></para>
 
   <informaltable colsep="1" frame="all" rowsep="1">
     <tgroup cols="2">
@@ -22,7 +22,17 @@
           <entry><emphasis>recordset</emphasis></entry>
 
           <entry>The set of data records whose RECORD structure to use. This
-          may be a DATASET or any derived recordset.</entry>
+          may be a DATASET or any derived recordset. If the LOOKUP attribute
+          is used, this may be a filename. </entry>
+        </row>
+
+        <row>
+          <entry><emphasis role="bold">LOOKUP</emphasis></entry>
+
+          <entry>Optional. Specifies that the file layout should be looked up
+          at compile time. See <emphasis>File Layout Resolution at Compile
+          Time</emphasis> in the <emphasis>Programmer's Guide</emphasis> for
+          more details.</entry>
         </row>
       </tbody>
     </tgroup>

+ 232 - 0
docs/ECLProgrammersGuide/PRG_Mods/PrG_File_layout_resolution.xml

@@ -0,0 +1,232 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE sect1 PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
+"http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+<sect1 id="File_layout_resolution">
+  <title>File Layout Resolution at Compile Time</title>
+
+  <para>When reading a disk file in ECL, the layout of the file is specified
+  in the ECL code. This allows the code to be compiled to access the data very
+  efficiently, but can cause issues if the file on disk is actually using a
+  different layout.</para>
+
+  <para>In particular, it can present a challenge to the version control
+  process, if you have ECL queries that are being changed to add
+  functionality, but which need to be applied without modification to data
+  files whose layout is changing on a different timeline.</para>
+
+  <para>There has been a partial solution to this dilemma available in Roxie
+  for index files--the ability to apply runtime translation from the fields in
+  the physical index file to the fields specified in the index. However, that
+  has significant potential overhead and is not available for flat files or on
+  Thor. This feature supports flat files and Thor files.</para>
+
+  <para>A new feature, added in the HPCC Systems 6.4.0 release, allows file
+  resolution to be performed at compile time, which provides the following
+  advantages:</para>
+
+  <itemizedlist>
+    <listitem>
+      <para>Code changes can be insulated from file layout changes - you only
+      need to declare the fields you actually want to use from a
+      datafile.</para>
+    </listitem>
+  </itemizedlist>
+
+  <itemizedlist>
+    <listitem>
+      <para>File layout mismatches can be detected sooner.</para>
+    </listitem>
+  </itemizedlist>
+
+  <itemizedlist>
+    <listitem>
+      <para>The compiler can use information about file sizes to guide code
+      optimization decisions.</para>
+    </listitem>
+  </itemizedlist>
+
+  <para>There are two language constructs associated with this feature:</para>
+
+  <itemizedlist>
+    <listitem>
+      <para>Using a LOOKUP attribute on DATASET or INDEX declarations.</para>
+    </listitem>
+  </itemizedlist>
+
+  <itemizedlist>
+    <listitem>
+      <para>Using a LOOKUP attribute in a RECORDOF function.</para>
+    </listitem>
+  </itemizedlist>
+
+  <sect2>
+    <title>Using LOOKUP on a DATASET</title>
+
+    <para>Adding the LOOKUP attribute to a DATASET declaration indicates that
+    the file layout should be looked up at compile time:</para>
+
+    <programlisting>myrecord := RECORD
+  STRING field1;
+  STRING field2;
+END;
+
+f := DATASET(‘myfilename’, myrecord, FLAT);    
+  // This will fail at runtime if file layout does not match myrecord
+f := DATASET(‘myfilename’, myrecord, FLAT, LOOKUP);    
+  // This will automatically project from the actual to the requested layout
+</programlisting>
+
+    <para>If we assume that the actual layout of the file on disk is:</para>
+
+    <programlisting>myactualrecord := RECORD
+  STRING field1;
+  STRING field2;
+  STRING field3;
+END;</programlisting>
+
+    <?hard-pagebreak ?>
+
+    <para>Then the effect of the LOOKUP attribute will be as if your code
+    was:</para>
+
+    <programlisting>actualfile := DATASET(‘myfilename’, myactualrecord, FLAT);
+f := PROJECT(actualfile, TRANSFORM(myrecord, SELF := LEFT; SELF := []));
+</programlisting>
+
+    <para>Fields that are present in both record structures are assigned
+    across, fields that are present only in the disk version are dropped and
+    fields that are present only in the ECL version receive their default
+    value (a warning will be issued in this latter case).</para>
+
+    <para>There is also a compiler directive that can be used to specify
+    translation for all files:</para>
+
+    <para><programlisting>#OPTION('translateDFSlayouts',TRUE);</programlisting></para>
+
+    <para>The LOOKUP attribute accepts a parameter (TRUE or FALSE) to allow
+    easier control of where and when you want translation to occur. Any
+    Boolean expression that can be evaluated at compile time can be
+    supplied.</para>
+
+    <para>When using the #OPTION for <emphasis>translateDFSlayouts</emphasis>,
+    you may want to use LOOKUP(FALSE) to override the default on some specific
+    datasets.</para>
+  </sect2>
+
+  <sect2>
+    <title>Using LOOKUP in a RECORDOF function</title>
+
+    <para>Using a LOOKUP attribute in a RECORDOF function is useful when
+    fields were present in the original and later dropped or when you want to
+    write to a file that matches the layout of an existing file, but you don't
+    know the layout.</para>
+
+    <para>The LOOKUP attribute in the RECORDOF function takes a filename
+    rather than a dataset. The result is expanded at compile time to the
+    record layout stored in the named file’s metadata. There are several forms
+    of this construct:</para>
+
+    <para><programlisting>RECORDOF(‘myfile’, LOOKUP);
+RECORDOF(‘myfile', defaultstructure, LOOKUP);
+RECORDOF(‘myfile’, defaultstructure, LOOKUP, OPT);</programlisting>You can
+    also specify a DATASET as the first parameter instead of a filename (a
+    syntactic convenience) and the filename specified on the dataset will be
+    used for the lookup.</para>
+
+    <para>The <emphasis>defaultstructure</emphasis> is useful for situations
+    where the file layout information may not be available (for example, when
+    syntax-checking locally or creating an archive). It is also useful when
+    the file being looked up may not exist--this is where OPT should be
+    used.</para>
+
+    <para>The compiler checks that the actual record structure retrieved from
+    the distributed file system lookup contains all the fields specified, and
+    that the types are compatible.</para>
+
+    <para>For example, to read a file whose structure is unknown other than
+    that it contains an ID field, and create an output file containing all
+    records that matched a supplied value, you could write:</para>
+
+    <para><programlisting>myfile := DATASET(‘myinputfile’, RECORDOF(‘myinputfile’, { STRING id },
+                                                        LOOKUP), FLAT);
+filtered := myfile(id=‘123’);
+OUTPUT(filtered,,’myfilteredfile’);</programlisting></para>
+  </sect2>
+
+  <sect2>
+    <title>Additional Details</title>
+
+    <itemizedlist>
+      <listitem>
+        <para>The syntax is designed so that it is not necessary to perform
+        file resolution to be able to syntax-check or create archives. This is
+        important for local-repository mode to work.</para>
+      </listitem>
+
+      <listitem>
+        <para>Foreign file resolution works the same way - just use the
+        standard filename syntax for foreign filename resolution.</para>
+      </listitem>
+
+      <listitem>
+        <para>You can also use the LOOKUP attribute on INDEX declarations as
+        well as DATASET.</para>
+      </listitem>
+
+      <listitem>
+        <para>When using the RECORDOF form and supplying a default layout, you
+        may need to use the =&gt; form of the record layout syntax to specify
+        both keyed and payload fields in the same record.</para>
+      </listitem>
+
+      <listitem>
+        <para>Files that have been sprayed rather than created by ECL jobs may
+        not have record information (metadata) available in the distributed
+        file system.</para>
+      </listitem>
+
+      <listitem>
+        <para>There are some new parameters to eclcc that can be used if you
+        want to use this functionality for local compiles:</para>
+
+        <para><informaltable colsep="1" frame="all" rowsep="1">
+            <tgroup cols="2">
+              <colspec align="left" colwidth="125.55pt" />
+
+              <colspec />
+
+              <tbody>
+                <row>
+                  <entry>-dfs=ip</entry>
+
+                  <entry>Use specified Dali IP for filename
+                  resolution.</entry>
+                </row>
+
+                <row>
+                  <entry>-scope=prefix</entry>
+
+                  <entry>Use specified scope prefix in filename
+                  resolution.</entry>
+                </row>
+
+                <row>
+                  <entry>-user=id</entry>
+
+                  <entry>Use specified username in filename
+                  resolution.</entry>
+                </row>
+
+                <row>
+                  <entry>password=xxx</entry>
+
+                  <entry>Use specified password in filename resolution (Leave
+                  blank to prompt)</entry>
+                </row>
+              </tbody>
+            </tgroup>
+          </informaltable></para>
+      </listitem>
+    </itemizedlist>
+  </sect2>
+</sect1>

+ 5 - 0
docs/ECLProgrammersGuide/PrGd-Includer.xml

@@ -176,6 +176,11 @@
     <xi:include href="ECLProgrammersGuide/PRG_Mods/PrG_Hex_String_to_Decimal_String.xml"
                 xpointer="element(/1)"
                 xmlns:xi="http://www.w3.org/2001/XInclude" />
+                
+    <xi:include href="ECLProgrammersGuide/PRG_Mods/PrG_File_layout_resolution.xml"
+                xpointer="element(/1)"
+                xmlns:xi="http://www.w3.org/2001/XInclude" />
+                
   </chapter>
 
   <chapter id="EmbeddedLanguages_DataStores">