Sfoglia il codice sorgente

Merge pull request #8458 from JamesDeFabia/14927RegexfindsetDocs

HPCC-14927 Document REGEXFINDSET

Reviewed-By: Shamser Ahmed <shamser.ahmed@lexisnexis.co.uk>
Reviewed-By: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 9 anni fa
parent
commit
9da6ee428f

+ 4 - 0
docs/ECLLanguageReference/ECLR-includer.xml

@@ -781,6 +781,10 @@
                 xpointer="element(/1)"
                 xmlns:xi="http://www.w3.org/2001/XInclude" />
 
+    <xi:include href="ECLLanguageReference/ECLR_mods/BltInFunc-REGEXFINDSET.xml"
+                xpointer="element(/1)"
+                xmlns:xi="http://www.w3.org/2001/XInclude" />
+
     <xi:include href="ECLLanguageReference/ECLR_mods/BltInFunc-REGEXREPLACE.xml"
                 xpointer="element(/1)"
                 xmlns:xi="http://www.w3.org/2001/XInclude" />

+ 3 - 1
docs/ECLLanguageReference/ECLR_mods/BltInFunc-REGEXFIND.xml

@@ -108,5 +108,7 @@ REGEXFIND(searchpattern, search, 2); //returns '14'
 REGEXFIND(searchpattern, search, 3); //returns '1998'
 </programlisting>
 
-  <para>See Also: <link linkend="PARSE">PARSE</link>, <link linkend="REGEXREPLACE">REGEXREPLACE</link></para>
+  <para>See Also: <link linkend="PARSE">PARSE</link>, <link
+  linkend="REGEXFINDSET">REGEXFINDSET</link>, <link
+  linkend="REGEXREPLACE">REGEXREPLACE</link></para>
 </sect1>

+ 84 - 0
docs/ECLLanguageReference/ECLR_mods/BltInFunc-REGEXFINDSET.xml

@@ -0,0 +1,84 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE sect1 PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
+"http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+<sect1 id="REGEXFINDSET">
+  <title>REGEXFINDSET</title>
+
+  <para><emphasis role="bold">REGEXFINDSET<indexterm>
+      <primary>REGEXFINDSET</primary>
+    </indexterm><indexterm>
+      <primary>REGEXFINDSET function</primary>
+    </indexterm>(</emphasis><emphasis>regex, text </emphasis><emphasis
+  role="bold"> [, NOCASE<indexterm>
+      <primary>NOCASE</primary>
+    </indexterm>])</emphasis></para>
+
+  <para><informaltable colsep="1" frame="all" rowsep="1">
+      <tgroup cols="2">
+        <colspec colwidth="81.35pt" />
+
+        <colspec />
+
+        <tbody>
+          <row>
+            <entry><emphasis>regex</emphasis></entry>
+
+            <entry>A standard Perl regular expression.</entry>
+          </row>
+
+          <row>
+            <entry><emphasis>text</emphasis></entry>
+
+            <entry>The text to parse.</entry>
+          </row>
+
+          <row>
+            <entry><emphasis role="bold">NOCASE</emphasis></entry>
+
+            <entry>Optional. Specifies a case-insensitive search.</entry>
+          </row>
+
+          <row>
+            <entry>Return:</entry>
+
+            <entry>REGEXFINDSET returns a set of strings.</entry>
+          </row>
+        </tbody>
+      </tgroup>
+    </informaltable></para>
+
+  <para>The <emphasis role="bold">REGEXFINDSET </emphasis>function uses the
+  <emphasis>regex<indexterm>
+      <primary>regex</primary>
+    </indexterm></emphasis> to parse through the <emphasis>text</emphasis> and
+  find matches. The <emphasis>regex</emphasis> must be a standard Perl regular
+  expression<indexterm>
+      <primary>Perl regular expression</primary>
+    </indexterm>. We use third-party libraries to support this, so for
+  non-Unicode <emphasis>text</emphasis>, see the Boost docs at <emphasis
+  role="underline">http://www.boost.org/doc/libs/1_39_0/libs/regex/doc/html/index.html</emphasis>.
+  For Unicode <emphasis>text</emphasis>, see the ICU docs, the sections
+  ‘Regular Expression Metacharacters’ and ‘Regular Expression Operators’ at
+  <emphasis
+  role="underline">http://userguide.icu-project.org/strings/regexp</emphasis>
+  and the links from there, in particular the section ‘UnicodeSet patterns’ at
+  <emphasis
+  role="underline">http://userguide.icu-project.org/strings/unicodeset</emphasis>.
+  We use version 2.6 which should support all listed features.</para>
+
+  <para>Example:</para>
+
+  <programlisting>sampleStr := 'To: jane@example.com From: john@example.com This is the winter of our discontent.';
+eMails:=REGEXFINDSET('\\w+@[a-zA-Z_]+?\\.[a-zA-Z]{2,3}' , sampleStr);
+OUTPUT(eMails);
+
+UNICODE sampleStr2:= U'To: janë@example.com From john@example.com This is the winter of our discontent.';
+eMails2:= REGEXFINDSET(U'\\w+@[a-zA-Z_]+?\\.[a-zA-Z]{2,3}', sampleStr2);
+OUTPUT(eMails2);
+
+</programlisting>
+
+  <para>See Also: <link linkend="PARSE">PARSE</link>, <link
+  linkend="REGEXFIND">REGEXFIND</link>, <link
+  linkend="REGEXREPLACE">REGEXREPLACE</link></para>
+</sect1>