123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151 |
- <?xml version="1.0" encoding="UTF-8"?>
- <!DOCTYPE sect1 PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
- "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
- <sect1 id="NLP_RECORD_and_TRANSFORM_Functions">
- <title>NLP RECORD and TRANSFORM Functions</title>
- <para><emphasis>The following functions are used in field definition
- expressions within the RECORD structure<indexterm>
- <primary>RECORD structure</primary>
- </indexterm> or TRANSFORM function that defines the result set from the
- PARSE function<indexterm>
- <primary>PARSE function</primary>
- </indexterm>:</emphasis></para>
- <para><emphasis role="bold">MATCHED(</emphasis><emphasis>
- </emphasis><emphasis role="bold">[ </emphasis><emphasis>patternreference
- </emphasis><emphasis role="bold">] )</emphasis></para>
- <para><emphasis role="bold">MATCHED</emphasis> returns TRUE if the
- <emphasis>patternreference</emphasis> found a match, and FALSE otherwise. If the
- <emphasis>patternreference</emphasis> is omitted, MATCHED indicates whether the
- entire pattern matched or not (for use with the NOT MATCHED option).</para>
- <para><emphasis role="bold">MATCHTEXT</emphasis><emphasis>
- </emphasis><emphasis role="bold">[
- (</emphasis><emphasis>patternreference</emphasis><emphasis role="bold">)
- ]</emphasis></para>
- <para><emphasis role="bold">MATCHTEXT</emphasis> returns the matching ASCII
- text the <emphasis>patternreference</emphasis> found, or blank if not found.
- If the <emphasis>patternreference</emphasis> is omitted, MATCHTEXT returns
- all matching text.</para>
- <para><emphasis
- role="bold">MATCHUNICODE(</emphasis><emphasis>patternreference</emphasis><emphasis
- role="bold">)</emphasis></para>
- <para><emphasis role="bold">MATCHUNICODE</emphasis> returns the matching
- Unicode text the <emphasis>patternreference</emphasis> found, or blank if
- not found.</para>
- <para><emphasis
- role="bold">MATCHLENGTH(</emphasis><emphasis>patternreference</emphasis><emphasis
- role="bold">)</emphasis></para>
- <para><emphasis role="bold">MATCHLENGTH</emphasis> returns the number of
- characters in the matching text the <emphasis>patternreference</emphasis>
- found, or 0 if not found.</para>
- <para><emphasis
- role="bold">MATCHPOSITION(</emphasis><emphasis>patternreference</emphasis><emphasis
- role="bold">)</emphasis></para>
- <para><emphasis role="bold">MATCHPOSITION</emphasis> returns the position
- within the text of the first character in the matching text the
- <emphasis>patternreference</emphasis> found, or 0 if not found.</para>
- <para><emphasis
- role="bold">MATCHROW(</emphasis><emphasis>patternreference</emphasis><emphasis
- role="bold">)</emphasis></para>
- <para><emphasis role="bold">MATCHROW</emphasis> returns the entire row of
- the matching text the <emphasis>patternreference</emphasis> found for a RULE
- (valid only when the PARSE option is used on the PARSE function). This may
- be used to fully qualify a field in the RECORD structure of the row.</para>
- <sect2 id="Pattern_References">
- <title>Pattern References</title>
- <para>The <emphasis>patternreference</emphasis> parameter to these
- functions is a slash-delimited (/) list of previously defined PATTERN,
- TOKEN, or RULE attributes with or without an instance number appended in
- square brackets.</para>
- <para>If an instance number is supplied, the
- <emphasis>patternreference</emphasis> matches that particular occurrence;
- otherwise, it matches any occurrence. The <emphasis>patternreference</emphasis>
- provides a path through the regular expression grammar to a particular
- result. The path to a particular attribute can be either fully or
- partially specified.</para>
- <para>Example:</para>
- <programlisting>PATTERN ws := PATTERN('[ \t\r\n]');
- PATTERN arb := PATTERN('[-!.,\t a-zA-Z0-9]')+;
- PATTERN number := PATTERN('[0-9]')+;
- PATTERN age := '(' number OPT('/I') ')';
- PATTERN role := '[' arb ']';
- PATTERN m_rank := '&lt;' number '&gt;';
- PATTERN actor := arb OPT(ws '(I)' ws);
-
- NLP_layout_actor_movie := RECORD
- STRING30 actor_name := MATCHTEXT(actor);
- STRING50 movie_name := MATCHTEXT(arb[2]); //2nd instance of arb
- UNSIGNED2 movie_year := (UNSIGNED)MATCHTEXT(age/number);
- //number within age
- STRING20 movie_role := MATCHTEXT(role/arb); //arb within role
- UNSIGNED1 cast_rank := (UNSIGNED)MATCHTEXT(m_rank/number);
- END;
-
- // This example demonstrates the use of productions in PARSE code
- // (only supported in the Tomita version of PARSE).
- PATTERN ws := [' ','\t'];
- TOKEN number := PATTERN('[0-9]+');
- TOKEN plus := '+';
- TOKEN minus := '-';
- attrRec := RECORD
- INTEGER val;
- END;
- RULE(attrRec) e0 :=
- '(' USE(attrRec,expr)? ')' |
- number TRANSFORM(attrRec, SELF.val := (INTEGER)$1;) |
- '-' SELF TRANSFORM(attrRec, SELF.val := -$2.val;);
- RULE(attrRec) e1 :=
- e0 |
- SELF '*' e0 TRANSFORM(attrRec, SELF.val := $1.val * $3.val;) |
- USE(attrRec, e1) '/' e0
- TRANSFORM(attrRec, SELF.val := $1.val / $3.val;);
- RULE(attrRec) e2 :=
- e1 |
- SELF plus e1 TRANSFORM(attrRec, SELF.val := $1.val + $3.val;) |
- SELF minus e1 TRANSFORM(attrRec, SELF.val := $1.val - $3.val;);
- RULE(attrRec) expr := e2;
-
- infile := DATASET([{'1+2*3'},{'1+2*z'},{'1+2+(3+4)*4/2'}],
- { STRING line });
- resultsRec := RECORD
- RECORDOF(infile);
- attrRec;
- STRING exprText;
- INTEGER value3;
- END;
- resultsRec extractResults(infile l, attrRec attr) := TRANSFORM
- SELF := l;
- SELF := attr;
- SELF.exprText := MATCHTEXT;
- SELF.value3 := MATCHROW(e0[3]).val;
- END;
- OUTPUT(PARSE(infile,line,expr,extractResults(LEFT, $1),
- FIRST,WHOLE,PARSE,SKIP(ws)));</programlisting>
- <para>See Also: <link linkend="PARSE">PARSE</link>, <link
- linkend="RECORD_Structure">RECORD Structure</link>, <link
- linkend="TRANSFORM_Structure">TRANSFORM Structure</link></para>
- </sect2>
- </sect1>
|