ParSppt-NLPRecrd.xml 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151
  1. <?xml version="1.0" encoding="UTF-8"?>
  2. <!DOCTYPE sect1 PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
  3. "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
  4. <sect1 id="NLP_RECORD_and_TRANSFORM_Functions">
  5. <title>NLP RECORD and TRANSFORM Functions</title>
  6. <para><emphasis>The following functions are used in field definition
  7. expressions within the RECORD structure<indexterm>
  8. <primary>RECORD structure</primary>
  9. </indexterm> or TRANSFORM function that defines the result set from the
  10. PARSE function<indexterm>
  11. <primary>PARSE function</primary>
  12. </indexterm>:</emphasis></para>
  13. <para><emphasis role="bold">MATCHED(</emphasis><emphasis>
  14. </emphasis><emphasis role="bold">[ </emphasis><emphasis>patternreference
  15. </emphasis><emphasis role="bold">] )</emphasis></para>
  16. <para><emphasis role="bold">MATCHED</emphasis> returns TRUE if the
  17. <emphasis>patternreference</emphasis> found a match, otherwise FALSE. If the
  18. <emphasis>patternreference</emphasis> is omitted, it indicates whether the
  19. entire pattern matched or not (for use with the NOT MATCHED option).</para>
  20. <para><emphasis role="bold">MATCHTEXT</emphasis><emphasis>
  21. </emphasis><emphasis role="bold">[
  22. (</emphasis><emphasis>patternreference</emphasis><emphasis role="bold">)
  23. ]</emphasis></para>
  24. <para><emphasis role="bold">MATCHTEXT</emphasis> returns the matching ASCII
  25. text the <emphasis>patternreference</emphasis> found, or blank if not found.
  26. If the <emphasis>patternreference</emphasis> is omitted, MATCHTEXT returns
  27. all matching text.</para>
  28. <para><emphasis
  29. role="bold">MATCHUNICODE(</emphasis><emphasis>patternreference</emphasis><emphasis
  30. role="bold">)</emphasis></para>
  31. <para><emphasis role="bold">MATCHUNICODE</emphasis> returns the matching
  32. Unicode text the <emphasis>patternreference</emphasis> found, or blank if
  33. not found.</para>
  34. <para><emphasis
  35. role="bold">MATCHLENGTH(</emphasis><emphasis>patternreference</emphasis><emphasis
  36. role="bold">)</emphasis></para>
  37. <para><emphasis role="bold">MATCHLENGTH</emphasis> returns the number of
  38. characters in the matching text the <emphasis>patternreference</emphasis>
  39. found, or 0 if not found.</para>
  40. <para><emphasis
  41. role="bold">MATCHPOSITION(</emphasis><emphasis>patternreference</emphasis><emphasis
  42. role="bold">)</emphasis></para>
  43. <para><emphasis role="bold">MATCHPOSITION</emphasis> returns the position
  44. within the text of the first character in the matching text the
  45. <emphasis>patternreference</emphasis> found, or 0 if not found.</para>
  46. <para><emphasis
  47. role="bold">MATCHROW(</emphasis><emphasis>patternreference</emphasis><emphasis
  48. role="bold">)</emphasis></para>
  49. <para><emphasis role="bold">MATCHROW</emphasis> returns the entire row of
  50. the matching text the <emphasis>patternreference</emphasis> found for a RULE
  51. (valid only when the PARSE option is used on the PARSE function). This may
  52. be used to fully qualify a field in the RECORD structure of the row.</para>
  53. <sect2 id="Pattern_References">
  54. <title>Pattern References</title>
  55. <para>The <emphasis>patternreference</emphasis> parameter to these
  56. functions is a slash-delimited (/) list of previously defined PATTERN,
  57. TOKEN, or RULE attributes with or without an instance number appended in
  58. square brackets.</para>
  59. <para>If an instance number is supplied, the
  60. <emphasis>patternreference</emphasis> matches that particular occurrence;
  61. otherwise, it matches any occurrence. The <emphasis>patternreference</emphasis>
  62. provides a path through the regular expression grammar to a particular
  63. result. The path to a particular attribute can be either fully or
  64. partially specified.</para>
  65. <para>Example:</para>
  66. <programlisting>PATTERN ws := PATTERN('[ \t\r\n]');
  67. PATTERN arb := PATTERN('[-!.,\t a-zA-Z0-9]')+;
  68. PATTERN number := PATTERN('[0-9]')+;
  69. PATTERN age := '(' number OPT('/I') ')';
  70. PATTERN role := '[' arb ']';
  71. PATTERN m_rank := '&lt;' number '&gt;';
  72. PATTERN actor := arb OPT(ws '(I)' ws);
  73. NLP_layout_actor_movie := RECORD
  74. STRING30 actor_name := MATCHTEXT(actor);
  75. STRING50 movie_name := MATCHTEXT(arb[2]); //2nd instance of arb
  76. UNSIGNED2 movie_year := (UNSIGNED)MATCHTEXT(age/number);
  77. //number within age
  78. STRING20 movie_role := MATCHTEXT(role/arb); //arb within role
  79. UNSIGNED1 cast_rank := (UNSIGNED)MATCHTEXT(m_rank/number);
  80. END;
  81. // This example demonstrates the use of productions in PARSE code
  82. // (only supported in the Tomita version of PARSE).
  83. PATTERN ws := [' ','\t'];
  84. TOKEN number := PATTERN('[0-9]+');
  85. TOKEN plus := '+';
  86. TOKEN minus := '-';
  87. attrRec := RECORD
  88. INTEGER val;
  89. END;
  90. RULE(attrRec) e0 :=
  91. '(' USE(attrRec,expr)? ')' |
  92. number TRANSFORM(attrRec, SELF.val := (INTEGER)$1;) |
  93. '-' SELF TRANSFORM(attrRec, SELF.val := -$2.val;);
  94. RULE(attrRec) e1 :=
  95. e0 |
  96. SELF '*' e0 TRANSFORM(attrRec, SELF.val := $1.val * $3.val;) |
  97. USE(attrRec, e1) '/' e0
  98. TRANSFORM(attrRec, SELF.val := $1.val / $3.val;);
  99. RULE(attrRec) e2 :=
  100. e1 |
  101. SELF plus e1 TRANSFORM(attrRec, SELF.val := $1.val + $3.val;) |
  102. SELF minus e1 TRANSFORM(attrRec, SELF.val := $1.val - $3.val;);
  103. RULE(attrRec) expr := e2;
  104. infile := DATASET([{'1+2*3'},{'1+2*z'},{'1+2+(3+4)*4/2'}],
  105. { STRING line });
  106. resultsRec := RECORD
  107. RECORDOF(infile);
  108. attrRec;
  109. STRING exprText;
  110. INTEGER value3;
  111. END;
  112. resultsRec extractResults(infile l, attrRec attr) := TRANSFORM
  113. SELF := l;
  114. SELF := attr;
  115. SELF.exprText := MATCHTEXT;
  116. SELF.value3 := MATCHROW(e0[3]).val;
  117. END;
  118. OUTPUT(PARSE(infile,line,expr,extractResults(LEFT, $1),
  119. FIRST,WHOLE,PARSE,SKIP(ws)));</programlisting>
  120. <para>See Also: <link linkend="PARSE">PARSE</link>, <link
  121. linkend="RECORD_Structure">RECORD Structure</link>, <link
  122. linkend="TRANSFORM_Structure">TRANSFORM Structure</link></para>
  123. </sect2>
  124. </sect1>