PrG_Using_ECL_Keys.xml 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534
  1. <?xml version="1.0" encoding="UTF-8"?>
  2. <!DOCTYPE sect1 PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
  3. "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
  4. <sect1 id="Using_ECL_Keys-INDEX_Files">
  5. <title><emphasis role="bold">Using ECL Keys (INDEX Files)</emphasis></title>
  6. <para>The ETL (Extract, Transform, and Load—standard data ingest processing)
  7. operations in ECL typically operate against all or most of the records in
  8. any given dataset, which makes keys (INDEX files) of little benefit.
  9. Many queries do the same.</para>
  10. <para>However, production data delivery to end-users rarely requires
  11. accessing all records in a dataset. End-users always want “instant” access
  12. to the data they're interested in, and most often that data is a very small
  13. subset of the total set of records available. Therefore, using keys
  14. (INDEXes) becomes a requirement.</para>
  15. <para>The following attribute definitions used by the code examples in this
  16. article are declared in the DeclareData MODULE structure attribute in the
  17. DeclareData.ECL file:</para>
  18. <programlisting>EXPORT Person := MODULE
  19. EXPORT File := DATASET('~PROGGUIDE::EXAMPLEDATA::People',Layout_Person, THOR);
  20. EXPORT FilePlus := DATASET('~PROGGUIDE::EXAMPLEDATA::People',
  21. {Layout_Person,
  22. UNSIGNED8 RecPos{VIRTUAL(fileposition)}}, THOR);
  23. END;
  24. EXPORT Accounts := DATASET('~PROGGUIDE::EXAMPLEDATA::Accounts',
  25. {Layout_Accounts_Link,
  26. UNSIGNED8 RecPos{VIRTUAL(fileposition)}}, THOR);
  27. EXPORT PersonAccounts := DATASET('~PROGGUIDE::EXAMPLEDATA::PeopleAccts',
  28. {Layout_Combined,
  29. UNSIGNED8 RecPos{virtual(fileposition)}},THOR);
  30. EXPORT IDX_Person_PersonID := INDEX(Person.FilePlus,{PersonID,RecPos},
  31. '~PROGGUIDE::EXAMPLEDATA::KEYS::People.PersonID');
  32. EXPORT IDX_Accounts_PersonID := INDEX(Accounts,{PersonID,RecPos},
  33. '~PROGGUIDE::EXAMPLEDATA::KEYS::Accounts.PersonID');
  34. EXPORT IDX_Accounts_PersonID_Payload :=
  35. INDEX(Accounts,
  36. {PersonID},
  37. {Account,OpenDate,IndustryCode,AcctType,
  38. AcctRate,Code1,Code2,HighCredit,Balance,RecPos},
  39. '~PROGGUIDE::EXAMPLEDATA::KEYS::Accounts.PersonID.Payload');
  40. EXPORT IDX_PersonAccounts_PersonID :=
  41. INDEX(PersonAccounts,{PersonID,RecPos},
  42. '~PROGGUIDE::EXAMPLEDATA::KEYS::PeopleAccts.PersonID');
  43. EXPORT IDX__Person_LastName_FirstName :=
  44. INDEX(Person.FilePlus,{LastName,FirstName,RecPos},
  45. '~PROGGUIDE::EXAMPLEDATA::KEYS::People.LastName.FirstName');
  46. EXPORT IDX__Person_PersonID_Payload :=
  47. INDEX(Person.FilePlus,{PersonID},
  48. {FirstName,LastName,MiddleInitial,
  49. Gender,Street,City,State,Zip,RecPos},
  50. '~PROGGUIDE::EXAMPLEDATA::KEYS::People.PersonID.Payload');
  51. </programlisting>
  52. <para>Although you can use an INDEX as if it were a DATASET, there are only
  53. two operations in ECL that directly use keys: FETCH and JOIN.</para>
  54. <sect2 id="Simple_FETCH">
  55. <title>Simple FETCH</title>
  56. <para>The FETCH is the simplest use of an INDEX. Its purpose is to
  57. retrieve records from a dataset by using an INDEX to directly access only
  58. the specified records.</para>
  59. <para>The example code below (contained in the IndexFetch.ECL file)
  60. illustrates the usual form:</para>
  61. <programlisting>IMPORT $;
  62. F1 := FETCH($.DeclareData.Person.FilePlus,
  63. $.DeclareData.IDX_Person_PersonID(PersonID=1),
  64. RIGHT.RecPos);
  65. OUTPUT(F1); </programlisting>
  66. <para>You will note that the DATASET named as the first parameter has no
  67. filter, while the INDEX named as the second parameter does have a filter.
  68. This is always the case with FETCH. The purpose of an INDEX in ECL is
  69. always to allow “direct” access to individual records in the base dataset,
  70. therefore filtering the INDEX is always required to define the exact set
  71. of records to retrieve. Given that, filtering the base dataset is
  72. unnecessary.</para>
  73. <para>As you can see, there is no TRANSFORM function in this code. For
  74. most typical uses of FETCH a transform function is unnecessary, although
  75. it is certainly appropriate if the result data requires formatting, as in
  76. this example (also contained in the IndexFetch.ECL file):</para>
  77. <programlisting>r := RECORD
  78. STRING FullName;
  79. STRING Address;
  80. STRING CSZ;
  81. END;
  82. r Xform($.DeclareData.Person.FilePlus L) := TRANSFORM
  83. SELF.Fullname := TRIM(L.Firstname) + TRIM(' ' + L.MiddleInitial) + ' ' + L.Lastname;
  84. SELF.Address := L.Street;
  85. SELF.CSZ := TRIM(L.City) + ', ' + L.State + ' ' + L.Zip;
  86. END;
  87. F2 := FETCH($.DeclareData.Person.FilePlus,
  88. $.DeclareData.IDX_Person_PersonID(PersonID=1),
  89. RIGHT.RecPos,
  90. Xform(LEFT));
  91. OUTPUT(F2);
  92. </programlisting>
  93. <para>Even with a TRANSFORM function, this code is still a very
  94. straightforward “go get me the records, please” operation.</para>
  95. </sect2>
  96. <sect2 id="Full-keyed_JOIN">
  97. <title>Full-keyed JOIN</title>
  98. <para>As simple as FETCH is, using INDEXes in JOIN operations is a little
  99. more complex. The most obvious form is a "full-keyed" JOIN, specified by
  100. the KEYED option, which nominates an INDEX into the right-hand recordset
  101. (the second JOIN parameter). The purpose for this form is to handle
  102. situations where the left-hand recordset (named as the first parameter to
  103. the JOIN) is a fairly small dataset that needs to join to a large, indexed
  104. dataset (the right-hand recordset). By using the KEYED option, the JOIN
  105. operation uses the specified INDEX to find the matching right-hand
  106. records. This means that the join condition must use the key fields in the
  107. INDEX to find matching records.</para>
  108. <para>This example code (contained in the IndexFullKeyedJoin.ECL file)
  109. illustrates the usual use of a full-keyed join:</para>
  110. <programlisting>IMPORT $;
  111. r1 := RECORD
  112. $.DeclareData.Layout_Person;
  113. $.DeclareData.Layout_Accounts;
  114. END;
  115. r1 Xform1($.DeclareData.Person.FilePlus L,
  116. $.DeclareData.Accounts R) := TRANSFORM
  117. SELF := L;
  118. SELF := R;
  119. END;
  120. J1 := JOIN($.DeclareData.Person.FilePlus(PersonID BETWEEN 1 AND 100),
  121. $.DeclareData.Accounts,
  122. LEFT.PersonID=RIGHT.PersonID,
  123. Xform1(LEFT,RIGHT),
  124. KEYED($.DeclareData.IDX_Accounts_PersonID));
  125. OUTPUT(J1,ALL);
  126. </programlisting>
  127. <para>The right-hand Accounts file contains five million records, and with
  128. the specified filter condition the left-hand Person recordset contains
  129. exactly one hundred records. A standard JOIN between these two would
  130. normally require that all five million Accounts records be read to produce
  131. the result. However, by using the KEYED option the INDEX’s binary tree is
  132. used to find the entries with the appropriate key field values and get the
  133. pointers to the exact set of Accounts records required to produce the
  134. correct result. That means that the only records read from the right-hand
  135. file are those actually contained in the result.</para>
  136. </sect2>
  137. <sect2 id="Half-keyed_JOIN">
  138. <title>Half-keyed JOIN</title>
  139. <para>The half-keyed JOIN is a simpler version, wherein the INDEX is the
  140. right-hand recordset in the JOIN. Just as with the full-keyed JOIN, the
  141. join condition must use the key fields in the INDEX to do its work. The
  142. purpose of the half-keyed JOIN is the same as the full-keyed
  143. version.</para>
  144. <para>In fact, a full-keyed JOIN is, behind the scenes, actually the
  145. same as a half-keyed JOIN followed by a FETCH to retrieve the base dataset
  146. records. Therefore, a half-keyed JOIN plus a FETCH is semantically and
  147. functionally equivalent to a full-keyed JOIN, as shown in this example code
  148. (contained in the IndexHalfKeyedJoin.ECL file):</para>
  149. <programlisting>IMPORT $;
  150. r1 := RECORD
  151. $.DeclareData.Layout_Person;
  152. $.DeclareData.Layout_Accounts;
  153. END;
  154. r2 := RECORD
  155. $.DeclareData.Layout_Person;
  156. UNSIGNED8 AcctRecPos;
  157. END;
  158. r2 Xform2($.DeclareData.Person.FilePlus L,
  159. $.DeclareData.IDX_Accounts_PersonID R) := TRANSFORM
  160. SELF.AcctRecPos := R.RecPos;
  161. SELF := L;
  162. END;
  163. J2 := JOIN($.DeclareData.Person.FilePlus(PersonID BETWEEN 1 AND 100),
  164. $.DeclareData.IDX_Accounts_PersonID,
  165. LEFT.PersonID=RIGHT.PersonID,
  166. Xform2(LEFT,RIGHT));
  167. r1 Xform3($.DeclareData.Accounts L, r2 R) := TRANSFORM
  168. SELF := L;
  169. SELF := R;
  170. END;
  171. F1 := FETCH($.DeclareData.Accounts,
  172. J2,
  173. RIGHT.AcctRecPos,
  174. Xform3(LEFT,RIGHT));
  175. OUTPUT(F1,ALL);
  176. </programlisting>
  177. <para>This code produces the same result set as the previous
  178. example.</para>
  179. <para>The advantage of using half-keyed JOINs over the full-keyed version
  180. appears when you need to do several JOINs to fully perform whatever
  181. process is being run. Using the half-keyed form allows you to accomplish
  182. all the necessary JOINs before you explicitly do the FETCH to retrieve the
  183. final result records, thereby making the code more efficient.</para>
  184. </sect2>
  185. <sect2 id="Payload_INDEXes">
  186. <title>Payload INDEXes</title>
  187. <para>There is an extended form of INDEX that allows each entry to carry a
  188. “payload”—additional data not included in the set of key fields. These
  189. additional fields may simply be additional fields from the base dataset
  190. (not required as part of the search key), or they may contain the result
  191. of some preliminary computation (computed fields). Since the data in an
  192. INDEX is always compressed (using LZW compression), carrying the extra
  193. payload doesn't tax the system unduly.</para>
  194. <para>A payload INDEX requires two separate RECORD structures as the
  195. second and third parameters of the INDEX declaration. The second parameter
  196. RECORD structure lists the key fields on which the INDEX is built (the
  197. search fields), while the third parameter RECORD structure defines the
  198. additional payload fields.</para>
  199. <para>The <emphasis role="bold">virtual(fileposition)</emphasis> record
  200. pointer field must always be the last field listed in any type of INDEX;
  201. therefore, when you're defining a payload key it is always the last field
  202. in the third parameter RECORD structure.</para>
  203. <para>This example code (contained in the IndexHalfKeyedPayloadJoin.ECL
  204. file) once again duplicates the previous results, but does so using just
  205. the half-keyed JOIN (without the FETCH) by making use of a payload
  206. key:</para>
  207. <programlisting>IMPORT $;
  208. r1 := RECORD
  209. $.DeclareData.Layout_Person;
  210. $.DeclareData.Layout_Accounts;
  211. END;
  212. r1 Xform($.DeclareData.Person.FilePlus L, $.DeclareData.IDX_Accounts_PersonID_Payload R) :=
  213. TRANSFORM
  214. SELF := L;
  215. SELF := R;
  216. END;
  217. J2 := JOIN($.DeclareData.Person.FilePlus(PersonID BETWEEN 1 AND 100),
  218. $.DeclareData.IDX_Accounts_PersonID_Payload,
  219. LEFT.PersonID=RIGHT.PersonID,
  220. Xform(LEFT,RIGHT));
  221. OUTPUT(J2,ALL);
  222. </programlisting>
  223. <para>You can see that this makes for tighter code. By eliminating the
  224. FETCH operation you also eliminate the disk access associated with it,
  225. making your process faster. The requirement, of course, is to pre-build
  226. the payload keys so that the FETCH becomes unnecessary.</para>
  227. </sect2>
  228. <sect2 id="Computed_Fields_in_Payload_Keys">
  229. <title>Computed Fields in Payload Keys</title>
  230. <para>There is a trick to putting computed fields in the payload. Since a
  231. “computed field” by definition does not exist in the dataset, the
  232. technique required for its creation and use is to build the content of
  233. the INDEX beforehand.</para>
  234. <para>The following example code (contained in IndexPayloadFetch.ECL)
  235. illustrates how to accomplish this by building the content of some
  236. computed fields (derived from related child records) in a TABLE on which
  237. the INDEX is built:</para>
  238. <programlisting>IMPORT $;
  239. PersonFile := $.DeclareData.Person.FilePlus;
  240. AcctFile := $.DeclareData.Accounts;
  241. IDXname := '~PROGGUIDE::EXAMPLEDATA::KEYS::Person.PersonID.CompPay';
  242. r1 := RECORD
  243. PersonFile.PersonID;
  244. UNSIGNED8 AcctCount := 0;
  245. UNSIGNED8 HighCreditSum := 0;
  246. UNSIGNED8 BalanceSum := 0;
  247. PersonFile.RecPos;
  248. END;
  249. t1 := TABLE(PersonFile,r1);
  250. st1 := DISTRIBUTE(t1,HASH32(PersonID));
  251. r2 := RECORD
  252. AcctFile.PersonID;
  253. UNSIGNED8 AcctCount := COUNT(GROUP);
  254. UNSIGNED8 HighCreditSum := SUM(GROUP,AcctFile.HighCredit);
  255. UNSIGNED8 BalanceSum := SUM(GROUP,AcctFile.Balance);
  256. END;
  257. t2 := TABLE(AcctFile,r2,PersonID);
  258. st2 := DISTRIBUTE(t2,HASH32(PersonID));
  259. r1 countem(t1 L, t2 R) := TRANSFORM
  260. SELF := R;
  261. SELF := L;
  262. END;
  263. j := JOIN(st1,st2,LEFT.PersonID=RIGHT.PersonID,countem(LEFT,RIGHT),LOCAL);
  264. Bld := BUILDINDEX(j,
  265. {PersonID},
  266. {AcctCount,HighCreditSum,BalanceSum,RecPos},
  267. IDXname,OVERWRITE);
  268. i := INDEX(PersonFile,
  269. {PersonID},
  270. {UNSIGNED8 AcctCount,UNSIGNED8 HighCreditSum,UNSIGNED8 BalanceSum,RecPos},
  271. IDXname);
  272. f := FETCH(PersonFile,i(PersonID BETWEEN 1 AND 100),RIGHT.RecPos);
  273. Get := OUTPUT(f,ALL);
  274. SEQUENTIAL(Bld,Get);
  275. </programlisting>
  276. <para>The first TABLE function gets all the key field values from the
  277. Person dataset for the INDEX and creates empty fields to contain the
  278. computed values. Note well that the RecPos virtual(fileposition) field
  279. value is also retrieved at this point.</para>
  280. <para>The second TABLE function calculates the values to go into the
  281. computed fields. The values in this example are coming from the related
  282. Accounts dataset. These computed field values will allow the final payload
  283. INDEX into the Person dataset to produce these child recordset values
  284. without any additional code (or disk access).</para>
  285. <para>The JOIN operation combines the results from the two TABLEs into
  286. its final form. This is the data from which the INDEX is built.</para>
  287. <para>The BUILDINDEX action writes the INDEX to disk. The tricky part then
  288. is to declare the INDEX against the base dataset (not the JOIN result). So
  289. the key to this technique is to build the INDEX against a derived/computed
  290. set of data, then declare the INDEX against the base dataset from which
  291. that data was drawn.</para>
  292. <para>To demonstrate the use of a computed-field payload INDEX, this
  293. example code just does a simple FETCH to return the combined result
  294. containing all the fields from the Person dataset along with all the
  295. computed field values. In “normal” use, this type of payload key would
  296. generally be used in a half-keyed JOIN operation.</para>
  297. </sect2>
  298. <sect2 id="Computed_Fields_in_Search_Keys">
  299. <title>Computed Fields in Search Keys</title>
  300. <para>There is one situation where using a computed field as a search key
  301. is required—when the field you want to search on is a REAL or DECIMAL data
  302. type. Neither of these two is valid for use as a search key. Therefore,
  303. making the search key a computed STRING field containing the value to
  304. search on is a way to get around this limitation.</para>
  305. <para>The trick to computed fields in the payload is the same for search
  306. keys—build the content of the INDEX beforehand. The following example code
  307. (contained in IndexREALkey.ECL) illustrates how to accomplish this by
  308. building the content of computed search key fields on which the INDEX is
  309. built using a TABLE and PROJECT:</para>
  310. <programlisting>IMPORT $;
  311. r := RECORD
  312. REAL8 Float := 0.0;
  313. DECIMAL8_3 Dec := 0.0;
  314. $.DeclareData.person.file;
  315. END;
  316. t := TABLE($.DeclareData.person.file,r);
  317. r XF(r L) := TRANSFORM
  318. SELF.float := L.PersonID / 1000;
  319. SELF.dec := L.PersonID / 1000;
  320. SELF := L;
  321. END;
  322. p := PROJECT(t,XF(LEFT));
  323. DSname := '~PROGGUIDE::EXAMPLEDATA::KEYS::dataset';
  324. IDX1name := '~PROGGUIDE::EXAMPLEDATA::KEYS::realkeytestIDX1';
  325. IDX2name := '~PROGGUIDE::EXAMPLEDATA::KEYS::realkeytestIDX2';
  326. OutName1 := '~PROGGUIDE::EXAMPLEDATA::KEYS::realkeytestout1';
  327. OutName2 := '~PROGGUIDE::EXAMPLEDATA::KEYS::realkeytestout2';
  328. OutName3 := '~PROGGUIDE::EXAMPLEDATA::KEYS::realkeytestout3';
  329. OutName4 := '~PROGGUIDE::EXAMPLEDATA::KEYS::realkeytestout4';
  330. OutName5 := '~PROGGUIDE::EXAMPLEDATA::KEYS::realkeytestout5';
  331. OutName6 := '~PROGGUIDE::EXAMPLEDATA::KEYS::realkeytestout6';
  332. DSout := OUTPUT(p,,DSname,OVERWRITE);
  333. ds := DATASET(DSname,r,THOR);
  334. idx1 := INDEX(ds,{STRING13 FloatStr := REALFORMAT(float,13,3)},{ds},IDX1name);
  335. idx2 := INDEX(ds,{STRING13 DecStr := (STRING13)dec},{ds},IDX2name);
  336. Bld1Out := BUILD(idx1,OVERWRITE);
  337. Bld2Out := BUILD(idx2,OVERWRITE);
  338. j1 := JOIN(idx1,idx2,LEFT.FloatStr = RIGHT.DecStr);
  339. j2 := JOIN(idx1,idx2,KEYED(LEFT.FloatStr = RIGHT.DecStr));
  340. j3 := JOIN(ds,idx1,KEYED((STRING10)LEFT.float = RIGHT.FloatStr));
  341. j4 := JOIN(ds,idx2,KEYED((STRING10)LEFT.dec = RIGHT.DecStr));
  342. j5 := JOIN(ds,idx1,KEYED((STRING10)LEFT.dec = RIGHT.FloatStr));
  343. j6 := JOIN(ds,idx2,KEYED((STRING10)LEFT.float = RIGHT.DecStr));
  344. JoinOut1 := OUTPUT(j1,,OutName1,OVERWRITE);
  345. JoinOut2 := OUTPUT(j2,,OutName2,OVERWRITE);
  346. JoinOut3 := OUTPUT(j3,,OutName3,OVERWRITE);
  347. JoinOut4 := OUTPUT(j4,,OutName4,OVERWRITE);
  348. JoinOut5 := OUTPUT(j5,,OutName5,OVERWRITE);
  349. JoinOut6 := OUTPUT(j6,,OutName6,OVERWRITE);
  350. SEQUENTIAL(DSout,Bld1Out,Bld2Out,JoinOut1,JoinOut2,JoinOut3,JoinOut4,JoinOut5,JoinOut6);
  351. </programlisting>
  352. <para>This code starts with a record structure and some filename
  353. definitions. The structure adds two fields to the existing set of fields
  354. from our base dataset: a REAL8 field named “float” and a DECIMAL8_3 field
  355. named “dec.” These will contain our REAL and DECIMAL data that we want to
  356. search on. The PROJECT of the TABLE puts values into these two fields (in
  357. this case, just dividing the PersonID field by 1000 to achieve a floating
  358. point value to use that will be unique).</para>
  359. <para>The IDX1 INDEX definition creates the REAL search key as a STRING13
  360. computed field by using the REALFORMAT function to right-justify the
  361. floating point value into a 13-character STRING. This formats the value
  362. with exactly the number of decimal places specified in the REALFORMAT
  363. function.</para>
  364. <para>The IDX2 INDEX definition creates the DECIMAL search key as a
  365. STRING13 computed field by casting the DECIMAL data to a STRING13. Using
  366. the typecast operator simply left-justifies the value in the string. It
  367. may also drop trailing zeros, so the number of decimal places is not
  368. guaranteed to always be the same.</para>
  369. <para>Because of the two different methods of constructing the search key
  370. strings, the strings themselves are not equal, although the values used to
  371. create them are the same. This means that you cannot expect to “mix and
  372. match” between the two—you need to use each INDEX with the method used to
  373. create it. That's why the two JOIN operations that demonstrate their usage
  374. use the same method to create the string comparison value as was used to
  375. create the INDEX. This way, you are guaranteed to achieve matching
  376. values.</para>
  377. </sect2>
  378. <sect2 id="Using_an_INDEX_like_a_DATASET">
  379. <title>Using an INDEX like a DATASET</title>
  380. <para>Payload keys can also be used for standard DATASET-type operations.
  381. In this type of usage, the INDEX acts as if it were a dataset, with the
  382. advantage that it contains compressed data and a btree index. The key
  383. difference in this type of use is the use of KEYED and WILD in INDEX
  384. filters, which allows the INDEX read to make use of the btree instead of
  385. doing a full-table scan.</para>
  386. <para>The following example code (contained in IndexAsDataset.ECL)
  387. illustrates the use of an INDEX as if it were a DATASET, and compares the
  388. relative performance of INDEX versus DATASET use:</para>
  389. <programlisting>IMPORT $;
  390. OutRec := RECORD
  391. INTEGER Seq;
  392. QSTRING15 FirstName;
  393. QSTRING25 LastName;
  394. STRING2 State;
  395. END;
  396. IDX := $.DeclareData.IDX__Person_LastName_FirstName_Payload;
  397. Base := $.DeclareData.Person.File;
  398. OutRec XF1(IDX L, INTEGER C) := TRANSFORM
  399. SELF.Seq := C;
  400. SELF := L;
  401. END;
  402. O1 := PROJECT(IDX(KEYED(lastname='COOLING'),
  403. KEYED(firstname='LIZZ'),
  404. state='OK'),
  405. XF1(LEFT,COUNTER));
  406. OUTPUT(O1,ALL);
  407. OutRec XF2(Base L, INTEGER C) := TRANSFORM
  408. SELF.Seq := C;
  409. SELF := L;
  410. END;
  411. O2 := PROJECT(Base(lastname='COOLING',
  412. firstname='LIZZ',
  413. state='OK'),
  414. XF2(LEFT,COUNTER));
  415. OUTPUT(O2,ALL);
  416. </programlisting>
  417. <para>Both PROJECT operations will produce exactly the same result, but
  418. the first one uses an INDEX and the second uses a DATASET. The only
  419. significant difference between the two is the use of KEYED in the INDEX
  420. filter. This indicates that the index read should use the btree to find
  421. the specific set of leaf node records to read. The DATASET version must
  422. read all the records in the file to find the correct one, making it a much
  423. slower process.</para>
  424. <para>If you check the workunit timings in ECL Watch, you should see a
  425. difference. In this test case, the difference may not appear to be
  426. significant (there's not that much test data), but in your real-world
  427. applications the difference between an index read operation and a
  428. full-table scan should prove meaningful.</para>
  429. </sect2>
  430. </sect1>