PrG_Crosstab_reports.xml 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187
  1. <?xml version="1.0" encoding="UTF-8"?>
  2. <!DOCTYPE sect1 PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
  3. "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
  4. <sect1 id="Cross-Tab_Reports">
  5. <title><emphasis role="bold">Cross-Tab Reports</emphasis></title>
  6. <para>Cross-Tab reports are a very useful way of discovering statistical
  7. information about the data that you work with. They can be easily produced
  8. using the TABLE function and the aggregate functions (COUNT, SUM, MIN, MAX,
  9. AVE, VARIANCE, COVARIANCE, CORRELATION). The resulting recordset contains a
  10. single record for each unique combination of values of the “group by” fields
  11. specified in the TABLE function, along with the statistics you generate with
  12. the aggregate functions.</para>
  13. <para>The TABLE function's “group by” parameters are repeated as the first
  14. set of fields in the RECORD structure, followed by any number of aggregate
  15. function calls, each using the GROUP keyword in place of the recordset
  16. normally required as the first parameter of the aggregate function. The
  17. GROUP keyword specifies that the aggregate operation is performed on each
  18. group, and is the key to creating a Cross-Tab report. This creates an
  19. output table containing a single row for each unique combination of values
  20. of the “group by” parameters.</para>
  21. <sect2 id="A_Simple_Crosstab">
  22. <title>A Simple CrossTab</title>
  23. <para>The example code below (contained in the CrossTab.ECL file) produces
  24. an output of State/CountAccts with counts from the nested child dataset
  25. created by the GenData.ECL code (see the <emphasis role="bold">Creating
  26. Example Data</emphasis> article):</para>
  27. <programlisting>IMPORT $;
  28. Person := $.DeclareData.PersonAccounts;
  29. CountAccts := COUNT(Person.Accounts);
  30. MyReportFormat1 := RECORD
  31. State := Person.State;
  32. A1 := CountAccts;
  33. GroupCount := COUNT(GROUP);
  34. END;
  35. RepTable1 := TABLE(Person,MyReportFormat1,State,CountAccts );
  36. OUTPUT(RepTable1);
  37. /* The result set would look something like this:
  38. State A1 GroupCount
  39. AK 1 7
  40. AK 2 3
  41. AL 1 42
  42. AL 2 54
  43. AR 1 103
  44. AR 2 89
  45. AR 3 2 */
  46. </programlisting>
  47. <para>Slight modifications allow some more sophisticated statistics to be
  48. produced, such as:</para>
  49. <programlisting>MyReportFormat2 := RECORD
  50. State{cardinality(56)} := Person.State;
  51. A1 := CountAccts;
  52. GroupCount := COUNT(GROUP);
  53. MaleCount := COUNT(GROUP,Person.Gender = 'M');
  54. FemaleCount := COUNT(GROUP,Person.Gender = 'F');
  55. END;
  56. RepTable2 := TABLE(Person,MyReportFormat2,State,CountAccts );
  57. OUTPUT(RepTable2);
  58. </programlisting>
  59. <para>This adds a breakdown of how many men and women there are in each
  60. category, by using the optional second parameter to COUNT (available only
  61. for use in RECORD structures where its first parameter is the GROUP
  62. keyword).</para>
  63. <para>The addition of the {cardinality(56)} to the State definition is a
  64. hint to the optimizer that there are exactly 56 values possible in that
  65. field, allowing it to select the best algorithm to produce the output as
  66. quickly as possible.</para>
  67. <para>The possibilities are endless for the type of statistics you can
  68. generate against any set of data.</para>
  69. </sect2>
  70. <sect2 id="A_More_Complex_Example">
  71. <title>A More Complex Example</title>
  72. <para>As a slightly more complex example, the following code produces a
  73. Cross-Tab result table with the average balance on a bankcard trade,
  74. average high credit on a bankcard trade, and the average total balance on
  75. bankcards, tabulated by state and sex.</para>
  76. <para>This code demonstrates using separately defined aggregate attributes
  77. as the value parameters to the aggregate functions in the CrossTab.</para>
  78. <programlisting>IsValidType(STRING1 PassedType) := PassedType IN ['O', 'R', 'I'];
  79. IsRevolv := Person.Accounts.AcctType = 'R' OR
  80. (~IsValidType(Person.Accounts.AcctType) AND
  81. Person.Accounts.Account[1] IN ['4', '5', '6']);
  82. SetBankIndCodes := ['BB', 'ON', 'FS', 'FC'];
  83. IsBank := Person.Accounts.IndustryCode IN SetBankIndCodes;
  84. IsBankCard := IsBank AND IsRevolv;
  85. AvgBal := AVE(Person.Accounts(isBankCard),Balance);
  86. TotBal := SUM(Person.Accounts(isBankCard),Balance);
  87. AvgHC := AVE(Person.Accounts(isBankCard),HighCredit);
  88. R1 := RECORD
  89. person.state;
  90. person.gender;
  91. Number := COUNT(GROUP);
  92. AverageBal := AVE(GROUP,AvgBal);
  93. AverageTotalBal := AVE(GROUP,TotBal);
  94. AverageHC := AVE(GROUP,AvgHC);
  95. END;
  96. T1 := TABLE(person, R1, state, gender);
  97. OUTPUT(T1);
  98. </programlisting>
  99. </sect2>
  100. <sect2 id="A_Statistical_Example">
  101. <title>A Statistical Example</title>
  102. <para>The following example demonstrates using the VARIANCE, COVARIANCE,
  103. and CORRELATION functions to analyze grid points. It also shows the
  104. technique of putting the CrossTab into a MACRO, then calling the MACRO to
  105. generate the specific result for a given dataset.</para>
  106. <programlisting>pointRec := { REAL x, REAL y };
  107. analyze( ds ) := MACRO
  108. #uniquename(rec)
  109. %rec% := RECORD
  110. c := COUNT(GROUP),
  111. sx := SUM(GROUP, ds.x),
  112. sy := SUM(GROUP, ds.y),
  113. sxx := SUM(GROUP, ds.x * ds.x),
  114. sxy := SUM(GROUP, ds.x * ds.y),
  115. syy := SUM(GROUP, ds.y * ds.y),
  116. varx := VARIANCE(GROUP, ds.x);
  117. vary := VARIANCE(GROUP, ds.y);
  118. varxy := COVARIANCE(GROUP, ds.x, ds.y);
  119. rc := CORRELATION(GROUP, ds.x, ds.y) ;
  120. END;
  121. #uniquename(stats)
  122. %stats% := TABLE(ds,%rec% );
  123. OUTPUT(%stats%);
  124. OUTPUT(%stats%, { varx - (sxx-sx*sx/c)/c,
  125. vary - (syy-sy*sy/c)/c,
  126. varxy - (sxy-sx*sy/c)/c,
  127. rc - (varxy/SQRT(varx*vary)) });
  128. OUTPUT(%stats%, { 'bestFit: y='+(STRING)((sy-sx*varxy/varx)/c)+' + '+(STRING)(varxy/varx)+'x' });
  129. ENDMACRO;
  130. ds1 := DATASET([{1,1},{2,2},{3,3},{4,4},{5,5},{6,6}], pointRec);
  131. ds2 := DATASET([{1.93896e+009, 2.04482e+009},
  132. {1.77971e+009, 8.54858e+008},
  133. {2.96181e+009, 1.24848e+009},
  134. {2.7744e+009, 1.26357e+009},
  135. {1.14416e+009, 4.3429e+008},
  136. {3.38728e+009, 1.30238e+009},
  137. {3.19538e+009, 1.71177e+009} ], pointRec);
  138. ds3 := DATASET([{1, 1.00039},
  139. {2, 2.07702},
  140. {3, 2.86158},
  141. {4, 3.87114},
  142. {5, 5.12417},
  143. {6, 6.20283} ], pointRec);
  144. analyze(ds1);
  145. analyze(ds2);
  146. analyze(ds3);
  147. </programlisting>
  148. <para></para>
  149. </sect2>
  150. </sect1>