// thorstep.hpp
  /*##############################################################################
      HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
      Licensed under the Apache License, Version 2.0 (the "License");
      you may not use this file except in compliance with the License.
      You may obtain a copy of the License at
         http://www.apache.org/licenses/LICENSE-2.0
      Unless required by applicable law or agreed to in writing, software
      distributed under the License is distributed on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      See the License for the specific language governing permissions and
      limitations under the License.
  ############################################################################## */
  #ifndef THORSTEP_HPP_INCL
  #define THORSTEP_HPP_INCL

  #include "thorcommon.ipp"
  16. //---------------------------------------------------------------------------
  17. //Can be extended, since it is only ever implemented within the engines
  18. interface IInputSteppingMeta : public ISteppingMeta
  19. {
  20. //??virtual ISteppingMeta * querySteppingMeta() const = 0
  21. virtual bool hasPostFilter() = 0; // make this const?
  22. virtual bool isDistributed() const = 0;
  23. virtual unsigned getSteppedFlags() const = 0;
  24. virtual double getPriority() = 0;
  25. inline bool hasPriority() const { return (getSteppedFlags() & SSFhaspriority) != 0; }
  26. };
  27. interface ISteppedJoin : public IInterface
  28. {
  29. public:
  30. virtual unsigned getNumEqualFields() const = 0;
  31. virtual ISteppingMeta * querySteppingMeta() const = 0;
  32. virtual void markRestrictedJoin(unsigned numEqualityFields) = 0;
  33. virtual void startRestrictedJoin(const void * equalityRow, unsigned numEqualityFields) = 0;
  34. virtual void stopRestrictedJoin() = 0;
  35. };
  36. class CSteppedInputLookahead;
  37. interface ISteppedConjunctionCollector
  38. {
  39. public:
  40. virtual void addInput(CSteppedInputLookahead & _input) = 0;
  41. virtual void addPseudoInput(CSteppedInputLookahead & _input) = 0;
  42. virtual void addJoin(ISteppedJoin & _join) = 0;
  43. };
  44. interface IMultipleStepSeekInfo
  45. {
  46. virtual void ensureFilled(const void * seek, unsigned numFields, unsigned maxcount) = 0; // max count probably needs to be calculated from buffer/seek row size.
  47. virtual unsigned ordinality() const = 0;
  48. virtual const void * querySeek(unsigned i) const = 0;
  49. };
  //Use a set of named flags rather than a set of booleans because it makes the constructors clearer.
  //The values are distinct bits so they can be or-ed together and stored in SmartStepExtra::flags.
  enum
  {
      SSEFreturnUnbufferedMatches     = 0x0000,

      //if the returned row matches the seek fields, then it must also match the post filter.
      //if the row doesn't match the seek fields, then it should be returned if the following flag is set.
      SSEFreturnMismatches            = 0x0001,

      //Should we readahead extra rows from this input?  Normally set for a known, or likely, lowest frequency term.
      SSEFreadAhead                   = 0x0002,

      //We may want a flag for a LEFT ONLY join to ensure only the first match is returned.  It could only be set if no post filter on the join.
      SSEFonlyReturnFirstSeekMatch    = 0x0004,
  };
  62. struct SmartStepExtra
  63. {
  64. inline SmartStepExtra(unsigned _flags, IMultipleStepSeekInfo * _extraSeeks)
  65. : extraSeeks(_extraSeeks), flags(_flags)
  66. {}
  67. inline SmartStepExtra(const SmartStepExtra & other)
  68. : extraSeeks(other.extraSeeks), flags(other.flags)
  69. {}
  70. //Input fields
  71. inline bool returnMismatches() const { return (flags & SSEFreturnMismatches) != 0; }
  72. inline bool readAheadManyResults() const { return (flags & SSEFreadAhead) != 0; }
  73. inline bool onlyReturnFirstSeekMatch() const { return (flags & SSEFonlyReturnFirstSeekMatch) != 0; }
  74. inline unsigned queryFlags() const { return flags; }
  75. //If extraSeeks is provided, then it is used to provide a list of additional seek positions - but only as a hint.
  76. //If provided, then we are interested in any rows that match the seek positions (and match the post filter), or the row
  77. //following the last seek position (postfilter determined by returmMismatches)
  78. inline IMultipleStepSeekInfo * queryExtraSeeks() const { return extraSeeks; }
  79. inline void setReturnMismatches() { flags |= SSEFreturnMismatches; }
  80. inline void setReadAhead() { flags |= SSEFreadAhead; }
  81. inline void set(unsigned _flags, IMultipleStepSeekInfo * _extraSeeks)
  82. {
  83. flags = _flags;
  84. extraSeeks = _extraSeeks;
  85. }
  86. protected:
  87. IMultipleStepSeekInfo * extraSeeks;
  88. unsigned flags;
  89. };
  90. interface ISteppedInput : public IInterface
  91. {
  92. public:
  93. virtual const void * nextInputRow() = 0;
  94. virtual const void * nextInputRowGE(const void * seek, unsigned numFields, bool & wasCompleteMatch, const SmartStepExtra & stepExtra) = 0;
  95. virtual bool gatherConjunctions(ISteppedConjunctionCollector & collector) = 0;
  96. virtual IInputSteppingMeta * queryInputSteppingMeta() = 0;
  97. virtual void resetEOF() = 0;
  98. };
  //GH->RKC I'm slightly concerned about the number of parameters on nextInputRowGE(), but they all have different lifetimes, so I think it is correct.

  /*
  When should the input be read-ahead, and when should mismatches be returned?

  Current thinking:
  i) The input should be read-ahead
     a) If the input is known to contain the lowest frequency term.
     b) If all medians are known, and this is the lowest frequency term.
  ii) Mismatches for non equal seek positions should be returned
     a) If the priority of the input isn't known
     b) Except if all medians are known, and a row from the lowest frequency term mismatches and it is still the lowest frequency input

  Note: The buffering and mismatch semantics are generally ignored by the merge join code.
        An exception is left only join.
  */
  #endif