udptopo.hpp 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138
  1. /*##############################################################################
  2. HPCC SYSTEMS software Copyright (C) 2019 HPCC Systems®.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. ############################################################################## */
  13. #ifndef UDPTOPO_INCL
  14. #define UDPTOPO_INCL
  15. #include "jlib.hpp"
  16. #include "jsocket.hpp"
  17. #include "udplib.hpp"
  18. /*
  19. * IBYTI handling
  20. *
  21. * IBYTI (I beat you to it) messages are sent by the agent that is going to process a particular request,
  22. * to tell the other agents on the same channel not to bother.
  23. *
  24. * In order to reduce wasted work, for each request a "primary" subchannel is selected (based on a hash of the
  25. * packet's RUID) - this channel will process the request immediately, but others will delay a little while
  26. * in order to give the expected IBYTI time to arrive.
  27. *
  28. * The decision on how long to delay is a little complex - too long, and you end up losing the ability for a
  29. * backup agent to step in when primary is under load (or dead); too short and you end up duplicating work.
  30. * It's also important for the delay to be adaptive so that if an agent goes offline, the other agents on the
  31. * subchannel don't keep waiting for it to take its turn.
  32. *
  33. * The adaptiveness is handled by noting any time that we delay waiting for an IBYTI that does not arrive - this
  34. * may mean that the agent(s) we expected to get there first are offline, and thus next time we don't wait quite
  35. * so long for them. Conversely, any time an IBYTI does arrive from another agent on your channel, you know that
  36. * it is online and so can reset the delay to its original value.
  37. *
  38. * A previous version of this code assumed a single missed IBYTI was enough to assume that an agent was dead and drop the
  39. * delay for that agent to zero - this turned out to behave pretty poorly when under load, with much duplicated work.
  40. * Thus we take care to adjust the delay more gradually, while still ending up with a zero delay if the buddy does not respond
  41. * several times in a row.
  42. */
  43. /*
  44. * A "subchannel" is a value from 1 to 7 (with current settings) that indicates which "copy" of the data for this channel
  45. * is being processed by this agent. A value of 0 would indicate that this agent does not have any data for this channel.
  46. * In a typical 100-way roxie with cyclic redundancy, node 1 would be processing channel 1, subchannel 1, and channel 2,
  47. * subchannel 2, node 2 would be processing channel 2, subchannel 1 and channel 3, subchannel 2, and so on up to node 100,
  48. * which would process channel 100, subchannel 1 and channel 1, subchannel 2.
  49. *
  50. * To determine which subchannel is the "primary" for a given query packet, a hash value of fields from the packet header
  51. * is used, modulo the number of subchannels on this channel. The agent on this subchannel will respond immediately.
  52. * Agents on other subchannels delay according to the subchannel number - so on a 4-way redundant system, if the primary
  53. * subchannel is decided to be 2, the agent on subchannel 3 will delay by 1 ibytiDelay value, the agent on subchannel 4 by
  54. * 2 values, and the agent on subchannel 1 by 3 values (this assumes all agents are responding normally).
  55. *
  56. * In fact, the calculation is a little more complex, in that the "units" are adjusted per subchannel to take into account
  57. * the responsiveness or otherwise of a subchannel. Initially, the delay value for each subchannel is the same, but any time
  58. * an agent waits for an IBYTI that does not arrive on time, the delay value for any agent that is "more primary" than me for
  59. * this packet is reduced. Any time an IBYTI _does_ arrive on time, the delay is reset to its initial value.
  60. */
  61. extern UDPLIB_API unsigned minIbytiDelay;
  62. extern UDPLIB_API unsigned initIbytiDelay;
  63. extern UDPLIB_API SocketEndpoint myAgentEP;
  64. extern UDPLIB_API unsigned numChannels;
  65. class UDPLIB_API ChannelInfo
  66. {
  67. public:
  68. ChannelInfo(unsigned _subChannel, unsigned _numSubChannels, unsigned _replicationLevel);
  69. ChannelInfo(ChannelInfo && ) = default;
  70. unsigned getIbytiDelay(unsigned primarySubChannel) const;
  71. void noteChannelsSick(unsigned primarySubChannel) const;
  72. void noteChannelHealthy(unsigned subChannel) const;
  73. inline unsigned subChannel() const { return mySubChannel; }
  74. inline unsigned replicationLevel() const { return myReplicationLevel; }
  75. /*
  76. * Determine whether to abort on receipt of an IBYTI for a packet which I have already started processing
  77. * As I will also have sent out an IBYTI, I should only abort if the sender of the IBYTI has higher priority
  78. * for this packet than I do.
  79. */
  80. bool otherAgentHasPriority(unsigned priorityHash, unsigned otherAgentSubChannel) const;
  81. private:
  82. unsigned mySubChannel = 0; // Which subChannel does this node implement for this channel - zero-based
  83. unsigned myReplicationLevel = 0; // Which data location is this channel pulling its data from - zero-based
  84. unsigned numSubChannels = 0; // How many subchannels are there for this channel, across all agents. Equivalently, the number of agents that implement this channel
  85. mutable std::vector<unsigned> currentDelay; // NOTE - technically should be atomic, but in the event of a race we don't really care who wins
  86. };
  87. // In containerized mode with dynamic topology , we prefer a different mechanism for tracking node health
  88. extern UDPLIB_API void noteNodeSick(const ServerIdentifier &node);
  89. extern UDPLIB_API void noteNodeHealthy(const ServerIdentifier &node);
  90. extern UDPLIB_API unsigned getIbytiDelay(const ServerIdentifier &node);
  91. interface ITopologyServer : public IInterface
  92. {
  93. virtual const SocketEndpointArray &queryAgents(unsigned channel) const = 0;
  94. virtual const SocketEndpointArray &queryServers(unsigned port) const = 0;
  95. virtual const ChannelInfo &queryChannelInfo(unsigned channel) const = 0;
  96. virtual const std::vector<unsigned> &queryChannels() const = 0;
  97. virtual bool implementsChannel(unsigned channel) const = 0;
  98. virtual StringBuffer & report(StringBuffer &ret) const = 0;
  99. virtual time_t queryServerInstance(const SocketEndpoint &ep) const = 0;
  100. virtual void updateStatus() const = 0;
  101. };
  102. extern UDPLIB_API unsigned getNumAgents(unsigned channel);
  103. extern UDPLIB_API const ITopologyServer *getTopology();
  104. extern UDPLIB_API void freezeTopology(bool frozen);
  105. struct RoxieEndpointInfo
  106. {
  107. enum Role { RoxieServer, RoxieAgent } role;
  108. unsigned channel;
  109. SocketEndpoint ep;
  110. unsigned replicationLevel;
  111. };
  112. extern UDPLIB_API void initializeTopology(const StringArray &topoValues, const std::vector<RoxieEndpointInfo> &myRoles);
  113. extern UDPLIB_API void publishTopology(unsigned traceLevel, const std::vector<RoxieEndpointInfo> &myRoles);
  114. extern UDPLIB_API void stopTopoThread();
  115. #ifndef _CONTAINERIZED
  116. extern UDPLIB_API void createStaticTopology(const std::vector<RoxieEndpointInfo> &allRoles, unsigned traceLevel);
  117. #endif
  118. #endif