  // jregexp.hpp
  /*##############################################################################
      HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.
      Licensed under the Apache License, Version 2.0 (the "License");
      you may not use this file except in compliance with the License.
      You may obtain a copy of the License at
         http://www.apache.org/licenses/LICENSE-2.0
      Unless required by applicable law or agreed to in writing, software
      distributed under the License is distributed on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      See the License for the specific language governing permissions and
      limitations under the License.
  ############################################################################## */
  13. #ifndef JREGEXP_INCL
  14. #define JREGEXP_INCL
  15. #include <limits.h>
  16. #include "jiface.hpp"
  17. #include "jstring.hpp"
  18. /*********************** Regular Expression Class ***********************/
  19. class RECOMP;
  20. #define RE_ALL UINT_MAX
  // WARNING: RegExpr is not thread-safe. It is up to the caller to ensure that a
  // RegExpr is accessed from only one thread at a time - and that INCLUDES the
  // simple find() method.
  23. class jlib_decl RegExpr
  24. {
  25. public:
  26. RegExpr();
  27. ~RegExpr();
  28. RegExpr(const char *exp, bool nocase = false);
  29. bool init(const char *re, bool nocase = false);
  30. // Compiles the regular expression ready for Find
  31. // if nocase = 1 the matching is case insensitive (where possible)
  32. const char * find(const char *str,size32_t from=0,size32_t len=RE_ALL,size32_t maxlen=0);
  33. // finds the first occurrence of the RE in string
  34. // (positioned after or at 'from' within 'len'
  35. // (len = RE_ALL) means to end of the string))
  36. // maxlen is only used for clarion strings (should be 0 otherwise)
  37. // returns position of first match in string if found
  38. // or NULL if not found
  39. size32_t findlen(unsigned n=0);
  40. // size of string (or n'th sub-string (n>0)) last matched using find
  41. const char * findstr(StringBuffer &s,unsigned n=0);
  42. // returns string last matched (n = 0) or substring n (n>0)
  43. const char *findnext();
  44. // repeat last find from after end of last successful find
  45. // returns position of first match in string if found
  46. // or NULL if not found
  47. void replace(const char *s,size32_t maxlen,unsigned n = 0);
  48. // replaces string (or n'th sub-string (n>0)) previously found
  49. // by find or findnext by 's'
  50. // can only be called after a successful find/findnext
  51. // maxlen is the maximum size of the result string after replacement
  52. const char * substitute(StringBuffer &s,const char *mask,...) __attribute__((format(printf,3,4)));;
  53. // (for DAB)
  54. // Creates a string from mask (and following parameters) where mask is
  55. // a 'sprintf' string with the addition that *after* the sprintf
  56. // any embedded strings of the form '&n&' are expanded to n'th sub-string
  57. // previously found by find/findnext ('&0&' is the entire found string)
  58. void kill();
  59. // releases extra storage used by RegularExpressionClass
  60. // (called by destructor)
  61. protected:
  62. RECOMP *re;
  63. };
  // Returns true when 's' contains at least one wildcard character ('?' or '*').
  // A NULL pointer or an empty string yields false.
  inline bool isWildString(const char *s)
  {
      if (!s)
          return false;

      // Scan up to (not including) the terminating NUL.
      for (const char *cursor = s; *cursor != '\0'; ++cursor)
      {
          if (*cursor == '?' || *cursor == '*')
              return true;
      }
      return false;
  }
  75. bool jlib_decl WildMatch(const char *src, size_t srclen, const char *pat, size_t patlen,bool nocase);
  76. bool jlib_decl WildMatch(const char *src, const char *pat, bool nocase=false);
  77. bool jlib_decl WildMatchReplace(const char *src, const char *pat, const char *repl, bool nocase, StringBuffer &out);
  78. bool jlib_decl SoundexMatch(const char *src, const char *pat);
  79. bool jlib_decl containsWildcard(const char * pattern);
  80. class jlib_decl StringMatcher
  81. {
  82. public:
  83. StringMatcher();
  84. ~StringMatcher();
  85. void addEntry(const char * text, unsigned action);
  86. void addEntry(unsigned len, const char * text, unsigned action);
  87. unsigned getMatch(unsigned maxLength, const char * text, unsigned & matchLen);
  88. bool queryAddEntry(unsigned len, const char * text, unsigned action);
  89. void reset() { freeLevel(firstLevel); }
  90. protected:
  91. struct entry { unsigned value; entry * table; };
  92. void freeLevel(entry * elems);
  93. protected:
  94. entry firstLevel[256];
  95. };
  96. void jlib_decl addActionList(StringMatcher & matcher, const char * text, unsigned action, unsigned * maxElementLength = NULL);
  97. #endif