浏览代码

Cache the Collator between calls to unicode editdistance

Previously an ICU RuleBasedCollator was created (and destroyed) on every call
to the Unicode edit distance functions.  This change caches one collator per
locale name so that it is shared between all calls.

Signed-off-by: Gavin Halliday <gavin.halliday@lexisnexis.com>
Gavin Halliday 13 年之前
父节点
当前提交
1a9b7d3e2a
共有 1 个文件被更改,包括 80 次插入20 次删除
  1. 80 20
      plugins/unicodelib/unicodelib.cpp

+ 80 - 20
plugins/unicodelib/unicodelib.cpp

@@ -30,7 +30,7 @@
 #include "unicode/rbbi.h"
 #include "../stringlib/wildmatch.tpp"
 
-#define UNICODELIB_VERSION "UNICODELIB 1.1.05"
+#define UNICODELIB_VERSION "UNICODELIB 1.1.06"
 
 UChar32 const u32comma = ',';
 UChar32 const u32space = ' ';
@@ -91,6 +91,7 @@ static const char * compatibleVersions[] = {
     "UNICODELIB 1.1.02", 
     "UNICODELIB 1.1.03", 
     "UNICODELIB 1.1.04", 
+    "UNICODELIB 1.1.05",
     NULL };
 
 UNICODELIB_API bool getECLPluginDefinition(ECLPluginDefinitionBlock *pb) 
@@ -303,10 +304,12 @@ private:
     }
 
 public:
-    CEList(RuleBasedCollator& rbc, const UnicodeString & source, UErrorCode& status, uint32_t capacity=0) 
+    // Builds the CE list for `source` using collator `rbc`; `capacity` seeds capacity_.
+    // The constructor no longer takes a UErrorCode from the caller.
+    CEList(RuleBasedCollator& rbc, const UnicodeString & source, uint32_t capacity=0)
         : length_(0), capacity_(capacity), ustring_(source)
     {
         //doTrimRight(ustring_);
+        // The status is local to the constructor and is not examined after the call below,
+        // so a failure inside doCreateCEList() is not reported to the caller from here.
+        UErrorCode status = U_ZERO_ERROR;
+
         doCreateCEList(rbc, status);
     }
 
@@ -326,7 +329,7 @@ public:
 
 inline unsigned mask(unsigned x) { return x & 1; }
 
-unsigned unicodeEditDistanceV2(UnicodeString & left, UnicodeString & right, RuleBasedCollator& rbc, UErrorCode &status)
+unsigned unicodeEditDistanceV2(UnicodeString & left, UnicodeString & right, RuleBasedCollator& rbc)
 {
     unsigned char i, j;
 
@@ -348,8 +351,8 @@ unsigned unicodeEditDistanceV2(UnicodeString & left, UnicodeString & right, Rule
     if (rightLen == 0)
         return leftLen;
 
-    CEList   leftCEs(rbc, left, status, leftLen);
-    CEList   rightCEs(rbc, right, status, rightLen);
+    CEList   leftCEs(rbc, left, leftLen);
+    CEList   rightCEs(rbc, right, rightLen);
     leftLen = leftCEs.length();
     rightLen = rightCEs.length();
 
@@ -394,7 +397,7 @@ unsigned unicodeEditDistanceV2(UnicodeString & left, UnicodeString & right, Rule
 // * special case edit1 - you could use variables for the 6 interesting array elements, and get
 //   rid of the array completely.  You could also unwind the first (and last iterations).
 // * I suspect the early exit condition could be improved depending the lengths of the strings.
-unsigned unicodeEditDistanceV3(UnicodeString & left, UnicodeString & right, unsigned radius, RuleBasedCollator& rbc, UErrorCode &status)
+unsigned unicodeEditDistanceV3(UnicodeString & left, UnicodeString & right, unsigned radius, RuleBasedCollator& rbc)
 {
     if (radius >= 255)
         return 255;
@@ -422,8 +425,8 @@ unsigned unicodeEditDistanceV3(UnicodeString & left, UnicodeString & right, unsi
     if (rightLen == 0)
         return leftLen;
 
-    CEList   leftCEs(rbc, left, status, leftLen);
-    CEList   rightCEs(rbc, right, status, rightLen);
+    CEList   leftCEs(rbc, left, leftLen);
+    CEList   rightCEs(rbc, right, rightLen);
     leftLen = leftCEs.length();
     rightLen = rightCEs.length();
 
@@ -1012,36 +1015,93 @@ UNICODELIB_API void UNICODELIB_CALL ulUnicodeCleanAccents(unsigned & tgtLen, UCh
     source.extract(0, tgtLen, tgt);
 }
 
-UNICODELIB_API unsigned UNICODELIB_CALL ulUnicodeLocaleEditDistance(unsigned leftLen, UChar const * left, unsigned rightLen, UChar const * right, char const * localename)
+
+
+// Creates a collator for the given locale name with normalization mode switched on.
+//
+// Returns a newly allocated collator that the caller must delete, or NULL if the
+// collator could not be created or the normalization attribute could not be set.
+static RuleBasedCollator * createRBCollator(const char * localename)
 {
     UErrorCode status = U_ZERO_ERROR;
     Locale locale(localename);
-
-    RuleBasedCollator* rbc = (RuleBasedCollator*)RuleBasedCollator::createInstance(locale, status);
+    RuleBasedCollator * rbc = (RuleBasedCollator *)RuleBasedCollator::createInstance(locale, status);
+    // createInstance() reports failure through status and may hand back NULL;
+    // check before the first dereference rather than after it.
+    if (U_FAILURE(status) || !rbc)
+    {
+        delete rbc;
+        return NULL;
+    }
     rbc->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
+    if (U_FAILURE(status))
+    {
+        delete rbc;
+        return NULL;
+    }
+    return rbc;
+}
+
+// Cache entry pairing a locale name with the collator created for it.
+// The entry owns the collator and deletes it on destruction.
+class RBCLocale
+{
+public:
+    // Members are initialized in declaration order, so `locale` is already set
+    // when it is passed to createRBCollator().
+    RBCLocale(char const * _locale)
+        : locale(_locale), rbc(createRBCollator(locale))
+    {
+    }
+
+    ~RBCLocale() { delete rbc; }
+
+    // Returns the collator without transferring ownership; NULL if creation failed.
+    RuleBasedCollator * queryCollator() const
+    {
+        return rbc;
+    }
+
+private:
+    StringAttr locale;          // name the collator was requested for
+    RuleBasedCollator * rbc;    // owned by this entry; may be NULL
+};
+
+// Map from locale name to its cached RBCLocale entry (constructed from a char const *).
+typedef MapStringTo<RBCLocale, char const *> MapStrToRBC;
+// Collator cache; allocated on first use in queryRBCollator() and released in MODULE_EXIT.
+static MapStrToRBC * localeMap;
+// Held by queryRBCollator() while it reads or populates localeMap.
+static CriticalSection localeCrit;
+// Nothing to set up at load time: the cache is created lazily.
+MODULE_INIT(INIT_PRIORITY_STANDARD)
+{
+    return true;
+}
+// Frees the cache (and with it every cached collator) and clears the pointer.
+MODULE_EXIT()
+{
+    delete localeMap;
+    localeMap = NULL;
+}
+
+// Looks up the cached collator for a locale name, creating the cache and the
+// entry on first request.  A NULL name is treated as the empty string.
+//
+// The returned collator stays owned by the cache and must not be deleted by
+// the caller.  It is NULL when the collator for that locale could not be created.
+static RuleBasedCollator * queryRBCollator(const char * localename)
+{
+    const char * key = localename ? localename : "";
+
+    // Lookup and insertion both happen under the lock.
+    CriticalBlock block(localeCrit);
+    if (localeMap == NULL)
+        localeMap = new MapStrToRBC;
+
+    RBCLocale * entry = localeMap->getValue(key);
+    if (entry == NULL)
+    {
+        //MORE: ECLRTL calls rtlGetNormalizedUnicodeLocaleName().  Should this be happening here?
+        const char * normalizedlocale = key;
+        localeMap->setValue(key, normalizedlocale);
+        entry = localeMap->getValue(key);
+    }
+    return entry->queryCollator();
+}
+
+// Returns the edit distance between `left` and `right` as computed by
+// unicodeEditDistanceV2() with the cached collator for `localename`.
+//
+// leftLen/rightLen are the lengths passed to the UnicodeString constructors.
+// Returns 0 when no collator is available for the locale.
+// NOTE(review): that result is indistinguishable from "strings are identical" - confirm this is intended.
+UNICODELIB_API unsigned UNICODELIB_CALL ulUnicodeLocaleEditDistance(unsigned leftLen, UChar const * left, unsigned rightLen, UChar const * right, char const * localename)
+{
+    // The collator is owned by the cache in queryRBCollator(); it must not be deleted here.
+    RuleBasedCollator* rbc = queryRBCollator(localename);
+    if (!rbc)
+        return 0;
 
     UnicodeString uLeft(left, leftLen);
     UnicodeString uRight(right, rightLen);
 
-    unsigned distance = nsUnicodelib::unicodeEditDistanceV2(uLeft, uRight, *rbc, status);
-    delete rbc;
+    unsigned distance = nsUnicodelib::unicodeEditDistanceV2(uLeft, uRight, *rbc);
     return distance;
 }
 
 
 UNICODELIB_API bool UNICODELIB_CALL ulUnicodeLocaleEditDistanceWithinRadius(unsigned leftLen, UChar const * left, unsigned rightLen, UChar const * right, unsigned radius, char const * localename)
 {
-    UErrorCode status = U_ZERO_ERROR;
-    Locale locale(localename);
-
-    RuleBasedCollator* rbc = (RuleBasedCollator*)RuleBasedCollator::createInstance(locale, status);
-    rbc->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
+    RuleBasedCollator* rbc = queryRBCollator(localename);
+    if (!rbc)
+        return 0;
 
     UnicodeString uLeft(left, leftLen);
     UnicodeString uRight(right, rightLen);
 
-    unsigned distance = nsUnicodelib::unicodeEditDistanceV3(uLeft, uRight, radius, *rbc, status);
-    delete rbc;
+    unsigned distance = nsUnicodelib::unicodeEditDistanceV3(uLeft, uRight, radius, *rbc);
     return distance <= radius;
 }