7 年之前 · ee66550921
--- a/rtl/eclrtl/eclrtl.cpp
+++ b/rtl/eclrtl/eclrtl.cpp
@@ -2766,8 +2766,8 @@ const static UChar nullUStr = 0;
 
				 #ifdef _USE_ICU
			
 
				 int rtlCompareUnicodeUnicode(unsigned l1, UChar const * p1, unsigned l2, UChar const * p2, char const * locale)
			
 
				 {
			
 
				-    while(l1 && u_isUWhiteSpace(p1[l1-1])) l1--;
			
 
				-    while(l2 && u_isUWhiteSpace(p2[l2-1])) l2--;
			
 
				+    while(l1 && (p1[l1-1] == ' ')) l1--;
			
 
				+    while(l2 && (p2[l2-1] == ' ')) l2--;
			
 
				     if (!p1) p1 = &nullUStr;
			
 
				     if (!p2) p2 = &nullUStr;
			
 
				     return ucol_strcoll(queryRTLLocale(locale)->queryCollator(), p1, l1, p2, l2);
			
@@ -2775,8 +2775,8 @@ int rtlCompareUnicodeUnicode(unsigned l1, UChar const * p1, unsigned l2, UChar c
 
				 
			
 
				 int rtlCompareUnicodeUnicodeStrength(unsigned l1, UChar const * p1, unsigned l2, UChar const * p2, char const * locale, unsigned strength)
			
 
				 {
			
 
				-    while(l1 && u_isUWhiteSpace(p1[l1-1])) l1--;
			
 
				-    while(l2 && u_isUWhiteSpace(p2[l2-1])) l2--;
			
 
				+    while(l1 && (p1[l1-1] == ' ')) l1--;
			
 
				+    while(l2 && (p2[l2-1] == ' ')) l2--;
			
 
				     if (!p1) p1 = &nullUStr;
			
 
				     if (!p2) p2 = &nullUStr;
			
 
				     return ucol_strcoll(queryRTLLocale(locale)->queryCollator(strength), p1, l1, p2, l2);
			
@@ -4912,34 +4912,21 @@ static int rtlCompareUtf8Utf8ViaUnicode(size32_t llen, const char * left, size32
 
				 #ifdef _USE_ICU
			
 
				 int rtlCompareUtf8Utf8(size32_t llen, const char * left, size32_t rlen, const char * right, const char * locale)
			
 
				 {
			
 
				-    //MORE: Do a simple comparison as long as there are no non->0x80 characters around
			
 
				-    //      fall back to a full unicode comparison if we hit one - or in the next character to allow for accents etc.
			
 
				-    const byte * bleft = (const byte *)left;
			
 
				-    const byte * bright = (const byte *)right;
			
 
				-    unsigned len = llen > rlen ? rlen : llen;
			
 
				-    for (unsigned i = 0; i < len; i++)
			
 
				-    {
			
 
				-        byte nextLeft = bleft[i];
			
 
				-        byte nextRight = bright[i];
			
 
				-        if (nextLeft >= 0x80 || nextRight >= 0x80)
			
 
				-            return rtlCompareUtf8Utf8ViaUnicode(llen-i, left+i, rlen-i, right+i, locale);
			
 
				-        if ((i+1 != len) && ((bleft[i+1] >= 0x80) || bright[i+1] >= 0x80))
			
 
				-            return rtlCompareUtf8Utf8ViaUnicode(llen-i, left+i, rlen-i, right+i, locale);
			
 
				-        if (nextLeft != nextRight)
			
 
				-            return nextLeft - nextRight;
			
 
				-    }
			
 
				-    int diff = 0;
			
 
				-    if (len != llen)
			
 
				-    {
			
 
				-        for (;(diff == 0) && (len != llen);len++)
			
 
				-            diff = bleft[len] - ' ';
			
 
				-    }
			
 
				-    else if (len != rlen)
			
 
				-    {
			
 
				-        for (;(diff == 0) && (len != rlen);len++)
			
 
				-            diff = ' ' - bright[len];
			
 
				-    }
			
 
				-    return diff;
			
 
				+#if U_ICU_VERSION_MAJOR_NUM>=50
			
 
				+    size_t lSize = rtlUtf8Size(llen, left);
			
 
				+    while (lSize && (left[lSize-1] == ' '))
			
 
				+        lSize--;
			
 
				+
			
 
				+    size_t rSize = rtlUtf8Size(rlen, right);
			
 
				+    while (rSize && (right[rSize-1] == ' '))
			
 
				+        rSize--;
			
 
				+
			
 
				+    UCollator * collator = queryRTLLocale(locale)->queryCollator();
			
 
				+    UErrorCode status = U_ZERO_ERROR; // Not documented, but this needs to be cleared otherwise the function can fail
			
 
				+    return ucol_strcollUTF8(collator, left, lSize, right, rSize, &status);
			
 
				+#else
			
 
				+    return rtlCompareUtf8Utf8ViaUnicode(llen, left, rlen, right, locale);
			
 
				+#endif
			
 
				 }
			
 
				 
			
 
				 int rtlCompareUtf8Utf8Strength(size32_t llen, const char * left, size32_t rlen, const char * right, const char * locale, unsigned strength)
			
--- a/testing/regress/ecl/key/utf8order.xml
+++ b/testing/regress/ecl/key/utf8order.xml
@@ -0,0 +1,99 @@
 
				+<Dataset name='Result 1'>
			
 
				+ <Row><Result_1>Cycles - which should never happen!</Result_1></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 2'>
			
 
				+ <Row><Result_2>true</Result_2></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 3'>
			
 
				+ <Row><Result_3>true</Result_3></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 4'>
			
 
				+ <Row><Result_4>false</Result_4></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 5'>
			
 
				+ <Row><Result_5>true</Result_5></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 6'>
			
 
				+ <Row><Result_6>true</Result_6></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 7'>
			
 
				+ <Row><Result_7>false</Result_7></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 8'>
			
 
				+ <Row><Result_8>Unicode:</Result_8></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 9'>
			
 
				+ <Row><Result_9>true</Result_9></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 10'>
			
 
				+ <Row><Result_10>true</Result_10></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 11'>
			
 
				+ <Row><Result_11>true</Result_11></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 12'>
			
 
				+ <Row><Result_12>true</Result_12></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 13'>
			
 
				+ <Row><Result_13>true</Result_13></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 14'>
			
 
				+ <Row><Result_14>Utf8:</Result_14></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 15'>
			
 
				+ <Row><Result_15>true</Result_15></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 16'>
			
 
				+ <Row><Result_16>true</Result_16></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 17'>
			
 
				+ <Row><Result_17>true</Result_17></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 18'>
			
 
				+ <Row><Result_18>true</Result_18></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 19'>
			
 
				+ <Row><Result_19>true</Result_19></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 20'>
			
 
				+ <Row><Result_20>true</Result_20></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 21'>
			
 
				+ <Row><Result_21>true</Result_21></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 22'>
			
 
				+ <Row><Result_22>true</Result_22></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 23'>
			
 
				+ <Row><Result_23>true</Result_23></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 24'>
			
 
				+ <Row><Result_24>true</Result_24></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 25'>
			
 
				+ <Row><Result_25>true</Result_25></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 26'>
			
 
				+ <Row><text>abc </text></Row>
			
 
				+ <Row><text>abC </text></Row>
			
 
				+ <Row><text>ABc </text></Row>
			
 
				+ <Row><text>ABC </text></Row>
			
 
				+ <Row><text>abcE</text></Row>
			
 
				+ <Row><text>abcÈ</text></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 27'>
			
 
				+ <Row><text>ABC </text></Row>
			
 
				+ <Row><text>ABc </text></Row>
			
 
				+ <Row><text>abC </text></Row>
			
 
				+ <Row><text>abc </text></Row>
			
 
				+ <Row><text>abcE</text></Row>
			
 
				+ <Row><text>abc&#200;</text></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 28'>
			
 
				+ <Row><text>abc</text></Row>
			
 
				+ <Row><text>abC</text></Row>
			
 
				+ <Row><text>ABc</text></Row>
			
 
				+ <Row><text>ABC</text></Row>
			
 
				+ <Row><text>abcE</text></Row>
			
 
				+ <Row><text>abcÈ</text></Row>
			
 
				+</Dataset>
			
--- a/testing/regress/ecl/utf8order.ecl
+++ b/testing/regress/ecl/utf8order.ecl
@@ -0,0 +1,45 @@
 
				+u8null := u8'' : stored('u8null');
			
 
				+unull := u'' : stored('unull');
			
 
				+
			
 
				+output('Cycles - which should never happen!');
			
 
				+output(u8'BC' > u8'BB\u20AC');
			
 
				+output(u8'BB\u20AC' > u8'Ba');
			
 
				+output(u8'Ba' > u8'BC');
			
 
				+
			
 
				+output(u'BC' > u'BB\u20AC');
			
 
				+output(u'BB\u20AC' > u'Ba');
			
 
				+output(u'Ba' > u'BC');
			
 
				+
			
 
				+output('Unicode:');
			
 
				+output(u'abcÈ' > u'abcE');
			
 
				+output(u'abcÈ'+unull > u'abcE');
			
 
				+output(u'abcÈ'+unull != u'abcE');
			
 
				+
			
 
				+//Check correct length is used rather than size.
			
 
				+output(u'AB\u20ACX'+unull < u'AB\u20ACY');
			
 
				+output(u'AB\u20ACX'+unull != u'AB\u20ACY');
			
 
				+
			
 
				+Output('Utf8:');
			
 
				+output(u8'abcÈ' > u8'abcE');
			
 
				+output(u8'abcÈ'+u8null > u8'abcE');
			
 
				+output(u8'abcÈ'+u8null != u8'abcE');
			
 
				+
			
 
				+output(u8'AB\u20ACX'+u8null < u8'AB\u20ACY');
			
 
				+output(u8'AB\u20ACX'+u8null != u8'AB\u20ACY');
			
 
				+
			
 
				+output(U'AB ' = U'AB');
			
 
				+output(U'AB ' != U'AB\t');
			
 
				+output(U8'AB ' = U8'AB');
			
 
				+output(U8'AB ' != U8'AB\t');
			
 
				+output('AB ' = 'AB');
			
 
				+output('AB ' != 'AB\t');
			
 
				+
			
 
				+//Illustrate the different ordering of string v unicode v utf8
			
 
				+d1 := dataset([U'ABC',U'ABc',U'abc',U'abC',U'abcÈ',U'abcE'], { unicode4 text });
			
 
				+output(sort(d1, text));
			
 
				+
			
 
				+d2 := dataset(['ABC','ABc','abc','abC','abcÈ','abcE'], { string4 text });
			
 
				+output(sort(d2, text));
			
 
				+
			
 
				+d3 := dataset([u8'ABC',u8'ABc',u8'abc',u8'abC',u8'abcÈ',u8'abcE'], { utf8 text });
			
 
				+output(sort(d3, text));