7 år sedan · 21b654b042
--- a/ecllibrary/std/Uni.ecl
+++ b/ecllibrary/std/Uni.ecl
@@ -481,4 +481,15 @@ EXPORT STRING Version() := lib_unicodelib.UnicodeLib.UnicodeVersion();
 
				 EXPORT RemoveSuffix(unicode src, unicode suff, string form) :=
			
 
				     lib_unicodelib.UnicodeLib.UnicodeLocaleRemoveSuffix(src, suff, form);
			
 
				 
			
 
				+/*
			
 
				+ * Returns a unicode string containing src repeated n times.
			
 
				+ *
			
 
				+ * @param src           The unicode string to be repeated.
			
 
				+ * @param n             Number of repetitions.
			
 
				+ * @return              A unicode string containing n concatenations of src.
			
 
				+ */
			
 
				+
			
 
				+EXPORT Repeat(unicode src, unsigned4 n) :=
			
 
				+    lib_unicodelib.UnicodeLib.UnicodeLocaleRepeat(src, n);
			
 
				+
			
 
				 END;
			
--- a/ecllibrary/teststd/uni/TestRepeat.ecl
+++ b/ecllibrary/teststd/uni/TestRepeat.ecl
@@ -0,0 +1,60 @@
 
				+/*##############################################################################
			
 
				+## HPCC SYSTEMS software Copyright (C) 2018 HPCC Systems®.  All rights reserved.
			
 
				+############################################################################## */
			
 
				+
			
 
				+IMPORT Std.Uni;
			
 
				+
			
 
				+//Tests for Uni.Repeat: repeat counts of 0, 1, 2, 10 and -2, followed by checks on normalization of the repeated text.
			
 
				+EXPORT TestRepeat := MODULE
			
 
				+
			
 
				+   EXPORT TestConst := MODULE
			
 
				+
			
 
				+    angstrom := U'A\u030A';         // Single character (A + U+030A; LENGTH(angstrom) is asserted to be 1 below)
			
 
				+    angstrom2d := x'41000A03';      // Bytes for A followed by circle
			
 
				+    angstrom2 := (>unicode<)angstrom2d; // Convert to a unicode, but it will not be normalized
			
 
				+    revangstrom := U'\u030AA';      // circle followed by an A
			
 
				+
			
 
				+    EXPORT Tests := [
			
 
				+        //Source given as a plain (non-U) string literal
			
 
				+        ASSERT(Uni.Repeat('Repeat this string ', 0) = '');
			
 
				+        ASSERT(Uni.Repeat('Repeat this string ', 1) = 'Repeat this string ');
			
 
				+        //NOTE(review): the expected literal has no trailing space after the second copy; presumably the comparison ignores trailing spaces - confirm
			
 
				+        ASSERT(Uni.Repeat('Repeat this string ', 2) = 'Repeat this string Repeat this string');
			
 
				+
			
 
				+        //Empty source: the result is expected to be empty whatever the repeat count
			
 
				+        ASSERT(Uni.Repeat(U'', 0) = '');
			
 
				+        ASSERT(Uni.Repeat(U'', 1) = '');
			
 
				+        ASSERT(Uni.Repeat(U'', 2) = '');
			
 
				+        ASSERT(Uni.Repeat(U'', 10) = '');
			
 
				+        //NOTE(review): n is declared unsigned4, so -2 is presumably converted before the call; an empty result is expected - confirm
			
 
				+        ASSERT(Uni.Repeat(U'', -2) = '');
			
 
				+
			
 
				+        //Single-character source
			
 
				+        ASSERT(Uni.Repeat(U'r', 0) = '');
			
 
				+        ASSERT(Uni.Repeat(U'r', 1) = 'r');
			
 
				+        ASSERT(Uni.Repeat(U'r', 2) = 'rr');
			
 
				+        ASSERT(Uni.Repeat(U'r', 10) = 'rrrrrrrrrr');
			
 
				+        ASSERT(Uni.Repeat(U'r', -2) = '');
			
 
				+
			
 
				+        //Multi-character source
			
 
				+        ASSERT(Uni.Repeat(U'abc', 0) = '');
			
 
				+        ASSERT(Uni.Repeat(U'abc', 1) = 'abc');
			
 
				+        ASSERT(Uni.Repeat(U'abc', 2) = 'abcabc');
			
 
				+        ASSERT(Uni.Repeat(U'abc', 10) = 'abcabcabcabcabcabcabcabcabcabc');
			
 
				+        ASSERT(Uni.Repeat(U'abc', -2) = '');
			
 
				+
			
 
				+        //Various checks to ensure that strings are correctly normalized after duplicating
			
 
				+        //angstrom2 holds 2 characters; after Repeat the overall length is unchanged but the TRIMmed length is halved,
			
 
				+        //i.e. the normalized text is shorter and the remainder of the result is trailing space
			
 
				+        ASSERT(Uni.Repeat(angstrom, 1) = U'\u212B');
			
 
				+        ASSERT(LENGTH(angstrom) = 1);
			
 
				+        ASSERT(LENGTH(angstrom2) = 2);
			
 
				+        ASSERT(LENGTH(TRIM(angstrom2)) = 2);
			
 
				+        ASSERT(LENGTH(Uni.Repeat(angstrom, 1)) = 1);
			
 
				+        ASSERT(LENGTH(Uni.Repeat(angstrom2, 1)) = 2);
			
 
				+        ASSERT(LENGTH(TRIM(Uni.Repeat(angstrom2, 1))) = 1);
			
 
				+        ASSERT(LENGTH(Uni.Repeat(angstrom2, 2)) = 4);
			
 
				+        ASSERT(LENGTH(TRIM(Uni.Repeat(angstrom2, 2))) = 2);
			
 
				+        ASSERT(Uni.Repeat(angstrom2, 1) = U'\u212B');
			
 
				+        ASSERT(revangstrom[2] = 'A');
			
 
				+
			
 
				+        //revangstrom starts with the circle, so a single copy is expected to stay 2 characters long; with two copies the A
			
 
				+        //ending the first copy and the circle starting the second are expected to combine (TRIMmed length 3)
			
 
				+        ASSERT(LENGTH(Uni.Repeat(revangstrom, 1)) = 2);
			
 
				+        ASSERT(LENGTH(TRIM(Uni.Repeat(revangstrom   , 1))) = 2);
			
 
				+        ASSERT(LENGTH(Uni.Repeat(revangstrom, 2)) = 4);
			
 
				+        ASSERT(LENGTH(TRIM(Uni.Repeat(revangstrom   , 2))) = 3);
			
 
				+        ASSERT(Uni.Repeat(revangstrom, 2) = U'\u030A\u212bA');
			
 
				+
			
 
				+        ASSERT(TRUE)];
			
 
				+   END;
			
 
				+END;
			
--- a/plugins/unicodelib/unicodelib.cpp
+++ b/plugins/unicodelib/unicodelib.cpp
@@ -92,6 +92,7 @@ static const char * EclDefinition =
 
				 "  boolean UnicodeLocaleEndsWith(const unicode src, const unicode suff, const string form) :c,pure,entrypoint='ulUnicodeLocaleEndsWith';\n"
			
 
				 "  string UnicodeVersion():c,pure,entrypoint='ulUnicodeVersion';\n"
			
 
				 "  unicode UnicodeLocaleRemoveSuffix(const unicode src, const unicode suff, const string form) :c,pure,entrypoint='ulUnicodeLocaleRemoveSuffix';\n"
			
 
				+"  unicode UnicodeLocaleRepeat(const unicode src, unsigned4 n) : c, pure,entrypoint='ulUnicodeLocaleRepeat'; \n"
			
 
				 "END;\n";
			
 
				 
			
 
				 static const char * compatibleVersions[] = {
			
@@ -121,6 +122,21 @@ UNICODELIB_API bool getECLPluginDefinition(ECLPluginDefinitionBlock *pb)
 
				     return true;
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * Normalizes the fixed-length unicode buffer to NFC in place.
			
 
				+ *
			
 
				+ * The buffer cannot change size, so normalized text that is shorter than inLen is padded
			
 
				+ * with spaces (U+0020) and normalized text that is longer is truncated to inLen characters.
			
 
				+ * If ICU reports a failure, or the scratch buffer cannot be allocated, the buffer is left unchanged.
			
 
				+ *
			
 
				+ * @param inLen         Number of UChars in the buffer.
			
 
				+ * @param in            The buffer to normalize; it is modified in place.
			
 
				+ */
			
 
				+static void unicodeEnsureIsNormalized(unsigned inLen, UChar * in)
			
 
				+{
			
 
				+    UErrorCode err = U_ZERO_ERROR;
			
 
				+    bool alreadyNormalized = unorm_isNormalized(in, inLen, UNORM_NFC, &err);
			
 
				+    //A failed check also reports "not normalized".  Normalizing with err already set would
			
 
				+    //not produce any text, and padding the buffer afterwards would wipe out the input.
			
 
				+    if (alreadyNormalized || U_FAILURE(err))
			
 
				+        return;
			
 
				+
			
 
				+    UChar * buff = (UChar *)malloc(inLen * sizeof(UChar));
			
 
				+    if (!buff)
			
 
				+        return;
			
 
				+
			
 
				+    unsigned len = unorm_normalize(in, inLen, UNORM_NFC, 0, buff, inLen, &err);
			
 
				+    //An overflow only means the normalized text is longer than the buffer - keep the truncating behaviour.
			
 
				+    //Any other failure leaves the input as it was.
			
 
				+    if (U_SUCCESS(err) || (err == U_BUFFER_OVERFLOW_ERROR))
			
 
				+    {
			
 
				+        if (len > inLen)
			
 
				+            len = inLen;
			
 
				+        memcpy(in, buff, len*sizeof(UChar));
			
 
				+        while (len < inLen)
			
 
				+            in[len++] = 0x0020;
			
 
				+    }
			
 
				+    free(buff);
			
 
				+}
			
 
				+
			
 
				 
			
 
				 namespace nsUnicodelib {
			
 
				 
			
@@ -330,7 +346,7 @@ private:
 
				         next_ = new uint32_t[capacity_+1]; // the number of characters is always less or equal to the string length
			
 
				         unsigned index=0;
			
 
				         next_[index] = 0;
			
 
				-        int32_t end = 0;
			
 
				+        uint32_t end = 0;
			
 
				         while (end < capacity_)
			
 
				         {
			
 
				             end = end+ucpLength(ustring_[end]);
			
@@ -1749,3 +1765,28 @@ UNICODELIB_API void UNICODELIB_CALL ulUnicodeLocaleRemoveSuffix(unsigned & tgtLe
 
				     tgt = (UChar *)CTXMALLOC(parentCtx, tgtLen * 2);
			
 
				     pro.extract(0, tgtLen, tgt);
			
 
				 }
			
 
				+
			
 
				+/*
			
 
				+ * Builds a unicode string made of n copies of src and returns it through tgt/tgtLen.
			
 
				+ *
			
 
				+ * An empty result (tgtLen 0, tgt null) is returned when src is empty, when n is zero or
			
 
				+ * negative once viewed as an int, or when srcLen * n does not fit in the length type.
			
 
				+ * The concatenated text is re-normalized in place before it is returned.
			
 
				+ */
			
 
				+UNICODELIB_API void UNICODELIB_CALL ulUnicodeLocaleRepeat(unsigned & tgtLen, UChar * & tgt, unsigned srcLen, UChar const * src, unsigned n)
			
 
				+{
			
 
				+    const size32_t totalLen = srcLen * n;
			
 
				+    const bool nothingToRepeat = ((int)n <= 0) || (srcLen == 0);
			
 
				+    //Dividing back detects a product that wrapped; n is known to be non-zero by the time it is evaluated
			
 
				+    if (nothingToRepeat || (totalLen / n != srcLen))
			
 
				+    {
			
 
				+        tgtLen = 0;
			
 
				+        tgt = nullptr;
			
 
				+        return;
			
 
				+    }
			
 
				+
			
 
				+    const size_t copyBytes = srcLen * sizeof(UChar);
			
 
				+    UChar * buffer = (UChar *)CTXMALLOC(parentCtx, totalLen * sizeof(UChar));
			
 
				+    assertex(buffer);
			
 
				+
			
 
				+    UChar * cursor = buffer;
			
 
				+    for (unsigned remaining = n; remaining != 0; --remaining)
			
 
				+    {
			
 
				+        memcpy(cursor, src, copyBytes);
			
 
				+        cursor += srcLen;
			
 
				+    }
			
 
				+
			
 
				+    //The end of one copy may combine with the start of the next, so the joined text has to be normalized again
			
 
				+    unicodeEnsureIsNormalized(totalLen, buffer);
			
 
				+
			
 
				+    tgtLen = totalLen;
			
 
				+    tgt = buffer;
			
 
				+}
			
--- a/plugins/unicodelib/unicodelib.hpp
+++ b/plugins/unicodelib/unicodelib.hpp
@@ -108,6 +108,7 @@ UNICODELIB_API bool UNICODELIB_CALL ulUnicodeLocaleStartsWith(unsigned srcLen, U
 
				 UNICODELIB_API bool UNICODELIB_CALL ulUnicodeLocaleEndsWith(unsigned srcLen, UChar const * src, unsigned suffLen, UChar const * suff, unsigned formLen, char const * form);
			
 
				 UNICODELIB_API void UNICODELIB_CALL ulUnicodeVersion(unsigned & tgtLen, char * & tgt);
			
 
				 UNICODELIB_API void UNICODELIB_CALL ulUnicodeLocaleRemoveSuffix(unsigned & tgtLen, UChar * & tgt, unsigned srcLen, UChar const * src, unsigned suffLen, UChar const * suff, unsigned formLen, char const * form);
			
 
				+UNICODELIB_API void UNICODELIB_CALL ulUnicodeLocaleRepeat(unsigned & tgtLen, UChar * & tgt, unsigned srcLen, UChar const * src, unsigned n);
			
 
				 }
			
 
				 
			
 
				 #endif
			
--- a/rtl/eclrtl/eclrtl.cpp
+++ b/rtl/eclrtl/eclrtl.cpp
@@ -357,19 +357,19 @@ bool vunicodeNeedsNormalize(UChar * in, UErrorCode * err)
 
				 
			
 
				+//Replaces the inlen-character buffer in with its NFC form, padding with spaces when the normalized text is shorter.
			
 
				+//The status of the ICU call is returned through err.
			
 
				 void unicodeReplaceNormalized(unsigned inlen, UChar * in, UErrorCode * err)
			
 
				 {
			
 
				-    UChar * buff = (UChar *)rtlMalloc(inlen*2);
			
 
				+    UChar * buff = (UChar *)rtlMalloc(inlen*sizeof(UChar));
			
 
				     unsigned len = unorm_normalize(in, inlen, UNORM_NFC, 0, buff, inlen, err);
			
 
				     while(len<inlen) buff[len++] = 0x0020;
			
 
				+    //memcpy counts bytes, so the character count has to be scaled by sizeof(UChar)
			
 
				-    memcpy(in, buff, inlen);
			
 
				+    memcpy(in, buff, inlen * sizeof(UChar));
			
 
				     free(buff);
			
 
				 }
			
 
				 
			
 
				+//Replaces the null-terminated unicode string in (buffer capacity inlen characters) with its NFC form.
			
 
				+//The status of the ICU call is returned through err.
			
 
				 void vunicodeReplaceNormalized(unsigned inlen, UChar * in, UErrorCode * err)
			
 
				 {
			
 
				-    UChar * buff = (UChar *)rtlMalloc(inlen*2);
			
 
				+    UChar * buff = (UChar *)rtlMalloc(inlen*sizeof(UChar));
			
 
				     unsigned len = unorm_normalize(in, -1, UNORM_NFC, 0, buff, inlen-1, err);
			
 
				+    //NOTE(review): if the normalized text does not fit in inlen-1 characters, len is presumably the required
			
 
				+    //length and this write lands outside buff - confirm callers guarantee that it fits
			
 
				     buff[len] = 0x0000;
			
 
				+    //memcpy counts bytes, so the character count has to be scaled by sizeof(UChar)
			
 
				-    memcpy(in, buff, inlen);
			
 
				+    memcpy(in, buff, inlen * sizeof(UChar));
			
 
				     free(buff);
			
 
				 }