8 年之前 · 3230c5881d
--- a/ecl/hql/hqlgram.hpp
+++ b/ecl/hql/hqlgram.hpp
@@ -1224,6 +1224,7 @@ class HqlLex
 
				         bool getDefinedParameter(StringBuffer &curParam, YYSTYPE & returnToken, const char* for_what, SharedHqlExpr & resolved);
			
 
				 
			
 
				         bool checkUnicodeLiteral(char const * str, unsigned length, unsigned & ep, StringBuffer & msg);
			
 
				+        int processStringLiteral(YYSTYPE & returnToken, char *CUR_TOKEN_TEXT, unsigned CUR_TOKEN_LENGTH, int oldColumn, int oldPosition);
			
 
				 
			
 
				         bool readCheckNextToken(YYSTYPE & returnToken, int expected, unsigned errCode, const char * msg);
			
 
				 
			
--- a/ecl/hql/hqllex.l
+++ b/ecl/hql/hqllex.l
@@ -256,6 +256,7 @@ xpathseq      ([^}\r\n])+
 
				 
			
 
				 %x COMMENT
			
 
				 %x CPP
			
 
				+%x MULTISTRING
			
 
				 %x SLSL
			
 
				 %x SLSLHASH
			
 
				 %x PGPHEADER
			
@@ -1527,7 +1528,7 @@ FUNCTIONMACRO|MACRO {
 
				                         else
			
 
				                             startpos += 1;  // Skip the ) of EMBED(xxx)
			
 
				 
			
 
				-                        // keep the orginal format info (like blanks, newlines)
			
 
				+                        // keep the original format info (like blanks, newlines)
			
 
				                         while (endpos != startpos && (lexer->yyBuffer[endpos-1] == 13 || lexer->yyBuffer[endpos-1] == 10))
			
 
				                             endpos--;
			
 
				                         int len = endpos-startpos;
			
@@ -1548,6 +1549,64 @@ FUNCTIONMACRO|MACRO {
 
				 <CPP>[^\n]+         { updatepos1; }
			
 
				 <CPP>\n             { updatepos1; lexer->updateNewline(); }
			
 
				 
			
 
				+(d|D|q|Q|v|V|u|U|u8|U8)?"'''"  {
				+                        // Opening ''' of a multi-line literal, with an optional type prefix.
				+                        // The closing rule below reads returnToken.pos to find where the literal
				+                        // (including its prefix) starts, so the position is recorded here.
				+                        setupdatepos;
				+                        BEGIN(MULTISTRING);
				+                        // NOTE(review): inCpp stays set until the closing ''' is seen; its
				+                        // consumers are outside this hunk.
				+                        lexer->inCpp = true;
				+                    }
				+<MULTISTRING>[^\n]*"'''"[^\n]*  {
				+                        // A line that contains the closing '''.  The literal ends at the first '''
				+                        // on the line; whatever follows it is handed back to the scanner.
				+                        lexer->inCpp = false;
				+                        int endpos = lexer->yyPosition;
				+                        // Skip to the position of ''' on the line.
				+                        // Assumes yyPosition is the offset of this match within yyBuffer, in which
				+                        // case the pattern guarantees the search terminates.
				+                        while (memcmp(lexer->yyBuffer+endpos, "'''", 3) != 0)
				+                            endpos++;
				+                        const int lastpos = endpos + 3;
				+
				+                        updatepos1;
				+                        BEGIN(0);
				+
				+                        //Return any characters found after the closing '''
				+                        unsigned delta = lexer->yyPosition - lastpos;
				+                        yyless(CUR_TOKEN_LENGTH - delta);
				+                        lexer->yyPosition -= delta;
				+                        lexer->yyColumn -= delta;
				+
				+                        int startpos = returnToken.pos.position;
				+                        // The opening rule accepts the unicode prefixes in either case (u, U, u8, U8),
				+                        // so both cases must be routed to the unicode path.  Letting U/U8 fall through
				+                        // to processStringLiteral() would produce a plain string, and for U8 the '8'
				+                        // would be mistaken for the opening quote.
				+                        const char prefix = lexer->yyBuffer[startpos];
				+                        if ((prefix == 'u') || (prefix == 'U'))
				+                        {
				+                            bool isUtf8 = false;
				+                            startpos++;
				+                            if (lexer->yyBuffer[startpos]=='8')
				+                            {
				+                                isUtf8 = true;
				+                                startpos++;
				+                            }
				+                            startpos +=3;   // step over the opening '''
				+                            int len = endpos-startpos;
				+                            Owned<IValue> unicodeValue;
				+                            // Special handling required for trailing \ char which suppresses the following linefeed, as unicode unescape does not handle it
				+                            // Both LF and CRLF line endings are handled.
				+                            StringBuffer source(len, lexer->yyBuffer+startpos);
				+                            source.replaceString("\\\r\n","");
				+                            source.replaceString("\\\n","");
				+                            if (isUtf8)
				+                            {
				+                                size32_t chars = rtlUtf8Length(source.length(), source.str());
				+                                unicodeValue.setown(createUtf8Value(chars, source.str(), "", true));
				+                            }
				+                            else
				+                                unicodeValue.setown(createUnicodeValue(source.str(), source.length(), "", true, true));
				+
				+                            returnToken.setExpr(createConstant(unicodeValue.getClear()));
				+                            return (UNICODE_CONST);
				+                        }
				+                        else
				+                        {
				+                            // d/q/v prefixed or unprefixed: pass the whole literal, prefix and both
				+                            // sets of quotes included, straight out of the source buffer.
				+                            return lexer->processStringLiteral(returnToken, lexer->yyBuffer+startpos, lastpos-startpos, returnToken.pos.column, returnToken.pos.position);
				+                        }
				+                    }
				+<MULTISTRING>[^\n]+         { updatepos1; }
				+<MULTISTRING>\n             { updatepos1; lexer->updateNewline(); }
			
 
				+
			
 
				+
			
 
				 "<)"                { setupdatepos; return(TYPE_RPAREN) ; }
			
 
				 "(>"                { setupdatepos; return(TYPE_LPAREN) ; }
			
 
				 "<=>"               { setupdatepos; return(ORDER) ; }
			
@@ -1698,190 +1757,8 @@ FUNCTIONMACRO|MACRO {
 
				 (d|D|q|Q|v|V)?\'([^'\r\n\\]|\\[^\r\n])*\' {
			
 
				                         int oldColumn = lexer->yyColumn;
			
 
				                         int oldPosition = lexer->yyPosition;
			
 
				-                        setupdatepos; 
			
 
				-                        MemoryAttr tempBuff;
			
 
				-                        char *b = (char *)tempBuff.allocate(CUR_TOKEN_LENGTH); // Escape sequence can only make is shorter...
			
 
				-                        char *bf = b;
			
 
				-                        const char *finger = CUR_TOKEN_TEXT;
			
 
				-                        type_t tc = type_string;
			
 
				-                        if (*finger != '\'')
			
 
				-                        {
			
 
				-                            if ((*finger == 'd') || (*finger == 'D'))
			
 
				-                                tc = type_data;
			
 
				-                            else if((*finger == 'q') || (*finger == 'Q'))
			
 
				-                                tc = type_qstring;
			
 
				-                            else if((*finger == 'v') || (*finger == 'V'))
			
 
				-                                tc = type_varstring;
			
 
				-                            finger++;
			
 
				-                        }
			
 
				-                        for (finger++; finger[1]; finger++)
			
 
				-                        {
			
 
				-                            unsigned char next = *finger;
			
 
				-                            size32_t delta = (size32_t)(finger-CUR_TOKEN_TEXT);
			
 
				-                            if (next == '\\')
			
 
				-                            {
			
 
				-                                next = finger[1];
			
 
				-                                if (finger[2]==0)  // finger[1] must be '.
			
 
				-                                {
			
 
				-                                    assertex(false);
			
 
				-                                    returnToken.setPosition(lexer->yyLineNo, oldColumn+delta, oldPosition+delta, lexer->querySourcePath());
			
 
				-                                    StringBuffer msg("Can not terminate a string with escape char '\\': ");
			
 
				-                                    msg.append(CUR_TOKEN_TEXT);
			
 
				-                                    lexer->reportError(returnToken, RRR_ESCAPE_ENDWITHSLASH, "%s", msg.str());
			
 
				-                                    if (lexer->checkAborting())
			
 
				-                                        return EOF;
			
 
				-                                }
			
 
				-                                else if (next == '\'' || next == '\\' || next == '?' || next == '"') 
			
 
				-                                {
			
 
				-                                    finger++;
			
 
				-                                } 
			
 
				-                                else if (next == 'a') 
			
 
				-                                {
			
 
				-                                    next = '\a';
			
 
				-                                    finger++;
			
 
				-                                } 
			
 
				-                                else if (next == 'b') 
			
 
				-                                {
			
 
				-                                    next = '\b';
			
 
				-                                    finger++;
			
 
				-                                } 
			
 
				-                                else if (next == 'f') 
			
 
				-                                {
			
 
				-                                    next = '\f';
			
 
				-                                    finger++;
			
 
				-                                } 
			
 
				-                                else if (next == 'n') 
			
 
				-                                {
			
 
				-                                    next = '\n';
			
 
				-                                    finger++;
			
 
				-                                } 
			
 
				-                                else if (next == 'r') 
			
 
				-                                {
			
 
				-                                    next = '\r';
			
 
				-                                    finger++;
			
 
				-                                } 
			
 
				-                                else if (next == 't') 
			
 
				-                                {
			
 
				-                                    next = '\t';
			
 
				-                                    finger++;
			
 
				-                                } 
			
 
				-                                else if (next == 'v') 
			
 
				-                                {
			
 
				-                                    next = '\v';
			
 
				-                                    finger++;
			
 
				-                                } 
			
 
				-                                else if (isdigit(next) && next < '8')
			
 
				-                                {
			
 
				-                                    //Allow octal constants for ^Z etc.
			
 
				-                                    unsigned value = 0;
			
 
				-                                    unsigned count;
			
 
				-                                    for (count=0; count < 3; count++)
			
 
				-                                    {
			
 
				-                                        next = finger[count+1];
			
 
				-                                        if (!isdigit(next) || next >= '8')
			
 
				-                                            break;
			
 
				-                                        value = value * 8 + (next - '0');
			
 
				-                                    }
			
 
				-                                    if(count != 3)
			
 
				-                                    {
			
 
				-                                        returnToken.setPosition(lexer->yyLineNo, oldColumn+delta, oldPosition+delta, lexer->querySourcePath());
			
 
				-                                        StringBuffer msg;
			
 
				-                                        msg.append("3-digit numeric escape sequence contained non-octal digit: ").append(next);
			
 
				-                                        lexer->reportError(returnToken, ERR_ESCAPE_UNKNOWN, "%s", msg.str());
			
 
				-                                        if (lexer->checkAborting())
			
 
				-                                            return EOF;
			
 
				-                                    }
			
 
				-                                    *bf++ = value;
			
 
				-                                    if(!(isValidAsciiLikeCharacter(value) || (tc == type_data)))
			
 
				-                                    {
			
 
				-                                        returnToken.setPosition(lexer->yyLineNo, oldColumn+delta, oldPosition+delta, lexer->querySourcePath());
			
 
				-                                        lexer->reportWarning(CategoryCast, returnToken, ERR_STRING_NON_ASCII, "Character in string literal is not defined in encoding " ASCII_LIKE_CODEPAGE);
			
 
				-                                        if (lexer->checkAborting())
			
 
				-                                            return EOF;
			
 
				-                                    }
			
 
				-                                    finger += count;
			
 
				-                                    continue;
			
 
				-                                }
			
 
				-                                else
			
 
				-                                {
			
 
				-                                    StringBuffer msg;
			
 
				-                                    msg.append("Unrecognized escape sequence: ");
			
 
				-                                    msg.append("\\").append(finger[1]);
			
 
				-                                    returnToken.setPosition(lexer->yyLineNo, oldColumn+delta, oldPosition+delta, lexer->querySourcePath());
			
 
				-                                    lexer->reportError(returnToken, ERR_ESCAPE_UNKNOWN, "%s", msg.str());
			
 
				-                                    if (lexer->checkAborting())
			
 
				-                                        return EOF;
			
 
				-                                }
			
 
				-                                *bf++ = next;
			
 
				-                            }
			
 
				-                            else if (next == '\'')
			
 
				-                            {
			
 
				-                                returnToken.setPosition(lexer->yyLineNo, oldColumn+delta, oldPosition+delta, lexer->querySourcePath());
			
 
				-                                lexer->reportError(returnToken, ERR_STRING_NEEDESCAPE,"' needs to be escaped by \\ inside string");
			
 
				-                                if (lexer->checkAborting())
			
 
				-                                    return EOF;
			
 
				-                            }
			
 
				-                            else if (next >= 128)
			
 
				-                            {
			
 
				-                                const byte * temp = (byte *)finger;
			
 
				-                                unsigned lenLeft = CUR_TOKEN_LENGTH - (size32_t)(finger - CUR_TOKEN_TEXT);
			
 
				-                                int extraCharsRead = rtlSingleUtf8ToCodepage(bf, lenLeft, finger, ASCII_LIKE_CODEPAGE);
			
 
				-                                if (extraCharsRead == -1)
			
 
				-                                {
			
 
				-                                    //This really has to be an error, otherwise it will work most of the time, but will then sometimes fail
			
 
				-                                    //because two characters > 128 are next to each other.
			
 
				-                                    returnToken.setPosition(lexer->yyLineNo, oldColumn+delta, oldPosition+delta, lexer->querySourcePath());
			
 
				-                                    lexer->reportError(returnToken, ERR_STRING_NON_ASCII, "Character in string literal is not legal UTF-8");
			
 
				-                                    if (lexer->checkAborting())
			
 
				-                                        return EOF;
			
 
				-                                    *bf = next;
			
 
				-                                }
			
 
				-                                else
			
 
				-                                {
			
 
				-                                    if (*bf == ASCII_LIKE_SUBS_CHAR)
			
 
				-                                    {
			
 
				-                                        returnToken.setPosition(lexer->yyLineNo, oldColumn+delta, oldPosition+delta, lexer->querySourcePath());
			
 
				-                                        lexer->reportWarning(CategoryCast, returnToken, ERR_STRING_NON_ASCII, "Character in string literal is not defined in encoding " ASCII_LIKE_CODEPAGE ", try using a unicode constant");
			
 
				-                                    }
			
 
				-                                    finger += extraCharsRead;
			
 
				-                                }
			
 
				-                                bf++;
			
 
				-                            }
			
 
				-                            else
			
 
				-                            {
			
 
				-                                *bf++ = next;
			
 
				-                                if(!(isValidAsciiLikeCharacter(next) || (tc == type_data)))
			
 
				-                                {
			
 
				-                                    returnToken.setPosition(lexer->yyLineNo, oldColumn+delta, oldPosition+delta, lexer->querySourcePath());
			
 
				-                                    lexer->reportError(returnToken, ERR_STRING_NON_ASCII, "Character in string literal is not defined in encoding " ASCII_LIKE_CODEPAGE);
			
 
				-                                    if (lexer->checkAborting())
			
 
				-                                        return EOF;
			
 
				-                                }
			
 
				-                            }
			
 
				-                        }
			
 
				-                        returnToken.setPosition(lexer->yyLineNo, oldColumn, oldPosition, lexer->querySourcePath());
			
 
				-                        switch (tc)
			
 
				-                        {
			
 
				-                        case type_qstring:
			
 
				-                            {
			
 
				-                                Owned<ITypeInfo> qStrType = makeQStringType(UNKNOWN_LENGTH); 
			
 
				-                                returnToken.setExpr(createConstant(qStrType->castFrom((size32_t)(bf-b), b)));
			
 
				-                                return (DATA_CONST);
			
 
				-                            }
			
 
				-                        case type_data:
			
 
				-                            {
			
 
				-                                returnToken.setExpr(createConstant(createDataValue(b, (size32_t)(bf-b))));
			
 
				-                                return (DATA_CONST);
			
 
				-                            }
			
 
				-                        case type_varstring:
			
 
				-                            {
			
 
				-                                returnToken.setExpr(createConstant(createVarStringValue((size32_t)(bf-b), b, makeVarStringType(UNKNOWN_LENGTH))));
			
 
				-                                return (DATA_CONST);
			
 
				-                            }
			
 
				-                        case type_string:
			
 
				-                            returnToken.setExpr(createConstant(createStringValue(b, (size32_t)(bf-b))));
			
 
				-                            return (STRING_CONST);
			
 
				-                        }
			
 
				+                        setupdatepos;
			
 
				+                        return lexer->processStringLiteral(returnToken, CUR_TOKEN_TEXT, CUR_TOKEN_LENGTH, oldColumn, oldPosition);
			
 
				                     }
			
 
				 
			
 
				 (d|D|q|Q|u|U|v|V)?\'([^'\r\n\\]|\\[^\r\n])*(\\)? {  
			
--- a/ecl/hql/hqlparse.cpp
+++ b/ecl/hql/hqlparse.cpp
@@ -28,6 +28,7 @@
 
				 
			
 
				 #define YY_NO_UNISTD_H
			
 
				 #include "hqllex.hpp"
			
 
				+#include "eclrtl.hpp"
			
 
				 
			
 
				 //#define TIMING_DEBUG
			
 
				 
			
@@ -2129,6 +2130,205 @@ bool HqlLex::checkUnicodeLiteral(char const * str, unsigned length, unsigned & e
 
				     return true;
			
 
				 }
			
 
				 
			
 
				+// Convert the text of a quoted literal into a constant expression, decoding escape sequences.
				+//
				+// returnToken      - receives the constant expression; also used to position any errors/warnings.
				+// CUR_TOKEN_TEXT   - the literal including any single-character type prefix and its quotes,
				+//                    either '...' or '''...'''.  A '''...''' literal is temporarily modified
				+//                    while it is processed and restored before the function returns.
				+// CUR_TOKEN_LENGTH - number of characters in the literal, including prefix and quotes.
				+// oldColumn, oldPosition - column/offset of the start of the literal.  Errors are reported at
				+//                    these plus the offset within the literal, and returnToken is positioned
				+//                    at them once the whole literal has been processed.
				+//
				+// Returns DATA_CONST for d/q/v prefixed literals, STRING_CONST otherwise, or EOF if an error was
				+// reported and checkAborting() indicated that processing should stop.
				+int HqlLex::processStringLiteral(YYSTYPE & returnToken, char *CUR_TOKEN_TEXT, unsigned CUR_TOKEN_LENGTH, int oldColumn, int oldPosition)
				+{
				+    // Overwrites a single character and puts the original back on scope exit, whichever of the
				+    // many return paths below is taken.
				+    struct ScopedCharOverride
				+    {
				+        ScopedCharOverride() : target(NULL), saved(0) {}
				+        ~ScopedCharOverride() { if (target) *target = saved; }
				+        void set(char * where, char value) { target = where; saved = *where; *where = value; }
				+        char * target;
				+        char saved;
				+    } terminator;
				+
				+    MemoryAttr tempBuff;
				+    char *b = (char *)tempBuff.allocate(CUR_TOKEN_LENGTH); // Escape sequences can only make it shorter...
				+    char *bf = b;
				+    const char *finger = CUR_TOKEN_TEXT;
				+    type_t tc = type_string;
				+    if (*finger != '\'')
				+    {
				+        if ((*finger == 'd') || (*finger == 'D'))
				+            tc = type_data;
				+        else if((*finger == 'q') || (*finger == 'Q'))
				+            tc = type_qstring;
				+        else if((*finger == 'v') || (*finger == 'V'))
				+            tc = type_varstring;
				+        finger++;
				+    }
				+    bool isMultiline = false;
				+    if (finger[1]=='\'' && finger[2]=='\'')
				+    {
				+        // Triple quoted literal.  Terminate the text one character after the first closing
				+        // quote so that the loop below, which stops when finger[1] is 0, ends in the same place
				+        // as it does for a single quoted literal.  The multi-line lexer rule passes a pointer
				+        // into the lexer's own source buffer, so the overwritten quote is restored on exit
				+        // rather than being left as a NUL in that buffer.
				+        isMultiline = true;
				+        terminator.set(CUR_TOKEN_TEXT + CUR_TOKEN_LENGTH - 2, '\0');
				+        finger += 2;
				+    }
				+    // finger starts on the opening quote; the loop ends with finger on the closing quote.
				+    for (finger++; finger[1]; finger++)
				+    {
				+        unsigned char next = *finger;
				+        size32_t delta = (size32_t)(finger-CUR_TOKEN_TEXT);
				+        if (next == '\\')
				+        {
				+            next = finger[1];
				+            if (finger[2]==0)  // finger[1] must be '.
				+            {
				+                returnToken.setPosition(yyLineNo, oldColumn+delta, oldPosition+delta, querySourcePath());
				+                StringBuffer msg("Can not terminate a string with escape char '\\': ");
				+                msg.append(CUR_TOKEN_TEXT);
				+                reportError(returnToken, RRR_ESCAPE_ENDWITHSLASH, "%s", msg.str());
				+                if (checkAborting())
				+                    return EOF;
				+            }
				+            else if (next == '\n')
				+            {
				+                finger++;
				+                continue;  // A \ at end of line in a multiline constant means remove the end-of-line
				+            }
				+            else if (next == '\r' && finger[2] == '\n')
				+            {
				+                finger += 2;
				+                continue;  // Same as above, for source that uses CRLF line endings
				+            }
				+            else if (isdigit(next) && next < '8')
				+            {
				+                //Allow octal constants for ^Z etc.
				+                unsigned value = 0;
				+                unsigned count;
				+                for (count=0; count < 3; count++)
				+                {
				+                    next = finger[count+1];
				+                    if (!isdigit(next) || next >= '8')
				+                        break;
				+                    value = value * 8 + (next - '0');
				+                }
				+                if(count != 3)
				+                {
				+                    returnToken.setPosition(yyLineNo, oldColumn+delta, oldPosition+delta, querySourcePath());
				+                    StringBuffer msg;
				+                    msg.append("3-digit numeric escape sequence contained non-octal digit: ").append(next);
				+                    reportError(returnToken, ERR_ESCAPE_UNKNOWN, "%s", msg.str());
				+                    if (checkAborting())
				+                        return EOF;
				+                }
				+                *bf++ = value;
				+                if(!(isValidAsciiLikeCharacter(value) || (tc == type_data)))
				+                {
				+                    returnToken.setPosition(yyLineNo, oldColumn+delta, oldPosition+delta, querySourcePath());
				+                    reportWarning(CategoryCast, returnToken, ERR_STRING_NON_ASCII, "Character in string literal is not defined in encoding " ASCII_LIKE_CODEPAGE);
				+                    if (checkAborting())
				+                        return EOF;
				+                }
				+                finger += count;
				+                continue;
				+            }
				+            else
				+            {
				+                // Single character escapes: step over the backslash and emit the mapped character.
				+                switch (next)
				+                {
				+                case '\'':
				+                case '\\':
				+                case '?':
				+                case '"':
				+                    finger++;
				+                    break;
				+                case 'a':
				+                    next = '\a';
				+                    finger++;
				+                    break;
				+                case 'b':
				+                    next = '\b';
				+                    finger++;
				+                    break;
				+                case 'f':
				+                    next = '\f';
				+                    finger++;
				+                    break;
				+                case 'n':
				+                    next = '\n';
				+                    finger++;
				+                    break;
				+                case 'r':
				+                    next = '\r';
				+                    finger++;
				+                    break;
				+                case 't':
				+                    next = '\t';
				+                    finger++;
				+                    break;
				+                case 'v':
				+                    next = '\v';
				+                    finger++;
				+                    break;
				+                default:
				+                    {
				+                        StringBuffer msg;
				+                        msg.append("Unrecognized escape sequence: ");
				+                        msg.append("\\").append(finger[1]);
				+                        returnToken.setPosition(yyLineNo, oldColumn+delta, oldPosition+delta, querySourcePath());
				+                        reportError(returnToken, ERR_ESCAPE_UNKNOWN, "%s", msg.str());
				+                        if (checkAborting())
				+                            return EOF;
				+                        break;
				+                    }
				+                }
				+            }
				+            *bf++ = next;
				+        }
				+        else if (next == '\'' && !isMultiline)
				+        {
				+            // Embedded quotes are only legal unescaped inside a '''...''' literal.
				+            returnToken.setPosition(yyLineNo, oldColumn+delta, oldPosition+delta, querySourcePath());
				+            reportError(returnToken, ERR_STRING_NEEDESCAPE,"' needs to be escaped by \\ inside string");
				+            if (checkAborting())
				+                return EOF;
				+        }
				+        else if (next >= 128)
				+        {
				+            // Start of a multi-byte UTF-8 sequence: convert it to a single character in the target codepage.
				+            unsigned lenLeft = CUR_TOKEN_LENGTH - (size32_t)(finger - CUR_TOKEN_TEXT);
				+            int extraCharsRead = rtlSingleUtf8ToCodepage(bf, lenLeft, finger, ASCII_LIKE_CODEPAGE);
				+            if (extraCharsRead == -1)
				+            {
				+                //This really has to be an error, otherwise it will work most of the time, but will then sometimes fail
				+                //because two characters > 128 are next to each other.
				+                returnToken.setPosition(yyLineNo, oldColumn+delta, oldPosition+delta, querySourcePath());
				+                reportError(returnToken, ERR_STRING_NON_ASCII, "Character in string literal is not legal UTF-8");
				+                if (checkAborting())
				+                    return EOF;
				+                *bf = next;
				+            }
				+            else
				+            {
				+                if (*bf == ASCII_LIKE_SUBS_CHAR)
				+                {
				+                    returnToken.setPosition(yyLineNo, oldColumn+delta, oldPosition+delta, querySourcePath());
				+                    reportWarning(CategoryCast, returnToken, ERR_STRING_NON_ASCII, "Character in string literal is not defined in encoding " ASCII_LIKE_CODEPAGE ", try using a unicode constant");
				+                }
				+                finger += extraCharsRead;
				+            }
				+            bf++;
				+        }
				+        else
				+        {
				+            *bf++ = next;
				+            if(!(isValidAsciiLikeCharacter(next) || (tc == type_data)))
				+            {
				+                returnToken.setPosition(yyLineNo, oldColumn+delta, oldPosition+delta, querySourcePath());
				+                reportError(returnToken, ERR_STRING_NON_ASCII, "Character in string literal is not defined in encoding " ASCII_LIKE_CODEPAGE);
				+                if (checkAborting())
				+                    return EOF;
				+            }
				+        }
				+    }
				+    // Errors above may have moved the token position; point it back at the start of the literal.
				+    returnToken.setPosition(yyLineNo, oldColumn, oldPosition, querySourcePath());
				+    switch (tc)
				+    {
				+    case type_qstring:
				+        {
				+            Owned<ITypeInfo> qStrType = makeQStringType(UNKNOWN_LENGTH);
				+            returnToken.setExpr(createConstant(qStrType->castFrom((size32_t)(bf-b), b)));
				+            return (DATA_CONST);
				+        }
				+    case type_data:
				+        {
				+            returnToken.setExpr(createConstant(createDataValue(b, (size32_t)(bf-b))));
				+            return (DATA_CONST);
				+        }
				+    case type_varstring:
				+        {
				+            returnToken.setExpr(createConstant(createVarStringValue((size32_t)(bf-b), b, makeVarStringType(UNKNOWN_LENGTH))));
				+            return (DATA_CONST);
				+        }
				+    case type_string:
				+        returnToken.setExpr(createConstant(createStringValue(b, (size32_t)(bf-b))));
				+        return (STRING_CONST);
				+    }
				+    throwUnexpected();
				+}
			
 
				+
			
 
				 //====================================== Error Reporting  ======================================
			
 
				 
			
 
				 bool HqlLex::checkAborting()
			
--- a/testing/regress/ecl/key/strings.xml
+++ b/testing/regress/ecl/key/strings.xml
@@ -0,0 +1,44 @@
 
				+<Dataset name='Result 1'>
			
 
				+ <Row><Result_1>Single
			
 
				+quotes</Result_1></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 2'>
			
 
				+ <Row><Result_2>Can&apos;t be multiline and must escape embedded quotes</Result_2></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 3'>
			
 
				+ <Row><Result_3>€</Result_3></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 4'>
			
 
				+ <Row><Result_4>Can use various prefixes</Result_4></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 5'>
			
 
				+ <Row><Result_5>37313732373337343735</Result_5></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 6'>
			
 
				+ <Row><Result_6>ABCDE</Result_6></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 7'>
			
 
				+ <Row><Result_7>Triple
			
 
				+quotes can have embedded newlines, but also support
			
 
				+escape sequence</Result_7></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 8'>
			
 
				+ <Row><Result_8>Unicode triple
			
 
				+quotes should be the same, and also support
			
 
				+escape sequence</Result_8></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 9'>
			
 
				+ <Row><Result_9>Don&apos;t have to be multiline and need not escape embedded quotes (but &apos;can&apos; if they want)</Result_9></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 10'>
			
 
				+ <Row><Result_10>€</Result_10></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 11'>
			
 
				+ <Row><Result_11>Can use same prefixes as single</Result_11></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 12'>
			
 
				+ <Row><Result_12>37313732373337343735</Result_12></Row>
			
 
				+</Dataset>
			
 
				+<Dataset name='Result 13'>
			
 
				+ <Row><Result_13>ABCDE</Result_13></Row>
			
 
				+</Dataset>
			
--- a/testing/regress/ecl/strings.ecl
+++ b/testing/regress/ecl/strings.ecl
@@ -0,0 +1,36 @@
 
				+/*##############################################################################
			
 
				+
			
 
				+    HPCC SYSTEMS software Copyright (C) 2017 HPCC Systems®.
			
 
				+
			
 
				+    Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+    you may not use this file except in compliance with the License.
			
 
				+    You may obtain a copy of the License at
			
 
				+
			
 
				+       http://www.apache.org/licenses/LICENSE-2.0
			
 
				+
			
 
				+    Unless required by applicable law or agreed to in writing, software
			
 
				+    distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+    See the License for the specific language governing permissions and
			
 
				+    limitations under the License.
			
 
				+############################################################################## */
			
 
				+
			
 
				+'Single\nquotes';
			
 
				+u'Can\'t be multiline and must escape embedded quotes';
			
 
				+u8'€';
			
 
				+v'Can use various prefixes';
			
 
				+d'7172737475';
			
 
				+Q'ABCDE';
			
 
				+
			
 
				+
			
 
				+'''Triple
			
 
				+quotes can have embedded newlines, but also \
			
 
				+support\nescape sequence''';
			
 
				+u'''Unicode triple
			
 
				+quotes should be the same, and also \
			
 
				+support\nescape sequence''';
			
 
				+u'''Don't have to be multiline and need not escape embedded quotes (but \'can' if they want)''';
			
 
				+u8'''€''';
			
 
				+v'''Can use same prefixes as single''';
			
 
				+d'''7172737475''';
			
 
				+Q'''ABCDE''';