浏览代码

HPCC-12161 Cassandra plugin should distinguish ASCII from TEXT fields

Note - also corrects the incorrect key file that was causing false positives from
the overnight build and test.

Signed-off-by: Richard Chapman <rchapman@hpccsystems.com>
Richard Chapman 10 年之前
父节点
当前提交
448aee9377

+ 24 - 0
plugins/cassandra/cassandraembed.cpp

@@ -570,6 +570,14 @@ static void getStringResult(const RtlFieldInfo *field, const CassValue *value, s
     switch (cass_value_type(value))
     {
     case CASS_VALUE_TYPE_ASCII:
+    {
+        CassString output;
+        check(cass_value_get_string(value, &output));
+        const char *text = output.data;
+        unsigned long bytes = output.length;
+        rtlStrToStrX(chars, result, bytes, text);
+        break;
+    }
     case CASS_VALUE_TYPE_VARCHAR:
     case CASS_VALUE_TYPE_TEXT:
     {
@@ -597,6 +605,14 @@ static void getUTF8Result(const RtlFieldInfo *field, const CassValue *value, siz
     switch (cass_value_type(value))
     {
     case CASS_VALUE_TYPE_ASCII:
+    {
+        CassString output;
+        check(cass_value_get_string(value, &output));
+        const char *text = output.data;
+        unsigned long bytes = output.length;
+        rtlStrToUtf8X(chars, result, bytes, text);
+        break;
+    }
     case CASS_VALUE_TYPE_VARCHAR:
     case CASS_VALUE_TYPE_TEXT:
     {
@@ -624,6 +640,14 @@ static void getUnicodeResult(const RtlFieldInfo *field, const CassValue *value,
     switch (cass_value_type(value))
     {
     case CASS_VALUE_TYPE_ASCII:
+    {
+        CassString output;
+        check(cass_value_get_string(value, &output));
+        const char *text = output.data;
+        unsigned long bytes = output.length;
+        rtlStrToUnicodeX(chars, result, bytes, text);
+        break;
+    }
     case CASS_VALUE_TYPE_VARCHAR:
     case CASS_VALUE_TYPE_TEXT:
     {

+ 19 - 12
testing/regress/ecl/cassandra-simple.ecl

@@ -39,6 +39,7 @@ childrec := RECORD
    DECIMAL10_2 ddd {default(9.99)},
    UTF8 u1 {default(U'9999 ß')},
    UNICODE u2 {default(U'9999 ßßßß')},
+   STRING a,
    SET OF STRING set1,
    SET OF INTEGER4 list1,
    LINKCOUNTED DICTIONARY(maprec) map1{linkcounted};
@@ -46,10 +47,10 @@ END;
 
 // Some data we will use to initialize the Cassandra table
 
-init := DATASET([{'name1', 1, true, 1.2, 3.4, D'aa55aa55', 1234567.89, U'Straße', U'Straße',['one','two','two','three'],[5,4,4,3],[{'a'=>'apple'},{'b'=>'banana'}]},
-                 {'name2', 2, false, 5.6, 7.8, D'00', -1234567.89, U'là', U'là',[],[],[]}], childrec);
+init := DATASET([{'name1', 1, true, 1.2, 3.4, D'aa55aa55', 1234567.89, U'Straße', U'Straße','Ascii',['one','two','two','three'],[5,4,4,3],[{'a'=>'apple'},{'b'=>'banana'}]},
+                 {'name2', 2, false, 5.6, 7.8, D'00', -1234567.89, U'là', U'là','Ascii', [],[],[]}], childrec);
 
-init2 := ROW({'name4' , 3, true, 9.10, 11.12, D'aa55aa55', 987.65, U'Baße', U'Baße',[],[],[]}, childrec);
+init2 := ROW({'name4' , 3, true, 9.10, 11.12, D'aa55aa55', 987.65, U'Baße', U'Baße', '', [],[],[]}, childrec);
 
 // Set up the Cassandra database
 // Note that we can execute multiple statements in a single embed, provided that there are
@@ -76,6 +77,7 @@ createTables() := EMBED(cassandra : server('127.0.0.1'),user('rchapman'),keyspac
                       ddd VARCHAR,
                       u1 VARCHAR,
                       u2 VARCHAR,
+                      a ASCII,
                       set1 SET<varchar>,
                       list1 LIST<INT>,
                       map1 MAP<VARCHAR, VARCHAR>,
@@ -92,23 +94,23 @@ ENDEMBED;
 // unless told to...
 
 initialize(dataset(childrec) values) := EMBED(cassandra : user('rchapman'),keyspace('test'),batch('unlogged'))
-  INSERT INTO tbl1 (name, value, boolval, r8, r4,d,ddd,u1,u2,set1,list1,map1) values (?,?,?,?,?,?,?,?,?,?,?,?);
+  INSERT INTO tbl1 (name, value, boolval, r8, r4,d,ddd,u1,u2,a,set1,list1,map1) values (?,?,?,?,?,?,?,?,?,?,?,?,?);
 ENDEMBED;
 
 initialize2(row(childrec) values) := EMBED(cassandra : user('rchapman'),keyspace('test'))
-  INSERT INTO tbl1 (name, value, boolval, r8, r4,d,ddd,u1,u2,set1,list1,map1) values (?,?,?,?,?,?,?,?,?,?,?,?);
+  INSERT INTO tbl1 (name, value, boolval, r8, r4,d,ddd,u1,u2,a,set1,list1,map1) values (?,?,?,?,?,?,?,?,?,?,?,?,?);
 ENDEMBED;
 
 // Returning a dataset
 
 dataset(childrec) testCassandraDS() := EMBED(cassandra : user('rchapman'),keyspace('test'))
-  SELECT name, value, boolval, r8, r4,d,ddd,u1,u2,set1,list1,map1 from tbl1;
+  SELECT name, value, boolval, r8, r4,d,ddd,u1,u2,a,set1,list1,map1 from tbl1;
 ENDEMBED;
 
 // Returning a single row
 
 childrec testCassandraRow() := EMBED(cassandra : user('rchapman'),keyspace('test'))
-  SELECT name, value, boolval, r8, r4,d,ddd,u1,u2,set1,list1,map1 from tbl1 LIMIT 1;
+  SELECT name, value, boolval, r8, r4,d,ddd,u1,u2,a,set1,list1,map1 from tbl1 LIMIT 1;
 ENDEMBED;
 
 // Passing in parameters
@@ -127,13 +129,14 @@ testCassandraParms(
 //   DECIMAL10_2 ddd,
    UTF8 u1,
    UNICODE u2,
+   STRING a,
    SET OF STRING set1,
    SET OF INTEGER4 list1,
    // Note we can't pass a dataset as a parameter to bind to a collection field - it would be interpreted as 'execute once per value in the dataset'
    // You have to pass a record containing the field as a child dataset
    ROW(mapwrapper) map1
    ) := EMBED(cassandra : user('rchapman'),keyspace('test'))
-  INSERT INTO tbl1 (name, value, boolval, r8, r4,d,ddd,u1,u2,set1,list1,map1) values (?,?,?,?,?,?,'8.76543',?,?,?,?,?);
+  INSERT INTO tbl1 (name, value, boolval, r8, r4,d,ddd,u1,u2,a,set1,list1,map1) values (?,?,?,?,?,?,'8.76543',?,?,?,?,?,?);
 ENDEMBED;
 
 // Returning scalars
@@ -143,7 +146,7 @@ string testCassandraString() := EMBED(cassandra : user('rchapman'),keyspace('tes
 ENDEMBED;
 
 dataset(childrec) testCassandraStringParam(string filter) := EMBED(cassandra : user('rchapman'),keyspace('test'))
-  SELECT name, value, boolval, r8, r4,d,ddd,u1,u2,set1,list1,map1 from tbl1 where name = ?;
+  SELECT name, value, boolval, r8, r4,d,ddd,u1,u2,a,set1,list1,map1 from tbl1 where name = ?;
 ENDEMBED;
 
 integer testCassandraInt() := EMBED(cassandra : user('rchapman'),keyspace('test'))
@@ -174,6 +177,10 @@ UNICODE testCassandraUnicode() := EMBED(cassandra : user('rchapman'),keyspace('t
   SELECT u2 from tbl1 WHERE name='name1';
 ENDEMBED;
 
+STRING testCassandraAscii() := EMBED(cassandra : user('rchapman'),keyspace('test'))
+  SELECT a from tbl1 WHERE name='name1';
+ENDEMBED;
+
 SET OF STRING testCassandraSet() := EMBED(cassandra : user('rchapman'),keyspace('test'))
   SELECT set1 from tbl1 WHERE name='name1';
 ENDEMBED;
@@ -196,7 +203,7 @@ stringrec := RECORD
 END;
 
 TRANSFORM(childrec) t(stringrec L) := EMBED(cassandra : user('rchapman'),keyspace('test'))
-  SELECT name, value, boolval, r8, r4,d,ddd,u1,u2,set1,list1,map1 from tbl1 where name = ?;
+  SELECT name, value, boolval, r8, r4,d,ddd,u1,u2,a,set1,list1,map1 from tbl1 where name = ?;
 ENDEMBED;
 
 init3 := DATASET([{'name1'},
@@ -211,7 +218,7 @@ stringrec extractName(childrec l) := TRANSFORM
 END;
 
 dataset(childrec) testCassandraDSParam(dataset(stringrec) inrecs) := EMBED(cassandra : user('rchapman'),keyspace('test'))
-  SELECT name, value, boolval, r8, r4,d,ddd,u1,u2,set1,list1,map1 from tbl1 where name = ?;
+  SELECT name, value, boolval, r8, r4,d,ddd,u1,u2,a,set1,list1,map1 from tbl1 where name = ?;
 ENDEMBED;
 
 // Testing performance of batch inserts
@@ -239,7 +246,7 @@ sequential (
   createTables(),
   initialize(init),
 
-  testCassandraParms('name3', 1, true, 1.2, 3.4, D'aa55aa55', U'Straße', U'Straße', ['four','five'], [2,2,3,1], ROW({[{'f'=>'fish'}]},MapWrapper)),
+  testCassandraParms('name3', 1, true, 1.2, 3.4, D'aa55aa55', U'Straße', U'Straße', 'Only 7-bit US-ASCII chars allowed', ['four','five'], [2,2,3,1], ROW({[{'f'=>'fish'}]},MapWrapper)),
   initialize2(init2),
   OUTPUT(SORT(testCassandraDS(), name)),
   OUTPUT(testCassandraRow().name),

+ 8 - 11
testing/regress/ecl/key/cassandra-simple.xml

@@ -1,11 +1,9 @@
-
-<Result>
 <Dataset name='Result 1'>
- <Row><name>name1</name><value>1</value><boolval>true</boolval><r8>1.2</r8><r4>3.400000095367432</r4><d>6161353561613535</d><ddd>1234567.89</ddd><u1>Straße</u1><u2>Straße</u2><set1><Item>one</Item><Item>three</Item><Item>two</Item></set1><list1><Item>5</Item><Item>4</Item><Item>4</Item><Item>3</Item></list1><map1><Row><fromval>b</fromval><toval>banana</toval></Row><Row><fromval>a</fromval><toval>apple</toval></Row></map1></Row>
- <Row><name>name2</name><value>2</value><boolval>false</boolval><r8>5.6</r8><r4>7.800000190734863</r4><d>3030</d><ddd>-1234567.89</ddd><u1>là</u1><u2>là</u2><set1></set1><list1></list1><map1></map1></Row>
- <Row><name>name3</name><value>1</value><boolval>true</boolval><r8>1.2</r8><r4>3.400000095367432</r4><d>6161353561613535</d><ddd>8.76</ddd><u1>Straße</u1><u2>Straße</u2><set1><Item>five</Item><Item>four</Item></set1><list1><Item>2</Item><Item>2</Item><Item>3</Item><Item>1</Item></list1><map1><Row><fromval>f</fromval><toval>fish</toval></Row></map1></Row>
- <Row><name>name4</name><value>3</value><boolval>true</boolval><r8>9.1</r8><r4>11.11999988555908</r4><d>6161353561613535</d><ddd>987.65</ddd><u1>Baße</u1><u2>Baße</u2><set1></set1><list1></list1><map1></map1></Row>
- <Row><name>nulls</name><value>99999</value><boolval>true</boolval><r8>99.98999999999999</r8><r4>999.989990234375</r4><d>393939393939</d><ddd>9.99</ddd><u1>ß</u1><u2>9999 ßßßß</u2><set1></set1><list1></list1><map1></map1></Row>
+ <Row><name>name1</name><value>1</value><boolval>true</boolval><r8>1.2</r8><r4>3.400000095367432</r4><d>6161353561613535</d><ddd>1234567.89</ddd><u1>Straße</u1><u2>Straße</u2><a>Ascii</a><set1><Item>one</Item><Item>three</Item><Item>two</Item></set1><list1><Item>5</Item><Item>4</Item><Item>4</Item><Item>3</Item></list1><map1><Row><fromval>b</fromval><toval>banana</toval></Row><Row><fromval>a</fromval><toval>apple</toval></Row></map1></Row>
+ <Row><name>name2</name><value>2</value><boolval>false</boolval><r8>5.6</r8><r4>7.800000190734863</r4><d>3030</d><ddd>-1234567.89</ddd><u1>là</u1><u2>là</u2><a>Ascii</a><set1></set1><list1></list1><map1></map1></Row>
+ <Row><name>name3</name><value>1</value><boolval>true</boolval><r8>1.2</r8><r4>3.400000095367432</r4><d>6161353561613535</d><ddd>8.76</ddd><u1>Straße</u1><u2>Straße</u2><a>Only 7-bit US-ASCII chars allowed</a><set1><Item>five</Item><Item>four</Item></set1><list1><Item>2</Item><Item>2</Item><Item>3</Item><Item>1</Item></list1><map1><Row><fromval>f</fromval><toval>fish</toval></Row></map1></Row>
+ <Row><name>name4</name><value>3</value><boolval>true</boolval><r8>9.1</r8><r4>11.11999988555908</r4><d>6161353561613535</d><ddd>987.65</ddd><u1>Baße</u1><u2>Baße</u2><a></a><set1></set1><list1></list1><map1></map1></Row>
+ <Row><name>nulls</name><value>99999</value><boolval>true</boolval><r8>99.98999999999999</r8><r4>999.989990234375</r4><d>393939393939</d><ddd>9.99</ddd><u1>ß</u1><u2>9999 ßßßß</u2><a></a><set1></set1><list1></list1><map1></map1></Row>
 </Dataset>
 <Dataset name='Result 2'>
  <Row><Result_2>name1</Result_2></Row>
@@ -14,7 +12,7 @@
  <Row><Result_3>name1</Result_3></Row>
 </Dataset>
 <Dataset name='Result 4'>
- <Row><name>name1</name><value>1</value><boolval>true</boolval><r8>1.2</r8><r4>3.400000095367432</r4><d>6161353561613535</d><ddd>1234567.89</ddd><u1>Straße</u1><u2>Straße</u2><set1><Item>one</Item><Item>three</Item><Item>two</Item></set1><list1><Item>5</Item><Item>4</Item><Item>4</Item><Item>3</Item></list1><map1><Row><fromval>b</fromval><toval>banana</toval></Row><Row><fromval>a</fromval><toval>apple</toval></Row></map1></Row>
+ <Row><name>name1</name><value>1</value><boolval>true</boolval><r8>1.2</r8><r4>3.400000095367432</r4><d>6161353561613535</d><ddd>1234567.89</ddd><u1>Straße</u1><u2>Straße</u2><a>Ascii</a><set1><Item>one</Item><Item>three</Item><Item>two</Item></set1><list1><Item>5</Item><Item>4</Item><Item>4</Item><Item>3</Item></list1><map1><Row><fromval>b</fromval><toval>banana</toval></Row><Row><fromval>a</fromval><toval>apple</toval></Row></map1></Row>
 </Dataset>
 <Dataset name='Result 5'>
  <Row><Result_5>1</Result_5></Row>
@@ -48,8 +46,8 @@
  <Row><fromval>a</fromval><toval>apple</toval></Row>
 </Dataset>
 <Dataset name='Result 15'>
- <Row><name>name1</name><value>1</value><boolval>true</boolval><r8>1.2</r8><r4>3.400000095367432</r4><d>6161353561613535</d><ddd>1234567.89</ddd><u1>Straße</u1><u2>Straße</u2><set1><Item>one</Item><Item>three</Item><Item>two</Item></set1><list1><Item>5</Item><Item>4</Item><Item>4</Item><Item>3</Item></list1><map1><Row><fromval>b</fromval><toval>banana</toval></Row><Row><fromval>a</fromval><toval>apple</toval></Row></map1></Row>
- <Row><name>name2</name><value>2</value><boolval>false</boolval><r8>5.6</r8><r4>7.800000190734863</r4><d>3030</d><ddd>-1234567.89</ddd><u1>là</u1><u2>là</u2><set1></set1><list1></list1><map1></map1></Row>
+ <Row><name>name1</name><value>1</value><boolval>true</boolval><r8>1.2</r8><r4>3.400000095367432</r4><d>6161353561613535</d><ddd>1234567.89</ddd><u1>Straße</u1><u2>Straße</u2><a>Ascii</a><set1><Item>one</Item><Item>three</Item><Item>two</Item></set1><list1><Item>5</Item><Item>4</Item><Item>4</Item><Item>3</Item></list1><map1><Row><fromval>b</fromval><toval>banana</toval></Row><Row><fromval>a</fromval><toval>apple</toval></Row></map1></Row>
+ <Row><name>name2</name><value>2</value><boolval>false</boolval><r8>5.6</r8><r4>7.800000190734863</r4><d>3030</d><ddd>-1234567.89</ddd><u1>là</u1><u2>là</u2><a>Ascii</a><set1></set1><list1></list1><map1></map1></Row>
 </Dataset>
 <Dataset name='Result 16'>
 </Dataset>
@@ -59,4 +57,3 @@
 <Dataset name='Result 18'>
  <Row><Result_18>Done</Result_18></Row>
 </Dataset>
-</Result>