1 /******************************************************************************
2 * Copyright (c) 2000-2014 Ericsson Telecom AB
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Eclipse Public License v1.0
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/epl-v10.html
7 ******************************************************************************/
8 // This module tests the following functions for UTF-8:
12 module UTF8Testcases {
14 import from Common all;
// UTF-8 byte order mark (octets EF BB BF). The "UTF-8 BOM" checks in this module
// prepend it to the expected encoding before comparing.
16 const octetstring c_BOM_UTF8 :='EFBBBF'O
17 //type component MTC {}
20 //00-7F: UTF-8 encoding unicode to octetstring and decoding back, 1 byte long
21 //no encoding type is passed: UTF-8 is the default encoding
// Walks every code point 0..127; the expected encoding of code point i is the
// single octet int2oct(i,1).
22 testcase tc_UTF8_encodeDecode_BMP1_implicit() runs on MTC {
// o: octets reported as "received" in the verdict text; o_expected: reference octets
23 var octetstring o:=''O;
24 var octetstring o_expected:=''O;
25 var universal charstring u1 := char(0,0,0,0); //expected
26 var universal charstring u2 := char(0,0,0,0); //converted
27 for(var integer i:=0;i<128;i:=i+1) {
29 o_expected := int2oct(i,1);
33 setverdict(pass,"unichar2oct(",u1,") encoding ok");
35 setverdict(fail,"unichar2oct(",u1,") encoding nok, expected: ",o_expected," received: ", o);
// the decoded value u2 has to match the original u1
41 setverdict(pass,"matched :", match(u2,u1));
43 setverdict(fail,"unmatched :", match(u2,u1));
// for these 7-bit values the only accepted detection result is "ASCII"
46 if( get_stringencoding(o) == "ASCII" ) {
47 setverdict(pass,"encoding type ok");
// the failure text names the encoding the condition above actually checks for
49 setverdict(fail,"encoding type nok, expected: ASCII, received ", get_stringencoding(o) );
55 //00-7F: UTF-8 encoding unicode to octetstring and decoding back, 1 byte long
// encoding type explicitly given ("UTF-8" and "UTF-8 BOM")
56 testcase tc_UTF8_encodeDecode_BMP1_explicit() runs on MTC {
57 var octetstring o:=''O;
58 var octetstring o_expected:=''O;
59 var universal charstring u1 := char(0,0,0,0); //expected
60 var universal charstring u2 := char(0,0,0,0); //converted
61 for(var integer i:=0;i<128;i:=i+1) {
63 //unichar2oct, UTF-8 no bom
// expected encoding of code point i is the single octet int2oct(i,1)
65 o_expected := int2oct(i,1);
67 o:=unichar2oct(u1,"UTF-8"); //no BOM
69 setverdict(pass,"unichar2oct(",u1,",\"UTF-8\") encoding is ok");
71 setverdict(fail,"unichar2oct(",u1,",\"UTF-8\") encoding is nok");
74 u2 := oct2unichar(o); //default:UTF-8 no bom
79 setverdict(fail,"unmatched :", match(u2,u1));
82 u2 := oct2unichar(o,"UTF-8"); //explicit encoding type
87 setverdict(fail,"unmatched :", match(u2,u1));
91 //unichar2oct, UTF-8 BOM
92 o_expected := c_BOM_UTF8 & o_expected; //bom
93 o:=unichar2oct(u1,"UTF-8 BOM");
95 setverdict(pass,"unichar2oct(",u1,") encoding ok");
97 setverdict(fail,"unichar2oct(",u1,") encoding nok, expected: ", o_expected," received: ",o );
// the BOM-prefixed octets are decoded with plain "UTF-8"; tc_UTF8_negative expects
// oct2unichar to reject "UTF-8 BOM" as an encoding parameter
101 u2 := oct2unichar(o,"UTF-8");
106 setverdict(fail,"unmatched :", match(u2,u1));
110 if( get_stringencoding(o) == "UTF-8" ) {
111 setverdict(pass,"encoding type ok");
113 setverdict(fail,"encoding type nok");
// round trip: re-encoding the decoded value with BOM has to give back o
116 if( unichar2oct(u2, "UTF-8 BOM") == o){
117 setverdict(pass, "converting back to octetstring is ok");
// report the same value that was compared above (encoded with "UTF-8 BOM")
119 setverdict(fail,"converting back to octetstring is nok, expected: ",o , " received:",unichar2oct(u2, "UTF-8 BOM") );
125 //0080-07FF: UTF-8 encoding unicode to octetstring and octetstring to unicode
126 // UTF-8, 2 bytes long
// encoding type not given: oct2unichar is called with the octetstring only
127 testcase tc_UTF8_encodeDecode_BMP2_implicit() runs on MTC {
128 var octetstring o:=''O;
129 var octetstring o_expected:=''O;
130 var universal charstring u1 := char(0,0,0,0);
131 var universal charstring u2 := char(0,0,0,0);
// the loop walks code points from L up to, but not including, N
132 const integer L := oct2int('0080'O);
133 const integer N := oct2int('0800'O);
134 var bitstring bs :=''B;
// the range is sampled with a stride of 9 rather than tested exhaustively
135 for( var integer i:=L; i<N; i:=i+9 ) {
136 u1 := int2unichar(i);
// expected 2-octet form 110xxxxx 10xxxxxx, filled from the first 11 bits of bs
// NOTE(review): bs is presumably the 11-bit representation of i - confirm at its assignment
138 o_expected := bit2oct('110'B & substr(bs,0,5)&'10'B & substr(bs,5,6));
139 log("i=",i," utf-8 string:", o_expected);
143 setverdict(pass,"unichar2oct(",u1,") encoding ok");
145 setverdict(fail,"unichar2oct(",u1,") encoding nok, expected: ",o_expected," received: ", o);
// decode back with the default encoding; the fail branch logs the u2/u1 mismatch
149 u2 := oct2unichar(o);
154 setverdict(fail,"unmatched :", match( u2, u1 ));
// the encoded octets have to be detected as "UTF-8"
158 if( get_stringencoding(o) == "UTF-8" ) {
159 setverdict(pass,"encoding type ok");
161 setverdict(fail,"encoding type nok");
166 //0080-07FF: UTF-8 encoding unicode to octetstring and octetstring to unicode
167 // UTF-8, 2 bytes long
168 // encoding type explicitly given
169 testcase tc_UTF8_encodeDecode_BMP2_explicit() runs on MTC {
170 var octetstring o:=''O;
171 var octetstring o_expected:=''O;
172 var universal charstring u1 := char(0,0,0,0);
173 var universal charstring u2 := char(0,0,0,0);
// the loop walks code points from L up to, but not including, N
174 const integer L := oct2int('0080'O);
175 const integer N := oct2int('0800'O);
176 var bitstring bs :=''B;
// the range is sampled with a stride of 9 rather than tested exhaustively
177 for( var integer i:=L; i<N; i:=i+9 ) {
178 u1 := int2unichar(i);
// expected 2-octet form 110xxxxx 10xxxxxx, filled from the first 11 bits of bs
// NOTE(review): bs is presumably the 11-bit representation of i - confirm at its assignment
180 log("i=",i," utf-8 string:",'110'B & substr(bs,0,5)&'10'B & substr(bs,5,6));
181 o_expected := bit2oct('110'B & substr(bs,0,5)&'10'B & substr(bs,5,6));
182 log("utf-8 string without bom:",o_expected);
// --- part 1: encoding type "UTF-8" (no BOM) ---
183 o:=unichar2oct(u1,"UTF-8"); //means: without bom
185 setverdict(pass,"unichar2oct(",u1,",\"UTF-8\") encoding ok");
187 setverdict(fail,"unichar2oct(",u1,",\"UTF-8\") encoding nok, expected: ",o_expected," received: ", o);
190 u2 := oct2unichar(o, "UTF-8");
195 setverdict(fail,"unmatched :", match( u2, u1 ));
198 if( get_stringencoding(o) == "UTF-8" ) {
199 setverdict(pass,"encoding type ok");
201 setverdict(fail,"encoding type nok");
// --- part 2: encoding type "UTF-8 BOM"; the expected octets get the BOM prefix ---
205 o_expected := c_BOM_UTF8 & o_expected;
206 o:=unichar2oct(u1,"UTF-8 BOM");
208 setverdict(pass,"unichar2oct(",u1,",\"UTF-8 BOM\") encoding ok");
210 setverdict(fail,"unichar2oct(",u1,",\"UTF-8 BOM\") encoding nok, expected: ",o_expected," received: ", o);
// the BOM-prefixed octets are decoded with plain "UTF-8"
213 u2 := oct2unichar(o, "UTF-8");
218 setverdict(fail,"unmatched :", match( u2, u1 ));
221 if( get_stringencoding(o) == "UTF-8" ) {
222 setverdict(pass,"encoding type ok");
224 setverdict(fail,"encoding type nok");
229 //0800-FFFF: UTF-8 encoding unicode to octetstring and octetstring to unicode
230 // UTF-8, 3 bytes long
// encoding type not given: oct2unichar is called with the octetstring only
231 testcase tc_UTF8_encodeDecode_BMP3_implicit() runs on MTC {
232 var octetstring o:=''O;
233 var octetstring o_expected:=''O;
234 var universal charstring u1 := char(0,0,0,0); //expected
235 var universal charstring u2 := char(0,0,0,0); //converted
// the loop walks code points from L up to, but not including, N
236 const integer L := oct2int( '0800'O);
237 const integer N := oct2int('010000'O);
238 var bitstring bs :=''B;
// the range is sampled with a stride of 10027 rather than tested exhaustively
239 for( var integer i:=L; i<N; i:=i+10027 ) {
240 u1 := int2unichar(i);
// expected 3-octet form 1110xxxx 10xxxxxx 10xxxxxx, filled from the first 16 bits of bs;
// bs is read on the right-hand side and then overwritten with the encoded bit pattern
// NOTE(review): bs is presumably the 16-bit representation of i - confirm at its assignment
242 bs := '1110'B & substr(bs,0,4)&'10'B & substr(bs,4,6) &'10'B & substr(bs,10,6);
243 log("i=",i," utf-8 string:", bs);
244 o_expected := bit2oct(bs);
247 setverdict(pass,"unichar2oct(",u1,") encoding ok");
249 setverdict(fail,"unichar2oct(",u1,") encoding nok, expected: ",o_expected," received: ", o);
// decode back with the default encoding; the fail branch logs the u2/u1 mismatch
253 u2 := oct2unichar(o);
258 setverdict(fail,"unmatched :", match( u2, u1 ));
// the encoded octets have to be detected as "UTF-8"
262 if( get_stringencoding(o) == "UTF-8" ) {
263 setverdict(pass,"encoding type ok");
265 setverdict(fail,"encoding type nok");
270 //0800-FFFF: UTF-8 encoding unicode to octetstring and octetstring to unicode
271 // UTF-8, 3 bytes long
// encoding type explicitly given ("UTF-8", then "UTF-8 BOM")
272 testcase tc_UTF8_encodeDecode_BMP3_explicit() runs on MTC {
273 var octetstring o:=''O;
274 var octetstring o_expected:=''O;
275 var universal charstring u1 := char(0,0,0,0); //expected
276 var universal charstring u2 := char(0,0,0,0); //converted
// the loop walks code points from L up to, but not including, N
277 const integer L := oct2int( '0800'O);
278 const integer N := oct2int('010000'O);
279 var bitstring bs :=''B;
// the range is sampled with a stride of 10027 rather than tested exhaustively
280 for( var integer i:=L; i<N; i:=i+10027 ) {
281 u1 := int2unichar(i);
// expected 3-octet form 1110xxxx 10xxxxxx 10xxxxxx, filled from the first 16 bits of bs;
// bs is read on the right-hand side and then overwritten with the encoded bit pattern
// NOTE(review): bs is presumably the 16-bit representation of i - confirm at its assignment
283 bs := '1110'B & substr(bs,0,4)&'10'B & substr(bs,4,6) &'10'B & substr(bs,10,6);
284 log("i=",i," utf-8 string:", bs);
285 o_expected := bit2oct(bs);
// --- part 1: encoding type "UTF-8" (no BOM) ---
286 o:=unichar2oct(u1,"UTF-8");
288 setverdict(pass,"unichar2oct(",u1,") encoding ok");
290 setverdict(fail,"unichar2oct(",u1,") encoding nok, expected: ",o_expected," received: ", o);
294 u2 := oct2unichar(o,"UTF-8");
299 setverdict(fail,"unmatched :", match( u2, u1 ));
303 if( get_stringencoding(o) == "UTF-8" ) {
304 setverdict(pass,"encoding type ok");
306 setverdict(fail,"encoding type nok");
// --- part 2: encoding type "UTF-8 BOM"; the expected octets get the BOM prefix ---
310 o_expected := c_BOM_UTF8 & o_expected;
311 o:=unichar2oct(u1,"UTF-8 BOM");
313 setverdict(pass,"unichar2oct(",u1,",\"UTF-8 BOM\") encoding ok");
315 setverdict(fail,"unichar2oct(",u1,",\"UTF-8 BOM\") encoding nok, expected: ",o_expected," received: ", o);
// the BOM-prefixed octets are decoded with plain "UTF-8"
318 u2 := oct2unichar(o, "UTF-8");
323 setverdict(fail,"unmatched :", match( u2, u1 ));
326 if( get_stringencoding(o) == "UTF-8" ) {
327 setverdict(pass,"encoding type ok");
329 setverdict(fail,"encoding type nok");
334 //010000-1FFFFF: UTF-8 encoding unicode to octetstring and octetstring to unicode
335 // UTF-8, 4 bytes long
// encoding type not given: oct2unichar is called with the octetstring only
336 testcase tc_UTF8_encodeDecode_BMP4_implicit() runs on MTC {
337 var octetstring o:=''O;
338 var octetstring o_expected:=''O;
339 var universal charstring u1 := char(0,0,0,0); // expected encoded value
340 var universal charstring u2 := char(0,0,0,0); //decoded value
// the loop walks code points from L up to, but not including, N
341 const integer L := oct2int( '010000'O);
342 const integer N := oct2int('200000'O);
343 var bitstring bs :=''B;
// the range is sampled with a stride of 10023 rather than tested exhaustively
344 for( var integer i:=L; i<N; i:=i+10023 ) {
345 u1 := int2unichar(i);
// expected 4-octet form 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx, filled from the first 21 bits of bs;
// bs is read on the right-hand side and then overwritten with the encoded bit pattern
// NOTE(review): bs is presumably the 21-bit representation of i - confirm at its assignment
347 bs := '11110'B & substr(bs,0,3)&'10'B & substr(bs,3,6) &'10'B & substr(bs,9,6) & '10'B & substr(bs,15,6);
348 log("i=",i," utf-8 string:", bs);
349 o_expected := bit2oct(bs);
352 setverdict(pass,"unichar2oct(",u1,") encoding ok");
354 setverdict(fail,"unichar2oct(",u1,") encoding nok, expected: ",o_expected," received: ", o);
// decode back with the default encoding; the fail branch logs the u2/u1 mismatch
358 u2 := oct2unichar(o);
363 setverdict(fail,"unmatched :", match( u2, u1 ));
// the encoded octets have to be detected as "UTF-8"
367 if( get_stringencoding(o) == "UTF-8" ) {
368 setverdict(pass,"encoding type ok");
370 setverdict(fail,"encoding type nok");
//010000-1FFFFF: UTF-8 encoding unicode to octetstring and octetstring to unicode
// UTF-8, 4 bytes long
// encoding type explicitly given ("UTF-8", then "UTF-8 BOM")
376 testcase tc_UTF8_encodeDecode_BMP4_explicit() runs on MTC {
377 var octetstring o:=''O;
378 var octetstring o_expected:=''O;
379 var universal charstring u1 := char(0,0,0,0); // expected encoded value
380 var universal charstring u2 := char(0,0,0,0); //decoded value
// the loop walks code points from L up to, but not including, N
381 const integer L := oct2int( '010000'O);
382 const integer N := oct2int('200000'O);
383 var bitstring bs :=''B;
// the range is sampled with a stride of 10023 rather than tested exhaustively
384 for( var integer i:=L; i<N; i:=i+10023 ) {
385 u1 := int2unichar(i);
// expected 4-octet form 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx, filled from the first 21 bits of bs;
// bs is read on the right-hand side and then overwritten with the encoded bit pattern
// NOTE(review): bs is presumably the 21-bit representation of i - confirm at its assignment
387 bs := '11110'B & substr(bs,0,3)&'10'B & substr(bs,3,6) &'10'B & substr(bs,9,6) & '10'B & substr(bs,15,6);
388 log("i=",i," utf-8 string:", bs);
389 o_expected := bit2oct(bs);
// --- part 1: encoding type "UTF-8" (no BOM) ---
390 o:=unichar2oct(u1,"UTF-8");
392 setverdict(pass,"unichar2oct(",u1,") encoding ok");
394 setverdict(fail,"unichar2oct(",u1,") encoding nok, expected: ",o_expected," received: ", o);
397 u2 := oct2unichar(o,"UTF-8");
402 setverdict(fail,"unmatched :", match( u2, u1 ));
406 if( get_stringencoding(o) == "UTF-8" ) {
407 setverdict(pass,"encoding type ok");
409 setverdict(fail,"encoding type nok");
// --- part 2: encoding type "UTF-8 BOM"; the expected octets get the BOM prefix ---
413 o_expected := c_BOM_UTF8 & o_expected;
414 o:=unichar2oct(u1,"UTF-8 BOM");
416 setverdict(pass,"unichar2oct(",u1,",\"UTF-8 BOM\") encoding ok");
418 setverdict(fail,"unichar2oct(",u1,",\"UTF-8 BOM\") encoding nok, expected: ",o_expected," received: ", o);
// the BOM-prefixed octets are decoded with plain "UTF-8"
421 u2 := oct2unichar(o, "UTF-8");
426 setverdict(fail,"unmatched :", match( u2, u1 ));
429 if( get_stringencoding(o) == "UTF-8" ) {
430 setverdict(pass,"encoding type ok");
432 setverdict(fail,"encoding type nok");
439 //200000-03FFFFFF: UTF-8 encoding unicode to octetstring and octetstring to unicode
440 // UTF-8, 5 bytes long
// encoding type not given: oct2unichar is called with the octetstring only
441 testcase tc_UTF8_encodeDecode_BMP5_implicit() runs on MTC {
442 var octetstring o:=''O;
443 var octetstring o_expected:=''O;
444 var universal charstring u1 := char(0,0,0,0); // expected encoded value
445 var universal charstring u2 := char(0,0,0,0); // decoded value
// the loop walks code points from L up to, but not including, N
446 const integer L := oct2int( '200000'O );
447 const integer N := oct2int('04000000'O);
448 var bitstring bs :=''B;
// the range is sampled with a stride of 10000 rather than tested exhaustively
449 for( var integer i:=L; i<N; i:=i+10000 ) {
450 u1 := int2unichar(i);
// expected 5-octet form 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx, filled from the
// first 26 bits of bs; bs is read on the right-hand side and then overwritten
// NOTE(review): bs is presumably the 26-bit representation of i - confirm at its assignment
452 bs := '111110'B & substr(bs,0,2)&'10'B & substr(bs,2,6) &'10'B & substr(bs,8,6) & '10'B & substr(bs,14,6)& '10'B & substr(bs,20,6);
453 log("i=",i," utf-8 string:", bs);
454 o_expected := bit2oct(bs);
457 setverdict(pass,"unichar2oct(",u1,") encoding ok");
459 setverdict(fail,"unichar2oct(",u1,") encoding nok, expected: ",o_expected," received: ", o);
// decode back with the default encoding; the fail branch logs the u2/u1 mismatch
463 u2 := oct2unichar(o);
468 setverdict(fail,"unmatched :", match( u2, u1 ));
// the encoded octets have to be detected as "UTF-8"
472 if( get_stringencoding(o) == "UTF-8" ) {
473 setverdict(pass,"encoding type ok");
475 setverdict(fail,"encoding type nok");
481 //200000-03FFFFFF: UTF-8 encoding unicode to octetstring and octetstring to unicode
482 // UTF-8, 5 bytes long
// encoding type explicitly given ("UTF-8", then "UTF-8 BOM")
483 testcase tc_UTF8_encodeDecode_BMP5_explicit() runs on MTC {
484 var octetstring o:=''O;
485 var octetstring o_expected:=''O;
486 var universal charstring u1 := char(0,0,0,0); // expected encoded value
487 var universal charstring u2 := char(0,0,0,0); // decoded value
// the loop walks code points from L up to, but not including, N
488 const integer L := oct2int( '200000'O );
489 const integer N := oct2int('04000000'O);
490 var bitstring bs :=''B;
// the range is sampled with a stride of 10000 rather than tested exhaustively
491 for( var integer i:=L; i<N; i:=i+10000 ) {
492 u1 := int2unichar(i);
// expected 5-octet form 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx, filled from the
// first 26 bits of bs; bs is read on the right-hand side and then overwritten
// NOTE(review): bs is presumably the 26-bit representation of i - confirm at its assignment
494 bs := '111110'B & substr(bs,0,2)&'10'B & substr(bs,2,6) &'10'B & substr(bs,8,6) & '10'B & substr(bs,14,6)& '10'B & substr(bs,20,6);
495 log("i=",i," utf-8 string:", bs);
496 o_expected := bit2oct(bs);
// --- part 1: encoding type "UTF-8" (no BOM) ---
497 o:=unichar2oct(u1,"UTF-8");
499 setverdict(pass,"unichar2oct(",u1,") encoding ok");
501 setverdict(fail,"unichar2oct(",u1,") encoding nok, expected: ",o_expected," received: ", o);
504 u2 := oct2unichar(o,"UTF-8");
509 setverdict(fail,"unmatched :", match( u2, u1 ));
513 if( get_stringencoding(o) == "UTF-8" ) {
514 setverdict(pass,"encoding type ok");
516 setverdict(fail,"encoding type nok");
// --- part 2: encoding type "UTF-8 BOM"; the expected octets get the BOM prefix ---
520 o_expected := c_BOM_UTF8 & o_expected;
521 o:=unichar2oct(u1,"UTF-8 BOM");
523 setverdict(pass,"unichar2oct(",u1,",\"UTF-8 BOM\") encoding ok");
525 setverdict(fail,"unichar2oct(",u1,",\"UTF-8 BOM\") encoding nok, expected: ",o_expected," received: ", o);
// the BOM-prefixed octets are decoded with plain "UTF-8"
528 u2 := oct2unichar(o, "UTF-8");
533 setverdict(fail,"unmatched :", match( u2, u1 ));
536 if( get_stringencoding(o) == "UTF-8" ) {
537 setverdict(pass,"encoding type ok");
539 setverdict(fail,"encoding type nok");
// Feeds remove_bom() an octetstring that starts with the UTF-8 BOM (EF BB BF)
// followed by the single octet 7F.
544 testcase tc_remove_bom_utf8() runs on MTC {
// o1: BOM + payload octet 7F; o2: what remove_bom() returns for it
// NOTE(review): the expected result is presumably '7F'O - confirm against the verdict check
545 var octetstring o1:= 'EFBBBF7F'O;
546 var octetstring o2:= remove_bom(o1);
// Negative tests: each call passes an input together with the error text expected
// for it to f_oct2unichar_negativetest.
// NOTE(review): the helper is not defined in this module - presumably imported from Common; confirm
555 testcase tc_UTF8_negative() runs on MTC {
// expected error: octet FF at character/octet position 0 is reported as unused/reserved
556 f_oct2unichar_negativetest(
559 expectedErrorMsg:="Malformed: At character position 0, octet position 0: unused/reserved octet FF.");
// "UTF-8BE" has to be rejected as an encoding parameter
561 f_oct2unichar_negativetest(
563 encType := "UTF-8BE",
564 expectedErrorMsg:="oct2unichar: Invalid parameter: UTF-8BE");
// "UTF-8 BOM" has to be rejected by oct2unichar as an encoding parameter
566 f_oct2unichar_negativetest(
568 encType := "UTF-8 BOM",
569 expectedErrorMsg:="oct2unichar: Invalid parameter: UTF-8 BOM");
// expected error names "Oracle" as the invalid parameter
571 f_oct2unichar_negativetest(
574 expectedErrorMsg:="oct2unichar: Invalid parameter: Oracle");
// Executes the UTF-8 testcases of this module one after the other:
// the 1- to 5-byte encode/decode pairs first, then BOM removal and the negative tests.
579 execute(tc_UTF8_encodeDecode_BMP1_implicit());
580 execute(tc_UTF8_encodeDecode_BMP1_explicit());
581 execute(tc_UTF8_encodeDecode_BMP2_implicit());
582 execute(tc_UTF8_encodeDecode_BMP2_explicit());
583 execute(tc_UTF8_encodeDecode_BMP3_implicit());
584 execute(tc_UTF8_encodeDecode_BMP3_explicit());
585 execute(tc_UTF8_encodeDecode_BMP4_implicit());
586 execute(tc_UTF8_encodeDecode_BMP4_explicit());
587 execute(tc_UTF8_encodeDecode_BMP5_implicit());
588 execute(tc_UTF8_encodeDecode_BMP5_explicit());
589 execute(tc_remove_bom_utf8());
590 execute(tc_UTF8_negative());