Sync with 5.4.0
[deliverable/titan.core.git] / regression_test / ucharstrOper / UTF8Testcases.ttcn
1 /******************************************************************************
2 * Copyright (c) 2000-2015 Ericsson Telecom AB
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Eclipse Public License v1.0
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/epl-v10.html
7 ******************************************************************************/
8 // This module tests the following functions for UTF-8:
9 // oct2unichar
10 // unichar2oct
11 // get_stringencoding
12 module UTF8Testcases {
13
14 import from Common all;
15
// UTF-8 byte order mark (octets EF BB BF). The "UTF-8 BOM" testcases prepend
// it to the expected encoding before comparing with the unichar2oct result.
const octetstring c_BOM_UTF8 := 'EFBBBF'O;
17 //type component MTC {}
18
// One-character tests, code points 0x00-0x7F (single-octet UTF-8 range).
// Each code point is encoded with unichar2oct and decoded again with
// oct2unichar, both without an explicit encoding argument (the original test
// notes that UTF-8 is the default). The single resulting octet must equal the
// code point itself and get_stringencoding must classify it as "ASCII".
testcase tc_UTF8_encodeDecode_BMP1_implicit() runs on MTC {
  var octetstring o := ''O;
  var octetstring o_expected := ''O;
  var universal charstring u1 := char(0,0,0,0); // original character
  var universal charstring u2 := char(0,0,0,0); // converted back from octets
  var charstring v_encoding := "";               // result of get_stringencoding
  for (var integer i := 0; i < 128; i := i + 1) {
    u1 := int2unichar(i);
    o_expected := int2oct(i, 1);
    log(o_expected);

    // Encoding: one code point -> one octet with the same numeric value.
    o := unichar2oct(u1);
    if (o == o_expected) {
      setverdict(pass, "unichar2oct(", u1, ") encoding ok");
    } else {
      setverdict(fail, "unichar2oct(", u1, ") encoding nok, expected: ", o_expected, " received: ", o);
    }

    // Decoding must give back the original character.
    u2 := oct2unichar(o);
    log(u2);
    if (match(u2, u1)) {
      setverdict(pass, "matched :", match(u2, u1));
    } else {
      setverdict(fail, "unmatched :", match(u2, u1));
    }

    // Evaluate once so the logged value is exactly the value that was checked.
    v_encoding := get_stringencoding(o);
    if (v_encoding == "ASCII") {
      setverdict(pass, "encoding type ok");
    } else {
      // The expectation checked above is "ASCII"; report it as such.
      setverdict(fail, "encoding type nok, expected: ASCII, received ", v_encoding);
    }
  }
}
54
// Code points 0x00-0x7F (single-octet UTF-8 range) with the encoding named
// explicitly. For every code point the test checks:
//   1. unichar2oct(u1,"UTF-8") yields the bare octet (no BOM);
//   2. oct2unichar decodes it back, both with the default and with "UTF-8";
//   3. unichar2oct(u1,"UTF-8 BOM") yields c_BOM_UTF8 followed by the octet;
//   4. oct2unichar(...,"UTF-8") of the BOM-prefixed octets is expected to
//      return the original character;
//   5. get_stringencoding reports "UTF-8" for the BOM-prefixed octets;
//   6. re-encoding the decoded character with "UTF-8 BOM" reproduces them.
testcase tc_UTF8_encodeDecode_BMP1_explicit() runs on MTC {
  var octetstring o := ''O;
  var octetstring o_expected := ''O;
  var octetstring o_reencoded := ''O;            // u2 encoded again with BOM
  var universal charstring u1 := char(0,0,0,0); // expected
  var universal charstring u2 := char(0,0,0,0); // converted
  for (var integer i := 0; i < 128; i := i + 1) {

    // unichar2oct, UTF-8 without BOM
    u1 := int2unichar(i);
    o_expected := int2oct(i, 1);
    log(o_expected);
    o := unichar2oct(u1, "UTF-8"); // no BOM
    if (o == o_expected) {
      setverdict(pass, "unichar2oct(", u1, ",\"UTF-8\") encoding is ok");
    } else {
      setverdict(fail, "unichar2oct(", u1, ",\"UTF-8\") encoding is nok, expected: ", o_expected, " received: ", o);
    }
    log(o);

    // Decode with the default encoding (UTF-8 without BOM per the original note).
    u2 := oct2unichar(o);
    log(u2);
    if (match(u2, u1)) {
      setverdict(pass);
    } else {
      setverdict(fail, "unmatched :", match(u2, u1));
    }

    // Decode with the encoding named explicitly.
    u2 := oct2unichar(o, "UTF-8");
    log(u2);
    if (match(u2, u1)) {
      setverdict(pass);
    } else {
      setverdict(fail, "unmatched :", match(u2, u1));
    }

    // unichar2oct, UTF-8 with BOM
    o_expected := c_BOM_UTF8 & o_expected;
    o := unichar2oct(u1, "UTF-8 BOM");
    if (o == o_expected) {
      setverdict(pass, "unichar2oct(", u1, ",\"UTF-8 BOM\") encoding ok");
    } else {
      setverdict(fail, "unichar2oct(", u1, ",\"UTF-8 BOM\") encoding nok, expected: ", o_expected, " received: ", o);
    }

    log(o);
    u2 := oct2unichar(o, "UTF-8");
    log(u2);
    if (match(u2, u1)) {
      setverdict(pass);
    } else {
      setverdict(fail, "unmatched :", match(u2, u1));
    }

    if (get_stringencoding(o) == "UTF-8") {
      setverdict(pass, "encoding type ok");
    } else {
      setverdict(fail, "encoding type nok");
    }

    // Round trip: report the very value that was compared. The original
    // message logged unichar2oct(u2) without the "UTF-8 BOM" argument, i.e.
    // a different octetstring than the one that failed the comparison.
    o_reencoded := unichar2oct(u2, "UTF-8 BOM");
    if (o_reencoded == o) {
      setverdict(pass, "converting back to octetstring is ok");
    } else {
      setverdict(fail, "converting back to octetstring is nok, expected: ", o, " received:", o_reencoded);
    }
  }
}
123
124
// Code points 0x0080-0x07FF: two-octet UTF-8 sequences (110xxxxx 10xxxxxx).
// Starting at 0x0080, every 9th code point below 0x0800 is encoded with
// unichar2oct and decoded with oct2unichar, both without an explicit encoding
// argument. The encoding is compared with a bit pattern assembled by hand.
testcase tc_UTF8_encodeDecode_BMP2_implicit() runs on MTC {
  const integer c_first := oct2int('0080'O);
  const integer c_limit := oct2int('0800'O); // exclusive upper bound
  var octetstring v_encoded := ''O;
  var octetstring v_expected := ''O;
  var universal charstring v_original := char(0,0,0,0);
  var universal charstring v_decoded := char(0,0,0,0);
  var bitstring v_bits := ''B;

  for (var integer v_cp := c_first; v_cp < c_limit; v_cp := v_cp + 9) {
    v_original := int2unichar(v_cp);
    v_bits := int2bit(v_cp, 11);
    // 5 high bits go into the lead octet, 6 low bits into the continuation octet.
    v_expected := bit2oct('110'B & substr(v_bits, 0, 5) & '10'B & substr(v_bits, 5, 6));
    log("i=", v_cp, " utf-8 string:", v_expected);

    v_encoded := unichar2oct(v_original);
    if (v_encoded != v_expected) {
      setverdict(fail, "unichar2oct(", v_original, ") encoding nok, expected: ", v_expected, " received: ", v_encoded);
    } else {
      setverdict(pass, "unichar2oct(", v_original, ") encoding ok");
    }

    // Decode without naming the encoding.
    v_decoded := oct2unichar(v_encoded);
    log(v_decoded);
    if (not match(v_decoded, v_original)) {
      setverdict(fail, "unmatched :", match(v_decoded, v_original));
    } else {
      setverdict(pass);
    }

    if (get_stringencoding(v_encoded) != "UTF-8") {
      setverdict(fail, "encoding type nok");
    } else {
      setverdict(pass, "encoding type ok");
    }
  }
}
165
// Code points 0x0080-0x07FF: two-octet UTF-8 sequences, encoding type given
// explicitly. Each sampled code point (step 9) goes through two rounds:
//   - "UTF-8"     : encode without BOM, decode, check get_stringencoding;
//   - "UTF-8 BOM" : encode with BOM (expected = c_BOM_UTF8 & plain encoding),
//                   decode with "UTF-8", check get_stringencoding.
testcase tc_UTF8_encodeDecode_BMP2_explicit() runs on MTC {
  const integer c_first := oct2int('0080'O);
  const integer c_limit := oct2int('0800'O); // exclusive upper bound
  var octetstring v_encoded := ''O;
  var octetstring v_expected := ''O;
  var universal charstring v_original := char(0,0,0,0);
  var universal charstring v_decoded := char(0,0,0,0);
  var bitstring v_raw := ''B;   // 11-bit binary form of the code point
  var bitstring v_utf8 := ''B;  // hand-assembled two-octet pattern

  for (var integer v_cp := c_first; v_cp < c_limit; v_cp := v_cp + 9) {
    v_original := int2unichar(v_cp);
    v_raw := int2bit(v_cp, 11);
    // 110xxxxx 10xxxxxx
    v_utf8 := '110'B & substr(v_raw, 0, 5) & '10'B & substr(v_raw, 5, 6);
    log("i=", v_cp, " utf-8 string:", v_utf8);
    v_expected := bit2oct(v_utf8);
    log("utf-8 string without bom:", v_expected);

    // --- without BOM ---
    v_encoded := unichar2oct(v_original, "UTF-8");
    if (v_encoded != v_expected) {
      setverdict(fail, "unichar2oct(", v_original, ",\"UTF-8\") encoding nok, expected: ", v_expected, " received: ", v_encoded);
    } else {
      setverdict(pass, "unichar2oct(", v_original, ",\"UTF-8\") encoding ok");
    }

    v_decoded := oct2unichar(v_encoded, "UTF-8");
    log(v_decoded);
    if (not match(v_decoded, v_original)) {
      setverdict(fail, "unmatched :", match(v_decoded, v_original));
    } else {
      setverdict(pass);
    }

    if (get_stringencoding(v_encoded) != "UTF-8") {
      setverdict(fail, "encoding type nok");
    } else {
      setverdict(pass, "encoding type ok");
    }

    // --- with BOM ---
    v_expected := c_BOM_UTF8 & v_expected;
    v_encoded := unichar2oct(v_original, "UTF-8 BOM");
    if (v_encoded != v_expected) {
      setverdict(fail, "unichar2oct(", v_original, ",\"UTF-8 BOM\") encoding nok, expected: ", v_expected, " received: ", v_encoded);
    } else {
      setverdict(pass, "unichar2oct(", v_original, ",\"UTF-8 BOM\") encoding ok");
    }

    v_decoded := oct2unichar(v_encoded, "UTF-8");
    log(v_decoded);
    if (not match(v_decoded, v_original)) {
      setverdict(fail, "unmatched :", match(v_decoded, v_original));
    } else {
      setverdict(pass);
    }

    if (get_stringencoding(v_encoded) != "UTF-8") {
      setverdict(fail, "encoding type nok");
    } else {
      setverdict(pass, "encoding type ok");
    }
  }
}
228
// Code points 0x0800-0xFFFF: three-octet UTF-8 sequences
// (1110xxxx 10xxxxxx 10xxxxxx). Code points are sampled with a step of 10027
// and converted with unichar2oct / oct2unichar without an explicit encoding
// argument; the encoding is compared with a hand-assembled bit pattern.
testcase tc_UTF8_encodeDecode_BMP3_implicit() runs on MTC {
  const integer c_first := oct2int('0800'O);
  const integer c_limit := oct2int('010000'O); // exclusive upper bound
  var octetstring v_encoded := ''O;
  var octetstring v_expected := ''O;
  var universal charstring v_original := char(0,0,0,0); // expected
  var universal charstring v_decoded := char(0,0,0,0);  // converted
  var bitstring v_raw := ''B;   // 16-bit binary form of the code point
  var bitstring v_utf8 := ''B;  // hand-assembled three-octet pattern

  for (var integer v_cp := c_first; v_cp < c_limit; v_cp := v_cp + 10027) {
    v_original := int2unichar(v_cp);
    v_raw := int2bit(v_cp, 16);
    // 4 + 6 + 6 payload bits spread over lead and two continuation octets.
    v_utf8 := '1110'B & substr(v_raw, 0, 4) & '10'B & substr(v_raw, 4, 6) & '10'B & substr(v_raw, 10, 6);
    log("i=", v_cp, " utf-8 string:", v_utf8);
    v_expected := bit2oct(v_utf8);

    v_encoded := unichar2oct(v_original);
    if (v_encoded != v_expected) {
      setverdict(fail, "unichar2oct(", v_original, ") encoding nok, expected: ", v_expected, " received: ", v_encoded);
    } else {
      setverdict(pass, "unichar2oct(", v_original, ") encoding ok");
    }

    // Decode without naming the encoding.
    v_decoded := oct2unichar(v_encoded);
    log(v_decoded);
    if (not match(v_decoded, v_original)) {
      setverdict(fail, "unmatched :", match(v_decoded, v_original));
    } else {
      setverdict(pass);
    }

    if (get_stringencoding(v_encoded) != "UTF-8") {
      setverdict(fail, "encoding type nok");
    } else {
      setverdict(pass, "encoding type ok");
    }
  }
}
269
// Code points 0x0800-0xFFFF: three-octet UTF-8 sequences, encoding type given
// explicitly. Each sampled code point (step 10027) is checked first with
// "UTF-8" (no BOM) and then with "UTF-8 BOM", where the expected octets are
// c_BOM_UTF8 followed by the plain encoding.
testcase tc_UTF8_encodeDecode_BMP3_explicit() runs on MTC {
  const integer c_first := oct2int('0800'O);
  const integer c_limit := oct2int('010000'O); // exclusive upper bound
  var octetstring v_encoded := ''O;
  var octetstring v_expected := ''O;
  var universal charstring v_original := char(0,0,0,0); // expected
  var universal charstring v_decoded := char(0,0,0,0);  // converted
  var bitstring v_raw := ''B;   // 16-bit binary form of the code point
  var bitstring v_utf8 := ''B;  // hand-assembled three-octet pattern

  for (var integer v_cp := c_first; v_cp < c_limit; v_cp := v_cp + 10027) {
    v_original := int2unichar(v_cp);
    v_raw := int2bit(v_cp, 16);
    // 1110xxxx 10xxxxxx 10xxxxxx
    v_utf8 := '1110'B & substr(v_raw, 0, 4) & '10'B & substr(v_raw, 4, 6) & '10'B & substr(v_raw, 10, 6);
    log("i=", v_cp, " utf-8 string:", v_utf8);
    v_expected := bit2oct(v_utf8);

    // --- without BOM ---
    v_encoded := unichar2oct(v_original, "UTF-8");
    if (v_encoded != v_expected) {
      setverdict(fail, "unichar2oct(", v_original, ") encoding nok, expected: ", v_expected, " received: ", v_encoded);
    } else {
      setverdict(pass, "unichar2oct(", v_original, ") encoding ok");
    }

    v_decoded := oct2unichar(v_encoded, "UTF-8");
    log(v_decoded);
    if (not match(v_decoded, v_original)) {
      setverdict(fail, "unmatched :", match(v_decoded, v_original));
    } else {
      setverdict(pass);
    }

    if (get_stringencoding(v_encoded) != "UTF-8") {
      setverdict(fail, "encoding type nok");
    } else {
      setverdict(pass, "encoding type ok");
    }

    // --- with BOM ---
    v_expected := c_BOM_UTF8 & v_expected;
    v_encoded := unichar2oct(v_original, "UTF-8 BOM");
    if (v_encoded != v_expected) {
      setverdict(fail, "unichar2oct(", v_original, ",\"UTF-8 BOM\") encoding nok, expected: ", v_expected, " received: ", v_encoded);
    } else {
      setverdict(pass, "unichar2oct(", v_original, ",\"UTF-8 BOM\") encoding ok");
    }

    v_decoded := oct2unichar(v_encoded, "UTF-8");
    log(v_decoded);
    if (not match(v_decoded, v_original)) {
      setverdict(fail, "unmatched :", match(v_decoded, v_original));
    } else {
      setverdict(pass);
    }

    if (get_stringencoding(v_encoded) != "UTF-8") {
      setverdict(fail, "encoding type nok");
    } else {
      setverdict(pass, "encoding type ok");
    }
  }
}
333
// Code points 0x010000-0x1FFFFF: four-octet UTF-8 sequences
// (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx). Code points are sampled with a step
// of 10023 and converted with unichar2oct / oct2unichar without an explicit
// encoding argument; the encoding is compared with a hand-assembled pattern.
testcase tc_UTF8_encodeDecode_BMP4_implicit() runs on MTC {
  const integer c_first := oct2int('010000'O);
  const integer c_limit := oct2int('200000'O); // exclusive upper bound
  var octetstring v_encoded := ''O;
  var octetstring v_expected := ''O;
  var universal charstring v_original := char(0,0,0,0); // character to encode
  var universal charstring v_decoded := char(0,0,0,0);  // decoded value
  var bitstring v_raw := ''B;   // 21-bit binary form of the code point
  var bitstring v_utf8 := ''B;  // hand-assembled four-octet pattern

  for (var integer v_cp := c_first; v_cp < c_limit; v_cp := v_cp + 10023) {
    v_original := int2unichar(v_cp);
    v_raw := int2bit(v_cp, 21);
    // 3 + 6 + 6 + 6 payload bits spread over lead and three continuation octets.
    v_utf8 := '11110'B & substr(v_raw, 0, 3) & '10'B & substr(v_raw, 3, 6) & '10'B & substr(v_raw, 9, 6) & '10'B & substr(v_raw, 15, 6);
    log("i=", v_cp, " utf-8 string:", v_utf8);
    v_expected := bit2oct(v_utf8);

    v_encoded := unichar2oct(v_original);
    if (v_encoded != v_expected) {
      setverdict(fail, "unichar2oct(", v_original, ") encoding nok, expected: ", v_expected, " received: ", v_encoded);
    } else {
      setverdict(pass, "unichar2oct(", v_original, ") encoding ok");
    }

    // Decode without naming the encoding.
    v_decoded := oct2unichar(v_encoded);
    log(v_decoded);
    if (not match(v_decoded, v_original)) {
      setverdict(fail, "unmatched :", match(v_decoded, v_original));
    } else {
      setverdict(pass);
    }

    if (get_stringencoding(v_encoded) != "UTF-8") {
      setverdict(fail, "encoding type nok");
    } else {
      setverdict(pass, "encoding type ok");
    }
  }
}
375
// Code points 0x010000-0x1FFFFF: four-octet UTF-8 sequences, encoding type
// given explicitly. Each sampled code point (step 10023) is checked first
// with "UTF-8" (no BOM) and then with "UTF-8 BOM", where the expected octets
// are c_BOM_UTF8 followed by the plain encoding.
testcase tc_UTF8_encodeDecode_BMP4_explicit() runs on MTC {
  const integer c_first := oct2int('010000'O);
  const integer c_limit := oct2int('200000'O); // exclusive upper bound
  var octetstring v_encoded := ''O;
  var octetstring v_expected := ''O;
  var universal charstring v_original := char(0,0,0,0); // character to encode
  var universal charstring v_decoded := char(0,0,0,0);  // decoded value
  var bitstring v_raw := ''B;   // 21-bit binary form of the code point
  var bitstring v_utf8 := ''B;  // hand-assembled four-octet pattern

  for (var integer v_cp := c_first; v_cp < c_limit; v_cp := v_cp + 10023) {
    v_original := int2unichar(v_cp);
    v_raw := int2bit(v_cp, 21);
    // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    v_utf8 := '11110'B & substr(v_raw, 0, 3) & '10'B & substr(v_raw, 3, 6) & '10'B & substr(v_raw, 9, 6) & '10'B & substr(v_raw, 15, 6);
    log("i=", v_cp, " utf-8 string:", v_utf8);
    v_expected := bit2oct(v_utf8);

    // --- without BOM ---
    v_encoded := unichar2oct(v_original, "UTF-8");
    if (v_encoded != v_expected) {
      setverdict(fail, "unichar2oct(", v_original, ") encoding nok, expected: ", v_expected, " received: ", v_encoded);
    } else {
      setverdict(pass, "unichar2oct(", v_original, ") encoding ok");
    }

    v_decoded := oct2unichar(v_encoded, "UTF-8");
    log(v_decoded);
    if (not match(v_decoded, v_original)) {
      setverdict(fail, "unmatched :", match(v_decoded, v_original));
    } else {
      setverdict(pass);
    }

    if (get_stringencoding(v_encoded) != "UTF-8") {
      setverdict(fail, "encoding type nok");
    } else {
      setverdict(pass, "encoding type ok");
    }

    // --- with BOM ---
    v_expected := c_BOM_UTF8 & v_expected;
    v_encoded := unichar2oct(v_original, "UTF-8 BOM");
    if (v_encoded != v_expected) {
      setverdict(fail, "unichar2oct(", v_original, ",\"UTF-8 BOM\") encoding nok, expected: ", v_expected, " received: ", v_encoded);
    } else {
      setverdict(pass, "unichar2oct(", v_original, ",\"UTF-8 BOM\") encoding ok");
    }

    v_decoded := oct2unichar(v_encoded, "UTF-8");
    log(v_decoded);
    if (not match(v_decoded, v_original)) {
      setverdict(fail, "unmatched :", match(v_decoded, v_original));
    } else {
      setverdict(pass);
    }

    if (get_stringencoding(v_encoded) != "UTF-8") {
      setverdict(fail, "encoding type nok");
    } else {
      setverdict(pass, "encoding type ok");
    }
  }
}
437
438
// Code points 0x200000-0x3FFFFFF: five-octet sequences
// (111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx). Code points are sampled
// with a step of 10000 and converted with unichar2oct / oct2unichar without
// an explicit encoding argument; the encoding is compared with a
// hand-assembled bit pattern.
testcase tc_UTF8_encodeDecode_BMP5_implicit() runs on MTC {
  const integer c_first := oct2int('200000'O);
  const integer c_limit := oct2int('04000000'O); // exclusive upper bound
  var octetstring v_encoded := ''O;
  var octetstring v_expected := ''O;
  var universal charstring v_original := char(0,0,0,0); // character to encode
  var universal charstring v_decoded := char(0,0,0,0);  // decoded value
  var bitstring v_raw := ''B;   // 26-bit binary form of the code point
  var bitstring v_utf8 := ''B;  // hand-assembled five-octet pattern

  for (var integer v_cp := c_first; v_cp < c_limit; v_cp := v_cp + 10000) {
    v_original := int2unichar(v_cp);
    v_raw := int2bit(v_cp, 26);
    // 2 + 6 + 6 + 6 + 6 payload bits spread over lead and four continuation octets.
    v_utf8 := '111110'B & substr(v_raw, 0, 2) & '10'B & substr(v_raw, 2, 6) & '10'B & substr(v_raw, 8, 6) & '10'B & substr(v_raw, 14, 6) & '10'B & substr(v_raw, 20, 6);
    log("i=", v_cp, " utf-8 string:", v_utf8);
    v_expected := bit2oct(v_utf8);

    v_encoded := unichar2oct(v_original);
    if (v_encoded != v_expected) {
      setverdict(fail, "unichar2oct(", v_original, ") encoding nok, expected: ", v_expected, " received: ", v_encoded);
    } else {
      setverdict(pass, "unichar2oct(", v_original, ") encoding ok");
    }

    // Decode without naming the encoding.
    v_decoded := oct2unichar(v_encoded);
    log(v_decoded);
    if (not match(v_decoded, v_original)) {
      setverdict(fail, "unmatched :", match(v_decoded, v_original));
    } else {
      setverdict(pass);
    }

    if (get_stringencoding(v_encoded) != "UTF-8") {
      setverdict(fail, "encoding type nok");
    } else {
      setverdict(pass, "encoding type ok");
    }
  }
}
479
480
// Code points 0x200000-0x3FFFFFF: five-octet sequences, encoding type given
// explicitly. Each sampled code point (step 10000) is checked first with
// "UTF-8" (no BOM) and then with "UTF-8 BOM", where the expected octets are
// c_BOM_UTF8 followed by the plain encoding.
testcase tc_UTF8_encodeDecode_BMP5_explicit() runs on MTC {
  const integer c_first := oct2int('200000'O);
  const integer c_limit := oct2int('04000000'O); // exclusive upper bound
  var octetstring v_encoded := ''O;
  var octetstring v_expected := ''O;
  var universal charstring v_original := char(0,0,0,0); // character to encode
  var universal charstring v_decoded := char(0,0,0,0);  // decoded value
  var bitstring v_raw := ''B;   // 26-bit binary form of the code point
  var bitstring v_utf8 := ''B;  // hand-assembled five-octet pattern

  for (var integer v_cp := c_first; v_cp < c_limit; v_cp := v_cp + 10000) {
    v_original := int2unichar(v_cp);
    v_raw := int2bit(v_cp, 26);
    // 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
    v_utf8 := '111110'B & substr(v_raw, 0, 2) & '10'B & substr(v_raw, 2, 6) & '10'B & substr(v_raw, 8, 6) & '10'B & substr(v_raw, 14, 6) & '10'B & substr(v_raw, 20, 6);
    log("i=", v_cp, " utf-8 string:", v_utf8);
    v_expected := bit2oct(v_utf8);

    // --- without BOM ---
    v_encoded := unichar2oct(v_original, "UTF-8");
    if (v_encoded != v_expected) {
      setverdict(fail, "unichar2oct(", v_original, ") encoding nok, expected: ", v_expected, " received: ", v_encoded);
    } else {
      setverdict(pass, "unichar2oct(", v_original, ") encoding ok");
    }

    v_decoded := oct2unichar(v_encoded, "UTF-8");
    log(v_decoded);
    if (not match(v_decoded, v_original)) {
      setverdict(fail, "unmatched :", match(v_decoded, v_original));
    } else {
      setverdict(pass);
    }

    if (get_stringencoding(v_encoded) != "UTF-8") {
      setverdict(fail, "encoding type nok");
    } else {
      setverdict(pass, "encoding type ok");
    }

    // --- with BOM ---
    v_expected := c_BOM_UTF8 & v_expected;
    v_encoded := unichar2oct(v_original, "UTF-8 BOM");
    if (v_encoded != v_expected) {
      setverdict(fail, "unichar2oct(", v_original, ",\"UTF-8 BOM\") encoding nok, expected: ", v_expected, " received: ", v_encoded);
    } else {
      setverdict(pass, "unichar2oct(", v_original, ",\"UTF-8 BOM\") encoding ok");
    }

    v_decoded := oct2unichar(v_encoded, "UTF-8");
    log(v_decoded);
    if (not match(v_decoded, v_original)) {
      setverdict(fail, "unmatched :", match(v_decoded, v_original));
    } else {
      setverdict(pass);
    }

    if (get_stringencoding(v_encoded) != "UTF-8") {
      setverdict(fail, "encoding type nok");
    } else {
      setverdict(pass, "encoding type ok");
    }
  }
}
543
// Checks that remove_bom applied to 'EFBBBF7F'O (UTF-8 byte order mark
// followed by the octet 7F) returns only the trailing octet '7F'O.
// Both verdicts carry a reason so that a failure shows the expected and the
// received octets in the log, in line with the encode/decode testcases.
testcase tc_remove_bom_utf8() runs on MTC {
  const octetstring c_expected := '7F'O;
  var octetstring o1 := 'EFBBBF7F'O;
  var octetstring o2 := remove_bom(o1);
  if (o2 == c_expected) {
    setverdict(pass, "remove_bom(", o1, ") ok");
  } else {
    setverdict(fail, "remove_bom(", o1, ") nok, expected: ", c_expected, " received: ", o2);
  }
}
553
554
// Negative tests for oct2unichar with the input 'FFFF'O.
// Each call passes an encoding name and the error text that is expected.
// f_oct2unichar_negativetest is not defined in this module (presumably it
// comes from the imported Common module and compares the error raised by
// oct2unichar with expectedErrorMsg -- confirm against its definition).
// Cases, in order:
//   "UTF-8"     -> malformed-input error for the octet FF
//   "UTF-8BE"   -> invalid-parameter error
//   "UTF-8 BOM" -> invalid-parameter error
//   "Oracle"    -> invalid-parameter error
testcase tc_UTF8_negative() runs on MTC {
  // The same two octets are fed to every case.
  const octetstring c_badInput := 'FFFF'O;

  f_oct2unichar_negativetest(
    inputOs := c_badInput,
    encType := "UTF-8",
    expectedErrorMsg := "Malformed: At character position 0, octet position 0: unused/reserved octet FF.");

  f_oct2unichar_negativetest(
    inputOs := c_badInput,
    encType := "UTF-8BE",
    expectedErrorMsg := "oct2unichar: Invalid parameter: UTF-8BE");

  f_oct2unichar_negativetest(
    inputOs := c_badInput,
    encType := "UTF-8 BOM",
    expectedErrorMsg := "oct2unichar: Invalid parameter: UTF-8 BOM");

  f_oct2unichar_negativetest(
    inputOs := c_badInput,
    encType := "Oracle",
    expectedErrorMsg := "oct2unichar: Invalid parameter: Oracle");
}
576
577
// Control part: runs every testcase of this module, in declaration order.
control {
  // single-octet range (0x00-0x7F)
  execute(tc_UTF8_encodeDecode_BMP1_implicit());
  execute(tc_UTF8_encodeDecode_BMP1_explicit());

  // two-octet range
  execute(tc_UTF8_encodeDecode_BMP2_implicit());
  execute(tc_UTF8_encodeDecode_BMP2_explicit());

  // three-octet range
  execute(tc_UTF8_encodeDecode_BMP3_implicit());
  execute(tc_UTF8_encodeDecode_BMP3_explicit());

  // four-octet range
  execute(tc_UTF8_encodeDecode_BMP4_implicit());
  execute(tc_UTF8_encodeDecode_BMP4_explicit());

  // five-octet range
  execute(tc_UTF8_encodeDecode_BMP5_implicit());
  execute(tc_UTF8_encodeDecode_BMP5_explicit());

  // BOM removal and negative cases
  execute(tc_remove_bom_utf8());
  execute(tc_UTF8_negative());
}
592 }
This page took 0.082853 seconds and 5 git commands to generate.