Commit | Line | Data |
---|---|---|
970ed795 EL |
1 | /////////////////////////////////////////////////////////////////////////////// |
2 | // Copyright (c) 2000-2014 Ericsson Telecom AB | |
3 | // All rights reserved. This program and the accompanying materials | |
4 | // are made available under the terms of the Eclipse Public License v1.0 | |
5 | // which accompanies this distribution, and is available at | |
6 | // http://www.eclipse.org/legal/epl-v10.html | |
7 | /////////////////////////////////////////////////////////////////////////////// | |
8 | #include "PredefFunc.hh" | |
9 | #include "error.h" | |
10 | #include "Int.hh" | |
11 | #include "Real.hh" | |
12 | #include "Setting.hh" | |
13 | #include "string.hh" | |
14 | #include "ustring.hh" | |
15 | #include "CompilerError.hh" | |
16 | #include <stdio.h> | |
17 | #include <sys/types.h> | |
18 | #include <regex.h> | |
19 | #include <stdint.h> | |
20 | #include "../common/memory.h" | |
21 | #include "../common/pattern.hh" | |
22 | #include <iostream> | |
23 | ||
24 | // used by regex | |
25 | #define ERRMSG_BUFSIZE 512 | |
26 | ||
27 | namespace Common { | |
28 | ||
// Byte order marks spelled as uppercase hexadecimal digit strings.
// Each array is NUL-terminated, so sizeof(x)-1 is the number of hex digits.
static const char utf32be[] = "0000FEFF";
static const char utf32le[] = "FFFE0000";
static const char utf16be[] = "FEFF";
static const char utf16le[] = "FFFE";
static const char utf8[]    = "EFBBBF";
34 | ||
35 | static inline unsigned char get_bit_value(char c, unsigned char bit_value) | |
36 | { | |
37 | switch (c) { | |
38 | case '0': | |
39 | return 0; | |
40 | case '1': | |
41 | return bit_value; | |
42 | default: | |
43 | FATAL_ERROR("Invalid binary digit (%c) in bitstring value", c); | |
44 | return 0; | |
45 | } | |
46 | } | |
47 | ||
48 | char toupper (const char c) | |
49 | { | |
50 | if (('A' <= c && 'F' >= c) || | |
51 | ('0' <= c && '9' >= c)) return c; | |
52 | switch (c) | |
53 | { | |
54 | case 'a' : return 'A'; | |
55 | case 'b' : return 'B'; | |
56 | case 'c' : return 'C'; | |
57 | case 'd' : return 'D'; | |
58 | case 'e' : return 'E'; | |
59 | case 'f' : return 'F'; | |
60 | default: | |
61 | FATAL_ERROR("%c cannot be converted to hex character", c); | |
62 | break; | |
63 | } | |
64 | } | |
65 | ||
66 | char hexdigit_to_char(unsigned char hexdigit) | |
67 | { | |
68 | if (hexdigit < 10) return '0' + hexdigit; | |
69 | else if (hexdigit < 16) return 'A' + hexdigit - 10; | |
70 | else { | |
71 | FATAL_ERROR("hexdigit_to_char(): invalid argument: %d", hexdigit); | |
72 | return '\0'; // to avoid warning | |
73 | } | |
74 | } | |
75 | ||
76 | unsigned char char_to_hexdigit(char c) | |
77 | { | |
78 | if (c >= '0' && c <= '9') return c - '0'; | |
79 | else if (c >= 'A' && c <= 'F') return c - 'A' + 10; | |
80 | else if (c >= 'a' && c <= 'f') return c - 'a' + 10; | |
81 | else { | |
82 | FATAL_ERROR("char_to_hexdigit(): invalid argument: %c", c); | |
83 | return 0; // to avoid warning | |
84 | } | |
85 | } | |
86 | ||
87 | string uchar2str(unsigned char uchar) | |
88 | { | |
89 | char str[2]; | |
90 | str[0] = hexdigit_to_char(uchar / 16); | |
91 | str[1] = hexdigit_to_char(uchar % 16); | |
92 | return string(2, str); | |
93 | } | |
94 | ||
95 | unsigned char str2uchar(const char& c1, const char& c2) | |
96 | { | |
97 | unsigned char uc = 0; | |
98 | uc = char_to_hexdigit(c1); | |
99 | uc <<= 4; | |
100 | uc += char_to_hexdigit(c2); | |
101 | return uc; | |
102 | } | |
103 | ||
104 | int_val_t rem(const int_val_t& left, const int_val_t& right) | |
105 | { | |
106 | return (left - right * (left / right)); | |
107 | } | |
108 | ||
109 | int_val_t mod(const int_val_t& left, const int_val_t& right) | |
110 | { | |
111 | int_val_t r = right < 0 ? -right : right; | |
112 | if (left > 0) { | |
113 | return rem(left, r); | |
114 | } else { | |
115 | int_val_t result = rem(left, r); | |
116 | return result == 0 ? result : result + r; | |
117 | } | |
118 | } | |
119 | ||
120 | string* to_uppercase(const string& value) | |
121 | { | |
122 | string *s = new string(value); | |
123 | for (size_t i = 0; i < s->size(); i++) { | |
124 | char& c=(*s)[i]; | |
125 | if (c >= 'a' && c <= 'z') c = c - 'a' + 'A'; | |
126 | } | |
127 | return s; | |
128 | } | |
129 | ||
130 | string* not4b_bit(const string& bstr) | |
131 | { | |
132 | string *s=new string(bstr); | |
133 | for(size_t i=0; i<s->size(); i++) { | |
134 | char& c=(*s)[i]; | |
135 | switch(c) { | |
136 | case '0': c='1'; break; | |
137 | case '1': c='0'; break; | |
138 | default: | |
139 | FATAL_ERROR("not4b_bit(): Invalid char in bitstring."); | |
140 | } // switch c | |
141 | } // for i | |
142 | return s; | |
143 | } | |
144 | ||
145 | string* not4b_hex(const string& hstr) | |
146 | { | |
147 | string *s=new string(hstr); | |
148 | for(size_t i=0; i<s->size(); i++) { | |
149 | char& c=(*s)[i]; | |
150 | switch(c) { | |
151 | case '0': c='F'; break; | |
152 | case '1': c='E'; break; | |
153 | case '2': c='D'; break; | |
154 | case '3': c='C'; break; | |
155 | case '4': c='B'; break; | |
156 | case '5': c='A'; break; | |
157 | case '6': c='9'; break; | |
158 | case '7': c='8'; break; | |
159 | case '8': c='7'; break; | |
160 | case '9': c='6'; break; | |
161 | case 'A': c='5'; break; | |
162 | case 'B': c='4'; break; | |
163 | case 'C': c='3'; break; | |
164 | case 'D': c='2'; break; | |
165 | case 'E': c='1'; break; | |
166 | case 'F': c='0'; break; | |
167 | case 'a': c='5'; break; | |
168 | case 'b': c='4'; break; | |
169 | case 'c': c='3'; break; | |
170 | case 'd': c='2'; break; | |
171 | case 'e': c='1'; break; | |
172 | case 'f': c='0'; break; | |
173 | default: | |
174 | FATAL_ERROR("not4b_hex(): Invalid char in hexstring."); | |
175 | } // switch c | |
176 | } // for i | |
177 | return s; | |
178 | } | |
179 | ||
180 | string* and4b(const string& left, const string& right) | |
181 | { | |
182 | string *s=new string(left); | |
183 | for(size_t i=0; i<s->size(); i++) { | |
184 | char& c=(*s)[i]; | |
185 | c=hexdigit_to_char(char_to_hexdigit(c) & char_to_hexdigit(right[i])); | |
186 | } // for i | |
187 | return s; | |
188 | } | |
189 | ||
190 | string* or4b(const string& left, const string& right) | |
191 | { | |
192 | string *s=new string(left); | |
193 | for(size_t i=0; i<s->size(); i++) { | |
194 | char& c=(*s)[i]; | |
195 | c=hexdigit_to_char(char_to_hexdigit(c) | char_to_hexdigit(right[i])); | |
196 | } // for i | |
197 | return s; | |
198 | } | |
199 | ||
200 | string* xor4b(const string& left, const string& right) | |
201 | { | |
202 | string *s=new string(left); | |
203 | for(size_t i=0; i<s->size(); i++) { | |
204 | char& c=(*s)[i]; | |
205 | c=hexdigit_to_char(char_to_hexdigit(c) ^ char_to_hexdigit(right[i])); | |
206 | } // for i | |
207 | return s; | |
208 | } | |
209 | ||
210 | string* shift_left(const string& value, const Int& count) | |
211 | { | |
212 | if (count > 0) { | |
213 | string *s = new string; | |
214 | if (count < static_cast<Int>(value.size())) *s = value.substr(count); | |
215 | s->resize(value.size(), '0'); | |
216 | return s; | |
217 | } else if (count < 0) return shift_right(value, -count); | |
218 | else return new string(value); | |
219 | } | |
220 | ||
221 | string* shift_right(const string& value, const Int& count) | |
222 | { | |
223 | if (count > 0) { | |
224 | string *s = new string; | |
225 | if (count < static_cast<Int>(value.size())) { | |
226 | s->resize(count, '0'); | |
227 | *s += value.substr(0, value.size()-count); | |
228 | } else s->resize(value.size(), '0'); | |
229 | return s; | |
230 | } else if (count < 0) return shift_left(value, -count); | |
231 | else return new string(value); | |
232 | } | |
233 | ||
234 | string* rotate_left(const string& value, const Int& p_count) | |
235 | { | |
236 | size_t size = value.size(); | |
237 | if (size == 0) return new string(value); | |
238 | else if (p_count < 0) return rotate_right(value, -p_count); | |
239 | size_t count = p_count % size; | |
240 | if (count == 0) return new string(value); | |
241 | else return new string(value.substr(count) + value.substr(0, count)); | |
242 | } | |
243 | ||
244 | string* rotate_right(const string& value, const Int& p_count) | |
245 | { | |
246 | size_t size = value.size(); | |
247 | if (size == 0) return new string(value); | |
248 | else if (p_count < 0) return rotate_left(value, -p_count); | |
249 | size_t count = p_count % size; | |
250 | if (count == 0) return new string(value); | |
251 | else return new string(value.substr(size - count) + | |
252 | value.substr(0, size - count)); | |
253 | } | |
254 | ||
255 | ||
256 | ustring* rotate_left(const ustring& value, const Int& p_count) | |
257 | { | |
258 | size_t size = value.size(); | |
259 | if (size == 0) return new ustring(value); | |
260 | else if (p_count < 0) return rotate_right(value, -p_count); | |
261 | size_t count = p_count % size; | |
262 | if (count == 0) return new ustring(value); | |
263 | else return new ustring(value.substr(count) + value.substr(0, count)); | |
264 | } | |
265 | ||
266 | ustring* rotate_right(const ustring& value, const Int& p_count) | |
267 | { | |
268 | size_t size = value.size(); | |
269 | if (size == 0) return new ustring(value); | |
270 | else if (p_count < 0) return rotate_left(value, -p_count); | |
271 | size_t count = p_count % size; | |
272 | if (count == 0) return new ustring(value); | |
273 | else return new ustring(value.substr(size - count) + | |
274 | value.substr(0, size - count)); | |
275 | } | |
276 | ||
277 | int_val_t* bit2int(const string& bstr) | |
278 | { | |
279 | size_t nof_bits = bstr.size(); | |
280 | // skip the leading zeros | |
281 | size_t start_index = 0; | |
282 | while (start_index < nof_bits && bstr[start_index] == '0') start_index++; | |
283 | int_val_t *ret_val = new int_val_t((Int)0); | |
284 | for (size_t i = start_index; i < nof_bits; i++) { | |
285 | *ret_val <<= 1; | |
286 | if (bstr[i] == '1') *ret_val += 1; | |
287 | } | |
288 | return ret_val; | |
289 | } | |
290 | ||
291 | int_val_t* hex2int(const string& hstr) | |
292 | { | |
293 | size_t nof_digits = hstr.size(); | |
294 | size_t start_index = 0; | |
295 | // Skip the leading zeros. | |
296 | while (start_index < nof_digits && hstr[start_index] == '0') | |
297 | start_index++; | |
298 | int_val_t *ret_val = new int_val_t((Int)0); | |
299 | for (size_t i = start_index; i < nof_digits; i++) { | |
300 | *ret_val <<= 4; | |
301 | *ret_val += char_to_hexdigit(hstr[i]); | |
302 | } | |
303 | return ret_val; | |
304 | } | |
305 | ||
306 | Int unichar2int(const ustring& ustr) | |
307 | { | |
308 | if (ustr.size() != 1) FATAL_ERROR("unichar2int(): invalid argument"); | |
309 | const ustring::universal_char& uchar = ustr.u_str()[0]; | |
310 | Int ret_val = (uchar.group << 24) | (uchar.plane << 16) | (uchar.row << 8) | | |
311 | uchar.cell; | |
312 | return ret_val; | |
313 | } | |
314 | ||
315 | string *int2bit(const int_val_t& value, const Int& length) | |
316 | { | |
317 | if (length < 0) FATAL_ERROR("int2bit(): negative length"); | |
318 | size_t string_length = static_cast<size_t>(length); | |
319 | if (static_cast<Int>(string_length) != length || | |
320 | string_length > string::max_string_len) | |
321 | FATAL_ERROR("int2bit(): length is too large"); | |
322 | if (value < 0) FATAL_ERROR("int2bit(): negative value"); | |
323 | string *bstr = new string; | |
324 | bstr->resize(string_length); | |
325 | int_val_t tmp_value = value; | |
326 | for (size_t i = 1; i <= string_length; i++) { | |
327 | (*bstr)[string_length - i] = (tmp_value & 1).get_val() ? '1' : '0'; | |
328 | tmp_value >>= 1; | |
329 | } | |
330 | if (tmp_value != 0) | |
331 | FATAL_ERROR("int2bit(): %s does not fit in %lu bits", \ | |
332 | value.t_str().c_str(), (unsigned long)string_length); | |
333 | return bstr; | |
334 | } | |
335 | ||
// Uppercase hexadecimal digit characters, indexed by digit value (0..15).
static const char hdigits[16] = {
  '0', '1', '2', '3',
  '4', '5', '6', '7',
  '8', '9', 'A', 'B',
  'C', 'D', 'E', 'F'
};
338 | ||
339 | string *int2hex(const int_val_t& value, const Int& length) | |
340 | { | |
341 | if (length < 0) | |
342 | FATAL_ERROR("int2hex(): negative length"); | |
343 | size_t string_length = static_cast<size_t>(length); | |
344 | if (static_cast<Int>(string_length) != length || | |
345 | string_length > string::max_string_len) | |
346 | FATAL_ERROR("int2hex(): length is too large"); | |
347 | if (value < 0) FATAL_ERROR("int2hex(): negative value"); | |
348 | string *hstr = new string; | |
349 | hstr->resize(string_length); | |
350 | int_val_t tmp_value = value; | |
351 | for (size_t i = 1; i <= string_length; i++) { | |
352 | (*hstr)[string_length - i] = hdigits[(tmp_value & 0x0f).get_val()]; | |
353 | tmp_value >>= 4; | |
354 | } | |
355 | if (tmp_value != 0) { | |
356 | FATAL_ERROR("int2hex(): %s does not fit in %lu hexadecimal digits", | |
357 | value.t_str().c_str(), (unsigned long)string_length); | |
358 | } | |
359 | return hstr; | |
360 | } | |
361 | ||
362 | ustring *int2unichar(const Int& value) | |
363 | { | |
364 | if (value < 0 || value > 2147483647) | |
365 | FATAL_ERROR("int2unichar(): invalid argument"); | |
366 | unsigned char group = (value >> 24) & 0xFF, | |
367 | plane = (value >> 16) & 0xFF, | |
368 | row = (value >> 8) & 0xFF, | |
369 | cell = value & 0xFF; | |
370 | return new ustring(group, plane, row, cell); | |
371 | } | |
372 | ||
373 | string *oct2char(const string& ostr) | |
374 | { | |
375 | string *cstr = new string; | |
376 | size_t ostr_size = ostr.size(); | |
377 | if (ostr_size % 2) | |
378 | FATAL_ERROR("oct2char(): argument has odd length: %lu", | |
379 | (unsigned long) ostr_size); | |
380 | size_t cstr_size = ostr_size / 2; | |
381 | cstr->resize(cstr_size); | |
382 | const char *ostr_ptr = ostr.c_str(); | |
383 | for (size_t i = 0; i < cstr_size; i++) { | |
384 | unsigned char c = 16 * char_to_hexdigit(ostr_ptr[2 * i]) + | |
385 | char_to_hexdigit(ostr_ptr[2 * i + 1]); | |
386 | if (c > 127) FATAL_ERROR("oct2char(): resulting charstring contains " \ | |
387 | "non-ascii character: %d", c); | |
388 | (*cstr)[i] = c; | |
389 | } | |
390 | return cstr; | |
391 | } | |
392 | ||
393 | string *char2oct(const string& cstr) | |
394 | { | |
395 | string *ostr = new string; | |
396 | size_t cstr_size = cstr.size(); | |
397 | ostr->resize(cstr_size * 2, '0'); | |
398 | const char *cstr_ptr = cstr.c_str(); | |
399 | for (size_t i = 0; i < cstr_size; i++) { | |
400 | unsigned char c = cstr_ptr[i]; | |
401 | (*ostr)[2 * i] = hexdigit_to_char(c / 16); | |
402 | (*ostr)[2 * i + 1] = hexdigit_to_char(c % 16); | |
403 | } | |
404 | return ostr; | |
405 | } | |
406 | ||
407 | string *bit2hex(const string& bstr) | |
408 | { | |
409 | size_t size=bstr.size(); | |
410 | size_t hsize=(size+3)/4; | |
411 | string *hstr = new string; | |
412 | string *bstr4=NULL; | |
413 | if(size%4) { | |
414 | bstr4=new string; | |
415 | bstr4->resize(hsize*4,'0'); | |
416 | bstr4->replace(4-(size%4),size,bstr); | |
417 | } | |
418 | hstr->resize(hsize,'0'); | |
419 | string b4(4,"0000"); | |
420 | for(size_t i=0;i<hsize;i++) { | |
421 | unsigned int u; | |
422 | if(size%4)b4=bstr4->substr(i*4,4); | |
423 | else b4=bstr.substr(i*4,4); | |
424 | if(b4[0]=='1')u=8;else u=0; | |
425 | if(b4[1]=='1')u+=4; | |
426 | if(b4[2]=='1')u+=2; | |
427 | if(b4[3]=='1')u++; | |
428 | (*hstr)[i]=hdigits[u]; | |
429 | } | |
430 | if(bstr4!=NULL)delete bstr4; | |
431 | return hstr; | |
432 | } | |
433 | ||
434 | string *hex2oct(const string& hstr) | |
435 | { | |
436 | if(hstr.size()%2==0)return new string(hstr); | |
437 | else { | |
438 | string *ostr=new string("0"); | |
439 | (*ostr)+=hstr; | |
440 | return ostr; | |
441 | } | |
442 | } | |
443 | ||
444 | string *asn_hex2oct(const string& hstr) | |
445 | { | |
446 | string *ostr = new string(hstr); | |
447 | size_t size = ostr->size(); | |
448 | if (size % 2) ostr->resize(size + 1, '0'); | |
449 | return ostr; | |
450 | } | |
451 | ||
452 | string *bit2oct(const string& bstr) | |
453 | { | |
454 | string *s1,*s2; | |
455 | s1=bit2hex(bstr); | |
456 | s2=hex2oct(*s1); | |
457 | delete s1; | |
458 | return s2; | |
459 | } | |
460 | ||
461 | string *asn_bit2oct(const string& bstr) | |
462 | { | |
463 | size_t size = bstr.size(); | |
464 | string *ostr = new string; | |
465 | ostr->resize(((size+7)/8)*2); | |
466 | for(size_t i=0, j=0; i<size; ) { | |
467 | unsigned char digit1=0, digit2=0; | |
468 | digit1 += get_bit_value(bstr[i++], 8); | |
469 | if (i < size) { | |
470 | digit1 += get_bit_value(bstr[i++], 4); | |
471 | if (i < size) { | |
472 | digit1 += get_bit_value(bstr[i++], 2); | |
473 | if (i < size) { | |
474 | digit1 += get_bit_value(bstr[i++], 1); | |
475 | if (i < size) { | |
476 | digit2 += get_bit_value(bstr[i++], 8); | |
477 | if (i < size) { | |
478 | digit2 += get_bit_value(bstr[i++], 4); | |
479 | if (i < size) { | |
480 | digit2 += get_bit_value(bstr[i++], 2); | |
481 | if (i < size) digit2 += get_bit_value(bstr[i++], 1); | |
482 | } | |
483 | } | |
484 | } | |
485 | } | |
486 | } | |
487 | } | |
488 | (*ostr)[j++] = hexdigit_to_char(digit1); | |
489 | (*ostr)[j++] = hexdigit_to_char(digit2); | |
490 | } | |
491 | return ostr; | |
492 | } | |
493 | ||
494 | string *hex2bit(const string& hstr) | |
495 | { | |
496 | size_t size=hstr.size(); | |
497 | string *bstr = new string; | |
498 | bstr->resize(4*size); | |
499 | for(size_t i=0; i<size; i++) { | |
500 | switch(hstr[i]) { | |
501 | case '0': | |
502 | bstr->replace(4*i, 4, "0000"); | |
503 | break; | |
504 | case '1': | |
505 | bstr->replace(4*i, 4, "0001"); | |
506 | break; | |
507 | case '2': | |
508 | bstr->replace(4*i, 4, "0010"); | |
509 | break; | |
510 | case '3': | |
511 | bstr->replace(4*i, 4, "0011"); | |
512 | break; | |
513 | case '4': | |
514 | bstr->replace(4*i, 4, "0100"); | |
515 | break; | |
516 | case '5': | |
517 | bstr->replace(4*i, 4, "0101"); | |
518 | break; | |
519 | case '6': | |
520 | bstr->replace(4*i, 4, "0110"); | |
521 | break; | |
522 | case '7': | |
523 | bstr->replace(4*i, 4, "0111"); | |
524 | break; | |
525 | case '8': | |
526 | bstr->replace(4*i, 4, "1000"); | |
527 | break; | |
528 | case '9': | |
529 | bstr->replace(4*i, 4, "1001"); | |
530 | break; | |
531 | case 'A': | |
532 | case 'a': | |
533 | bstr->replace(4*i, 4, "1010"); | |
534 | break; | |
535 | case 'B': | |
536 | case 'b': | |
537 | bstr->replace(4*i, 4, "1011"); | |
538 | break; | |
539 | case 'C': | |
540 | case 'c': | |
541 | bstr->replace(4*i, 4, "1100"); | |
542 | break; | |
543 | case 'D': | |
544 | case 'd': | |
545 | bstr->replace(4*i, 4, "1101"); | |
546 | break; | |
547 | case 'E': | |
548 | case 'e': | |
549 | bstr->replace(4*i, 4, "1110"); | |
550 | break; | |
551 | case 'F': | |
552 | case 'f': | |
553 | bstr->replace(4*i, 4, "1111"); | |
554 | break; | |
555 | default: | |
556 | FATAL_ERROR("Common::hex2bit(): invalid hexadecimal " | |
557 | "digit in hexstring value"); | |
558 | } | |
559 | } | |
560 | return bstr; | |
561 | } | |
562 | ||
563 | int_val_t* float2int(const Real& value, const Location& loc) | |
564 | { | |
565 | // We shouldn't mimic generality with `Int'. | |
566 | if (value >= (Real)LLONG_MIN && value <= (Real)LLONG_MAX) | |
567 | return new int_val_t((Int)value); | |
568 | char buf[512] = ""; | |
569 | snprintf(buf, 511, "%f", value); | |
570 | char *dot = strchr(buf, '.'); | |
571 | if (!dot) FATAL_ERROR("Conversion of float value `%f' to integer failed", value); | |
572 | else memset(dot, 0, sizeof(buf) - (dot - buf)); | |
573 | return new int_val_t(buf, loc); | |
574 | } | |
575 | ||
576 | /* TTCN-3 float values that have absolute value smaller than this are | |
577 | displayed in exponential notation. Same as in core/Float.hh */ | |
578 | #ifndef MIN_DECIMAL_FLOAT | |
579 | #define MIN_DECIMAL_FLOAT 1.0E-4 | |
580 | #endif | |
581 | /* TTCN-3 float values that have absolute value larger or equal than | |
582 | this are displayed in exponential notation. Same as in | |
583 | core/Float.hh */ | |
584 | #ifndef MAX_DECIMAL_FLOAT | |
585 | #define MAX_DECIMAL_FLOAT 1.0E+10 | |
586 | #endif | |
587 | ||
588 | string *float2str(const Real& value) | |
589 | { | |
590 | char str_buf[64]; | |
591 | if ( (value > -MAX_DECIMAL_FLOAT && value <= -MIN_DECIMAL_FLOAT) | |
592 | || (value >= MIN_DECIMAL_FLOAT && value < MAX_DECIMAL_FLOAT) | |
593 | || (value == 0.0)) | |
594 | snprintf(str_buf,64,"%f",value); | |
595 | else snprintf(str_buf,64,"%e",value); | |
596 | return new string(str_buf); | |
597 | } | |
598 | ||
599 | string* regexp(const string& instr, const string& expression, | |
600 | const Int& groupno) | |
601 | { | |
602 | string *retval=0; | |
603 | ||
604 | if(groupno<0) { | |
605 | FATAL_ERROR("regexp(): groupno must be a non-negative integer"); | |
606 | return retval; | |
607 | } | |
608 | // do not report the warnings again | |
609 | // they were already reported while checking the operands | |
610 | unsigned orig_verb_level = verb_level; | |
611 | verb_level &= ~(1|2); | |
612 | char *posix_str=TTCN_pattern_to_regexp(expression.c_str()); | |
613 | verb_level = orig_verb_level; | |
614 | if(posix_str==NULL) { | |
615 | FATAL_ERROR("regexp(): Cannot convert pattern `%s' to POSIX-equivalent.", | |
616 | expression.c_str()); | |
617 | return retval; | |
618 | } | |
619 | ||
620 | regex_t posix_regexp; | |
621 | int ret_val=regcomp(&posix_regexp, posix_str, REG_EXTENDED); | |
622 | Free(posix_str); | |
623 | if(ret_val!=0) { | |
624 | /* regexp error */ | |
625 | char msg[ERRMSG_BUFSIZE]; | |
626 | regerror(ret_val, &posix_regexp, msg, sizeof(msg)); | |
627 | FATAL_ERROR("regexp(): regcomp() failed: %s", msg); | |
628 | return retval; | |
629 | } | |
630 | ||
631 | size_t nmatch=groupno+1; | |
632 | if(nmatch>posix_regexp.re_nsub) { | |
633 | FATAL_ERROR("regexp(): requested groupno is %lu, but this expression " | |
634 | "contains only %lu group(s).", (unsigned long) (nmatch - 1), | |
635 | (unsigned long) posix_regexp.re_nsub); | |
636 | return retval; | |
637 | } | |
638 | regmatch_t* pmatch=(regmatch_t*)Malloc((nmatch+1)*sizeof(regmatch_t)); | |
639 | ret_val=regexec(&posix_regexp, instr.c_str(), nmatch+1, pmatch, 0); | |
640 | if(ret_val==0) { | |
641 | if(pmatch[nmatch].rm_so != -1 && pmatch[nmatch].rm_eo != -1) | |
642 | retval = new string(instr.substr(pmatch[nmatch].rm_so, | |
643 | pmatch[nmatch].rm_eo - pmatch[nmatch].rm_so)); | |
644 | else retval=new string(); | |
645 | } | |
646 | Free(pmatch); | |
647 | if(ret_val!=0) { | |
648 | if(ret_val==REG_NOMATCH) { | |
649 | regfree(&posix_regexp); | |
650 | retval=new string(); | |
651 | } | |
652 | else { | |
653 | /* regexp error */ | |
654 | char msg[ERRMSG_BUFSIZE]; | |
655 | regerror(ret_val, &posix_regexp, msg, sizeof(msg)); | |
656 | FATAL_ERROR("regexp(): regexec() failed: %s", msg); | |
657 | } | |
658 | } | |
659 | else regfree(&posix_regexp); | |
660 | ||
661 | return retval; | |
662 | } | |
663 | ||
664 | ustring* regexp(const ustring& instr, const ustring& expression, | |
665 | const Int& groupno) | |
666 | { | |
667 | ustring *retval=0; | |
668 | ||
669 | if(groupno<0) { | |
670 | FATAL_ERROR("regexp(): groupno must be a non-negative integer"); | |
671 | return retval; | |
672 | } | |
673 | // do not report the warnings again | |
674 | // they were already reported while checking the operands | |
675 | unsigned orig_verb_level = verb_level; | |
676 | verb_level &= ~(1|2); | |
677 | int* user_groups; | |
678 | char *posix_str = TTCN_pattern_to_regexp_uni( | |
679 | expression.get_stringRepr_for_pattern().c_str(), &user_groups); | |
680 | if (user_groups == 0) | |
681 | FATAL_ERROR("regexp(): Cannot find any groups in the second argument."); | |
682 | verb_level = orig_verb_level; | |
683 | if(posix_str==NULL) { | |
684 | FATAL_ERROR("regexp(): Cannot convert pattern `%s' to POSIX-equivalent.", | |
685 | expression.get_stringRepr().c_str()); | |
686 | return retval; | |
687 | } | |
688 | ||
689 | regex_t posix_regexp; | |
690 | int ret_val=regcomp(&posix_regexp, posix_str, REG_EXTENDED); | |
691 | Free(posix_str); | |
692 | if(ret_val!=0) { | |
693 | /* regexp error */ | |
694 | char msg[ERRMSG_BUFSIZE]; | |
695 | regerror(ret_val, &posix_regexp, msg, sizeof(msg)); | |
696 | FATAL_ERROR("regexp(): regcomp() failed: %s", msg); | |
697 | return retval; | |
698 | } | |
699 | ||
700 | size_t nmatch=user_groups[groupno+1]+1; | |
701 | if(nmatch>posix_regexp.re_nsub) { | |
702 | FATAL_ERROR("regexp(): requested groupno is %lu, but this expression " | |
703 | "contains only %lu group(s).", (unsigned long) (groupno), | |
704 | (unsigned long) user_groups[0]); | |
705 | return retval; | |
706 | } | |
707 | ||
708 | Free(user_groups); | |
709 | ||
710 | regmatch_t* pmatch = (regmatch_t*)Malloc((nmatch+1)*sizeof(regmatch_t)); | |
711 | char* tmp = instr.convert_to_regexp_form(); | |
712 | string instr_conv(tmp); | |
713 | Free(tmp); | |
714 | ret_val = regexec(&posix_regexp, instr_conv.c_str(), nmatch+1, pmatch, 0); | |
715 | if(ret_val == 0) { | |
716 | if(pmatch[nmatch].rm_so != -1 && pmatch[nmatch].rm_eo != -1) { | |
717 | retval = new ustring( | |
718 | instr_conv.substr(pmatch[nmatch].rm_so, | |
719 | pmatch[nmatch].rm_eo - pmatch[nmatch].rm_so) | |
720 | .convert_stringRepr_for_pattern()); | |
721 | } else { retval = new ustring(); } | |
722 | } | |
723 | Free(pmatch); | |
724 | if(ret_val!=0) { | |
725 | if(ret_val==REG_NOMATCH) { | |
726 | regfree(&posix_regexp); | |
727 | retval=new ustring(); | |
728 | } | |
729 | else { | |
730 | /* regexp error */ | |
731 | char msg[ERRMSG_BUFSIZE]; | |
732 | regerror(ret_val, &posix_regexp, msg, sizeof(msg)); | |
733 | FATAL_ERROR("regexp(): regexec() failed: %s", msg); | |
734 | } | |
735 | } | |
736 | else regfree(&posix_regexp); | |
737 | ||
738 | return retval; | |
739 | } | |
740 | ||
741 | string* remove_bom(const string& encoded_value) | |
742 | { | |
743 | size_t length = encoded_value.size(); | |
744 | if (0 == length) return new string(); | |
745 | if (length % 2 || 0 > length) { | |
746 | ERROR("remove_bom(): Wrong string. The number of nibbles (%d) in string " | |
747 | "shall be divisible by 2", static_cast<int>(length)); | |
748 | return new string(encoded_value); | |
749 | } | |
750 | ||
751 | int length_of_BOM = 0; | |
752 | string str_uppercase(encoded_value); | |
753 | size_t enough = length > sizeof(utf32be)-1 ? sizeof(utf32be)-1 : length; | |
754 | for (size_t i = 0; i < enough; ++i) { | |
755 | str_uppercase[i] = toupper(encoded_value[i]); | |
756 | } | |
757 | ||
758 | if (str_uppercase.find(utf32be, 0) < length) length_of_BOM = sizeof(utf32be)-1; | |
759 | else if (str_uppercase.find(utf32le, 0) < length) length_of_BOM = sizeof(utf32le)-1; | |
760 | else if (str_uppercase.find(utf16be, 0) < length) length_of_BOM = sizeof(utf16be)-1; | |
761 | else if (str_uppercase.find(utf16le, 0) < length) length_of_BOM = sizeof(utf16le)-1; | |
762 | else if (str_uppercase.find(utf8, 0) < length) length_of_BOM = sizeof(utf8)-1; | |
763 | else return new string(encoded_value); // no BOM found | |
764 | ||
765 | return new string(encoded_value.substr(length_of_BOM, length)); | |
766 | } | |
767 | ||
768 | static CharCoding::CharCodingType is_ascii (size_t length, const unsigned char* strptr) | |
769 | { | |
770 | const unsigned char nonASCII = 1 << 7;// MSB is 1 in case of non ASCII character | |
771 | CharCoding::CharCodingType ret = CharCoding::ASCII; | |
772 | for (size_t i = 0; i < length; ++i) { | |
773 | if ( strptr[i] & nonASCII) { | |
774 | ret = CharCoding::UNKNOWN; | |
775 | break; | |
776 | } | |
777 | } | |
778 | return ret; | |
779 | } | |
780 | ||
781 | static CharCoding::CharCodingType is_utf8(size_t length, const unsigned char* strptr) | |
782 | { | |
783 | const char MSB = 1 << 7; // MSB is 1 in case of non ASCII character | |
784 | const char MSBmin1 = 1 << 6; // 0100 0000 | |
785 | size_t i = 0; | |
786 | while (length > i) { | |
787 | if ( strptr[i] & MSB) { // non ASCII char | |
788 | char maskUTF8 = 1 << 6; // 111x xxxx shows how many additional bytes are there | |
789 | if (!(strptr[i] & maskUTF8)) return CharCoding::UNKNOWN; // accepted 11xxx xxxx but received 10xx xxxx | |
790 | unsigned int noofUTF8 = 0; // 11xx xxxxx -> 2 bytes, 111x xxxxx -> 3 bytes , 1111 xxxxx -> 4 bytes in UTF-8 | |
791 | while (strptr[i] & maskUTF8) { | |
792 | ++noofUTF8; | |
793 | maskUTF8 >>= 1; // shift right the mask | |
794 | } | |
795 | // the second and third (and so on) UTF-8 byte looks like 10xx xxxx | |
796 | while (0 < noofUTF8 ) { | |
797 | ++i; | |
798 | if (!(strptr[i] & MSB) || (strptr[i] & MSBmin1) || i >= length) { // if not like this: 10xx xxxx | |
799 | return CharCoding::UNKNOWN; | |
800 | } | |
801 | --noofUTF8; | |
802 | } | |
803 | } | |
804 | ++i; | |
805 | } | |
806 | return CharCoding::UTF_8; | |
807 | } | |
808 | ||
809 | string* get_stringencoding(const string& encoded_value) | |
810 | { | |
811 | size_t length = encoded_value.size(); | |
812 | if (0 == length) return new string("<unknown>"); | |
813 | if (length % 2 || 0 > length) { | |
814 | ERROR("get_stringencoding(): Wrong string. The number of nibbles (%d) in string " | |
815 | "shall be divisible by 2", static_cast<int>(length)); | |
816 | return new string("<unknown>"); | |
817 | } | |
818 | ||
819 | string str_uppercase(encoded_value); | |
820 | size_t enough = length > sizeof(utf32be)-1 ? sizeof(utf32be)-1 : length; | |
821 | for (size_t i = 0; i < enough; ++i) { | |
822 | str_uppercase[i] = toupper(encoded_value[i]); | |
823 | } | |
824 | ||
825 | if (str_uppercase.find(utf32be, 0) < length) return new string("UTF-32BE"); | |
826 | else if (str_uppercase.find(utf32le, 0) < length) return new string("UTF-32LE"); | |
827 | else if (str_uppercase.find(utf16be, 0) < length) return new string("UTF-16BE"); | |
828 | else if (str_uppercase.find(utf16le, 0) < length) return new string("UTF-16LE"); | |
829 | else if (str_uppercase.find(utf8, 0) < length) return new string("UTF-8"); | |
830 | ||
831 | unsigned char *uc_str = new unsigned char[length/2]; | |
832 | string ret; | |
833 | for (size_t i = 0; i < length / 2; ++i) { | |
834 | uc_str[i] = str2uchar(encoded_value[2 * i], encoded_value[2 * i + 1]); | |
835 | } | |
836 | if (is_ascii (length / 2, uc_str) == CharCoding::ASCII) ret = "ASCII"; | |
837 | else if (CharCoding::UTF_8 == is_utf8 (length / 2, uc_str)) ret = "UTF-8"; | |
838 | else ret = "<unknown>"; | |
839 | ||
840 | delete [] uc_str; | |
841 | return new string(ret); | |
842 | } | |
843 | ||
844 | static size_t check_BOM(CharCoding::CharCodingType expected_coding, size_t n_uc, unsigned char* uc_str) | |
845 | { | |
846 | if (0 == n_uc) return 0; | |
847 | ||
848 | switch (expected_coding) { | |
849 | case CharCoding::UTF32: | |
850 | case CharCoding::UTF32BE: | |
851 | case CharCoding::UTF32LE: | |
852 | if (4 > n_uc) { | |
853 | ERROR("decode_utf32(): The string is shorter than the expected BOM"); | |
854 | return 0; | |
855 | } | |
856 | break; | |
857 | case CharCoding::UTF16: | |
858 | case CharCoding::UTF16BE: | |
859 | case CharCoding::UTF16LE: | |
860 | if (2 > n_uc) { | |
861 | ERROR("decode_utf16(): The string is shorter than the expected BOM"); | |
862 | return 0; | |
863 | } | |
864 | break; | |
865 | default: break; | |
866 | } | |
867 | ||
868 | //BOM indicates that the byte order is determined by a byte order mark, | |
869 | //if present at the beginning the length of BOM is returned. | |
870 | bool badBOM = false; | |
871 | string errmsg; | |
872 | string caller; | |
873 | switch (expected_coding) { | |
874 | case CharCoding::UTF32BE: | |
875 | case CharCoding::UTF32: | |
876 | if (0x00 == uc_str[0] && 0x00 == uc_str[1] && 0xFE == uc_str[2] && 0xFF == uc_str[3]) | |
877 | return 4; | |
878 | badBOM = true; | |
879 | caller = "decode_utf32()"; | |
880 | errmsg = "UTF-32BE"; | |
881 | break; | |
882 | case CharCoding::UTF32LE: | |
883 | if (0xFF == uc_str[0] && 0xFE == uc_str[1] && 0x00 == uc_str[2] && 0x00 == uc_str[3]) | |
884 | return 4; | |
885 | badBOM = true; | |
886 | caller = "decode_utf32()"; | |
887 | errmsg = "UTF-32LE"; | |
888 | break; | |
889 | case CharCoding::UTF16BE: | |
890 | case CharCoding::UTF16: | |
891 | if (0xFE == uc_str[0] && 0xFF == uc_str[1]) | |
892 | return 2; | |
893 | badBOM = true; | |
894 | caller = "decode_utf16()"; | |
895 | errmsg = "UTF-16BE"; | |
896 | break; | |
897 | case CharCoding::UTF16LE: | |
898 | if (0xFF == uc_str[0] && 0xFE == uc_str[1]) | |
899 | return 2; | |
900 | badBOM = true; | |
901 | caller = "decode_utf16()"; | |
902 | errmsg = "UTF-16LE"; | |
903 | break; | |
904 | case CharCoding::UTF_8: | |
905 | if (0xEF == uc_str[0] && 0xBB == uc_str[1] && 0xBF == uc_str[2]) | |
906 | return 3; | |
907 | return 0; | |
908 | default: | |
909 | if (CharCoding::UTF32 == expected_coding || CharCoding::UTF16 == expected_coding) { | |
910 | const char* str = CharCoding::UTF32 == expected_coding ? "UTF-32" : "UTF-16"; | |
911 | ERROR("Wrong %s string. No BOM detected, however the given coding type (%s) " | |
912 | "expects it to define the endianness", str, str); | |
913 | } | |
914 | else { | |
915 | ERROR("Wrong string. No BOM detected"); | |
916 | } | |
917 | } | |
918 | if (badBOM) ERROR("%s: Wrong %s string. The expected coding could not be verified", | |
919 | caller.c_str(), errmsg.c_str()); | |
920 | return 0; | |
921 | } | |
922 | ||
923 | static void fill_continuing_octets(int n_continuing, unsigned char *continuing_ptr, | |
924 | size_t n_uc, const unsigned char* uc_str, int start_pos, | |
925 | int uchar_pos) | |
926 | { | |
927 | for (int i = 0; i < n_continuing; i++) { | |
928 | if (start_pos + i < static_cast<int>(n_uc)) { | |
929 | unsigned char octet = uc_str[start_pos + i]; | |
930 | if ((octet & 0xC0) != 0x80) { | |
931 | ERROR("decode_utf8(): Malformed: At character position %u, octet position %u: %02X is " | |
932 | "not a valid continuing octet.", uchar_pos, start_pos + i, octet); | |
933 | return; | |
934 | } | |
935 | continuing_ptr[i] = octet & 0x3F; | |
936 | } | |
937 | else { | |
938 | if (start_pos + i == static_cast<int>(n_uc)) { | |
939 | if (i > 0) { | |
940 | // only a part of octets is missing | |
941 | ERROR("decode_utf8(): Incomplete: At character position %d, octet position %d: %d out " | |
942 | "of %d continuing octets %s missing from the end of the stream.", | |
943 | uchar_pos, start_pos + i, n_continuing - i, n_continuing, | |
944 | n_continuing - i > 1 ? "are" : "is"); | |
945 | return; | |
946 | } | |
947 | else { | |
948 | // all octets are missing | |
949 | ERROR("decode_utf8(): Incomplete: At character position %d, octet position %d: %d " | |
950 | "continuing octet%s missing from the end of the stream.", uchar_pos, | |
951 | start_pos, n_continuing, n_continuing > 1 ? "s are" : " is"); | |
952 | return; | |
953 | } | |
954 | } | |
955 | continuing_ptr[i] = 0; | |
956 | } | |
957 | } | |
958 | } | |
959 | ||
960 | ustring decode_utf8(const string & ostr, CharCoding::CharCodingType expected_coding) | |
961 | { | |
962 | size_t length = ostr.size(); | |
963 | if (0 == length) return ustring(); | |
964 | if (length % 2) { | |
965 | ERROR("decode_utf8(): Wrong UTF-8 string. The number of nibbles (%d) in octetstring " | |
966 | "shall be divisible by 2", static_cast<int>(length)); | |
967 | return ustring(); | |
968 | } | |
969 | ||
970 | unsigned char *uc_str = new unsigned char[length/2]; | |
971 | for (size_t i = 0; i < length / 2; ++i) { | |
972 | uc_str[i] = str2uchar(ostr[2 * i], ostr[2 * i + 1]); | |
973 | } | |
974 | ustring ucstr; | |
975 | size_t start = check_BOM(CharCoding::UTF_8, length /2, uc_str); | |
976 | ||
977 | for (size_t i = start; i < length / 2;) { | |
978 | // perform the decoding character by character | |
979 | if (uc_str[i] <= 0x7F) { | |
980 | // character encoded on a single octet: 0xxxxxxx (7 useful bits) | |
981 | unsigned char g = 0; | |
982 | unsigned char p = 0; | |
983 | unsigned char r = 0; | |
984 | unsigned char c = uc_str[i]; | |
985 | ucstr += ustring(g, p, r, c); | |
986 | ++i; | |
987 | } | |
988 | else if (uc_str[i] <= 0xBF) { | |
989 | // continuing octet (10xxxxxx) without leading octet ==> malformed | |
990 | ERROR("decode_utf8(): Malformed: At character position %d, octet position %d: continuing " | |
991 | "octet %02X without leading octet.", static_cast<int>(ucstr.size()), | |
992 | static_cast<int>(i), uc_str[i]); | |
993 | goto dec_error; | |
994 | } | |
995 | else if (uc_str[i] <= 0xDF) { | |
996 | // character encoded on 2 octets: 110xxxxx 10xxxxxx (11 useful bits) | |
997 | unsigned char octets[2]; | |
998 | octets[0] = uc_str[i] & 0x1F; | |
999 | fill_continuing_octets(1, octets + 1, length / 2, uc_str, i + 1, ucstr.size()); | |
1000 | unsigned char g = 0; | |
1001 | unsigned char p = 0; | |
1002 | unsigned char r = octets[0] >> 2; | |
1003 | unsigned char c = octets[0] << 6 | octets[1]; | |
1004 | if (r == 0x00 && c < 0x80) { | |
1005 | ERROR("decode_utf8(): Overlong: At character position %d, octet position %d: 2-octet " | |
1006 | "encoding for quadruple (0, 0, 0, %u).", static_cast<int>(ucstr.size()), | |
1007 | static_cast<int>(i), c); | |
1008 | goto dec_error; | |
1009 | } | |
1010 | ucstr += ustring(g, p, r, c); | |
1011 | i += 2; | |
1012 | } | |
1013 | else if (uc_str[i] <= 0xEF) { | |
1014 | // character encoded on 3 octets: 1110xxxx 10xxxxxx 10xxxxxx | |
1015 | // (16 useful bits) | |
1016 | unsigned char octets[3]; | |
1017 | octets[0] = uc_str[i] & 0x0F; | |
1018 | fill_continuing_octets(2, octets + 1, length / 2, uc_str, i + 1,ucstr.size()); | |
1019 | unsigned char g = 0; | |
1020 | unsigned char p = 0; | |
1021 | unsigned char r = octets[0] << 4 | octets[1] >> 2; | |
1022 | unsigned char c = octets[1] << 6 | octets[2]; | |
1023 | if (r < 0x08) { | |
1024 | ERROR("decode_utf8(): Overlong: At character position %d, octet position %d: 3-octet " | |
1025 | "encoding for quadruple (0, 0, %u, %u).", static_cast<int>(ucstr.size()), | |
1026 | static_cast<int>(i), r, c); | |
1027 | goto dec_error; | |
1028 | } | |
1029 | ucstr += ustring(g, p, r, c); | |
1030 | i += 3; | |
1031 | } | |
1032 | else if (uc_str[i] <= 0xF7) { | |
1033 | // character encoded on 4 octets: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx | |
1034 | // (21 useful bits) | |
1035 | unsigned char octets[4]; | |
1036 | octets[0] = uc_str[i] & 0x07; | |
1037 | fill_continuing_octets(3, octets + 1, length / 2, uc_str, i + 1, ucstr.size()); | |
1038 | unsigned char g = 0; | |
1039 | unsigned char p = octets[0] << 2 | octets[1] >> 4; | |
1040 | unsigned char r = octets[1] << 4 | octets[2] >> 2; | |
1041 | unsigned char c = octets[2] << 6 | octets[3]; | |
1042 | if (p == 0x00) { | |
1043 | ERROR("decode_utf8(): Overlong: At character position %d, octet position %d: 4-octet " | |
1044 | "encoding for quadruple (0, 0, %u, %u).", static_cast<int>(ucstr.size()), | |
1045 | static_cast<int>(i), r, c); | |
1046 | goto dec_error; | |
1047 | } | |
1048 | ucstr += ustring(g, p, r, c); | |
1049 | i += 4; | |
1050 | } | |
1051 | else if (uc_str[i] <= 0xFB) { | |
1052 | // character encoded on 5 octets: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx | |
1053 | // 10xxxxxx (26 useful bits) | |
1054 | unsigned char octets[5]; | |
1055 | octets[0] = uc_str[i] & 0x03; | |
1056 | fill_continuing_octets(4, octets + 1, length / 2, uc_str, i + 1, ucstr.size()); | |
1057 | unsigned char g = octets[0]; | |
1058 | unsigned char p = octets[1] << 2 | octets[2] >> 4; | |
1059 | unsigned char r = octets[2] << 4 | octets[3] >> 2; | |
1060 | unsigned char c = octets[3] << 6 | octets[4]; | |
1061 | if (g == 0x00 && p < 0x20) { | |
1062 | ERROR("decode_utf8(): Overlong: At character position %d, octet position %d: 5-octet " | |
1063 | "encoding for quadruple (0, %u, %u, %u).", static_cast<int>(ucstr.size()), | |
1064 | static_cast<int>(i), p, r, c); | |
1065 | goto dec_error; | |
1066 | } | |
1067 | ucstr += ustring(g, p, r, c); | |
1068 | i += 5; | |
1069 | } | |
1070 | else if (uc_str[i] <= 0xFD) { | |
1071 | // character encoded on 6 octets: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx | |
1072 | // 10xxxxxx 10xxxxxx (31 useful bits) | |
1073 | unsigned char octets[6]; | |
1074 | octets[0] = uc_str[i] & 0x01; | |
1075 | fill_continuing_octets(5, octets + 1, length / 2, uc_str, i + 1,ucstr.size()); | |
1076 | unsigned char g = octets[0] << 6 | octets[1]; | |
1077 | unsigned char p = octets[2] << 2 | octets[3] >> 4; | |
1078 | unsigned char r = octets[3] << 4 | octets[4] >> 2; | |
1079 | unsigned char c = octets[4] << 6 | octets[5]; | |
1080 | if (g < 0x04) { | |
1081 | ERROR("decode_utf8(): Overlong: At character position %d, octet position %d: 6-octet " | |
1082 | "encoding for quadruple (%u, %u, %u, %u).", static_cast<int>(ucstr.size()), | |
1083 | static_cast<int>(i), g, p, r, c); | |
1084 | goto dec_error; | |
1085 | } | |
1086 | ucstr += ustring(g, p, r, c); | |
1087 | i += 6; | |
1088 | } | |
1089 | else { | |
1090 | // not used code points: FE and FF => malformed | |
1091 | ERROR("decode_utf8(): Malformed: At character position %d, octet position %d: " | |
1092 | "unused/reserved octet %02X.", static_cast<int>(ucstr.size()), | |
1093 | static_cast<int>(i), uc_str[i]); | |
1094 | goto dec_error; | |
1095 | } | |
1096 | } | |
1097 | ||
1098 | dec_error: | |
1099 | delete[] uc_str; | |
1100 | return ucstr; | |
1101 | } | |
1102 | ||
1103 | } |