Commit | Line | Data |
---|---|---|
d44e3c4f | 1 | /****************************************************************************** |
2 | * Copyright (c) 2000-2016 Ericsson Telecom AB | |
3 | * All rights reserved. This program and the accompanying materials | |
4 | * are made available under the terms of the Eclipse Public License v1.0 | |
5 | * which accompanies this distribution, and is available at | |
6 | * http://www.eclipse.org/legal/epl-v10.html | |
7 | * | |
8 | * Contributors: | |
9 | * Baji, Laszlo | |
10 | * Balasko, Jeno | |
11 | * Baranyi, Botond | |
12 | * Kovacs, Ferenc | |
13 | * Raduly, Csaba | |
14 | * Zalanyi, Balazs Andor | |
15 | * | |
16 | ******************************************************************************/ | |
970ed795 EL |
17 | #include "PredefFunc.hh" |
18 | #include "error.h" | |
19 | #include "Int.hh" | |
20 | #include "Real.hh" | |
21 | #include "Setting.hh" | |
22 | #include "string.hh" | |
23 | #include "ustring.hh" | |
24 | #include "CompilerError.hh" | |
25 | #include <stdio.h> | |
26 | #include <sys/types.h> | |
27 | #include <regex.h> | |
28 | #include <stdint.h> | |
29 | #include "../common/memory.h" | |
30 | #include "../common/pattern.hh" | |
31 | #include <iostream> | |
32 | ||
33 | // Size in bytes of the local buffers that receive regerror() messages in the regexp() functions. | |
34 | #define ERRMSG_BUFSIZE 512 | |
35 | ||
36 | namespace Common { | |
37 | ||
// Byte order marks spelled as upper-case hexadecimal digit strings (two
// digits per octet). They are compared against the leading digits of an
// encoded value in remove_bom() and get_stringencoding().
static const char utf32be[] = "0000FEFF";
static const char utf32le[] = "FFFE0000";
static const char utf16be[] = "FEFF";
static const char utf16le[] = "FFFE";
static const char utf8[]    = "EFBBBF";
43 | ||
44 | static inline unsigned char get_bit_value(char c, unsigned char bit_value) | |
45 | { | |
46 | switch (c) { | |
47 | case '0': | |
48 | return 0; | |
49 | case '1': | |
50 | return bit_value; | |
51 | default: | |
52 | FATAL_ERROR("Invalid binary digit (%c) in bitstring value", c); | |
53 | return 0; | |
54 | } | |
55 | } | |
56 | ||
57 | char toupper (const char c) | |
58 | { | |
59 | if (('A' <= c && 'F' >= c) || | |
60 | ('0' <= c && '9' >= c)) return c; | |
61 | switch (c) | |
62 | { | |
63 | case 'a' : return 'A'; | |
64 | case 'b' : return 'B'; | |
65 | case 'c' : return 'C'; | |
66 | case 'd' : return 'D'; | |
67 | case 'e' : return 'E'; | |
68 | case 'f' : return 'F'; | |
69 | default: | |
70 | FATAL_ERROR("%c cannot be converted to hex character", c); | |
71 | break; | |
72 | } | |
73 | } | |
74 | ||
75 | char hexdigit_to_char(unsigned char hexdigit) | |
76 | { | |
77 | if (hexdigit < 10) return '0' + hexdigit; | |
78 | else if (hexdigit < 16) return 'A' + hexdigit - 10; | |
79 | else { | |
80 | FATAL_ERROR("hexdigit_to_char(): invalid argument: %d", hexdigit); | |
81 | return '\0'; // to avoid warning | |
82 | } | |
83 | } | |
84 | ||
85 | unsigned char char_to_hexdigit(char c) | |
86 | { | |
87 | if (c >= '0' && c <= '9') return c - '0'; | |
88 | else if (c >= 'A' && c <= 'F') return c - 'A' + 10; | |
89 | else if (c >= 'a' && c <= 'f') return c - 'a' + 10; | |
90 | else { | |
91 | FATAL_ERROR("char_to_hexdigit(): invalid argument: %c", c); | |
92 | return 0; // to avoid warning | |
93 | } | |
94 | } | |
95 | ||
96 | string uchar2str(unsigned char uchar) | |
97 | { | |
98 | char str[2]; | |
99 | str[0] = hexdigit_to_char(uchar / 16); | |
100 | str[1] = hexdigit_to_char(uchar % 16); | |
101 | return string(2, str); | |
102 | } | |
103 | ||
104 | unsigned char str2uchar(const char& c1, const char& c2) | |
105 | { | |
106 | unsigned char uc = 0; | |
107 | uc = char_to_hexdigit(c1); | |
108 | uc <<= 4; | |
109 | uc += char_to_hexdigit(c2); | |
110 | return uc; | |
111 | } | |
112 | ||
113 | int_val_t rem(const int_val_t& left, const int_val_t& right) | |
114 | { | |
115 | return (left - right * (left / right)); | |
116 | } | |
117 | ||
118 | int_val_t mod(const int_val_t& left, const int_val_t& right) | |
119 | { | |
120 | int_val_t r = right < 0 ? -right : right; | |
121 | if (left > 0) { | |
122 | return rem(left, r); | |
123 | } else { | |
124 | int_val_t result = rem(left, r); | |
125 | return result == 0 ? result : result + r; | |
126 | } | |
127 | } | |
128 | ||
129 | string* to_uppercase(const string& value) | |
130 | { | |
131 | string *s = new string(value); | |
132 | for (size_t i = 0; i < s->size(); i++) { | |
133 | char& c=(*s)[i]; | |
134 | if (c >= 'a' && c <= 'z') c = c - 'a' + 'A'; | |
135 | } | |
136 | return s; | |
137 | } | |
138 | ||
139 | string* not4b_bit(const string& bstr) | |
140 | { | |
141 | string *s=new string(bstr); | |
142 | for(size_t i=0; i<s->size(); i++) { | |
143 | char& c=(*s)[i]; | |
144 | switch(c) { | |
145 | case '0': c='1'; break; | |
146 | case '1': c='0'; break; | |
147 | default: | |
148 | FATAL_ERROR("not4b_bit(): Invalid char in bitstring."); | |
149 | } // switch c | |
150 | } // for i | |
151 | return s; | |
152 | } | |
153 | ||
154 | string* not4b_hex(const string& hstr) | |
155 | { | |
156 | string *s=new string(hstr); | |
157 | for(size_t i=0; i<s->size(); i++) { | |
158 | char& c=(*s)[i]; | |
159 | switch(c) { | |
160 | case '0': c='F'; break; | |
161 | case '1': c='E'; break; | |
162 | case '2': c='D'; break; | |
163 | case '3': c='C'; break; | |
164 | case '4': c='B'; break; | |
165 | case '5': c='A'; break; | |
166 | case '6': c='9'; break; | |
167 | case '7': c='8'; break; | |
168 | case '8': c='7'; break; | |
169 | case '9': c='6'; break; | |
170 | case 'A': c='5'; break; | |
171 | case 'B': c='4'; break; | |
172 | case 'C': c='3'; break; | |
173 | case 'D': c='2'; break; | |
174 | case 'E': c='1'; break; | |
175 | case 'F': c='0'; break; | |
176 | case 'a': c='5'; break; | |
177 | case 'b': c='4'; break; | |
178 | case 'c': c='3'; break; | |
179 | case 'd': c='2'; break; | |
180 | case 'e': c='1'; break; | |
181 | case 'f': c='0'; break; | |
182 | default: | |
183 | FATAL_ERROR("not4b_hex(): Invalid char in hexstring."); | |
184 | } // switch c | |
185 | } // for i | |
186 | return s; | |
187 | } | |
188 | ||
189 | string* and4b(const string& left, const string& right) | |
190 | { | |
191 | string *s=new string(left); | |
192 | for(size_t i=0; i<s->size(); i++) { | |
193 | char& c=(*s)[i]; | |
194 | c=hexdigit_to_char(char_to_hexdigit(c) & char_to_hexdigit(right[i])); | |
195 | } // for i | |
196 | return s; | |
197 | } | |
198 | ||
199 | string* or4b(const string& left, const string& right) | |
200 | { | |
201 | string *s=new string(left); | |
202 | for(size_t i=0; i<s->size(); i++) { | |
203 | char& c=(*s)[i]; | |
204 | c=hexdigit_to_char(char_to_hexdigit(c) | char_to_hexdigit(right[i])); | |
205 | } // for i | |
206 | return s; | |
207 | } | |
208 | ||
209 | string* xor4b(const string& left, const string& right) | |
210 | { | |
211 | string *s=new string(left); | |
212 | for(size_t i=0; i<s->size(); i++) { | |
213 | char& c=(*s)[i]; | |
214 | c=hexdigit_to_char(char_to_hexdigit(c) ^ char_to_hexdigit(right[i])); | |
215 | } // for i | |
216 | return s; | |
217 | } | |
218 | ||
219 | string* shift_left(const string& value, const Int& count) | |
220 | { | |
221 | if (count > 0) { | |
222 | string *s = new string; | |
223 | if (count < static_cast<Int>(value.size())) *s = value.substr(count); | |
224 | s->resize(value.size(), '0'); | |
225 | return s; | |
226 | } else if (count < 0) return shift_right(value, -count); | |
227 | else return new string(value); | |
228 | } | |
229 | ||
230 | string* shift_right(const string& value, const Int& count) | |
231 | { | |
232 | if (count > 0) { | |
233 | string *s = new string; | |
234 | if (count < static_cast<Int>(value.size())) { | |
235 | s->resize(count, '0'); | |
236 | *s += value.substr(0, value.size()-count); | |
237 | } else s->resize(value.size(), '0'); | |
238 | return s; | |
239 | } else if (count < 0) return shift_left(value, -count); | |
240 | else return new string(value); | |
241 | } | |
242 | ||
243 | string* rotate_left(const string& value, const Int& p_count) | |
244 | { | |
245 | size_t size = value.size(); | |
246 | if (size == 0) return new string(value); | |
247 | else if (p_count < 0) return rotate_right(value, -p_count); | |
248 | size_t count = p_count % size; | |
249 | if (count == 0) return new string(value); | |
250 | else return new string(value.substr(count) + value.substr(0, count)); | |
251 | } | |
252 | ||
253 | string* rotate_right(const string& value, const Int& p_count) | |
254 | { | |
255 | size_t size = value.size(); | |
256 | if (size == 0) return new string(value); | |
257 | else if (p_count < 0) return rotate_left(value, -p_count); | |
258 | size_t count = p_count % size; | |
259 | if (count == 0) return new string(value); | |
260 | else return new string(value.substr(size - count) + | |
261 | value.substr(0, size - count)); | |
262 | } | |
263 | ||
264 | ||
265 | ustring* rotate_left(const ustring& value, const Int& p_count) | |
266 | { | |
267 | size_t size = value.size(); | |
268 | if (size == 0) return new ustring(value); | |
269 | else if (p_count < 0) return rotate_right(value, -p_count); | |
270 | size_t count = p_count % size; | |
271 | if (count == 0) return new ustring(value); | |
272 | else return new ustring(value.substr(count) + value.substr(0, count)); | |
273 | } | |
274 | ||
275 | ustring* rotate_right(const ustring& value, const Int& p_count) | |
276 | { | |
277 | size_t size = value.size(); | |
278 | if (size == 0) return new ustring(value); | |
279 | else if (p_count < 0) return rotate_left(value, -p_count); | |
280 | size_t count = p_count % size; | |
281 | if (count == 0) return new ustring(value); | |
282 | else return new ustring(value.substr(size - count) + | |
283 | value.substr(0, size - count)); | |
284 | } | |
285 | ||
286 | int_val_t* bit2int(const string& bstr) | |
287 | { | |
288 | size_t nof_bits = bstr.size(); | |
289 | // skip the leading zeros | |
290 | size_t start_index = 0; | |
291 | while (start_index < nof_bits && bstr[start_index] == '0') start_index++; | |
292 | int_val_t *ret_val = new int_val_t((Int)0); | |
293 | for (size_t i = start_index; i < nof_bits; i++) { | |
294 | *ret_val <<= 1; | |
295 | if (bstr[i] == '1') *ret_val += 1; | |
296 | } | |
297 | return ret_val; | |
298 | } | |
299 | ||
300 | int_val_t* hex2int(const string& hstr) | |
301 | { | |
302 | size_t nof_digits = hstr.size(); | |
303 | size_t start_index = 0; | |
304 | // Skip the leading zeros. | |
305 | while (start_index < nof_digits && hstr[start_index] == '0') | |
306 | start_index++; | |
307 | int_val_t *ret_val = new int_val_t((Int)0); | |
308 | for (size_t i = start_index; i < nof_digits; i++) { | |
309 | *ret_val <<= 4; | |
310 | *ret_val += char_to_hexdigit(hstr[i]); | |
311 | } | |
312 | return ret_val; | |
313 | } | |
314 | ||
315 | Int unichar2int(const ustring& ustr) | |
316 | { | |
317 | if (ustr.size() != 1) FATAL_ERROR("unichar2int(): invalid argument"); | |
318 | const ustring::universal_char& uchar = ustr.u_str()[0]; | |
319 | Int ret_val = (uchar.group << 24) | (uchar.plane << 16) | (uchar.row << 8) | | |
320 | uchar.cell; | |
321 | return ret_val; | |
322 | } | |
323 | ||
324 | string *int2bit(const int_val_t& value, const Int& length) | |
325 | { | |
326 | if (length < 0) FATAL_ERROR("int2bit(): negative length"); | |
327 | size_t string_length = static_cast<size_t>(length); | |
328 | if (static_cast<Int>(string_length) != length || | |
329 | string_length > string::max_string_len) | |
330 | FATAL_ERROR("int2bit(): length is too large"); | |
331 | if (value < 0) FATAL_ERROR("int2bit(): negative value"); | |
332 | string *bstr = new string; | |
333 | bstr->resize(string_length); | |
334 | int_val_t tmp_value = value; | |
335 | for (size_t i = 1; i <= string_length; i++) { | |
336 | (*bstr)[string_length - i] = (tmp_value & 1).get_val() ? '1' : '0'; | |
337 | tmp_value >>= 1; | |
338 | } | |
339 | if (tmp_value != 0) | |
340 | FATAL_ERROR("int2bit(): %s does not fit in %lu bits", \ | |
341 | value.t_str().c_str(), (unsigned long)string_length); | |
342 | return bstr; | |
343 | } | |
344 | ||
// Upper-case hexadecimal digit characters indexed by their numeric value.
static const char hdigits[16] = {
  '0', '1', '2', '3',
  '4', '5', '6', '7',
  '8', '9', 'A', 'B',
  'C', 'D', 'E', 'F'
};
347 | ||
348 | string *int2hex(const int_val_t& value, const Int& length) | |
349 | { | |
350 | if (length < 0) | |
351 | FATAL_ERROR("int2hex(): negative length"); | |
352 | size_t string_length = static_cast<size_t>(length); | |
353 | if (static_cast<Int>(string_length) != length || | |
354 | string_length > string::max_string_len) | |
355 | FATAL_ERROR("int2hex(): length is too large"); | |
356 | if (value < 0) FATAL_ERROR("int2hex(): negative value"); | |
357 | string *hstr = new string; | |
358 | hstr->resize(string_length); | |
359 | int_val_t tmp_value = value; | |
360 | for (size_t i = 1; i <= string_length; i++) { | |
361 | (*hstr)[string_length - i] = hdigits[(tmp_value & 0x0f).get_val()]; | |
362 | tmp_value >>= 4; | |
363 | } | |
364 | if (tmp_value != 0) { | |
365 | FATAL_ERROR("int2hex(): %s does not fit in %lu hexadecimal digits", | |
366 | value.t_str().c_str(), (unsigned long)string_length); | |
367 | } | |
368 | return hstr; | |
369 | } | |
370 | ||
371 | ustring *int2unichar(const Int& value) | |
372 | { | |
373 | if (value < 0 || value > 2147483647) | |
374 | FATAL_ERROR("int2unichar(): invalid argument"); | |
375 | unsigned char group = (value >> 24) & 0xFF, | |
376 | plane = (value >> 16) & 0xFF, | |
377 | row = (value >> 8) & 0xFF, | |
378 | cell = value & 0xFF; | |
379 | return new ustring(group, plane, row, cell); | |
380 | } | |
381 | ||
382 | string *oct2char(const string& ostr) | |
383 | { | |
384 | string *cstr = new string; | |
385 | size_t ostr_size = ostr.size(); | |
386 | if (ostr_size % 2) | |
387 | FATAL_ERROR("oct2char(): argument has odd length: %lu", | |
388 | (unsigned long) ostr_size); | |
389 | size_t cstr_size = ostr_size / 2; | |
390 | cstr->resize(cstr_size); | |
391 | const char *ostr_ptr = ostr.c_str(); | |
392 | for (size_t i = 0; i < cstr_size; i++) { | |
393 | unsigned char c = 16 * char_to_hexdigit(ostr_ptr[2 * i]) + | |
394 | char_to_hexdigit(ostr_ptr[2 * i + 1]); | |
395 | if (c > 127) FATAL_ERROR("oct2char(): resulting charstring contains " \ | |
396 | "non-ascii character: %d", c); | |
397 | (*cstr)[i] = c; | |
398 | } | |
399 | return cstr; | |
400 | } | |
401 | ||
402 | string *char2oct(const string& cstr) | |
403 | { | |
404 | string *ostr = new string; | |
405 | size_t cstr_size = cstr.size(); | |
406 | ostr->resize(cstr_size * 2, '0'); | |
407 | const char *cstr_ptr = cstr.c_str(); | |
408 | for (size_t i = 0; i < cstr_size; i++) { | |
409 | unsigned char c = cstr_ptr[i]; | |
410 | (*ostr)[2 * i] = hexdigit_to_char(c / 16); | |
411 | (*ostr)[2 * i + 1] = hexdigit_to_char(c % 16); | |
412 | } | |
413 | return ostr; | |
414 | } | |
415 | ||
416 | string *bit2hex(const string& bstr) | |
417 | { | |
418 | size_t size=bstr.size(); | |
419 | size_t hsize=(size+3)/4; | |
420 | string *hstr = new string; | |
421 | string *bstr4=NULL; | |
422 | if(size%4) { | |
423 | bstr4=new string; | |
424 | bstr4->resize(hsize*4,'0'); | |
425 | bstr4->replace(4-(size%4),size,bstr); | |
426 | } | |
427 | hstr->resize(hsize,'0'); | |
428 | string b4(4,"0000"); | |
429 | for(size_t i=0;i<hsize;i++) { | |
430 | unsigned int u; | |
431 | if(size%4)b4=bstr4->substr(i*4,4); | |
432 | else b4=bstr.substr(i*4,4); | |
433 | if(b4[0]=='1')u=8;else u=0; | |
434 | if(b4[1]=='1')u+=4; | |
435 | if(b4[2]=='1')u+=2; | |
436 | if(b4[3]=='1')u++; | |
437 | (*hstr)[i]=hdigits[u]; | |
438 | } | |
439 | if(bstr4!=NULL)delete bstr4; | |
440 | return hstr; | |
441 | } | |
442 | ||
443 | string *hex2oct(const string& hstr) | |
444 | { | |
445 | if(hstr.size()%2==0)return new string(hstr); | |
446 | else { | |
447 | string *ostr=new string("0"); | |
448 | (*ostr)+=hstr; | |
449 | return ostr; | |
450 | } | |
451 | } | |
452 | ||
453 | string *asn_hex2oct(const string& hstr) | |
454 | { | |
455 | string *ostr = new string(hstr); | |
456 | size_t size = ostr->size(); | |
457 | if (size % 2) ostr->resize(size + 1, '0'); | |
458 | return ostr; | |
459 | } | |
460 | ||
461 | string *bit2oct(const string& bstr) | |
462 | { | |
463 | string *s1,*s2; | |
464 | s1=bit2hex(bstr); | |
465 | s2=hex2oct(*s1); | |
466 | delete s1; | |
467 | return s2; | |
468 | } | |
469 | ||
470 | string *asn_bit2oct(const string& bstr) | |
471 | { | |
472 | size_t size = bstr.size(); | |
473 | string *ostr = new string; | |
474 | ostr->resize(((size+7)/8)*2); | |
475 | for(size_t i=0, j=0; i<size; ) { | |
476 | unsigned char digit1=0, digit2=0; | |
477 | digit1 += get_bit_value(bstr[i++], 8); | |
478 | if (i < size) { | |
479 | digit1 += get_bit_value(bstr[i++], 4); | |
480 | if (i < size) { | |
481 | digit1 += get_bit_value(bstr[i++], 2); | |
482 | if (i < size) { | |
483 | digit1 += get_bit_value(bstr[i++], 1); | |
484 | if (i < size) { | |
485 | digit2 += get_bit_value(bstr[i++], 8); | |
486 | if (i < size) { | |
487 | digit2 += get_bit_value(bstr[i++], 4); | |
488 | if (i < size) { | |
489 | digit2 += get_bit_value(bstr[i++], 2); | |
490 | if (i < size) digit2 += get_bit_value(bstr[i++], 1); | |
491 | } | |
492 | } | |
493 | } | |
494 | } | |
495 | } | |
496 | } | |
497 | (*ostr)[j++] = hexdigit_to_char(digit1); | |
498 | (*ostr)[j++] = hexdigit_to_char(digit2); | |
499 | } | |
500 | return ostr; | |
501 | } | |
502 | ||
503 | string *hex2bit(const string& hstr) | |
504 | { | |
505 | size_t size=hstr.size(); | |
506 | string *bstr = new string; | |
507 | bstr->resize(4*size); | |
508 | for(size_t i=0; i<size; i++) { | |
509 | switch(hstr[i]) { | |
510 | case '0': | |
511 | bstr->replace(4*i, 4, "0000"); | |
512 | break; | |
513 | case '1': | |
514 | bstr->replace(4*i, 4, "0001"); | |
515 | break; | |
516 | case '2': | |
517 | bstr->replace(4*i, 4, "0010"); | |
518 | break; | |
519 | case '3': | |
520 | bstr->replace(4*i, 4, "0011"); | |
521 | break; | |
522 | case '4': | |
523 | bstr->replace(4*i, 4, "0100"); | |
524 | break; | |
525 | case '5': | |
526 | bstr->replace(4*i, 4, "0101"); | |
527 | break; | |
528 | case '6': | |
529 | bstr->replace(4*i, 4, "0110"); | |
530 | break; | |
531 | case '7': | |
532 | bstr->replace(4*i, 4, "0111"); | |
533 | break; | |
534 | case '8': | |
535 | bstr->replace(4*i, 4, "1000"); | |
536 | break; | |
537 | case '9': | |
538 | bstr->replace(4*i, 4, "1001"); | |
539 | break; | |
540 | case 'A': | |
541 | case 'a': | |
542 | bstr->replace(4*i, 4, "1010"); | |
543 | break; | |
544 | case 'B': | |
545 | case 'b': | |
546 | bstr->replace(4*i, 4, "1011"); | |
547 | break; | |
548 | case 'C': | |
549 | case 'c': | |
550 | bstr->replace(4*i, 4, "1100"); | |
551 | break; | |
552 | case 'D': | |
553 | case 'd': | |
554 | bstr->replace(4*i, 4, "1101"); | |
555 | break; | |
556 | case 'E': | |
557 | case 'e': | |
558 | bstr->replace(4*i, 4, "1110"); | |
559 | break; | |
560 | case 'F': | |
561 | case 'f': | |
562 | bstr->replace(4*i, 4, "1111"); | |
563 | break; | |
564 | default: | |
565 | FATAL_ERROR("Common::hex2bit(): invalid hexadecimal " | |
566 | "digit in hexstring value"); | |
567 | } | |
568 | } | |
569 | return bstr; | |
570 | } | |
571 | ||
572 | int_val_t* float2int(const Real& value, const Location& loc) | |
573 | { | |
574 | // We shouldn't mimic generality with `Int'. | |
575 | if (value >= (Real)LLONG_MIN && value <= (Real)LLONG_MAX) | |
576 | return new int_val_t((Int)value); | |
577 | char buf[512] = ""; | |
578 | snprintf(buf, 511, "%f", value); | |
579 | char *dot = strchr(buf, '.'); | |
580 | if (!dot) FATAL_ERROR("Conversion of float value `%f' to integer failed", value); | |
581 | else memset(dot, 0, sizeof(buf) - (dot - buf)); | |
582 | return new int_val_t(buf, loc); | |
583 | } | |
584 | ||
585 | /* TTCN-3 float values whose absolute value is smaller than this are | |
586 | displayed in exponential notation. Same as in core/Float.hh */ | |
587 | #ifndef MIN_DECIMAL_FLOAT | |
588 | #define MIN_DECIMAL_FLOAT 1.0E-4 | |
589 | #endif | |
590 | /* TTCN-3 float values whose absolute value is greater than or equal to | |
591 | this are displayed in exponential notation. Same as in | |
592 | core/Float.hh */ | |
593 | #ifndef MAX_DECIMAL_FLOAT | |
594 | #define MAX_DECIMAL_FLOAT 1.0E+10 | |
595 | #endif | |
596 | ||
597 | string *float2str(const Real& value) | |
598 | { | |
599 | char str_buf[64]; | |
600 | if ( (value > -MAX_DECIMAL_FLOAT && value <= -MIN_DECIMAL_FLOAT) | |
601 | || (value >= MIN_DECIMAL_FLOAT && value < MAX_DECIMAL_FLOAT) | |
602 | || (value == 0.0)) | |
603 | snprintf(str_buf,64,"%f",value); | |
604 | else snprintf(str_buf,64,"%e",value); | |
605 | return new string(str_buf); | |
606 | } | |
607 | ||
608 | string* regexp(const string& instr, const string& expression, | |
609 | const Int& groupno) | |
610 | { | |
611 | string *retval=0; | |
612 | ||
613 | if(groupno<0) { | |
614 | FATAL_ERROR("regexp(): groupno must be a non-negative integer"); | |
615 | return retval; | |
616 | } | |
617 | // do not report the warnings again | |
618 | // they were already reported while checking the operands | |
619 | unsigned orig_verb_level = verb_level; | |
620 | verb_level &= ~(1|2); | |
621 | char *posix_str=TTCN_pattern_to_regexp(expression.c_str()); | |
622 | verb_level = orig_verb_level; | |
623 | if(posix_str==NULL) { | |
624 | FATAL_ERROR("regexp(): Cannot convert pattern `%s' to POSIX-equivalent.", | |
625 | expression.c_str()); | |
626 | return retval; | |
627 | } | |
628 | ||
629 | regex_t posix_regexp; | |
630 | int ret_val=regcomp(&posix_regexp, posix_str, REG_EXTENDED); | |
631 | Free(posix_str); | |
632 | if(ret_val!=0) { | |
633 | /* regexp error */ | |
634 | char msg[ERRMSG_BUFSIZE]; | |
635 | regerror(ret_val, &posix_regexp, msg, sizeof(msg)); | |
636 | FATAL_ERROR("regexp(): regcomp() failed: %s", msg); | |
637 | return retval; | |
638 | } | |
639 | ||
640 | size_t nmatch=groupno+1; | |
641 | if(nmatch>posix_regexp.re_nsub) { | |
642 | FATAL_ERROR("regexp(): requested groupno is %lu, but this expression " | |
643 | "contains only %lu group(s).", (unsigned long) (nmatch - 1), | |
644 | (unsigned long) posix_regexp.re_nsub); | |
645 | return retval; | |
646 | } | |
647 | regmatch_t* pmatch=(regmatch_t*)Malloc((nmatch+1)*sizeof(regmatch_t)); | |
648 | ret_val=regexec(&posix_regexp, instr.c_str(), nmatch+1, pmatch, 0); | |
649 | if(ret_val==0) { | |
650 | if(pmatch[nmatch].rm_so != -1 && pmatch[nmatch].rm_eo != -1) | |
651 | retval = new string(instr.substr(pmatch[nmatch].rm_so, | |
652 | pmatch[nmatch].rm_eo - pmatch[nmatch].rm_so)); | |
653 | else retval=new string(); | |
654 | } | |
655 | Free(pmatch); | |
656 | if(ret_val!=0) { | |
657 | if(ret_val==REG_NOMATCH) { | |
658 | regfree(&posix_regexp); | |
659 | retval=new string(); | |
660 | } | |
661 | else { | |
662 | /* regexp error */ | |
663 | char msg[ERRMSG_BUFSIZE]; | |
664 | regerror(ret_val, &posix_regexp, msg, sizeof(msg)); | |
665 | FATAL_ERROR("regexp(): regexec() failed: %s", msg); | |
666 | } | |
667 | } | |
668 | else regfree(&posix_regexp); | |
669 | ||
670 | return retval; | |
671 | } | |
672 | ||
673 | ustring* regexp(const ustring& instr, const ustring& expression, | |
674 | const Int& groupno) | |
675 | { | |
676 | ustring *retval=0; | |
677 | ||
678 | if(groupno<0) { | |
679 | FATAL_ERROR("regexp(): groupno must be a non-negative integer"); | |
680 | return retval; | |
681 | } | |
682 | // do not report the warnings again | |
683 | // they were already reported while checking the operands | |
684 | unsigned orig_verb_level = verb_level; | |
685 | verb_level &= ~(1|2); | |
686 | int* user_groups; | |
687 | char *posix_str = TTCN_pattern_to_regexp_uni( | |
688 | expression.get_stringRepr_for_pattern().c_str(), &user_groups); | |
689 | if (user_groups == 0) | |
690 | FATAL_ERROR("regexp(): Cannot find any groups in the second argument."); | |
691 | verb_level = orig_verb_level; | |
692 | if(posix_str==NULL) { | |
693 | FATAL_ERROR("regexp(): Cannot convert pattern `%s' to POSIX-equivalent.", | |
694 | expression.get_stringRepr().c_str()); | |
695 | return retval; | |
696 | } | |
697 | ||
698 | regex_t posix_regexp; | |
699 | int ret_val=regcomp(&posix_regexp, posix_str, REG_EXTENDED); | |
700 | Free(posix_str); | |
701 | if(ret_val!=0) { | |
702 | /* regexp error */ | |
703 | char msg[ERRMSG_BUFSIZE]; | |
704 | regerror(ret_val, &posix_regexp, msg, sizeof(msg)); | |
705 | FATAL_ERROR("regexp(): regcomp() failed: %s", msg); | |
706 | return retval; | |
707 | } | |
708 | ||
709 | size_t nmatch=user_groups[groupno+1]+1; | |
710 | if(nmatch>posix_regexp.re_nsub) { | |
711 | FATAL_ERROR("regexp(): requested groupno is %lu, but this expression " | |
712 | "contains only %lu group(s).", (unsigned long) (groupno), | |
713 | (unsigned long) user_groups[0]); | |
714 | return retval; | |
715 | } | |
716 | ||
717 | Free(user_groups); | |
718 | ||
719 | regmatch_t* pmatch = (regmatch_t*)Malloc((nmatch+1)*sizeof(regmatch_t)); | |
720 | char* tmp = instr.convert_to_regexp_form(); | |
721 | string instr_conv(tmp); | |
722 | Free(tmp); | |
723 | ret_val = regexec(&posix_regexp, instr_conv.c_str(), nmatch+1, pmatch, 0); | |
724 | if(ret_val == 0) { | |
725 | if(pmatch[nmatch].rm_so != -1 && pmatch[nmatch].rm_eo != -1) { | |
726 | retval = new ustring( | |
727 | instr_conv.substr(pmatch[nmatch].rm_so, | |
728 | pmatch[nmatch].rm_eo - pmatch[nmatch].rm_so) | |
729 | .convert_stringRepr_for_pattern()); | |
730 | } else { retval = new ustring(); } | |
731 | } | |
732 | Free(pmatch); | |
733 | if(ret_val!=0) { | |
734 | if(ret_val==REG_NOMATCH) { | |
735 | regfree(&posix_regexp); | |
736 | retval=new ustring(); | |
737 | } | |
738 | else { | |
739 | /* regexp error */ | |
740 | char msg[ERRMSG_BUFSIZE]; | |
741 | regerror(ret_val, &posix_regexp, msg, sizeof(msg)); | |
742 | FATAL_ERROR("regexp(): regexec() failed: %s", msg); | |
743 | } | |
744 | } | |
745 | else regfree(&posix_regexp); | |
746 | ||
747 | return retval; | |
748 | } | |
749 | ||
750 | string* remove_bom(const string& encoded_value) | |
751 | { | |
752 | size_t length = encoded_value.size(); | |
753 | if (0 == length) return new string(); | |
86be9305 | 754 | if (length % 2) { |
970ed795 EL |
755 | ERROR("remove_bom(): Wrong string. The number of nibbles (%d) in string " |
756 | "shall be divisible by 2", static_cast<int>(length)); | |
757 | return new string(encoded_value); | |
758 | } | |
759 | ||
760 | int length_of_BOM = 0; | |
761 | string str_uppercase(encoded_value); | |
762 | size_t enough = length > sizeof(utf32be)-1 ? sizeof(utf32be)-1 : length; | |
763 | for (size_t i = 0; i < enough; ++i) { | |
764 | str_uppercase[i] = toupper(encoded_value[i]); | |
765 | } | |
766 | ||
767 | if (str_uppercase.find(utf32be, 0) < length) length_of_BOM = sizeof(utf32be)-1; | |
768 | else if (str_uppercase.find(utf32le, 0) < length) length_of_BOM = sizeof(utf32le)-1; | |
769 | else if (str_uppercase.find(utf16be, 0) < length) length_of_BOM = sizeof(utf16be)-1; | |
770 | else if (str_uppercase.find(utf16le, 0) < length) length_of_BOM = sizeof(utf16le)-1; | |
771 | else if (str_uppercase.find(utf8, 0) < length) length_of_BOM = sizeof(utf8)-1; | |
772 | else return new string(encoded_value); // no BOM found | |
773 | ||
774 | return new string(encoded_value.substr(length_of_BOM, length)); | |
775 | } | |
776 | ||
777 | static CharCoding::CharCodingType is_ascii (size_t length, const unsigned char* strptr) | |
778 | { | |
779 | const unsigned char nonASCII = 1 << 7;// MSB is 1 in case of non ASCII character | |
780 | CharCoding::CharCodingType ret = CharCoding::ASCII; | |
781 | for (size_t i = 0; i < length; ++i) { | |
782 | if ( strptr[i] & nonASCII) { | |
783 | ret = CharCoding::UNKNOWN; | |
784 | break; | |
785 | } | |
786 | } | |
787 | return ret; | |
788 | } | |
789 | ||
790 | static CharCoding::CharCodingType is_utf8(size_t length, const unsigned char* strptr) | |
791 | { | |
792 | const char MSB = 1 << 7; // MSB is 1 in case of non ASCII character | |
793 | const char MSBmin1 = 1 << 6; // 0100 0000 | |
794 | size_t i = 0; | |
795 | while (length > i) { | |
796 | if ( strptr[i] & MSB) { // non ASCII char | |
797 | char maskUTF8 = 1 << 6; // 111x xxxx shows how many additional bytes are there | |
798 | if (!(strptr[i] & maskUTF8)) return CharCoding::UNKNOWN; // accepted 11xxx xxxx but received 10xx xxxx | |
799 | unsigned int noofUTF8 = 0; // 11xx xxxxx -> 2 bytes, 111x xxxxx -> 3 bytes , 1111 xxxxx -> 4 bytes in UTF-8 | |
800 | while (strptr[i] & maskUTF8) { | |
801 | ++noofUTF8; | |
802 | maskUTF8 >>= 1; // shift right the mask | |
803 | } | |
804 | // the second and third (and so on) UTF-8 byte looks like 10xx xxxx | |
805 | while (0 < noofUTF8 ) { | |
806 | ++i; | |
807 | if (!(strptr[i] & MSB) || (strptr[i] & MSBmin1) || i >= length) { // if not like this: 10xx xxxx | |
808 | return CharCoding::UNKNOWN; | |
809 | } | |
810 | --noofUTF8; | |
811 | } | |
812 | } | |
813 | ++i; | |
814 | } | |
815 | return CharCoding::UTF_8; | |
816 | } | |
817 | ||
818 | string* get_stringencoding(const string& encoded_value) | |
819 | { | |
820 | size_t length = encoded_value.size(); | |
821 | if (0 == length) return new string("<unknown>"); | |
86be9305 | 822 | if (length % 2) { |
970ed795 EL |
823 | ERROR("get_stringencoding(): Wrong string. The number of nibbles (%d) in string " |
824 | "shall be divisible by 2", static_cast<int>(length)); | |
825 | return new string("<unknown>"); | |
826 | } | |
827 | ||
828 | string str_uppercase(encoded_value); | |
829 | size_t enough = length > sizeof(utf32be)-1 ? sizeof(utf32be)-1 : length; | |
830 | for (size_t i = 0; i < enough; ++i) { | |
831 | str_uppercase[i] = toupper(encoded_value[i]); | |
832 | } | |
833 | ||
834 | if (str_uppercase.find(utf32be, 0) < length) return new string("UTF-32BE"); | |
835 | else if (str_uppercase.find(utf32le, 0) < length) return new string("UTF-32LE"); | |
836 | else if (str_uppercase.find(utf16be, 0) < length) return new string("UTF-16BE"); | |
837 | else if (str_uppercase.find(utf16le, 0) < length) return new string("UTF-16LE"); | |
838 | else if (str_uppercase.find(utf8, 0) < length) return new string("UTF-8"); | |
839 | ||
840 | unsigned char *uc_str = new unsigned char[length/2]; | |
841 | string ret; | |
842 | for (size_t i = 0; i < length / 2; ++i) { | |
843 | uc_str[i] = str2uchar(encoded_value[2 * i], encoded_value[2 * i + 1]); | |
844 | } | |
845 | if (is_ascii (length / 2, uc_str) == CharCoding::ASCII) ret = "ASCII"; | |
846 | else if (CharCoding::UTF_8 == is_utf8 (length / 2, uc_str)) ret = "UTF-8"; | |
847 | else ret = "<unknown>"; | |
848 | ||
849 | delete [] uc_str; | |
850 | return new string(ret); | |
851 | } | |
852 | ||
853 | static size_t check_BOM(CharCoding::CharCodingType expected_coding, size_t n_uc, unsigned char* uc_str) | |
854 | { | |
855 | if (0 == n_uc) return 0; | |
856 | ||
857 | switch (expected_coding) { | |
858 | case CharCoding::UTF32: | |
859 | case CharCoding::UTF32BE: | |
860 | case CharCoding::UTF32LE: | |
861 | if (4 > n_uc) { | |
862 | ERROR("decode_utf32(): The string is shorter than the expected BOM"); | |
863 | return 0; | |
864 | } | |
865 | break; | |
866 | case CharCoding::UTF16: | |
867 | case CharCoding::UTF16BE: | |
868 | case CharCoding::UTF16LE: | |
869 | if (2 > n_uc) { | |
870 | ERROR("decode_utf16(): The string is shorter than the expected BOM"); | |
871 | return 0; | |
872 | } | |
873 | break; | |
874 | default: break; | |
875 | } | |
876 | ||
877 | //BOM indicates that the byte order is determined by a byte order mark, | |
878 | //if present at the beginning the length of BOM is returned. | |
879 | bool badBOM = false; | |
880 | string errmsg; | |
881 | string caller; | |
882 | switch (expected_coding) { | |
883 | case CharCoding::UTF32BE: | |
884 | case CharCoding::UTF32: | |
885 | if (0x00 == uc_str[0] && 0x00 == uc_str[1] && 0xFE == uc_str[2] && 0xFF == uc_str[3]) | |
886 | return 4; | |
887 | badBOM = true; | |
888 | caller = "decode_utf32()"; | |
889 | errmsg = "UTF-32BE"; | |
890 | break; | |
891 | case CharCoding::UTF32LE: | |
892 | if (0xFF == uc_str[0] && 0xFE == uc_str[1] && 0x00 == uc_str[2] && 0x00 == uc_str[3]) | |
893 | return 4; | |
894 | badBOM = true; | |
895 | caller = "decode_utf32()"; | |
896 | errmsg = "UTF-32LE"; | |
897 | break; | |
898 | case CharCoding::UTF16BE: | |
899 | case CharCoding::UTF16: | |
900 | if (0xFE == uc_str[0] && 0xFF == uc_str[1]) | |
901 | return 2; | |
902 | badBOM = true; | |
903 | caller = "decode_utf16()"; | |
904 | errmsg = "UTF-16BE"; | |
905 | break; | |
906 | case CharCoding::UTF16LE: | |
907 | if (0xFF == uc_str[0] && 0xFE == uc_str[1]) | |
908 | return 2; | |
909 | badBOM = true; | |
910 | caller = "decode_utf16()"; | |
911 | errmsg = "UTF-16LE"; | |
912 | break; | |
913 | case CharCoding::UTF_8: | |
914 | if (0xEF == uc_str[0] && 0xBB == uc_str[1] && 0xBF == uc_str[2]) | |
915 | return 3; | |
916 | return 0; | |
917 | default: | |
918 | if (CharCoding::UTF32 == expected_coding || CharCoding::UTF16 == expected_coding) { | |
919 | const char* str = CharCoding::UTF32 == expected_coding ? "UTF-32" : "UTF-16"; | |
920 | ERROR("Wrong %s string. No BOM detected, however the given coding type (%s) " | |
921 | "expects it to define the endianness", str, str); | |
922 | } | |
923 | else { | |
924 | ERROR("Wrong string. No BOM detected"); | |
925 | } | |
926 | } | |
927 | if (badBOM) ERROR("%s: Wrong %s string. The expected coding could not be verified", | |
928 | caller.c_str(), errmsg.c_str()); | |
929 | return 0; | |
930 | } | |
931 | ||
932 | static void fill_continuing_octets(int n_continuing, unsigned char *continuing_ptr, | |
933 | size_t n_uc, const unsigned char* uc_str, int start_pos, | |
934 | int uchar_pos) | |
935 | { | |
936 | for (int i = 0; i < n_continuing; i++) { | |
937 | if (start_pos + i < static_cast<int>(n_uc)) { | |
938 | unsigned char octet = uc_str[start_pos + i]; | |
939 | if ((octet & 0xC0) != 0x80) { | |
940 | ERROR("decode_utf8(): Malformed: At character position %u, octet position %u: %02X is " | |
941 | "not a valid continuing octet.", uchar_pos, start_pos + i, octet); | |
942 | return; | |
943 | } | |
944 | continuing_ptr[i] = octet & 0x3F; | |
945 | } | |
946 | else { | |
947 | if (start_pos + i == static_cast<int>(n_uc)) { | |
948 | if (i > 0) { | |
949 | // only a part of octets is missing | |
950 | ERROR("decode_utf8(): Incomplete: At character position %d, octet position %d: %d out " | |
951 | "of %d continuing octets %s missing from the end of the stream.", | |
952 | uchar_pos, start_pos + i, n_continuing - i, n_continuing, | |
953 | n_continuing - i > 1 ? "are" : "is"); | |
954 | return; | |
955 | } | |
956 | else { | |
957 | // all octets are missing | |
958 | ERROR("decode_utf8(): Incomplete: At character position %d, octet position %d: %d " | |
959 | "continuing octet%s missing from the end of the stream.", uchar_pos, | |
960 | start_pos, n_continuing, n_continuing > 1 ? "s are" : " is"); | |
961 | return; | |
962 | } | |
963 | } | |
964 | continuing_ptr[i] = 0; | |
965 | } | |
966 | } | |
967 | } | |
968 | ||
969 | ustring decode_utf8(const string & ostr, CharCoding::CharCodingType expected_coding) | |
970 | { | |
971 | size_t length = ostr.size(); | |
972 | if (0 == length) return ustring(); | |
973 | if (length % 2) { | |
974 | ERROR("decode_utf8(): Wrong UTF-8 string. The number of nibbles (%d) in octetstring " | |
975 | "shall be divisible by 2", static_cast<int>(length)); | |
976 | return ustring(); | |
977 | } | |
978 | ||
979 | unsigned char *uc_str = new unsigned char[length/2]; | |
980 | for (size_t i = 0; i < length / 2; ++i) { | |
981 | uc_str[i] = str2uchar(ostr[2 * i], ostr[2 * i + 1]); | |
982 | } | |
983 | ustring ucstr; | |
984 | size_t start = check_BOM(CharCoding::UTF_8, length /2, uc_str); | |
985 | ||
986 | for (size_t i = start; i < length / 2;) { | |
987 | // perform the decoding character by character | |
988 | if (uc_str[i] <= 0x7F) { | |
989 | // character encoded on a single octet: 0xxxxxxx (7 useful bits) | |
990 | unsigned char g = 0; | |
991 | unsigned char p = 0; | |
992 | unsigned char r = 0; | |
993 | unsigned char c = uc_str[i]; | |
994 | ucstr += ustring(g, p, r, c); | |
995 | ++i; | |
996 | } | |
997 | else if (uc_str[i] <= 0xBF) { | |
998 | // continuing octet (10xxxxxx) without leading octet ==> malformed | |
999 | ERROR("decode_utf8(): Malformed: At character position %d, octet position %d: continuing " | |
1000 | "octet %02X without leading octet.", static_cast<int>(ucstr.size()), | |
1001 | static_cast<int>(i), uc_str[i]); | |
1002 | goto dec_error; | |
1003 | } | |
1004 | else if (uc_str[i] <= 0xDF) { | |
1005 | // character encoded on 2 octets: 110xxxxx 10xxxxxx (11 useful bits) | |
1006 | unsigned char octets[2]; | |
1007 | octets[0] = uc_str[i] & 0x1F; | |
1008 | fill_continuing_octets(1, octets + 1, length / 2, uc_str, i + 1, ucstr.size()); | |
1009 | unsigned char g = 0; | |
1010 | unsigned char p = 0; | |
1011 | unsigned char r = octets[0] >> 2; | |
1012 | unsigned char c = octets[0] << 6 | octets[1]; | |
1013 | if (r == 0x00 && c < 0x80) { | |
1014 | ERROR("decode_utf8(): Overlong: At character position %d, octet position %d: 2-octet " | |
1015 | "encoding for quadruple (0, 0, 0, %u).", static_cast<int>(ucstr.size()), | |
1016 | static_cast<int>(i), c); | |
1017 | goto dec_error; | |
1018 | } | |
1019 | ucstr += ustring(g, p, r, c); | |
1020 | i += 2; | |
1021 | } | |
1022 | else if (uc_str[i] <= 0xEF) { | |
1023 | // character encoded on 3 octets: 1110xxxx 10xxxxxx 10xxxxxx | |
1024 | // (16 useful bits) | |
1025 | unsigned char octets[3]; | |
1026 | octets[0] = uc_str[i] & 0x0F; | |
1027 | fill_continuing_octets(2, octets + 1, length / 2, uc_str, i + 1,ucstr.size()); | |
1028 | unsigned char g = 0; | |
1029 | unsigned char p = 0; | |
1030 | unsigned char r = octets[0] << 4 | octets[1] >> 2; | |
1031 | unsigned char c = octets[1] << 6 | octets[2]; | |
1032 | if (r < 0x08) { | |
1033 | ERROR("decode_utf8(): Overlong: At character position %d, octet position %d: 3-octet " | |
1034 | "encoding for quadruple (0, 0, %u, %u).", static_cast<int>(ucstr.size()), | |
1035 | static_cast<int>(i), r, c); | |
1036 | goto dec_error; | |
1037 | } | |
1038 | ucstr += ustring(g, p, r, c); | |
1039 | i += 3; | |
1040 | } | |
1041 | else if (uc_str[i] <= 0xF7) { | |
1042 | // character encoded on 4 octets: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx | |
1043 | // (21 useful bits) | |
1044 | unsigned char octets[4]; | |
1045 | octets[0] = uc_str[i] & 0x07; | |
1046 | fill_continuing_octets(3, octets + 1, length / 2, uc_str, i + 1, ucstr.size()); | |
1047 | unsigned char g = 0; | |
1048 | unsigned char p = octets[0] << 2 | octets[1] >> 4; | |
1049 | unsigned char r = octets[1] << 4 | octets[2] >> 2; | |
1050 | unsigned char c = octets[2] << 6 | octets[3]; | |
1051 | if (p == 0x00) { | |
1052 | ERROR("decode_utf8(): Overlong: At character position %d, octet position %d: 4-octet " | |
1053 | "encoding for quadruple (0, 0, %u, %u).", static_cast<int>(ucstr.size()), | |
1054 | static_cast<int>(i), r, c); | |
1055 | goto dec_error; | |
1056 | } | |
1057 | ucstr += ustring(g, p, r, c); | |
1058 | i += 4; | |
1059 | } | |
1060 | else if (uc_str[i] <= 0xFB) { | |
1061 | // character encoded on 5 octets: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx | |
1062 | // 10xxxxxx (26 useful bits) | |
1063 | unsigned char octets[5]; | |
1064 | octets[0] = uc_str[i] & 0x03; | |
1065 | fill_continuing_octets(4, octets + 1, length / 2, uc_str, i + 1, ucstr.size()); | |
1066 | unsigned char g = octets[0]; | |
1067 | unsigned char p = octets[1] << 2 | octets[2] >> 4; | |
1068 | unsigned char r = octets[2] << 4 | octets[3] >> 2; | |
1069 | unsigned char c = octets[3] << 6 | octets[4]; | |
1070 | if (g == 0x00 && p < 0x20) { | |
1071 | ERROR("decode_utf8(): Overlong: At character position %d, octet position %d: 5-octet " | |
1072 | "encoding for quadruple (0, %u, %u, %u).", static_cast<int>(ucstr.size()), | |
1073 | static_cast<int>(i), p, r, c); | |
1074 | goto dec_error; | |
1075 | } | |
1076 | ucstr += ustring(g, p, r, c); | |
1077 | i += 5; | |
1078 | } | |
1079 | else if (uc_str[i] <= 0xFD) { | |
1080 | // character encoded on 6 octets: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx | |
1081 | // 10xxxxxx 10xxxxxx (31 useful bits) | |
1082 | unsigned char octets[6]; | |
1083 | octets[0] = uc_str[i] & 0x01; | |
1084 | fill_continuing_octets(5, octets + 1, length / 2, uc_str, i + 1,ucstr.size()); | |
1085 | unsigned char g = octets[0] << 6 | octets[1]; | |
1086 | unsigned char p = octets[2] << 2 | octets[3] >> 4; | |
1087 | unsigned char r = octets[3] << 4 | octets[4] >> 2; | |
1088 | unsigned char c = octets[4] << 6 | octets[5]; | |
1089 | if (g < 0x04) { | |
1090 | ERROR("decode_utf8(): Overlong: At character position %d, octet position %d: 6-octet " | |
1091 | "encoding for quadruple (%u, %u, %u, %u).", static_cast<int>(ucstr.size()), | |
1092 | static_cast<int>(i), g, p, r, c); | |
1093 | goto dec_error; | |
1094 | } | |
1095 | ucstr += ustring(g, p, r, c); | |
1096 | i += 6; | |
1097 | } | |
1098 | else { | |
1099 | // not used code points: FE and FF => malformed | |
1100 | ERROR("decode_utf8(): Malformed: At character position %d, octet position %d: " | |
1101 | "unused/reserved octet %02X.", static_cast<int>(ucstr.size()), | |
1102 | static_cast<int>(i), uc_str[i]); | |
1103 | goto dec_error; | |
1104 | } | |
1105 | } | |
1106 | ||
1107 | dec_error: | |
1108 | delete[] uc_str; | |
1109 | return ucstr; | |
1110 | } | |
1111 | ||
1112 | } |