Sync with 5.4.0
[deliverable/titan.core.git] / compiler2 / ustring.cc
1 ///////////////////////////////////////////////////////////////////////////////
2 // Copyright (c) 2000-2015 Ericsson Telecom AB
3 // All rights reserved. This program and the accompanying materials
4 // are made available under the terms of the Eclipse Public License v1.0
5 // which accompanies this distribution, and is available at
6 // http://www.eclipse.org/legal/epl-v10.html
7 ///////////////////////////////////////////////////////////////////////////////
8 #include <stdio.h>
9 #include <string.h>
10
11 #include "../common/memory.h"
12 #include "../common/Quadruple.hh"
13 #include "error.h"
14
15 #include "string.hh"
16 #include "ustring.hh"
17 #include "PredefFunc.hh"
18
19 #include "Int.hh"
20
/** Number of bytes to allocate for a ustring_struct holding n characters.
 * One universal_char is subtracted from the count; presumably the structure
 * itself already embeds the first element (confirm in ustring.hh).
 * n is expected to be non-zero. */
#define MEMORY_SIZE(n) \
  (sizeof(ustring_struct) + sizeof(universal_char) * ((n) - 1))
24
25 void ustring::init_struct(size_t n_uchars)
26 {
27 if (n_uchars == 0) {
28 /** This will represent the empty strings so they won't need allocated
29 * memory, this delays the memory allocation until it is really needed. */
30 static ustring_struct empty_string = { 1, 0, { { '\0', '\0', '\0', '\0' } } };
31 val_ptr = &empty_string;
32 empty_string.ref_count++;
33 } else {
34 val_ptr = (ustring_struct*)Malloc(MEMORY_SIZE(n_uchars));
35 val_ptr->ref_count = 1;
36 val_ptr->n_uchars = n_uchars;
37 }
38 }
39
40 void ustring::enlarge_memory(size_t incr)
41 {
42 if (incr > max_string_len - val_ptr->n_uchars)
43 FATAL_ERROR("ustring::enlarge_memory(size_t): length overflow");
44 size_t new_length = val_ptr->n_uchars + incr;
45 if (val_ptr->ref_count == 1) {
46 val_ptr = (ustring_struct*)Realloc(val_ptr, MEMORY_SIZE(new_length));
47 val_ptr->n_uchars = new_length;
48 } else {
49 ustring_struct *old_ptr = val_ptr;
50 old_ptr->ref_count--;
51 init_struct(new_length);
52 memcpy(val_ptr->uchars_ptr, old_ptr->uchars_ptr, old_ptr->n_uchars *
53 sizeof(universal_char));
54 }
55 }
56
57 void ustring::copy_value()
58 {
59 if (val_ptr->ref_count > 1) {
60 ustring_struct *old_ptr = val_ptr;
61 old_ptr->ref_count--;
62 init_struct(old_ptr->n_uchars);
63 memcpy(val_ptr->uchars_ptr, old_ptr->uchars_ptr,
64 old_ptr->n_uchars * sizeof(universal_char));
65 }
66 }
67
68 void ustring::clean_up()
69 {
70 if (val_ptr->ref_count > 1) val_ptr->ref_count--;
71 else if (val_ptr->ref_count == 1) Free(val_ptr);
72 else FATAL_ERROR("ustring::clean_up()");
73 }
74
75 int ustring::compare(const ustring& s) const
76 {
77 if (val_ptr == s.val_ptr) return 0;
78 for (size_t i = 0; ; i++) {
79 if (i == val_ptr->n_uchars) {
80 if (i == s.val_ptr->n_uchars) return 0;
81 else return -1;
82 } else if (i == s.val_ptr->n_uchars) return 1;
83 else if (val_ptr->uchars_ptr[i].group > s.val_ptr->uchars_ptr[i].group)
84 return 1;
85 else if (val_ptr->uchars_ptr[i].group < s.val_ptr->uchars_ptr[i].group)
86 return -1;
87 else if (val_ptr->uchars_ptr[i].plane > s.val_ptr->uchars_ptr[i].plane)
88 return 1;
89 else if (val_ptr->uchars_ptr[i].plane < s.val_ptr->uchars_ptr[i].plane)
90 return -1;
91 else if (val_ptr->uchars_ptr[i].row > s.val_ptr->uchars_ptr[i].row)
92 return 1;
93 else if (val_ptr->uchars_ptr[i].row < s.val_ptr->uchars_ptr[i].row)
94 return -1;
95 else if (val_ptr->uchars_ptr[i].cell > s.val_ptr->uchars_ptr[i].cell)
96 return 1;
97 else if (val_ptr->uchars_ptr[i].cell < s.val_ptr->uchars_ptr[i].cell)
98 return -1;
99 }
100 return 0; // should never get here
101 }
102
103 ustring::ustring(unsigned char p_group, unsigned char p_plane,
104 unsigned char p_row, unsigned char p_cell)
105 {
106 init_struct(1);
107 val_ptr->uchars_ptr[0].group = p_group;
108 val_ptr->uchars_ptr[0].plane = p_plane;
109 val_ptr->uchars_ptr[0].row = p_row;
110 val_ptr->uchars_ptr[0].cell = p_cell;
111 }
112
113 ustring::ustring(size_t n, const universal_char *uc_ptr)
114 {
115 // Check for UTF8 encoding and decode it
116 // incase the editor encoded the TTCN-3 file with UTF-8
117 string octet_str;
118 bool isUTF8 = true;
119 for (size_t i = 0; i < n; ++i) {
120 if (uc_ptr[i].group != 0 || uc_ptr[i].plane != 0 || uc_ptr[i].row != 0) {
121 // Not UTF8
122 isUTF8 = false;
123 break;
124 }
125 octet_str += Common::hexdigit_to_char(uc_ptr[i].cell / 16);
126 octet_str += Common::hexdigit_to_char(uc_ptr[i].cell % 16);
127 }
128 if (isUTF8) {
129 string* ret = Common::get_stringencoding(octet_str);
130 if ("UTF-8" != *ret) {
131 isUTF8 = false;
132 }
133 delete ret;
134 }
135 if (isUTF8) {
136 ustring s = Common::decode_utf8(octet_str, CharCoding::UTF_8);
137 val_ptr = s.val_ptr;
138 val_ptr->ref_count++;
139 } else {
140 init_struct(n);
141 memcpy(val_ptr->uchars_ptr, uc_ptr, n * sizeof(universal_char));
142 }
143 }
144
145 ustring::ustring(const string& s)
146 {
147 // Check for UTF8 encoding and decode it
148 // incase the editor encoded the TTCN-3 file with UTF-8
149 string octet_str;
150 bool isUTF8 = true;
151 size_t len = s.size();
152 for (size_t i = 0; i < len; ++i) {
153 octet_str += Common::hexdigit_to_char((unsigned char)(s[i]) / 16);
154 octet_str += Common::hexdigit_to_char((unsigned char)(s[i]) % 16);
155 }
156 if (isUTF8) {
157 string* ret = Common::get_stringencoding(octet_str);
158 if ("UTF-8" != *ret) {
159 isUTF8 = false;
160 }
161 delete ret;
162 }
163 if (isUTF8) {
164 ustring s = Common::decode_utf8(octet_str, CharCoding::UTF_8);
165 val_ptr = s.val_ptr;
166 val_ptr->ref_count++;
167 } else {
168 init_struct(s.size());
169 const char *src = s.c_str();
170 for (size_t i = 0; i < val_ptr->n_uchars; i++) {
171 val_ptr->uchars_ptr[i].group = 0;
172 val_ptr->uchars_ptr[i].plane = 0;
173 val_ptr->uchars_ptr[i].row = 0;
174 val_ptr->uchars_ptr[i].cell = src[i];
175 }
176 }
177 }
178
179 void ustring::clear()
180 {
181 if (val_ptr->n_uchars > 0) {
182 clean_up();
183 init_struct(0);
184 }
185 }
186
187 ustring ustring::substr(size_t pos, size_t n) const
188 {
189 if (pos > val_ptr->n_uchars)
190 FATAL_ERROR("ustring::substr(size_t, size_t): position is outside of string");
191 if (pos == 0 && n >= val_ptr->n_uchars) return *this;
192 if (n > val_ptr->n_uchars - pos) n = val_ptr->n_uchars - pos;
193 return ustring(n, val_ptr->uchars_ptr + pos);
194 }
195
196 void ustring::replace(size_t pos, size_t n, const ustring& s)
197 {
198 if (pos > val_ptr->n_uchars)
199 FATAL_ERROR("ustring::replace(): start position is outside the string");
200 if (pos + n > val_ptr->n_uchars)
201 FATAL_ERROR("ustring::replace(): end position is outside the string");
202 size_t s_len = s.size();
203 /* The replacement string is greater than the maximum string length. The
204 replaced characters are taken into account. */
205 if (s_len > max_string_len - val_ptr->n_uchars + n)
206 FATAL_ERROR("ustring::replace(): length overflow");
207 size_t new_size = val_ptr->n_uchars - n + s_len;
208 if (new_size == 0) {
209 clean_up();
210 init_struct(0);
211 } else {
212 ustring_struct *old_ptr = val_ptr;
213 old_ptr->ref_count--;
214 init_struct(new_size);
215 memcpy(val_ptr->uchars_ptr, old_ptr->uchars_ptr,
216 pos * sizeof(universal_char));
217 memcpy(val_ptr->uchars_ptr + pos, s.u_str(),
218 s_len * sizeof(universal_char));
219 memcpy(val_ptr->uchars_ptr + pos + s_len, old_ptr->uchars_ptr + pos + n,
220 (old_ptr->n_uchars - pos - n) * sizeof(universal_char));
221 if (old_ptr->ref_count == 0) Free(old_ptr);
222 }
223 }
224
225 string ustring::get_stringRepr() const
226 {
227 string ret_val;
228 enum { INIT, PCHAR, UCHAR } state = INIT;
229 for (size_t i = 0; i < val_ptr->n_uchars; i++) {
230 const universal_char& uchar = val_ptr->uchars_ptr[i];
231 if (uchar.group == 0 && uchar.plane == 0 && uchar.row == 0 &&
232 string::is_printable(uchar.cell)) {
233 // the actual character is printable
234 switch (state) {
235 case UCHAR: // concatenation sign if previous part was not printable
236 ret_val += " & ";
237 // no break
238 case INIT: // opening "
239 ret_val += '"';
240 // no break
241 case PCHAR: // the character itself
242 ret_val.append_stringRepr(uchar.cell);
243 break;
244 }
245 state = PCHAR;
246 } else {
247 // the actual character is not printable
248 switch (state) {
249 case PCHAR: // closing " if previous part was printable
250 ret_val += '"';
251 // no break
252 case UCHAR: // concatenation sign
253 ret_val += " & ";
254 // no break
255 case INIT: // the character itself in quadruple notation
256 ret_val += "char(";
257 ret_val += Common::Int2string(uchar.group);
258 ret_val += ", ";
259 ret_val += Common::Int2string(uchar.plane);
260 ret_val += ", ";
261 ret_val += Common::Int2string(uchar.row);
262 ret_val += ", ";
263 ret_val += Common::Int2string(uchar.cell);
264 ret_val += ')';
265 break;
266 }
267 state = UCHAR;
268 }
269 }
270 // final steps
271 switch (state) {
272 case INIT: // the string was empty
273 ret_val += "\"\"";
274 break;
275 case PCHAR: // last character was printable -> closing "
276 ret_val += '"';
277 break;
278 default:
279 break;
280 }
281 return ret_val;
282 }
283
284 string ustring::get_stringRepr_for_pattern() const {
285 string ret_val; // empty string
286 for (size_t i = 0; i < val_ptr->n_uchars; i++) {
287 const universal_char& uchar = val_ptr->uchars_ptr[i];
288 if (uchar.group == 0 && uchar.plane == 0 && uchar.row == 0 &&
289 string::is_printable(uchar.cell)) {
290 ret_val.append_stringRepr(uchar.cell);
291 } else {
292 ret_val += "\\q{";
293 ret_val += Common::Int2string(uchar.group);
294 ret_val += ",";
295 ret_val += Common::Int2string(uchar.plane);
296 ret_val += ",";
297 ret_val += Common::Int2string(uchar.row);
298 ret_val += ",";
299 ret_val += Common::Int2string(uchar.cell);
300 ret_val += "}";
301 }
302 }
303 return ret_val;
304 }
305
306 char* ustring::convert_to_regexp_form() const {
307 char* res = (char*)Malloc(val_ptr->n_uchars * 8 + 1);
308 char* ptr = res;
309 res[val_ptr->n_uchars * 8] = '\0';
310 Quad q;
311 for (size_t i = 0; i < val_ptr->n_uchars; i++, ptr += 8) {
312 const universal_char& uchar = val_ptr->uchars_ptr[i];
313 q.set(uchar.group, uchar.plane, uchar.row, uchar.cell);
314 Quad::get_hexrepr(q, ptr);
315 }
316 return res;
317 }
318
319 ustring& ustring::operator=(const ustring& s)
320 {
321 if(&s != this) {
322 clean_up();
323 val_ptr = s.val_ptr;
324 val_ptr->ref_count++;
325 }
326 return *this;
327 }
328
329 ustring::universal_char& ustring::operator[](size_t n)
330 {
331 if (n >= val_ptr->n_uchars)
332 FATAL_ERROR("ustring::operator[](size_t): position is outside the string");
333 copy_value();
334 return val_ptr->uchars_ptr[n];
335 }
336
337 const ustring::universal_char& ustring::operator[](size_t n) const
338 {
339 if (n >= val_ptr->n_uchars)
340 FATAL_ERROR("ustring::operator[](size_t) const: position is outside the string");
341 return val_ptr->uchars_ptr[n];
342 }
343
344 ustring ustring::operator+(const string& s2) const
345 {
346 size_t s2_size = s2.size();
347 if (s2_size > max_string_len - val_ptr->n_uchars)
348 FATAL_ERROR("ustring::operator+(const string&): length overflow");
349 if (s2_size > 0) {
350 ustring s(val_ptr->n_uchars + s2_size);
351 memcpy(s.val_ptr->uchars_ptr, val_ptr->uchars_ptr, val_ptr->n_uchars *
352 sizeof(universal_char));
353 const char *src = s2.c_str();
354 for (size_t i = 0; i < s2_size; i++) {
355 s.val_ptr->uchars_ptr[val_ptr->n_uchars + i].group = 0;
356 s.val_ptr->uchars_ptr[val_ptr->n_uchars + i].plane = 0;
357 s.val_ptr->uchars_ptr[val_ptr->n_uchars + i].row = 0;
358 s.val_ptr->uchars_ptr[val_ptr->n_uchars + i].cell = src[i];
359 }
360 return s;
361 } else return *this;
362 }
363
364 ustring ustring::operator+(const ustring& s2) const
365 {
366 if (s2.val_ptr->n_uchars > max_string_len - val_ptr->n_uchars)
367 FATAL_ERROR("ustring::operator+(const ustring&): length overflow");
368 if (val_ptr->n_uchars == 0) return s2;
369 else if (s2.val_ptr->n_uchars == 0) return *this;
370 else {
371 ustring s(val_ptr->n_uchars + s2.val_ptr->n_uchars);
372 memcpy(s.val_ptr->uchars_ptr, val_ptr->uchars_ptr, val_ptr->n_uchars *
373 sizeof(universal_char));
374 memcpy(s.val_ptr->uchars_ptr + val_ptr->n_uchars,
375 s2.val_ptr->uchars_ptr, s2.val_ptr->n_uchars * sizeof(universal_char));
376 return s;
377 }
378 }
379
380 ustring& ustring::operator+=(const string& s)
381 {
382 size_t s_size = s.size();
383 if (s_size > 0) {
384 size_t old_size = val_ptr->n_uchars;
385 enlarge_memory(s_size);
386 const char *src = s.c_str();
387 for (size_t i = 0; i < s_size; i++) {
388 val_ptr->uchars_ptr[old_size + i].group = 0;
389 val_ptr->uchars_ptr[old_size + i].plane = 0;
390 val_ptr->uchars_ptr[old_size + i].row = 0;
391 val_ptr->uchars_ptr[old_size + i].cell = src[i];
392 }
393 }
394 return *this;
395 }
396
397 ustring& ustring::operator+=(const ustring& s)
398 {
399 if (s.val_ptr->n_uchars > 0) {
400 if (val_ptr->n_uchars > 0) {
401 size_t old_size = val_ptr->n_uchars, s_size = s.val_ptr->n_uchars;
402 enlarge_memory(s_size);
403 memcpy(val_ptr->uchars_ptr + old_size, s.val_ptr->uchars_ptr,
404 s_size * sizeof(universal_char));
405 } else {
406 clean_up();
407 val_ptr = s.val_ptr;
408 val_ptr->ref_count++;
409 }
410 }
411 return *this;
412 }
413
414 bool ustring::operator==(const ustring& s2) const
415 {
416 if (val_ptr == s2.val_ptr) return true;
417 else if (val_ptr->n_uchars != s2.val_ptr->n_uchars) return false;
418 else return !memcmp(val_ptr->uchars_ptr, s2.val_ptr->uchars_ptr,
419 val_ptr->n_uchars * sizeof(universal_char));
420 }
421
422 bool operator==(const ustring::universal_char& uc1,
423 const ustring::universal_char& uc2)
424 {
425 return uc1.group == uc2.group && uc1.plane == uc2.plane &&
426 uc1.row == uc2.row && uc1.cell == uc2.cell;
427 }
428
429 bool operator<(const ustring::universal_char& uc1,
430 const ustring::universal_char& uc2)
431 {
432 if (uc1.group < uc2.group) return true;
433 else if (uc1.group > uc2.group) return false;
434 else if (uc1.plane < uc2.plane) return true;
435 else if (uc1.plane > uc2.plane) return false;
436 else if (uc1.row < uc2.row) return true;
437 else if (uc1.row > uc2.row) return false;
438 else return uc1.cell < uc2.cell;
439 }
440
441 string ustring_to_uft8(const ustring& ustr)
442 {
443 string ret_val;
444 for(size_t i = 0; i < ustr.size(); i++) {
445 unsigned char g = ustr[i].group;
446 unsigned char p = ustr[i].plane;
447 unsigned char r = ustr[i].row;
448 unsigned char c = ustr[i].cell;
449 if(g == 0x00 && p <= 0x1F) {
450 if(p == 0x00) {
451 if(r == 0x00 && c <= 0x7F) {
452 // 1 octet
453 ret_val += c;
454 } // r
455 // 2 octets
456 else if(r <= 0x07) {
457 ret_val += (0xC0 | r << 2 | c >> 6);
458 ret_val += (0x80 | (c & 0x3F));
459 } // r
460 // 3 octets
461 else {
462 ret_val += (0xE0 | r >> 4);
463 ret_val += (0x80 | (r << 2 & 0x3C) | c >> 6);
464 ret_val += (0x80 | (c & 0x3F));
465 } // r
466 } // p
467 // 4 octets
468 else {
469 ret_val += (0xF0 | p >> 2);
470 ret_val += (0x80 | (p << 4 & 0x30) | r >> 4);
471 ret_val += (0x80 | (r << 2 & 0x3C) | c >> 6);
472 ret_val += (0x80 | (c & 0x3F));
473 } // p
474 } //g
475 // 5 octets
476 else if(g <= 0x03) {
477 ret_val += (0xF8 | g);
478 ret_val += (0x80 | p >> 2);
479 ret_val += (0x80 | (p << 4 & 0x30) | r >> 4);
480 ret_val += (0x80 | (r << 2 & 0x3C) | c >> 6);
481 ret_val += (0x80 | (c & 0x3F));
482 } // g
483 // 6 octets
484 else {
485 ret_val += (0xFC | g >> 6);
486 ret_val += (0x80 | (g & 0x3F));
487 ret_val += (0x80 | p >> 2);
488 ret_val += (0x80 | (p << 4 & 0x30) | r >> 4);
489 ret_val += (0x80 | (r << 2 & 0x3C) | c >> 6);
490 ret_val += (0x80 | (c & 0x3F));
491 }
492 } // for i
493 return ret_val;
494 }
This page took 0.042464 seconds and 5 git commands to generate.