1 ///////////////////////////////////////////////////////////////////////////////
2 // Copyright (c) 2000-2015 Ericsson Telecom AB
3 // All rights reserved. This program and the accompanying materials
4 // are made available under the terms of the Eclipse Public License v1.0
5 // which accompanies this distribution, and is available at
6 // http://www.eclipse.org/legal/epl-v10.html
7 ///////////////////////////////////////////////////////////////////////////////
11 #include "../common/memory.h"
12 #include "../common/Quadruple.hh"
17 #include "PredefFunc.hh"
/** Number of bytes to allocate for a ustring_struct that stores \a n
  * characters. One universal_char is subtracted from the count; presumably
  * ustring_struct itself already embeds the first character slot
  * (TODO confirm against the struct declaration). */
#define MEMORY_SIZE(n) \
  (sizeof(ustring_struct) + ((n) - 1) * sizeof(universal_char))
/** Makes val_ptr refer to storage for \a n_uchars characters.
  * Two paths are visible below: one points val_ptr at a function-local
  * static empty_string and increments its ref_count; the other Malloc()s
  * MEMORY_SIZE(n_uchars) bytes and sets ref_count to 1 and the stored length
  * to n_uchars.
  * NOTE(review): the lines that choose between the two paths are not present
  * in this listing; presumably n_uchars == 0 selects the static - confirm. */
25 void ustring::init_struct(size_t n_uchars
)
28 /** This will represent the empty strings so they won't need allocated
29 * memory, this delays the memory allocation until it is really needed. */
30 static ustring_struct empty_string
= { 1, 0, { { '\0', '\0', '\0', '\0' } } };
31 val_ptr
= &empty_string
;
32 empty_string
.ref_count
++;
34 val_ptr
= (ustring_struct
*)Malloc(MEMORY_SIZE(n_uchars
));
35 val_ptr
->ref_count
= 1;
36 val_ptr
->n_uchars
= n_uchars
;
/** Extends the string's storage by \a incr characters.
  * Calls FATAL_ERROR when incr is larger than max_string_len minus the
  * current length. When ref_count is 1 the buffer is Realloc()ed and the
  * stored length updated. The other visible path remembers the old buffer,
  * calls init_struct(new_length) and copies the old characters across.
  * NOTE(review): the lines separating the two paths (and any ref_count
  * adjustment on old_ptr) are missing from this listing - confirm. */
40 void ustring::enlarge_memory(size_t incr
)
42 if (incr
> max_string_len
- val_ptr
->n_uchars
)
43 FATAL_ERROR("ustring::enlarge_memory(size_t): length overflow");
44 size_t new_length
= val_ptr
->n_uchars
+ incr
;
45 if (val_ptr
->ref_count
== 1) {
46 val_ptr
= (ustring_struct
*)Realloc(val_ptr
, MEMORY_SIZE(new_length
));
47 val_ptr
->n_uchars
= new_length
;
49 ustring_struct
*old_ptr
= val_ptr
;
51 init_struct(new_length
);
52 memcpy(val_ptr
->uchars_ptr
, old_ptr
->uchars_ptr
, old_ptr
->n_uchars
*
53 sizeof(universal_char
));
/** Gives this object its own buffer when the current one has ref_count > 1:
  * a new buffer of the same length is set up with init_struct() and the
  * characters are copied into it with memcpy().
  * NOTE(review): the line between saving old_ptr and calling init_struct()
  * is not present in this listing; presumably it releases this object's
  * reference on the old buffer - confirm. */
57 void ustring::copy_value()
59 if (val_ptr
->ref_count
> 1) {
60 ustring_struct
*old_ptr
= val_ptr
;
62 init_struct(old_ptr
->n_uchars
);
63 memcpy(val_ptr
->uchars_ptr
, old_ptr
->uchars_ptr
,
64 old_ptr
->n_uchars
* sizeof(universal_char
));
68 void ustring::clean_up()
70 if (val_ptr
->ref_count
> 1) val_ptr
->ref_count
--;
71 else if (val_ptr
->ref_count
== 1) Free(val_ptr
);
72 else FATAL_ERROR("ustring::clean_up()");
/** Three-way comparison of this string with \a s.
  * Returns 0 when both objects use the same buffer or when both strings end
  * at the same index, and 1 when \a s ends first. Otherwise the characters at
  * each index are compared field by field in the order group, plane, row,
  * cell.
  * NOTE(review): the statements that follow most of the conditions below are
  * missing from this listing; presumably they return 1 for "greater" and a
  * negative value for "less" - confirm against the full file. */
75 int ustring::compare(const ustring
& s
) const
77 if (val_ptr
== s
.val_ptr
) return 0;
78 for (size_t i
= 0; ; i
++) {
79 if (i
== val_ptr
->n_uchars
) {
80 if (i
== s
.val_ptr
->n_uchars
) return 0;
82 } else if (i
== s
.val_ptr
->n_uchars
) return 1;
83 else if (val_ptr
->uchars_ptr
[i
].group
> s
.val_ptr
->uchars_ptr
[i
].group
)
85 else if (val_ptr
->uchars_ptr
[i
].group
< s
.val_ptr
->uchars_ptr
[i
].group
)
87 else if (val_ptr
->uchars_ptr
[i
].plane
> s
.val_ptr
->uchars_ptr
[i
].plane
)
89 else if (val_ptr
->uchars_ptr
[i
].plane
< s
.val_ptr
->uchars_ptr
[i
].plane
)
91 else if (val_ptr
->uchars_ptr
[i
].row
> s
.val_ptr
->uchars_ptr
[i
].row
)
93 else if (val_ptr
->uchars_ptr
[i
].row
< s
.val_ptr
->uchars_ptr
[i
].row
)
95 else if (val_ptr
->uchars_ptr
[i
].cell
> s
.val_ptr
->uchars_ptr
[i
].cell
)
97 else if (val_ptr
->uchars_ptr
[i
].cell
< s
.val_ptr
->uchars_ptr
[i
].cell
)
100 return 0; // should never get here
/** Constructs a string from a single character given as a quadruple.
  * The four arguments are stored in the group, plane, row and cell fields of
  * element 0 of the buffer.
  * NOTE(review): the allocation that must precede these stores is not
  * present in this listing - confirm. */
103 ustring::ustring(unsigned char p_group
, unsigned char p_plane
,
104 unsigned char p_row
, unsigned char p_cell
)
107 val_ptr
->uchars_ptr
[0].group
= p_group
;
108 val_ptr
->uchars_ptr
[0].plane
= p_plane
;
109 val_ptr
->uchars_ptr
[0].row
= p_row
;
110 val_ptr
->uchars_ptr
[0].cell
= p_cell
;
/** Constructs a string from \a n characters starting at \a uc_ptr.
  * The visible code scans the input for characters whose group, plane or row
  * is non-zero, appends two hex digits per cell value to octet_str, asks
  * Common::get_stringencoding() for the detected encoding, and on one path
  * decodes octet_str with Common::decode_utf8() and increments a ref_count.
  * Another visible path memcpy()s the n input characters into the buffer.
  * NOTE(review): the declarations, branch structure and allocation calls are
  * missing from this listing, so the exact conditions under which each path
  * runs cannot be stated here. Also check who owns the string returned by
  * Common::get_stringencoding(); no release of it is visible. */
113 ustring::ustring(size_t n
, const universal_char
*uc_ptr
)
115 // Check for UTF8 encoding and decode it
116 // in case the editor encoded the TTCN-3 file with UTF-8
119 for (size_t i
= 0; i
< n
; ++i
) {
120 if (uc_ptr
[i
].group
!= 0 || uc_ptr
[i
].plane
!= 0 || uc_ptr
[i
].row
!= 0) {
125 octet_str
+= Common::hexdigit_to_char(uc_ptr
[i
].cell
/ 16);
126 octet_str
+= Common::hexdigit_to_char(uc_ptr
[i
].cell
% 16);
129 string
* ret
= Common::get_stringencoding(octet_str
);
130 if ("UTF-8" != *ret
) {
136 ustring s
= Common::decode_utf8(octet_str
, CharCoding::UTF_8
);
138 val_ptr
->ref_count
++;
141 memcpy(val_ptr
->uchars_ptr
, uc_ptr
, n
* sizeof(universal_char
));
/** Constructs a universal string from the single-byte string \a s.
  * The visible code turns every byte of s into two hex digits appended to
  * octet_str, asks Common::get_stringencoding() for the detected encoding,
  * and on one path decodes octet_str with Common::decode_utf8() and
  * increments a ref_count. On another visible path a buffer of s.size()
  * characters is set up and each byte is stored in the cell field with
  * group, plane and row set to 0.
  * NOTE(review): declarations and branch structure are missing from this
  * listing; which path runs for which encoding result must be confirmed
  * against the full file. Also check who owns the string returned by
  * Common::get_stringencoding(); no release of it is visible. */
145 ustring::ustring(const string
& s
)
147 // Check for UTF8 encoding and decode it
148 // in case the editor encoded the TTCN-3 file with UTF-8
151 size_t len
= s
.size();
152 for (size_t i
= 0; i
< len
; ++i
) {
153 octet_str
+= Common::hexdigit_to_char((unsigned char)(s
[i
]) / 16);
154 octet_str
+= Common::hexdigit_to_char((unsigned char)(s
[i
]) % 16);
157 string
* ret
= Common::get_stringencoding(octet_str
);
158 if ("UTF-8" != *ret
) {
164 ustring s
= Common::decode_utf8(octet_str
, CharCoding::UTF_8
);
166 val_ptr
->ref_count
++;
168 init_struct(s
.size());
169 const char *src
= s
.c_str();
170 for (size_t i
= 0; i
< val_ptr
->n_uchars
; i
++) {
171 val_ptr
->uchars_ptr
[i
].group
= 0;
172 val_ptr
->uchars_ptr
[i
].plane
= 0;
173 val_ptr
->uchars_ptr
[i
].row
= 0;
174 val_ptr
->uchars_ptr
[i
].cell
= src
[i
];
/** Acts only when the string currently holds at least one character.
  * NOTE(review): the statements inside the condition are missing from this
  * listing; presumably the buffer is released and replaced by the empty
  * string - confirm. */
179 void ustring::clear()
181 if (val_ptr
->n_uchars
> 0) {
187 ustring
ustring::substr(size_t pos
, size_t n
) const
189 if (pos
> val_ptr
->n_uchars
)
190 FATAL_ERROR("ustring::substr(size_t, size_t): position is outside of string");
191 if (pos
== 0 && n
>= val_ptr
->n_uchars
) return *this;
192 if (n
> val_ptr
->n_uchars
- pos
) n
= val_ptr
->n_uchars
- pos
;
193 return ustring(n
, val_ptr
->uchars_ptr
+ pos
);
/** Replaces \a n characters starting at index \a pos with the contents of
  * \a s.
  * FATAL_ERROR is raised when pos is past the end, when pos + n is past the
  * end, or when the resulting length would exceed max_string_len. On the
  * visible path a new buffer of the resulting size is created and filled
  * from three pieces: the characters before pos, the characters of s, and
  * the characters after the replaced range. The old buffer is freed when its
  * ref_count has dropped to 0.
  * NOTE(review): the lines between computing new_size and saving old_ptr are
  * missing from this listing - confirm what they handle. */
196 void ustring::replace(size_t pos
, size_t n
, const ustring
& s
)
198 if (pos
> val_ptr
->n_uchars
)
199 FATAL_ERROR("ustring::replace(): start position is outside the string");
// NOTE(review): pos + n is computed in size_t and can wrap around for very
// large n, which would let this check pass; comparing n against
// n_uchars - pos would avoid that.
200 if (pos
+ n
> val_ptr
->n_uchars
)
201 FATAL_ERROR("ustring::replace(): end position is outside the string");
202 size_t s_len
= s
.size();
203 /* The replacement string is greater than the maximum string length. The
204 replaced characters are taken into account. */
205 if (s_len
> max_string_len
- val_ptr
->n_uchars
+ n
)
206 FATAL_ERROR("ustring::replace(): length overflow");
207 size_t new_size
= val_ptr
->n_uchars
- n
+ s_len
;
212 ustring_struct
*old_ptr
= val_ptr
;
213 old_ptr
->ref_count
--;
214 init_struct(new_size
);
215 memcpy(val_ptr
->uchars_ptr
, old_ptr
->uchars_ptr
,
216 pos
* sizeof(universal_char
));
217 memcpy(val_ptr
->uchars_ptr
+ pos
, s
.u_str(),
218 s_len
* sizeof(universal_char
));
219 memcpy(val_ptr
->uchars_ptr
+ pos
+ s_len
, old_ptr
->uchars_ptr
+ pos
+ n
,
220 (old_ptr
->n_uchars
- pos
- n
) * sizeof(universal_char
));
221 if (old_ptr
->ref_count
== 0) Free(old_ptr
);
/** Builds a textual representation of the string.
  * A small state machine (INIT, PCHAR, UCHAR) tracks what kind of output was
  * produced for the previous character. A character counts as printable when
  * group, plane and row are 0 and string::is_printable() accepts its cell;
  * such a character is appended with append_stringRepr(). For other
  * characters the four fields are appended as numbers via
  * Common::Int2string().
  * NOTE(review): the switch statements, the literal text appended in each
  * case and the return statement are missing from this listing; the existing
  * case comments are the only description of them available here. */
225 string
ustring::get_stringRepr() const
228 enum { INIT
, PCHAR
, UCHAR
} state
= INIT
;
229 for (size_t i
= 0; i
< val_ptr
->n_uchars
; i
++) {
230 const universal_char
& uchar
= val_ptr
->uchars_ptr
[i
];
231 if (uchar
.group
== 0 && uchar
.plane
== 0 && uchar
.row
== 0 &&
232 string::is_printable(uchar
.cell
)) {
233 // the actual character is printable
235 case UCHAR
: // concatenation sign if previous part was not printable
238 case INIT
: // opening "
241 case PCHAR
: // the character itself
242 ret_val
.append_stringRepr(uchar
.cell
);
247 // the actual character is not printable
249 case PCHAR
: // closing " if previous part was printable
252 case UCHAR
: // concatenation sign
255 case INIT
: // the character itself in quadruple notation
257 ret_val
+= Common::Int2string(uchar
.group
);
259 ret_val
+= Common::Int2string(uchar
.plane
);
261 ret_val
+= Common::Int2string(uchar
.row
);
263 ret_val
+= Common::Int2string(uchar
.cell
);
272 case INIT
: // the string was empty
275 case PCHAR
: // last character was printable -> closing "
/** Builds a textual representation of the string, character by character.
  * A character whose group, plane and row are 0 and whose cell passes
  * string::is_printable() is appended with append_stringRepr(). For any
  * other character the four fields are appended as numbers via
  * Common::Int2string().
  * NOTE(review): the lines that append the surrounding literal text and the
  * return statement are missing from this listing - confirm the exact output
  * format against the full file. */
284 string
ustring::get_stringRepr_for_pattern() const {
285 string ret_val
; // empty string
286 for (size_t i
= 0; i
< val_ptr
->n_uchars
; i
++) {
287 const universal_char
& uchar
= val_ptr
->uchars_ptr
[i
];
288 if (uchar
.group
== 0 && uchar
.plane
== 0 && uchar
.row
== 0 &&
289 string::is_printable(uchar
.cell
)) {
290 ret_val
.append_stringRepr(uchar
.cell
);
293 ret_val
+= Common::Int2string(uchar
.group
);
295 ret_val
+= Common::Int2string(uchar
.plane
);
297 ret_val
+= Common::Int2string(uchar
.row
);
299 ret_val
+= Common::Int2string(uchar
.cell
);
/** Produces a NUL-terminated char buffer holding 8 characters for every
  * character of the string.
  * The buffer is obtained with Malloc(n_uchars * 8 + 1). For each character
  * the four fields are loaded into q and Quad::get_hexrepr() writes its
  * representation at the current output position, which advances by 8.
  * NOTE(review): the declarations of ptr and q and the return statement are
  * missing from this listing; presumably res is returned and the caller must
  * release it - confirm. */
306 char* ustring::convert_to_regexp_form() const {
307 char* res
= (char*)Malloc(val_ptr
->n_uchars
* 8 + 1);
309 res
[val_ptr
->n_uchars
* 8] = '\0';
311 for (size_t i
= 0; i
< val_ptr
->n_uchars
; i
++, ptr
+= 8) {
312 const universal_char
& uchar
= val_ptr
->uchars_ptr
[i
];
313 q
.set(uchar
.group
, uchar
.plane
, uchar
.row
, uchar
.cell
);
314 Quad::get_hexrepr(q
, ptr
);
/** Assignment from another ustring. The only visible statement increments
  * the ref_count of the buffer val_ptr refers to.
  * NOTE(review): the rest of the body (self-assignment handling, release of
  * the old buffer, taking over s.val_ptr, return) is missing from this
  * listing - confirm against the full file. */
319 ustring
& ustring::operator=(const ustring
& s
)
324 val_ptr
->ref_count
++;
/** Returns a modifiable reference to the character at index \a n.
  * An index at or beyond the length is reported through FATAL_ERROR.
  * NOTE(review): one line between the range check and the return is missing
  * from this listing; presumably it un-shares the buffer before handing out
  * a writable reference - confirm. */
329 ustring::universal_char
& ustring::operator[](size_t n
)
331 if (n
>= val_ptr
->n_uchars
)
332 FATAL_ERROR("ustring::operator[](size_t): position is outside the string");
334 return val_ptr
->uchars_ptr
[n
];
337 const ustring::universal_char
& ustring::operator[](size_t n
) const
339 if (n
>= val_ptr
->n_uchars
)
340 FATAL_ERROR("ustring::operator[](size_t) const: position is outside the string");
341 return val_ptr
->uchars_ptr
[n
];
/** Concatenates this string with the single-byte string \a s2.
  * FATAL_ERROR is raised when s2 is longer than max_string_len minus the
  * current length. A result of the combined length is created, the existing
  * characters are copied with memcpy(), and every byte of s2 is stored in
  * the cell field of the following positions with group, plane and row set
  * to 0.
  * NOTE(review): the end of the function (presumably returning s) is missing
  * from this listing. */
344 ustring
ustring::operator+(const string
& s2
) const
346 size_t s2_size
= s2
.size();
347 if (s2_size
> max_string_len
- val_ptr
->n_uchars
)
348 FATAL_ERROR("ustring::operator+(const string&): length overflow");
350 ustring
s(val_ptr
->n_uchars
+ s2_size
);
351 memcpy(s
.val_ptr
->uchars_ptr
, val_ptr
->uchars_ptr
, val_ptr
->n_uchars
*
352 sizeof(universal_char
));
353 const char *src
= s2
.c_str();
354 for (size_t i
= 0; i
< s2_size
; i
++) {
355 s
.val_ptr
->uchars_ptr
[val_ptr
->n_uchars
+ i
].group
= 0;
356 s
.val_ptr
->uchars_ptr
[val_ptr
->n_uchars
+ i
].plane
= 0;
357 s
.val_ptr
->uchars_ptr
[val_ptr
->n_uchars
+ i
].row
= 0;
358 s
.val_ptr
->uchars_ptr
[val_ptr
->n_uchars
+ i
].cell
= src
[i
];
/** Concatenates this string with \a s2.
  * FATAL_ERROR is raised when s2 is longer than max_string_len minus the
  * current length. If this string is empty s2 is returned as is, and if s2
  * is empty *this is returned. Otherwise a result of the combined length is
  * created and filled with two memcpy() calls, first from this string and
  * then from s2.
  * NOTE(review): the end of the function (presumably returning s) is missing
  * from this listing. */
364 ustring
ustring::operator+(const ustring
& s2
) const
366 if (s2
.val_ptr
->n_uchars
> max_string_len
- val_ptr
->n_uchars
)
367 FATAL_ERROR("ustring::operator+(const ustring&): length overflow");
368 if (val_ptr
->n_uchars
== 0) return s2
;
369 else if (s2
.val_ptr
->n_uchars
== 0) return *this;
371 ustring
s(val_ptr
->n_uchars
+ s2
.val_ptr
->n_uchars
);
372 memcpy(s
.val_ptr
->uchars_ptr
, val_ptr
->uchars_ptr
, val_ptr
->n_uchars
*
373 sizeof(universal_char
));
374 memcpy(s
.val_ptr
->uchars_ptr
+ val_ptr
->n_uchars
,
375 s2
.val_ptr
->uchars_ptr
, s2
.val_ptr
->n_uchars
* sizeof(universal_char
));
/** Appends the single-byte string \a s to this string.
  * The current length is remembered, enlarge_memory() grows the storage by
  * s.size() characters, and each byte of s is stored in the cell field of
  * the new positions with group, plane and row set to 0.
  * NOTE(review): one line after computing s_size and the end of the function
  * (presumably returning *this) are missing from this listing. */
380 ustring
& ustring::operator+=(const string
& s
)
382 size_t s_size
= s
.size();
384 size_t old_size
= val_ptr
->n_uchars
;
385 enlarge_memory(s_size
);
386 const char *src
= s
.c_str();
387 for (size_t i
= 0; i
< s_size
; i
++) {
388 val_ptr
->uchars_ptr
[old_size
+ i
].group
= 0;
389 val_ptr
->uchars_ptr
[old_size
+ i
].plane
= 0;
390 val_ptr
->uchars_ptr
[old_size
+ i
].row
= 0;
391 val_ptr
->uchars_ptr
[old_size
+ i
].cell
= src
[i
];
/** Appends \a s to this string. Nothing visible happens when s is empty.
  * When both strings are non-empty the storage is grown with
  * enlarge_memory() and the characters of s are copied behind the existing
  * ones. Another visible statement increments a ref_count.
  * NOTE(review): the branch that handles an empty left-hand side and the end
  * of the function are missing from this listing; presumably this object
  * then shares the buffer of s and *this is returned - confirm. */
397 ustring
& ustring::operator+=(const ustring
& s
)
399 if (s
.val_ptr
->n_uchars
> 0) {
400 if (val_ptr
->n_uchars
> 0) {
401 size_t old_size
= val_ptr
->n_uchars
, s_size
= s
.val_ptr
->n_uchars
;
402 enlarge_memory(s_size
);
403 memcpy(val_ptr
->uchars_ptr
+ old_size
, s
.val_ptr
->uchars_ptr
,
404 s_size
* sizeof(universal_char
));
408 val_ptr
->ref_count
++;
414 bool ustring::operator==(const ustring
& s2
) const
416 if (val_ptr
== s2
.val_ptr
) return true;
417 else if (val_ptr
->n_uchars
!= s2
.val_ptr
->n_uchars
) return false;
418 else return !memcmp(val_ptr
->uchars_ptr
, s2
.val_ptr
->uchars_ptr
,
419 val_ptr
->n_uchars
* sizeof(universal_char
));
422 bool operator==(const ustring::universal_char
& uc1
,
423 const ustring::universal_char
& uc2
)
425 return uc1
.group
== uc2
.group
&& uc1
.plane
== uc2
.plane
&&
426 uc1
.row
== uc2
.row
&& uc1
.cell
== uc2
.cell
;
429 bool operator<(const ustring::universal_char
& uc1
,
430 const ustring::universal_char
& uc2
)
432 if (uc1
.group
< uc2
.group
) return true;
433 else if (uc1
.group
> uc2
.group
) return false;
434 else if (uc1
.plane
< uc2
.plane
) return true;
435 else if (uc1
.plane
> uc2
.plane
) return false;
436 else if (uc1
.row
< uc2
.row
) return true;
437 else if (uc1
.row
> uc2
.row
) return false;
438 else return uc1
.cell
< uc2
.cell
;
441 string
ustring_to_uft8(const ustring
& ustr
)
444 for(size_t i
= 0; i
< ustr
.size(); i
++) {
445 unsigned char g
= ustr
[i
].group
;
446 unsigned char p
= ustr
[i
].plane
;
447 unsigned char r
= ustr
[i
].row
;
448 unsigned char c
= ustr
[i
].cell
;
449 if(g
== 0x00 && p
<= 0x1F) {
451 if(r
== 0x00 && c
<= 0x7F) {
457 ret_val
+= (0xC0 | r
<< 2 | c
>> 6);
458 ret_val
+= (0x80 | (c
& 0x3F));
462 ret_val
+= (0xE0 | r
>> 4);
463 ret_val
+= (0x80 | (r
<< 2 & 0x3C) | c
>> 6);
464 ret_val
+= (0x80 | (c
& 0x3F));
469 ret_val
+= (0xF0 | p
>> 2);
470 ret_val
+= (0x80 | (p
<< 4 & 0x30) | r
>> 4);
471 ret_val
+= (0x80 | (r
<< 2 & 0x3C) | c
>> 6);
472 ret_val
+= (0x80 | (c
& 0x3F));
477 ret_val
+= (0xF8 | g
);
478 ret_val
+= (0x80 | p
>> 2);
479 ret_val
+= (0x80 | (p
<< 4 & 0x30) | r
>> 4);
480 ret_val
+= (0x80 | (r
<< 2 & 0x3C) | c
>> 6);
481 ret_val
+= (0x80 | (c
& 0x3F));
485 ret_val
+= (0xFC | g
>> 6);
486 ret_val
+= (0x80 | (g
& 0x3F));
487 ret_val
+= (0x80 | p
>> 2);
488 ret_val
+= (0x80 | (p
<< 4 & 0x30) | r
>> 4);
489 ret_val
+= (0x80 | (r
<< 2 & 0x3C) | c
>> 6);
490 ret_val
+= (0x80 | (c
& 0x3F));