compiler2/ustring.cc

   1 ///////////////////////////////////////////////////////////////////////////////
   2 // Copyright (c) 2000-2015 Ericsson Telecom AB
   3 // All rights reserved. This program and the accompanying materials
   4 // are made available under the terms of the Eclipse Public License v1.0
   5 // which accompanies this distribution, and is available at
   6 // http://www.eclipse.org/legal/epl-v10.html
   7 ///////////////////////////////////////////////////////////////////////////////
   8 #include <stdio.h>
   9 #include <string.h>
  10
  11 #include "../common/memory.h"
  12 #include "../common/Quadruple.hh"
  13 #include "error.h"
  14
  15 #include "string.hh"
  16 #include "ustring.hh"
  17 #include "PredefFunc.hh"
  18
  19 #include "Int.hh"
  20
  21 /** The amount of memory needed for an ustring containing n characters. */
  22 #define MEMORY_SIZE(n) (sizeof(ustring_struct) + \
  23   ((n) - 1) * sizeof(universal_char))
  24
  25 void ustring::init_struct(size_t n_uchars)
  26 {
  27   if (n_uchars == 0) {
  28     /** This will represent the empty strings so they won't need allocated
  29      * memory, this delays the memory allocation until it is really needed. */
  30     static ustring_struct empty_string = { 1, 0, { { '\0', '\0', '\0', '\0' } } };
  31     val_ptr = &empty_string;
  32     empty_string.ref_count++;
  33   } else {
  34     val_ptr = (ustring_struct*)Malloc(MEMORY_SIZE(n_uchars));
  35     val_ptr->ref_count = 1;
  36     val_ptr->n_uchars = n_uchars;
  37   }
  38 }
  39
  40 void ustring::enlarge_memory(size_t incr)
  41 {
  42   if (incr > max_string_len - val_ptr->n_uchars)
  43     FATAL_ERROR("ustring::enlarge_memory(size_t): length overflow");
  44   size_t new_length = val_ptr->n_uchars + incr;
  45   if (val_ptr->ref_count == 1) {
  46     val_ptr = (ustring_struct*)Realloc(val_ptr, MEMORY_SIZE(new_length));
  47     val_ptr->n_uchars = new_length;
  48   } else {
  49     ustring_struct *old_ptr = val_ptr;
  50     old_ptr->ref_count--;
  51     init_struct(new_length);
  52     memcpy(val_ptr->uchars_ptr, old_ptr->uchars_ptr, old_ptr->n_uchars *
  53       sizeof(universal_char));
  54   }
  55 }
  56
  57 void ustring::copy_value()
  58 {
  59   if (val_ptr->ref_count > 1) {
  60     ustring_struct *old_ptr = val_ptr;
  61     old_ptr->ref_count--;
  62     init_struct(old_ptr->n_uchars);
  63     memcpy(val_ptr->uchars_ptr, old_ptr->uchars_ptr,
  64       old_ptr->n_uchars * sizeof(universal_char));
  65   }
  66 }
  67
  68 void ustring::clean_up()
  69 {
  70   if (val_ptr->ref_count > 1) val_ptr->ref_count--;
  71   else if (val_ptr->ref_count == 1) Free(val_ptr);
  72   else FATAL_ERROR("ustring::clean_up()");
  73 }
  74
  75 int ustring::compare(const ustring& s) const
  76 {
  77   if (val_ptr == s.val_ptr) return 0;
  78   for (size_t i = 0; ; i++) {
  79     if (i == val_ptr->n_uchars) {
  80       if (i == s.val_ptr->n_uchars) return 0;
  81       else return -1;
  82     } else if (i == s.val_ptr->n_uchars) return 1;
  83     else if (val_ptr->uchars_ptr[i].group > s.val_ptr->uchars_ptr[i].group)
  84       return 1;
  85     else if (val_ptr->uchars_ptr[i].group < s.val_ptr->uchars_ptr[i].group)
  86       return -1;
  87     else if (val_ptr->uchars_ptr[i].plane > s.val_ptr->uchars_ptr[i].plane)
  88       return 1;
  89     else if (val_ptr->uchars_ptr[i].plane < s.val_ptr->uchars_ptr[i].plane)
  90       return -1;
  91     else if (val_ptr->uchars_ptr[i].row > s.val_ptr->uchars_ptr[i].row)
  92       return 1;
  93     else if (val_ptr->uchars_ptr[i].row < s.val_ptr->uchars_ptr[i].row)
  94       return -1;
  95     else if (val_ptr->uchars_ptr[i].cell > s.val_ptr->uchars_ptr[i].cell)
  96       return 1;
  97     else if (val_ptr->uchars_ptr[i].cell < s.val_ptr->uchars_ptr[i].cell)
  98       return -1;
  99   }
 100   return 0; // should never get here
 101 }
 102
 103 ustring::ustring(unsigned char p_group, unsigned char p_plane,
 104   unsigned char p_row, unsigned char p_cell)
 105 {
 106   init_struct(1);
 107   val_ptr->uchars_ptr[0].group = p_group;
 108   val_ptr->uchars_ptr[0].plane = p_plane;
 109   val_ptr->uchars_ptr[0].row = p_row;
 110   val_ptr->uchars_ptr[0].cell = p_cell;
 111 }
 112
 113 ustring::ustring(size_t n, const universal_char *uc_ptr)
 114 {
 115   // Check for UTF8 encoding and decode it
 116   // incase the editor encoded the TTCN-3 file with UTF-8
 117   string octet_str;
 118   bool isUTF8 = true;
 119   for (size_t i = 0; i < n; ++i) {
 120     if (uc_ptr[i].group != 0 || uc_ptr[i].plane != 0 || uc_ptr[i].row != 0) {
 121       // Not UTF8
 122       isUTF8 = false;
 123       break;
 124     }
 125     octet_str += Common::hexdigit_to_char(uc_ptr[i].cell / 16);
 126     octet_str += Common::hexdigit_to_char(uc_ptr[i].cell % 16);
 127   }
 128   if (isUTF8) {
 129     string* ret = Common::get_stringencoding(octet_str);
 130     if ("UTF-8" != *ret) {
 131       isUTF8 = false;
 132     }
 133     delete ret;
 134   }
 135   if (isUTF8) {
 136     ustring s = Common::decode_utf8(octet_str, CharCoding::UTF_8);
 137     val_ptr = s.val_ptr;
 138     val_ptr->ref_count++;
 139   } else {
 140     init_struct(n);
 141     memcpy(val_ptr->uchars_ptr, uc_ptr, n * sizeof(universal_char));
 142   }
 143 }
 144
 145 ustring::ustring(const string& s)
 146 {
 147   // Check for UTF8 encoding and decode it
 148   // incase the editor encoded the TTCN-3 file with UTF-8
 149   string octet_str;
 150   bool isUTF8 = true;
 151   size_t len = s.size();
 152   for (size_t i = 0; i < len; ++i) {
 153     octet_str += Common::hexdigit_to_char((unsigned char)(s[i]) / 16);
 154     octet_str += Common::hexdigit_to_char((unsigned char)(s[i]) % 16);
 155   }
 156   if (isUTF8) {
 157     string* ret = Common::get_stringencoding(octet_str);
 158     if ("UTF-8" != *ret) {
 159       isUTF8 = false;
 160     }
 161     delete ret;
 162   }
 163   if (isUTF8) {
 164     ustring s = Common::decode_utf8(octet_str, CharCoding::UTF_8);
 165     val_ptr = s.val_ptr;
 166     val_ptr->ref_count++;
 167   } else {
 168     init_struct(s.size());
 169     const char *src = s.c_str();
 170     for (size_t i = 0; i < val_ptr->n_uchars; i++) {
 171       val_ptr->uchars_ptr[i].group = 0;
 172       val_ptr->uchars_ptr[i].plane = 0;
 173       val_ptr->uchars_ptr[i].row = 0;
 174       val_ptr->uchars_ptr[i].cell = src[i];
 175     }
 176   }
 177 }
 178
 179 void ustring::clear()
 180 {
 181   if (val_ptr->n_uchars > 0) {
 182     clean_up();
 183     init_struct(0);
 184   }
 185 }
 186
 187 ustring ustring::substr(size_t pos, size_t n) const
 188 {
 189   if (pos > val_ptr->n_uchars)
 190     FATAL_ERROR("ustring::substr(size_t, size_t): position is outside of string");
 191   if (pos == 0 && n >= val_ptr->n_uchars) return *this;
 192   if (n > val_ptr->n_uchars - pos) n = val_ptr->n_uchars - pos;
 193   return ustring(n, val_ptr->uchars_ptr + pos);
 194 }
 195
 196 void ustring::replace(size_t pos, size_t n, const ustring& s)
 197 {
 198   if (pos > val_ptr->n_uchars)
 199     FATAL_ERROR("ustring::replace(): start position is outside the string");
 200   if (pos + n > val_ptr->n_uchars)
 201     FATAL_ERROR("ustring::replace(): end position is outside the string");
 202   size_t s_len = s.size();
 203   /* The replacement string is greater than the maximum string length.  The
 204      replaced characters are taken into account.  */
 205   if (s_len > max_string_len - val_ptr->n_uchars + n)
 206         FATAL_ERROR("ustring::replace(): length overflow");
 207   size_t new_size = val_ptr->n_uchars - n + s_len;
 208   if (new_size == 0) {
 209     clean_up();
 210     init_struct(0);
 211   } else {
 212     ustring_struct *old_ptr = val_ptr;
 213     old_ptr->ref_count--;
 214         init_struct(new_size);
 215         memcpy(val_ptr->uchars_ptr, old_ptr->uchars_ptr,
 216                    pos * sizeof(universal_char));
 217     memcpy(val_ptr->uchars_ptr + pos, s.u_str(),
 218            s_len * sizeof(universal_char));
 219     memcpy(val_ptr->uchars_ptr + pos + s_len, old_ptr->uchars_ptr + pos + n,
 220                (old_ptr->n_uchars - pos - n) * sizeof(universal_char));
 221         if (old_ptr->ref_count == 0) Free(old_ptr);
 222   }
 223 }
 224
 225 string ustring::get_stringRepr() const
 226 {
 227   string ret_val;
 228   enum { INIT, PCHAR, UCHAR } state = INIT;
 229   for (size_t i = 0; i < val_ptr->n_uchars; i++) {
 230     const universal_char& uchar = val_ptr->uchars_ptr[i];
 231     if (uchar.group == 0 && uchar.plane == 0 && uchar.row == 0 &&
 232         string::is_printable(uchar.cell)) {
 233       // the actual character is printable
 234       switch (state) {
 235       case UCHAR: // concatenation sign if previous part was not printable
 236         ret_val += " & ";
 237         // no break
 238       case INIT: // opening "
 239         ret_val += '"';
 240         // no break
 241       case PCHAR: // the character itself
 242         ret_val.append_stringRepr(uchar.cell);
 243         break;
 244       }
 245       state = PCHAR;
 246     } else {
 247       // the actual character is not printable
 248       switch (state) {
 249       case PCHAR: // closing " if previous part was printable
 250         ret_val += '"';
 251         // no break
 252       case UCHAR: // concatenation sign
 253         ret_val += " & ";
 254         // no break
 255       case INIT: // the character itself in quadruple notation
 256         ret_val += "char(";
 257         ret_val += Common::Int2string(uchar.group);
 258         ret_val += ", ";
 259         ret_val += Common::Int2string(uchar.plane);
 260         ret_val += ", ";
 261         ret_val += Common::Int2string(uchar.row);
 262         ret_val += ", ";
 263         ret_val += Common::Int2string(uchar.cell);
 264         ret_val += ')';
 265         break;
 266       }
 267       state = UCHAR;
 268     }
 269   }
 270   // final steps
 271   switch (state) {
 272   case INIT: // the string was empty
 273     ret_val += "\"\"";
 274     break;
 275   case PCHAR: // last character was printable -> closing "
 276     ret_val += '"';
 277     break;
 278   default:
 279     break;
 280   }
 281   return ret_val;
 282 }
 283
 284 string ustring::get_stringRepr_for_pattern() const {
 285   string ret_val; // empty string
 286   for (size_t i = 0; i < val_ptr->n_uchars; i++) {
 287     const universal_char& uchar = val_ptr->uchars_ptr[i];
 288     if (uchar.group == 0 && uchar.plane == 0 && uchar.row == 0 &&
 289       string::is_printable(uchar.cell)) {
 290       ret_val.append_stringRepr(uchar.cell);
 291     } else {
 292       ret_val += "\\q{";
 293       ret_val += Common::Int2string(uchar.group);
 294       ret_val += ",";
 295       ret_val += Common::Int2string(uchar.plane);
 296       ret_val += ",";
 297       ret_val += Common::Int2string(uchar.row);
 298       ret_val += ",";
 299       ret_val += Common::Int2string(uchar.cell);
 300       ret_val += "}";
 301     }
 302   }
 303   return ret_val;
 304 }
 305
 306 char* ustring::convert_to_regexp_form() const {
 307   char* res = (char*)Malloc(val_ptr->n_uchars * 8 + 1);
 308   char* ptr = res;
 309   res[val_ptr->n_uchars * 8] = '\0';
 310   Quad q;
 311   for (size_t i = 0; i < val_ptr->n_uchars; i++, ptr += 8) {
 312     const universal_char& uchar = val_ptr->uchars_ptr[i];
 313     q.set(uchar.group, uchar.plane, uchar.row, uchar.cell);
 314     Quad::get_hexrepr(q, ptr);
 315   }
 316   return res;
 317 }
 318
 319 ustring& ustring::operator=(const ustring& s)
 320 {
 321   if(&s != this) {
 322     clean_up();
 323     val_ptr = s.val_ptr;
 324     val_ptr->ref_count++;
 325   }
 326   return *this;
 327 }
 328
 329 ustring::universal_char& ustring::operator[](size_t n)
 330 {
 331   if (n >= val_ptr->n_uchars)
 332     FATAL_ERROR("ustring::operator[](size_t): position is outside the string");
 333   copy_value();
 334   return val_ptr->uchars_ptr[n];
 335 }
 336
 337 const ustring::universal_char& ustring::operator[](size_t n) const
 338 {
 339   if (n >= val_ptr->n_uchars)
 340     FATAL_ERROR("ustring::operator[](size_t) const: position is outside the string");
 341   return val_ptr->uchars_ptr[n];
 342 }
 343
 344 ustring ustring::operator+(const string& s2) const
 345 {
 346   size_t s2_size = s2.size();
 347   if (s2_size > max_string_len - val_ptr->n_uchars)
 348     FATAL_ERROR("ustring::operator+(const string&): length overflow");
 349   if (s2_size > 0) {
 350     ustring s(val_ptr->n_uchars + s2_size);
 351     memcpy(s.val_ptr->uchars_ptr, val_ptr->uchars_ptr, val_ptr->n_uchars *
 352       sizeof(universal_char));
 353     const char *src = s2.c_str();
 354     for (size_t i = 0; i < s2_size; i++) {
 355       s.val_ptr->uchars_ptr[val_ptr->n_uchars + i].group = 0;
 356       s.val_ptr->uchars_ptr[val_ptr->n_uchars + i].plane = 0;
 357       s.val_ptr->uchars_ptr[val_ptr->n_uchars + i].row = 0;
 358       s.val_ptr->uchars_ptr[val_ptr->n_uchars + i].cell = src[i];
 359     }
 360     return s;
 361   } else return *this;
 362 }
 363
 364 ustring ustring::operator+(const ustring& s2) const
 365 {
 366   if (s2.val_ptr->n_uchars > max_string_len - val_ptr->n_uchars)
 367     FATAL_ERROR("ustring::operator+(const ustring&): length overflow");
 368   if (val_ptr->n_uchars == 0) return s2;
 369   else if (s2.val_ptr->n_uchars == 0) return *this;
 370   else {
 371     ustring s(val_ptr->n_uchars + s2.val_ptr->n_uchars);
 372     memcpy(s.val_ptr->uchars_ptr, val_ptr->uchars_ptr, val_ptr->n_uchars *
 373       sizeof(universal_char));
 374     memcpy(s.val_ptr->uchars_ptr + val_ptr->n_uchars,
 375       s2.val_ptr->uchars_ptr, s2.val_ptr->n_uchars * sizeof(universal_char));
 376     return s;
 377   }
 378 }
 379
 380 ustring& ustring::operator+=(const string& s)
 381 {
 382   size_t s_size = s.size();
 383   if (s_size > 0) {
 384     size_t old_size = val_ptr->n_uchars;
 385     enlarge_memory(s_size);
 386     const char *src = s.c_str();
 387     for (size_t i = 0; i < s_size; i++) {
 388       val_ptr->uchars_ptr[old_size + i].group = 0;
 389       val_ptr->uchars_ptr[old_size + i].plane = 0;
 390       val_ptr->uchars_ptr[old_size + i].row = 0;
 391       val_ptr->uchars_ptr[old_size + i].cell = src[i];
 392     }
 393   }
 394   return *this;
 395 }
 396
 397 ustring& ustring::operator+=(const ustring& s)
 398 {
 399   if (s.val_ptr->n_uchars > 0) {
 400     if (val_ptr->n_uchars > 0) {
 401       size_t old_size = val_ptr->n_uchars, s_size = s.val_ptr->n_uchars;
 402       enlarge_memory(s_size);
 403       memcpy(val_ptr->uchars_ptr + old_size, s.val_ptr->uchars_ptr,
 404         s_size * sizeof(universal_char));
 405     } else {
 406       clean_up();
 407       val_ptr = s.val_ptr;
 408       val_ptr->ref_count++;
 409     }
 410   }
 411   return *this;
 412 }
 413
 414 bool ustring::operator==(const ustring& s2) const
 415 {
 416   if (val_ptr == s2.val_ptr) return true;
 417   else if (val_ptr->n_uchars != s2.val_ptr->n_uchars) return false;
 418   else return !memcmp(val_ptr->uchars_ptr, s2.val_ptr->uchars_ptr,
 419     val_ptr->n_uchars * sizeof(universal_char));
 420 }
 421
 422 bool operator==(const ustring::universal_char& uc1,
 423   const ustring::universal_char& uc2)
 424 {
 425   return uc1.group == uc2.group && uc1.plane == uc2.plane &&
 426     uc1.row == uc2.row && uc1.cell == uc2.cell;
 427 }
 428
 429 bool operator<(const ustring::universal_char& uc1,
 430   const ustring::universal_char& uc2)
 431 {
 432   if (uc1.group < uc2.group) return true;
 433   else if (uc1.group > uc2.group) return false;
 434   else if (uc1.plane < uc2.plane) return true;
 435   else if (uc1.plane > uc2.plane) return false;
 436   else if (uc1.row < uc2.row) return true;
 437   else if (uc1.row > uc2.row) return false;
 438   else return uc1.cell < uc2.cell;
 439 }
 440
 441 string ustring_to_uft8(const ustring& ustr)
 442 {
 443   string ret_val;
 444   for(size_t i = 0; i < ustr.size(); i++) {
 445     unsigned char g = ustr[i].group;
 446     unsigned char p = ustr[i].plane;
 447     unsigned char r = ustr[i].row;
 448     unsigned char c = ustr[i].cell;
 449     if(g == 0x00 && p <= 0x1F) {
 450       if(p == 0x00) {
 451         if(r == 0x00 && c <= 0x7F) {
 452           // 1 octet
 453           ret_val += c;
 454         } // r
 455         // 2 octets
 456         else if(r <= 0x07) {
 457           ret_val += (0xC0 | r << 2 | c >> 6);
 458           ret_val += (0x80 | (c & 0x3F));
 459         } // r
 460         // 3 octets
 461         else {
 462           ret_val += (0xE0 | r >> 4);
 463           ret_val += (0x80 | (r << 2 & 0x3C) | c >> 6);
 464           ret_val += (0x80 | (c & 0x3F));
 465         } // r
 466       } // p
 467       // 4 octets
 468       else {
 469         ret_val += (0xF0 | p >> 2);
 470         ret_val += (0x80 | (p << 4 & 0x30) | r >> 4);
 471         ret_val += (0x80 | (r << 2 & 0x3C) | c >> 6);
 472         ret_val += (0x80 | (c & 0x3F));
 473       } // p
 474     } //g
 475     // 5 octets
 476     else if(g <= 0x03) {
 477       ret_val += (0xF8 | g);
 478       ret_val += (0x80 | p >> 2);
 479       ret_val += (0x80 | (p << 4 & 0x30) | r >> 4);
 480       ret_val += (0x80 | (r << 2 & 0x3C) | c >> 6);
 481       ret_val += (0x80 | (c & 0x3F));
 482     } // g
 483     // 6 octets
 484     else {
 485       ret_val += (0xFC | g >> 6);
 486       ret_val += (0x80 | (g & 0x3F));
 487       ret_val += (0x80 | p >> 2);
 488       ret_val += (0x80 | (p << 4 & 0x30) | r >> 4);
 489       ret_val += (0x80 | (r << 2 & 0x3C) | c >> 6);
 490       ret_val += (0x80 | (c & 0x3F));
 491     }
 492   } // for i
 493   return ret_val;
 494 }