Sync with 5.4.2
[deliverable/titan.core.git] / common / JSON_Tokenizer.cc
1 ///////////////////////////////////////////////////////////////////////////////
2 // Copyright (c) 2000-2015 Ericsson Telecom AB
3 // All rights reserved. This program and the accompanying materials
4 // are made available under the terms of the Eclipse Public License v1.0
5 // which accompanies this distribution, and is available at
6 // http://www.eclipse.org/legal/epl-v10.html
7 ///////////////////////////////////////////////////////////////////////////////
8
9 #include <cstring>
10
11 #include "JSON_Tokenizer.hh"
12 #include "memory.h"
13 #include <cstdio>
14
// Pool of tab characters used for indenting pretty-printed output.
// put_depth() writes a suffix of this string, so the length of the pool is
// the largest indentation that can be produced.
static const char TABS[] =
  "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
  "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
  "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
  "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t";
// Number of tab characters in TABS (the terminating NUL is not counted).
const size_t MAX_TABS = sizeof(TABS) / sizeof(TABS[0]) - 1;
21
22 void JSON_Tokenizer::init(const char* p_buf, const size_t p_buf_len)
23 {
24 if (p_buf != 0 && p_buf_len != 0) {
25 buf_ptr = mcopystrn(p_buf, p_buf_len);
26 } else {
27 buf_ptr = 0;
28 }
29 buf_len = p_buf_len;
30 buf_pos = 0;
31 depth = 0;
32 previous_token = JSON_TOKEN_NONE;
33 }
34
35 JSON_Tokenizer::~JSON_Tokenizer()
36 {
37 Free(buf_ptr);
38 }
39
40 void JSON_Tokenizer::put_c(const char c)
41 {
42 buf_ptr = mputprintf(buf_ptr, "%c", c);
43 ++buf_len;
44 }
45
46 void JSON_Tokenizer::put_s(const char* s)
47 {
48 buf_ptr = mputstr(buf_ptr, s);
49 buf_len += strlen(s);
50 }
51
52 void JSON_Tokenizer::put_depth()
53 {
54 put_s(TABS + ((depth > MAX_TABS) ? 0 : MAX_TABS - depth));
55 }
56
57 bool JSON_Tokenizer::skip_white_spaces()
58 {
59 while(buf_pos < buf_len) {
60 switch(buf_ptr[buf_pos]) {
61 case ' ':
62 case '\r':
63 case '\n':
64 case '\t':
65 case '\f':
66 ++buf_pos;
67 break;
68 default:
69 return true;
70 }
71 }
72 return false;
73 }
74
75 bool JSON_Tokenizer::check_for_string()
76 {
77 if ('\"' == buf_ptr[buf_pos]) {
78 ++buf_pos;
79 } else {
80 return false;
81 }
82 while (buf_pos < buf_len) {
83 if ('\"' == buf_ptr[buf_pos]) {
84 return true;
85 }
86 else if ('\\' == buf_ptr[buf_pos]) {
87 // skip escaped character (so escaped quotes (\") are not mistaken for the ending quotes)
88 ++buf_pos;
89 }
90 ++buf_pos;
91 }
92 return false;
93 }
94
95 bool JSON_Tokenizer::check_for_number()
96 {
97 bool first_digit = false; // first non-zero digit reached
98 bool zero = false; // first zero digit reached
99 bool decimal_point = false; // decimal point (.) reached
100 bool exponent_mark = false; // exponential mark (e or E) reached
101 bool exponent_sign = false; // sign of the exponential (- or +) reached
102
103 if ('-' == buf_ptr[buf_pos]) {
104 ++buf_pos;
105 }
106
107 while (buf_pos < buf_len) {
108 switch(buf_ptr[buf_pos]) {
109 case '.':
110 if (decimal_point || exponent_mark || (!first_digit && !zero)) {
111 return false;
112 }
113 decimal_point = true;
114 first_digit = false;
115 zero = false;
116 break;
117 case 'e':
118 case 'E':
119 if (exponent_mark || (!first_digit && !zero)) {
120 return false;
121 }
122 exponent_mark = true;
123 first_digit = false;
124 zero = false;
125 break;
126 case '0':
127 if (!first_digit && (exponent_mark || (!decimal_point && zero))) {
128 return false;
129 }
130 zero = true;
131 break;
132 case '1':
133 case '2':
134 case '3':
135 case '4':
136 case '5':
137 case '6':
138 case '7':
139 case '8':
140 case '9':
141 if (!first_digit && zero && (!decimal_point || exponent_mark)) {
142 return false;
143 }
144 first_digit = true;
145 break;
146 case '-':
147 case '+':
148 if (exponent_sign || !exponent_mark || zero || first_digit) {
149 return false;
150 }
151 exponent_sign = true;
152 break;
153 default:
154 return first_digit || zero;
155 }
156
157 ++buf_pos;
158 }
159 return first_digit || zero;
160 }
161
162 bool JSON_Tokenizer::check_for_separator()
163 {
164 if (buf_pos < buf_len) {
165 switch(buf_ptr[buf_pos]) {
166 case ',':
167 ++buf_pos;
168 // no break
169 case ':':
170 case '{':
171 case '}':
172 case '[':
173 case ']':
174 return true;
175 default:
176 return false;
177 }
178 }
179 return true;
180 }
181
182 bool JSON_Tokenizer::check_for_literal(const char* p_literal)
183 {
184 size_t len = strlen(p_literal);
185 size_t start_pos = buf_pos;
186
187 if (buf_len - buf_pos >= len &&
188 0 == strncmp(buf_ptr + buf_pos, p_literal, len)) {
189 buf_pos += len;
190 if (!skip_white_spaces() || check_for_separator()) {
191 return true;
192 } else {
193 // must be followed by a separator (or only white spaces until the buffer ends) -> undo buffer action
194 buf_pos = start_pos;
195 }
196 }
197 return false;
198 }
199
200 int JSON_Tokenizer::get_next_token(json_token_t* p_token, char** p_token_str, size_t* p_str_len)
201 {
202 size_t start_pos = buf_pos;
203 *p_token = JSON_TOKEN_NONE;
204 if (0 != p_token_str && 0 != p_str_len) {
205 *p_token_str = 0;
206 *p_str_len = 0;
207 }
208
209 if (skip_white_spaces()) {
210 char c = buf_ptr[buf_pos];
211 switch (c) {
212 case '{':
213 case '[':
214 *p_token = ('{' == c) ? JSON_TOKEN_OBJECT_START : JSON_TOKEN_ARRAY_START;
215 ++buf_pos;
216 break;
217 case '}':
218 case ']':
219 ++buf_pos;
220 if (skip_white_spaces() && !check_for_separator()) {
221 // must be followed by a separator (or only white spaces until the buffer ends)
222 *p_token = JSON_TOKEN_ERROR;
223 } else {
224 *p_token = ('}' == c) ? JSON_TOKEN_OBJECT_END : JSON_TOKEN_ARRAY_END;
225 }
226 break;
227 case '\"': {
228 // string value or field name
229 size_t string_start_pos = buf_pos;
230 if(!check_for_string()) {
231 // invalid string value
232 *p_token = JSON_TOKEN_ERROR;
233 break;
234 }
235 size_t string_end_pos = ++buf_pos; // step over the string's ending quotes
236 if (skip_white_spaces() && ':' == buf_ptr[buf_pos]) {
237 // name token - don't include the starting and ending quotes
238 *p_token = JSON_TOKEN_NAME;
239 if (0 != p_token_str && 0 != p_str_len) {
240 *p_token_str = buf_ptr + string_start_pos + 1;
241 *p_str_len = string_end_pos - string_start_pos - 2;
242 }
243 ++buf_pos;
244 } else if (check_for_separator()) {
245 // value token - include the starting and ending quotes
246 *p_token = JSON_TOKEN_STRING;
247 if (0 != p_token_str && 0 != p_str_len) {
248 *p_token_str = buf_ptr + string_start_pos;
249 *p_str_len = string_end_pos - string_start_pos;
250 }
251 } else {
252 // value token, but there is no separator after it -> error
253 *p_token = JSON_TOKEN_ERROR;
254 break;
255 }
256 break;
257 } // case: string value or field name
258 default:
259 if (('0' <= buf_ptr[buf_pos] && '9' >= buf_ptr[buf_pos]) ||
260 '-' == buf_ptr[buf_pos]) {
261 // number value
262 size_t number_start_pos = buf_pos;
263 if (!check_for_number()) {
264 // invalid number
265 *p_token = JSON_TOKEN_ERROR;
266 break;
267 }
268 size_t number_length = buf_pos - number_start_pos;
269 if (skip_white_spaces() && !check_for_separator()) {
270 // must be followed by a separator (or only white spaces until the buffer ends)
271 *p_token = JSON_TOKEN_ERROR;
272 break;
273 }
274 *p_token = JSON_TOKEN_NUMBER;
275 if (0 != p_token_str && 0 != p_str_len) {
276 *p_token_str = buf_ptr + number_start_pos;
277 *p_str_len = number_length;
278 }
279 break;
280 } // if (number value)
281 else if (check_for_literal("true")) {
282 *p_token = JSON_TOKEN_LITERAL_TRUE;
283 break;
284 }
285 else if (check_for_literal("false")) {
286 *p_token = JSON_TOKEN_LITERAL_FALSE;
287 break;
288 }
289 else if (check_for_literal("null")) {
290 *p_token = JSON_TOKEN_LITERAL_NULL;
291 break;
292 }
293 else {
294 *p_token = JSON_TOKEN_ERROR;
295 break;
296 }
297 } // switch (current char)
298 } // if (skip_white_spaces())
299
300 return buf_pos - start_pos;
301 }
302
303 void JSON_Tokenizer::put_separator()
304 {
305 if (JSON_TOKEN_NAME != previous_token && JSON_TOKEN_NONE != previous_token &&
306 JSON_TOKEN_ARRAY_START != previous_token && JSON_TOKEN_OBJECT_START != previous_token) {
307 put_c(',');
308 if (pretty) {
309 put_c('\n');
310 put_depth();
311 }
312 }
313 }
314
315 int JSON_Tokenizer::put_next_token(json_token_t p_token, const char* p_token_str)
316 {
317 int start_len = buf_len;
318 switch(p_token) {
319 case JSON_TOKEN_OBJECT_START:
320 case JSON_TOKEN_ARRAY_START: {
321 put_separator();
322 put_c( (JSON_TOKEN_OBJECT_START == p_token) ? '{' : '[' );
323 if (pretty) {
324 put_c('\n');
325 ++depth;
326 put_depth();
327 }
328 break;
329 }
330 case JSON_TOKEN_OBJECT_END:
331 case JSON_TOKEN_ARRAY_END: {
332 if (pretty) {
333 if (JSON_TOKEN_OBJECT_START != previous_token && JSON_TOKEN_ARRAY_START != previous_token) {
334 put_c('\n');
335 --depth;
336 put_depth();
337 } else if (MAX_TABS >= depth) {
338 // empty object or array -> remove the extra tab added at the start token
339 --depth;
340 --buf_len;
341 buf_ptr[buf_len] = 0;
342 }
343 }
344 put_c( (JSON_TOKEN_OBJECT_END == p_token) ? '}' : ']' );
345 break;
346 }
347 case JSON_TOKEN_NUMBER:
348 case JSON_TOKEN_STRING:
349 put_separator();
350 put_s(p_token_str);
351 break;
352 case JSON_TOKEN_LITERAL_TRUE:
353 put_separator();
354 put_s("true");
355 break;
356 case JSON_TOKEN_LITERAL_FALSE:
357 put_separator();
358 put_s("false");
359 break;
360 case JSON_TOKEN_LITERAL_NULL:
361 put_separator();
362 put_s("null");
363 break;
364 case JSON_TOKEN_NAME:
365 put_separator();
366 put_c('\"');
367 put_s(p_token_str);
368 if (pretty) {
369 put_s("\" : ");
370 } else {
371 put_s("\":");
372 }
373 break;
374 default:
375 return 0;
376 }
377
378 previous_token = p_token;
379 return buf_len - start_len;
380 }
381
382 void JSON_Tokenizer::put_raw_data(const char* p_data, size_t p_len)
383 {
384 buf_ptr = mputstrn(buf_ptr, p_data, p_len);
385 buf_len += p_len;
386 }
387
388 char* convert_to_json_string(const char* str)
389 {
390 char* ret_val = mcopystrn("\"", 1);
391 // control characters (like \n) cannot be placed in a JSON string, replace
392 // them with JSON metacharacters
393 // double quotes and backslashes need to be escaped, too
394 size_t str_len = strlen(str);
395 for (size_t i = 0; i < str_len; ++i) {
396 switch (str[i]) {
397 case '\n':
398 ret_val = mputstrn(ret_val, "\\n", 2);
399 break;
400 case '\r':
401 ret_val = mputstrn(ret_val, "\\r", 2);
402 break;
403 case '\t':
404 ret_val = mputstrn(ret_val, "\\t", 2);
405 break;
406 case '\f':
407 ret_val = mputstrn(ret_val, "\\f", 2);
408 break;
409 case '\b':
410 ret_val = mputstrn(ret_val, "\\b", 2);
411 break;
412 case '\"':
413 ret_val = mputstrn(ret_val, "\\\"", 2);
414 break;
415 case '\\':
416 ret_val = mputstrn(ret_val, "\\\\", 2);
417 break;
418 default:
419 if (str[i] < 32 && str[i] > 0) {
420 // use the JSON \uHHHH notation for other control characters
421 // (this is just for esthetic reasons, these wouldn't break the JSON
422 // string format)
423 ret_val = mputprintf(ret_val, "\\u00%d%c", str[i] / 16,
424 (str[i] % 16 < 10) ? (str[i] % 16 + '0') : (str[i] % 16 - 10 + 'A'));
425 }
426 else {
427 ret_val = mputc(ret_val, str[i]);
428 }
429 break;
430 }
431 }
432 return mputstrn(ret_val, "\"", 1);
433 }
This page took 0.041047 seconds and 5 git commands to generate.