Commit | Line | Data |
---|---|---|
3abe9331 | 1 | /////////////////////////////////////////////////////////////////////////////// |
2 | // Copyright (c) 2000-2015 Ericsson Telecom AB | |
3 | // All rights reserved. This program and the accompanying materials | |
4 | // are made available under the terms of the Eclipse Public License v1.0 | |
5 | // which accompanies this distribution, and is available at | |
6 | // http://www.eclipse.org/legal/epl-v10.html | |
7 | /////////////////////////////////////////////////////////////////////////////// | |
8 | ||
970ed795 EL |
9 | #include <cstring> |
10 | ||
11 | #include "JSON_Tokenizer.hh" | |
12 | #include "memory.h" | |
13 | #include <cstdio> | |
14 | ||
// Pool of tab characters used by put_depth() to emit pretty-print
// indentation: put_depth() prints the last `depth` tabs of this array.
static const char TABS[] =
  "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
  "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
  "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
  "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t";
// Maximum nesting depth that can be pretty-printed (deeper levels are capped).
// NOTE(review): a previous comment claimed this is 64, but each literal line
// above appears to hold 20 tabs (4 x 20 = 80) -- verify sizeof(TABS) - 1.
const size_t MAX_TABS = sizeof(TABS) - 1;
21 | ||
22 | void JSON_Tokenizer::init(const char* p_buf, const size_t p_buf_len) | |
23 | { | |
24 | if (p_buf != 0 && p_buf_len != 0) { | |
25 | buf_ptr = mcopystrn(p_buf, p_buf_len); | |
26 | } else { | |
27 | buf_ptr = 0; | |
28 | } | |
29 | buf_len = p_buf_len; | |
30 | buf_pos = 0; | |
31 | depth = 0; | |
32 | previous_token = JSON_TOKEN_NONE; | |
33 | } | |
34 | ||
// Releases the internal buffer. buf_ptr may be NULL after init() with no
// data; presumably Free() tolerates NULL like free() -- it is called here
// unconditionally.
JSON_Tokenizer::~JSON_Tokenizer()
{
  Free(buf_ptr);
}
39 | ||
// Appends a single character to the end of the buffer and updates the
// tracked length (mputprintf reallocates the expandable string as needed).
void JSON_Tokenizer::put_c(const char c)
{
  buf_ptr = mputprintf(buf_ptr, "%c", c);
  ++buf_len;
}
45 | ||
// Appends a null-terminated string to the end of the buffer and updates the
// tracked length accordingly.
void JSON_Tokenizer::put_s(const char* s)
{
  buf_ptr = mputstr(buf_ptr, s);
  buf_len += strlen(s);
}
51 | ||
52 | void JSON_Tokenizer::put_depth() | |
53 | { | |
54 | put_s(TABS + ((depth > MAX_TABS) ? 0 : MAX_TABS - depth)); | |
55 | } | |
56 | ||
57 | bool JSON_Tokenizer::skip_white_spaces() | |
58 | { | |
59 | while(buf_pos < buf_len) { | |
60 | switch(buf_ptr[buf_pos]) { | |
61 | case ' ': | |
62 | case '\r': | |
63 | case '\n': | |
64 | case '\t': | |
65 | case '\f': | |
66 | ++buf_pos; | |
67 | break; | |
68 | default: | |
69 | return true; | |
70 | } | |
71 | } | |
72 | return false; | |
73 | } | |
74 | ||
75 | bool JSON_Tokenizer::check_for_string() | |
76 | { | |
77 | if ('\"' == buf_ptr[buf_pos]) { | |
78 | ++buf_pos; | |
79 | } else { | |
80 | return false; | |
81 | } | |
82 | while (buf_pos < buf_len) { | |
83 | if ('\"' == buf_ptr[buf_pos]) { | |
84 | return true; | |
85 | } | |
86 | else if ('\\' == buf_ptr[buf_pos]) { | |
87 | // skip escaped character (so escaped quotes (\") are not mistaken for the ending quotes) | |
88 | ++buf_pos; | |
89 | } | |
90 | ++buf_pos; | |
91 | } | |
92 | return false; | |
93 | } | |
94 | ||
// Attempts to read a JSON number at buf_pos with a character-by-character
// state machine (grammar: '-'? integer fraction? exponent?, no redundant
// leading zeros).
// @return true if a valid number was read; buf_pos is left on the first
//   character after it. Returns false on malformed input.
bool JSON_Tokenizer::check_for_number()
{
  bool first_digit = false; // first non-zero digit reached
  bool zero = false; // first zero digit reached
  bool decimal_point = false; // decimal point (.) reached
  bool exponent_mark = false; // exponential mark (e or E) reached
  bool exponent_sign = false; // sign of the exponential (- or +) reached

  if ('-' == buf_ptr[buf_pos]) {
    ++buf_pos;
  }

  while (buf_pos < buf_len) {
    switch(buf_ptr[buf_pos]) {
    case '.':
      // at most one decimal point, only in the mantissa, only after a digit
      if (decimal_point || exponent_mark || (!first_digit && !zero)) {
        return false;
      }
      decimal_point = true;
      // digit flags now track the fraction part
      first_digit = false;
      zero = false;
      break;
    case 'e':
    case 'E':
      // at most one exponent mark, only after a digit
      if (exponent_mark || (!first_digit && !zero)) {
        return false;
      }
      exponent_mark = true;
      // digit flags now track the exponent part
      first_digit = false;
      zero = false;
      break;
    case '0':
      // a leading zero in the integer part or in the exponent must not be
      // followed by more digits
      if (!first_digit && (exponent_mark || (!decimal_point && zero))) {
        return false;
      }
      zero = true;
      break;
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
      // a non-zero digit must not follow a leading zero (except in the
      // fraction part)
      if (!first_digit && zero && (!decimal_point || exponent_mark)) {
        return false;
      }
      first_digit = true;
      break;
    case '-':
    case '+':
      // a sign is only allowed once, directly after the exponent mark
      if (exponent_sign || !exponent_mark || zero || first_digit) {
        return false;
      }
      exponent_sign = true;
      break;
    default:
      // end of the number: valid only if its last section contains a digit
      return first_digit || zero;
    }

    ++buf_pos;
  }
  // buffer ended inside the number: same validity condition as above
  return first_digit || zero;
}
161 | ||
162 | bool JSON_Tokenizer::check_for_separator() | |
163 | { | |
164 | if (buf_pos < buf_len) { | |
165 | switch(buf_ptr[buf_pos]) { | |
166 | case ',': | |
167 | ++buf_pos; | |
168 | // no break | |
169 | case ':': | |
170 | case '{': | |
171 | case '}': | |
172 | case '[': | |
173 | case ']': | |
174 | return true; | |
175 | default: | |
176 | return false; | |
177 | } | |
178 | } | |
179 | return true; | |
180 | } | |
181 | ||
182 | bool JSON_Tokenizer::check_for_literal(const char* p_literal) | |
183 | { | |
184 | size_t len = strlen(p_literal); | |
185 | size_t start_pos = buf_pos; | |
186 | ||
187 | if (buf_len - buf_pos >= len && | |
188 | 0 == strncmp(buf_ptr + buf_pos, p_literal, len)) { | |
189 | buf_pos += len; | |
190 | if (!skip_white_spaces() || check_for_separator()) { | |
191 | return true; | |
192 | } else { | |
193 | // must be followed by a separator (or only white spaces until the buffer ends) -> undo buffer action | |
194 | buf_pos = start_pos; | |
195 | } | |
196 | } | |
197 | return false; | |
198 | } | |
199 | ||
// Extracts the next JSON token from the buffer.
// @param p_token [out] type of the token found; JSON_TOKEN_NONE when only
//   whitespace remains, JSON_TOKEN_ERROR on invalid input
// @param p_token_str [out, optional] set (together with p_str_len) only for
//   name, string and number tokens; points into the internal buffer (not
//   null-terminated, valid only while the tokenizer lives)
// @param p_str_len [out, optional] length of *p_token_str
// @return the number of characters consumed, including leading whitespace
int JSON_Tokenizer::get_next_token(json_token_t* p_token, char** p_token_str, size_t* p_str_len)
{
  size_t start_pos = buf_pos;
  *p_token = JSON_TOKEN_NONE;
  // both extra output parameters must be present to be used
  if (0 != p_token_str && 0 != p_str_len) {
    *p_token_str = 0;
    *p_str_len = 0;
  }

  if (skip_white_spaces()) {
    char c = buf_ptr[buf_pos];
    switch (c) {
    case '{':
    case '[':
      *p_token = ('{' == c) ? JSON_TOKEN_OBJECT_START : JSON_TOKEN_ARRAY_START;
      ++buf_pos;
      break;
    case '}':
    case ']':
      ++buf_pos;
      if (skip_white_spaces() && !check_for_separator()) {
        // must be followed by a separator (or only white spaces until the buffer ends)
        *p_token = JSON_TOKEN_ERROR;
      } else {
        *p_token = ('}' == c) ? JSON_TOKEN_OBJECT_END : JSON_TOKEN_ARRAY_END;
      }
      break;
    case '\"': {
      // string value or field name
      size_t string_start_pos = buf_pos;
      if(!check_for_string()) {
        // invalid string value
        *p_token = JSON_TOKEN_ERROR;
        break;
      }
      size_t string_end_pos = ++buf_pos; // step over the string's ending quotes
      if (skip_white_spaces() && ':' == buf_ptr[buf_pos]) {
        // name token - don't include the starting and ending quotes
        *p_token = JSON_TOKEN_NAME;
        if (0 != p_token_str && 0 != p_str_len) {
          *p_token_str = buf_ptr + string_start_pos + 1;
          *p_str_len = string_end_pos - string_start_pos - 2;
        }
        ++buf_pos; // consume the colon, too
      } else if (check_for_separator()) {
        // value token - include the starting and ending quotes
        *p_token = JSON_TOKEN_STRING;
        if (0 != p_token_str && 0 != p_str_len) {
          *p_token_str = buf_ptr + string_start_pos;
          *p_str_len = string_end_pos - string_start_pos;
        }
      } else {
        // value token, but there is no separator after it -> error
        *p_token = JSON_TOKEN_ERROR;
        break;
      }
      break;
    } // case: string value or field name
    default:
      // number, literal ("true"/"false"/"null"), or error
      if (('0' <= buf_ptr[buf_pos] && '9' >= buf_ptr[buf_pos]) ||
          '-' == buf_ptr[buf_pos]) {
        // number value
        size_t number_start_pos = buf_pos;
        if (!check_for_number()) {
          // invalid number
          *p_token = JSON_TOKEN_ERROR;
          break;
        }
        // record length before skipping trailing whitespace
        size_t number_length = buf_pos - number_start_pos;
        if (skip_white_spaces() && !check_for_separator()) {
          // must be followed by a separator (or only white spaces until the buffer ends)
          *p_token = JSON_TOKEN_ERROR;
          break;
        }
        *p_token = JSON_TOKEN_NUMBER;
        if (0 != p_token_str && 0 != p_str_len) {
          *p_token_str = buf_ptr + number_start_pos;
          *p_str_len = number_length;
        }
        break;
      } // if (number value)
      else if (check_for_literal("true")) {
        *p_token = JSON_TOKEN_LITERAL_TRUE;
        break;
      }
      else if (check_for_literal("false")) {
        *p_token = JSON_TOKEN_LITERAL_FALSE;
        break;
      }
      else if (check_for_literal("null")) {
        *p_token = JSON_TOKEN_LITERAL_NULL;
        break;
      }
      else {
        *p_token = JSON_TOKEN_ERROR;
        break;
      }
    } // switch (current char)
  } // if (skip_white_spaces())

  return buf_pos - start_pos;
}
302 | ||
303 | void JSON_Tokenizer::put_separator() | |
304 | { | |
305 | if (JSON_TOKEN_NAME != previous_token && JSON_TOKEN_NONE != previous_token && | |
306 | JSON_TOKEN_ARRAY_START != previous_token && JSON_TOKEN_OBJECT_START != previous_token) { | |
307 | put_c(','); | |
308 | if (pretty) { | |
309 | put_c('\n'); | |
310 | put_depth(); | |
311 | } | |
312 | } | |
313 | } | |
314 | ||
// Appends one token's textual form to the buffer, inserting a separator (and
// pretty-print indentation, when enabled) beforehand as needed.
// @param p_token the token type to emit
// @param p_token_str used only for name, string and number tokens; string
//   values must already contain their surrounding quotes
// @return the number of characters written (0 for an unsupported token type)
int JSON_Tokenizer::put_next_token(json_token_t p_token, const char* p_token_str)
{
  int start_len = buf_len;
  switch(p_token) {
  case JSON_TOKEN_OBJECT_START:
  case JSON_TOKEN_ARRAY_START: {
    put_separator();
    put_c( (JSON_TOKEN_OBJECT_START == p_token) ? '{' : '[' );
    if (pretty) {
      // contents of the new object/array are indented one level deeper
      put_c('\n');
      ++depth;
      put_depth();
    }
    break;
  }
  case JSON_TOKEN_OBJECT_END:
  case JSON_TOKEN_ARRAY_END: {
    if (pretty) {
      if (JSON_TOKEN_OBJECT_START != previous_token && JSON_TOKEN_ARRAY_START != previous_token) {
        // close on its own line, indented one level less
        put_c('\n');
        --depth;
        put_depth();
      } else if (MAX_TABS >= depth) {
        // empty object or array -> remove the extra tab added at the start token
        --depth;
        --buf_len;
        buf_ptr[buf_len] = 0;
      }
    }
    put_c( (JSON_TOKEN_OBJECT_END == p_token) ? '}' : ']' );
    break;
  }
  case JSON_TOKEN_NUMBER:
  case JSON_TOKEN_STRING:
    // p_token_str is emitted verbatim (strings already carry their quotes)
    put_separator();
    put_s(p_token_str);
    break;
  case JSON_TOKEN_LITERAL_TRUE:
    put_separator();
    put_s("true");
    break;
  case JSON_TOKEN_LITERAL_FALSE:
    put_separator();
    put_s("false");
    break;
  case JSON_TOKEN_LITERAL_NULL:
    put_separator();
    put_s("null");
    break;
  case JSON_TOKEN_NAME:
    // field names are quoted here; pretty mode pads the colon with spaces
    put_separator();
    put_c('\"');
    put_s(p_token_str);
    if (pretty) {
      put_s("\" : ");
    } else {
      put_s("\":");
    }
    break;
  default:
    // unsupported token type: nothing written, previous_token unchanged
    return 0;
  }

  previous_token = p_token;
  return buf_len - start_len;
}
381 | ||
3abe9331 | 382 | |
383 | char* convert_to_json_string(const char* str) | |
384 | { | |
385 | char* ret_val = mcopystrn("\"", 1); | |
386 | // control characters (like \n) cannot be placed in a JSON string, replace | |
387 | // them with JSON metacharacters | |
388 | // double quotes and backslashes need to be escaped, too | |
389 | size_t str_len = strlen(str); | |
390 | for (size_t i = 0; i < str_len; ++i) { | |
391 | switch (str[i]) { | |
392 | case '\n': | |
393 | ret_val = mputstrn(ret_val, "\\n", 2); | |
394 | break; | |
395 | case '\r': | |
396 | ret_val = mputstrn(ret_val, "\\r", 2); | |
397 | break; | |
398 | case '\t': | |
399 | ret_val = mputstrn(ret_val, "\\t", 2); | |
400 | break; | |
401 | case '\f': | |
402 | ret_val = mputstrn(ret_val, "\\f", 2); | |
403 | break; | |
404 | case '\b': | |
405 | ret_val = mputstrn(ret_val, "\\b", 2); | |
406 | break; | |
407 | case '\"': | |
408 | ret_val = mputstrn(ret_val, "\\\"", 2); | |
409 | break; | |
410 | case '\\': | |
411 | ret_val = mputstrn(ret_val, "\\\\", 2); | |
412 | break; | |
413 | default: | |
414 | if (str[i] < 32 && str[i] > 0) { | |
415 | // use the JSON \uHHHH notation for other control characters | |
416 | // (this is just for esthetic reasons, these wouldn't break the JSON | |
417 | // string format) | |
418 | ret_val = mputprintf(ret_val, "\\u00%d%c", str[i] / 16, | |
419 | (str[i] % 16 < 10) ? (str[i] % 16 + '0') : (str[i] % 16 - 10 + 'A')); | |
420 | } | |
421 | else { | |
422 | ret_val = mputc(ret_val, str[i]); | |
423 | } | |
424 | break; | |
425 | } | |
426 | } | |
427 | return mputstrn(ret_val, "\"", 1); | |
428 | } |