2 * SPDX-License-Identifier: MIT
4 * Copyright 2016-2019 Philippe Proulx <pproulx@efficios.com>
7 #include "param-parse.h"
12 #include "common/assert.h"
16 #include <babeltrace2/babeltrace.h>
17 #include "common/common.h"
19 #include <sys/types.h>
21 /* INI-style parsing FSM states */
/*
 * States of the top-level key/value FSM driven by ini_handle_state().
 * NOTE(review): the enumerator lines are absent from this view; the rest
 * of the file refers to INI_EXPECT_MAP_KEY, INI_EXPECT_EQUAL,
 * INI_EXPECT_VALUE and INI_EXPECT_COMMA, so those are presumably the
 * members declared here. Confirm against the complete file.
 */
22 enum ini_parsing_fsm_state
{
23 /* Expect a map key (identifier) */
26 /* Expect an equal character (`=`) */
32 /* Expect a comma character (`,`) */
36 /* INI-style parsing state variables */
/*
 * Working state shared by every parsing helper in this file.
 * NOTE(review): only the `expecting` and `last_map_key` member
 * declarations are visible here. The helpers also access
 * state->scanner, state->params, state->arg and state->ini_error, which
 * presumably belong to the remaining member comments below. Confirm
 * against the complete file.
 */
37 struct ini_parsing_state
{
38 /* Lexical scanner (owned by this) */
41 /* Output map value object being filled (owned by this) */
44 /* Next expected FSM state */
45 enum ini_parsing_fsm_state expecting
;
47 /* Last decoded map key (owned by this) */
48 GString
*last_map_key
;
50 /* Complete INI-style string to parse */
53 /* Error buffer (weak) */
58 * Appends an "expecting token" error to the INI-style parsing state's error buffer.
/*
 * Reports that the token named by `expecting` was expected.
 *
 * Visible behaviour: a line built from `expecting` is appended to
 * state->ini_error. The function then tests whether state->arg contains
 * a newline or is empty, echoes state->arg on its own line, and appends
 * `pos` spaces followed by a caret, where `pos` starts as the current
 * position of `scanner` plus 4.
 *
 * NOTE(review): the statements guarded by the two `if` tests, the local
 * declarations of `i` and `pos`, and the closing braces are absent from
 * this view. The `+ 4` offset presumably matches the indentation used
 * when echoing state->arg, but the format string visible here shows a
 * single space. Confirm both against the complete file.
 */
62 void ini_append_error_expecting(struct ini_parsing_state
*state
,
63 GScanner
*scanner
, const char *expecting
)
68 g_string_append_printf(state
->ini_error
, "Expecting %s:\n", expecting
);
70 /* Only append error if there's one line */
71 if (strchr(state
->arg
, '\n') || strlen(state
->arg
) == 0) {
75 g_string_append_printf(state
->ini_error
, "\n %s\n", state
->arg
);
76 pos
= g_scanner_cur_position(scanner
) + 4;
78 if (!g_scanner_eof(scanner
)) {
82 for (i
= 0; i
< pos
; ++i
) {
83 g_string_append_c(state
->ini_error
, ' ');
86 g_string_append_c(state
->ini_error
, '^');
/*
 * Appends a fixed out-of-memory message, terminated by a newline, to
 * `error`. `error` is passed to g_string_append() as is.
 */
90 void ini_append_oom_error(GString
*error
)
93 g_string_append(error
, "Out of memory\n");
97 * Parses the next token as an unsigned integer.
/*
 * Reads the next token from state->scanner. If it is not G_TOKEN_INT an
 * error is reported through ini_append_error_expecting(); otherwise an
 * unsigned integer value object is created from the v_int64 field of
 * the scanner value.
 *
 * NOTE(review): the end of the error call, the exit path and the
 * `return` are absent from this view. `value` starts as NULL, so the
 * function presumably returns NULL on error. Confirm.
 */
100 bt_value
*ini_parse_uint(struct ini_parsing_state
*state
)
102 bt_value
*value
= NULL
;
103 GTokenType token_type
= g_scanner_get_next_token(state
->scanner
);
105 if (token_type
!= G_TOKEN_INT
) {
106 ini_append_error_expecting(state
, state
->scanner
,
111 value
= bt_value_integer_unsigned_create_init(
112 state
->scanner
->value
.v_int64
);
119 * Parses the next token as a number and returns its negation.
/*
 * Reads the next token from state->scanner and builds the negated
 * value:
 *  - integer branch: the magnitude is read as uint64_t; magnitudes above
 *    (uint64_t) INT64_MAX + 1 are rejected with a range error appended
 *    to state->ini_error, otherwise a signed integer value object is
 *    created from the negated magnitude;
 *  - floating point branch: a real value object is created from the
 *    negated v_float;
 *  - any other token: an error is reported through
 *    ini_append_error_expecting().
 *
 * NOTE(review): the `case` labels, `break`/`goto` statements and the
 * `return` are absent from this view.
 * NOTE(review): a magnitude equal to (uint64_t) INT64_MAX + 1 passes the
 * range guard. Converting it to int64_t is out of range, and negating
 * the most negative int64_t overflows. Consider handling that single
 * magnitude explicitly (returning INT64_MIN directly).
 */
122 bt_value
*ini_parse_neg_number(struct ini_parsing_state
*state
)
124 bt_value
*value
= NULL
;
125 GTokenType token_type
= g_scanner_get_next_token(state
->scanner
);
127 switch (token_type
) {
130 /* Negative integer */
131 uint64_t int_val
= state
->scanner
->value
.v_int64
;
133 if (int_val
> (((uint64_t) INT64_MAX
) + 1)) {
134 g_string_append_printf(state
->ini_error
,
135 "Integer value -%" PRIu64
" is outside the range of a 64-bit signed integer\n",
138 value
= bt_value_integer_signed_create_init(
139 -((int64_t) int_val
));
145 /* Negative floating point number */
146 value
= bt_value_real_create_init(
147 -state
->scanner
->value
.v_float
);
150 ini_append_error_expecting(state
, state
->scanner
, "value");
/*
 * Forward declaration: ini_parse_array() and ini_parse_map() call
 * ini_parse_value() for their elements, and ini_parse_value() calls
 * both of them back for nested `[` and `{` values.
 */
157 static bt_value
*ini_parse_value(struct ini_parsing_state
*state
);
160 * Parses the current and following tokens as an array. Arrays are
161 * formatted as an opening `[`, a list of comma-separated values, and a
162 * closing `]`. For convenience, this function supports an optional
163 * trailing comma after the last value.
165 * The current token of the parser must be the opening square bracket
166 * (`[`) of the array.
/*
 * Parses an array whose opening `[` is the current scanner token (this
 * is asserted on entry).
 *
 * Visible flow: create an empty array value object, read the next
 * token, then loop until the current token is `]`. Each iteration
 * parses one item with ini_parse_value(), appends it to the array,
 * drops the local reference to the item, and reads the following token.
 * A `,` makes the loop read one more token, which is how a trailing
 * comma before `]` is accepted. Anything other than `,` or `]` is
 * reported through ini_append_error_expecting().
 *
 * NOTE(review): the NULL checks, error exits and `return` are absent
 * from this view. The final BT_VALUE_PUT_REF_AND_RESET(array_value)
 * presumably belongs to an error path. Confirm against the complete
 * file.
 */
169 bt_value
*ini_parse_array(struct ini_parsing_state
*state
)
171 bt_value
*array_value
;
172 GTokenType token_type
;
174 /* The `[` character must have already been ingested */
175 BT_ASSERT(g_scanner_cur_token(state
->scanner
) == G_TOKEN_CHAR
);
176 BT_ASSERT(g_scanner_cur_value(state
->scanner
).v_char
== '[');
178 array_value
= bt_value_array_create ();
180 ini_append_oom_error(state
->ini_error
);
184 token_type
= g_scanner_get_next_token(state
->scanner
);
186 /* While the current token is not a `]` */
187 while (!(token_type
== G_TOKEN_CHAR
&&
188 g_scanner_cur_value(state
->scanner
).v_char
== ']')) {
189 bt_value
*item_value
;
190 bt_value_array_append_element_status append_status
;
192 /* Parse the item... */
193 item_value
= ini_parse_value(state
);
198 /* ... and add it to the result array */
199 append_status
= bt_value_array_append_element(array_value
,
201 BT_VALUE_PUT_REF_AND_RESET(item_value
);
202 if (append_status
< 0) {
207 * Ingest the token following the value. It should be
208 * either a comma or closing square bracket.
210 token_type
= g_scanner_get_next_token(state
->scanner
);
211 if (token_type
== G_TOKEN_CHAR
&&
212 g_scanner_cur_value(state
->scanner
).v_char
== ',') {
214 * Ingest the token following the comma. If it
215 * happens to be a closing square bracket, exit
216 * the loop and we are done (we allow trailing
217 * commas). Otherwise, we are ready for the next
218 * ini_parse_value() call.
220 token_type
= g_scanner_get_next_token(state
->scanner
);
221 } else if (token_type
!= G_TOKEN_CHAR
||
222 g_scanner_cur_value(state
->scanner
).v_char
!= ']') {
223 ini_append_error_expecting(state
, state
->scanner
,
232 BT_VALUE_PUT_REF_AND_RESET(array_value
);
239 * Parses the current and following tokens as a map. Maps are
240 * formatted as an opening `{`, a list of comma-separated entries, and a
241 * closing `}`. An entry is a key (an unquoted string), an equal sign and
242 * a value. For convenience, this function supports an optional trailing comma
243 * after the last value.
245 * The current token of the parser must be the opening curly bracket
246 * (`{`) of the map.
/*
 * Parses a map whose opening `{` is the current scanner token (this is
 * asserted on entry).
 *
 * Visible flow: create an empty map value object, read the next token,
 * then loop until the current token is `}`. Each iteration requires an
 * identifier token (the key, duplicated with g_strdup()), then a `=`
 * character, then advances to the first token of the value and parses
 * it with ini_parse_value(). The entry is inserted into the map, the
 * local reference to the entry value is dropped, and the following
 * token is read. A `,` makes the loop read one more token, which is how
 * a trailing comma before `}` is accepted. Anything other than `,` or
 * `}` is reported through ini_append_error_expecting().
 *
 * NOTE(review): the declarations of `map_value` and `key`, the error
 * exits and the `return` are absent from this view. `key` is
 * reassigned on every iteration, so confirm that the previous string is
 * freed before each g_strdup() and again on exit.
 * NOTE(review): the inner comment about the token after a comma
 * mentions the next ini_parse_value() call, but in this loop the next
 * step expects a map key. The wording looks copied from
 * ini_parse_array().
 */
249 bt_value
*ini_parse_map(struct ini_parsing_state
*state
)
252 GTokenType token_type
;
255 /* The `{` character must have already been ingested */
256 BT_ASSERT(g_scanner_cur_token(state
->scanner
) == G_TOKEN_CHAR
);
257 BT_ASSERT(g_scanner_cur_value(state
->scanner
).v_char
== '{');
259 map_value
= bt_value_map_create ();
261 ini_append_oom_error(state
->ini_error
);
265 token_type
= g_scanner_get_next_token(state
->scanner
);
267 /* While the current token is not a `}` */
268 while (!(token_type
== G_TOKEN_CHAR
&&
269 g_scanner_cur_value(state
->scanner
).v_char
== '}')) {
270 bt_value
*entry_value
;
271 bt_value_map_insert_entry_status insert_entry_status
;
273 /* Expect map key. */
274 if (token_type
!= G_TOKEN_IDENTIFIER
) {
275 ini_append_error_expecting(state
, state
->scanner
,
281 key
= g_strdup(g_scanner_cur_value(state
->scanner
).v_identifier
);
283 token_type
= g_scanner_get_next_token(state
->scanner
);
285 /* Expect equal sign. */
286 if (token_type
!= G_TOKEN_CHAR
||
287 g_scanner_cur_value(state
->scanner
).v_char
!= '=') {
288 ini_append_error_expecting(state
,
289 state
->scanner
, "'='");
293 g_scanner_get_next_token(state
->scanner
);
295 /* Parse the entry value... */
296 entry_value
= ini_parse_value(state
);
301 /* ... and add it to the result map */
302 insert_entry_status
=
303 bt_value_map_insert_entry(map_value
, key
, entry_value
);
304 BT_VALUE_PUT_REF_AND_RESET(entry_value
);
305 if (insert_entry_status
!= BT_VALUE_MAP_INSERT_ENTRY_STATUS_OK
) {
310 * Ingest the token following the value. It should be
311 * either a comma or closing curly bracket.
313 token_type
= g_scanner_get_next_token(state
->scanner
);
314 if (token_type
== G_TOKEN_CHAR
&&
315 g_scanner_cur_value(state
->scanner
).v_char
== ',') {
317 * Ingest the token following the comma. If it
318 * happens to be a closing curly bracket, exit
319 * the loop and we are done (we allow trailing
320 * commas). Otherwise, we are ready for the next
321 * ini_parse_value() call.
323 token_type
= g_scanner_get_next_token(state
->scanner
);
324 } else if (token_type
!= G_TOKEN_CHAR
||
325 g_scanner_cur_value(state
->scanner
).v_char
!= '}') {
326 ini_append_error_expecting(state
, state
->scanner
,
334 BT_VALUE_PUT_REF_AND_RESET(map_value
);
343 * Parses the current token (and the following ones if needed) as a
344 * value, returning it as a `bt_value *`.
/*
 * Builds a value object from the current scanner token. The token is
 * read from state->scanner->token and is not fetched again here.
 *
 * Visible dispatch:
 *  - character `-`: ini_parse_neg_number();
 *  - character `+`: ini_parse_uint();
 *  - character `[`: ini_parse_array();
 *  - character `{`: ini_parse_map();
 *  - any other character: error through ini_append_error_expecting();
 *  - integer: rejected with a range error when above INT64_MAX,
 *    otherwise a signed integer value object;
 *  - floating point number: a real value object;
 *  - string: a string value object;
 *  - identifier: null / NULL / nul give the null value object (with a
 *    new reference); true / TRUE / yes / YES and false / FALSE / no / NO
 *    give boolean value objects; any other identifier becomes a string
 *    value object;
 *  - anything else: error through ini_append_error_expecting().
 *
 * NOTE(review): most `case` labels, the `break`/`goto` statements and
 * the `return` are absent from this view. `value` starts as NULL, so
 * the function presumably returns NULL on error. Confirm.
 */
347 bt_value
*ini_parse_value(struct ini_parsing_state
*state
)
349 bt_value
*value
= NULL
;
350 GTokenType token_type
= state
->scanner
->token
;
352 switch (token_type
) {
354 if (state
->scanner
->value
.v_char
== '-') {
355 /* Negative number */
356 value
= ini_parse_neg_number(state
);
357 } else if (state
->scanner
->value
.v_char
== '+') {
358 /* Unsigned integer */
359 value
= ini_parse_uint(state
);
360 } else if (state
->scanner
->value
.v_char
== '[') {
362 value
= ini_parse_array(state
);
363 } else if (state
->scanner
->value
.v_char
== '{') {
365 value
= ini_parse_map(state
);
367 ini_append_error_expecting(state
, state
->scanner
, "value");
374 /* Positive, signed integer */
375 uint64_t int_val
= state
->scanner
->value
.v_int64
;
377 if (int_val
> INT64_MAX
) {
378 g_string_append_printf(state
->ini_error
,
379 "Integer value %" PRIu64
" is outside the range of a 64-bit signed integer\n",
383 value
= bt_value_integer_signed_create_init(
390 /* Positive floating point number */
391 value
= bt_value_real_create_init(state
->scanner
->value
.v_float
);
395 value
= bt_value_string_create_init(state
->scanner
->value
.v_string
);
397 case G_TOKEN_IDENTIFIER
:
400 * Using symbols would be appropriate here, but said
401 * symbols are allowed as map key, so it's easier to
402 * consider everything an identifier.
404 * If one of the known symbols is not recognized here,
405 * then fall back to creating a string value.
407 const char *id
= state
->scanner
->value
.v_identifier
;
409 if (strcmp(id
, "null") == 0 || strcmp(id
, "NULL") == 0 ||
410 strcmp(id
, "nul") == 0) {
411 value
= bt_value_null
;
412 bt_value_get_ref(value
);
413 } else if (strcmp(id
, "true") == 0 || strcmp(id
, "TRUE") == 0 ||
414 strcmp(id
, "yes") == 0 ||
415 strcmp(id
, "YES") == 0) {
416 value
= bt_value_bool_create_init(true);
417 } else if (strcmp(id
, "false") == 0 ||
418 strcmp(id
, "FALSE") == 0 ||
419 strcmp(id
, "no") == 0 ||
420 strcmp(id
, "NO") == 0) {
421 value
= bt_value_bool_create_init(false);
423 value
= bt_value_string_create_init(id
);
428 /* Unset return value variable will trigger the error */
429 ini_append_error_expecting(state
, state
->scanner
, "value");
438 * Handles the current state of the INI parser.
440 * Returns 0 to continue, 1 to end, or a negative value on error.
/*
 * Runs one step of the top-level FSM: reads the next token and acts on
 * state->expecting.
 *
 * Visible behaviour:
 *  - at end of input, unless a comma was expected, the token that was
 *    still expected (`=`, value or map key) is reported through
 *    ini_append_error_expecting();
 *  - INI_EXPECT_MAP_KEY: requires an identifier, stores it in
 *    state->last_map_key, then expects `=`;
 *  - INI_EXPECT_EQUAL: requires the `=` character, then expects a value;
 *  - INI_EXPECT_VALUE: parses the value with ini_parse_value(), then
 *    expects a comma;
 *  - INI_EXPECT_COMMA: requires the `,` character, then expects a map
 *    key again;
 *  - a parsed value is inserted into state->params under
 *    state->last_map_key->str, and the local reference to it is
 *    dropped.
 *
 * NOTE(review): the `ret` variable, the labels, the `goto`/`break`
 * statements and the `return` are absent from this view, so the exact
 * path taken after each step cannot be confirmed here.
 */
443 int ini_handle_state(struct ini_parsing_state
*state
)
446 GTokenType token_type
;
447 bt_value
*value
= NULL
;
449 token_type
= g_scanner_get_next_token(state
->scanner
);
450 if (token_type
== G_TOKEN_EOF
) {
451 if (state
->expecting
!= INI_EXPECT_COMMA
) {
452 switch (state
->expecting
) {
453 case INI_EXPECT_EQUAL
:
454 ini_append_error_expecting(state
,
455 state
->scanner
, "`=`");
457 case INI_EXPECT_VALUE
:
458 ini_append_error_expecting(state
,
459 state
->scanner
, "value");
461 case INI_EXPECT_MAP_KEY
:
462 ini_append_error_expecting(state
,
463 state
->scanner
, "unquoted map key");
476 switch (state
->expecting
) {
477 case INI_EXPECT_MAP_KEY
:
478 if (token_type
!= G_TOKEN_IDENTIFIER
) {
479 ini_append_error_expecting(state
, state
->scanner
,
484 g_string_assign(state
->last_map_key
,
485 state
->scanner
->value
.v_identifier
);
487 state
->expecting
= INI_EXPECT_EQUAL
;
489 case INI_EXPECT_EQUAL
:
490 if (token_type
!= G_TOKEN_CHAR
) {
491 ini_append_error_expecting(state
,
492 state
->scanner
, "'='");
496 if (state
->scanner
->value
.v_char
!= '=') {
497 ini_append_error_expecting(state
,
498 state
->scanner
, "'='");
502 state
->expecting
= INI_EXPECT_VALUE
;
504 case INI_EXPECT_VALUE
:
506 value
= ini_parse_value(state
);
511 state
->expecting
= INI_EXPECT_COMMA
;
514 case INI_EXPECT_COMMA
:
515 if (token_type
!= G_TOKEN_CHAR
) {
516 ini_append_error_expecting(state
,
517 state
->scanner
, "','");
521 if (state
->scanner
->value
.v_char
!= ',') {
522 ini_append_error_expecting(state
,
523 state
->scanner
, "','");
527 state
->expecting
= INI_EXPECT_MAP_KEY
;
539 if (bt_value_map_insert_entry(state
->params
,
540 state
->last_map_key
->str
, value
)) {
541 /* Only override return value on error */
547 BT_VALUE_PUT_REF_AND_RESET(value
);
552 * Converts an INI-style argument to an equivalent map value object.
554 * Return value is owned by the caller.
/*
 * Entry point: parses the INI-style string `arg` into a map value
 * object.
 *
 * Visible flow: configure a GScanner (settings below), initialise the
 * parsing state to expect a map key, assert that `ini_error` is
 * non-NULL and reset it to the empty string, create the output map, the
 * scanner and the last-key buffer (each followed by an out-of-memory
 * report), feed `arg` to the scanner, then call ini_handle_state()
 * repeatedly. Afterwards the scanner is destroyed and the last-key
 * buffer is freed.
 *
 * `arg` is passed to strlen() without a NULL check, so it must be a
 * valid NUL-terminated string.
 *
 * NOTE(review): the loop statement, the error labels and the final
 * `return` are absent from this view. The
 * BT_VALUE_PUT_REF_AND_RESET(state.params) call presumably belongs to
 * the error path. Confirm against the complete file.
 */
556 bt_value
*bt_param_parse(const char *arg
, GString
*ini_error
)
558 /* Lexical scanner configuration */
559 GScannerConfig scanner_config
= {
560 /* Skip whitespaces */
561 .cset_skip_characters
= (gchar
*) " \t\n",
563 /* Identifier syntax is: [a-zA-Z_][a-zA-Z0-9_.:-]* */
564 .cset_identifier_first
= (gchar
*)
568 .cset_identifier_nth
= (gchar
*)
573 /* "hello" and "Hello" are two different keys */
574 .case_sensitive
= TRUE
,
577 .cpair_comment_single
= NULL
,
578 .skip_comment_multi
= TRUE
,
579 .skip_comment_single
= TRUE
,
580 .scan_comment_multi
= FALSE
,
583 * Do scan identifiers, including 1-char identifiers,
584 * but NULL is a normal identifier.
586 .scan_identifier
= TRUE
,
587 .scan_identifier_1char
= TRUE
,
588 .scan_identifier_NULL
= FALSE
,
591 * No specific symbols: null and boolean "symbols" are
592 * scanned as plain identifiers.
594 .scan_symbols
= FALSE
,
595 .symbol_2_token
= FALSE
,
596 .scope_0_fallback
= FALSE
,
599 * Scan "0b"-, "0"-, and "0x"-prefixed integers, but not
600 * integers prefixed with "$".
606 .scan_hex_dollar
= FALSE
,
608 /* Convert scanned numbers to integer tokens */
609 .numbers_2_int
= TRUE
,
611 /* Support both integers and floating point numbers */
612 .int_2_float
= FALSE
,
614 /* Scan integers as 64-bit signed integers */
617 /* Only scan double-quoted strings */
618 .scan_string_sq
= FALSE
,
619 .scan_string_dq
= TRUE
,
621 /* Do not convert identifiers to string tokens */
622 .identifier_2_string
= FALSE
,
624 /* Scan characters as `G_TOKEN_CHAR` token */
625 .char_2_token
= FALSE
,
627 struct ini_parsing_state state
= {
630 .expecting
= INI_EXPECT_MAP_KEY
,
632 .ini_error
= ini_error
,
635 BT_ASSERT(ini_error
);
636 g_string_assign(ini_error
, "");
637 state
.params
= bt_value_map_create();
639 ini_append_oom_error(ini_error
);
643 state
.scanner
= g_scanner_new(&scanner_config
);
644 if (!state
.scanner
) {
645 ini_append_oom_error(ini_error
);
649 state
.last_map_key
= g_string_new(NULL
);
650 if (!state
.last_map_key
) {
651 ini_append_oom_error(ini_error
);
655 /* Let the scan begin */
656 g_scanner_input_text(state
.scanner
, arg
, strlen(arg
));
659 int ret
= ini_handle_state(&state
);
664 } else if (ret
> 0) {
673 BT_VALUE_PUT_REF_AND_RESET(state
.params
);
677 g_scanner_destroy(state
.scanner
);
680 if (state
.last_map_key
) {
681 g_string_free(state
.last_map_key
, TRUE
);