[deliverable/titan.core.git] / common / JSON_Tokenizer.hh

/******************************************************************************
 * Copyright (c) 2000-2016 Ericsson Telecom AB
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *   Balasko, Jeno
 *   Baranyi, Botond
 *
 ******************************************************************************/

#ifndef JSON_TOKENIZER_HH
#define	JSON_TOKENIZER_HH

#include <cstddef>

/** The kinds of JSON tokens handled by the tokenizer. */
enum json_token_t {
  /** Not actually a token: reported when get_next_token() fails. */
  JSON_TOKEN_ERROR = 0,
  /** Not actually a token: used for initializing, and reported by
    * get_next_token() when the end of the buffer is reached. */
  JSON_TOKEN_NONE,
  /** Start of a JSON object: "{" */
  JSON_TOKEN_OBJECT_START,
  /** End of a JSON object: "}" */
  JSON_TOKEN_OBJECT_END,
  /** Start of a JSON array: "[" */
  JSON_TOKEN_ARRAY_START,
  /** End of a JSON array: "]" */
  JSON_TOKEN_ARRAY_END,
  /** Field name (key) in a JSON object, followed by ":" */
  JSON_TOKEN_NAME,
  /** JSON number value */
  JSON_TOKEN_NUMBER,
  /** JSON string value */
  JSON_TOKEN_STRING,
  /** The literal value "true" */
  JSON_TOKEN_LITERAL_TRUE,
  /** The literal value "false" */
  JSON_TOKEN_LITERAL_FALSE,
  /** The literal value "null" */
  JSON_TOKEN_LITERAL_NULL
};
  
/** A class for building and processing JSON documents. Stores the document in a buffer.
  * Can build JSON documents by inserting tokens into an empty buffer
  * (see put_next_token()).
  * Can extract tokens from an existing JSON document (see get_next_token()).
  * Instances cannot be copied from outside the class: the copy constructor and
  * the assignment operator are declared private. */
class JSON_Tokenizer {

private:

  /** The buffer that stores the JSON document
    * This is a buffer with exponential allocation (expstring), only uses expstring
    * memory operations from memory.h (ex.: mputstr, mputprintf) */
  char* buf_ptr;

  /** Number of bytes currently in the buffer */
  size_t buf_len;

  /** Current position in the buffer */
  size_t buf_pos;

  /** Current depth in the JSON document (only used if pretty printing is set) */
  unsigned int depth;

  /** Stores the previous JSON token inserted by put_next_token() */
  json_token_t previous_token;

  /** Activates or deactivates pretty printing
    * If active, put_next_token() and put_separator() will add extra newlines
    * and indenting to the JSON code to make it more readable for humans,
    * otherwise it will be compact (no white spaces). */
  bool pretty;

  /** Initializes the properties of the tokenizer.
    * The buffer is initialized with the parameter data (unless it's empty).
    * @param p_buf [in] Pointer to the initial buffer content
    * @param p_buf_len [in] Length of the initial buffer content in bytes */
  void init(const char* p_buf, const size_t p_buf_len);

  /** Inserts a character to the end of the buffer */
  void put_c(const char c);

  /** Inserts a null-terminated string to the end of the buffer */
  void put_s(const char* s);

  /** Indents a new line in JSON code depending on the current depth.
    * If the maximum depth is reached, the code is not indented further.
    * Used only if pretty printing is set. */
  void put_depth();

  /** Skips white spaces until a non-white-space character is found.
    * Returns false if the end of the buffer is reached before a non-white-space
    * character is found, otherwise returns true. */
  bool skip_white_spaces();

  /** Attempts to find a JSON string at the current buffer position.
    * Returns true if a valid string is found before the end of the buffer
    * is reached, otherwise returns false. */
  bool check_for_string();

  /** Attempts to find a JSON number at the current buffer position.
    * For number format see http://json.org/.
    * Returns true if a valid number is found before the end of the buffer
    * is reached, otherwise returns false. */
  bool check_for_number();

  /** Checks if the current character in the buffer is a valid JSON separator.
    * Separators are: commas (,), colons (:) and curly and square brackets ({}[]).
    * This function also steps over the separator if it's a comma.
    * Returns true if a separator is found, otherwise returns false. */
  bool check_for_separator();

  /** Attempts to find a specific JSON literal at the current buffer position.
    * Returns true if the literal is found, otherwise returns false.
    * @param p_literal [in] Literal value to find */
  bool check_for_literal(const char* p_literal);

  /** Adds a separating comma (,) if the previous token is a value, or an object or
    * array end mark. */
  void put_separator();

  /** No copy constructor. Declared private to prevent copying; implement if needed. */
  JSON_Tokenizer(const JSON_Tokenizer&);

  /** No assignment operator. Declared private to prevent assignment; implement if needed. */
  JSON_Tokenizer& operator=(const JSON_Tokenizer&);

public:
  /** Constructs a tokenizer with an empty buffer.
    * Use put_next_token() to build a JSON document and get_buffer()/get_buffer_length() to retrieve it
    * @param p_pretty [in] Activates pretty printing if true (compact output by default) */
  JSON_Tokenizer(bool p_pretty = false) : pretty(p_pretty) { init(0, 0); }

  /** Constructs a tokenizer with the buffer parameter. Pretty printing is turned off.
    * Use get_next_token() to read JSON tokens and get_buf_pos()/set_buf_pos() to move around in the buffer
    * @param p_buf [in] Pointer to the JSON document
    * @param p_buf_len [in] Length of the JSON document in bytes */
  JSON_Tokenizer(const char* p_buf, const size_t p_buf_len) : pretty(false) { init(p_buf, p_buf_len); }

  /** Destructor. Frees the buffer. */
  ~JSON_Tokenizer();

  /** Reinitializes the tokenizer with a new buffer. The pretty printing setting
    * is left unchanged.
    * NOTE(review): this only forwards to init(); whether the previously held
    * buffer is released first cannot be seen from this header - confirm in the
    * implementation of init() to rule out a leak.
    * @param p_buf [in] Pointer to the new JSON document
    * @param p_buf_len [in] Length of the new JSON document in bytes */
  inline void set_buffer(const char* p_buf, const size_t p_buf_len) { init(p_buf, p_buf_len); }

  /** Retrieves the buffer containing the JSON document.
    * The returned pointer refers to the tokenizer's own buffer (no copy is made). */
  inline const char* get_buffer() const { return buf_ptr; }

  /** Retrieves the length of the buffer containing the JSON document. */
  inline size_t get_buffer_length() const { return buf_len; }

  /** Extracts a JSON token from the current buffer position.
    * @param p_token [out] Extracted token type, or JSON_TOKEN_ERROR if no token
    * could be extracted, or JSON_TOKEN_NONE if the buffer end is reached
    * @param p_token_str [out] A pointer to the token data (if any):
    * the name of a JSON object field (without quotes), or the string representation
    * of a JSON number, or a JSON string (with quotes and double-escaped).
    * @param p_str_len [out] The character length of the token data (if there is data)
    * @return The number of characters extracted
    * @note The token data is not copied, *p_token_str will point to the start of the
    * data in the tokenizer's buffer. */
  int get_next_token(json_token_t* p_token, char** p_token_str, size_t* p_str_len);

  /** Gets the current read position in the buffer.
    * This is where get_next_token() will read from next. */
  inline size_t get_buf_pos() const { return buf_pos; }

  /** Sets the current read position in the buffer.
    * This is where get_next_token() will read from next.
    * The new position is stored as-is, it is not checked against the buffer length.
    * @param p_buf_pos [in] The new read position */
  inline void set_buf_pos(const size_t p_buf_pos) { buf_pos = p_buf_pos; }

  /** Adds the specified JSON token to end of the buffer.
    * @param p_token [in] Token type
    * @param p_token_str [in] The name of a JSON object field (without quotes), or
    * the string representation of a JSON number, or a JSON string (with quotes
    * and double-escaped). For all the other tokens this parameter will be ignored.
    * @return The number of characters added to the JSON document */
  int put_next_token(json_token_t p_token, const char* p_token_str = 0);

  /** Adds raw data to the end of the buffer.
    * @param p_data [in] Pointer to the beginning of the data
    * @param p_len [in] Length of the data in bytes */
  void put_raw_data(const char* p_data, size_t p_len);

}; // class JSON_Tokenizer

// A dummy JSON tokenizer, use when there is no actual JSON document.
// It is built with the default constructor (empty buffer, pretty printing off).
// NOTE(review): this is a `static` object defined in a header, so every
// translation unit that includes this file gets its own separate instance;
// it is not a single program-wide object.
static JSON_Tokenizer DUMMY_BUFFER;

/** Produces the JSON string form of a character string.
  *
  * The result is enclosed in a pair of double quotes. Control characters are
  * replaced with their JSON escape sequence where one is available, and with
  * the \uHHHH escape sequence otherwise. Double quote and backslash
  * characters are double-escaped.
  *
  * @param str [in] The string to convert
  * @return An expstring holding the JSON string; it needs to be freed. */
char* convert_to_json_string(const char* str);


#endif	/* JSON_TOKENIZER_HH */
Commit	Line	Data
d44e3c4f	1	/******************************************************************************
	2	* Copyright (c) 2000-2016 Ericsson Telecom AB
	3	* All rights reserved. This program and the accompanying materials
	4	* are made available under the terms of the Eclipse Public License v1.0
	5	* which accompanies this distribution, and is available at
	6	* http://www.eclipse.org/legal/epl-v10.html
	7	*
	8	* Contributors:
	9	* Balasko, Jeno
	10	* Baranyi, Botond
	11	*
	12	******************************************************************************/
3abe9331	13
970ed795 EL	14	#ifndef JSON_TOKENIZER_HH
	15	#define JSON_TOKENIZER_HH
	16
	17	#include <cstddef>
	18
	19	/** JSON token types */
	20	enum json_token_t {
	21	JSON_TOKEN_ERROR = 0, // not actually a token, used when get_next_token() fails
	22	JSON_TOKEN_NONE, // not actually a token, used for initializing
	23	JSON_TOKEN_OBJECT_START, // "{"
	24	JSON_TOKEN_OBJECT_END, // "}"
	25	JSON_TOKEN_ARRAY_START, // "["
	26	JSON_TOKEN_ARRAY_END, // "]"
	27	JSON_TOKEN_NAME, // field name (key) in a JSON object, followed by ":"
	28	JSON_TOKEN_NUMBER, // JSON number value
	29	JSON_TOKEN_STRING, // JSON string value
	30	JSON_TOKEN_LITERAL_TRUE, // "true" value
	31	JSON_TOKEN_LITERAL_FALSE, // "false" value
	32	JSON_TOKEN_LITERAL_NULL // "null" value
	33	};
	34
	35	/** A class for building and processing JSON documents. Stores the document in a buffer.
	36	* Can build JSON documents by inserting tokens into an empty buffer.
	37	* Can extract tokens from an existing JSON document. */
	38	class JSON_Tokenizer {
	39
	40	private:
	41
	42	/** The buffer that stores the JSON document
	43	* This is a buffer with exponential allocation (expstring), only uses expstring
	44	* memory operations from memory.h (ex.: mputstr, mputprintf) */
	45	char* buf_ptr;
	46
	47	/** Number of bytes currently in the buffer */
	48	size_t buf_len;
	49
	50	/** Current position in the buffer */
	51	size_t buf_pos;
	52
	53	/** Current depth in the JSON document (only used if pretty printing is set */
	54	unsigned int depth;
	55
	56	/** Stores the previous JSON token inserted by put_next_token() */
	57	json_token_t previous_token;
	58
	59	/** Activates or deactivates pretty printing
	60	* If active, put_next_token() and put_separator() will add extra newlines
	61	* and indenting to the JSON code to make it more readable for you humans,
	62	* otherwise it will be compact (no white spaces). */
	63	bool pretty;
	64
	65	/** Initializes the properties of the tokenizer.
	66	* The buffer is initialized with the parameter data (unless it's empty). */
	67	void init(const char* p_buf, const size_t p_buf_len);
	68
	69	/** Inserts a character to the end of the buffer */
	70	void put_c(const char c);
	71
	72	/** Inserts a null-terminated string to the end of the buffer */
	73	void put_s(const char* s);
	74
	75	/** Indents a new line in JSON code depending on the current depth.
	76	* If the maximum depth is reached, the code is not indented further.
	77	* Used only if pretty printing is set. */
78	void put_depth();
79
80	/** Skips white spaces until a non-white-space character is found.
81	* Returns false if the end of the buffer is reached before a non-white-space
82	* character is found, otherwise returns true. */
83	bool skip_white_spaces();
84
85	/** Attempts to find a JSON string at the current buffer position.
86	* Returns true if a valid string is found before the end of the buffer
87	* is reached, otherwise returns false. */
88	bool check_for_string();
89
90	/** Attempts to find a JSON number at the current buffer position.
91	* For number format see http://json.org/.
92	* Returns true if a valid number is found before the end of the buffer
93	* is reached, otherwise returns false. */
94	bool check_for_number();
95
96	/** Checks if the current character in the buffer is a valid JSON separator.
97	* Separators are: commas (,), colons (:) and curly and square brackets ({}[]).
98	* This function also steps over the separator if it's a comma.
99	* Returns true if a separator is found, otherwise returns false. */
100	bool check_for_separator();
101
102	/** Attempts to find a specific JSON literal at the current buffer position.
103	* Returns true if the literal is found, otherwise returns false.
104	* @param p_literal [in] Literal value to find */
105	bool check_for_literal(const char* p_literal);
106
107	/** Adds a separating comma (,) if the previous token is a value, or an object or
108	* array end mark. */
109	void put_separator();
110
111	/** No copy constructor. Implement if needed. */
112	JSON_Tokenizer(const JSON_Tokenizer&);
113
114	/** No assignment operator. Implement if needed. */
115	JSON_Tokenizer& operator=(const JSON_Tokenizer&);
116
117	public:
118	/** Constructs a tokenizer with an empty buffer.
119	* Use put_next_token() to build a JSON document and get_buffer()/get_buffer_length() to retrieve it */
120	JSON_Tokenizer(bool p_pretty = false) : pretty(p_pretty) { init(0, 0); }
121
122	/** Constructs a tokenizer with the buffer parameter.
123	* Use get_next_token() to read JSON tokens and get_pos()/set_pos() to move around in the buffer */
124	JSON_Tokenizer(const char* p_buf, const size_t p_buf_len) : pretty(false) { init(p_buf, p_buf_len); }
125
126	/** Destructor. Frees the buffer. */
127	~JSON_Tokenizer();
128
129	/** Reinitializes the tokenizer with a new buffer. */
130	inline void set_buffer(const char* p_buf, const size_t p_buf_len) { init(p_buf, p_buf_len); }
131
132	/** Retrieves the buffer containing the JSON document. */
133	inline const char* get_buffer() { return buf_ptr; }
134
135	/** Retrieves the length of the buffer containing the JSON document. */
136	inline size_t get_buffer_length() { return buf_len; }
137
138	/** Extracts a JSON token from the current buffer position.
139	* @param p_token [out] Extracted token type, or JSON_TOKEN_ERROR if no token
140	* could be extracted, or JSON_TOKEN_NONE if the buffer end is reached
141	* @param p_token_str [out] A pointer to the token data (if any):
142	* the name of a JSON object field (without quotes), or the string representation
143	* of a JSON number, or a JSON string (with quotes and double-escaped).
144	* @param p_str_len [out] The character length of the token data (if there is data)
145	* @return The number of characters extracted
146	* @note The token data is not copied, *p_token_str will point to the start of the
147	* data in the tokenizer's buffer. */
148	int get_next_token(json_token_t* p_token, char** p_token_str, size_t* p_str_len);
149
150	/** Gets the current read position in the buffer.
151	* This is where get_next_token() will read from next. */
152	inline size_t get_buf_pos() { return buf_pos; }
153
154	/** Sets the current read position in the buffer.
155	* This is where get_next_buffer() will read from next. */
156	inline void set_buf_pos(const size_t p_buf_pos) { buf_pos = p_buf_pos; }
157
158	/** Adds the specified JSON token to end of the buffer.
159	* @param p_token [in] Token type
160	* @param p_token_str [in] The name of a JSON object field (without quotes), or
161	* the string representation of a JSON number, or a JSON string (with quotes
162	* and double-escaped). For all the other tokens this parameter will be ignored.
163	* @return The number of characters added to the JSON document */
164	int put_next_token(json_token_t p_token, const char* p_token_str = 0);
165
3f84031e	166	/** Adds raw data to the end of the buffer.
	167	* @param p_data [in] Pointer to the beginning of the data
	168	* @param p_len [in] Length of the data in bytes */
	169	void put_raw_data(const char* p_data, size_t p_len);
	170
970ed795 EL	171	}; // class JSON_Tokenizer
	172
	173	// A dummy JSON tokenizer, use when there is no actual JSON document
	174	static JSON_Tokenizer DUMMY_BUFFER;
	175
3abe9331	176	/** Converts a string into a JSON string by replacing all control characters
	177	* with JSON escape sequences, if available, or with the \uHHHH escape sequence.
	178	* The string is also wrapped inside a set of double quotes and all double quotes
	179	* and backslash characters are double-escaped.
	180	*
	181	* Returns an expstring, that needs to be freed. */
	182	extern char* convert_to_json_string(const char* str);
	183
970ed795 EL	184
	185	#endif /* JSON_TOKENIZER_HH */
	186