27c6da5c17c9bf420b7dc4f2edfb029d57833ce9
[deliverable/titan.core.git] / core / XER.hh
1 ///////////////////////////////////////////////////////////////////////////////
2 // Copyright (c) 2000-2014 Ericsson Telecom AB
3 // All rights reserved. This program and the accompanying materials
4 // are made available under the terms of the Eclipse Public License v1.0
5 // which accompanies this distribution, and is available at
6 // http://www.eclipse.org/legal/epl-v10.html
7 ///////////////////////////////////////////////////////////////////////////////
8 #ifndef XER_HH_
9 #define XER_HH_
10
11 #include "Types.h"
12 #include "Encdec.hh"
13 #include <stddef.h> // for size_t
14 #include <string.h> // strncmp for the inline function
15
16 class XmlReaderWrap;
17
18 class Base_Type;
19 class TTCN_Module;
20
21 /** @defgroup XER XER codec
22 * @{
23 *
24 * @brief ASN.1 XML Encoding Rules, ITU-T Rec X.693 and amd1
25 */
26
27 /** XER flags for various uses.
28 *
29 * Low values specify the XML encoding variant (Basic, Canonical, Extended)
30 * Other bits have dual uses:
31 * - set in XERdescriptor_t::xer_bits, according to XML encoding attributes
32 * - passed in as additional flags in the \c flavor parameter, usually
33 * to XER_encode. These are used when encoding attributes in a parent type
34 * influence the encoding of its components (e.g. EMBED-VALUES on a record
35 * change the encoding of all components).
36 */
enum XER_flavor {
  /* Encoding variants plus the default-namespace bits; XER_MASK covers
   * exactly these five bits. */
  XER_BASIC           = 1U << 0,  /**< 0x00000001 Basic XER with indentation */
  XER_CANONICAL       = 1U << 1,  /**< 0x00000002 Canonical XER, no indentation */
  XER_EXTENDED        = 1U << 2,  /**< 0x00000004 Extended XER */
  DEF_NS_PRESENT      = 1U << 3,  /**< 0x00000008 */
  DEF_NS_SQUASHED     = 1U << 4,  /**< 0x00000010 */
  XER_MASK            = 0x1FU,    /**< All the "real" XER flavors plus DEF_NS */

  /* Additional flags, for the parent to pass information to its children
   * (when the parent affects the child, e.g. LIST) */

  /** 0x00000020 Escape according to X.680/2002, 11.15.8,
   * used internally by UNIVERSAL_CHARSTRING. */
  XER_ESCAPE_ENTITIES = 1U << 5,
  /** 0x00000040 Generating code for the contained type of a
   * record-of/set-of. Only affects BOOLEAN, CHOICE, ENUMERATED and NULL
   * (see Table 5 in X.680 (11/2008) clause 26.5) */
  XER_RECOF           = 1U << 6,

  /* More flags for XERdescriptor_t::xer_bits */
  ANY_ATTRIBUTES      = 1U << 7,  /**< 0x00000080 */
  ANY_ELEMENT         = 1U << 8,  /**< 0x00000100 */
  XER_ATTRIBUTE       = 1U << 9,  /**< 0x00000200 */
  BASE_64             = 1U << 10, /**< 0x00000400 */
  XER_DECIMAL         = 1U << 11, /**< 0x00000800 */
  /* DEFAULT-FOR-EMPTY has its own field */
  EMBED_VALUES        = 1U << 12, /**< 0x00001000 */
  XER_LIST            = 1U << 13, /**< 0x00002000 LIST encoding instruction
                                       for record-of/set-of. */
  /* NAME is stored in the descriptor */
  /* NAMESPACE is folded into the name */
  XER_TEXT            = 1U << 14, /**< 0x00004000 */
  UNTAGGED            = 1U << 15, /**< 0x00008000 */
  USE_NIL             = 1U << 16, /**< 0x00010000 */
  USE_NUMBER          = 1U << 17, /**< 0x00020000 */
  USE_ORDER           = 1U << 18, /**< 0x00040000 */
  USE_QNAME           = 1U << 19, /**< 0x00080000 */
  USE_TYPE_ATTR       = 1U << 20, /**< 0x00100000 either USE-TYPE or USE-UNION */
  HAS_1UNTAGGED       = 1U << 21, /**< 0x00200000 member, and it's
                                       character-encodable */
  /* another hint to pass down to the children: */
  PARENT_CLOSED       = 1U << 22, /**< 0x00400000 */
  FORM_UNQUALIFIED    = 1U << 23, /**< 0x00800000 (qualified is more frequent) */
  XER_TOPLEVEL        = 1U << 24, /**< 0x01000000 (toplevel, for decoding) */
  /** 0x02000000 always encode on one line:
   * <foo>content</foo>, never <foo>\ncontent\n</foo> */
  SIMPLE_TYPE         = 1U << 25,
  /** 0x04000000 boolean and enum encode themselves as empty elements
   * in BXER only. This also influences them in record-of */
  BXER_EMPTY_ELEM     = 1U << 26,
  ANY_FROM            = 1U << 27, /**< 0x08000000 anyElement from ...
                                       or anyAttributes from ... */
  ANY_EXCEPT          = 1U << 28, /**< 0x10000000 anyElement except ...
                                       or anyAttributes except ... */
  /** 0x20000000 clean up and exit instead of throwing a decoding error,
   * used on alternatives of a union with USE-UNION */
  EXIT_ON_ERROR       = 1U << 29
};
86
/** Actions for the WHITESPACE encoding instruction.
 *
 * The numeric order is significant: WHITESPACE_COLLAPSE includes the effect
 * of WHITESPACE_REPLACE, and the code relies on WHITESPACE_COLLAPSE having
 * the higher value. The values are spelled out to make that explicit. */
enum XER_whitespace_action {
  WHITESPACE_PRESERVE = 0,
  WHITESPACE_REPLACE  = 1,
  WHITESPACE_COLLAPSE = 2
};
95
96 /// Check that \p f has the canonical flavor.
97 inline bool is_canonical(unsigned int f)
98 {
99 return (f & XER_CANONICAL) != 0;
100 }
101
102 inline bool is_exer(unsigned int f)
103 {
104 return (f & XER_EXTENDED) != 0;
105 }
106
107 /** Is this a member of a SEQUENCE OF
108 *
109 * @param f XER flavor
110 * @return \c true if \p contains \c XER_RECOF, \c false otherwise
111 */
112 inline bool is_record_of(unsigned int f)
113 {
114 return (f & XER_RECOF) != 0;
115 }
116
117 /** Do list encoding
118 *
119 * This is now hijacked to mean "the enclosing type told us to omit our tag".
120 * Hence the check for USE-NIL too.
121 *
122 * @param f XER flavor
123 * @return \c true if \c XER_EXTENDED and either \c XER_LIST or \c USE_NIL is set.
124 */
125 inline bool is_exerlist(unsigned int f)
126 {
127 return (f & XER_EXTENDED) && ((f & (XER_LIST|USE_NIL|USE_TYPE_ATTR)) != 0);
128 }
129
130 /** Descriptor for XER encoding/decoding during runtime.
131 *
132 * This structure contains XER enc/dec information for the runtime.
133 *
134 * There is an instance of this struct for most TTCN3/ASN1 types.
135 * Because TITAN generates type aliases (typedefs) when one type references
136 * another (e.g. "type integer i1" results in "typedef INTEGER i1"),
137 * this struct holds information to distinguish them during encoding.
138 *
139 * Only those encoding instructions need to be recorded which can apply to
140 * scalar types (e.g. BOOLEAN, REAL, etc., usually implemented by classes in core/)
141 * because the same code needs to handle all varieties.
142 *
 * - ANY-ELEMENT : UTF8String
144 * - BASE64 : OCTET STRING, open type, restricted character string
145 * - DECIMAL : REAL
146 * - NAME : anything (this is already present as \c name)
147 * - NAMESPACE : hmm
148 * - TEXT : INTEGER, enum
149 * - USE-NUMBER : enum
150 * - WHITESPACE : restricted character string
151 *
152 * ANY-ATTRIBUTE, EMBED-VALUES, LIST, USE-TYPE, USE-UNION apply to sequence/choice types;
153 * their effect will be resolved by the compiler.
154 *
155 * Instances of this type are written by the compiler into the generated code,
156 * one for each type. For a TTCN3 type foo_bar, there will be a class
157 * foo__bar and a XERdescriptor_t instance named foo__bar_xer_.
158 *
159 * Each built-in type has a descriptor (e.g. INTEGER_xer_) in the runtime.
160 *
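 * The \a names fields contain the tail of the tag including a newline, e.g.
 * \c "INTEGER>\n" (this is what XER_STRUCT2 generates for the built-in
 * types). This allows for a more efficient output of the tags,
 * minimizing the number of one-character inserts into the buffer.
 *
 * NOTE(review): the description and the code examples that follow still
 * refer to a single \a name / \a namelen pair whose string starts with
 * \c "</"; they appear to predate \a names / \a namelens - verify the
 * offsets against the encoder sources before relying on them.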
164 *
165 * The start tag is written as an 'open angle bracket' character,
166 * followed by the \a name field without its first two characters (\c "</" ).
167 *
168 * In case of the canonical encoding (\c CXER ) there is no indenting,
169 * so the final newline is omitted by reducing the length by one.
170 *
171 * Example:
172 * @code
173 * int Foo::XER_encode(const XERdescriptor_t& p_td,
174 * TTCN_Buffer& p_buf, unsigned int flavor, int indent) const {
175 * int canon = is_canonical(flavor);
176 * if (!canon) do_indent(p_buf, indent);
177 * // output the start tag
178 * p_buf.put_c('<');
179 * p_buf.put_s((size_t)p_td.namelen-2-canon, (const unsigned char*)p_td.name+2);
180 * // this is not right if Foo has attributes :(
181 * // we'll need to reduce namelen further (or just get rid of this hackery altogether)
182 *
183 * // output actual content
184 * p_buf.put_.....
185 *
186 * // output the closing tag
187 * if (!canon) do_indent(p_buf, indent);
188 * p_buf.put_s((size_t)p_td.namelen-canon, (const unsigned char*)p_td.name);
189 * }
190 * @endcode
191 *
192 * Empty element tag:
193 *
194 * @code
195 * int Foo::XER_encode(const XERdescriptor_t& p_td,
196 * TTCN_Buffer& p_buf, unsigned int flavor, int indent) const {
197 * int canon = is_canonical(flavor);
198 * if (!canon) do_indent(p_buf, indent);
199 * // output an empty element tag
200 * p_buf.put_c('<');
201 * p_buf.put_s((size_t)p_td.namelen-4, (const unsigned char*)p_td.name+2);
202 * p_buf.put_s(3 - canon, (const unsigned char*)"/>\n");
203 * }
204 * @endcode
205 *
206 * @note We don't generate the XML prolog. This is required for Canonical XER
207 * (X.693 9.1.1) and permitted for Basic-XER (8.2.1).
208 *
209 * @note X.693 amd1 (EXER) 10.3.5 states: If an "ExtendedXMLValue" is empty,
210 * and its associated tags have not been removed by the use of an UNTAGGED
211 * encoding instruction, then the associated preceding and following tags
212 * <b>can (as an encoder's option)</b> be replaced with
213 * an XML empty-element tag (see ITU-T Rec. X.680 | ISO/IEC 8824-1, 16.8).
214 * This is called the associated empty-element tag.
215 *
216 * @note X.693 (XER) 9.1.4 states: (for Canonical XER)
217 * If the XML value notation permits the use of an XML empty-element tag
218 * (see ITU-T Rec. X.680 |ISO/IEC 8824-1, 15.5 and 16.8),
219 * then this empty-element tag @b shall be used.
220 *
221 * @note After editing XERdescriptor_t, make sure to change XER_STRUCT2 here
222 * and generate_code_xerdescriptor() in Type.cc.
223 * */
struct XERdescriptor_t
{
  /* Every member below is const, so a descriptor is filled in once by its
   * aggregate initializer (see XER_STRUCT2). The initializer is positional:
   * keep the member order in sync with it. */

  /** (closing) Tag name, including a newline.
   * First is for basic and canonical XER, second for EXER.
   * The descriptors created by XER_STRUCT2 store the XML name followed
   * by ">\n" in both entries. */
  const char *names[2];
  /** Length of closing tag string (strlen of names[i]) */
  const unsigned short namelens[2];
  /** Various EXER flags (a combination of XER_flavor bits) */
  const unsigned long xer_bits;
  /** Whitespace handling */
  const XER_whitespace_action whitespace;
  /** value to compare for DEFAULT-FOR-EMPTY */
  const Base_Type* dfeValue;
  /** The module to which the type belongs. May be NULL in a descriptor
   * for a built-in type, e.g. in INTEGER_xer_ */
  const TTCN_Module* my_module;
  /** Index into the module's namespace list.
   * -1 means no namespace.
   * >= 0 and FORM_UNQUALIFIED means that there IS a namespace,
   * it just doesn't show up in the XML (but libxml2 will return it). */
  const int ns_index;

  /** Number of namespace URIs (presumably the number of entries
   * in \a ns_uris - TODO confirm) */
  const unsigned short nof_ns_uris;

  /** List of namespace URIs
   * In case of "anyElement" variants this list contains the valid ("anyElement from ...")
   * or invalid ("anyElement except ...") namespace URIs.
   * The unqualified namespace is marked by an empty string (""). */
  const char** ns_uris;
};
255
256 /** Check the name of an XML node against a XER type descriptor.
257 *
258 * @param name the (local, unqualified) name of the XML element
259 * @param p_td the type descriptor
260 * @param exer \c true if Extended XER decoding, \c false for Basic and Canonical XER
261 * @return \c true if \p name corresponds to the type descriptor, \c false otherwise.
262 */
263 inline bool check_name(const char *name, const XERdescriptor_t& p_td, int exer)
264 {
265 return strncmp(name, p_td.names[exer], p_td.namelens[exer]-2) == 0
266 && name[p_td.namelens[exer]-2] == '\0';
267 }
268
269 /** Verify the namespace of an XML node against a XER type descriptor.
270 *
271 * @pre EXER decoding is in progress
272 *
273 * @param ns_uri the URI of the current node
274 * @param p_td the type descriptor
275 * @return \c true if \p ns_uri is NULL and the type has no namespace
276 * or it's the default namespace.
277 * @return \c true if \p ns_uri is not NULL and it matches the one referenced
278 * by \p p_td.
279 * @return \c false otherwise.
280 */
281 bool check_namespace(const char *ns_uri, const XERdescriptor_t& p_td);
282
283 /** Check that the current element matches the XER descriptor
284 *
285 * Calls TTCN_EncDec_ErrorContext::error() if it doesn't.
286 *
287 * @param reader XML reader
288 * @param p_td XER descriptor
289 * @param exer 0 for Basic/Canonical XER, 1 for EXER
290 * @return the name of the current element
291 */
292 const char* verify_name(XmlReaderWrap& reader, const XERdescriptor_t& p_td, int exer);
293
294 /** Check the end tag
295 *
296 * Calls verify_name(), then compares \a depth with the current XML depth
297 * and calls TTCN_EncDec_ErrorContext::error() if they don't match.
298 *
299 * @param reader XML reader
300 * @param p_td XER descriptor
301 * @param depth XML tag depth (0 for top-level element)
302 * @param exer 0 for Basic/Canonical XER, 1 for EXER
303 */
304 void verify_end(XmlReaderWrap& reader, const XERdescriptor_t& p_td, const int depth, int exer);
305
306 class TTCN_Buffer;
307
308 /** Output the namespace prefix
309 *
310 * The namespace prefix is determined by the XER descriptor (@a my_module
311 * and @a ns_index fields). It is not written if p_td.xer_bits has
312 * FORM_UNQUALIFIED.
313 *
314 * @param p_td XER descriptor
315 * @param p_buf buffer to write into
316 *
317 * @pre the caller should check that E-XER encoding is in effect.
318 */
319 void write_ns_prefix(const XERdescriptor_t& p_td, TTCN_Buffer& p_buf);
320
321 /** Output the beginning of an XML attribute.
322 *
323 * Writes a space, the attribute name (from \p p_td), and the string "='".
324 * @post the buffer is ready to receive the actual value
325 *
326 * @param p_td XER descriptor (contains the attribute name)
327 * @param p_buf buffer to write into
328 */
329 inline void begin_attribute(const XERdescriptor_t& p_td, TTCN_Buffer& p_buf)
330 {
331 p_buf.put_c(' ');
332 write_ns_prefix(p_td, p_buf);
333 p_buf.put_s((size_t)p_td.namelens[1]-2, (const unsigned char*)p_td.names[1]);
334 p_buf.put_s((size_t)2, (const unsigned char*)"='");
335 }
336
337 /** Indent.
338 *
339 * @param buf buffer to write into.
340 * @param level indent level
341 *
342 * Writes the appropriate amount of indentation into \p buf.
343 *
 * Indentation is currently done with tabs.
345 * */
346 int do_indent(TTCN_Buffer& buf, int level);
347
348 /** Ensures that the anyElement or anyAttribute field respects its namespace
349 * restrictions.
350 * In case of "anyElement from ..." or "anyAttributes from ..." the namespace
351 * needs to be in the specified list.
352 * In case of "anyElement except ..." or "anyAttributes except ..." it cannot
353 * match any of the namespaces from the list.
354 * An invalid namespace causes a dynamic test case error.
355 *
356 * @param p_td type descriptor of the field in question, contains the list of
357 * valid or invalid namespaces
 * @param p_xmlns contains the namespace in question
359 */
360 void check_namespace_restrictions(const XERdescriptor_t& p_td, const char* p_xmlns);
361
362
/* With DEFINE_XER_STRUCT defined, the macros below expand to the actual
 * definitions of the descriptors; without it they expand to extern
 * declarations only. */
#ifdef DEFINE_XER_STRUCT
/** Define a XER structure.
 *
 * The initializer is positional and follows the member order of
 * XERdescriptor_t: names, namelens, xer_bits (0), whitespace
 * (WHITESPACE_PRESERVE), dfeValue (NULL), my_module (NULL), ns_index (0),
 * nof_ns_uris (0), ns_uris (NULL).
 * Both names are xmlname followed by ">\n"; the length is
 * 2+sizeof(xmlname)-1, i.e. the string length of xmlname plus the two
 * trailing characters.
 *
 * NOTE(review): ns_index is initialized to 0 here, while the member's
 * documentation says -1 means "no namespace" - confirm this is intended.
 *
 * @param type_name the name of a Titan runtime class
 * @param xmlname the XML tag name (a string literal)
 */
# define XER_STRUCT2(type_name,xmlname) \
  extern const XERdescriptor_t type_name##_xer_ = { \
    { xmlname ">\n", xmlname ">\n" }, \
    { 2+sizeof(xmlname)-1, 2+sizeof(xmlname)-1 }, \
    0UL, WHITESPACE_PRESERVE, NULL, NULL, 0, 0, NULL }
// The compiler should fold the two identical strings into one

/** Define cpy##_xer_ as a reference to the existing original##_xer_,
 * so that both names denote the same descriptor object. */
# define XER_STRUCT_COPY(cpy,original) \
  const XERdescriptor_t& cpy##_xer_ = original##_xer_
#else
/** Declare a XER structure.
 * @param type_name the name of a Titan runtime class
 * @param xmlname the XML tag name (unused in the declaration-only form)
 */
# define XER_STRUCT2(type_name,xmlname) extern const XERdescriptor_t type_name##_xer_
/** Declare cpy##_xer_ as a reference to a XER structure
 * (\a original is unused in the declaration-only form). */
# define XER_STRUCT_COPY(cpy,original) extern const XERdescriptor_t& cpy##_xer_
#endif

/** Declare a XER structure where the name of the type matches the tag.
 * The tag name is obtained by stringizing \a name. */
# define XER_STRUCT(name) XER_STRUCT2(name, #name)
384
/* XER descriptors for built-in types.
 * The XML tag names are defined in Table 4, referenced by clause
 * 11.25.2 (X.680/2002) or 12.36.2 (X.680/2008)
 *
 * Each line below declares (or, with DEFINE_XER_STRUCT, defines) an object
 * named <first argument>_xer_. XER_STRUCT uses the identifier itself as
 * the XML tag name, XER_STRUCT2 takes the tag name explicitly. */

// Types shared between ASN.1 and TTCN-3
XER_STRUCT2(BITSTRING, "BIT_STRING");
XER_STRUCT (BOOLEAN);
XER_STRUCT (CHARSTRING);
XER_STRUCT2(FLOAT, "REAL");
XER_STRUCT (INTEGER);
XER_STRUCT2(OBJID, "OBJECT_IDENTIFIER");
XER_STRUCT2(OCTETSTRING, "OCTET_STRING");
XER_STRUCT (UNIVERSAL_CHARSTRING);

XER_STRUCT(RELATIVE_OID);

// ASN.1 types

// EMBEDDED PDV: the type itself is tagged as SEQUENCE, followed by the
// descriptors of its (nested) components
XER_STRUCT2(EMBEDDED_PDV, "SEQUENCE");
XER_STRUCT2(EMBEDDED_PDV_identification, "identification");
XER_STRUCT2(EMBEDDED_PDV_identification_sxs, "syntaxes");
XER_STRUCT2(EMBEDDED_PDV_identification_sxs_abs, "abstract");
XER_STRUCT2(EMBEDDED_PDV_identification_sxs_xfr, "transfer");
XER_STRUCT2(EMBEDDED_PDV_identification_sx , "syntax");
XER_STRUCT2(EMBEDDED_PDV_identification_pci, "presentation-context-id");
XER_STRUCT2(EMBEDDED_PDV_identification_cn , "context-negotiation");
XER_STRUCT2(EMBEDDED_PDV_identification_cn_pci , "presentation-context-id");
XER_STRUCT2(EMBEDDED_PDV_identification_cn_tsx , "transfer-syntax");
XER_STRUCT2(EMBEDDED_PDV_identification_ts , "transfer-syntax");
XER_STRUCT2(EMBEDDED_PDV_identification_fix, "fixed");
XER_STRUCT2(EMBEDDED_PDV_data_value_descriptor, "data-value-descriptor");
XER_STRUCT2(EMBEDDED_PDV_data_value, "data-value");


// EXTERNAL: also tagged as SEQUENCE
XER_STRUCT2(EXTERNAL, "SEQUENCE");
XER_STRUCT2(EXTERNAL_direct_reference , "direct-reference");
XER_STRUCT2(EXTERNAL_indirect_reference, "indirect-reference");
XER_STRUCT2(EXTERNAL_data_value_descriptor, "data-value-descriptor");
XER_STRUCT2(EXTERNAL_encoding, "encoding");
XER_STRUCT2(EXTERNAL_encoding_singleASN , "single-ASN1-type");
XER_STRUCT2(EXTERNAL_encoding_octet_aligned, "octet-aligned");
XER_STRUCT2(EXTERNAL_encoding_arbitrary , "arbitrary");

// The big, scary ASN.1 unrestricted character string
// Its identification descriptors are references to the EMBEDDED_PDV ones
// (XER_STRUCT_COPY), not separate objects.
XER_STRUCT2(CHARACTER_STRING, "SEQUENCE");
XER_STRUCT_COPY(CHARACTER_STRING_identification, EMBEDDED_PDV_identification);
XER_STRUCT_COPY(CHARACTER_STRING_identification_sxs, EMBEDDED_PDV_identification_sxs);
XER_STRUCT_COPY(CHARACTER_STRING_identification_sxs_abs, EMBEDDED_PDV_identification_sxs_abs);
XER_STRUCT_COPY(CHARACTER_STRING_identification_sxs_xfr, EMBEDDED_PDV_identification_sxs_xfr);
XER_STRUCT_COPY(CHARACTER_STRING_identification_sx , EMBEDDED_PDV_identification_sx);
XER_STRUCT_COPY(CHARACTER_STRING_identification_pci, EMBEDDED_PDV_identification_pci);
XER_STRUCT_COPY(CHARACTER_STRING_identification_cn , EMBEDDED_PDV_identification_cn);
XER_STRUCT_COPY(CHARACTER_STRING_identification_cn_pci , EMBEDDED_PDV_identification_cn_pci);
XER_STRUCT_COPY(CHARACTER_STRING_identification_cn_tsx , EMBEDDED_PDV_identification_cn_tsx);
XER_STRUCT_COPY(CHARACTER_STRING_identification_ts , EMBEDDED_PDV_identification_ts);
XER_STRUCT_COPY(CHARACTER_STRING_identification_fix, EMBEDDED_PDV_identification_fix);
// this one is used for decoding only (only to check that it's absent)
XER_STRUCT2(CHARACTER_STRING_data_value_descriptor, "data-value-descriptor");
// this can not be folded with EMBEDDED-PDV (the tag name differs:
// "string-value" here, "data-value" there)
XER_STRUCT2(CHARACTER_STRING_data_value, "string-value");

// ASN.1 restricted character strings
XER_STRUCT(GeneralString);
XER_STRUCT(NumericString);
XER_STRUCT(UTF8String);
XER_STRUCT(PrintableString);
XER_STRUCT(UniversalString);

XER_STRUCT(BMPString);
XER_STRUCT(GraphicString);
XER_STRUCT(IA5String);
XER_STRUCT(TeletexString);
XER_STRUCT(VideotexString);
XER_STRUCT(VisibleString);

XER_STRUCT2(ASN_NULL, "NULL");
XER_STRUCT2(ASN_ROID, "RELATIVE_OID");
XER_STRUCT (ASN_ANY); // obsoleted by 2002

// TTCN-3 types
XER_STRUCT2(HEXSTRING, "hexstring");
XER_STRUCT2(VERDICTTYPE, "verdicttype");
467
468 /** @} */
469
470 #endif /*XER_HH_*/
This page took 0.135715 seconds and 4 git commands to generate.