Commit | Line | Data |
---|---|---|
ddfa8903 PP |
1 | # The MIT License (MIT) |
2 | # | |
3 | # Copyright (c) 2020 Philippe Proulx <pproulx@efficios.com> | |
4 | # | |
5 | # Permission is hereby granted, free of charge, to any person obtaining | |
6 | # a copy of this software and associated documentation files (the | |
7 | # "Software"), to deal in the Software without restriction, including | |
8 | # without limitation the rights to use, copy, modify, merge, publish, | |
9 | # distribute, sublicense, and/or sell copies of the Software, and to | |
10 | # permit persons to whom the Software is furnished to do so, subject to | |
11 | # the following conditions: | |
12 | # | |
13 | # The above copyright notice and this permission notice shall be | |
14 | # included in all copies or substantial portions of the Software. | |
15 | # | |
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |
19 | # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |
20 | # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |
21 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |
22 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
23 | ||
24 | import re | |
2d55dc7d PP |
25 | import typing |
26 | from typing import Optional, List, Iterable | |
27 | from barectf.typing import Index, _OptStr | |
ddfa8903 PP |
28 | |
29 | ||
2d55dc7d PP |
# Names which `from <this module> import *` exports; because they're
# listed explicitly, the underscore-prefixed ones are exported too.
__all__ = ['OptDescr', '_OptItem', '_NonOptItem', '_Error', 'parse', 'OrigArgs']


# types

# List of original (raw) command-line arguments, as passed to parse().
OrigArgs = List[str]
ddfa8903 PP |
35 | |
36 | ||
# Option descriptor.
#
# Describes a single command-line option by its short name and/or its
# long name, and indicates whether or not the option expects an
# argument.
class OptDescr:
    # Builds an option descriptor having the short name `short_name`
    # (without the leading `-`) and/or the long name `long_name`
    # (without the leading `--`).
    #
    # At least one of `short_name` and `long_name` must be set.
    #
    # If `has_arg` is `True`, then it is expected that such an option
    # has an argument.
    def __init__(self, short_name: _OptStr = None, long_name: _OptStr = None,
                 has_arg: bool = False):
        # a descriptor without any name could never match an argument
        assert short_name is not None or long_name is not None
        self._short_name = short_name
        self._long_name = long_name
        self._has_arg = has_arg

    # Short name (without the leading `-`), or `None` if there's none.
    @property
    def short_name(self) -> _OptStr:
        return self._short_name

    # Long name (without the leading `--`), or `None` if there's none.
    @property
    def long_name(self) -> _OptStr:
        return self._long_name

    # Whether or not the described option expects an argument.
    #
    # This is always the `has_arg` value given at construction time,
    # hence a plain `bool`.
    @property
    def has_arg(self) -> bool:
        return self._has_arg
63 | ||
64 | ||
# Common base class of the parsed items (`_OptItem` and `_NonOptItem`);
# it carries no state or behaviour of its own.
class _Item:
    pass
67 | ||
68 | ||
# Parsed option argument item: pairs the descriptor of the option which
# was found with the text of its argument, if any.
class _OptItem(_Item):
    # Builds an option item for the option described by `descr`.
    #
    # `arg_text` is the text of the option's argument, or `None` when
    # there's none.
    def __init__(self, descr: OptDescr, arg_text: _OptStr = None):
        self._descr, self._arg_text = descr, arg_text

    # Descriptor of the option this item corresponds to.
    @property
    def descr(self) -> OptDescr:
        return self._descr

    # Text of the option's argument, or `None`.
    @property
    def arg_text(self) -> _OptStr:
        return self._arg_text
82 | ||
83 | ||
# Parsed non-option argument item: keeps the text of the argument along
# with where it was found.
class _NonOptItem(_Item):
    # Builds a non-option item having the text `text`.
    #
    # `orig_arg_index` is the index of this argument within the
    # original arguments, whereas `non_opt_index` is its index amongst
    # the non-option arguments only.
    def __init__(self, text: str, orig_arg_index: Index, non_opt_index: Index):
        self._text = text
        self._orig_arg_index, self._non_opt_index = orig_arg_index, non_opt_index

    # Text of the non-option argument.
    @property
    def text(self) -> str:
        return self._text

    # Index of this argument within the original arguments.
    @property
    def orig_arg_index(self) -> Index:
        return self._orig_arg_index

    # Index of this argument amongst the non-option arguments.
    @property
    def non_opt_index(self) -> Index:
        return self._non_opt_index
102 | ||
103 | ||
# Results of parse(): the parsed items, plus how the original arguments
# were split between the ingested ones and the remaining ones.
class _ParseRes:
    # Builds parsing results from the parsed items `items`, the
    # original arguments `ingested_orig_args` which were consumed to
    # produce them, and the original arguments `remaining_orig_args`
    # which were left unparsed.
    def __init__(self, items: List[_Item], ingested_orig_args: OrigArgs,
                 remaining_orig_args: OrigArgs):
        self._items = items
        self._ingested_orig_args, self._remaining_orig_args = (
            ingested_orig_args,
            remaining_orig_args,
        )

    # Parsed items, in parsing order.
    @property
    def items(self) -> List[_Item]:
        return self._items

    # Original arguments which were ingested to produce the items.
    @property
    def ingested_orig_args(self) -> OrigArgs:
        return self._ingested_orig_args

    # Original arguments which were not parsed.
    @property
    def remaining_orig_args(self) -> OrigArgs:
        return self._remaining_orig_args
123 | ||
124 | ||
# Parsing error: carries the offending original argument, its index,
# and a message (which is also the exception's message).
class _Error(Exception):
    # Builds a parsing error about the original argument `orig_arg`
    # found at the index `orig_arg_index`, with the message `msg`.
    def __init__(self, orig_arg_index: Index, orig_arg: str, msg: str):
        super().__init__(msg)
        self._msg = msg
        self._orig_arg = orig_arg
        self._orig_arg_index = orig_arg_index

    # Index, within the original arguments, of the offending argument.
    @property
    def orig_arg_index(self) -> Index:
        return self._orig_arg_index

    # Offending original argument.
    @property
    def orig_arg(self) -> str:
        return self._orig_arg

    # Error message.
    @property
    def msg(self) -> str:
        return self._msg
144 | ||
145 | ||
# Results of parse_short_opts() and parse_long_opt(); internal.
class _OptParseRes(typing.NamedTuple):
    # option items found within a single original argument
    items: List[_Item]

    # how much to advance the original argument index: 1, or 2 when the
    # next original argument was used as the option's argument
    orig_arg_index_incr: int
ddfa8903 PP |
150 | |
151 | ||
152 | # Parses the original arguments `orig_args` (list of strings), | |
153 | # considering the option descriptors `opt_descrs` (set of `OptDescr` | |
154 | # objects), and returns a corresponding `_ParseRes` object. | |
155 | # | |
156 | # This function considers ALL the elements of `orig_args`, including the | |
157 | # first one, so that you would typically pass `sys.argv[1:]` to exclude | |
158 | # the program/script name. | |
159 | # | |
160 | # This argument parser supports: | |
161 | # | |
162 | # * Short options without an argument, possibly tied together: | |
163 | # | |
164 | # -f -auf -n | |
165 | # | |
166 | # * Short options with arguments: | |
167 | # | |
168 | # -b 45 -f/mein/file -xyzhello | |
169 | # | |
170 | # * Long options without an argument: | |
171 | # | |
172 | # --five-guys --burger-king --pizza-hut --subway | |
173 | # | |
174 | # * Long options with arguments: | |
175 | # | |
176 | # --security enable --time=18.56 | |
177 | # | |
178 | # * Non-option arguments (anything else). | |
179 | # | |
180 | # This function does NOT accept `--` as an original argument; while it | |
181 | # means "end of options" for many command-line tools, this function is | |
182 | # all about keeping the order of the arguments, so it doesn't mean much | |
183 | # to put them at the end. This has the side effect that a non-option | |
184 | # argument cannot have the form of an option, for example if you need to | |
185 | # pass the exact relative path `--lentil-soup`. In that case, you would | |
186 | # need to pass `./--lentil-soup`. | |
187 | # | |
188 | # This function accepts duplicate options (the resulting list of items | |
189 | # contains one entry for each instance). | |
190 | # | |
191 | # On success, this function returns a `_ParseRes` object which contains | |
192 | # a list of items as its `items` property. Each item is either an | |
193 | # option item or a non-option item. | |
194 | # | |
195 | # The returned list contains the items in the same order that the | |
196 | # original arguments `orig_args` were parsed, including non-option | |
197 | # arguments. This means, for example, that for | |
198 | # | |
199 | # --hello --meow=23 /path/to/file -b | |
200 | # | |
201 | # the function creates a list of four items: two options, one | |
202 | # non-option, and one option. | |
203 | # | |
204 | # In the returned object, `ingested_orig_args` is the list of ingested | |
205 | # original arguments to produce the resulting items, while `remaining_orig_args` | |
206 | # is the list of remaining original arguments (not parsed because an | |
207 | # unknown option was found and `fail_on_unknown_opt` was `False`). | |
208 | # | |
209 | # For example, with | |
210 | # | |
211 | # --great --white contact nuance --shark nuclear | |
212 | # | |
213 | # if `--shark` is not described within `opt_descrs` and | |
214 | # `fail_on_unknown_opt` is `False`, then `ingested_orig_args` contains | |
215 | # `--great`, `--white`, `contact`, and `nuance` (two options, two | |
216 | # non-options), whereas `remaining_orig_args` contains `--shark` and | |
217 | # `nuclear`. | |
218 | # | |
219 | # This makes it possible to know where a command name is, for example. | |
220 | # With those arguments: | |
221 | # | |
222 | # --verbose --stuff=23 do-something --specific-opt -f -b | |
223 | # | |
224 | # and the option descriptors for `--verbose` and `--stuff` only, the | |
225 | # function returns the `--verbose` and `--stuff` option items, the | |
226 | # `do-something` non-option item, three ingested original arguments, and | |
227 | # three remaining original arguments. This means you can start the next | |
228 | # argument parsing stage, with option descriptors depending on the | |
229 | # command name, with the remaining original arguments. | |
230 | # | |
231 | # Note that `len(ingested_orig_args)` is NOT always equal to the number | |
232 | # of returned items, as | |
233 | # | |
234 | # --hello -fdw | |
235 | # | |
236 | # for example contains two ingested original arguments, but four | |
237 | # resulting option items. | |
238 | # | |
239 | # On failure, this function raises an `_Error` object. | |
2d55dc7d PP |
def parse(orig_args: OrigArgs, opt_descrs: Iterable[OptDescr],
          fail_on_unknown_opt: bool = True) -> _ParseRes:
    # Index the option descriptors by name once, up front.
    #
    # `opt_descrs` is only required to be iterable: traversing it a
    # single time makes one-shot iterables (a generator, for example)
    # work, instead of finding them exhausted after the first lookup.
    #
    # setdefault() keeps the first descriptor having a given name, which
    # is what a linear search in iteration order would return.
    short_opt_descrs: typing.Dict[str, OptDescr] = {}
    long_opt_descrs: typing.Dict[str, OptDescr] = {}

    for descr in opt_descrs:
        if descr.short_name is not None:
            short_opt_descrs.setdefault(descr.short_name, descr)

        if descr.long_name is not None:
            long_opt_descrs.setdefault(descr.long_name, descr)

    # Parses the short option original argument `orig_arg` (found at
    # the index `orig_arg_index`), returning an `_OptParseRes` object.
    #
    # `orig_arg` can contain more than one short option, for example:
    #
    #     -xzv
    #
    # Moreover, `orig_arg` can contain the argument of a short option,
    # for example:
    #
    #     -xzvflol.mp3
    #
    # (`lol.mp3` is the argument of short option `-f`).
    #
    # If this function expects an argument for the last short option of
    # `orig_arg`, then it must be `next_orig_arg`, for example:
    #
    #     -xzvf lol.mp3
    #
    # Raises `_Error` if such an argument is expected, but
    # `next_orig_arg` is `None`.
    #
    # If any of the short options of `orig_arg` is unknown, then this
    # function raises `_Error` if `fail_on_unknown_opt` is `True`, or
    # returns `None` otherwise.
    def parse_short_opts(orig_arg: str, orig_arg_index: Index,
                         next_orig_arg: _OptStr) -> Optional[_OptParseRes]:
        # the caller guarantees at least one character after `-`
        short_opts = orig_arg[1:]
        items: List[_Item] = []
        orig_arg_index_incr = 1

        for index, short_opt in enumerate(short_opts):
            opt_descr = short_opt_descrs.get(short_opt)

            if opt_descr is None:
                # unknown option
                if fail_on_unknown_opt:
                    raise _Error(orig_arg_index, orig_arg, f'Unknown short option `-{short_opt}`')

                # discard the items collected so far
                return None

            opt_arg: _OptStr = None

            if opt_descr.has_arg:
                if index == len(short_opts) - 1:
                    # last short option: use the next original argument
                    if next_orig_arg is None:
                        raise _Error(orig_arg_index, orig_arg,
                                     f'Expecting an argument for short option `-{short_opt}`')

                    opt_arg = next_orig_arg
                    orig_arg_index_incr += 1
                else:
                    # use remaining original argument's text
                    opt_arg = short_opts[index + 1:]

            items.append(_OptItem(opt_descr, opt_arg))

            if opt_descr.has_arg:
                # whatever followed was this option's argument
                break

        return _OptParseRes(items, orig_arg_index_incr)

    # Parses the long option original argument `orig_arg` (found at the
    # index `orig_arg_index`), returning an `_OptParseRes` object.
    #
    # `orig_arg` can contain a single long option, for example:
    #
    #     --header-dir
    #
    # Moreover, `orig_arg` can contain the long option's argument, for
    # example:
    #
    #     --header-dir=/path/to/dir
    #
    # If this function expects an argument for the long option, then it
    # must be `next_orig_arg`, for example:
    #
    #     --header-dir /path/to/dir
    #
    # Raises `_Error` if such an argument is expected, but
    # `next_orig_arg` is `None`.
    #
    # If the long option is unknown, then this function raises `_Error`
    # if `fail_on_unknown_opt` is `True`, or returns `None` otherwise.
    def parse_long_opt(orig_arg: str, orig_arg_index: Index,
                       next_orig_arg: _OptStr) -> Optional[_OptParseRes]:
        # Split at the first `=` to isolate the option name.
        #
        # Using str.partition() (rather than a regular expression with
        # `.*`) keeps the whole inline argument, even when it contains
        # newline characters.
        long_opt, eq, inline_arg = orig_arg[2:].partition('=')
        has_inline_arg = bool(eq) and bool(long_opt)

        if not has_inline_arg:
            # no `=`, or nothing before it (`--=foo`): the whole text is
            # the option name
            long_opt = orig_arg[2:]

        opt_descr = long_opt_descrs.get(long_opt)

        if opt_descr is None:
            # unknown option
            if fail_on_unknown_opt:
                raise _Error(orig_arg_index, orig_arg, f'Unknown long option `--{long_opt}`')

            # discard
            return None

        if not opt_descr.has_arg:
            # No option argument.
            #
            # NOTE(review): with the `--long-opt=arg` form, `arg` is
            # silently ignored here; consider reporting it as an error.
            return _OptParseRes([_OptItem(opt_descr, None)], 1)

        if has_inline_arg:
            # `--long-opt=arg` form
            return _OptParseRes([_OptItem(opt_descr, inline_arg)], 1)

        # `--long-opt arg` form: use the next original argument
        if next_orig_arg is None:
            raise _Error(orig_arg_index, orig_arg,
                         f'Expecting an argument for long option `--{long_opt}`')

        return _OptParseRes([_OptItem(opt_descr, next_orig_arg)], 2)

    # parse original arguments
    items: List[_Item] = []
    orig_arg_index = Index(0)
    non_opt_index = Index(0)

    while orig_arg_index < len(orig_args):
        orig_arg = orig_args[orig_arg_index]

        # keep next original argument, if any
        next_orig_arg: _OptStr = None

        if orig_arg_index + 1 < len(orig_args):
            next_orig_arg = orig_args[orig_arg_index + 1]

        if not orig_arg.startswith('-') or len(orig_arg) < 2:
            # non-option (this includes a lone `-` and the empty string)
            items.append(_NonOptItem(orig_arg, orig_arg_index, non_opt_index))
            non_opt_index = Index(non_opt_index + 1)
            orig_arg_index = Index(orig_arg_index + 1)
            continue

        # option
        if orig_arg == '--':
            raise _Error(orig_arg_index, orig_arg, 'Invalid `--` argument')

        res: Optional[_OptParseRes]

        if orig_arg.startswith('--'):
            # long option
            res = parse_long_opt(orig_arg, orig_arg_index, next_orig_arg)
        else:
            # short option(s)
            res = parse_short_opts(orig_arg, orig_arg_index, next_orig_arg)

        if res is None:
            # unknown option: stop here and hand back what's left
            assert not fail_on_unknown_opt
            return _ParseRes(items, orig_args[:orig_arg_index], orig_args[orig_arg_index:])

        items.extend(res.items)
        orig_arg_index = Index(orig_arg_index + res.orig_arg_index_incr)

    # Everything was ingested; return a copy so that the results never
    # alias the caller's list (as with the slices above).
    return _ParseRes(items, orig_args[:], [])