Commit | Line | Data |
---|---|---|
ddfa8903 PP |
1 | # The MIT License (MIT) |
2 | # | |
3 | # Copyright (c) 2020 Philippe Proulx <pproulx@efficios.com> | |
4 | # | |
5 | # Permission is hereby granted, free of charge, to any person obtaining | |
6 | # a copy of this software and associated documentation files (the | |
7 | # "Software"), to deal in the Software without restriction, including | |
8 | # without limitation the rights to use, copy, modify, merge, publish, | |
9 | # distribute, sublicense, and/or sell copies of the Software, and to | |
10 | # permit persons to whom the Software is furnished to do so, subject to | |
11 | # the following conditions: | |
12 | # | |
13 | # The above copyright notice and this permission notice shall be | |
14 | # included in all copies or substantial portions of the Software. | |
15 | # | |
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |
19 | # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |
20 | # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |
21 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |
22 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
23 | ||
24 | import re | |
25 | import collections | |
26 | ||
27 | ||
# Names exported by a star import (`from <this module> import *`); the
# underscore-prefixed ones would otherwise be skipped by such an import.
__all__ = ['OptDescr', '_OptItem', '_NonOptItem', '_Error', 'parse']
29 | ||
30 | ||
31 | # Option descriptor. | |
class OptDescr:
    """Description of one command-line option.

    An option is known by a short name (given without the leading `-`),
    by a long name (given without the leading `--`), or by both.

    `has_arg` tells whether or not the option is expected to have an
    argument.
    """

    def __init__(self, short_name=None, long_name=None, has_arg=False):
        # at least one of the two names is required
        assert not (short_name is None and long_name is None)
        self._has_arg = has_arg
        self._long_name = long_name
        self._short_name = short_name

    @property
    def has_arg(self):
        """Whether or not the option is expected to have an argument."""
        return self._has_arg

    @property
    def long_name(self):
        """Long name (without the leading `--`), or `None`."""
        return self._long_name

    @property
    def short_name(self):
        """Short name (without the leading `-`), or `None`."""
        return self._short_name
56 | ||
57 | ||
class _Item:
    """Common base class of the parsed items (option and non-option)."""
60 | ||
61 | ||
62 | # Parsed option argument item. | |
class _OptItem(_Item):
    """Parsed option item: an option descriptor and its argument text.

    `arg_text` is `None` when no argument text is attached to the
    option.
    """

    def __init__(self, descr, arg_text=None):
        self._arg_text = arg_text
        self._descr = descr

    @property
    def arg_text(self):
        """Text of the option's argument, or `None`."""
        return self._arg_text

    @property
    def descr(self):
        """Descriptor of the parsed option."""
        return self._descr
75 | ||
76 | ||
77 | # Parsed non-option argument item. | |
class _NonOptItem(_Item):
    """Parsed non-option item.

    Keeps the text of the argument, its index amongst all the original
    arguments, and its index amongst the non-option arguments only.
    """

    def __init__(self, text, orig_arg_index, non_opt_index):
        self._non_opt_index = non_opt_index
        self._orig_arg_index = orig_arg_index
        self._text = text

    @property
    def non_opt_index(self):
        """Index of this item amongst the non-option items."""
        return self._non_opt_index

    @property
    def orig_arg_index(self):
        """Index of this item's argument within the original arguments."""
        return self._orig_arg_index

    @property
    def text(self):
        """Text of the non-option argument."""
        return self._text
95 | ||
96 | ||
97 | # Results of parse(). | |
class _ParseRes:
    """Parsing results: items plus ingested/remaining original arguments."""

    def __init__(self, items, ingested_orig_args, remaining_orig_args):
        self._remaining_orig_args = remaining_orig_args
        self._ingested_orig_args = ingested_orig_args
        self._items = items

    @property
    def remaining_orig_args(self):
        """Original arguments which were not parsed."""
        return self._remaining_orig_args

    @property
    def ingested_orig_args(self):
        """Original arguments which were parsed to produce the items."""
        return self._ingested_orig_args

    @property
    def items(self):
        """Parsed items."""
        return self._items
115 | ||
116 | ||
117 | # Parsing error. | |
class _Error(Exception):
    """Parsing error.

    Keeps the index and the text of the offending original argument as
    well as the error message (which is also the exception's message).
    """

    def __init__(self, orig_arg_index, orig_arg, msg):
        super().__init__(msg)
        self._msg = msg
        self._orig_arg = orig_arg
        self._orig_arg_index = orig_arg_index

    @property
    def msg(self):
        """Error message."""
        return self._msg

    @property
    def orig_arg(self):
        """Text of the offending original argument."""
        return self._orig_arg

    @property
    def orig_arg_index(self):
        """Index of the offending original argument."""
        return self._orig_arg_index
136 | ||
137 | ||
# Internal return type of parse_short_opts() and parse_long_opt(): the
# parsed option items and the number of original arguments they used.
_OptParseRes = collections.namedtuple('_OptParseRes', 'items orig_arg_index_incr')
140 | ||
141 | ||
142 | # Parses the original arguments `orig_args` (list of strings), | |
143 | # considering the option descriptors `opt_descrs` (set of `OptDescr` | |
144 | # objects), and returns a corresponding `_ParseRes` object. | |
145 | # | |
146 | # This function considers ALL the elements of `orig_args`, including the | |
147 | # first one, so that you would typically pass `sys.argv[1:]` to exclude | |
148 | # the program/script name. | |
149 | # | |
150 | # This argument parser supports: | |
151 | # | |
152 | # * Short options without an argument, possibly tied together: | |
153 | # | |
154 | # -f -auf -n | |
155 | # | |
156 | # * Short options with arguments: | |
157 | # | |
158 | # -b 45 -f/mein/file -xyzhello | |
159 | # | |
160 | # * Long options without an argument: | |
161 | # | |
162 | # --five-guys --burger-king --pizza-hut --subway | |
163 | # | |
164 | # * Long options with arguments: | |
165 | # | |
166 | # --security enable --time=18.56 | |
167 | # | |
168 | # * Non-option arguments (anything else). | |
169 | # | |
170 | # This function does NOT accept `--` as an original argument; while it | |
171 | # means "end of options" for many command-line tools, this function is | |
172 | # all about keeping the order of the arguments, so it doesn't mean much | |
173 | # to put them at the end. This has the side effect that a non-option | |
174 | # argument cannot have the form of an option, for example if you need to | |
175 | # pass the exact relative path `--lentil-soup`. In that case, you would | |
176 | # need to pass `./--lentil-soup`. | |
177 | # | |
178 | # This function accepts duplicate options (the resulting list of items | |
179 | # contains one entry for each instance). | |
180 | # | |
181 | # On success, this function returns a `_ParseRes` object which contains | |
182 | # a list of items as its `items` property. Each item is either an | |
183 | # option item or a non-option item. | |
184 | # | |
185 | # The returned list contains the items in the same order that the | |
186 | # original arguments `orig_args` were parsed, including non-option | |
187 | # arguments. This means, for example, that for | |
188 | # | |
189 | # --hello --meow=23 /path/to/file -b | |
190 | # | |
191 | # the function creates a list of four items: two options, one | |
192 | # non-option, and one option. | |
193 | # | |
194 | # In the returned object, `ingested_orig_args` is the list of ingested | |
195 | # original arguments to produce the resulting items, while `remaining_orig_args` | |
196 | # is the list of remaining original arguments (not parsed because an | |
197 | # unknown option was found and `fail_on_unknown_opt` was `False`). | |
198 | # | |
199 | # For example, with | |
200 | # | |
201 | # --great --white contact nuance --shark nuclear | |
202 | # | |
203 | # if `--shark` is not described within `opt_descrs` and | |
204 | # `fail_on_unknown_opt` is `False`, then `ingested_orig_args` contains | |
205 | # `--great`, `--white`, `contact`, and `nuance` (two options, two | |
206 | # non-options), whereas `remaining_orig_args` contains `--shark` and | |
207 | # `nuclear`. | |
208 | # | |
209 | # This makes it possible to know where a command name is, for example. | |
210 | # With those arguments: | |
211 | # | |
212 | # --verbose --stuff=23 do-something --specific-opt -f -b | |
213 | # | |
214 | # and the option descriptors for `--verbose` and `--stuff` only, the | |
215 | # function returns the `--verbose` and `--stuff` option items, the | |
216 | # `do-something` non-option item, three ingested original arguments, and | |
217 | # three remaining original arguments. This means you can start the next | |
218 | # argument parsing stage, with option descriptors depending on the | |
219 | # command name, with the remaining original arguments. | |
220 | # | |
221 | # Note that `len(ingested_orig_args)` is NOT always equal to the number | |
222 | # of returned items, as | |
223 | # | |
224 | # --hello -fdw | |
225 | # | |
226 | # for example contains two ingested original arguments, but four | |
227 | # resulting option items. | |
228 | # | |
229 | # On failure, this function raises an `_Error` object. | |
def parse(orig_args, opt_descrs, fail_on_unknown_opt=True):
    """Parse the original arguments `orig_args`, keeping their order.

    `orig_args` is a list of strings, ALL of which are considered
    (typically `sys.argv[1:]`). `opt_descrs` is an iterable of
    `OptDescr` objects describing the known options.

    Supported forms:

    * Short options, possibly tied together: `-f -auf -n`.
    * Short options with an argument: `-b 45 -f/mein/file -xyzhello`.
    * Long options: `--five-guys`.
    * Long options with an argument: `--security enable --time=18.56`.
    * Non-option arguments: anything else, including a lone `-`.

    Returns a `_ParseRes` object of which:

    * `items` contains one `_OptItem` or `_NonOptItem` object per
      parsed option or non-option argument, in parsing order
      (duplicate options produce one item each).
    * `ingested_orig_args` contains the original arguments which were
      parsed to produce the items.
    * `remaining_orig_args` contains the original arguments which were
      not parsed; this is only non-empty when an unknown option was
      found and `fail_on_unknown_opt` is `False`. Parsing stops at the
      original argument containing the unknown option: this whole
      argument is part of the remaining ones and none of its options
      produces an item.

    Raises `_Error` when:

    * An original argument is exactly `--` (not supported).
    * An option is unknown and `fail_on_unknown_opt` is `True`.
    * An option expecting an argument doesn't have any.
    """

    # Returns the first option descriptor of `opt_descrs` having the
    # short option name `short_name` OR the long option name `long_name`
    # (pass only one of them), or `None` if there's none.
    def find_opt_descr(short_name=None, long_name=None):
        for opt_descr in opt_descrs:
            if short_name is not None and short_name == opt_descr.short_name:
                return opt_descr

            if long_name is not None and long_name == opt_descr.long_name:
                return opt_descr

        return None

    # Parses the short option original argument `orig_arg` (found at
    # `orig_arg_index`), returning an `_OptParseRes` object.
    #
    # `orig_arg` can contain more than one short option, for example:
    #
    #     -xzv
    #
    # Moreover, `orig_arg` can contain the argument of a short option,
    # for example:
    #
    #     -xzvflol.mp3
    #
    # (`lol.mp3` is the argument of short option `-f`).
    #
    # If the last short option of `orig_arg` expects an argument, then
    # it must be `next_orig_arg`, for example:
    #
    #     -xzvf lol.mp3
    #
    # If any of the short options of `orig_arg` is unknown, then this
    # function raises an error if `fail_on_unknown_opt` is `True`, or
    # returns `None` otherwise.
    def parse_short_opts(orig_arg_index, orig_arg, next_orig_arg):
        short_opts = orig_arg[1:]
        items = []

        for index, short_opt in enumerate(short_opts):
            opt_descr = find_opt_descr(short_name=short_opt)

            if opt_descr is None:
                # unknown option
                if fail_on_unknown_opt:
                    raise _Error(orig_arg_index, orig_arg, f'Unknown short option `-{short_opt}`')

                # discard collected items
                return None

            if not opt_descr.has_arg:
                items.append(_OptItem(opt_descr, None))
                continue

            # An option with an argument always ends the group: what
            # follows it within `orig_arg`, if anything, is its
            # argument.
            attached_arg = short_opts[index + 1:]

            if attached_arg:
                items.append(_OptItem(opt_descr, attached_arg))
                return _OptParseRes(items, 1)

            # last short option: use the next original argument
            if next_orig_arg is None:
                raise _Error(orig_arg_index, orig_arg,
                             f'Expecting an argument for short option `-{short_opt}`')

            items.append(_OptItem(opt_descr, next_orig_arg))
            return _OptParseRes(items, 2)

        return _OptParseRes(items, 1)

    # Parses the long option original argument `orig_arg` (found at
    # `orig_arg_index`), returning an `_OptParseRes` object.
    #
    # `orig_arg` can contain a single long option, for example:
    #
    #     --header-dir
    #
    # Moreover, `orig_arg` can contain the long option's argument, for
    # example:
    #
    #     --header-dir=/path/to/dir
    #
    # Otherwise, if the long option expects an argument, then it must
    # be `next_orig_arg`, for example:
    #
    #     --header-dir /path/to/dir
    #
    # If the long option is unknown, then this function raises an error
    # if `fail_on_unknown_opt` is `True`, or returns `None` otherwise.
    def parse_long_opt(orig_arg_index, orig_arg, next_orig_arg):
        long_opt = orig_arg[2:]

        # Split on the first `=` without a regular expression so that
        # the option's argument is kept whole even if it contains
        # newlines.
        name, equal_sign, attached_arg = long_opt.partition('=')

        # `--long-opt=arg` form: there must be a non-empty name before
        # the `=`, otherwise the whole text is the option name
        has_attached_arg = bool(equal_sign) and bool(name)

        if has_attached_arg:
            long_opt = name

        opt_descr = find_opt_descr(long_name=long_opt)

        if opt_descr is None:
            # unknown option
            if fail_on_unknown_opt:
                raise _Error(orig_arg_index, orig_arg, f'Unknown long option `--{long_opt}`')

            # discard
            return None

        if not opt_descr.has_arg:
            # NOTE: any `=arg` text attached to an option which doesn't
            # expect an argument is ignored.
            return _OptParseRes([_OptItem(opt_descr, None)], 1)

        if has_attached_arg:
            return _OptParseRes([_OptItem(opt_descr, attached_arg)], 1)

        if next_orig_arg is None:
            raise _Error(orig_arg_index, orig_arg,
                         f'Expecting an argument for long option `--{long_opt}`')

        return _OptParseRes([_OptItem(opt_descr, next_orig_arg)], 2)

    # parse original arguments
    items = []
    orig_arg_index = 0
    non_opt_index = 0

    while orig_arg_index < len(orig_args):
        orig_arg = orig_args[orig_arg_index]

        # keep next original argument, if any
        next_orig_arg = None

        if orig_arg_index + 1 < len(orig_args):
            next_orig_arg = orig_args[orig_arg_index + 1]

        if not orig_arg.startswith('-') or len(orig_arg) < 2:
            # non-option (this includes a lone `-`)
            items.append(_NonOptItem(orig_arg, orig_arg_index, non_opt_index))
            non_opt_index += 1
            orig_arg_index += 1
            continue

        if orig_arg == '--':
            raise _Error(orig_arg_index, orig_arg, 'Invalid `--` argument')

        if orig_arg.startswith('--'):
            # long option
            res = parse_long_opt(orig_arg_index, orig_arg, next_orig_arg)
        else:
            # short option(s)
            res = parse_short_opts(orig_arg_index, orig_arg, next_orig_arg)

        if res is None:
            # unknown option: stop here and hand back what's left
            assert not fail_on_unknown_opt
            return _ParseRes(items, orig_args[:orig_arg_index], orig_args[orig_arg_index:])

        items += res.items

        # an option may also consume the next original argument
        orig_arg_index += res.orig_arg_index_incr

    return _ParseRes(items, orig_args, [])