| 1 | # The MIT License (MIT) |
| 2 | # |
| 3 | # Copyright (c) 2020 Philippe Proulx <pproulx@efficios.com> |
| 4 | # |
| 5 | # Permission is hereby granted, free of charge, to any person obtaining |
| 6 | # a copy of this software and associated documentation files (the |
| 7 | # "Software"), to deal in the Software without restriction, including |
| 8 | # without limitation the rights to use, copy, modify, merge, publish, |
| 9 | # distribute, sublicense, and/or sell copies of the Software, and to |
| 10 | # permit persons to whom the Software is furnished to do so, subject to |
| 11 | # the following conditions: |
| 12 | # |
| 13 | # The above copyright notice and this permission notice shall be |
| 14 | # included in all copies or substantial portions of the Software. |
| 15 | # |
| 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| 17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| 18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
| 19 | # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
| 20 | # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
| 21 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
| 22 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| 23 | |
| 24 | import re |
| 25 | import typing |
| 26 | from typing import Optional, List, Iterable |
| 27 | from barectf.typing import Index, _OptStr |
| 28 | |
| 29 | |
__all__ = [
    'OptDescr',
    '_OptItem',
    '_NonOptItem',
    '_Error',
    'parse',
    'OrigArgs',
]


# Type of a list of original (unparsed) command-line arguments.
OrigArgs = List[str]
| 35 | |
| 36 | |
# Descriptor of a single command-line option.
#
# A descriptor has a short name, a long name, or both, and indicates
# whether or not the option expects an argument.
class OptDescr:
    # Creates a descriptor for the option named `short_name` (without
    # the leading `-`) and/or `long_name` (without the leading `--`);
    # at least one of the two names must be set.
    #
    # `has_arg` indicates whether or not the option expects an
    # argument.
    def __init__(self, short_name: _OptStr = None, long_name: _OptStr = None,
                 has_arg: bool = False):
        # a descriptor without any name could never match an option
        assert not (short_name is None and long_name is None)
        self._has_arg = has_arg
        self._long_name = long_name
        self._short_name = short_name

    # Short name (without the leading `-`), or `None` if not set.
    @property
    def short_name(self) -> _OptStr:
        return self._short_name

    # Long name (without the leading `--`), or `None` if not set.
    @property
    def long_name(self) -> _OptStr:
        return self._long_name

    # Whether or not the described option expects an argument.
    @property
    def has_arg(self) -> Optional[bool]:
        return self._has_arg
| 63 | |
| 64 | |
# Common base class of the option and non-option items below.
class _Item:
    pass
| 67 | |
| 68 | |
# Item for a parsed option: its descriptor and, when there's one, the
# text of its argument.
class _OptItem(_Item):
    # Creates an option item for the descriptor `descr` with the
    # argument text `arg_text` (`None` means no argument).
    def __init__(self, descr: OptDescr, arg_text: _OptStr = None):
        self._arg_text = arg_text
        self._descr = descr

    # Descriptor of this option.
    @property
    def descr(self) -> OptDescr:
        return self._descr

    # Text of this option's argument, or `None` if not set.
    @property
    def arg_text(self) -> _OptStr:
        return self._arg_text
| 82 | |
| 83 | |
# Item for a parsed non-option argument: its text and its two
# positions.
class _NonOptItem(_Item):
    # Creates a non-option item having the text `text`, the original
    # argument index `orig_arg_index`, and the non-option index
    # `non_opt_index`.
    def __init__(self, text: str, orig_arg_index: Index, non_opt_index: Index):
        self._non_opt_index = non_opt_index
        self._orig_arg_index = orig_arg_index
        self._text = text

    # Text of this argument.
    @property
    def text(self) -> str:
        return self._text

    # Index of this argument within the original arguments.
    @property
    def orig_arg_index(self) -> Index:
        return self._orig_arg_index

    # Index of this argument amongst the non-option arguments only.
    @property
    def non_opt_index(self) -> Index:
        return self._non_opt_index
| 102 | |
| 103 | |
# What parse() returns: the parsed items along with the ingested and
# remaining original arguments.
class _ParseRes:
    # Creates parsing results from the items `items`, the ingested
    # original arguments `ingested_orig_args`, and the remaining
    # original arguments `remaining_orig_args`.
    def __init__(self, items: List[_Item], ingested_orig_args: OrigArgs,
                 remaining_orig_args: OrigArgs):
        self._remaining_orig_args = remaining_orig_args
        self._ingested_orig_args = ingested_orig_args
        self._items = items

    # Parsed items.
    @property
    def items(self) -> List[_Item]:
        return self._items

    # Original arguments which were ingested to produce the items.
    @property
    def ingested_orig_args(self) -> OrigArgs:
        return self._ingested_orig_args

    # Original arguments which were not ingested.
    @property
    def remaining_orig_args(self) -> OrigArgs:
        return self._remaining_orig_args
| 123 | |
| 124 | |
# Exception carrying the faulty original argument, its index, and a
# message.
class _Error(Exception):
    # Creates an error about the original argument `orig_arg`, found at
    # the index `orig_arg_index`, with the message `msg` (also used as
    # the message of the exception itself).
    def __init__(self, orig_arg_index: Index, orig_arg: str, msg: str):
        super().__init__(msg)
        self._msg = msg
        self._orig_arg = orig_arg
        self._orig_arg_index = orig_arg_index

    # Index of the faulty original argument.
    @property
    def orig_arg_index(self) -> Index:
        return self._orig_arg_index

    # Faulty original argument.
    @property
    def orig_arg(self) -> str:
        return self._orig_arg

    # Error message.
    @property
    def msg(self) -> str:
        return self._msg
| 144 | |
| 145 | |
# Internal results of the option parsing helpers of parse().
class _OptParseRes(typing.NamedTuple):
    # option items created from a single original argument
    items: List[_Item]

    # how many original arguments to skip to reach the next one to parse
    orig_arg_index_incr: int
| 150 | |
| 151 | |
| 152 | # Parses the original arguments `orig_args` (list of strings), |
| 153 | # considering the option descriptors `opt_descrs` (set of `OptDescr` |
| 154 | # objects), and returns a corresponding `_ParseRes` object. |
| 155 | # |
| 156 | # This function considers ALL the elements of `orig_args`, including the |
| 157 | # first one, so that you would typically pass `sys.argv[1:]` to exclude |
| 158 | # the program/script name. |
| 159 | # |
| 160 | # This argument parser supports: |
| 161 | # |
| 162 | # * Short options without an argument, possibly tied together: |
| 163 | # |
| 164 | # -f -auf -n |
| 165 | # |
| 166 | # * Short options with arguments: |
| 167 | # |
| 168 | # -b 45 -f/mein/file -xyzhello |
| 169 | # |
| 170 | # * Long options without an argument: |
| 171 | # |
| 172 | # --five-guys --burger-king --pizza-hut --subway |
| 173 | # |
| 174 | # * Long options with arguments: |
| 175 | # |
| 176 | # --security enable --time=18.56 |
| 177 | # |
| 178 | # * Non-option arguments (anything else). |
| 179 | # |
| 180 | # This function does NOT accept `--` as an original argument; while it |
| 181 | # means "end of options" for many command-line tools, this function is |
| 182 | # all about keeping the order of the arguments, so it doesn't mean much |
| 183 | # to put them at the end. This has the side effect that a non-option |
| 184 | # argument cannot have the form of an option, for example if you need to |
| 185 | # pass the exact relative path `--lentil-soup`. In that case, you would |
| 186 | # need to pass `./--lentil-soup`. |
| 187 | # |
| 188 | # This function accepts duplicate options (the resulting list of items |
| 189 | # contains one entry for each instance). |
| 190 | # |
| 191 | # On success, this function returns a `_ParseRes` object which contains |
| 192 | # a list of items as its `items` property. Each item is either an |
| 193 | # option item or a non-option item. |
| 194 | # |
| 195 | # The returned list contains the items in the same order that the |
| 196 | # original arguments `orig_args` were parsed, including non-option |
| 197 | # arguments. This means, for example, that for |
| 198 | # |
| 199 | # --hello --meow=23 /path/to/file -b |
| 200 | # |
| 201 | # the function creates a list of four items: two options, one |
| 202 | # non-option, and one option. |
| 203 | # |
| 204 | # In the returned object, `ingested_orig_args` is the list of ingested |
| 205 | # original arguments to produce the resulting items, while `remaining_orig_args` |
| 206 | # is the list of remaining original arguments (not parsed because an |
| 207 | # unknown option was found and `fail_on_unknown_opt` was `False`). |
| 208 | # |
| 209 | # For example, with |
| 210 | # |
| 211 | # --great --white contact nuance --shark nuclear |
| 212 | # |
| 213 | # if `--shark` is not described within `opt_descrs` and |
| 214 | # `fail_on_unknown_opt` is `False`, then `ingested_orig_args` contains |
| 215 | # `--great`, `--white`, `contact`, and `nuance` (two options, two |
| 216 | # non-options), whereas `remaining_orig_args` contains `--shark` and |
| 217 | # `nuclear`. |
| 218 | # |
| 219 | # This makes it possible to know where a command name is, for example. |
| 220 | # With those arguments: |
| 221 | # |
| 222 | # --verbose --stuff=23 do-something --specific-opt -f -b |
| 223 | # |
| 224 | # and the option descriptors for `--verbose` and `--stuff` only, the |
| 225 | # function returns the `--verbose` and `--stuff` option items, the |
| 226 | # `do-something` non-option item, three ingested original arguments, and |
| 227 | # three remaining original arguments. This means you can start the next |
| 228 | # argument parsing stage, with option descriptors depending on the |
| 229 | # command name, with the remaining original arguments. |
| 230 | # |
| 231 | # Note that `len(ingested_orig_args)` is NOT always equal to the number |
| 232 | # of returned items, as |
| 233 | # |
| 234 | # --hello -fdw |
| 235 | # |
| 236 | # for example contains two ingested original arguments, but four |
| 237 | # resulting option items. |
| 238 | # |
| 239 | # On failure, this function raises an `_Error` object. |
def parse(orig_args: OrigArgs, opt_descrs: Iterable[OptDescr],
          fail_on_unknown_opt: bool = True) -> _ParseRes:
    # This function raises `_Error` when:
    #
    # * An option is unknown and `fail_on_unknown_opt` is `True`.
    # * An option which expects an argument doesn't have any.
    # * A long option which doesn't expect an argument gets one with
    #   the `--long-opt=arg` form.
    # * An original argument is exactly `--`.

    # Materialize the option descriptors once: `opt_descrs` can be any
    # iterable, including a one-shot iterator, whereas the lookup below
    # needs to scan all of them for each parsed option.
    opt_descrs = tuple(opt_descrs)

    # Finds and returns the first option descriptor amongst
    # `opt_descrs` having the short option name `short_name` OR the
    # long option name `long_name` (not both), or `None` if there's no
    # such descriptor.
    def find_opt_descr(short_name: _OptStr = None,
                       long_name: _OptStr = None) -> Optional[OptDescr]:
        for opt_descr in opt_descrs:
            if short_name is not None and short_name == opt_descr.short_name:
                return opt_descr

            if long_name is not None and long_name == opt_descr.long_name:
                return opt_descr

        return None

    # Parses the short option original argument `orig_arg` (found at
    # the index `orig_arg_index`), returning an `_OptParseRes` object.
    #
    # `orig_arg` can contain more than one short option, for example:
    #
    #     -xzv
    #
    # Moreover, `orig_arg` can contain the argument of a short option,
    # for example:
    #
    #     -xzvflol.mp3
    #
    # (`lol.mp3` is the argument of short option `-f`).
    #
    # If this function expects an argument for the last short option of
    # `orig_arg`, then it must be `next_orig_arg`, for example:
    #
    #     -xzvf lol.mp3
    #
    # If any of the short options of `orig_arg` is unknown, then this
    # function raises an error if `fail_on_unknown_opt` is `True`, or
    # returns `None` otherwise.
    def parse_short_opts(orig_arg_index: Index, orig_arg: str,
                         next_orig_arg: Optional[str]) -> Optional[_OptParseRes]:
        # the caller guarantees at least one character after `-`
        short_opts = orig_arg[1:]
        items: List[_Item] = []

        for index, short_opt in enumerate(short_opts):
            opt_descr = find_opt_descr(short_name=short_opt)

            if opt_descr is None:
                # unknown option
                if fail_on_unknown_opt:
                    raise _Error(orig_arg_index, orig_arg, f'Unknown short option `-{short_opt}`')

                # discard collected items
                return None

            if not opt_descr.has_arg:
                items.append(_OptItem(opt_descr, None))
                continue

            # An option with an argument always ends the group: what
            # follows it within `orig_arg`, if anything, is its
            # argument.
            attached_arg = short_opts[index + 1:]

            if attached_arg:
                items.append(_OptItem(opt_descr, attached_arg))
                return _OptParseRes(items, 1)

            # last short option: use the next original argument
            if next_orig_arg is None:
                raise _Error(orig_arg_index, orig_arg,
                             f'Expecting an argument for short option `-{short_opt}`')

            items.append(_OptItem(opt_descr, next_orig_arg))

            # also skip the original argument used as the option's
            # argument
            return _OptParseRes(items, 2)

        return _OptParseRes(items, 1)

    # Parses the long option original argument `orig_arg` (found at the
    # index `orig_arg_index`), returning an `_OptParseRes` object.
    #
    # `orig_arg` can contain a single long option, for example:
    #
    #     --header-dir
    #
    # Moreover, `orig_arg` can contain the long option's argument, for
    # example:
    #
    #     --header-dir=/path/to/dir
    #
    # If this function expects an argument for the long option, then it
    # must be `next_orig_arg`, for example:
    #
    #     --header-dir /path/to/dir
    #
    # If the long option is unknown, then this function raises an error
    # if `fail_on_unknown_opt` is `True`, or returns `None` otherwise.
    #
    # If the long option doesn't expect an argument, but `orig_arg`
    # has the `--long-opt=arg` form, then this function raises an
    # error.
    def parse_long_opt(orig_arg_index: Index, orig_arg: str,
                       next_orig_arg: Optional[str]) -> Optional[_OptParseRes]:
        # Split at the first `=` without a regular expression so that
        # the argument is kept as is, whatever it contains (a `.*`
        # pattern would stop at the first newline).
        long_opt, equal_sign, attached_arg = orig_arg[2:].partition('=')

        # `--long-opt=arg` form only if there's a name before `=`
        has_attached_arg = bool(equal_sign) and bool(long_opt)

        if not has_attached_arg:
            # the whole text after `--` is the option name
            long_opt = orig_arg[2:]

        opt_descr = find_opt_descr(long_name=long_opt)

        if opt_descr is None:
            # unknown option
            if fail_on_unknown_opt:
                raise _Error(orig_arg_index, orig_arg, f'Unknown long option `--{long_opt}`')

            # discard
            return None

        if not opt_descr.has_arg:
            if has_attached_arg:
                # don't silently drop what the user explicitly passed
                raise _Error(orig_arg_index, orig_arg,
                             f'Unexpected argument for long option `--{long_opt}`')

            # no option argument
            return _OptParseRes([_OptItem(opt_descr, None)], 1)

        if has_attached_arg:
            return _OptParseRes([_OptItem(opt_descr, attached_arg)], 1)

        if next_orig_arg is None:
            raise _Error(orig_arg_index, orig_arg,
                         f'Expecting an argument for long option `--{long_opt}`')

        # also skip the original argument used as the option's argument
        return _OptParseRes([_OptItem(opt_descr, next_orig_arg)], 2)

    # parse original arguments
    items: List[_Item] = []
    orig_arg_count = len(orig_args)
    orig_arg_index = Index(0)
    non_opt_index = Index(0)

    while orig_arg_index < orig_arg_count:
        orig_arg = orig_args[orig_arg_index]

        if len(orig_arg) < 2 or not orig_arg.startswith('-'):
            # non-option (this includes a lone `-`)
            items.append(_NonOptItem(orig_arg, orig_arg_index, non_opt_index))
            non_opt_index = Index(non_opt_index + 1)
            orig_arg_index = Index(orig_arg_index + 1)
            continue

        if orig_arg == '--':
            raise _Error(orig_arg_index, orig_arg, 'Invalid `--` argument')

        # keep next original argument, if any: it's possibly the
        # argument of the option to parse
        next_orig_arg = None

        if orig_arg_index + 1 < orig_arg_count:
            next_orig_arg = orig_args[orig_arg_index + 1]

        if orig_arg.startswith('--'):
            # long option
            res = parse_long_opt(orig_arg_index, orig_arg, next_orig_arg)
        else:
            # short option(s)
            res = parse_short_opts(orig_arg_index, orig_arg, next_orig_arg)

        if res is None:
            # unknown option: stop here and hand back what remains
            assert not fail_on_unknown_opt
            return _ParseRes(items, orig_args[:orig_arg_index], orig_args[orig_arg_index:])

        items += res.items
        orig_arg_index = Index(orig_arg_index + res.orig_arg_index_incr)

    return _ParseRes(items, orig_args, [])