From ddfa89036a644ee14abd92e0b0281836780968c7 Mon Sep 17 00:00:00 2001
From: Philippe Proulx
Date: Fri, 7 Aug 2020 15:21:15 -0400
Subject: [PATCH] cli: introduce Git-like commands

This patch makes the barectf CLI work with commands like Git:

    $ barectf COMMAND COMMAND-ARGS

The goal of this change is to have the `barectf` tool perform more than
tracer generation in the future. For instance, the current
`--dump-config` option should be a dedicated command instead of being
part of the generation process.

As of this patch, the only available command is `generate` (with a `gen`
alias) which does exactly what `barectf` does without an explicit
command:

    $ barectf generate config.yaml
    $ barectf config.yaml

`--help` and `--version` are "general" options; you need to put them
before the command name, if any:

    $ barectf --help
    $ barectf --version

You can also put `--help` after the command name to get this command's
help:

    $ barectf generate --help

`argpar.py` is a Python equivalent of the low-level parts of the argpar
C library, which I originally wrote for Babeltrace 2.

While I acknowledge that it's ludicrous to write a custom argument
parser in Python considering that the standard library and PyPI packages
offer many of them, I couldn't find one which can satisfy the "default
command" use case, with this default command accepting a non-option
argument. For:

    $ barectf config.yaml

all of them indicate that `config.yaml` is not a valid command name.

Of course this is because there's ambiguity when your configuration file
happens to be named `generate` in the current working directory:

    $ barectf generate

This can also be the `generate` command with a missing configuration
file path. As of this patch, this is what happens: `barectf` prefers the
command name. It is such a corner case that I'm not spending ONE MINUTE
on it.
That being said, there's a workaround: $ barectf ./generate Signed-off-by: Philippe Proulx --- barectf/argpar.py | 402 ++++++++++++++++++++++++++++++++++++++++++++++ barectf/cli.py | 298 ++++++++++++++++++++++++++++------ 2 files changed, 654 insertions(+), 46 deletions(-) create mode 100644 barectf/argpar.py diff --git a/barectf/argpar.py b/barectf/argpar.py new file mode 100644 index 0000000..c9fba5e --- /dev/null +++ b/barectf/argpar.py @@ -0,0 +1,402 @@ +# The MIT License (MIT) +# +# Copyright (c) 2020 Philippe Proulx +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +import re +import collections + + +__all__ = ['OptDescr', '_OptItem', '_NonOptItem', '_Error', 'parse'] + + +# Option descriptor. +class OptDescr: + # Builds an option descriptor having the short name `short_name` + # (without the leading `-`) and/or the long name `long_name` + # (without the leading `--`). 
+ # + # If `has_arg` is `True`, then it is expected that such an option + # has an argument. + def __init__(self, short_name=None, long_name=None, has_arg=False): + assert short_name is not None or long_name is not None + self._short_name = short_name + self._long_name = long_name + self._has_arg = has_arg + + @property + def short_name(self): + return self._short_name + + @property + def long_name(self): + return self._long_name + + @property + def has_arg(self): + return self._has_arg + + +class _Item: + pass + + +# Parsed option argument item. +class _OptItem(_Item): + def __init__(self, descr, arg_text=None): + self._descr = descr + self._arg_text = arg_text + + @property + def descr(self): + return self._descr + + @property + def arg_text(self): + return self._arg_text + + +# Parsed non-option argument item. +class _NonOptItem(_Item): + def __init__(self, text, orig_arg_index, non_opt_index): + self._text = text + self._orig_arg_index = orig_arg_index + self._non_opt_index = non_opt_index + + @property + def text(self): + return self._text + + @property + def orig_arg_index(self): + return self._orig_arg_index + + @property + def non_opt_index(self): + return self._non_opt_index + + +# Results of parse(). +class _ParseRes: + def __init__(self, items, ingested_orig_args, remaining_orig_args): + self._items = items + self._ingested_orig_args = ingested_orig_args + self._remaining_orig_args = remaining_orig_args + + @property + def items(self): + return self._items + + @property + def ingested_orig_args(self): + return self._ingested_orig_args + + @property + def remaining_orig_args(self): + return self._remaining_orig_args + + +# Parsing error. 
+class _Error(Exception): + def __init__(self, orig_arg_index, orig_arg, msg): + super().__init__(msg) + self._orig_arg_index = orig_arg_index + self._orig_arg = orig_arg + self._msg = msg + + @property + def orig_arg_index(self): + return self._orig_arg_index + + @property + def orig_arg(self): + return self._orig_arg + + @property + def msg(self): + return self._msg + + +# Results of parse_short_opts() and parse_long_opt(); internal. +_OptParseRes = collections.namedtuple('_OptParseRes', ['items', 'orig_arg_index_incr']) + + +# Parses the original arguments `orig_args` (list of strings), +# considering the option descriptors `opt_descrs` (set of `OptDescr` +# objects), and returns a corresponding `_ParseRes` object. +# +# This function considers ALL the elements of `orig_args`, including the +# first one, so that you would typically pass `sys.argv[1:]` to exclude +# the program/script name. +# +# This argument parser supports: +# +# * Short options without an argument, possibly tied together: +# +# -f -auf -n +# +# * Short options with arguments: +# +# -b 45 -f/mein/file -xyzhello +# +# * Long options without an argument: +# +# --five-guys --burger-king --pizza-hut --subway +# +# * Long options with arguments: +# +# --security enable --time=18.56 +# +# * Non-option arguments (anything else). +# +# This function does NOT accept `--` as an original argument; while it +# means "end of options" for many command-line tools, this function is +# all about keeping the order of the arguments, so it doesn't mean much +# to put them at the end. This has the side effect that a non-option +# argument cannot have the form of an option, for example if you need to +# pass the exact relative path `--lentil-soup`. In that case, you would +# need to pass `./--lentil-soup`. +# +# This function accepts duplicate options (the resulting list of items +# contains one entry for each instance). 
+# +# On success, this function returns a `_ParseRes` object which contains +# a list of items as its `items` property. Each item is either an +# option item or a non-option item. +# +# The returned list contains the items in the same order that the +# original arguments `orig_args` were parsed, including non-option +# arguments. This means, for example, that for +# +# --hello --meow=23 /path/to/file -b +# +# the function creates a list of four items: two options, one +# non-option, and one option. +# +# In the returned object, `ingested_orig_args` is the list of ingested +# original arguments to produce the resulting items, while `remaining_orig_args` +# is the list of remaining original arguments (not parsed because an +# unknown option was found and `fail_on_unknown_opt` was `False`). +# +# For example, with +# +# --great --white contact nuance --shark nuclear +# +# if `--shark` is not described within `opt_descrs` and +# `fail_on_unknown_opt` is `False`, then `ingested_orig_args` contains +# `--great`, `--white`, `contact`, and `nuance` (two options, two +# non-options), whereas `remaining_orig_args` contains `--shark` and +# `nuclear`. +# +# This makes it possible to know where a command name is, for example. +# With those arguments: +# +# --verbose --stuff=23 do-something --specific-opt -f -b +# +# and the option descriptors for `--verbose` and `--stuff` only, the +# function returns the `--verbose` and `--stuff` option items, the +# `do-something` non-option item, three ingested original arguments, and +# three remaining original arguments. This means you can start the next +# argument parsing stage, with option descriptors depending on the +# command name, with the remaining original arguments. +# +# Note that `len(ingested_orig_args)` is NOT always equal to the number +# of returned items, as +# +# --hello -fdw +# +# for example contains two ingested original arguments, but four +# resulting option items. 
+#
+# On failure, this function raises an `_Error` object.
+def parse(orig_args, opt_descrs, fail_on_unknown_opt=True):
+    # Returns the option descriptor of `opt_descrs` having the short
+    # option name `short_name` OR the long option name `long_name` (not
+    # both), or `None` if there's no such descriptor.
+    def find_opt_descr(short_name=None, long_name=None):
+        for opt_descr in opt_descrs:
+            if short_name is not None and short_name == opt_descr.short_name:
+                return opt_descr
+
+            if long_name is not None and long_name == opt_descr.long_name:
+                return opt_descr
+
+    # Parses a short option original argument, returning an
+    # `_OptParseRes` object.
+    #
+    # `orig_arg` can contain more than one short option, for example:
+    #
+    #     -xzv
+    #
+    # Moreover, `orig_arg` can contain the argument of a short option,
+    # for example:
+    #
+    #     -xzvflol.mp3
+    #
+    # (`lol.mp3` is the argument of short option `-f`).
+    #
+    # If this function expects an argument for the last short option of
+    # `orig_arg`, then it must be `next_orig_arg`, for example:
+    #
+    #     -xzvf lol.mp3
+    #
+    # If any of the short options of `orig_arg` is unknown, then this
+    # function raises an error if `fail_on_unknown_opt` is `True`, or
+    # returns `None` otherwise.
+    def parse_short_opts():
+        short_opts = orig_arg[1:]
+        items = []
+        done = False
+        index = 0
+        orig_arg_index_incr = 1
+
+        while not done:
+            short_opt = short_opts[index]
+            opt_descr = find_opt_descr(short_name=short_opt)
+
+            if opt_descr is None:
+                # unknown option
+                if fail_on_unknown_opt:
+                    raise _Error(orig_arg_index, orig_arg, f'Unknown short option `-{short_opt}`')
+
+                # discard collected arguments
+                return
+
+            opt_arg = None
+
+            if opt_descr.has_arg:
+                if index == len(short_opts) - 1:
+                    # last short option: use the next original argument
+                    if next_orig_arg is None:
+                        raise _Error(orig_arg_index, orig_arg,
+                                     f'Expecting an argument for short option `-{short_opt}`')
+
+                    opt_arg = next_orig_arg
+                    orig_arg_index_incr += 1
+                else:
+                    # use remaining original argument's text
+                    opt_arg = short_opts[index + 1:]
+
+                done = True
+
+            items.append(_OptItem(opt_descr, opt_arg))
+            index += 1
+
+            if index == len(short_opts):
+                done = True
+
+        return _OptParseRes(items, orig_arg_index_incr)
+
+    # Parses a long option original argument, returning an
+    # `_OptParseRes` object.
+    #
+    # `orig_arg` can contain a single long option, for example:
+    #
+    #     --header-dir
+    #
+    # Moreover, `orig_arg` can contain the long option's argument, for
+    # example:
+    #
+    #     --header-dir=/path/to/dir
+    #
+    # If this function expects an argument for the long option, then it
+    # must be `next_orig_arg`, for example:
+    #
+    #     --header-dir /path/to/dir
+    #
+    # If the long option is unknown, then this function raises an error
+    # if `fail_on_unknown_opt` is `True`, or returns `None` otherwise.
+    def parse_long_opt():
+        long_opt = orig_arg[2:]
+        m = re.match(r'--([^=]+)=(.*)', orig_arg)
+
+        if m:
+            # `--long-opt=arg` form: isolate option name
+            long_opt = m.group(1)
+
+        opt_descr = find_opt_descr(long_name=long_opt)
+
+        if opt_descr is None:
+            # unknown option
+            if fail_on_unknown_opt:
+                raise _Error(orig_arg_index, orig_arg, f'Unknown long option `--{long_opt}`')
+
+            # discard
+            return
+
+        orig_arg_index_incr = 1
+
+        if opt_descr.has_arg:
+            if m:
+                item = _OptItem(opt_descr, m.group(2))
+            else:
+                if next_orig_arg is None:
+                    raise _Error(orig_arg_index, orig_arg,
+                                 f'Expecting an argument for long option `--{long_opt}`')
+
+                item = _OptItem(opt_descr, next_orig_arg)
+                orig_arg_index_incr += 1
+        else:
+            # no option argument
+            item = _OptItem(opt_descr, None)
+
+        return _OptParseRes([item], orig_arg_index_incr)
+
+    # parse original arguments
+    items = []
+    orig_arg_index = 0
+    non_opt_index = 0
+
+    while orig_arg_index < len(orig_args):
+        orig_arg = orig_args[orig_arg_index]
+
+        # keep next original argument, if any
+        next_orig_arg = None
+
+        if orig_arg_index < len(orig_args) - 1:
+            next_orig_arg = orig_args[orig_arg_index + 1]
+
+        if orig_arg.startswith('-') and len(orig_arg) >= 2:
+            # option
+            if orig_arg[1] == '-':
+                if orig_arg == '--':
+                    raise _Error(orig_arg_index, orig_arg, 'Invalid `--` argument')
+
+                # long option
+                res = parse_long_opt()
+            else:
+                # short option(s)
+                res = parse_short_opts()
+
+            if res is None:
+                # unknown option
+                assert not fail_on_unknown_opt
+                return _ParseRes(items, orig_args[:orig_arg_index], orig_args[orig_arg_index:])
+
+            items += res.items
+            orig_arg_index += res.orig_arg_index_incr
+        else:
+            # non-option
+            items.append(_NonOptItem(orig_arg, orig_arg_index, non_opt_index))
+            non_opt_index += 1
+            orig_arg_index += 1
+
+    return _ParseRes(items, orig_args, [])
diff --git a/barectf/cli.py b/barectf/cli.py
index 0b18ed6..956ee0c 100644
--- a/barectf/cli.py
+++ b/barectf/cli.py
@@ -27,6 +27,7 @@ import argparse
 import os.path
 import 
barectf import barectf.config_parse_common as barectf_config_parse_common +import barectf.argpar as barectf_argpar import sys import os @@ -65,76 +66,281 @@ def _print_unknown_exc(exc): _print_error(f'Unknown exception: {exc}') -def _parse_args(): - ap = argparse.ArgumentParser() - - ap.add_argument('-c', '--code-dir', metavar='DIR', action='store', default=os.getcwd(), - help='output directory of C source file') - ap.add_argument('--dump-config', action='store_true', - help='also dump the effective YAML configuration file used for generation') - ap.add_argument('-H', '--headers-dir', metavar='DIR', action='store', default=os.getcwd(), - help='output directory of C header files') - ap.add_argument('-I', '--include-dir', metavar='DIR', action='append', default=[], - help='add directory DIR to the list of directories to be searched for include files') - ap.add_argument('--ignore-include-not-found', action='store_true', - help='continue to process the configuration file when included files are not found') - ap.add_argument('-m', '--metadata-dir', metavar='DIR', action='store', default=os.getcwd(), - help='output directory of CTF metadata') - ap.add_argument('-p', '--prefix', metavar='PREFIX', action='store', - help='override configuration\'s prefixes') - ap.add_argument('-V', '--version', action='version', - version='%(prog)s {}'.format(barectf.__version__)) - ap.add_argument('config', metavar='CONFIG', action='store', - help='barectf YAML configuration file') - - # parse args - args = ap.parse_args() - - # validate output directories - for dir in [args.code_dir, args.headers_dir, args.metadata_dir] + args.include_dir: +# Finds and returns all the option items in `items` having the long name +# `long_name`. 
+def _find_opt_items(items, long_name): + ret_items = [] + + for item in items: + if type(item) is barectf_argpar._OptItem and item.descr.long_name == long_name: + ret_items.append(item) + + return ret_items + + +# Returns: +# +# For an option item without an argument: +# `True`. +# +# For an option item with an argument: +# Its argument. +# +# Uses the last option item having the long name `long_name` found in +# `items`. +# +# Returns `default` if there's no such option item. +def _opt_item_val(items, long_name, default=None): + opt_items = _find_opt_items(items, long_name) + + if len(opt_items) == 0: + return default + + opt_item = opt_items[-1] + + if opt_item.descr.has_arg: + return opt_item.arg_text + + return True + + +class _CliCfg: + pass + + +class _CliGenCmdCfg(_CliCfg): + def __init__(self, config_file_path, c_source_dir, c_header_dir, metadata_stream_dir, + inclusion_dirs, ignore_inclusion_not_found, dump_config, v2_prefix): + self._config_file_path = config_file_path + self._c_source_dir = c_source_dir + self._c_header_dir = c_header_dir + self._metadata_stream_dir = metadata_stream_dir + self._inclusion_dirs = inclusion_dirs + self._ignore_inclusion_not_found = ignore_inclusion_not_found + self._dump_config = dump_config + self._v2_prefix = v2_prefix + + @property + def config_file_path(self): + return self._config_file_path + + @property + def c_source_dir(self): + return self._c_source_dir + + @property + def c_header_dir(self): + return self._c_header_dir + + @property + def metadata_stream_dir(self): + return self._metadata_stream_dir + + @property + def inclusion_dirs(self): + return self._inclusion_dirs + + @property + def ignore_inclusion_not_found(self): + return self._ignore_inclusion_not_found + + @property + def dump_config(self): + return self._dump_config + + @property + def v2_prefix(self): + return self._v2_prefix + + +def _print_gen_cmd_usage(): + print('''Usage: barectf generate [--code-dir=DIR] [--headers-dir=DIR] + 
[--metadata-dir=DIR] [--prefix=PREFIX] + [--include-dir=DIR]... [--ignore-include-not-found] + [--dump-config] CONFIG-FILE-PATH + +Options: + -c DIR, --code-dir=DIR Write C source files to DIR + --dump-config Print the effective configuration file + -H DIR, --headers-dir=DIR Write C header files to DIR + --ignore-include-not-found Continue to process the configuration file when + included files are not found + -I DIR, --include-dir=DIR Add DIR to the list of directories to be + searched for inclusion files + -m DIR, --metadata-dir=DIR Write the metadata stream file to DIR + -p PREFIX, --prefix=PREFIX Set the configuration prefix to PREFIX''') + + +class _CliError(Exception): + pass + + +def _cli_gen_cfg_from_args(orig_args): + # parse original arguments + opt_descrs = [ + barectf_argpar.OptDescr('h', 'help'), + barectf_argpar.OptDescr('c', 'code-dir', True), + barectf_argpar.OptDescr('H', 'headers-dir', True), + barectf_argpar.OptDescr('I', 'include-dir', True), + barectf_argpar.OptDescr('m', 'metadata-dir', True), + barectf_argpar.OptDescr('p', 'prefix', True), + barectf_argpar.OptDescr(long_name='dump-config'), + barectf_argpar.OptDescr(long_name='ignore-include-not-found'), + ] + res = barectf_argpar.parse(orig_args, opt_descrs) + assert len(res.ingested_orig_args) == len(orig_args) + + # command help? 
+ if len(_find_opt_items(res.items, 'help')) > 0: + _print_gen_cmd_usage() + sys.exit() + + # check configuration file path + config_file_path = None + + for item in res.items: + if type(item) is barectf_argpar._NonOptItem: + if config_file_path is not None: + raise _CliError('Multiple configuration file paths provided') + + config_file_path = item.text + + if config_file_path is None: + raise _CliError('Missing configuration file path') + + if not os.path.isfile(config_file_path): + raise _CliError(f'`{config_file_path}` is not an existing, regular file') + + # directories + c_source_dir = _opt_item_val(res.items, 'code-dir', os.getcwd()) + c_header_dir = _opt_item_val(res.items, 'headers-dir', os.getcwd()) + metadata_stream_dir = _opt_item_val(res.items, 'metadata-dir', os.getcwd()) + inclusion_dirs = [item.arg_text for item in _find_opt_items(res.items, 'include-dir')] + + for dir in [c_source_dir, c_header_dir, metadata_stream_dir] + inclusion_dirs: if not os.path.isdir(dir): - _print_error(f'`{dir}` is not an existing directory') + raise _CliError(f'`{dir}` is not an existing directory') + + inclusion_dirs.append(os.getcwd()) + + # other options + ignore_inclusion_not_found = _opt_item_val(res.items, 'ignore-include-not-found', False) + dump_config = _opt_item_val(res.items, 'dump-config', False) + v2_prefix = _opt_item_val(res.items, 'prefix') + + return _CliGenCmdCfg(config_file_path, c_source_dir, c_header_dir, metadata_stream_dir, + inclusion_dirs, ignore_inclusion_not_found, dump_config, v2_prefix) - # validate that configuration file exists - if not os.path.isfile(args.config): - _print_error(f'`{args.config}` is not an existing, regular file') - # append current working directory - args.include_dir.append(os.getcwd()) +def _print_general_usage(): + print('''Usage: barectf COMMAND COMMAND-ARGS + barectf --help + barectf --version - return args +General options: + -h, --help Show this help and quit + -V, --version Show version and quit + +Available 
commands: + gen, generate Generate the C source and CTF metadata files of a tracer + from a configuration file + +Run `barectf COMMAND --help` to show the help of COMMAND.''') + + +def _cli_cfg_from_args(): + # We use our `argpar` module here instead of Python's `argparse` + # because we need to support the two following use cases: + # + # $ barectf config.yaml + # $ barectf generate config.yaml + # + # In other words, the default command is `generate` (for backward + # compatibility reasons). The argument parser must not consider + # `config.yaml` as being a command name. + general_opt_descrs = [ + barectf_argpar.OptDescr('V', 'version'), + barectf_argpar.OptDescr('h', 'help'), + ] + orig_args = sys.argv[1:] + res = barectf_argpar.parse(orig_args, general_opt_descrs, False) + + # find command name, collecting preceding (common) option items + general_opt_items = [] + cmd_first_orig_arg_index = None + cmd_name = None + + for item in res.items: + if type(item) is barectf_argpar._NonOptItem: + if item.text in ['gen', 'generate']: + cmd_name = 'generate' + cmd_first_orig_arg_index = item.orig_arg_index + 1 + else: + cmd_first_orig_arg_index = item.orig_arg_index + + break + else: + assert type(item) is barectf_argpar._OptItem + general_opt_items.append(item) + + # general help? + if len(_find_opt_items(general_opt_items, 'help')) > 0: + _print_general_usage() + sys.exit() + + # version? 
+ if len(_find_opt_items(general_opt_items, 'version')) > 0: + print(f'barectf {barectf.__version__}') + sys.exit() + + # execute command + cmd_orig_args = orig_args[cmd_first_orig_arg_index:] + + if cmd_name is None: + # default `generate` command + return _cli_gen_cfg_from_args(cmd_orig_args) + else: + assert cmd_name == 'generate' + return _cli_gen_cfg_from_args(cmd_orig_args) def _run(): # parse arguments - args = _parse_args() + try: + cli_cfg = _cli_cfg_from_args() + except barectf_argpar._Error as exc: + _print_error(f'Command-line: For argument `{exc.orig_arg}`: {exc.msg}') + except _CliError as exc: + _print_error(f'Command-line: {exc}') + + assert type(cli_cfg) is _CliGenCmdCfg # create configuration try: - with open(args.config) as f: - if args.dump_config: + with open(cli_cfg.config_file_path) as f: + if cli_cfg.dump_config: # print effective configuration file - print(barectf.effective_configuration_file(f, True, args.include_dir, - args.ignore_include_not_found)) + print(barectf.effective_configuration_file(f, True, cli_cfg.inclusion_dirs, + cli_cfg.ignore_inclusion_not_found)) # barectf.configuration_from_file() reads the file again # below: rewind. f.seek(0) - config = barectf.configuration_from_file(f, True, args.include_dir, - args.ignore_include_not_found) + config = barectf.configuration_from_file(f, True, cli_cfg.inclusion_dirs, + cli_cfg.ignore_inclusion_not_found) except barectf._ConfigurationParseError as exc: _print_config_error(exc) except Exception as exc: _print_unknown_exc(exc) - if args.prefix: + if cli_cfg.v2_prefix is not None: # Override prefixes. # # For historical reasons, the `--prefix` option applies the # barectf 2 configuration prefix rules. Therefore, get the # equivalent barectf 3 prefixes first. 
- v3_prefixes = barectf_config_parse_common._v3_prefixes_from_v2_prefix(args.prefix) + v3_prefixes = barectf_config_parse_common._v3_prefixes_from_v2_prefix(cli_cfg.v2_prefix) cg_opts = config.options.code_generation_options cg_opts = barectf.ConfigurationCodeGenerationOptions(v3_prefixes.identifier, v3_prefixes.file_name, @@ -156,13 +362,13 @@ def _run(): try: # generate and write metadata stream file - write_file(args.metadata_dir, code_gen.generate_metadata_stream()) + write_file(cli_cfg.metadata_stream_dir, code_gen.generate_metadata_stream()) # generate and write C header files - write_files(args.headers_dir, code_gen.generate_c_headers()) + write_files(cli_cfg.c_header_dir, code_gen.generate_c_headers()) # generate and write C source files - write_files(args.code_dir, code_gen.generate_c_sources()) + write_files(cli_cfg.c_source_dir, code_gen.generate_c_sources()) except Exception as exc: # We know `config` is valid, therefore the code generator cannot # fail for a reason known to barectf. -- 2.34.1