initial commit
authorOlivier Dion <odion@efficios.com>
Tue, 27 Feb 2024 18:55:28 +0000 (13:55 -0500)
committerOlivier Dion <odion@efficios.com>
Tue, 27 Feb 2024 18:56:09 +0000 (13:56 -0500)
Signed-off-by: Olivier Dion <odion@efficios.com>
16 files changed:
.gitignore [new file with mode: 0644]
Makefile [new file with mode: 0644]
scripts/check [new file with mode: 0755]
scripts/gen-hip-wrappers [new file with mode: 0755]
scripts/gen-hsa-wrappers [new file with mode: 0755]
scripts/lttng-ust-auto-api [new file with mode: 0755]
scripts/lttng-ust-hip-post-processing.sed [new file with mode: 0644]
scripts/lttng-ust-hsa-post-processing.sed [new file with mode: 0644]
src/hip-ignores.txt [new file with mode: 0644]
src/hsa-ignores.txt [new file with mode: 0644]
src/lttng-roctx.c [new file with mode: 0644]
src/lttng-roctx.h [new file with mode: 0644]
src/lttng-ust-exatracer.cpp [new file with mode: 0644]
src/lttng-ust-roctx-impl.c [new file with mode: 0644]
src/lttng-ust-roctx.h [new file with mode: 0644]
tests/hello.cpp [new file with mode: 0644]

diff --git a/.gitignore b/.gitignore
new file mode 100644 (file)
index 0000000..5ad324d
--- /dev/null
@@ -0,0 +1,2 @@
+*.tar.gz
+build
\ No newline at end of file
diff --git a/Makefile b/Makefile
new file mode 100644 (file)
index 0000000..f7c1fb2
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,93 @@
+# Configure this.
+ROCM_VERSION?=6.1.0-1388
+ROCM?=/opt/rocm-$(ROCM_VERSION)
+VENDOR?=AMD
+CXX?=g++
+builddir?=$(CURDIR)/build
+
+# Do not touch below.
+HIPCC=$(ROCM)/bin/hipcc
+PLATFORM=__HIP_PLATFORM_$(VENDOR)__
+
+# LTTng-UST
+LTTNG_UST_CFLAGS=$(shell pkg-config --cflags lttng-ust)
+LTTNG_UST_LIBS=$(shell pkg-config --libs lttng-ust)
+
+# Rocprofiler-sdk
+ROCPROFILER_SDK_CFLAGS=-I $(ROCM)/include -L $(ROCM)/lib -Wl,-rpath=$(ROCM)/lib
+ROCPROFILER_SDK_LIBS=-lrocprofiler-sdk
+
+# Concat dependencies.
+DEPS_CFLAGS=$(LTTNG_UST_CFLAGS) $(ROCPROFILER_SDK_CFLAGS)
+DEPS_LIBS=$(LTTNG_UST_LIBS) $(ROCPROFILER_SDK_LIBS)
+
+
+# HIP stuff.
+AUTOGEN_HIP_API=$(builddir)/lttng-ust-hip-defs.h    \
+                $(builddir)/lttng-ust-hip.h         \
+                $(builddir)/lttng-ust-hip-classes.h \
+                $(builddir)/lttng-ust-hip-impl.c    \
+                $(builddir)/lttng-ust-hip-states.h
+
+HIP_HEADER=$(ROCM)/include/hip/hip_runtime.h
+
+# HSA stuff.
+AUTOGEN_HSA_API=$(builddir)/lttng-ust-hsa-defs.h    \
+                $(builddir)/lttng-ust-hsa.h         \
+                $(builddir)/lttng-ust-hsa-classes.h \
+                $(builddir)/lttng-ust-hsa-impl.c    \
+                $(builddir)/lttng-ust-hsa-states.h
+
+HSA_HEADER=$(ROCM)/include/hsa/hsa.h
+
+# Final target.
+TARGET=$(builddir)/libexatracer.so
+
+# Toolchain flags.
+CFLAGS=-I $(builddir) -D $(PLATFORM) -O2 -g -fmax-errors=1 -fvisibility=hidden -Wextra -Wno-deprecated-declarations
+LDFLAGS=-shared -fPIC
+
+all: $(builddir) $(TARGET)
+
+$(builddir):
+       mkdir -p $@
+
+# Do not add *-wrappers.cpp to toolchain inputs.
+$(TARGET): src/lttng-ust-exatracer.cpp src/lttng-ust-roctx-impl.c $(builddir)/lttng-ust-hsa-impl.c $(builddir)/lttng-ust-hip-impl.c
+       $(CXX) -I src $(CFLAGS) $(DEPS_CFLAGS) $(LDFLAGS) -o $@ $^ $(DEPS_LIBS)
+
+src/lttng-ust-exatracer.cpp: $(builddir)/lttng-ust-hsa-wrappers.cpp $(builddir)/lttng-ust-hip-wrappers.cpp
+
+# HIP rules.
+$(builddir)/lttng-ust-hip-wrappers.cpp: $(AUTOGEN_HIP_API) scripts/gen-hip-wrappers
+       scripts/gen-hip-wrappers -D $(PLATFORM) -I $(ROCM)/include --ignore=src/hip-ignores.txt $(HIP_HEADER) $@
+       sed -i -f scripts/lttng-ust-hip-post-processing.sed $@
+
+$(AUTOGEN_HIP_API) &: $(HIP_HEADER) scripts/lttng-ust-auto-api
+       scripts/lttng-ust-auto-api --ignores=src/hip-ignores.txt -D $(PLATFORM) -I $(ROCM)/include --namespace=lttng_hip --provider=hip --common-prefix=hip  --classes-guard=LTTNG_HIP_TRACEPOINT_CLASSES_HPP --tp-guard=LTTNG_HIP_TRACEPOINT_DEF_H $< $(AUTOGEN_HIP_API)
+       sed -i -f scripts/lttng-ust-hip-post-processing.sed $(AUTOGEN_HIP_API)
+
+# HSA rules.
+$(builddir)/lttng-ust-hsa-wrappers.cpp: $(AUTOGEN_HSA_API) scripts/gen-hsa-wrappers
+       scripts/gen-hsa-wrappers -D $(PLATFORM) -I $(ROCM)/include --ignore=src/hsa-ignores.txt $(HSA_HEADER) $@
+       sed -i -f scripts/lttng-ust-hsa-post-processing.sed $@
+
+$(AUTOGEN_HSA_API) &: $(HSA_HEADER) scripts/lttng-ust-auto-api
+       scripts/lttng-ust-auto-api --ignores=src/hsa-ignores.txt -D $(PLATFORM) -I $(ROCM)/include --namespace=lttng_hsa --provider=hsa --common-prefix=hsa --classes-guard=LTTNG_HSA_TRACEPOINT_CLASSES_HPP --tp-guard=LTTNG_HSA_TRACEPOINT_DEF_H $< $(AUTOGEN_HSA_API)
+       sed -i -f scripts/lttng-ust-hsa-post-processing.sed $(AUTOGEN_HSA_API)
+
+# Testing.
+$(builddir)/hello: tests/hello.cpp
+       $(HIPCC) -lrocprofiler-sdk-roctx -Wno-unused -o $@ $^
+
+check: $(builddir)/hello
+       scripts/check $(TARGET) $^
+
+clean:
+       rm -rf ./build
+       rm -rf ./traces
+
+dist:
+       git archive --prefix extracer/ --format=tar.gz --output exatracer.tar.gz HEAD
+
+.PHONY: all clean dist
diff --git a/scripts/check b/scripts/check
new file mode 100755 (executable)
index 0000000..20a42d8
--- /dev/null
@@ -0,0 +1,22 @@
+#!/bin/sh
+#
+# SPDX-License-Identifier: MIT
+#
+# Copyright (c) 2024 Olivier Dion <odion@efficios.com>
+
+TRACE_OUTPUT=traces
+
+lttng create --output $TRACE_OUTPUT
+
+# Enable all HSA, HIP and roctx event types.
+lttng enable-event --userspace 'hsa:*'
+lttng enable-event --userspace 'hip:*'
+lttng enable-event --userspace 'roctx:*'
+
+lttng start
+
+LD_PRELOAD=$1 $2
+
+lttng destroy
+
+babeltrace2 $TRACE_OUTPUT
diff --git a/scripts/gen-hip-wrappers b/scripts/gen-hip-wrappers
new file mode 100755 (executable)
index 0000000..a43fd29
--- /dev/null
@@ -0,0 +1,224 @@
+#!/usr/bin/env python3
+#
+# Copyright (c) 2023 EfficiOS, Inc.
+#
+# Author: Olivier Dion <odion@efficios.com>
+#
+# Auto-generate lttng-ust tracepoints for HIP.
+#
+# Require: python-clang (libclang)
+
+import argparse
+import re
+import subprocess
+
+from string import Template
+
+import clang.cindex
+
+def list_function_declarations(root):
+    return [
+        child
+        for child in root.get_children()
+        if child.kind == clang.cindex.CursorKind.FUNCTION_DECL
+    ]
+
+def get_system_include_paths():
+
+    clang_args = ["clang", "-v", "-c", "-xc", "-o", "/dev/null", "/dev/null"]
+    paths = []
+
+    with subprocess.Popen(clang_args, stderr=subprocess.PIPE, text=True) as proc:
+        start_sys_search = False
+        for line in proc.stderr:
+            if start_sys_search:
+                if line == "End of search list.\n":
+                    break
+                paths.append("-isystem")
+                paths.append(line.strip())
+            elif line == "#include <...> search starts here:\n":
+                start_sys_search = True
+
+    return paths
+
+def parse_header(header_file, includes, defines):
+
+    args = get_system_include_paths()
+
+    if includes:
+        for inc in includes:
+            args.append("-I")
+            args.append(inc)
+
+    if defines:
+        for d in defines:
+            args.append("-D")
+            args.append(d)
+
+    tu = clang.cindex.Index.create().parse(header_file, args=args)
+
+    for d in tu.diagnostics:
+        print(f"WARNING: {d}")
+
+    return tu.cursor
+
+def list_functions(root):
+    return [
+        fn
+        for fn in list_function_declarations(root)
+        if fn.spelling.startswith("hip") and fn.spelling
+    ]
+
+def exact_definition(arg):
+
+    ct = arg.type.get_canonical()
+    if ct.kind == clang.cindex.TypeKind.POINTER:
+        pt = ct.get_pointee()
+        if pt.kind == clang.cindex.TypeKind.FUNCTIONPROTO:
+            ret_type = pt.get_result().spelling
+            argument_types = ", ".join([a.spelling for a in pt.argument_types()])
+            return f"{ret_type} (*{arg.spelling})({argument_types})"
+    m = re.search(r'(\[[0-9]*\])+', arg.type.spelling)
+    if m:
+        return f"{arg.type.spelling[:m.start(0)]} {arg.spelling}{m.group(0)}"
+    else:
+        return f"{arg.type.spelling} {arg.spelling}"
+
+def cast(arg):
+    canon = arg.type.get_canonical()
+    if canon.kind == clang.cindex.TypeKind.POINTER:
+        return "void *"
+    return re.sub(r'\[[0-9]*\]', '*', canon.spelling)
+
+forbiden_list = set()
+
+extra_works = {
+}
+
+def main():
+
+    parser = argparse.ArgumentParser(prog="gen-hip-wrappers")
+
+    parser.add_argument("api",
+                        help="HIP API header")
+
+    parser.add_argument("wrappers",
+                        help="Path to HIP wrappers")
+
+    parser.add_argument("--ignores",
+                        dest="ignores",
+                        metavar="FILE",
+                        default=None,
+                        help="Ignore list")
+
+    parser.add_argument("-I",
+                        action="append",
+                        metavar="DIR",
+                        dest="includes",
+                        help="Add DIR to list of directories to include")
+
+    parser.add_argument("-D",
+                        action="append",
+                        metavar="DEFINITION",
+                        dest="defines",
+                        help="Add DEFINITION to list of definitions")
+
+    args = parser.parse_args()
+
+    if args.ignores:
+        with open(args.ignores, "r") as f:
+            for ignore in f.read().splitlines():
+                forbiden_list.add(ignore)
+
+    prologue_tpl = Template("""/* Auto-generated */
+#include "lttng-ust-hip-states.h"
+""")
+
+    ret_fn_tpl = Template("""
+static ${ret_type} lttng_${fn_name}(${fn_arguments})
+{
+       ${ret_type} ret;
+       {
+               lttng_hip::api_object_${fn_name} lttng_api_object {${fn_rest_argument_names}};
+               ret = next_hip_table.${fn_name}_fn(${fn_pass_argument_names});
+               lttng_api_object.mark_return(ret);
+       }
+$extra_work
+       return ret;
+}
+""")
+
+    void_fn_tpl = Template("""
+static void lttng_${fn_name}(${fn_arguments})
+{
+       {
+               lttng_hip::api_object_${fn_name} lttng_api_object {${fn_rest_argument_names}};
+               next_hip_table.${fn_name}_fn(${fn_pass_argument_names});
+               lttng_api_object.mark_return();
+       }
+$extra_work
+}
+""")
+
+    epilogue_tpl = Template("""
+static void lttng_hip_install_wrappers(void)
+{
+       ${wrappers}
+}
+""")
+
+    functions = list_functions(parse_header(args.api,
+                                            args.includes,
+                                            args.defines))
+
+    with open(args.wrappers, "w") as output:
+
+        output.write(prologue_tpl.substitute())
+
+        for fn in functions:
+
+            if fn.spelling in forbiden_list:
+                continue
+
+            args = list(fn.get_arguments())
+            fn_pass_argument_names = ", ".join([
+                f"{arg.spelling}"
+                for arg in args
+            ])
+
+            if args:
+                fn_rest_argument_names = ", ".join([
+                    "(%s)%s" % (cast(arg), arg.spelling)
+                    for arg in args
+                ])
+            else:
+                fn_rest_argument_names=""
+
+            if fn.spelling in extra_works:
+                extra_work = extra_works[fn.spelling]
+            else:
+                extra_work = ""
+
+            if "void"== fn.type.get_result().spelling:
+                fn_tpl = void_fn_tpl
+            else:
+                fn_tpl = ret_fn_tpl
+
+            output.write(fn_tpl.substitute(fn_name=fn.spelling,
+                                           fn_arguments=", ".join([
+                                               exact_definition(arg)
+                                               for arg in fn.get_arguments()
+                                           ]),
+                                           fn_pass_argument_names=fn_pass_argument_names,
+                                           fn_rest_argument_names=fn_rest_argument_names,
+                                           ret_type=fn.type.get_result().spelling,
+                                           extra_work=extra_work))
+
+        output.write(epilogue_tpl.substitute(wrappers="\n      ".join([
+            f"lttng_hip_table.{fn.spelling}_fn = &lttng_{fn.spelling};"
+            for fn in functions if fn.spelling not in forbiden_list
+        ])))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/gen-hsa-wrappers b/scripts/gen-hsa-wrappers
new file mode 100755 (executable)
index 0000000..5009cf0
--- /dev/null
@@ -0,0 +1,224 @@
+#!/usr/bin/env python3
+#
+# Copyright (c) 2023 EfficiOS, Inc.
+#
+# Author: Olivier Dion <odion@efficios.com>
+#
+# Auto-generate lttng-ust tracepoints for HSA.
+#
+# Require: python-clang (libclang)
+
+import argparse
+import re
+import subprocess
+
+from string import Template
+
+import clang.cindex
+
+def list_function_declarations(root):
+    return [
+        child
+        for child in root.get_children()
+        if child.kind == clang.cindex.CursorKind.FUNCTION_DECL
+    ]
+
+def get_system_include_paths():
+
+    clang_args = ["clang", "-v", "-c", "-xc", "-o", "/dev/null", "/dev/null"]
+    paths = []
+
+    with subprocess.Popen(clang_args, stderr=subprocess.PIPE, text=True) as proc:
+        start_sys_search = False
+        for line in proc.stderr:
+            if start_sys_search:
+                if line == "End of search list.\n":
+                    break
+                paths.append("-isystem")
+                paths.append(line.strip())
+            elif line == "#include <...> search starts here:\n":
+                start_sys_search = True
+
+    return paths
+
+def parse_header(header_file, includes, defines):
+
+    args = get_system_include_paths()
+
+    if includes:
+        for inc in includes:
+            args.append("-I")
+            args.append(inc)
+
+    if defines:
+        for d in defines:
+            args.append("-D")
+            args.append(d)
+
+    tu = clang.cindex.Index.create().parse(header_file, args=args)
+
+    for d in tu.diagnostics:
+        print(f"WARNING: {d}")
+
+    return tu.cursor
+
+def list_functions(root):
+    return [
+        fn
+        for fn in list_function_declarations(root)
+        if fn.spelling.startswith("hsa") and fn.spelling
+    ]
+
+def exact_definition(arg):
+
+    ct = arg.type.get_canonical()
+    if ct.kind == clang.cindex.TypeKind.POINTER:
+        pt = ct.get_pointee()
+        if pt.kind == clang.cindex.TypeKind.FUNCTIONPROTO:
+            ret_type = pt.get_result().spelling
+            argument_types = ", ".join([a.spelling for a in pt.argument_types()])
+            return f"{ret_type} (*{arg.spelling})({argument_types})"
+    m = re.search(r'(\[[0-9]*\])+', arg.type.spelling)
+    if m:
+        return f"{arg.type.spelling[:m.start(0)]} {arg.spelling}{m.group(0)}"
+    else:
+        return f"{arg.type.spelling} {arg.spelling}"
+
+def cast(arg):
+    canon = arg.type.get_canonical()
+    if canon.kind == clang.cindex.TypeKind.POINTER:
+        return "void *"
+    return re.sub(r'\[[0-9]*\]', '*', canon.spelling)
+
+forbiden_list = set()
+
+extra_works = {
+}
+
+def main():
+
+    parser = argparse.ArgumentParser(prog="gen-hsa-wrappers")
+
+    parser.add_argument("api",
+                        help="HSA API header")
+
+    parser.add_argument("wrappers",
+                        help="Path to HSA wrappers")
+
+    parser.add_argument("--ignores",
+                        dest="ignores",
+                        metavar="FILE",
+                        default=None,
+                        help="Ignore list")
+
+    parser.add_argument("-I",
+                        action="append",
+                        metavar="DIR",
+                        dest="includes",
+                        help="Add DIR to list of directories to include")
+
+    parser.add_argument("-D",
+                        action="append",
+                        metavar="DEFINITION",
+                        dest="defines",
+                        help="Add DEFINITION to list of definitions")
+
+    args = parser.parse_args()
+
+    if args.ignores:
+        with open(args.ignores, "r") as f:
+            for ignore in f.read().splitlines():
+                forbiden_list.add(ignore)
+
+    prologue_tpl = Template("""/* Auto-generated */
+#include "lttng-ust-hsa-states.h"
+""")
+
+    ret_fn_tpl = Template("""
+static ${ret_type} lttng_${fn_name}(${fn_arguments})
+{
+       ${ret_type} ret;
+       {
+               lttng_hsa::api_object_${fn_name} lttng_api_object {${fn_rest_argument_names}};
+               ret = next_hsa_core_table.${fn_name}_fn(${fn_pass_argument_names});
+               lttng_api_object.mark_return(ret);
+       }
+$extra_work
+       return ret;
+}
+""")
+
+    void_fn_tpl = Template("""
+static void lttng_${fn_name}(${fn_arguments})
+{
+       {
+               lttng_hsa::api_object_${fn_name} lttng_api_object {${fn_rest_argument_names}};
+               next_hsa_core_table.${fn_name}_fn(${fn_pass_argument_names});
+               lttng_api_object.mark_return();
+       }
+$extra_work
+}
+""")
+
+    epilogue_tpl = Template("""
+static void lttng_hsa_install_wrappers(void)
+{
+       ${wrappers}
+}
+""")
+
+    functions = list_functions(parse_header(args.api,
+                                            args.includes,
+                                            args.defines))
+
+    with open(args.wrappers, "w") as output:
+
+        output.write(prologue_tpl.substitute())
+
+        for fn in functions:
+
+            if fn.spelling in forbiden_list:
+                continue
+
+            args = list(fn.get_arguments())
+            fn_pass_argument_names = ", ".join([
+                f"{arg.spelling}"
+                for arg in args
+            ])
+
+            if args:
+                fn_rest_argument_names = ", ".join([
+                    "(%s)%s" % (cast(arg), arg.spelling)
+                    for arg in args
+                ])
+            else:
+                fn_rest_argument_names=""
+
+            if fn.spelling in extra_works:
+                extra_work = extra_works[fn.spelling]
+            else:
+                extra_work = ""
+
+            if "void"== fn.type.get_result().spelling:
+                fn_tpl = void_fn_tpl
+            else:
+                fn_tpl = ret_fn_tpl
+
+            output.write(fn_tpl.substitute(fn_name=fn.spelling,
+                                           fn_arguments=", ".join([
+                                               exact_definition(arg)
+                                               for arg in fn.get_arguments()
+                                           ]),
+                                           fn_pass_argument_names=fn_pass_argument_names,
+                                           fn_rest_argument_names=fn_rest_argument_names,
+                                           ret_type=fn.type.get_result().spelling,
+                                           extra_work=extra_work))
+
+        output.write(epilogue_tpl.substitute(wrappers="\n      ".join([
+            f"lttng_hsa_core_table.{fn.spelling}_fn = &lttng_{fn.spelling};"
+            for fn in functions if fn.spelling not in forbiden_list
+        ])))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/lttng-ust-auto-api b/scripts/lttng-ust-auto-api
new file mode 100755 (executable)
index 0000000..845a33f
--- /dev/null
@@ -0,0 +1,868 @@
+#!/usr/bin/env python3
+#
+# SPDX-License-Identifier: MIT
+#
+# Copyright (c) 2023 EfficiOS, Inc.
+#
+# Author: Olivier Dion <odion@efficios.com>
+#
+# Auto-generate lttng-ust tracepoints for OpenMPI.
+#
+# Require: python-clang (libclang)
+
+import argparse
+import re
+import os
+import subprocess
+
+from string import Template
+
+import clang.cindex
+
+COMMON_PREFIX = None
+IGNORE        = set()
+PROVIDER      = None
+
+# LTTNG_UST_TP_ARGS is limited to 10 arguments.  Since we introduce two
+# arguments of our own (thread-id and local-id), the maximum is 8.
+#
+# If a function has more arguments than this limit, all arguments -- at the
+# exception of the IDs -- will be passed through a data structure instead.
+MAX_TP_ARGS_COUNT = 8
+
+class EnumValue:
+
+    def __init__(self, ev):
+        self.name  = ev.spelling
+        self.value = ev.enum_value
+
+class EnumType:
+
+    def __init__(self, en, name=None):
+        self.name   = name or en.spelling
+        self.values = [EnumValue(ev) for ev in en.get_children()]
+        self.en     = en
+
+class Typedef:
+
+    def __init__(self, spelling, value):
+        self.spelling = spelling
+        self.value    = value
+
+class ArgumentType:
+
+    integer_set = {
+        clang.cindex.TypeKind.UCHAR,
+        clang.cindex.TypeKind.USHORT,
+        clang.cindex.TypeKind.UINT,
+        clang.cindex.TypeKind.ULONG,
+        clang.cindex.TypeKind.ULONGLONG,
+        clang.cindex.TypeKind.SHORT,
+        clang.cindex.TypeKind.INT,
+        clang.cindex.TypeKind.LONG,
+        clang.cindex.TypeKind.LONGLONG,
+    }
+
+    float_set = {
+        clang.cindex.TypeKind.FLOAT,
+        clang.cindex.TypeKind.DOUBLE,
+    }
+
+    address_set = {
+        clang.cindex.TypeKind.POINTER,
+        clang.cindex.TypeKind.INCOMPLETEARRAY,
+    }
+
+    def __init__(self, arg, name_prefix="", expr_prefix=""):
+        self.type = arg.type
+        self.arg  = arg
+        self.const = ""
+        self.name_prefix = name_prefix
+        self.expr_prefix = expr_prefix
+
+        if self.kind() == clang.cindex.TypeKind.POINTER:
+            if self.type.get_pointee().is_const_qualified():
+                self.const = "const "
+        elif self.type.is_const_qualified():
+            self.const = "const "
+
+    def name(self):
+        return self.arg.spelling
+
+    def type_name(self):
+        if self.kind() == clang.cindex.TypeKind.INCOMPLETEARRAY:
+            return self.const + re.sub(r"\[[0-9]*\]", "*", self.type.spelling)
+        if self.kind() == clang.cindex.TypeKind.POINTER:
+            return f"{self.const}void *"
+        return self.const + self.type.spelling
+
+    def kind(self):
+        return self.type.get_canonical().kind
+
+    def to_lttng_field(self):
+        if self.name() == "reserved":
+            return ""
+        elif self.kind() in ArgumentType.address_set:
+            return f"lttng_ust_field_integer_hex(uintptr_t, {self.name_prefix}{self.name()}, (uintptr_t){self.expr_prefix}{self.name()})"
+        elif self.kind() in ArgumentType.integer_set:
+            return f"lttng_ust_field_integer({self.type_name()}, {self.name_prefix}{self.name()}, {self.expr_prefix}{self.name()})"
+        elif self.kind() in ArgumentType.float_set:
+            return f"lttng_ust_field_float({self.type_name()}, {self.name_prefix}{self.name()}, {self.expr_prefix}{self.name()})"
+        elif self.kind() == clang.cindex.TypeKind.ENUM:
+            enum_name = self.type_name().removeprefix("enum ")
+            return f"lttng_ust_field_enum({PROVIDER}, {enum_name}, int, {self.name_prefix}{self.name()}, {self.expr_prefix}{self.name()})"
+        elif self.kind() == clang.cindex.TypeKind.RECORD:
+            return [
+                ArgumentType(field, f"{self.name()}_", f"{self.expr_prefix}{self.name()}.").to_lttng_field()
+                for field in self.type.get_canonical().get_fields()
+            ]
+        else:
+            raise Exception("Unsupported kind: %s" % self.kind())
+
+class FunctionType:
+
+    struct_tpl = Template("""
+$name {
+    $fields
+};
+""")
+
+    def __init__(self, fn):
+        self.name = fn.spelling
+        self.args = [ArgumentType(arg) for arg in fn.get_arguments()]
+        self.fn   = fn
+
+    def tp_args(self):
+        if len(self.args) == 0:
+            return ""
+        elif len(self.args) > MAX_TP_ARGS_COUNT:
+            return ",\n        " + f"{self.arguments_struct_name()} *, lttng_args"
+        else:
+            return ",\n        " + ",\n        ".join([f"{arg.type_name()}, {arg.name()}"
+                                                           for arg in self.args])
+
+    def tp_fields(self):
+        if len(self.args) == 0:
+            return ""
+        elif len(self.args) > MAX_TP_ARGS_COUNT:
+            packed_args = [ArgumentType(arg.arg, "", "lttng_args->") for arg in self.args]
+            return "\n        ".join(flatten([arg.to_lttng_field()
+                                              for arg in packed_args]))
+        else:
+            return "\n        ".join(flatten([arg.to_lttng_field()
+                                              for arg in self.args]))
+    def get_return_type_name(self):
+        return self.fn.type.get_result().spelling
+
+    def ctor_params(self):
+        if len(self.args) == 0:
+            return ""
+        elif len(self.args) > MAX_TP_ARGS_COUNT:
+            return ", &lttng_args"
+        else:
+            return ", " + ", ".join(arg.name() for arg in self.args)
+
+    def arguments_struct_variable(self):
+        if len(self.args) > MAX_TP_ARGS_COUNT:
+            return "%s lttng_args = {%s};" % (self.arguments_struct_name(),
+                                              ", ".join([arg.name() for arg in self.args]))
+        else:
+            return f"/* {self.arguments_struct_name()} lttng_args */"
+
+
+    def arguments_struct_name(self):
+        return f"struct lttng_arguments_of_{self.name}"
+
+    def arguments_struct(self):
+        if len(self.args) > MAX_TP_ARGS_COUNT:
+            return self.struct_tpl.substitute(name=self.arguments_struct_name(),
+                                              fields="\n    ".join([
+                                                  f"{arg.type_name()} {arg.name()};"
+                                                  for arg in self.args
+                                              ]))
+        else:
+            return ""
+
+def flatten(lst):
+    new_lst = []
+    for e in lst:
+        if isinstance(e, list):
+            for e in flatten(e):
+                new_lst.append(e)
+        else:
+            new_lst.append(e)
+    return new_lst
+
+def list_function_declarations(root):
+    return [ child
+             for child in root.get_children()
+             if child.kind == clang.cindex.CursorKind.FUNCTION_DECL ]
+
+def list_enum_declarations(root):
+    return [
+        child
+        for child in root.get_children()
+        if child.kind == clang.cindex.CursorKind.ENUM_DECL
+    ]
+
+def list_typedef_enums(root):
+    enums = []
+    for child in root.get_children():
+        if child.kind == clang.cindex.CursorKind.TYPEDEF_DECL:
+            maybe_enum = child.underlying_typedef_type.get_declaration()
+            if maybe_enum.kind == clang.cindex.CursorKind.ENUM_DECL:
+                enums.append(Typedef(child.spelling, maybe_enum))
+    return enums
+
+def search_header_in(name, paths):
+    for path in paths.split(":"):
+        for dirpath, _, files in os.walk(path, followlinks=True):
+            for file in files:
+                if file == name:
+                    return os.path.join(dirpath, file)
+    return None
+
+def search_c_header(name):
+    return search_header_in(name, os.environ["C_INCLUDE_PATH"])
+
+def search_cxx_header(name):
+    return search_header_in(name, os.environ["CPLUS_INCLUDE_PATH"])
+
+def get_system_include_paths():
+
+    clang_args = ["clang", "-v", "-c", "-xc", "/dev/null", "-o", "/dev/null"]
+    paths = []
+
+    with subprocess.Popen(clang_args, stderr=subprocess.PIPE,
+                          encoding="ascii") as proc:
+        start_sys_search = False
+        for line in proc.stderr:
+            if start_sys_search:
+                if line == "End of search list.\n":
+                    break
+                paths.append("-isystem")
+                paths.append(line.strip())
+            elif line == "#include <...> search starts here:\n":
+                start_sys_search = True
+
+    return paths
+
+def parse_header(header_file, includes, defines,
+                 required_c_headers, required_cxx_headers):
+
+    args = get_system_include_paths()
+
+    if includes:
+        for inc in includes:
+            args.append("-I")
+            args.append(inc)
+
+    if defines:
+        for d in defines:
+            args.append("-D")
+            args.append(d)
+
+    for header in required_c_headers:
+        found = search_c_header(header)
+        if found:
+            args.append("-I")
+            args.append(os.path.dirname(found))
+
+    for header in required_cxx_headers:
+        found = search_cxx_header(header)
+        if found:
+            args.append("-I")
+            args.append(os.path.dirname(found))
+
+    tu = clang.cindex.Index.create().parse(header_file, args=args)
+
+    for d in tu.diagnostics:
+        print(f"WARNING: {d}")
+
+    return tu.cursor
+
+def list_functions(root):
+    return [
+        FunctionType(fn)
+        for fn in list_function_declarations(root)
+        if fn.spelling.startswith(COMMON_PREFIX) and fn.spelling not in IGNORE
+    ]
+
+def list_enums(root):
+
+    enums = [
+        en
+        for en in list_enum_declarations(root)
+        if en.spelling.startswith(COMMON_PREFIX) and en.spelling not in IGNORE
+    ]
+
+    typedef_enums = [
+        typedef
+        for typedef in list_typedef_enums(root)
+        if typedef.spelling.startswith(COMMON_PREFIX) and
+        typedef.spelling not in IGNORE and
+        typedef.value.get_definition() not in enums
+    ]
+
+    all_enums = ([ EnumType(e) for e in enums ] +
+                 [ EnumType(td.value, td.spelling) for td in typedef_enums])
+
+    return all_enums
+
+def generate_tracepoint_definitions(function_declarations, enum_declarations,
+                                    api_file, output_defs, output_interface,
+                                    header_guard):
+    defs_tpl = Template("""/* Auto-generated file! */
+#undef LTTNG_UST_TRACEPOINT_PROVIDER
+#define LTTNG_UST_TRACEPOINT_PROVIDER $provider
+
+#undef LTTNG_UST_TRACEPOINT_INCLUDE
+#define LTTNG_UST_TRACEPOINT_INCLUDE "$output_defs"
+
+#if !defined($header_guard)
+#include <$api_file>
+$pass_by_struct
+#endif
+
+#if !defined($header_guard) || defined(LTTNG_UST_TRACEPOINT_HEADER_MULTI_READ)
+#define $header_guard
+
+#include <lttng/tracepoint.h>
+
+$enum_definitions
+$tracepoint_definitions
+
+#endif /* $header_guard */
+
+#include <lttng/tracepoint-event.h>
+""")
+
+    interface_tpl = Template("""/* Auto-generated file! */
+#ifndef ${header_guard}_IMPL
+#define ${header_guard}_IMPL
+
+#include "${output_defs}"
+
+#endif /* ${header_guard}_IMPL */
+""")
+
+    tp_tpl = Template("""
+LTTNG_UST_TRACEPOINT_EVENT(
+    $provider,
+    ${name}_entry,
+    LTTNG_UST_TP_ARGS(
+        uint64_t, lttng_thread_id,
+        uint64_t, lttng_local_id$tp_args
+    ),
+    LTTNG_UST_TP_FIELDS(
+        lttng_ust_field_integer(uint64_t, lttng_thread_id, lttng_thread_id)
+        lttng_ust_field_integer(uint64_t, lttng_local_id, lttng_local_id)
+        $tp_fields
+    )
+)
+""")
+
+    tp_ret_tpl = Template("""
+LTTNG_UST_TRACEPOINT_EVENT(
+    $provider,
+    ${name}_exit,
+    LTTNG_UST_TP_ARGS(
+        uint64_t, lttng_thread_id,
+        uint64_t, lttng_local_id,
+        int, lttng_has_ret,
+        $ret_type, lttng_ret
+    ),
+    LTTNG_UST_TP_FIELDS(
+        lttng_ust_field_integer(uint64_t, lttng_thread_id, lttng_thread_id)
+        lttng_ust_field_integer(uint64_t, lttng_local_id, lttng_local_id)
+        lttng_ust_field_integer(int, lttng_has_ret, lttng_has_ret)
+        lttng_ust_field_integer($ret_type, lttng_ret, lttng_ret)
+    )
+)
+""")
+
+    tp_void_tpl = Template("""
+LTTNG_UST_TRACEPOINT_EVENT(
+    $provider,
+    ${name}_exit,
+    LTTNG_UST_TP_ARGS(
+        uint64_t, lttng_thread_id,
+        uint64_t, lttng_local_id,
+        int, lttng_has_ret
+    ),
+    LTTNG_UST_TP_FIELDS(
+        lttng_ust_field_integer(uint64_t, lttng_thread_id, lttng_thread_id)
+        lttng_ust_field_integer(uint64_t, lttng_local_id, lttng_local_id)
+        lttng_ust_field_integer(int, lttng_has_ret, lttng_has_ret)
+    )
+)
+""")
+    enum_tpl = Template("""
+LTTNG_UST_TRACEPOINT_ENUM($provider, $name,
+    LTTNG_UST_TP_ENUM_VALUES(
+        $values
+    )
+)
+""")
+    with open(output_defs, "w") as output:
+        definitions = []
+        for fn in function_declarations:
+            ret_type = fn.get_return_type_name()
+            definitions.append(tp_tpl.substitute(provider=PROVIDER,
+                                                 name=fn.name,
+                                                 tp_args=fn.tp_args(),
+                                                 tp_fields=fn.tp_fields()))
+            if ret_type == "void":
+                tpl = tp_void_tpl
+            else:
+                tpl = tp_ret_tpl
+
+            definitions.append(tpl.substitute(provider=PROVIDER,
+                                              name=fn.name,
+                                              ret_type=ret_type))
+
+        tracepoint_definitions = "\n".join(definitions)
+
+        enum_definitions = "\n".join([
+            enum_tpl.substitute(provider=PROVIDER,
+                                name=en.name,
+                                values="\n        ".join([f'lttng_ust_field_enum_value("{ev.name}", {ev.value})'
+                                                          for ev in en.values]))
+            for en in enum_declarations
+        ])
+
+        output.write(defs_tpl.substitute(provider=PROVIDER,
+                                         output_defs=output_defs,
+                                         header_guard=header_guard,
+                                         tracepoint_definitions=tracepoint_definitions,
+                                         enum_definitions=enum_definitions,
+                                         api_file=api_file,
+                                         pass_by_struct="".join([fn.arguments_struct()
+                                                                 for fn in function_declarations])))
+    with open(output_interface, "w") as output:
+        output.write(interface_tpl.substitute(header_guard=header_guard,
+                                              output_defs=output_defs,))
+
+def generate_tracepoint_classes(function_declarations, api_file, output_path, header_guard, namespace):
+    global_tpl = Template("""/* Auto-generated file! */
+#include <atomic>
+#include <cstdint>
+#include <$api_file>
+namespace $namespace {
+    struct unique_id {
+        uint64_t thread_id;
+        uint64_t local_id;
+    };
+
+    class id_generator {
+        static std::atomic<uint64_t> _thread_counter;
+        uint64_t _thread_id;
+        uint64_t _local_id;
+    public:
+        id_generator() {
+            _thread_id = _thread_counter++;
+            _local_id  = 0;
+        }
+
+        unique_id next_id() {
+            return {
+                .thread_id = _thread_id,
+                .local_id  = _local_id++,
+            };
+        }
+    };
+
+    extern thread_local id_generator generator;
+
+    template<typename RetType>
+    class base_api_object {
+    protected:
+        unique_id  _id;
+        int        _has_ret;
+        RetType    _ret;
+    public:
+        void generate_id() {
+            _id = generator.next_id();
+        }
+
+        void mark_return(RetType ret) {
+            _ret     = ret;
+            _has_ret = 1;
+        }
+    };
+
+    class base_api_object_void {
+    protected:
+        unique_id  _id;
+        int        _has_ret;
+    public:
+        void generate_id() {
+            _id = generator.next_id();
+        }
+
+        void mark_return(void) {
+            _has_ret = 1;
+        }
+    };
+
+$classes
+};
+""")
+
+    cls_ret_tpl = Template("""
+class api_object_$fn_name : public base_api_object<$ret_type>
+{
+public:
+    api_object_$fn_name($ctor_type_params) {
+        if (lttng_ust_tracepoint_enabled($provider, ${fn_name}_entry)) {
+            generate_id();
+            $pass_by_struct
+            lttng_ust_do_tracepoint($provider,
+                                    ${fn_name}_entry,
+                                    _id.thread_id,
+                                    _id.local_id$ctor_params);
+        }
+    }
+    ~api_object_$fn_name() {
+        if (lttng_ust_tracepoint_enabled($provider, ${fn_name}_exit)) {
+            lttng_ust_do_tracepoint($provider,
+                                    ${fn_name}_exit,
+                                    _id.thread_id,
+                                    _id.local_id,
+                                    _has_ret,
+                                    _ret);
+        }
+    }
+};
+""")
+
+    cls_void_tpl = Template("""
+class api_object_$fn_name : public base_api_object_void
+{
+public:
+    api_object_$fn_name($ctor_type_params) {
+        if (lttng_ust_tracepoint_enabled($provider, ${fn_name}_entry)) {
+            generate_id();
+            $pass_by_struct
+            lttng_ust_do_tracepoint($provider,
+                                    ${fn_name}_entry,
+                                    _id.thread_id,
+                                    _id.local_id$ctor_params);
+        }
+    }
+    ~api_object_$fn_name() {
+        if (lttng_ust_tracepoint_enabled($provider, ${fn_name}_exit)) {
+            lttng_ust_do_tracepoint($provider,
+                                    ${fn_name}_exit,
+                                    _id.thread_id,
+                                    _id.local_id,
+                                    _has_ret);
+        }
+    }
+};
+""")
+
+    with open(output_path, "w") as output:
+        classes = []
+        for fn in function_declarations:
+            ret_type = fn.get_return_type_name()
+            if ret_type == "void":
+                cls_tpl = cls_void_tpl
+            else:
+                cls_tpl = cls_ret_tpl
+            classes.append(cls_tpl.substitute(provider=PROVIDER,
+                                              fn_name=fn.name,
+                                              pass_by_struct=fn.arguments_struct_variable(),
+                                              ctor_type_params=", ".join([f"{arg.type_name()} {arg.name()}"
+                                                                          for arg in fn.args]),
+                                              ctor_params=fn.ctor_params(),
+                                              ret_type=ret_type))
+        output.write(global_tpl.substitute(api_file=api_file,
+                                           namespace=namespace,
+                                           classes="".join(classes)))
+
+def generate_tracepoint_emulated_classes(function_declarations, api_file, output_path,
+                                         header_guard, namespace):
+    global_tpl = Template("""/* Auto-generated file! */
+#include <stdint.h>
+#include <$api_file>
+#define ${NAMESPACE}_CAT_PRIMITIVE(A, B) A##B
+#define ${NAMESPACE}_CAT(A, B) ${NAMESPACE}_CAT_PRIMITIVE(A, B)
+
+struct ${namespace}_unique_id {
+       uint64_t thread_id;
+       uint64_t local_id;
+};
+
+struct ${namespace}_id_generator {
+       uint64_t thread_id;
+       uint64_t local_id;
+       int initialized;
+};
+
+extern uint64_t ${namespace}_id_generator_thread_counter;
+extern _Thread_local struct ${namespace}_id_generator ${namespace}_generator;
+
+#define ${namespace}_unlikely(x) __builtin_expect(!!(x), 0)
+
+static inline void ${namespace}_id_generator_next_id(struct ${namespace}_unique_id *id)
+{
+       if (${namespace}_unlikely(!${namespace}_generator.initialized)) {
+               ${namespace}_generator.thread_id =
+                       __atomic_fetch_add(&${namespace}_id_generator_thread_counter,
+                                          1,
+                                          __ATOMIC_RELAXED);
+               ${namespace}_generator.initialized = 1;
+       }
+
+       id->thread_id = ${namespace}_generator.thread_id;
+       id->local_id = ${namespace}_generator.local_id++;
+}
+
+#define ${NAMESPACE}_API_OBJECT_NAME ${namespace}_api_object
+
+#define ${NAMESPACE}_MAKE_API_OBJECT(name, ...) \\
+       struct ${NAMESPACE}_CAT(${namespace}_api_state_, name) __attribute__((cleanup(${NAMESPACE}_CAT(name, _exit)))) \\
+       ${NAMESPACE}_API_OBJECT_NAME = { 0 };  \\
+       ${NAMESPACE}_CAT(name, _entry)(&${NAMESPACE}_API_OBJECT_NAME, ##__VA_ARGS__); \\
+       do { } while (0)
+
+#define ${NAMESPACE}_MARK_RETURN_API_OBJECT(code) \\
+       ({                                                   \\
+               ${NAMESPACE}_API_OBJECT_NAME.ret = code;     \\
+               ${NAMESPACE}_API_OBJECT_NAME.has_ret = 1;    \\
+       })
+${classes}
+""")
+
+    cls_tpl = Template("""
+struct ${namespace}_api_state_${fn_name} {
+       struct ${namespace}_unique_id id;
+       int has_ret;
+       $ret_type ret;
+};
+
+static inline void ${fn_name}_entry(${ctor_type_params})
+{
+       if (${namespace}_ust_tracepoint_enabled(${provider}, ${fn_name}_entry)) {
+               ${namespace}_id_generator_next_id(&lttng_state->id);
+               ${pass_by_struct}
+               ${namespace}_ust_do_tracepoint($provider, ${fn_name}_entry,
+                                              lttng_state->id.thread_id,
+                                              lttng_state->id.local_id${ctor_params});
+       }
+}
+
+static inline void ${fn_name}_exit(const struct ${namespace}_api_state_${fn_name} *lttng_state)
+{
+       lttng_ust_tracepoint(${provider}, ${fn_name}_exit,
+                            lttng_state->id.thread_id,
+                            lttng_state->id.local_id,
+                            lttng_state->has_ret,
+                            lttng_state->ret);
+}
+""")
+    with open(output_path, "w") as output:
+        output.write(global_tpl.substitute(api_file=api_file,
+                                           namespace=namespace,
+                                           NAMESPACE=namespace.upper(),
+                                           classes="".join([
+                                               cls_tpl.substitute(provider=PROVIDER,
+                                                                  fn_name=fn.name,
+                                                                  pass_by_struct=fn.arguments_struct_variable(),
+                                                                  ctor_params=fn.ctor_params(),
+                                                                  ctor_type_params=", ".join([f"struct {namespace}_api_state_{fn.name} *lttng_state"] +
+                                                                                             [f"{arg.type_name()} {arg.name()}"
+                                                                                              for arg in fn.args]),
+                                                                  namespace=namespace,
+                                                                  NAMESPACE=namespace.upper(),
+                                                                  ret_type=fn.get_return_type_name())
+                                               for fn in function_declarations
+                                           ])))
+
+
+def generate_tracepoint_implementations(namespace, defs, impls):
+    tpl = Template("""/* Auto-generated !*/
+#define LTTNG_UST_TRACEPOINT_CREATE_PROBES
+#define LTTNG_UST_TRACEPOINT_DEFINE
+#include "${defs}"
+""")
+
+    with open(impls, "w") as output:
+        output.write(tpl.substitute(defs=defs))
+
+def generate_tracepoint_states(namespace,
+                               interface,
+                               classes,
+                               states,
+                               emulated_classes):
+
+    if emulated_classes:
+        body_tpl = Template("""
+uint64_t ${namespace}_id_generator_thread_counter = 0;
+_Thread_local struct ${namespace}_id_generator ${namespace}_generator;
+""")
+    else:
+        body_tpl = Template("""
+#include <atomic>
+namespace ${namespace} {
+       std::atomic<uint64_t> id_generator::_thread_counter{0};
+       thread_local id_generator generator;
+};
+""")
+
+    tpl = Template("""/* Auto-generated! */
+#include "${interface}"
+#include "${classes}"
+
+$body
+""")
+
+    with open(states, "w") as output:
+        output.write(tpl.substitute(interface=interface,
+                                    classes=classes,
+                                    body=body_tpl.substitute(namespace=namespace)))
+
+def main():
+
+    global COMMON_PREFIX
+    global IGNORE
+    global PROVIDER
+
+    parser = argparse.ArgumentParser(prog="lttng-ust-autogen-api",
+                                     description="Generate LTTng classes and tracepoint definitions")
+
+    parser.add_argument("api",
+                        help="Header file that has the API")
+
+    parser.add_argument("defs",
+                        help="Path to tracepoint definitions")
+
+    parser.add_argument("interface",
+                        help="Path to tracepoints interfaces")
+
+    parser.add_argument("classes",
+                        help="Path to tracepoint classes")
+
+    parser.add_argument("impl",
+                        help="Path to tracepoint implementations")
+
+    parser.add_argument("states",
+                        help="Path to states")
+
+    parser.add_argument("--provider",
+                        dest="provider",
+                        metavar="PROVIDER",
+                        default="noprovider",
+                        help="Tracepoints PROVIDER")
+
+    parser.add_argument("--common-prefix",
+                        dest="common_prefix",
+                        metavar="PREFIX",
+                        default="",
+                        help="Common PREFIX of API functions (C namespace)")
+
+    parser.add_argument("-I",
+                        action="append",
+                        metavar="DIR",
+                        dest="includes",
+                        help="Add DIR to list of directories to include")
+
+    parser.add_argument("-D",
+                        action="append",
+                        metavar="DEFINITION",
+                        dest="defines",
+                        help="Add DEFINITION to list of definitions")
+
+    parser.add_argument("--tp-guard",
+                        dest="tp_guard",
+                        metavar="GUARD",
+                        default="LTTNG_TRACEPOINT_DEF_H",
+                        help="Use GUARD as header guard for tracepoint definitions")
+
+    parser.add_argument("--classes-guard",
+                        dest="classes_guard",
+                        metavar="GUARD",
+                        default="LTTNG_TRACEPOINT_CLASSES_HPP",
+                        help="Use GUARD as header guard for classes definitions")
+
+    parser.add_argument("--emulated-classes",
+                        dest="emulated_classes",
+                        action="store_true",
+                        default=False,
+                        help="Emulate C++ classes")
+
+    parser.add_argument("--namespace",
+                        dest="namespace",
+                        metavar="NAMESPACE",
+                        default="lttng",
+                        help="Generate classes in NAMESPACE")
+
+    parser.add_argument("--ignores",
+                        dest="ignores",
+                        metavar="FILE",
+                        default=None,
+                        help="Ignore functions listed in FILE")
+
+    parser.add_argument("--c-header",
+                        dest="required_c_headers",
+                        metavar="HEADER",
+                        action="append",
+                        default=[],
+                        help="Search for HEADER in C_INCLUDE_PATH and add its directory to search path")
+
+    parser.add_argument("--cxx-header",
+                        dest="required_cxx_headers",
+                        metavar="HEADER",
+                        action="append",
+                        default=[],
+                        help="Search for HEADER in CPLUS_INCLUDE_PATH add its directory to search path")
+
+    args = parser.parse_args()
+
+    PROVIDER      = args.provider
+    COMMON_PREFIX = args.common_prefix
+
+    if args.ignores:
+        with open(args.ignores, "r") as f:
+            for ignore in f.read().splitlines():
+                IGNORE.add(ignore)
+
+    root = parse_header(args.api, args.includes, args.defines,
+                        args.required_c_headers,
+                        args.required_cxx_headers)
+
+    function_declarations = list_functions(root)
+    enum_declarations     = list_enums(root)
+
+    generate_tracepoint_definitions(function_declarations,
+                                    enum_declarations,
+                                    args.api, args.defs, args.interface,
+                                    args.tp_guard)
+
+    if args.emulated_classes:
+        generate_tracepoint_emulated_classes(function_declarations,
+                                             args.api,
+                                             args.classes,
+                                             args.classes_guard,
+                                             args.namespace)
+    else:
+        generate_tracepoint_classes(function_declarations,
+                                    args.api,
+                                    args.classes,
+                                    args.classes_guard,
+                                    args.namespace)
+
+    generate_tracepoint_implementations(args.namespace,
+                                        args.interface,
+                                        args.impl)
+
+    generate_tracepoint_states(args.namespace,
+                               args.interface,
+                               args.classes,
+                               args.states,
+                               args.emulated_classes)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/lttng-ust-hip-post-processing.sed b/scripts/lttng-ust-hip-post-processing.sed
new file mode 100644 (file)
index 0000000..f41c5a9
--- /dev/null
@@ -0,0 +1 @@
+s/hipChooseDeviceR[0-9]*/hipChooseDevice/g
diff --git a/scripts/lttng-ust-hsa-post-processing.sed b/scripts/lttng-ust-hsa-post-processing.sed
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/src/hip-ignores.txt b/src/hip-ignores.txt
new file mode 100644 (file)
index 0000000..bd59bb7
--- /dev/null
@@ -0,0 +1,18 @@
+hipCreateChannelDesc
+hipDrvGraphAddMemsetNode
+hipDrvGraphMemcpyNodeGetParams
+hipDrvGraphMemcpyNodeSetParams
+hipExtGetLastError
+hipExternalMemoryGetMappedMipmappedArray
+hipGraphAddExternalSemaphoresSignalNode
+hipGraphAddExternalSemaphoresWaitNode
+hipGraphAddNode
+hipGraphExecExternalSemaphoresSignalNodeSetParams
+hipGraphExecExternalSemaphoresWaitNodeSetParams
+hipGraphExecGetFlags
+hipGraphExecNodeSetParams
+hipGraphExternalSemaphoresSignalNodeGetParams
+hipGraphExternalSemaphoresSignalNodeSetParams
+hipGraphExternalSemaphoresWaitNodeGetParams
+hipGraphExternalSemaphoresWaitNodeSetParams
+hipGraphNodeSetParams
diff --git a/src/hsa-ignores.txt b/src/hsa-ignores.txt
new file mode 100644 (file)
index 0000000..9b55883
--- /dev/null
@@ -0,0 +1,37 @@
+hsa_queue_add_write_index_acq_rel
+hsa_queue_add_write_index_acquire
+hsa_queue_add_write_index_release
+hsa_queue_cas_write_index_acq_rel
+hsa_queue_cas_write_index_acquire
+hsa_queue_cas_write_index_release
+hsa_queue_load_read_index_acquire
+hsa_queue_load_write_index_acq_rel
+hsa_queue_load_write_index_acquire
+hsa_queue_load_write_index_release
+hsa_queue_store_read_index_release
+hsa_queue_store_write_index_release
+hsa_signal_add_acq_rel
+hsa_signal_add_acquire
+hsa_signal_add_release
+hsa_signal_and_acq_rel
+hsa_signal_and_acquire
+hsa_signal_and_release
+hsa_signal_cas_acq_rel
+hsa_signal_cas_acquire
+hsa_signal_cas_release
+hsa_signal_exchange_acq_rel
+hsa_signal_exchange_acquire
+hsa_signal_exchange_release
+hsa_signal_load_acquire
+hsa_signal_load_release
+hsa_signal_or_acq_rel
+hsa_signal_or_acquire
+hsa_signal_or_release
+hsa_signal_store_release
+hsa_signal_subtract_acq_rel
+hsa_signal_subtract_acquire
+hsa_signal_subtract_release
+hsa_signal_wait_acquire
+hsa_signal_xor_acq_rel
+hsa_signal_xor_acquire
+hsa_signal_xor_release
diff --git a/src/lttng-roctx.c b/src/lttng-roctx.c
new file mode 100644 (file)
index 0000000..b836c0e
--- /dev/null
@@ -0,0 +1,68 @@
+/*
+ * Copyright © 2023 AMD
+ *
+ * All Rights Reserved
+ */
+
+#include <limits.h>
+#include <stdint.h>
+
+#include "lttng-roctx.h"
+
+static void lttng_roctxMark(const char *message)
+{
+       lttng_ust_tracepoint(roctx, mark, message);
+       next_roctx_core_table.roctxMarkA_fn(message);
+}
+
+static int lttng_roctxRangePush(const char *message)
+{
+       int level = next_roctx_core_table.roctxRangePushA_fn(message);
+
+       lttng_ust_tracepoint(roctx, range_push, message, level);
+
+       return level;
+}
+
+static int lttng_roctxRangePop(void)
+{
+       int level = next_roctx_core_table.roctxRangePop_fn();
+
+       lttng_ust_tracepoint(roctx, range_pop, level);
+
+       return level;
+}
+
+static roctx_range_id_t lttng_roctxRangeStart(const char *message)
+{
+       roctx_range_id_t id;
+
+       id = next_roctx_core_table.roctxRangeStartA_fn(message);
+
+       lttng_ust_tracepoint(roctx, range_start, message, id);
+
+       return id;
+}
+
+static void lttng_roctxRangeStop(roctx_range_id_t id)
+{
+       lttng_ust_tracepoint(roctx, range_stop, id);
+       next_roctx_core_table.roctxRangeStop_fn(id);
+}
+
+static int lttng_roctxGetThreadId(roctx_thread_id_t *tid)
+{
+       int ret = next_roctx_core_table.roctxGetThreadId_fn(tid);
+       lttng_ust_tracepoint(roctx, get_thread_id, *tid);
+       return ret;
+}
+
+static void lttng_roctx_install_wrappers(void)
+{
+       lttng_roctx_core_table.roctxMarkA_fn = &lttng_roctxMark;
+       lttng_roctx_core_table.roctxRangePushA_fn = &lttng_roctxRangePush;
+       lttng_roctx_core_table.roctxRangePop_fn = &lttng_roctxRangePop;
+       lttng_roctx_core_table.roctxRangeStartA_fn = &lttng_roctxRangeStart;
+       lttng_roctx_core_table.roctxRangeStop_fn = &lttng_roctxRangeStop;
+}
+
diff --git a/src/lttng-roctx.h b/src/lttng-roctx.h
new file mode 100644 (file)
index 0000000..60f5d19
--- /dev/null
@@ -0,0 +1,12 @@
+/*
+ * Copyright © 2023 AMD
+ *
+ * All Rights Reserved
+ */
+
+#ifndef LTTNG_ROCTX_H
+#define LTTNG_ROCTX_H
+
+#include "lttng-ust-roctx.h"
+
+#endif /* LTTNG_ROCTX_H */
diff --git a/src/lttng-ust-exatracer.cpp b/src/lttng-ust-exatracer.cpp
new file mode 100644 (file)
index 0000000..7eb759d
--- /dev/null
@@ -0,0 +1,239 @@
+/*
+ * Copyright © 2024 AMD
+ *
+ * All Rights Reserved
+ */
+
+#include <cinttypes>
+
+#include <hip/hip_runtime.h>
+#include <hip/hip_gl_interop.h>
+
+/*
+ * Do not re-order!  hip_api_trace.h header referenced some R0000 types which
+ * are only defined in the hip_deprecated.h header.
+ */
+#include <hip/hip_deprecated.h>
+#include <hip/amd_detail/hip_api_trace.hpp>
+
+/*
+ * Without AMD_INTERNAL_BUILD, hsa_api_trace.h tries to include
+ * "inc/*", which is not installed.
+ */
+#define AMD_INTERNAL_BUILD
+#include <hsa/hsa.h>
+#include <hsa/hsa_api_trace.h>
+
+/*
+ * Rocprofiler integration.
+ */
+#include <rocprofiler-sdk/registration.h>
+#include <rocprofiler-sdk/rocprofiler.h>
+#include <rocprofiler-sdk-roctx/api_trace.h>
+
+/*
+ * This is the table with LTTng tracepoints for HIP.
+ */
+static struct HipDispatchTable lttng_hip_table;
+
+/*
+ * This is an internal copy of the orignal table used by the LTTng wrappers to
+ * call the next function for HIP.
+ */
+static struct HipDispatchTable next_hip_table;
+
+/* Auto-generated by gen-hip-wrappers. */
+#include "lttng-ust-hip-wrappers.cpp"
+
+
+/*
+ * This is the table with LTTng tracepoints for HSA.
+ */
+static struct CoreApiTable lttng_hsa_core_table;
+
+/*
+ * This is an internal copy of the orignal table used by the LTTng wrappers to
+ * call the next function for HSA.
+ */
+static struct CoreApiTable next_hsa_core_table;
+
+/* Auto-generated by gen-hsa-wrappers. */
+#include "lttng-ust-hsa-wrappers.cpp"
+
+
+/*
+ * This is the table with LTTng tracepoints for roctx.
+ */
+static roctxCoreApiTable_t lttng_roctx_core_table;
+
+/*
+ * This is an internal copy of the orignal table used by the LTTng wrappers to
+ * call the next function for roctx.
+ */
+static roctxCoreApiTable_t next_roctx_core_table;
+
+/* LTTng events for roctx. */
+#include "lttng-roctx.c"
+
+#define die(FMT, ...)                                           \
+    do {                                                        \
+        fprintf(stderr, FMT "\n" __VA_OPT__(,) __VA_ARGS__);    \
+        exit(EXIT_FAILURE);                                     \
+    } while(0)
+
+#define error(FMT, ...)                                 \
+    fprintf(stderr, FMT "\n" __VA_OPT__(,) __VA_ARGS__)
+
+static void
+register_hip_table(uint64_t lib_instance, uint64_t num_tables, void **tables)
+{
+    /* Add instrumentation only to first library instance. */
+    if (0 != lib_instance) {
+        return;
+    }
+
+    /* Ensure that there is at least one table. */
+    if (num_tables < 1) {
+        return;
+    }
+  
+    /*
+     * This could be done at compile time in some way if C++ could support
+     * designated initializers.
+     */
+    lttng_hip_install_wrappers();
+
+    auto original_hip_table = static_cast<HipDispatchTable*>(tables[0]);
+
+    /* Swap tables. */
+    if (original_hip_table) {
+        auto size = std::min(sizeof(next_hip_table), original_hip_table->size);
+        memcpy(&next_hip_table, original_hip_table, size);
+        memcpy(original_hip_table, &lttng_hip_table, size);
+    }
+}
+
+static void
+register_hsa_core_table(uint64_t lib_instance, uint64_t num_tables, void **tables)
+{
+    /* Add instrumentation only to first library instance. */
+    if (0 != lib_instance) {
+        return;
+    }
+
+    /* Ensure that there is at least one table. */
+    if (num_tables < 1) {
+        return;
+    }
+  
+    /*
+     * This could be done at compile time in some way if C++ could support
+     * designated initializers.
+     */
+    lttng_hsa_install_wrappers();
+
+    auto original_hsa_table = static_cast<HsaApiTable*>(tables[0]);
+
+    /* Swap tables. */
+    if (original_hsa_table) {
+        auto core = original_hsa_table->core_;
+        auto size = sizeof(*core);
+        memcpy(&next_hsa_core_table, core, size);
+        memcpy(core, &lttng_hsa_core_table, size);
+    }
+}
+
+static void
+register_roctx_core_table(uint64_t lib_instance, uint64_t num_tables, void **tables)
+{
+    /* Add instrumentation only to first library instance. */
+    if (0 != lib_instance) {
+        return;
+    }
+
+    /* Ensure that there is at least one table. */
+    if (num_tables < 1) {
+        return;
+    }
+
+    /*
+     * This could be done at compile time in some way if C++ could support
+     * designated initializers.
+     */
+    lttng_roctx_install_wrappers();
+  
+    auto original_roctx_core_table = static_cast<roctxCoreApiTable_t*>(tables[0]);
+
+    /* Swap tables. */
+    if (original_roctx_core_table) {
+        auto size = std::min(original_roctx_core_table->size, sizeof(roctxCoreApiTable_t));
+        memcpy(&next_roctx_core_table, original_roctx_core_table, size);
+        memcpy(original_roctx_core_table, &lttng_roctx_core_table, size);
+    }
+}
+
+static void
+api_registration_callback(rocprofiler_intercept_table_t type,
+                          uint64_t lib_version,
+                          uint64_t lib_instance,
+                          void**   tables,
+                          uint64_t num_tables,
+                          void*    user_data)
+{
+    (void) user_data;
+    (void) lib_version;
+
+    /*
+     * We only want HIP runtime and HSA tables.  If we get something
+     * else, there is a bug somewhere.
+     */
+    switch (type) {
+    case ROCPROFILER_HIP_RUNTIME_TABLE:
+        register_hip_table(lib_instance, num_tables, tables);
+        break;
+    case ROCPROFILER_HSA_TABLE:
+        register_hsa_core_table(lib_instance, num_tables, tables);
+        break;
+    case ROCPROFILER_MARKER_CORE_TABLE:
+        register_roctx_core_table(lib_instance, num_tables, tables);
+        break;
+    defualt:
+        die("unexpected library type: %d", type);
+    }
+}
+
+extern "C"
+__attribute__((visibility("default")))
+rocprofiler_tool_configure_result_t *rocprofiler_configure(uint32_t version,
+                                                          const char *runtime_version,
+                                                          uint32_t priority,
+                                                          rocprofiler_client_id_t *id)
+{
+    (void) priority;
+    (void) runtime_version;
+    (void) version;
+
+    /*
+     * Maybe Exatracer instead?
+     */
+    id->name = "LTTng-UST";
+
+    if (ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED ==
+        rocprofiler_at_intercept_table_registration(api_registration_callback,
+                                                    ROCPROFILER_HIP_RUNTIME_TABLE |
+                                                   ROCPROFILER_HSA_TABLE |
+                                                   ROCPROFILER_MARKER_CORE_TABLE,
+                                                    nullptr)) {
+        die("Trying to register API interception table for HIP runtime: "
+            "NOT IMPLEMENTED");
+    }
+
+    static auto cfg = rocprofiler_tool_configure_result_t {
+        sizeof(rocprofiler_tool_configure_result_t),
+        nullptr,
+        nullptr,
+        nullptr
+    };
+
+    return &cfg;
+}
diff --git a/src/lttng-ust-roctx-impl.c b/src/lttng-ust-roctx-impl.c
new file mode 100644 (file)
index 0000000..5fca996
--- /dev/null
@@ -0,0 +1,4 @@
+#define LTTNG_UST_TRACEPOINT_CREATE_PROBES
+#define LTTNG_UST_TRACEPOINT_DEFINE
+#include "lttng-roctx.h"
+
diff --git a/src/lttng-ust-roctx.h b/src/lttng-ust-roctx.h
new file mode 100644 (file)
index 0000000..46f3222
--- /dev/null
@@ -0,0 +1,90 @@
+/*
+ * Copyright © 2023 AMD
+ *
+ * All Rights Reserved
+ */
+
+#undef LTTNG_UST_TRACEPOINT_PROVIDER
+#define LTTNG_UST_TRACEPOINT_PROVIDER roctx
+
+#undef LTTNG_UST_TRACEPOINT_INCLUDE
+#define LTTNG_UST_TRACEPOINT_INCLUDE "lttng-ust-roctx.h"
+
+#if !defined(LTTNG_TRACEPOINT_ROCTX_H) || defined(LTTNG_UST_TRACEPOINT_HEADER_MULTI_READ)
+#define LTTNG_TRACEPOINT_ROCTX_H
+
+#include <lttng/tracepoint.h>
+
+LTTNG_UST_TRACEPOINT_EVENT(
+    roctx,
+    mark,
+    LTTNG_UST_TP_ARGS(
+        const char *, message
+    ),
+    LTTNG_UST_TP_FIELDS(
+           lttng_ust_field_string(message, message)
+    )
+)
+
+LTTNG_UST_TRACEPOINT_EVENT(
+    roctx,
+    range_push,
+    LTTNG_UST_TP_ARGS(
+           const char *, message,
+           int, level
+    ),
+    LTTNG_UST_TP_FIELDS(
+           lttng_ust_field_string(message, message)
+           lttng_ust_field_integer(uint64_t, level, level)
+    )
+)
+
+LTTNG_UST_TRACEPOINT_EVENT(
+    roctx,
+    range_pop,
+    LTTNG_UST_TP_ARGS(
+           int, level
+    ),
+    LTTNG_UST_TP_FIELDS(
+           lttng_ust_field_integer(uint64_t, level, level)
+    )
+)
+
+LTTNG_UST_TRACEPOINT_EVENT(
+    roctx,
+    range_start,
+    LTTNG_UST_TP_ARGS(
+           const char *, message,
+           uint64_t, id
+    ),
+    LTTNG_UST_TP_FIELDS(
+           lttng_ust_field_string(message, message)
+           lttng_ust_field_integer_hex(uint64_t, id, id)
+    )
+)
+
+LTTNG_UST_TRACEPOINT_EVENT(
+    roctx,
+    range_stop,
+    LTTNG_UST_TP_ARGS(
+           uint64_t, id
+    ),
+    LTTNG_UST_TP_FIELDS(
+           lttng_ust_field_integer_hex(uint64_t, id, id)
+    )
+)
+
+LTTNG_UST_TRACEPOINT_EVENT(
+    roctx,
+    get_thread_id,
+    LTTNG_UST_TP_ARGS(
+           uint64_t, tid
+    ),
+    LTTNG_UST_TP_FIELDS(
+           lttng_ust_field_integer_hex(uint64_t, roctx_tid, tid)
+    )
+)
+
+#endif /* LTTNG_TRACEPOINT_ROCTX_H */
+
+#include <lttng/tracepoint-event.h>
diff --git a/tests/hello.cpp b/tests/hello.cpp
new file mode 100644 (file)
index 0000000..0e6956e
--- /dev/null
@@ -0,0 +1,90 @@
+/*
+  Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
+
+  Permission is hereby granted, free of charge, to any person obtaining a copy
+  of this software and associated documentation files (the "Software"), to deal
+  in the Software without restriction, including without limitation the rights
+  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+  copies of the Software, and to permit persons to whom the Software is
+  furnished to do so, subject to the following conditions:
+
+  The above copyright notice and this permission notice shall be included in
+  all copies or substantial portions of the Software.
+
+  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+  THE SOFTWARE.
+*/
+
+#include <rocprofiler-sdk-roctx/roctx.h>
+#include <hip/hip_runtime.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <iostream>
+#include <string>
+#include <fstream>
+
+#define SAMPLE_VERSION "HIP-Examples-Application-v1.0"
+#define SUCCESS 0
+#define FAILURE 1
+
+using namespace std;
+
+__global__ void helloworld(char* in, char* out)
+{
+    int num = hipThreadIdx_x + hipBlockDim_x * hipBlockIdx_x;
+    out[num] = in[num] + 1;
+}
+
+int main(int argc, char* argv[])
+{
+
+    hipDeviceProp_t devProp;
+
+    roctx_range_id_t range = roctxRangeStart("hello");
+    
+    hipGetDeviceProperties(&devProp, 0);
+    cout << " System minor " << devProp.minor << endl;
+    cout << " System major " << devProp.major << endl;
+    cout << " agent prop name " << devProp.name << endl;
+
+    /* Initial input,output for the host and create memory objects for the kernel*/
+    const char* input = "GdkknVnqkc";
+    size_t strlength = strlen(input);
+    cout << "input string:" << endl;
+    cout << input << endl;
+    char *output = (char*) malloc(strlength + 1);
+
+    char* inputBuffer;
+    char* outputBuffer;
+    hipMalloc((void**)&inputBuffer, (strlength + 1) * sizeof(char));
+    hipMalloc((void**)&outputBuffer, (strlength + 1) * sizeof(char));
+
+    hipMemcpy(inputBuffer, input, (strlength + 1) * sizeof(char), hipMemcpyHostToDevice);
+
+    hipLaunchKernelGGL(helloworld,
+                       dim3(1),
+                       dim3(strlength),
+                       0, 0,
+                       inputBuffer ,outputBuffer );
+
+    hipMemcpy(output, outputBuffer,(strlength + 1) * sizeof(char), hipMemcpyDeviceToHost);
+
+    hipFree(inputBuffer);
+    hipFree(outputBuffer);
+
+    output[strlength] = '\0';  //Add the terminal character to the end of output.
+    cout << "\noutput string:" << endl;
+    cout << output << endl;
+
+    free(output);
+
+    std::cout<<"Passed!\n";
+    roctxRangeStop(range);
+    return SUCCESS;
+}
This page took 0.055523 seconds and 4 git commands to generate.