--- /dev/null
+*.tar.gz
+build
\ No newline at end of file
--- /dev/null
+# Configure this.
+ROCM_VERSION?=6.1.0-1388
+ROCM?=/opt/rocm-$(ROCM_VERSION)
+VENDOR?=AMD
+CXX?=g++
+builddir?=$(CURDIR)/build
+
+# Do not touch below.
+HIPCC=$(ROCM)/bin/hipcc
+PLATFORM=__HIP_PLATFORM_$(VENDOR)__
+
+# LTTng-UST
+LTTNG_UST_CFLAGS=$(shell pkg-config --cflags lttng-ust)
+LTTNG_UST_LIBS=$(shell pkg-config --libs lttng-ust)
+
+# Rocprofiler-sdk
+ROCPROFILER_SDK_CFLAGS=-I $(ROCM)/include -L $(ROCM)/lib -Wl,-rpath=$(ROCM)/lib
+ROCPROFILER_SDK_LIBS=-lrocprofiler-sdk
+
+# Concat dependencies.
+DEPS_CFLAGS=$(LTTNG_UST_CFLAGS) $(ROCPROFILER_SDK_CFLAGS)
+DEPS_LIBS=$(LTTNG_UST_LIBS) $(ROCPROFILER_SDK_LIBS)
+
+
+# HIP stuff.
+AUTOGEN_HIP_API=$(builddir)/lttng-ust-hip-defs.h \
+ $(builddir)/lttng-ust-hip.h \
+ $(builddir)/lttng-ust-hip-classes.h \
+ $(builddir)/lttng-ust-hip-impl.c \
+ $(builddir)/lttng-ust-hip-states.h
+
+HIP_HEADER=$(ROCM)/include/hip/hip_runtime.h
+
+# HSA stuff.
+AUTOGEN_HSA_API=$(builddir)/lttng-ust-hsa-defs.h \
+ $(builddir)/lttng-ust-hsa.h \
+ $(builddir)/lttng-ust-hsa-classes.h \
+ $(builddir)/lttng-ust-hsa-impl.c \
+ $(builddir)/lttng-ust-hsa-states.h
+
+HSA_HEADER=$(ROCM)/include/hsa/hsa.h
+
+# Final target.
+TARGET=$(builddir)/libexatracer.so
+
+# Toolchain flags.
+CFLAGS=-I $(builddir) -D $(PLATFORM) -O2 -g -fmax-errors=1 -fvisibility=hidden -Wextra -Wno-deprecated-declarations
+LDFLAGS=-shared -fPIC
+
+all: $(builddir) $(TARGET)
+
+$(builddir):
+ mkdir -p $@
+
+# Do not add *-wrappers.cpp to toolchain inputs.
+$(TARGET): src/lttng-ust-exatracer.cpp src/lttng-ust-roctx-impl.c $(builddir)/lttng-ust-hsa-impl.c $(builddir)/lttng-ust-hip-impl.c
+ $(CXX) -I src $(CFLAGS) $(DEPS_CFLAGS) $(LDFLAGS) -o $@ $^ $(DEPS_LIBS)
+
+src/lttng-ust-exatracer.cpp: $(builddir)/lttng-ust-hsa-wrappers.cpp $(builddir)/lttng-ust-hip-wrappers.cpp
+
+# HIP rules.
+$(builddir)/lttng-ust-hip-wrappers.cpp: $(AUTOGEN_HIP_API) scripts/gen-hip-wrappers
+ scripts/gen-hip-wrappers -D $(PLATFORM) -I $(ROCM)/include --ignore=src/hip-ignores.txt $(HIP_HEADER) $@
+ sed -i -f scripts/lttng-ust-hip-post-processing.sed $@
+
+$(AUTOGEN_HIP_API) &: $(HIP_HEADER) scripts/lttng-ust-auto-api
+ scripts/lttng-ust-auto-api --ignores=src/hip-ignores.txt -D $(PLATFORM) -I $(ROCM)/include --namespace=lttng_hip --provider=hip --common-prefix=hip --classes-guard=LTTNG_HIP_TRACEPOINT_CLASSES_HPP --tp-guard=LTTNG_HIP_TRACEPOINT_DEF_H $< $(AUTOGEN_HIP_API)
+ sed -i -f scripts/lttng-ust-hip-post-processing.sed $(AUTOGEN_HIP_API)
+
+# HSA rules.
+$(builddir)/lttng-ust-hsa-wrappers.cpp: $(AUTOGEN_HSA_API) scripts/gen-hsa-wrappers
+ scripts/gen-hsa-wrappers -D $(PLATFORM) -I $(ROCM)/include --ignore=src/hsa-ignores.txt $(HSA_HEADER) $@
+ sed -i -f scripts/lttng-ust-hsa-post-processing.sed $@
+
+$(AUTOGEN_HSA_API) &: $(HSA_HEADER) scripts/lttng-ust-auto-api
+ scripts/lttng-ust-auto-api --ignores=src/hsa-ignores.txt -D $(PLATFORM) -I $(ROCM)/include --namespace=lttng_hsa --provider=hsa --common-prefix=hsa --classes-guard=LTTNG_HSA_TRACEPOINT_CLASSES_HPP --tp-guard=LTTNG_HSA_TRACEPOINT_DEF_H $< $(AUTOGEN_HSA_API)
+ sed -i -f scripts/lttng-ust-hsa-post-processing.sed $(AUTOGEN_HSA_API)
+
+# Testing.
+$(builddir)/hello: tests/hello.cpp
+ $(HIPCC) -lrocprofiler-sdk-roctx -Wno-unused -o $@ $^
+
+check: $(builddir)/hello
+ scripts/check $(TARGET) $^
+
+clean:
+ rm -rf ./build
+ rm -rf ./traces
+
+dist:
+ git archive --prefix extracer/ --format=tar.gz --output exatracer.tar.gz HEAD
+
+.PHONY: all clean dist
--- /dev/null
+#!/bin/sh
+#
+# SPDX-License-Identifier: MIT
+#
+# Copyright (c) 2024 Olivier Dion <odion@efficios.com>
+
+TRACE_OUTPUT=traces
+
+lttng create --output $TRACE_OUTPUT
+
+# Enable all HSA, HIP and roctx event types.
+lttng enable-event --userspace 'hsa:*'
+lttng enable-event --userspace 'hip:*'
+lttng enable-event --userspace 'roctx:*'
+
+lttng start
+
+LD_PRELOAD=$1 $2
+
+lttng destroy
+
+babeltrace2 $TRACE_OUTPUT
--- /dev/null
+#!/usr/bin/env python3
+#
+# Copyright (c) 2023 EfficiOS, Inc.
+#
+# Author: Olivier Dion <odion@efficios.com>
+#
+# Auto-generate lttng-ust tracepoints for HIP.
+#
+# Require: python-clang (libclang)
+
+import argparse
+import re
+import subprocess
+
+from string import Template
+
+import clang.cindex
+
+def list_function_declarations(root):
+ return [
+ child
+ for child in root.get_children()
+ if child.kind == clang.cindex.CursorKind.FUNCTION_DECL
+ ]
+
+def get_system_include_paths():
+
+ clang_args = ["clang", "-v", "-c", "-xc", "-o", "/dev/null", "/dev/null"]
+ paths = []
+
+ with subprocess.Popen(clang_args, stderr=subprocess.PIPE, text=True) as proc:
+ start_sys_search = False
+ for line in proc.stderr:
+ if start_sys_search:
+ if line == "End of search list.\n":
+ break
+ paths.append("-isystem")
+ paths.append(line.strip())
+ elif line == "#include <...> search starts here:\n":
+ start_sys_search = True
+
+ return paths
+
+def parse_header(header_file, includes, defines):
+
+ args = get_system_include_paths()
+
+ if includes:
+ for inc in includes:
+ args.append("-I")
+ args.append(inc)
+
+ if defines:
+ for d in defines:
+ args.append("-D")
+ args.append(d)
+
+ tu = clang.cindex.Index.create().parse(header_file, args=args)
+
+ for d in tu.diagnostics:
+ print(f"WARNING: {d}")
+
+ return tu.cursor
+
+def list_functions(root):
+ return [
+ fn
+ for fn in list_function_declarations(root)
+ if fn.spelling.startswith("hip") and fn.spelling
+ ]
+
+def exact_definition(arg):
+
+ ct = arg.type.get_canonical()
+ if ct.kind == clang.cindex.TypeKind.POINTER:
+ pt = ct.get_pointee()
+ if pt.kind == clang.cindex.TypeKind.FUNCTIONPROTO:
+ ret_type = pt.get_result().spelling
+ argument_types = ", ".join([a.spelling for a in pt.argument_types()])
+ return f"{ret_type} (*{arg.spelling})({argument_types})"
+ m = re.search(r'(\[[0-9]*\])+', arg.type.spelling)
+ if m:
+ return f"{arg.type.spelling[:m.start(0)]} {arg.spelling}{m.group(0)}"
+ else:
+ return f"{arg.type.spelling} {arg.spelling}"
+
+def cast(arg):
+ canon = arg.type.get_canonical()
+ if canon.kind == clang.cindex.TypeKind.POINTER:
+ return "void *"
+ return re.sub(r'\[[0-9]*\]', '*', canon.spelling)
+
+forbiden_list = set()
+
+extra_works = {
+}
+
+def main():
+
+ parser = argparse.ArgumentParser(prog="gen-hip-wrappers")
+
+ parser.add_argument("api",
+ help="HIP API header")
+
+ parser.add_argument("wrappers",
+ help="Path to HIP wrappers")
+
+ parser.add_argument("--ignores",
+ dest="ignores",
+ metavar="FILE",
+ default=None,
+ help="Ignore list")
+
+ parser.add_argument("-I",
+ action="append",
+ metavar="DIR",
+ dest="includes",
+ help="Add DIR to list of directories to include")
+
+ parser.add_argument("-D",
+ action="append",
+ metavar="DEFINITION",
+ dest="defines",
+ help="Add DEFINITION to list of definitions")
+
+ args = parser.parse_args()
+
+ if args.ignores:
+ with open(args.ignores, "r") as f:
+ for ignore in f.read().splitlines():
+ forbiden_list.add(ignore)
+
+ prologue_tpl = Template("""/* Auto-generated */
+#include "lttng-ust-hip-states.h"
+""")
+
+ ret_fn_tpl = Template("""
+static ${ret_type} lttng_${fn_name}(${fn_arguments})
+{
+ ${ret_type} ret;
+ {
+ lttng_hip::api_object_${fn_name} lttng_api_object {${fn_rest_argument_names}};
+ ret = next_hip_table.${fn_name}_fn(${fn_pass_argument_names});
+ lttng_api_object.mark_return(ret);
+ }
+$extra_work
+ return ret;
+}
+""")
+
+ void_fn_tpl = Template("""
+static void lttng_${fn_name}(${fn_arguments})
+{
+ {
+ lttng_hip::api_object_${fn_name} lttng_api_object {${fn_rest_argument_names}};
+ next_hip_table.${fn_name}_fn(${fn_pass_argument_names});
+ lttng_api_object.mark_return();
+ }
+$extra_work
+}
+""")
+
+ epilogue_tpl = Template("""
+static void lttng_hip_install_wrappers(void)
+{
+ ${wrappers}
+}
+""")
+
+ functions = list_functions(parse_header(args.api,
+ args.includes,
+ args.defines))
+
+ with open(args.wrappers, "w") as output:
+
+ output.write(prologue_tpl.substitute())
+
+ for fn in functions:
+
+ if fn.spelling in forbiden_list:
+ continue
+
+ args = list(fn.get_arguments())
+ fn_pass_argument_names = ", ".join([
+ f"{arg.spelling}"
+ for arg in args
+ ])
+
+ if args:
+ fn_rest_argument_names = ", ".join([
+ "(%s)%s" % (cast(arg), arg.spelling)
+ for arg in args
+ ])
+ else:
+ fn_rest_argument_names=""
+
+ if fn.spelling in extra_works:
+ extra_work = extra_works[fn.spelling]
+ else:
+ extra_work = ""
+
+ if "void"== fn.type.get_result().spelling:
+ fn_tpl = void_fn_tpl
+ else:
+ fn_tpl = ret_fn_tpl
+
+ output.write(fn_tpl.substitute(fn_name=fn.spelling,
+ fn_arguments=", ".join([
+ exact_definition(arg)
+ for arg in fn.get_arguments()
+ ]),
+ fn_pass_argument_names=fn_pass_argument_names,
+ fn_rest_argument_names=fn_rest_argument_names,
+ ret_type=fn.type.get_result().spelling,
+ extra_work=extra_work))
+
+ output.write(epilogue_tpl.substitute(wrappers="\n ".join([
+ f"lttng_hip_table.{fn.spelling}_fn = <tng_{fn.spelling};"
+ for fn in functions if fn.spelling not in forbiden_list
+ ])))
+
+
+if __name__ == "__main__":
+ main()
--- /dev/null
+#!/usr/bin/env python3
+#
+# Copyright (c) 2023 EfficiOS, Inc.
+#
+# Author: Olivier Dion <odion@efficios.com>
+#
+# Auto-generate lttng-ust tracepoints for HSA.
+#
+# Require: python-clang (libclang)
+
+import argparse
+import re
+import subprocess
+
+from string import Template
+
+import clang.cindex
+
+def list_function_declarations(root):
+ return [
+ child
+ for child in root.get_children()
+ if child.kind == clang.cindex.CursorKind.FUNCTION_DECL
+ ]
+
+def get_system_include_paths():
+
+ clang_args = ["clang", "-v", "-c", "-xc", "-o", "/dev/null", "/dev/null"]
+ paths = []
+
+ with subprocess.Popen(clang_args, stderr=subprocess.PIPE, text=True) as proc:
+ start_sys_search = False
+ for line in proc.stderr:
+ if start_sys_search:
+ if line == "End of search list.\n":
+ break
+ paths.append("-isystem")
+ paths.append(line.strip())
+ elif line == "#include <...> search starts here:\n":
+ start_sys_search = True
+
+ return paths
+
+def parse_header(header_file, includes, defines):
+
+ args = get_system_include_paths()
+
+ if includes:
+ for inc in includes:
+ args.append("-I")
+ args.append(inc)
+
+ if defines:
+ for d in defines:
+ args.append("-D")
+ args.append(d)
+
+ tu = clang.cindex.Index.create().parse(header_file, args=args)
+
+ for d in tu.diagnostics:
+ print(f"WARNING: {d}")
+
+ return tu.cursor
+
+def list_functions(root):
+ return [
+ fn
+ for fn in list_function_declarations(root)
+ if fn.spelling.startswith("hsa") and fn.spelling
+ ]
+
+def exact_definition(arg):
+
+ ct = arg.type.get_canonical()
+ if ct.kind == clang.cindex.TypeKind.POINTER:
+ pt = ct.get_pointee()
+ if pt.kind == clang.cindex.TypeKind.FUNCTIONPROTO:
+ ret_type = pt.get_result().spelling
+ argument_types = ", ".join([a.spelling for a in pt.argument_types()])
+ return f"{ret_type} (*{arg.spelling})({argument_types})"
+ m = re.search(r'(\[[0-9]*\])+', arg.type.spelling)
+ if m:
+ return f"{arg.type.spelling[:m.start(0)]} {arg.spelling}{m.group(0)}"
+ else:
+ return f"{arg.type.spelling} {arg.spelling}"
+
+def cast(arg):
+ canon = arg.type.get_canonical()
+ if canon.kind == clang.cindex.TypeKind.POINTER:
+ return "void *"
+ return re.sub(r'\[[0-9]*\]', '*', canon.spelling)
+
+forbiden_list = set()
+
+extra_works = {
+}
+
+def main():
+
+ parser = argparse.ArgumentParser(prog="gen-hsa-wrappers")
+
+ parser.add_argument("api",
+ help="HSA API header")
+
+ parser.add_argument("wrappers",
+ help="Path to HSA wrappers")
+
+ parser.add_argument("--ignores",
+ dest="ignores",
+ metavar="FILE",
+ default=None,
+ help="Ignore list")
+
+ parser.add_argument("-I",
+ action="append",
+ metavar="DIR",
+ dest="includes",
+ help="Add DIR to list of directories to include")
+
+ parser.add_argument("-D",
+ action="append",
+ metavar="DEFINITION",
+ dest="defines",
+ help="Add DEFINITION to list of definitions")
+
+ args = parser.parse_args()
+
+ if args.ignores:
+ with open(args.ignores, "r") as f:
+ for ignore in f.read().splitlines():
+ forbiden_list.add(ignore)
+
+ prologue_tpl = Template("""/* Auto-generated */
+#include "lttng-ust-hsa-states.h"
+""")
+
+ ret_fn_tpl = Template("""
+static ${ret_type} lttng_${fn_name}(${fn_arguments})
+{
+ ${ret_type} ret;
+ {
+ lttng_hsa::api_object_${fn_name} lttng_api_object {${fn_rest_argument_names}};
+ ret = next_hsa_core_table.${fn_name}_fn(${fn_pass_argument_names});
+ lttng_api_object.mark_return(ret);
+ }
+$extra_work
+ return ret;
+}
+""")
+
+ void_fn_tpl = Template("""
+static void lttng_${fn_name}(${fn_arguments})
+{
+ {
+ lttng_hsa::api_object_${fn_name} lttng_api_object {${fn_rest_argument_names}};
+ next_hsa_core_table.${fn_name}_fn(${fn_pass_argument_names});
+ lttng_api_object.mark_return();
+ }
+$extra_work
+}
+""")
+
+ epilogue_tpl = Template("""
+static void lttng_hsa_install_wrappers(void)
+{
+ ${wrappers}
+}
+""")
+
+ functions = list_functions(parse_header(args.api,
+ args.includes,
+ args.defines))
+
+ with open(args.wrappers, "w") as output:
+
+ output.write(prologue_tpl.substitute())
+
+ for fn in functions:
+
+ if fn.spelling in forbiden_list:
+ continue
+
+ args = list(fn.get_arguments())
+ fn_pass_argument_names = ", ".join([
+ f"{arg.spelling}"
+ for arg in args
+ ])
+
+ if args:
+ fn_rest_argument_names = ", ".join([
+ "(%s)%s" % (cast(arg), arg.spelling)
+ for arg in args
+ ])
+ else:
+ fn_rest_argument_names=""
+
+ if fn.spelling in extra_works:
+ extra_work = extra_works[fn.spelling]
+ else:
+ extra_work = ""
+
+ if "void"== fn.type.get_result().spelling:
+ fn_tpl = void_fn_tpl
+ else:
+ fn_tpl = ret_fn_tpl
+
+ output.write(fn_tpl.substitute(fn_name=fn.spelling,
+ fn_arguments=", ".join([
+ exact_definition(arg)
+ for arg in fn.get_arguments()
+ ]),
+ fn_pass_argument_names=fn_pass_argument_names,
+ fn_rest_argument_names=fn_rest_argument_names,
+ ret_type=fn.type.get_result().spelling,
+ extra_work=extra_work))
+
+ output.write(epilogue_tpl.substitute(wrappers="\n ".join([
+ f"lttng_hsa_core_table.{fn.spelling}_fn = <tng_{fn.spelling};"
+ for fn in functions if fn.spelling not in forbiden_list
+ ])))
+
+
+if __name__ == "__main__":
+ main()
--- /dev/null
+#!/usr/bin/env python3
+#
+# SPDX-License-Identifier: MIT
+#
+# Copyright (c) 2023 EfficiOS, Inc.
+#
+# Author: Olivier Dion <odion@efficios.com>
+#
+# Auto-generate lttng-ust tracepoints for OpenMPI.
+#
+# Require: python-clang (libclang)
+
+import argparse
+import re
+import os
+import subprocess
+
+from string import Template
+
+import clang.cindex
+
+COMMON_PREFIX = None
+IGNORE = set()
+PROVIDER = None
+
+# LTTNG_UST_TP_ARGS is limited to 10 arguments. Since we introduce two
+# arguments of our own (thread-id and local-id), the maximum is 8.
+#
+# If a function has more arguments than this limit, all arguments -- at the
+# exception of the IDs -- will be passed through a data structure instead.
+MAX_TP_ARGS_COUNT = 8
+
+class EnumValue:
+
+ def __init__(self, ev):
+ self.name = ev.spelling
+ self.value = ev.enum_value
+
+class EnumType:
+
+ def __init__(self, en, name=None):
+ self.name = name or en.spelling
+ self.values = [EnumValue(ev) for ev in en.get_children()]
+ self.en = en
+
+class Typedef:
+
+ def __init__(self, spelling, value):
+ self.spelling = spelling
+ self.value = value
+
+class ArgumentType:
+
+ integer_set = {
+ clang.cindex.TypeKind.UCHAR,
+ clang.cindex.TypeKind.USHORT,
+ clang.cindex.TypeKind.UINT,
+ clang.cindex.TypeKind.ULONG,
+ clang.cindex.TypeKind.ULONGLONG,
+ clang.cindex.TypeKind.SHORT,
+ clang.cindex.TypeKind.INT,
+ clang.cindex.TypeKind.LONG,
+ clang.cindex.TypeKind.LONGLONG,
+ }
+
+ float_set = {
+ clang.cindex.TypeKind.FLOAT,
+ clang.cindex.TypeKind.DOUBLE,
+ }
+
+ address_set = {
+ clang.cindex.TypeKind.POINTER,
+ clang.cindex.TypeKind.INCOMPLETEARRAY,
+ }
+
+ def __init__(self, arg, name_prefix="", expr_prefix=""):
+ self.type = arg.type
+ self.arg = arg
+ self.const = ""
+ self.name_prefix = name_prefix
+ self.expr_prefix = expr_prefix
+
+ if self.kind() == clang.cindex.TypeKind.POINTER:
+ if self.type.get_pointee().is_const_qualified():
+ self.const = "const "
+ elif self.type.is_const_qualified():
+ self.const = "const "
+
+ def name(self):
+ return self.arg.spelling
+
+ def type_name(self):
+ if self.kind() == clang.cindex.TypeKind.INCOMPLETEARRAY:
+ return self.const + re.sub(r"\[[0-9]*\]", "*", self.type.spelling)
+ if self.kind() == clang.cindex.TypeKind.POINTER:
+ return f"{self.const}void *"
+ return self.const + self.type.spelling
+
+ def kind(self):
+ return self.type.get_canonical().kind
+
+ def to_lttng_field(self):
+ if self.name() == "reserved":
+ return ""
+ elif self.kind() in ArgumentType.address_set:
+ return f"lttng_ust_field_integer_hex(uintptr_t, {self.name_prefix}{self.name()}, (uintptr_t){self.expr_prefix}{self.name()})"
+ elif self.kind() in ArgumentType.integer_set:
+ return f"lttng_ust_field_integer({self.type_name()}, {self.name_prefix}{self.name()}, {self.expr_prefix}{self.name()})"
+ elif self.kind() in ArgumentType.float_set:
+ return f"lttng_ust_field_float({self.type_name()}, {self.name_prefix}{self.name()}, {self.expr_prefix}{self.name()})"
+ elif self.kind() == clang.cindex.TypeKind.ENUM:
+ enum_name = self.type_name().removeprefix("enum ")
+ return f"lttng_ust_field_enum({PROVIDER}, {enum_name}, int, {self.name_prefix}{self.name()}, {self.expr_prefix}{self.name()})"
+ elif self.kind() == clang.cindex.TypeKind.RECORD:
+ return [
+ ArgumentType(field, f"{self.name()}_", f"{self.expr_prefix}{self.name()}.").to_lttng_field()
+ for field in self.type.get_canonical().get_fields()
+ ]
+ else:
+ raise Exception("Unsupported kind: %s" % self.kind())
+
+class FunctionType:
+
+ struct_tpl = Template("""
+$name {
+ $fields
+};
+""")
+
+ def __init__(self, fn):
+ self.name = fn.spelling
+ self.args = [ArgumentType(arg) for arg in fn.get_arguments()]
+ self.fn = fn
+
+ def tp_args(self):
+ if len(self.args) == 0:
+ return ""
+ elif len(self.args) > MAX_TP_ARGS_COUNT:
+ return ",\n " + f"{self.arguments_struct_name()} *, lttng_args"
+ else:
+ return ",\n " + ",\n ".join([f"{arg.type_name()}, {arg.name()}"
+ for arg in self.args])
+
+ def tp_fields(self):
+ if len(self.args) == 0:
+ return ""
+ elif len(self.args) > MAX_TP_ARGS_COUNT:
+ packed_args = [ArgumentType(arg.arg, "", "lttng_args->") for arg in self.args]
+ return "\n ".join(flatten([arg.to_lttng_field()
+ for arg in packed_args]))
+ else:
+ return "\n ".join(flatten([arg.to_lttng_field()
+ for arg in self.args]))
+ def get_return_type_name(self):
+ return self.fn.type.get_result().spelling
+
+ def ctor_params(self):
+ if len(self.args) == 0:
+ return ""
+ elif len(self.args) > MAX_TP_ARGS_COUNT:
+ return ", <tng_args"
+ else:
+ return ", " + ", ".join(arg.name() for arg in self.args)
+
+ def arguments_struct_variable(self):
+ if len(self.args) > MAX_TP_ARGS_COUNT:
+ return "%s lttng_args = {%s};" % (self.arguments_struct_name(),
+ ", ".join([arg.name() for arg in self.args]))
+ else:
+ return f"/* {self.arguments_struct_name()} lttng_args */"
+
+
+ def arguments_struct_name(self):
+ return f"struct lttng_arguments_of_{self.name}"
+
+ def arguments_struct(self):
+ if len(self.args) > MAX_TP_ARGS_COUNT:
+ return self.struct_tpl.substitute(name=self.arguments_struct_name(),
+ fields="\n ".join([
+ f"{arg.type_name()} {arg.name()};"
+ for arg in self.args
+ ]))
+ else:
+ return ""
+
+def flatten(lst):
+ new_lst = []
+ for e in lst:
+ if isinstance(e, list):
+ for e in flatten(e):
+ new_lst.append(e)
+ else:
+ new_lst.append(e)
+ return new_lst
+
+def list_function_declarations(root):
+ return [ child
+ for child in root.get_children()
+ if child.kind == clang.cindex.CursorKind.FUNCTION_DECL ]
+
+def list_enum_declarations(root):
+ return [
+ child
+ for child in root.get_children()
+ if child.kind == clang.cindex.CursorKind.ENUM_DECL
+ ]
+
+def list_typedef_enums(root):
+ enums = []
+ for child in root.get_children():
+ if child.kind == clang.cindex.CursorKind.TYPEDEF_DECL:
+ maybe_enum = child.underlying_typedef_type.get_declaration()
+ if maybe_enum.kind == clang.cindex.CursorKind.ENUM_DECL:
+ enums.append(Typedef(child.spelling, maybe_enum))
+ return enums
+
+def search_header_in(name, paths):
+ for path in paths.split(":"):
+ for dirpath, _, files in os.walk(path, followlinks=True):
+ for file in files:
+ if file == name:
+ return os.path.join(dirpath, file)
+ return None
+
+def search_c_header(name):
+ return search_header_in(name, os.environ["C_INCLUDE_PATH"])
+
+def search_cxx_header(name):
+ return search_header_in(name, os.environ["CPLUS_INCLUDE_PATH"])
+
+def get_system_include_paths():
+
+ clang_args = ["clang", "-v", "-c", "-xc", "/dev/null", "-o", "/dev/null"]
+ paths = []
+
+ with subprocess.Popen(clang_args, stderr=subprocess.PIPE,
+ encoding="ascii") as proc:
+ start_sys_search = False
+ for line in proc.stderr:
+ if start_sys_search:
+ if line == "End of search list.\n":
+ break
+ paths.append("-isystem")
+ paths.append(line.strip())
+ elif line == "#include <...> search starts here:\n":
+ start_sys_search = True
+
+ return paths
+
+def parse_header(header_file, includes, defines,
+ required_c_headers, required_cxx_headers):
+
+ args = get_system_include_paths()
+
+ if includes:
+ for inc in includes:
+ args.append("-I")
+ args.append(inc)
+
+ if defines:
+ for d in defines:
+ args.append("-D")
+ args.append(d)
+
+ for header in required_c_headers:
+ found = search_c_header(header)
+ if found:
+ args.append("-I")
+ args.append(os.path.dirname(found))
+
+ for header in required_cxx_headers:
+ found = search_cxx_header(header)
+ if found:
+ args.append("-I")
+ args.append(os.path.dirname(found))
+
+ tu = clang.cindex.Index.create().parse(header_file, args=args)
+
+ for d in tu.diagnostics:
+ print(f"WARNING: {d}")
+
+ return tu.cursor
+
+def list_functions(root):
+ return [
+ FunctionType(fn)
+ for fn in list_function_declarations(root)
+ if fn.spelling.startswith(COMMON_PREFIX) and fn.spelling not in IGNORE
+ ]
+
+def list_enums(root):
+
+ enums = [
+ en
+ for en in list_enum_declarations(root)
+ if en.spelling.startswith(COMMON_PREFIX) and en.spelling not in IGNORE
+ ]
+
+ typedef_enums = [
+ typedef
+ for typedef in list_typedef_enums(root)
+ if typedef.spelling.startswith(COMMON_PREFIX) and
+ typedef.spelling not in IGNORE and
+ typedef.value.get_definition() not in enums
+ ]
+
+ all_enums = ([ EnumType(e) for e in enums ] +
+ [ EnumType(td.value, td.spelling) for td in typedef_enums])
+
+ return all_enums
+
+def generate_tracepoint_definitions(function_declarations, enum_declarations,
+ api_file, output_defs, output_interface,
+ header_guard):
+ defs_tpl = Template("""/* Auto-generated file! */
+#undef LTTNG_UST_TRACEPOINT_PROVIDER
+#define LTTNG_UST_TRACEPOINT_PROVIDER $provider
+
+#undef LTTNG_UST_TRACEPOINT_INCLUDE
+#define LTTNG_UST_TRACEPOINT_INCLUDE "$output_defs"
+
+#if !defined($header_guard)
+#include <$api_file>
+$pass_by_struct
+#endif
+
+#if !defined($header_guard) || defined(LTTNG_UST_TRACEPOINT_HEADER_MULTI_READ)
+#define $header_guard
+
+#include <lttng/tracepoint.h>
+
+$enum_definitions
+$tracepoint_definitions
+
+#endif /* $header_guard */
+
+#include <lttng/tracepoint-event.h>
+""")
+
+ interface_tpl = Template("""/* Auto-generated file! */
+#ifndef ${header_guard}_IMPL
+#define ${header_guard}_IMPL
+
+#include "${output_defs}"
+
+#endif /* ${header_guard}_IMPL */
+""")
+
+ tp_tpl = Template("""
+LTTNG_UST_TRACEPOINT_EVENT(
+ $provider,
+ ${name}_entry,
+ LTTNG_UST_TP_ARGS(
+ uint64_t, lttng_thread_id,
+ uint64_t, lttng_local_id$tp_args
+ ),
+ LTTNG_UST_TP_FIELDS(
+ lttng_ust_field_integer(uint64_t, lttng_thread_id, lttng_thread_id)
+ lttng_ust_field_integer(uint64_t, lttng_local_id, lttng_local_id)
+ $tp_fields
+ )
+)
+""")
+
+ tp_ret_tpl = Template("""
+LTTNG_UST_TRACEPOINT_EVENT(
+ $provider,
+ ${name}_exit,
+ LTTNG_UST_TP_ARGS(
+ uint64_t, lttng_thread_id,
+ uint64_t, lttng_local_id,
+ int, lttng_has_ret,
+ $ret_type, lttng_ret
+ ),
+ LTTNG_UST_TP_FIELDS(
+ lttng_ust_field_integer(uint64_t, lttng_thread_id, lttng_thread_id)
+ lttng_ust_field_integer(uint64_t, lttng_local_id, lttng_local_id)
+ lttng_ust_field_integer(int, lttng_has_ret, lttng_has_ret)
+ lttng_ust_field_integer($ret_type, lttng_ret, lttng_ret)
+ )
+)
+""")
+
+ tp_void_tpl = Template("""
+LTTNG_UST_TRACEPOINT_EVENT(
+ $provider,
+ ${name}_exit,
+ LTTNG_UST_TP_ARGS(
+ uint64_t, lttng_thread_id,
+ uint64_t, lttng_local_id,
+ int, lttng_has_ret
+ ),
+ LTTNG_UST_TP_FIELDS(
+ lttng_ust_field_integer(uint64_t, lttng_thread_id, lttng_thread_id)
+ lttng_ust_field_integer(uint64_t, lttng_local_id, lttng_local_id)
+ lttng_ust_field_integer(int, lttng_has_ret, lttng_has_ret)
+ )
+)
+""")
+ enum_tpl = Template("""
+LTTNG_UST_TRACEPOINT_ENUM($provider, $name,
+ LTTNG_UST_TP_ENUM_VALUES(
+ $values
+ )
+)
+""")
+ with open(output_defs, "w") as output:
+ definitions = []
+ for fn in function_declarations:
+ ret_type = fn.get_return_type_name()
+ definitions.append(tp_tpl.substitute(provider=PROVIDER,
+ name=fn.name,
+ tp_args=fn.tp_args(),
+ tp_fields=fn.tp_fields()))
+ if ret_type == "void":
+ tpl = tp_void_tpl
+ else:
+ tpl = tp_ret_tpl
+
+ definitions.append(tpl.substitute(provider=PROVIDER,
+ name=fn.name,
+ ret_type=ret_type))
+
+ tracepoint_definitions = "\n".join(definitions)
+
+ enum_definitions = "\n".join([
+ enum_tpl.substitute(provider=PROVIDER,
+ name=en.name,
+ values="\n ".join([f'lttng_ust_field_enum_value("{ev.name}", {ev.value})'
+ for ev in en.values]))
+ for en in enum_declarations
+ ])
+
+ output.write(defs_tpl.substitute(provider=PROVIDER,
+ output_defs=output_defs,
+ header_guard=header_guard,
+ tracepoint_definitions=tracepoint_definitions,
+ enum_definitions=enum_definitions,
+ api_file=api_file,
+ pass_by_struct="".join([fn.arguments_struct()
+ for fn in function_declarations])))
+ with open(output_interface, "w") as output:
+ output.write(interface_tpl.substitute(header_guard=header_guard,
+ output_defs=output_defs,))
+
+def generate_tracepoint_classes(function_declarations, api_file, output_path, header_guard, namespace):
+ global_tpl = Template("""/* Auto-generated file! */
+#include <atomic>
+#include <cstdint>
+#include <$api_file>
+namespace $namespace {
+ struct unique_id {
+ uint64_t thread_id;
+ uint64_t local_id;
+ };
+
+ class id_generator {
+ static std::atomic<uint64_t> _thread_counter;
+ uint64_t _thread_id;
+ uint64_t _local_id;
+ public:
+ id_generator() {
+ _thread_id = _thread_counter++;
+ _local_id = 0;
+ }
+
+ unique_id next_id() {
+ return {
+ .thread_id = _thread_id,
+ .local_id = _local_id++,
+ };
+ }
+ };
+
+ extern thread_local id_generator generator;
+
+ template<typename RetType>
+ class base_api_object {
+ protected:
+ unique_id _id;
+ int _has_ret;
+ RetType _ret;
+ public:
+ void generate_id() {
+ _id = generator.next_id();
+ }
+
+ void mark_return(RetType ret) {
+ _ret = ret;
+ _has_ret = 1;
+ }
+ };
+
+ class base_api_object_void {
+ protected:
+ unique_id _id;
+ int _has_ret;
+ public:
+ void generate_id() {
+ _id = generator.next_id();
+ }
+
+ void mark_return(void) {
+ _has_ret = 1;
+ }
+ };
+
+$classes
+};
+""")
+
+ cls_ret_tpl = Template("""
+class api_object_$fn_name : public base_api_object<$ret_type>
+{
+public:
+ api_object_$fn_name($ctor_type_params) {
+ if (lttng_ust_tracepoint_enabled($provider, ${fn_name}_entry)) {
+ generate_id();
+ $pass_by_struct
+ lttng_ust_do_tracepoint($provider,
+ ${fn_name}_entry,
+ _id.thread_id,
+ _id.local_id$ctor_params);
+ }
+ }
+ ~api_object_$fn_name() {
+ if (lttng_ust_tracepoint_enabled($provider, ${fn_name}_exit)) {
+ lttng_ust_do_tracepoint($provider,
+ ${fn_name}_exit,
+ _id.thread_id,
+ _id.local_id,
+ _has_ret,
+ _ret);
+ }
+ }
+};
+""")
+
+ cls_void_tpl = Template("""
+class api_object_$fn_name : public base_api_object_void
+{
+public:
+ api_object_$fn_name($ctor_type_params) {
+ if (lttng_ust_tracepoint_enabled($provider, ${fn_name}_entry)) {
+ generate_id();
+ $pass_by_struct
+ lttng_ust_do_tracepoint($provider,
+ ${fn_name}_entry,
+ _id.thread_id,
+ _id.local_id$ctor_params);
+ }
+ }
+ ~api_object_$fn_name() {
+ if (lttng_ust_tracepoint_enabled($provider, ${fn_name}_exit)) {
+ lttng_ust_do_tracepoint($provider,
+ ${fn_name}_exit,
+ _id.thread_id,
+ _id.local_id,
+ _has_ret);
+ }
+ }
+};
+""")
+
+ with open(output_path, "w") as output:
+ classes = []
+ for fn in function_declarations:
+ ret_type = fn.get_return_type_name()
+ if ret_type == "void":
+ cls_tpl = cls_void_tpl
+ else:
+ cls_tpl = cls_ret_tpl
+ classes.append(cls_tpl.substitute(provider=PROVIDER,
+ fn_name=fn.name,
+ pass_by_struct=fn.arguments_struct_variable(),
+ ctor_type_params=", ".join([f"{arg.type_name()} {arg.name()}"
+ for arg in fn.args]),
+ ctor_params=fn.ctor_params(),
+ ret_type=ret_type))
+ output.write(global_tpl.substitute(api_file=api_file,
+ namespace=namespace,
+ classes="".join(classes)))
+
+def generate_tracepoint_emulated_classes(function_declarations, api_file, output_path,
+ header_guard, namespace):
+ global_tpl = Template("""/* Auto-generated file! */
+#include <stdint.h>
+#include <$api_file>
+#define ${NAMESPACE}_CAT_PRIMITIVE(A, B) A##B
+#define ${NAMESPACE}_CAT(A, B) ${NAMESPACE}_CAT_PRIMITIVE(A, B)
+
+struct ${namespace}_unique_id {
+ uint64_t thread_id;
+ uint64_t local_id;
+};
+
+struct ${namespace}_id_generator {
+ uint64_t thread_id;
+ uint64_t local_id;
+ int initialized;
+};
+
+extern uint64_t ${namespace}_id_generator_thread_counter;
+extern _Thread_local struct ${namespace}_id_generator ${namespace}_generator;
+
+#define ${namespace}_unlikely(x) __builtin_expect(!!(x), 0)
+
+static inline void ${namespace}_id_generator_next_id(struct ${namespace}_unique_id *id)
+{
+ if (${namespace}_unlikely(!${namespace}_generator.initialized)) {
+ ${namespace}_generator.thread_id =
+ __atomic_fetch_add(&${namespace}_id_generator_thread_counter,
+ 1,
+ __ATOMIC_RELAXED);
+ ${namespace}_generator.initialized = 1;
+ }
+
+ id->thread_id = ${namespace}_generator.thread_id;
+ id->local_id = ${namespace}_generator.local_id++;
+}
+
+#define ${NAMESPACE}_API_OBJECT_NAME ${namespace}_api_object
+
+#define ${NAMESPACE}_MAKE_API_OBJECT(name, ...) \\
+ struct ${NAMESPACE}_CAT(${namespace}_api_state_, name) __attribute__((cleanup(${NAMESPACE}_CAT(name, _exit)))) \\
+ ${NAMESPACE}_API_OBJECT_NAME = { 0 }; \\
+ ${NAMESPACE}_CAT(name, _entry)(&${NAMESPACE}_API_OBJECT_NAME, ##__VA_ARGS__); \\
+ do { } while (0)
+
+#define ${NAMESPACE}_MARK_RETURN_API_OBJECT(code) \\
+ ({ \\
+ ${NAMESPACE}_API_OBJECT_NAME.ret = code; \\
+ ${NAMESPACE}_API_OBJECT_NAME.has_ret = 1; \\
+ })
+${classes}
+""")
+
+ cls_tpl = Template("""
+struct ${namespace}_api_state_${fn_name} {
+ struct ${namespace}_unique_id id;
+ int has_ret;
+ $ret_type ret;
+};
+
+static inline void ${fn_name}_entry(${ctor_type_params})
+{
+ if (${namespace}_ust_tracepoint_enabled(${provider}, ${fn_name}_entry)) {
+ ${namespace}_id_generator_next_id(<tng_state->id);
+ ${pass_by_struct}
+ ${namespace}_ust_do_tracepoint($provider, ${fn_name}_entry,
+ lttng_state->id.thread_id,
+ lttng_state->id.local_id${ctor_params});
+ }
+}
+
+static inline void ${fn_name}_exit(const struct ${namespace}_api_state_${fn_name} *lttng_state)
+{
+ lttng_ust_tracepoint(${provider}, ${fn_name}_exit,
+ lttng_state->id.thread_id,
+ lttng_state->id.local_id,
+ lttng_state->has_ret,
+ lttng_state->ret);
+}
+""")
+ with open(output_path, "w") as output:
+ output.write(global_tpl.substitute(api_file=api_file,
+ namespace=namespace,
+ NAMESPACE=namespace.upper(),
+ classes="".join([
+ cls_tpl.substitute(provider=PROVIDER,
+ fn_name=fn.name,
+ pass_by_struct=fn.arguments_struct_variable(),
+ ctor_params=fn.ctor_params(),
+ ctor_type_params=", ".join([f"struct {namespace}_api_state_{fn.name} *lttng_state"] +
+ [f"{arg.type_name()} {arg.name()}"
+ for arg in fn.args]),
+ namespace=namespace,
+ NAMESPACE=namespace.upper(),
+ ret_type=fn.get_return_type_name())
+ for fn in function_declarations
+ ])))
+
+
+def generate_tracepoint_implementations(namespace, defs, impls):
+ tpl = Template("""/* Auto-generated !*/
+#define LTTNG_UST_TRACEPOINT_CREATE_PROBES
+#define LTTNG_UST_TRACEPOINT_DEFINE
+#include "${defs}"
+""")
+
+ with open(impls, "w") as output:
+ output.write(tpl.substitute(defs=defs))
+
+def generate_tracepoint_states(namespace,
+ interface,
+ classes,
+ states,
+ emulated_classes):
+
+ if emulated_classes:
+ body_tpl = Template("""
+uint64_t ${namespace}_id_generator_thread_counter = 0;
+_Thread_local struct ${namespace}_id_generator ${namespace}_generator;
+""")
+ else:
+ body_tpl = Template("""
+#include <atomic>
+namespace ${namespace} {
+ std::atomic<uint64_t> id_generator::_thread_counter{0};
+ thread_local id_generator generator;
+};
+""")
+
+ tpl = Template("""/* Auto-generated! */
+#include "${interface}"
+#include "${classes}"
+
+$body
+""")
+
+ with open(states, "w") as output:
+ output.write(tpl.substitute(interface=interface,
+ classes=classes,
+ body=body_tpl.substitute(namespace=namespace)))
+
+def main():
+
+ global COMMON_PREFIX
+ global IGNORE
+ global PROVIDER
+
+ parser = argparse.ArgumentParser(prog="lttng-ust-autogen-api",
+ description="Generate LTTng classes and tracepoint definitions")
+
+ parser.add_argument("api",
+ help="Header file that has the API")
+
+ parser.add_argument("defs",
+ help="Path to tracepoint definitions")
+
+ parser.add_argument("interface",
+ help="Path to tracepoints interfaces")
+
+ parser.add_argument("classes",
+ help="Path to tracepoint classes")
+
+ parser.add_argument("impl",
+ help="Path to tracepoint implementations")
+
+ parser.add_argument("states",
+ help="Path to states")
+
+ parser.add_argument("--provider",
+ dest="provider",
+ metavar="PROVIDER",
+ default="noprovider",
+ help="Tracepoints PROVIDER")
+
+ parser.add_argument("--common-prefix",
+ dest="common_prefix",
+ metavar="PREFIX",
+ default="",
+ help="Common PREFIX of API functions (C namespace)")
+
+ parser.add_argument("-I",
+ action="append",
+ metavar="DIR",
+ dest="includes",
+ help="Add DIR to list of directories to include")
+
+ parser.add_argument("-D",
+ action="append",
+ metavar="DEFINITION",
+ dest="defines",
+ help="Add DEFINITION to list of definitions")
+
+ parser.add_argument("--tp-guard",
+ dest="tp_guard",
+ metavar="GUARD",
+ default="LTTNG_TRACEPOINT_DEF_H",
+ help="Use GUARD as header guard for tracepoint definitions")
+
+ parser.add_argument("--classes-guard",
+ dest="classes_guard",
+ metavar="GUARD",
+ default="LTTNG_TRACEPOINT_CLASSES_HPP",
+ help="Use GUARD as header guard for classes definitions")
+
+ parser.add_argument("--emulated-classes",
+ dest="emulated_classes",
+ action="store_true",
+ default=False,
+ help="Emulate C++ classes")
+
+ parser.add_argument("--namespace",
+ dest="namespace",
+ metavar="NAMESPACE",
+ default="lttng",
+ help="Generate classes in NAMESPACE")
+
+ parser.add_argument("--ignores",
+ dest="ignores",
+ metavar="FILE",
+ default=None,
+ help="Ignore functions listed in FILE")
+
+ parser.add_argument("--c-header",
+ dest="required_c_headers",
+ metavar="HEADER",
+ action="append",
+ default=[],
+ help="Search for HEADER in C_INCLUDE_PATH and add its directory to search path")
+
+ parser.add_argument("--cxx-header",
+ dest="required_cxx_headers",
+ metavar="HEADER",
+ action="append",
+ default=[],
+ help="Search for HEADER in CPLUS_INCLUDE_PATH add its directory to search path")
+
+ args = parser.parse_args()
+
+ PROVIDER = args.provider
+ COMMON_PREFIX = args.common_prefix
+
+ if args.ignores:
+ with open(args.ignores, "r") as f:
+ for ignore in f.read().splitlines():
+ IGNORE.add(ignore)
+
+ root = parse_header(args.api, args.includes, args.defines,
+ args.required_c_headers,
+ args.required_cxx_headers)
+
+ function_declarations = list_functions(root)
+ enum_declarations = list_enums(root)
+
+ generate_tracepoint_definitions(function_declarations,
+ enum_declarations,
+ args.api, args.defs, args.interface,
+ args.tp_guard)
+
+ if args.emulated_classes:
+ generate_tracepoint_emulated_classes(function_declarations,
+ args.api,
+ args.classes,
+ args.classes_guard,
+ args.namespace)
+ else:
+ generate_tracepoint_classes(function_declarations,
+ args.api,
+ args.classes,
+ args.classes_guard,
+ args.namespace)
+
+ generate_tracepoint_implementations(args.namespace,
+ args.interface,
+ args.impl)
+
+ generate_tracepoint_states(args.namespace,
+ args.interface,
+ args.classes,
+ args.states,
+ args.emulated_classes)
+
+
+if __name__ == "__main__":
+ main()
--- /dev/null
+s/hipChooseDeviceR[0-9]*/hipChooseDevice/g
--- /dev/null
+hipCreateChannelDesc
+hipDrvGraphAddMemsetNode
+hipDrvGraphMemcpyNodeGetParams
+hipDrvGraphMemcpyNodeSetParams
+hipExtGetLastError
+hipExternalMemoryGetMappedMipmappedArray
+hipGraphAddExternalSemaphoresSignalNode
+hipGraphAddExternalSemaphoresWaitNode
+hipGraphAddNode
+hipGraphExecExternalSemaphoresSignalNodeSetParams
+hipGraphExecExternalSemaphoresWaitNodeSetParams
+hipGraphExecGetFlags
+hipGraphExecNodeSetParams
+hipGraphExternalSemaphoresSignalNodeGetParams
+hipGraphExternalSemaphoresSignalNodeSetParams
+hipGraphExternalSemaphoresWaitNodeGetParams
+hipGraphExternalSemaphoresWaitNodeSetParams
+hipGraphNodeSetParams
--- /dev/null
+hsa_queue_add_write_index_acq_rel
+hsa_queue_add_write_index_acquire
+hsa_queue_add_write_index_release
+hsa_queue_cas_write_index_acq_rel
+hsa_queue_cas_write_index_acquire
+hsa_queue_cas_write_index_release
+hsa_queue_load_read_index_acquire
+hsa_queue_load_write_index_acq_rel
+hsa_queue_load_write_index_acquire
+hsa_queue_load_write_index_release
+hsa_queue_store_read_index_release
+hsa_queue_store_write_index_release
+hsa_signal_add_acq_rel
+hsa_signal_add_acquire
+hsa_signal_add_release
+hsa_signal_and_acq_rel
+hsa_signal_and_acquire
+hsa_signal_and_release
+hsa_signal_cas_acq_rel
+hsa_signal_cas_acquire
+hsa_signal_cas_release
+hsa_signal_exchange_acq_rel
+hsa_signal_exchange_acquire
+hsa_signal_exchange_release
+hsa_signal_load_acquire
+hsa_signal_load_release
+hsa_signal_or_acq_rel
+hsa_signal_or_acquire
+hsa_signal_or_release
+hsa_signal_store_release
+hsa_signal_subtract_acq_rel
+hsa_signal_subtract_acquire
+hsa_signal_subtract_release
+hsa_signal_wait_acquire
+hsa_signal_xor_acq_rel
+hsa_signal_xor_acquire
+hsa_signal_xor_release
--- /dev/null
+/*
+ * Copyright © 2023 AMD
+ *
+ * All Rights Reserved
+ */
+
+#include <limits.h>
+#include <stdint.h>
+
+#include "lttng-roctx.h"
+
+static void lttng_roctxMark(const char *message)
+{
+ lttng_ust_tracepoint(roctx, mark, message);
+ next_roctx_core_table.roctxMarkA_fn(message);
+}
+
+static int lttng_roctxRangePush(const char *message)
+{
+ int level = next_roctx_core_table.roctxRangePushA_fn(message);
+
+ lttng_ust_tracepoint(roctx, range_push, message, level);
+
+ return level;
+}
+
+static int lttng_roctxRangePop(void)
+{
+ int level = next_roctx_core_table.roctxRangePop_fn();
+
+ lttng_ust_tracepoint(roctx, range_pop, level);
+
+ return level;
+}
+
+static roctx_range_id_t lttng_roctxRangeStart(const char *message)
+{
+ roctx_range_id_t id;
+
+ id = next_roctx_core_table.roctxRangeStartA_fn(message);
+
+ lttng_ust_tracepoint(roctx, range_start, message, id);
+
+ return id;
+}
+
+static void lttng_roctxRangeStop(roctx_range_id_t id)
+{
+ lttng_ust_tracepoint(roctx, range_stop, id);
+ next_roctx_core_table.roctxRangeStop_fn(id);
+}
+
+static int lttng_roctxGetThreadId(roctx_thread_id_t *tid)
+{
+ int ret = next_roctx_core_table.roctxGetThreadId_fn(tid);
+ lttng_ust_tracepoint(roctx, get_thread_id, *tid);
+ return ret;
+}
+
+static void lttng_roctx_install_wrappers(void)
+{
+ lttng_roctx_core_table.roctxMarkA_fn = <tng_roctxMark;
+ lttng_roctx_core_table.roctxRangePushA_fn = <tng_roctxRangePush;
+ lttng_roctx_core_table.roctxRangePop_fn = <tng_roctxRangePop;
+ lttng_roctx_core_table.roctxRangeStartA_fn = <tng_roctxRangeStart;
+ lttng_roctx_core_table.roctxRangeStop_fn = <tng_roctxRangeStop;
+}
+
--- /dev/null
+/*
+ * Copyright © 2023 AMD
+ *
+ * All Rights Reserved
+ */
+
+#ifndef LTTNG_ROCTX_H
+#define LTTNG_ROCTX_H
+
+#include "lttng-ust-roctx.h"
+
+#endif /* LTTNG_ROCTX_H */
--- /dev/null
+/*
+ * Copyright © 2024 AMD
+ *
+ * All Rights Reserved
+ */
+
+#include <cinttypes>
+
+#include <hip/hip_runtime.h>
+#include <hip/hip_gl_interop.h>
+
+/*
+ * Do not re-order! hip_api_trace.h header referenced some R0000 types which
+ * are only defined in the hip_deprecated.h header.
+ */
+#include <hip/hip_deprecated.h>
+#include <hip/amd_detail/hip_api_trace.hpp>
+
+/*
+ * Without AMD_INTERNAL_BUILD, hsa_api_trace.h tries to include
+ * "inc/*", which is not installed.
+ */
+#define AMD_INTERNAL_BUILD
+#include <hsa/hsa.h>
+#include <hsa/hsa_api_trace.h>
+
+/*
+ * Rocprofiler integration.
+ */
+#include <rocprofiler-sdk/registration.h>
+#include <rocprofiler-sdk/rocprofiler.h>
+#include <rocprofiler-sdk-roctx/api_trace.h>
+
+/*
+ * This is the table with LTTng tracepoints for HIP.
+ */
+static struct HipDispatchTable lttng_hip_table;
+
+/*
+ * This is an internal copy of the orignal table used by the LTTng wrappers to
+ * call the next function for HIP.
+ */
+static struct HipDispatchTable next_hip_table;
+
+/* Auto-generated by gen-hip-wrappers. */
+#include "lttng-ust-hip-wrappers.cpp"
+
+
+/*
+ * This is the table with LTTng tracepoints for HSA.
+ */
+static struct CoreApiTable lttng_hsa_core_table;
+
+/*
+ * This is an internal copy of the orignal table used by the LTTng wrappers to
+ * call the next function for HSA.
+ */
+static struct CoreApiTable next_hsa_core_table;
+
+/* Auto-generated by gen-hsa-wrappers. */
+#include "lttng-ust-hsa-wrappers.cpp"
+
+
+/*
+ * This is the table with LTTng tracepoints for roctx.
+ */
+static roctxCoreApiTable_t lttng_roctx_core_table;
+
+/*
+ * This is an internal copy of the orignal table used by the LTTng wrappers to
+ * call the next function for roctx.
+ */
+static roctxCoreApiTable_t next_roctx_core_table;
+
+/* LTTng events for roctx. */
+#include "lttng-roctx.c"
+
+#define die(FMT, ...) \
+ do { \
+ fprintf(stderr, FMT "\n" __VA_OPT__(,) __VA_ARGS__); \
+ exit(EXIT_FAILURE); \
+ } while(0)
+
+#define error(FMT, ...) \
+ fprintf(stderr, FMT "\n" __VA_OPT__(,) __VA_ARGS__)
+
+static void
+register_hip_table(uint64_t lib_instance, uint64_t num_tables, void **tables)
+{
+ /* Add instrumentation only to first library instance. */
+ if (0 != lib_instance) {
+ return;
+ }
+
+ /* Ensure that there is at least one table. */
+ if (num_tables < 1) {
+ return;
+ }
+
+ /*
+ * This could be done at compile time in some way if C++ could support
+ * designated initializers.
+ */
+ lttng_hip_install_wrappers();
+
+ auto original_hip_table = static_cast<HipDispatchTable*>(tables[0]);
+
+ /* Swap tables. */
+ if (original_hip_table) {
+ auto size = std::min(sizeof(next_hip_table), original_hip_table->size);
+ memcpy(&next_hip_table, original_hip_table, size);
+ memcpy(original_hip_table, <tng_hip_table, size);
+ }
+}
+
+static void
+register_hsa_core_table(uint64_t lib_instance, uint64_t num_tables, void **tables)
+{
+ /* Add instrumentation only to first library instance. */
+ if (0 != lib_instance) {
+ return;
+ }
+
+ /* Ensure that there is at least one table. */
+ if (num_tables < 1) {
+ return;
+ }
+
+ /*
+ * This could be done at compile time in some way if C++ could support
+ * designated initializers.
+ */
+ lttng_hsa_install_wrappers();
+
+ auto original_hsa_table = static_cast<HsaApiTable*>(tables[0]);
+
+ /* Swap tables. */
+ if (original_hsa_table) {
+ auto core = original_hsa_table->core_;
+ auto size = sizeof(*core);
+ memcpy(&next_hsa_core_table, core, size);
+ memcpy(core, <tng_hsa_core_table, size);
+ }
+}
+
+static void
+register_roctx_core_table(uint64_t lib_instance, uint64_t num_tables, void **tables)
+{
+ /* Add instrumentation only to first library instance. */
+ if (0 != lib_instance) {
+ return;
+ }
+
+ /* Ensure that there is at least one table. */
+ if (num_tables < 1) {
+ return;
+ }
+
+ /*
+ * This could be done at compile time in some way if C++ could support
+ * designated initializers.
+ */
+ lttng_roctx_install_wrappers();
+
+ auto original_roctx_core_table = static_cast<roctxCoreApiTable_t*>(tables[0]);
+
+ /* Swap tables. */
+ if (original_roctx_core_table) {
+ auto size = std::min(original_roctx_core_table->size, sizeof(roctxCoreApiTable_t));
+ memcpy(&next_roctx_core_table, original_roctx_core_table, size);
+ memcpy(original_roctx_core_table, <tng_roctx_core_table, size);
+ }
+}
+
+static void
+api_registration_callback(rocprofiler_intercept_table_t type,
+ uint64_t lib_version,
+ uint64_t lib_instance,
+ void** tables,
+ uint64_t num_tables,
+ void* user_data)
+{
+ (void) user_data;
+ (void) lib_version;
+
+ /*
+ * We only want HIP runtime and HSA tables. If we get something
+ * else, there is a bug somewhere.
+ */
+ switch (type) {
+ case ROCPROFILER_HIP_RUNTIME_TABLE:
+ register_hip_table(lib_instance, num_tables, tables);
+ break;
+ case ROCPROFILER_HSA_TABLE:
+ register_hsa_core_table(lib_instance, num_tables, tables);
+ break;
+ case ROCPROFILER_MARKER_CORE_TABLE:
+ register_roctx_core_table(lib_instance, num_tables, tables);
+ break;
+ defualt:
+ die("unexpected library type: %d", type);
+ }
+}
+
+extern "C"
+__attribute__((visibility("default")))
+rocprofiler_tool_configure_result_t *rocprofiler_configure(uint32_t version,
+ const char *runtime_version,
+ uint32_t priority,
+ rocprofiler_client_id_t *id)
+{
+ (void) priority;
+ (void) runtime_version;
+ (void) version;
+
+ /*
+ * Maybe Exatracer instead?
+ */
+ id->name = "LTTng-UST";
+
+ if (ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED ==
+ rocprofiler_at_intercept_table_registration(api_registration_callback,
+ ROCPROFILER_HIP_RUNTIME_TABLE |
+ ROCPROFILER_HSA_TABLE |
+ ROCPROFILER_MARKER_CORE_TABLE,
+ nullptr)) {
+ die("Trying to register API interception table for HIP runtime: "
+ "NOT IMPLEMENTED");
+ }
+
+ static auto cfg = rocprofiler_tool_configure_result_t {
+ sizeof(rocprofiler_tool_configure_result_t),
+ nullptr,
+ nullptr,
+ nullptr
+ };
+
+ return &cfg;
+}
--- /dev/null
+#define LTTNG_UST_TRACEPOINT_CREATE_PROBES
+#define LTTNG_UST_TRACEPOINT_DEFINE
+#include "lttng-roctx.h"
+
--- /dev/null
+/*
+ * Copyright © 2023 AMD
+ *
+ * All Rights Reserved
+ */
+
+#undef LTTNG_UST_TRACEPOINT_PROVIDER
+#define LTTNG_UST_TRACEPOINT_PROVIDER roctx
+
+#undef LTTNG_UST_TRACEPOINT_INCLUDE
+#define LTTNG_UST_TRACEPOINT_INCLUDE "lttng-ust-roctx.h"
+
+#if !defined(LTTNG_TRACEPOINT_ROCTX_H) || defined(LTTNG_UST_TRACEPOINT_HEADER_MULTI_READ)
+#define LTTNG_TRACEPOINT_ROCTX_H
+
+#include <lttng/tracepoint.h>
+
+LTTNG_UST_TRACEPOINT_EVENT(
+ roctx,
+ mark,
+ LTTNG_UST_TP_ARGS(
+ const char *, message
+ ),
+ LTTNG_UST_TP_FIELDS(
+ lttng_ust_field_string(message, message)
+ )
+)
+
+LTTNG_UST_TRACEPOINT_EVENT(
+ roctx,
+ range_push,
+ LTTNG_UST_TP_ARGS(
+ const char *, message,
+ int, level
+ ),
+ LTTNG_UST_TP_FIELDS(
+ lttng_ust_field_string(message, message)
+ lttng_ust_field_integer(uint64_t, level, level)
+ )
+)
+
+LTTNG_UST_TRACEPOINT_EVENT(
+ roctx,
+ range_pop,
+ LTTNG_UST_TP_ARGS(
+ int, level
+ ),
+ LTTNG_UST_TP_FIELDS(
+ lttng_ust_field_integer(uint64_t, level, level)
+ )
+)
+
+LTTNG_UST_TRACEPOINT_EVENT(
+ roctx,
+ range_start,
+ LTTNG_UST_TP_ARGS(
+ const char *, message,
+ uint64_t, id
+ ),
+ LTTNG_UST_TP_FIELDS(
+ lttng_ust_field_string(message, message)
+ lttng_ust_field_integer_hex(uint64_t, id, id)
+ )
+)
+
+LTTNG_UST_TRACEPOINT_EVENT(
+ roctx,
+ range_stop,
+ LTTNG_UST_TP_ARGS(
+ uint64_t, id
+ ),
+ LTTNG_UST_TP_FIELDS(
+ lttng_ust_field_integer_hex(uint64_t, id, id)
+ )
+)
+
+LTTNG_UST_TRACEPOINT_EVENT(
+ roctx,
+ get_thread_id,
+ LTTNG_UST_TP_ARGS(
+ uint64_t, tid
+ ),
+ LTTNG_UST_TP_FIELDS(
+ lttng_ust_field_integer_hex(uint64_t, roctx_tid, tid)
+ )
+)
+
+#endif /* LTTNG_TRACEPOINT_ROCTX_H */
+
+#include <lttng/tracepoint-event.h>
--- /dev/null
+/*
+ Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
+*/
+
+#include <rocprofiler-sdk-roctx/roctx.h>
+#include <hip/hip_runtime.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <iostream>
+#include <string>
+#include <fstream>
+
+#define SAMPLE_VERSION "HIP-Examples-Application-v1.0"
+#define SUCCESS 0
+#define FAILURE 1
+
+using namespace std;
+
+__global__ void helloworld(char* in, char* out)
+{
+ int num = hipThreadIdx_x + hipBlockDim_x * hipBlockIdx_x;
+ out[num] = in[num] + 1;
+}
+
+int main(int argc, char* argv[])
+{
+
+ hipDeviceProp_t devProp;
+
+ roctx_range_id_t range = roctxRangeStart("hello");
+
+ hipGetDeviceProperties(&devProp, 0);
+ cout << " System minor " << devProp.minor << endl;
+ cout << " System major " << devProp.major << endl;
+ cout << " agent prop name " << devProp.name << endl;
+
+ /* Initial input,output for the host and create memory objects for the kernel*/
+ const char* input = "GdkknVnqkc";
+ size_t strlength = strlen(input);
+ cout << "input string:" << endl;
+ cout << input << endl;
+ char *output = (char*) malloc(strlength + 1);
+
+ char* inputBuffer;
+ char* outputBuffer;
+ hipMalloc((void**)&inputBuffer, (strlength + 1) * sizeof(char));
+ hipMalloc((void**)&outputBuffer, (strlength + 1) * sizeof(char));
+
+ hipMemcpy(inputBuffer, input, (strlength + 1) * sizeof(char), hipMemcpyHostToDevice);
+
+ hipLaunchKernelGGL(helloworld,
+ dim3(1),
+ dim3(strlength),
+ 0, 0,
+ inputBuffer ,outputBuffer );
+
+ hipMemcpy(output, outputBuffer,(strlength + 1) * sizeof(char), hipMemcpyDeviceToHost);
+
+ hipFree(inputBuffer);
+ hipFree(outputBuffer);
+
+ output[strlength] = '\0'; //Add the terminal character to the end of output.
+ cout << "\noutput string:" << endl;
+ cout << output << endl;
+
+ free(output);
+
+ std::cout<<"Passed!\n";
+ roctxRangeStop(range);
+ return SUCCESS;
+}