#!/bin/sh # Miscellaneous Intel PT testing # SPDX-License-Identifier: GPL-2.0 set -e # Skip if no Intel PT perf list | grep -q 'intel_pt//' || exit 2 shelldir=$(dirname "$0") . "${shelldir}"/lib/waiting.sh skip_cnt=0 ok_cnt=0 err_cnt=0 temp_dir=$(mktemp -d /tmp/perf-test-intel-pt-sh.XXXXXXXXXX) tmpfile="${temp_dir}/tmp-perf.data" perfdatafile="${temp_dir}/test-perf.data" outfile="${temp_dir}/test-out.txt" errfile="${temp_dir}/test-err.txt" workload="${temp_dir}/workload" awkscript="${temp_dir}/awkscript" jitdump_workload="${temp_dir}/jitdump_workload" maxbrstack="${temp_dir}/maxbrstack.py" cleanup() { trap - EXIT TERM INT sane=$(echo "${temp_dir}" | cut -b 1-26) if [ "${sane}" = "/tmp/perf-test-intel-pt-sh" ] ; then echo "--- Cleaning up ---" rm -f "${temp_dir}/"* rmdir "${temp_dir}" fi } trap_cleanup() { cleanup exit 1 } trap trap_cleanup EXIT TERM INT # perf record for testing without decoding perf_record_no_decode() { # Options to speed up recording: no post-processing, no build-id cache update, # and no BPF events. perf record -B -N --no-bpf-event "$@" } # perf record for testing should not need BPF events perf_record_no_bpf() { # Options for no BPF events perf record --no-bpf-event "$@" } have_workload=false cat << _end_of_file_ | /usr/bin/cc -o "${workload}" -xc - -pthread && have_workload=true #include <time.h> #include <pthread.h> void work(void) { struct timespec tm = { .tv_nsec = 1000000, }; int i; /* Run for about 30 seconds */ for (i = 0; i < 30000; i++) nanosleep(&tm, NULL); } void *threadfunc(void *arg) { work(); return NULL; } int main(void) { pthread_t th; pthread_create(&th, NULL, threadfunc, NULL); work(); pthread_join(th, NULL); return 0; } _end_of_file_ can_cpu_wide() { echo "Checking for CPU-wide recording on CPU $1" if ! perf_record_no_decode -o "${tmpfile}" -e dummy:u -C "$1" true >/dev/null 2>&1 ; then echo "No so skipping" return 2 fi echo OK return 0 } test_system_wide_side_band() { echo "--- Test system-wide sideband ---" # Need CPU 0 and CPU 1 can_cpu_wide 0 || return $? can_cpu_wide 1 || return $? # Record on CPU 0 a task running on CPU 1 perf_record_no_decode -o "${perfdatafile}" -e intel_pt//u -C 0 -- taskset --cpu-list 1 uname # Should get MMAP events from CPU 1 because they can be needed to decode mmap_cnt=$(perf script -i "${perfdatafile}" --no-itrace --show-mmap-events -C 1 2>/dev/null | grep -c MMAP) if [ "${mmap_cnt}" -gt 0 ] ; then echo OK return 0 fi echo "Failed to record MMAP events on CPU 1 when tracing CPU 0" return 1 } can_kernel() { if [ -z "${can_kernel_trace}" ] ; then can_kernel_trace=0 perf_record_no_decode -o "${tmpfile}" -e dummy:k true >/dev/null 2>&1 && can_kernel_trace=1 fi if [ ${can_kernel_trace} -eq 0 ] ; then echo "SKIP: no kernel tracing" return 2 fi return 0 } test_per_thread() { k="$1" desc="$2" echo "--- Test per-thread ${desc}recording ---" if ! $have_workload ; then echo "No workload, so skipping" return 2 fi if [ "${k}" = "k" ] ; then can_kernel || return 2 fi cat <<- "_end_of_file_" > "${awkscript}" BEGIN { s = "[ ]*" u = s"[0-9]+"s d = s"[0-9-]+"s x = s"[0-9a-fA-FxX]+"s mmapping = "idx"u": mmapping fd"u set_output = "idx"u": set output fd"u"->"u perf_event_open = "sys_perf_event_open: pid"d"cpu"d"group_fd"d"flags"x"="u } /perf record opening and mmapping events/ { if (!done) active = 1 } /perf record done opening and mmapping events/ { active = 0 done = 1 } $0 ~ perf_event_open && active { match($0, perf_event_open) $0 = substr($0, RSTART, RLENGTH) pid = $3 cpu = $5 fd = $11 print "pid " pid " cpu " cpu " fd " fd " : " $0 fd_array[fd] = fd pid_array[fd] = pid cpu_array[fd] = cpu } $0 ~ mmapping && active { match($0, mmapping) $0 = substr($0, RSTART, RLENGTH) fd = $5 print "fd " fd " : " $0 if (fd in fd_array) { mmap_array[fd] = 1 } else { print "Unknown fd " fd exit 1 } } $0 ~ set_output && active { match($0, set_output) $0 = substr($0, RSTART, RLENGTH) fd = $6 fd_to = $8 print "fd " fd " fd_to " fd_to " : " $0 if (fd in fd_array) { if (fd_to in fd_array) { set_output_array[fd] = fd_to } else { print "Unknown fd " fd_to exit 1 } } else { print "Unknown fd " fd exit 1 } } END { print "Checking " length(fd_array) " fds" for (fd in fd_array) { if (fd in mmap_array) { pid = pid_array[fd] if (pid != -1) { if (pid in pids) { print "More than 1 mmap for PID " pid exit 1 } pids[pid] = 1 } cpu = cpu_array[fd] if (cpu != -1) { if (cpu in cpus) { print "More than 1 mmap for CPU " cpu exit 1 } cpus[cpu] = 1 } } else if (!(fd in set_output_array)) { print "No mmap for fd " fd exit 1 } } n = length(pids) if (n != thread_cnt) { print "Expected " thread_cnt " per-thread mmaps - found " n exit 1 } } _end_of_file_ $workload & w1=$! $workload & w2=$! echo "Workload PIDs are $w1 and $w2" wait_for_threads ${w1} 2 wait_for_threads ${w2} 2 perf_record_no_decode -o "${perfdatafile}" -e intel_pt//u"${k}" -vvv --per-thread -p "${w1},${w2}" 2>"${errfile}" >"${outfile}" & ppid=$! echo "perf PID is $ppid" wait_for_perf_to_start ${ppid} "${errfile}" || return 1 kill ${w1} wait_for_process_to_exit ${w1} || return 1 is_running ${ppid} || return 1 kill ${w2} wait_for_process_to_exit ${w2} || return 1 wait_for_process_to_exit ${ppid} || return 1 awk -v thread_cnt=4 -f "${awkscript}" "${errfile}" || return 1 echo OK return 0 } test_jitdump() { echo "--- Test tracing self-modifying code that uses jitdump ---" script_path=$(realpath "$0") script_dir=$(dirname "$script_path") jitdump_incl_dir="${script_dir}/../../util" jitdump_h="${jitdump_incl_dir}/jitdump.h" if [ ! -e "${jitdump_h}" ] ; then echo "SKIP: Include file jitdump.h not found" return 2 fi if [ -z "${have_jitdump_workload}" ] ; then have_jitdump_workload=false # Create a workload that uses self-modifying code and generates its own jitdump file cat <<- "_end_of_file_" | /usr/bin/cc -o "${jitdump_workload}" -I "${jitdump_incl_dir}" -xc - -pthread && have_jitdump_workload=true #define _GNU_SOURCE #include <sys/mman.h> #include <sys/types.h> #include <stddef.h> #include <stdio.h> #include <stdint.h> #include <unistd.h> #include <string.h> #include "jitdump.h" #define CHK_BYTE 0x5a static inline uint64_t rdtsc(void) { unsigned int low, high; asm volatile("rdtsc" : "=a" (low), "=d" (high)); return low | ((uint64_t)high) << 32; } static FILE *open_jitdump(void) { struct jitheader header = { .magic = JITHEADER_MAGIC, .version = JITHEADER_VERSION, .total_size = sizeof(header), .pid = getpid(), .timestamp = rdtsc(), .flags = JITDUMP_FLAGS_ARCH_TIMESTAMP, }; char filename[256]; FILE *f; void *m; snprintf(filename, sizeof(filename), "jit-%d.dump", getpid()); f = fopen(filename, "w+"); if (!f) goto err; /* Create an MMAP event for the jitdump file. That is how perf tool finds it. */ m = mmap(0, 4096, PROT_READ | PROT_EXEC, MAP_PRIVATE, fileno(f), 0); if (m == MAP_FAILED) goto err_close; munmap(m, 4096); if (fwrite(&header,sizeof(header),1,f) != 1) goto err_close; return f; err_close: fclose(f); err: return NULL; } static int write_jitdump(FILE *f, void *addr, const uint8_t *dat, size_t sz, uint64_t *idx) { struct jr_code_load rec = { .p.id = JIT_CODE_LOAD, .p.total_size = sizeof(rec) + sz, .p.timestamp = rdtsc(), .pid = getpid(), .tid = gettid(), .vma = (unsigned long)addr, .code_addr = (unsigned long)addr, .code_size = sz, .code_index = ++*idx, }; if (fwrite(&rec,sizeof(rec),1,f) != 1 || fwrite(dat, sz, 1, f) != 1) return -1; return 0; } static void close_jitdump(FILE *f) { fclose(f); } int main() { /* Get a memory page to store executable code */ void *addr = mmap(0, 4096, PROT_WRITE | PROT_EXEC, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); /* Code to execute: mov CHK_BYTE, %eax ; ret */ uint8_t dat[] = {0xb8, CHK_BYTE, 0x00, 0x00, 0x00, 0xc3}; FILE *f = open_jitdump(); uint64_t idx = 0; int ret = 1; if (!f) return 1; /* Copy executable code to executable memory page */ memcpy(addr, dat, sizeof(dat)); /* Record it in the jitdump file */ if (write_jitdump(f, addr, dat, sizeof(dat), &idx)) goto out_close; /* Call it */ ret = ((int (*)(void))addr)() - CHK_BYTE; out_close: close_jitdump(f); return ret; } _end_of_file_ fi if ! $have_jitdump_workload ; then echo "SKIP: No jitdump workload" return 2 fi # Change to temp_dir so jitdump collateral files go there cd "${temp_dir}" perf_record_no_bpf -o "${tmpfile}" -e intel_pt//u "${jitdump_workload}" perf inject -i "${tmpfile}" -o "${perfdatafile}" --jit decode_br_cnt=$(perf script -i "${perfdatafile}" --itrace=b | wc -l) # Note that overflow and lost errors are suppressed for the error count decode_err_cnt=$(perf script -i "${perfdatafile}" --itrace=e-o-l | grep -ci error) cd - # Should be thousands of branches if [ "${decode_br_cnt}" -lt 1000 ] ; then echo "Decode failed, only ${decode_br_cnt} branches" return 1 fi # Should be no errors if [ "${decode_err_cnt}" -ne 0 ] ; then echo "Decode failed, ${decode_err_cnt} errors" perf script -i "${perfdatafile}" --itrace=e-o-l --show-mmap-events | cat return 1 fi echo OK return 0 } test_packet_filter() { echo "--- Test with MTC and TSC disabled ---" # Disable MTC and TSC perf_record_no_decode -o "${perfdatafile}" -e intel_pt/mtc=0,tsc=0/u uname # Should not get MTC packet mtc_cnt=$(perf script -i "${perfdatafile}" -D 2>/dev/null | grep -c "MTC 0x") if [ "${mtc_cnt}" -ne 0 ] ; then echo "Failed to filter with mtc=0" return 1 fi # Should not get TSC package tsc_cnt=$(perf script -i "${perfdatafile}" -D 2>/dev/null | grep -c "TSC 0x") if [ "${tsc_cnt}" -ne 0 ] ; then echo "Failed to filter with tsc=0" return 1 fi echo OK return 0 } test_disable_branch() { echo "--- Test with branches disabled ---" # Disable branch perf_record_no_decode -o "${perfdatafile}" -e intel_pt/branch=0/u uname # Should not get branch related packets tnt_cnt=$(perf script -i "${perfdatafile}" -D 2>/dev/null | grep -c "TNT 0x") tip_cnt=$(perf script -i "${perfdatafile}" -D 2>/dev/null | grep -c "TIP 0x") fup_cnt=$(perf script -i "${perfdatafile}" -D 2>/dev/null | grep -c "FUP 0x") if [ "${tnt_cnt}" -ne 0 ] || [ "${tip_cnt}" -ne 0 ] || [ "${fup_cnt}" -ne 0 ] ; then echo "Failed to disable branches" return 1 fi echo OK return 0 } test_time_cyc() { echo "--- Test with/without CYC ---" # Check if CYC is supported cyc=$(cat /sys/bus/event_source/devices/intel_pt/caps/psb_cyc) if [ "${cyc}" != "1" ] ; then echo "SKIP: CYC is not supported" return 2 fi # Enable CYC perf_record_no_decode -o "${perfdatafile}" -e intel_pt/cyc/u uname # should get CYC packets cyc_cnt=$(perf script -i "${perfdatafile}" -D 2>/dev/null | grep -c "CYC 0x") if [ "${cyc_cnt}" = "0" ] ; then echo "Failed to get CYC packet" return 1 fi # Without CYC perf_record_no_decode -o "${perfdatafile}" -e intel_pt//u uname # Should not get CYC packets cyc_cnt=$(perf script -i "${perfdatafile}" -D 2>/dev/null | grep -c "CYC 0x") if [ "${cyc_cnt}" -gt 0 ] ; then echo "Still get CYC packet without cyc" return 1 fi echo OK return 0 } test_sample() { echo "--- Test recording with sample mode ---" # Check if recording with sample mode is working if ! perf_record_no_decode -o "${perfdatafile}" --aux-sample=8192 -e '{intel_pt//u,branch-misses:u}' uname ; then echo "perf record failed with --aux-sample" return 1 fi # Check with event with PMU name if perf_record_no_decode -o "${perfdatafile}" -e br_misp_retired.all_branches:u uname ; then if ! perf_record_no_decode -o "${perfdatafile}" -e '{intel_pt//,br_misp_retired.all_branches/aux-sample-size=8192/}:u' uname ; then echo "perf record failed with --aux-sample-size" return 1 fi fi echo OK return 0 } test_kernel_trace() { echo "--- Test with kernel trace ---" # Check if recording with kernel trace is working can_kernel || return 2 if ! perf_record_no_decode -o "${perfdatafile}" -e intel_pt//k -m1,128 uname ; then echo "perf record failed with intel_pt//k" return 1 fi echo OK return 0 } test_virtual_lbr() { echo "--- Test virtual LBR ---" # Check if python script is supported libpython=$(perf version --build-options | grep python | grep -cv OFF) if [ "${libpython}" != "1" ] ; then echo "SKIP: python scripting is not supported" return 2 fi # Python script to determine the maximum size of branch stacks cat << "_end_of_file_" > "${maxbrstack}" from __future__ import print_function bmax = 0 def process_event(param_dict): if "brstack" in param_dict: brstack = param_dict["brstack"] n = len(brstack) global bmax if n > bmax: bmax = n def trace_end(): print("max brstack", bmax) _end_of_file_ # Check if virtual lbr is working perf_record_no_bpf -o "${perfdatafile}" --aux-sample -e '{intel_pt//,cycles}:u' uname times_val=$(perf script -i "${perfdatafile}" --itrace=L -s "${maxbrstack}" 2>/dev/null | grep "max brstack " | cut -d " " -f 3) case "${times_val}" in [0-9]*) ;; *) times_val=0;; esac if [ "${times_val}" -lt 2 ] ; then echo "Failed with virtual lbr" return 1 fi echo OK return 0 } test_power_event() { echo "--- Test power events ---" # Check if power events are supported power_event=$(cat /sys/bus/event_source/devices/intel_pt/caps/power_event_trace) if [ "${power_event}" != "1" ] ; then echo "SKIP: power_event_trace is not supported" return 2 fi if ! perf_record_no_decode -o "${perfdatafile}" -a -e intel_pt/pwr_evt/u uname ; then echo "perf record failed with pwr_evt" return 1 fi echo OK return 0 } test_no_tnt() { echo "--- Test with TNT packets disabled ---" # Check if TNT disable is supported notnt=$(cat /sys/bus/event_source/devices/intel_pt/caps/tnt_disable) if [ "${notnt}" != "1" ] ; then echo "SKIP: tnt_disable is not supported" return 2 fi perf_record_no_decode -o "${perfdatafile}" -e intel_pt/notnt/u uname # Should be no TNT packets tnt_cnt=$(perf script -i "${perfdatafile}" -D | grep -c TNT) if [ "${tnt_cnt}" -ne 0 ] ; then echo "TNT packets still there after notnt" return 1 fi echo OK return 0 } test_event_trace() { echo "--- Test with event_trace ---" # Check if event_trace is supported event_trace=$(cat /sys/bus/event_source/devices/intel_pt/caps/event_trace) if [ "${event_trace}" != 1 ] ; then echo "SKIP: event_trace is not supported" return 2 fi if ! perf_record_no_decode -o "${perfdatafile}" -e intel_pt/event/u uname ; then echo "perf record failed with event trace" return 1 fi echo OK return 0 } test_pipe() { echo "--- Test with pipe mode ---" # Check if it works with pipe if ! perf_record_no_bpf -o- -e intel_pt//u uname | perf report -q -i- --itrace=i10000 ; then echo "perf record + report failed with pipe mode" return 1 fi if ! perf_record_no_bpf -o- -e intel_pt//u uname | perf inject -b > /dev/null ; then echo "perf record + inject failed with pipe mode" return 1 fi echo OK return 0 } count_result() { if [ "$1" -eq 2 ] ; then skip_cnt=$((skip_cnt + 1)) return fi if [ "$1" -eq 0 ] ; then ok_cnt=$((ok_cnt + 1)) return fi err_cnt=$((err_cnt + 1)) } ret=0 test_system_wide_side_band || ret=$? ; count_result $ret ; ret=0 test_per_thread "" "" || ret=$? ; count_result $ret ; ret=0 test_per_thread "k" "(incl. kernel) " || ret=$? ; count_result $ret ; ret=0 test_jitdump || ret=$? ; count_result $ret ; ret=0 test_packet_filter || ret=$? ; count_result $ret ; ret=0 test_disable_branch || ret=$? ; count_result $ret ; ret=0 test_time_cyc || ret=$? ; count_result $ret ; ret=0 test_sample || ret=$? ; count_result $ret ; ret=0 test_kernel_trace || ret=$? ; count_result $ret ; ret=0 test_virtual_lbr || ret=$? ; count_result $ret ; ret=0 test_power_event || ret=$? ; count_result $ret ; ret=0 test_no_tnt || ret=$? ; count_result $ret ; ret=0 test_event_trace || ret=$? ; count_result $ret ; ret=0 test_pipe || ret=$? ; count_result $ret ; ret=0 cleanup echo "--- Done ---" if [ ${err_cnt} -gt 0 ] ; then exit 1 fi if [ ${ok_cnt} -gt 0 ] ; then exit 0 fi exit 2