11 files changed, 2068 insertions, 4 deletions
diff --git a/tools/perf/tests/shell/lib/attr.py b/tools/perf/tests/shell/lib/attr.py
new file mode 100644
index 000000000000..bfccc727d9b2
--- /dev/null
+++ b/tools/perf/tests/shell/lib/attr.py
@@ -0,0 +1,470 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import configparser
+import os
+import sys
+import glob
+import optparse
+import platform
+import tempfile
+import logging
+import re
+import shutil
+import subprocess
+
+def data_equal(a, b):
+    # Allow multiple values in assignment separated by '|'
+    a_list = a.split('|')
+    b_list = b.split('|')
+
+    for a_item in a_list:
+        for b_item in b_list:
+            if (a_item == b_item):
+                return True
+            elif (a_item == '*') or (b_item == '*'):
+                return True
+
+    return False
+
+class Fail(Exception):
+    def __init__(self, test, msg):
+        self.msg = msg
+        self.test = test
+    def getMsg(self):
+        return '\'%s\' - %s' % (self.test.path, self.msg)
+
+class Notest(Exception):
+    def __init__(self, test, arch):
+        self.arch = arch
+        self.test = test
+    def getMsg(self):
+        return '[%s] \'%s\'' % (self.arch, self.test.path)
+
+class Unsup(Exception):
+    def __init__(self, test):
+        self.test = test
+    def getMsg(self):
+        return '\'%s\'' % self.test.path
+
+class Event(dict):
+    terms = [
+        'cpu',
+        'flags',
+        'type',
+        'size',
+        'config',
+        'sample_period',
+        'sample_type',
+        'read_format',
+        'disabled',
+        'inherit',
+        'pinned',
+        'exclusive',
+        'exclude_user',
+        'exclude_kernel',
+        'exclude_hv',
+        'exclude_idle',
+        'mmap',
+        'comm',
+        'freq',
+        'inherit_stat',
+        'enable_on_exec',
+        'task',
+        'watermark',
+        'precise_ip',
+        'mmap_data',
+        'sample_id_all',
+        'exclude_host',
+        'exclude_guest',
+        'exclude_callchain_kernel',
+        'exclude_callchain_user',
+        'wakeup_events',
+        'bp_type',
+        'config1',
+        'config2',
+        'branch_sample_type',
+        'sample_regs_user',
+        'sample_stack_user',
+    ]
+
+    def add(self, data):
+        for key, val in data:
+            log.debug("      %s = %s" % (key, val))
+            self[key] = val
+
+    def __init__(self, name, data, base):
+        log.debug("    Event %s" % name);
+        self.name  = name;
+        self.group = ''
+        self.add(base)
+        self.add(data)
+
+    def equal(self, other):
+        for t in Event.terms:
+            log.debug("      [%s] %s %s" % (t, self[t], other[t]));
+            if t not in self or t not in other:
+                return False
+            if not data_equal(self[t], other[t]):
+                return False
+        return True
+
+    def optional(self):
+        if 'optional' in self and self['optional'] == '1':
+            return True
+        return False
+
+    def diff(self, other):
+        for t in Event.terms:
+            if t not in self or t not in other:
+                continue
+            if not data_equal(self[t], other[t]):
+                log.warning("expected %s=%s, got %s" % (t, self[t], other[t]))
+
+def parse_version(version):
+    if not version:
+        return None
+    return [int(v) for v in version.split(".")[0:2]]
+
+# Test file description needs to have following sections:
+# [config]
+#   - just single instance in file
+#   - needs to specify:
+#     'command' - perf command name
+#     'args'    - special command arguments
+#     'ret'     - Skip test if Perf doesn't exit with this value (0 by default)
+#     'test_ret'- If set to 'true', fail test instead of skipping for 'ret' argument
+#     'arch'    - architecture specific test (optional)
+#                 comma separated list, ! at the beginning
+#                 negates it.
+#     'auxv'    - Truthy statement that is evaled in the scope of the auxv map. When false,
+#                 the test is skipped. For example 'auxv["AT_HWCAP"] == 10'. (optional)
+#     'kernel_since' - Inclusive kernel version from which the test will start running. Only the
+#                      first two values are supported, for example "6.1" (optional)
+#     'kernel_until' - Exclusive kernel version from which the test will stop running. (optional)
+# [eventX:base]
+#   - one or multiple instances in file
+#   - expected values assignments
+class Test(object):
+    def __init__(self, path, options):
+        parser = configparser.ConfigParser()
+        parser.read(path)
+
+        log.warning("running '%s'" % path)
+
+        self.path     = path
+        self.test_dir = options.test_dir
+        self.perf     = options.perf
+        self.command  = parser.get('config', 'command')
+        self.args     = parser.get('config', 'args')
+
+        try:
+            self.ret  = parser.get('config', 'ret')
+        except:
+            self.ret  = 0
+
+        self.test_ret = parser.getboolean('config', 'test_ret', fallback=False)
+
+        try:
+            self.arch  = parser.get('config', 'arch')
+            log.warning("test limitation '%s'" % self.arch)
+        except:
+            self.arch  = ''
+
+        self.auxv = parser.get('config', 'auxv', fallback=None)
+        self.kernel_since = parse_version(parser.get('config', 'kernel_since', fallback=None))
+        self.kernel_until = parse_version(parser.get('config', 'kernel_until', fallback=None))
+        self.expect   = {}
+        self.result   = {}
+        log.debug("  loading expected events");
+        self.load_events(path, self.expect)
+
+    def is_event(self, name):
+        if name.find("event") == -1:
+            return False
+        else:
+            return True
+
+    def skip_test_kernel_since(self):
+        if not self.kernel_since:
+            return False
+        return not self.kernel_since <= parse_version(platform.release())
+
+    def skip_test_kernel_until(self):
+        if not self.kernel_until:
+            return False
+        return not parse_version(platform.release()) < self.kernel_until
+
+    def skip_test_auxv(self):
+        def new_auxv(a, pattern):
+            items = list(filter(None, pattern.split(a)))
+            # AT_HWCAP is hex but doesn't have a prefix, so special case it
+            if items[0] == "AT_HWCAP":
+                value = int(items[-1], 16)
+            else:
+                try:
+                    value = int(items[-1], 0)
+                except:
+                    value = items[-1]
+            return (items[0], value)
+
+        if not self.auxv:
+            return False
+        auxv = subprocess.check_output("LD_SHOW_AUXV=1 sleep 0", shell=True) \
+               .decode(sys.stdout.encoding)
+        pattern = re.compile(r"[: ]+")
+        auxv = dict([new_auxv(a, pattern) for a in auxv.splitlines()])
+        return not eval(self.auxv)
+
+    def skip_test_arch(self, myarch):
+        # If architecture not set always run test
+        if self.arch == '':
+            # log.warning("test for arch %s is ok" % myarch)
+            return False
+
+        # Allow multiple values in assignment separated by ','
+        arch_list = self.arch.split(',')
+
+        # Handle negated list such as !s390x,ppc
+        if arch_list[0][0] == '!':
+            arch_list[0] = arch_list[0][1:]
+            log.warning("excluded architecture list %s" % arch_list)
+            for arch_item in arch_list:
+                # log.warning("test for %s arch is %s" % (arch_item, myarch))
+                if arch_item == myarch:
+                    return True
+            return False
+
+        for arch_item in arch_list:
+            # log.warning("test for architecture '%s' current '%s'" % (arch_item, myarch))
+            if arch_item == myarch:
+                return False
+        return True
+
+    def restore_sample_rate(self, value=10000):
+        try:
+            # Check value of sample_rate
+            with open("/proc/sys/kernel/perf_event_max_sample_rate", "r") as fIn:
+                curr_value = fIn.readline()
+            # If too low restore to reasonable value
+            if not curr_value or int(curr_value) < int(value):
+                with open("/proc/sys/kernel/perf_event_max_sample_rate", "w") as fOut:
+                    fOut.write(str(value))
+
+        except IOError as e:
+            log.warning("couldn't restore sample_rate value: I/O error %s" % e)
+        except ValueError as e:
+            log.warning("couldn't restore sample_rate value: Value error %s" % e)
+        except TypeError as e:
+            log.warning("couldn't restore sample_rate value: Type error %s" % e)
+
+    def load_events(self, path, events):
+        parser_event = configparser.ConfigParser()
+        parser_event.read(path)
+
+        # The event record section header contains 'event' word,
+        # optionaly followed by ':' allowing to load 'parent
+        # event' first as a base
+        for section in filter(self.is_event, parser_event.sections()):
+
+            parser_items = parser_event.items(section);
+            base_items   = {}
+
+            # Read parent event if there's any
+            if (':' in section):
+                base = section[section.index(':') + 1:]
+                parser_base = configparser.ConfigParser()
+                parser_base.read(self.test_dir + '/' + base)
+                base_items = parser_base.items('event')
+
+            e = Event(section, parser_items, base_items)
+            events[section] = e
+
+    def run_cmd(self, tempdir):
+        junk1, junk2, junk3, junk4, myarch = (os.uname())
+
+        if self.skip_test_arch(myarch):
+            raise Notest(self, myarch)
+
+        if self.skip_test_auxv():
+            raise Notest(self, "auxv skip")
+
+        if self.skip_test_kernel_since():
+            raise Notest(self, "old kernel skip")
+
+        if self.skip_test_kernel_until():
+            raise Notest(self, "new kernel skip")
+
+        self.restore_sample_rate()
+        cmd = "PERF_TEST_ATTR=%s %s %s -o %s/perf.data %s" % (tempdir,
+              self.perf, self.command, tempdir, self.args)
+        ret = os.WEXITSTATUS(os.system(cmd))
+
+        log.info("  '%s' ret '%s', expected '%s'" % (cmd, str(ret), str(self.ret)))
+
+        if not data_equal(str(ret), str(self.ret)):
+            if self.test_ret:
+                raise Fail(self, "Perf exit code failure")
+            else:
+                raise Unsup(self)
+
+    def compare(self, expect, result):
+        match = {}
+
+        log.debug("  compare");
+
+        # For each expected event find all matching
+        # events in result. Fail if there's not any.
+        for exp_name, exp_event in expect.items():
+            exp_list = []
+            res_event = {}
+            log.debug("    matching [%s]" % exp_name)
+            for res_name, res_event in result.items():
+                log.debug("      to [%s]" % res_name)
+                if (exp_event.equal(res_event)):
+                    exp_list.append(res_name)
+                    log.debug("    ->OK")
+                else:
+                    log.debug("    ->FAIL");
+
+            log.debug("    match: [%s] matches %s" % (exp_name, str(exp_list)))
+
+            # we did not any matching event - fail
+            if not exp_list:
+                if exp_event.optional():
+                    log.debug("    %s does not match, but is optional" % exp_name)
+                else:
+                    if not res_event:
+                        log.debug("    res_event is empty");
+                    else:
+                        exp_event.diff(res_event)
+                    raise Fail(self, 'match failure');
+
+            match[exp_name] = exp_list
+
+        # For each defined group in the expected events
+        # check we match the same group in the result.
+        for exp_name, exp_event in expect.items():
+            group = exp_event.group
+
+            if (group == ''):
+                continue
+
+            for res_name in match[exp_name]:
+                res_group = result[res_name].group
+                if res_group not in match[group]:
+                    raise Fail(self, 'group failure')
+
+                log.debug("    group: [%s] matches group leader %s" %
+                         (exp_name, str(match[group])))
+
+        log.debug("  matched")
+
+    def resolve_groups(self, events):
+        for name, event in events.items():
+            group_fd = event['group_fd'];
+            if group_fd == '-1':
+                continue;
+
+            for iname, ievent in events.items():
+                if (ievent['fd'] == group_fd):
+                    event.group = iname
+                    log.debug('[%s] has group leader [%s]' % (name, iname))
+                    break;
+
+    def run(self):
+        tempdir = tempfile.mkdtemp();
+
+        try:
+            # run the test script
+            self.run_cmd(tempdir);
+
+            # load events expectation for the test
+            log.debug("  loading result events");
+            for f in glob.glob(tempdir + '/event*'):
+                self.load_events(f, self.result);
+
+            # resolve group_fd to event names
+            self.resolve_groups(self.expect);
+            self.resolve_groups(self.result);
+
+            # do the expectation - results matching - both ways
+            self.compare(self.expect, self.result)
+            self.compare(self.result, self.expect)
+
+        finally:
+            # cleanup
+            shutil.rmtree(tempdir)
+
+
+def run_tests(options):
+    for f in glob.glob(options.test_dir + '/' + options.test):
+        try:
+            Test(f, options).run()
+        except Unsup as obj:
+            log.warning("unsupp  %s" % obj.getMsg())
+        except Notest as obj:
+            log.warning("skipped %s" % obj.getMsg())
+
+def setup_log(verbose):
+    global log
+    level = logging.CRITICAL
+
+    if verbose == 1:
+        level = logging.WARNING
+    if verbose == 2:
+        level = logging.INFO
+    if verbose >= 3:
+        level = logging.DEBUG
+
+    log = logging.getLogger('test')
+    log.setLevel(level)
+    ch  = logging.StreamHandler()
+    ch.setLevel(level)
+    formatter = logging.Formatter('%(message)s')
+    ch.setFormatter(formatter)
+    log.addHandler(ch)
+
+USAGE = '''%s [OPTIONS]
+  -d dir  # tests dir
+  -p path # perf binary
+  -t test # single test
+  -v      # verbose level
+''' % sys.argv[0]
+
+def main():
+    parser = optparse.OptionParser(usage=USAGE)
+
+    parser.add_option("-t", "--test",
+                      action="store", type="string", dest="test")
+    parser.add_option("-d", "--test-dir",
+                      action="store", type="string", dest="test_dir")
+    parser.add_option("-p", "--perf",
+                      action="store", type="string", dest="perf")
+    parser.add_option("-v", "--verbose",
+                      default=0, action="count", dest="verbose")
+
+    options, args = parser.parse_args()
+    if args:
+        parser.error('FAILED wrong arguments %s' %  ' '.join(args))
+        return -1
+
+    setup_log(options.verbose)
+
+    if not options.test_dir:
+        print('FAILED no -d option specified')
+        sys.exit(-1)
+
+    if not options.test:
+        options.test = 'test*'
+
+    try:
+        run_tests(options)
+
+    except Fail as obj:
+        print("FAILED %s" % obj.getMsg())
+        sys.exit(-1)
+
+    sys.exit(0)
+
+if __name__ == '__main__':
+    main()
diff --git a/tools/perf/tests/shell/lib/coresight.sh b/tools/perf/tests/shell/lib/coresight.sh
new file mode 100644
index 000000000000..184d62e7e5bd
--- /dev/null
+++ b/tools/perf/tests/shell/lib/coresight.sh
@@ -0,0 +1,134 @@
+# SPDX-License-Identifier: GPL-2.0
+# Carsten Haitzler <carsten.haitzler@arm.com>, 2021
+
+# This is sourced from a driver script so no need for #!/bin... etc. at the
+# top - the assumption below is that it runs as part of sourcing after the
+# test sets up some basic env vars to say what it is.
+
+# This currently works with ETMv4 / ETF not any other packet types at thi
+# point. This will need changes if that changes.
+
+# perf record options for the perf tests to use
+PERFRECMEM="-m ,16M"
+PERFRECOPT="$PERFRECMEM -e cs_etm//u"
+
+TOOLS=$(dirname $0)
+DIR="$TOOLS/$TEST"
+BIN="$DIR/$TEST"
+# If the test tool/binary does not exist and is executable then skip the test
+if ! test -x "$BIN"; then exit 2; fi
+# If CoreSight is not available, skip the test
+perf list pmu | grep -q cs_etm || exit 2
+DATD="."
+# If the data dir env is set then make the data dir use that instead of ./
+if test -n "$PERF_TEST_CORESIGHT_DATADIR"; then
+	DATD="$PERF_TEST_CORESIGHT_DATADIR";
+fi
+# If the stat dir env is set then make the data dir use that instead of ./
+STATD="."
+if test -n "$PERF_TEST_CORESIGHT_STATDIR"; then
+	STATD="$PERF_TEST_CORESIGHT_STATDIR";
+fi
+
+# Called if the test fails - error code 1
+err() {
+	echo "$1"
+	exit 1
+}
+
+# Check that some statistics from our perf
+check_val_min() {
+	STATF="$4"
+	if test "$2" -lt "$3"; then
+		echo ", FAILED" >> "$STATF"
+		err "Sanity check number of $1 is too low ($2 < $3)"
+	fi
+}
+
+perf_dump_aux_verify() {
+	# Some basic checking that the AUX chunk contains some sensible data
+	# to see that we are recording something and at least a minimum
+	# amount of it. We should almost always see Fn packets in just about
+	# anything but certainly we will see some trace info and async
+	# packets
+	DUMP="$DATD/perf-tmp-aux-dump.txt"
+	perf report --stdio --dump -i "$1" | \
+		grep -o -e I_ATOM_F -e I_ASYNC -e I_TRACE_INFO > "$DUMP"
+	# Simply count how many of these packets we find to see that we are
+	# producing a reasonable amount of data - exact checks are not sane
+	# as this is a lossy process where we may lose some blocks and the
+	# compiler may produce different code depending on the compiler and
+	# optimization options, so this is rough just to see if we're
+	# either missing almost all the data or all of it
+	ATOM_FX_NUM=$(grep -c I_ATOM_F "$DUMP")
+	ASYNC_NUM=$(grep -c I_ASYNC "$DUMP")
+	TRACE_INFO_NUM=$(grep -c I_TRACE_INFO "$DUMP")
+	rm -f "$DUMP"
+
+	# Arguments provide minimums for a pass
+	CHECK_FX_MIN="$2"
+	CHECK_ASYNC_MIN="$3"
+	CHECK_TRACE_INFO_MIN="$4"
+
+	# Write out statistics, so over time you can track results to see if
+	# there is a pattern - for example we have less "noisy" results that
+	# produce more consistent amounts of data each run, to see if over
+	# time any techinques to  minimize data loss are having an effect or
+	# not
+	STATF="$STATD/stats-$TEST-$DATV.csv"
+	if ! test -f "$STATF"; then
+		echo "ATOM Fx Count, Minimum, ASYNC Count, Minimum, TRACE INFO Count, Minimum" > "$STATF"
+	fi
+	echo -n "$ATOM_FX_NUM, $CHECK_FX_MIN, $ASYNC_NUM, $CHECK_ASYNC_MIN, $TRACE_INFO_NUM, $CHECK_TRACE_INFO_MIN" >> "$STATF"
+
+	# Actually check to see if we passed or failed.
+	check_val_min "ATOM_FX" "$ATOM_FX_NUM" "$CHECK_FX_MIN" "$STATF"
+	check_val_min "ASYNC" "$ASYNC_NUM" "$CHECK_ASYNC_MIN" "$STATF"
+	check_val_min "TRACE_INFO" "$TRACE_INFO_NUM" "$CHECK_TRACE_INFO_MIN" "$STATF"
+	echo ", Ok" >> "$STATF"
+}
+
+perf_dump_aux_tid_verify() {
+	# Specifically crafted test will produce a list of Tread ID's to
+	# stdout that need to be checked to  see that they have had trace
+	# info collected in AUX blocks in the perf data. This will go
+	# through all the TID's that are listed as CID=0xabcdef and see
+	# that all the Thread IDs the test tool reports are  in the perf
+	# data AUX chunks
+
+	# The TID test tools will print a TID per stdout line that are being
+	# tested
+	TIDS=$(cat "$2")
+	# Scan the perf report to find the TIDs that are actually CID in hex
+	# and build a list of the ones found
+	FOUND_TIDS=$(perf report --stdio --dump -i "$1" | \
+			grep -o "CID=0x[0-9a-z]\+" | sed 's/CID=//g' | \
+			uniq | sort | uniq)
+	# No CID=xxx found - maybe your kernel is reporting these as
+	# VMID=xxx so look there
+	if test -z "$FOUND_TIDS"; then
+		FOUND_TIDS=$(perf report --stdio --dump -i "$1" | \
+				grep -o "VMID=0x[0-9a-z]\+" | sed 's/VMID=//g' | \
+				uniq | sort | uniq)
+	fi
+
+	# Iterate over the list of TIDs that the test says it has and find
+	# them in the TIDs found in the perf report
+	MISSING=""
+	for TID2 in $TIDS; do
+		FOUND=""
+		for TIDHEX in $FOUND_TIDS; do
+			TID=$(printf "%i" $TIDHEX)
+			if test "$TID" -eq "$TID2"; then
+				FOUND="y"
+				break
+			fi
+		done
+		if test -z "$FOUND"; then
+			MISSING="$MISSING $TID"
+		fi
+	done
+	if test -n "$MISSING"; then
+		err "Thread IDs $MISSING not found in perf AUX data"
+	fi
+}
diff --git a/tools/perf/tests/shell/lib/perf_has_symbol.sh b/tools/perf/tests/shell/lib/perf_has_symbol.sh
new file mode 100644
index 000000000000..0b35cce0b13d
--- /dev/null
+++ b/tools/perf/tests/shell/lib/perf_has_symbol.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+perf_has_symbol()
+{
+	if perf test -vv -F "Symbols" 2>&1 | grep "[[:space:]]$1$"; then
+		echo "perf does have symbol '$1'"
+		return 0
+	fi
+	echo "perf does not have symbol '$1'"
+	return 1
+}
+
+skip_test_missing_symbol()
+{
+	if ! perf_has_symbol "$1" ; then
+		echo "perf is missing symbols - skipping test"
+		exit 2
+	fi
+	return 0
+}
diff --git a/tools/perf/tests/shell/lib/perf_json_output_lint.py b/tools/perf/tests/shell/lib/perf_json_output_lint.py
new file mode 100644
index 000000000000..dafbde56cc76
--- /dev/null
+++ b/tools/perf/tests/shell/lib/perf_json_output_lint.py
@@ -0,0 +1,114 @@
+#!/usr/bin/python
+# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+# Basic sanity check of perf JSON output as specified in the man page.
+
+import argparse
+import sys
+import json
+
+ap = argparse.ArgumentParser()
+ap.add_argument('--no-args', action='store_true')
+ap.add_argument('--interval', action='store_true')
+ap.add_argument('--system-wide-no-aggr', action='store_true')
+ap.add_argument('--system-wide', action='store_true')
+ap.add_argument('--event', action='store_true')
+ap.add_argument('--per-core', action='store_true')
+ap.add_argument('--per-thread', action='store_true')
+ap.add_argument('--per-cache', action='store_true')
+ap.add_argument('--per-cluster', action='store_true')
+ap.add_argument('--per-die', action='store_true')
+ap.add_argument('--per-node', action='store_true')
+ap.add_argument('--per-socket', action='store_true')
+ap.add_argument('--metric-only', action='store_true')
+ap.add_argument('--file', type=argparse.FileType('r'), default=sys.stdin)
+args = ap.parse_args()
+
+Lines = args.file.readlines()
+
+def isfloat(num):
+  try:
+    float(num)
+    return True
+  except ValueError:
+    return False
+
+
+def isint(num):
+  try:
+    int(num)
+    return True
+  except ValueError:
+    return False
+
+def is_counter_value(num):
+  return isfloat(num) or num == '<not counted>' or num == '<not supported>'
+
+def is_metric_value(num):
+  return isfloat(num) or num == 'none'
+
+def check_json_output(expected_items):
+  checks = {
+      'counters': lambda x: isfloat(x),
+      'core': lambda x: True,
+      'counter-value': lambda x: is_counter_value(x),
+      'cgroup': lambda x: True,
+      'cpu': lambda x: isint(x),
+      'cache': lambda x: True,
+      'cluster': lambda x: True,
+      'die': lambda x: True,
+      'event': lambda x: True,
+      'event-runtime': lambda x: isfloat(x),
+      'interval': lambda x: isfloat(x),
+      'metric-unit': lambda x: True,
+      'metric-value': lambda x: is_metric_value(x),
+      'metric-threshold': lambda x: x in ['unknown', 'good', 'less good', 'nearly bad', 'bad'],
+      'metricgroup': lambda x: True,
+      'node': lambda x: True,
+      'pcnt-running': lambda x: isfloat(x),
+      'socket': lambda x: True,
+      'thread': lambda x: True,
+      'unit': lambda x: True,
+  }
+  input = '[\n' + ','.join(Lines) + '\n]'
+  for item in json.loads(input):
+    if expected_items != -1:
+      count = len(item)
+      if count not in expected_items and count >= 1 and count <= 7 and 'metric-value' in item:
+        # Events that generate >1 metric may have isolated metric
+        # values and possibly other prefixes like interval, core,
+        # counters, or event-runtime/pcnt-running from multiplexing.
+        pass
+      elif count not in expected_items and count >= 1 and count <= 5 and 'metricgroup' in item:
+        pass
+      elif count - 1 in expected_items and 'metric-threshold' in item:
+          pass
+      elif count in expected_items and 'insn per cycle' in item:
+          pass
+      elif count not in expected_items:
+        raise RuntimeError(f'wrong number of fields. counted {count} expected {expected_items}'
+                           f' in \'{item}\'')
+    for key, value in item.items():
+      if key not in checks:
+        if args.metric_only:
+          continue
+        raise RuntimeError(f'Unexpected key: key={key} value={value}')
+      if not checks[key](value):
+        raise RuntimeError(f'Check failed for: key={key} value={value}')
+
+
+try:
+  if args.no_args or args.system_wide or args.event:
+    expected_items = [5, 7]
+  elif args.interval or args.per_thread or args.system_wide_no_aggr:
+    expected_items = [6, 8]
+  elif args.per_core or args.per_socket or args.per_node or args.per_die or args.per_cluster or args.per_cache:
+    expected_items = [7, 9]
+  elif args.metric_only:
+    expected_items = [1, 2]
+  else:
+    # If no option is specified, don't check the number of items.
+    expected_items = -1
+  check_json_output(expected_items)
+except:
+  print('Test failed for input:\n' + '\n'.join(Lines))
+  raise
diff --git a/tools/perf/tests/shell/lib/perf_metric_validation.py b/tools/perf/tests/shell/lib/perf_metric_validation.py
new file mode 100644
index 000000000000..dea8ef1977bf
--- /dev/null
+++ b/tools/perf/tests/shell/lib/perf_metric_validation.py
@@ -0,0 +1,603 @@
+# SPDX-License-Identifier: GPL-2.0
+import re
+import csv
+import json
+import argparse
+from pathlib import Path
+import subprocess
+
+
+class TestError:
+    def __init__(self, metric: list[str], wl: str, value: list[float], low: float, up=float('nan'), description=str()):
+        self.metric: list = metric  # multiple metrics in relationship type tests
+        self.workloads = [wl]  # multiple workloads possible
+        self.collectedValue: list = value
+        self.valueLowBound = low
+        self.valueUpBound = up
+        self.description = description
+
+    def __repr__(self) -> str:
+        if len(self.metric) > 1:
+            return "\nMetric Relationship Error: \tThe collected value of metric {0}\n\
+                \tis {1} in workload(s): {2} \n\
+                \tbut expected value range is [{3}, {4}]\n\
+                \tRelationship rule description: \'{5}\'".format(self.metric, self.collectedValue, self.workloads,
+                                                                 self.valueLowBound, self.valueUpBound, self.description)
+        elif len(self.collectedValue) == 0:
+            return "\nNo Metric Value Error: \tMetric {0} returns with no value \n\
+                    \tworkload(s): {1}".format(self.metric, self.workloads)
+        else:
+            return "\nWrong Metric Value Error: \tThe collected value of metric {0}\n\
+                    \tis {1} in workload(s): {2}\n\
+                    \tbut expected value range is [{3}, {4}]"\
+                        .format(self.metric, self.collectedValue, self.workloads,
+                                self.valueLowBound, self.valueUpBound)
+
+
+class Validator:
+    def __init__(self, rulefname, reportfname='', t=5, debug=False, datafname='', fullrulefname='',
+                 workload='true', metrics='', cputype='cpu'):
+        self.rulefname = rulefname
+        self.reportfname = reportfname
+        self.rules = None
+        self.collectlist: str = metrics
+        self.metrics = self.__set_metrics(metrics)
+        self.skiplist = set()
+        self.tolerance = t
+        self.cputype = cputype
+
+        self.workloads = [x for x in workload.split(",") if x]
+        self.wlidx = 0  # idx of current workloads
+        self.allresults = dict()  # metric results of all workload
+        self.alltotalcnt = dict()
+        self.allpassedcnt = dict()
+
+        self.results = dict()  # metric results of current workload
+        # vars for test pass/failure statistics
+        # metrics with no results or negative results, neg result counts failed tests
+        self.ignoremetrics = set()
+        self.totalcnt = 0
+        self.passedcnt = 0
+        # vars for errors
+        self.errlist = list()
+
+        # vars for Rule Generator
+        self.pctgmetrics = set()  # Percentage rule
+
+        # vars for debug
+        self.datafname = datafname
+        self.debug = debug
+        self.fullrulefname = fullrulefname
+
+    def __set_metrics(self, metrics=''):
+        if metrics != '':
+            return set(metrics.split(","))
+        else:
+            return set()
+
+    def read_json(self, filename: str) -> dict:
+        try:
+            with open(Path(filename).resolve(), "r") as f:
+                data = json.loads(f.read())
+        except OSError as e:
+            print(f"Error when reading file {e}")
+            sys.exit()
+
+        return data
+
+    def json_dump(self, data, output_file):
+        parent = Path(output_file).parent
+        if not parent.exists():
+            parent.mkdir(parents=True)
+
+        with open(output_file, "w+") as output_file:
+            json.dump(data,
+                      output_file,
+                      ensure_ascii=True,
+                      indent=4)
+
+    def get_results(self, idx: int = 0):
+        return self.results.get(idx)
+
+    def get_bounds(self, lb, ub, error, alias={}, ridx: int = 0) -> list:
+        """
+        Get bounds and tolerance from lb, ub, and error.
+        If missing lb, use 0.0; missing ub, use float('inf); missing error, use self.tolerance.
+
+        @param lb: str/float, lower bound
+        @param ub: str/float, upper bound
+        @param error: float/str, error tolerance
+        @returns: lower bound, return inf if the lower bound is a metric value and is not collected
+                  upper bound, return -1 if the upper bound is a metric value and is not collected
+                  tolerance, denormalized base on upper bound value
+        """
+        # init ubv and lbv to invalid values
+        def get_bound_value(bound, initval, ridx):
+            val = initval
+            if isinstance(bound, int) or isinstance(bound, float):
+                val = bound
+            elif isinstance(bound, str):
+                if bound == '':
+                    val = float("inf")
+                elif bound in alias:
+                    vall = self.get_value(alias[ub], ridx)
+                    if vall:
+                        val = vall[0]
+                elif bound.replace('.', '1').isdigit():
+                    val = float(bound)
+                else:
+                    print("Wrong bound: {0}".format(bound))
+            else:
+                print("Wrong bound: {0}".format(bound))
+            return val
+
+        ubv = get_bound_value(ub, -1, ridx)
+        lbv = get_bound_value(lb, float('inf'), ridx)
+        t = get_bound_value(error, self.tolerance, ridx)
+
+        # denormalize error threshold
+        denormerr = t * ubv / 100 if ubv != 100 and ubv > 0 else t
+
+        return lbv, ubv, denormerr
+
+    def get_value(self, name: str, ridx: int = 0) -> list:
+        """
+        Get value of the metric from self.results.
+        If result of this metric is not provided, the metric name will be added into self.ignoremetics.
+        All future test(s) on this metric will fail.
+
+        @param name: name of the metric
+        @returns: list with value found in self.results; list is empty when value is not found.
+        """
+        results = []
+        data = self.results[ridx] if ridx in self.results else self.results[0]
+        if name not in self.ignoremetrics:
+            if name in data:
+                results.append(data[name])
+            elif name.replace('.', '1').isdigit():
+                results.append(float(name))
+            else:
+                self.ignoremetrics.add(name)
+        return results
+
+    def check_bound(self, val, lb, ub, err):
+        return True if val <= ub + err and val >= lb - err else False
+
+    # Positive Value Sanity check
+    def pos_val_test(self):
+        """
+        Check if metrics value are non-negative.
+        One metric is counted as one test.
+        Failure: when metric value is negative or not provided.
+        Metrics with negative value will be added into self.ignoremetrics.
+        """
+        negmetric = dict()
+        pcnt = 0
+        tcnt = 0
+        rerun = list()
+        results = self.get_results()
+        if not results:
+            return
+        for name, val in results.items():
+            if val < 0:
+                negmetric[name] = val
+                rerun.append(name)
+            else:
+                pcnt += 1
+            tcnt += 1
+        # The first round collect_perf() run these metrics with simple workload
+        # "true". We give metrics a second chance with a longer workload if less
+        # than 20 metrics failed positive test.
+        if len(rerun) > 0 and len(rerun) < 20:
+            second_results = dict()
+            self.second_test(rerun, second_results)
+            for name, val in second_results.items():
+                if name not in negmetric:
+                    continue
+                if val >= 0:
+                    del negmetric[name]
+                    pcnt += 1
+
+        if len(negmetric.keys()):
+            self.ignoremetrics.update(negmetric.keys())
+            self.errlist.extend(
+                [TestError([m], self.workloads[self.wlidx], negmetric[m], 0) for m in negmetric.keys()])
+
+        return
+
+    def evaluate_formula(self, formula: str, alias: dict, ridx: int = 0):
+        """
+        Evaluate the value of formula.
+
+        @param formula: the formula to be evaluated
+        @param alias: the dict has alias to metric name mapping
+        @returns: value of the formula is success; -1 if the one or more metric value not provided
+        """
+        stack = []
+        b = 0
+        errs = []
+        sign = "+"
+        f = str()
+
+        # TODO: support parenthesis?
+        for i in range(len(formula)):
+            if i+1 == len(formula) or formula[i] in ('+', '-', '*', '/'):
+                s = alias[formula[b:i]] if i + \
+                    1 < len(formula) else alias[formula[b:]]
+                v = self.get_value(s, ridx)
+                if not v:
+                    errs.append(s)
+                else:
+                    f = f + "{0}(={1:.4f})".format(s, v[0])
+                    if sign == "*":
+                        stack[-1] = stack[-1] * v
+                    elif sign == "/":
+                        stack[-1] = stack[-1] / v
+                    elif sign == '-':
+                        stack.append(-v[0])
+                    else:
+                        stack.append(v[0])
+                if i + 1 < len(formula):
+                    sign = formula[i]
+                    f += sign
+                    b = i + 1
+
+        if len(errs) > 0:
+            return -1, "Metric value missing: "+','.join(errs)
+
+        val = sum(stack)
+        return val, f
+
+    # Relationships Tests
+    def relationship_test(self, rule: dict):
+        """
+        Validate if the metrics follow the required relationship in the rule.
+        eg. lower_bound <= eval(formula)<= upper_bound
+        One rule is counted as ont test.
+        Failure: when one or more metric result(s) not provided, or when formula evaluated outside of upper/lower bounds.
+
+        @param rule: dict with metric name(+alias), formula, and required upper and lower bounds.
+        """
+        alias = dict()
+        for m in rule['Metrics']:
+            alias[m['Alias']] = m['Name']
+        lbv, ubv, t = self.get_bounds(
+            rule['RangeLower'], rule['RangeUpper'], rule['ErrorThreshold'], alias, ridx=rule['RuleIndex'])
+        val, f = self.evaluate_formula(
+            rule['Formula'], alias, ridx=rule['RuleIndex'])
+
+        lb = rule['RangeLower']
+        ub = rule['RangeUpper']
+        if isinstance(lb, str):
+            if lb in alias:
+                lb = alias[lb]
+        if isinstance(ub, str):
+            if ub in alias:
+                ub = alias[ub]
+
+        if val == -1:
+            self.errlist.append(TestError([m['Name'] for m in rule['Metrics']], self.workloads[self.wlidx], [],
+                                lb, ub, rule['Description']))
+        elif not self.check_bound(val, lbv, ubv, t):
+            self.errlist.append(TestError([m['Name'] for m in rule['Metrics']], self.workloads[self.wlidx], [val],
+                                lb, ub, rule['Description']))
+        else:
+            self.passedcnt += 1
+        self.totalcnt += 1
+
+        return
+
+    # Single Metric Test
+    def single_test(self, rule: dict):
+        """
+        Validate if the metrics are in the required value range.
+        eg. lower_bound <= metrics_value <= upper_bound
+        One metric is counted as one test in this type of test.
+        One rule may include one or more metrics.
+        Failure: when the metric value not provided or the value is outside the bounds.
+        This test updates self.total_cnt.
+
+        @param rule: dict with metrics to validate and the value range requirement
+        """
+        lbv, ubv, t = self.get_bounds(
+            rule['RangeLower'], rule['RangeUpper'], rule['ErrorThreshold'])
+        metrics = rule['Metrics']
+        passcnt = 0
+        totalcnt = 0
+        failures = dict()
+        rerun = list()
+        for m in metrics:
+            totalcnt += 1
+            result = self.get_value(m['Name'])
+            if len(result) > 0 and self.check_bound(result[0], lbv, ubv, t) or m['Name'] in self.skiplist:
+                passcnt += 1
+            else:
+                failures[m['Name']] = result
+                rerun.append(m['Name'])
+
+        if len(rerun) > 0 and len(rerun) < 20:
+            second_results = dict()
+            self.second_test(rerun, second_results)
+            for name, val in second_results.items():
+                if name not in failures:
+                    continue
+                if self.check_bound(val, lbv, ubv, t):
+                    passcnt += 1
+                    del failures[name]
+                else:
+                    failures[name] = [val]
+                    self.results[0][name] = val
+
+        self.totalcnt += totalcnt
+        self.passedcnt += passcnt
+        if len(failures.keys()) != 0:
+            self.errlist.extend([TestError([name], self.workloads[self.wlidx], val,
+                                rule['RangeLower'], rule['RangeUpper']) for name, val in failures.items()])
+
+        return
+
+    def create_report(self):
+        """
+        Create final report and write into a JSON file.
+        """
+        print(self.errlist)
+
+        if self.debug:
+            allres = [{"Workload": self.workloads[i], "Results": self.allresults[i]}
+                      for i in range(0, len(self.workloads))]
+            self.json_dump(allres, self.datafname)
+
+    def check_rule(self, testtype, metric_list):
+        """
+        Check if the rule uses metric(s) that not exist in current platform.
+
+        @param metric_list: list of metrics from the rule.
+        @return: False when find one metric out in Metric file. (This rule should not skipped.)
+                 True when all metrics used in the rule are found in Metric file.
+        """
+        if testtype == "RelationshipTest":
+            for m in metric_list:
+                if m['Name'] not in self.metrics:
+                    return False
+        return True
+
+    # Start of Collector and Converter
+    def convert(self, data: list, metricvalues: dict):
+        """
+        Convert collected metric data from the -j output to dict of {metric_name:value}.
+        """
+        for json_string in data:
+            try:
+                result = json.loads(json_string)
+                if "metric-unit" in result and result["metric-unit"] != "(null)" and result["metric-unit"] != "":
+                    name = result["metric-unit"].split("  ")[1] if len(result["metric-unit"].split("  ")) > 1 \
+                        else result["metric-unit"]
+                    metricvalues[name.lower()] = float(result["metric-value"])
+            except ValueError as error:
+                continue
+        return
+
+    def _run_perf(self, metric, workload: str):
+        tool = 'perf'
+        command = [tool, 'stat', '--cputype', self.cputype, '-j', '-M', f"{metric}", "-a"]
+        wl = workload.split()
+        command.extend(wl)
+        print(" ".join(command))
+        cmd = subprocess.run(command, stderr=subprocess.PIPE, encoding='utf-8')
+        data = [x+'}' for x in cmd.stderr.split('}\n') if x]
+        if data[0][0] != '{':
+            data[0] = data[0][data[0].find('{'):]
+        return data
+
+    def collect_perf(self, workload: str):
+        """
+        Collect metric data with "perf stat -M" on given workload with -a and -j.
+        """
+        self.results = dict()
+        print(f"Starting perf collection")
+        print(f"Long workload: {workload}")
+        collectlist = dict()
+        if self.collectlist != "":
+            collectlist[0] = {x for x in self.collectlist.split(",")}
+        else:
+            collectlist[0] = set(list(self.metrics))
+        # Create metric set for relationship rules
+        for rule in self.rules:
+            if rule["TestType"] == "RelationshipTest":
+                metrics = [m["Name"] for m in rule["Metrics"]]
+                if not any(m not in collectlist[0] for m in metrics):
+                    collectlist[rule["RuleIndex"]] = [
+                        ",".join(list(set(metrics)))]
+
+        for idx, metrics in collectlist.items():
+            if idx == 0:
+                wl = "true"
+            else:
+                wl = workload
+            for metric in metrics:
+                data = self._run_perf(metric, wl)
+                if idx not in self.results:
+                    self.results[idx] = dict()
+                self.convert(data, self.results[idx])
+        return
+
+    def second_test(self, collectlist, second_results):
+        workload = self.workloads[self.wlidx]
+        for metric in collectlist:
+            data = self._run_perf(metric, workload)
+            self.convert(data, second_results)
+
+    # End of Collector and Converter
+
+    # Start of Rule Generator
+    def parse_perf_metrics(self):
+        """
+        Read and parse perf metric file:
+        1) find metrics with '1%' or '100%' as ScaleUnit for Percent check
+        2) create metric name list
+        """
+        command = ['perf', 'list', '-j', '--details', 'metrics']
+        cmd = subprocess.run(command, stdout=subprocess.PIPE,
+                             stderr=subprocess.PIPE, encoding='utf-8')
+        try:
+            data = json.loads(cmd.stdout)
+            for m in data:
+                if 'MetricName' not in m:
+                    print("Warning: no metric name")
+                    continue
+                if 'Unit' in m and m['Unit'] != self.cputype:
+                    continue
+                name = m['MetricName'].lower()
+                self.metrics.add(name)
+                if 'ScaleUnit' in m and (m['ScaleUnit'] == '1%' or m['ScaleUnit'] == '100%'):
+                    self.pctgmetrics.add(name.lower())
+        except ValueError as error:
+            print(f"Error when parsing metric data")
+            sys.exit()
+
+        return
+
+    def remove_unsupported_rules(self, rules):
+        new_rules = []
+        for rule in rules:
+            add_rule = True
+            for m in rule["Metrics"]:
+                if m["Name"] in self.skiplist or m["Name"] not in self.metrics:
+                    add_rule = False
+                    break
+            if add_rule:
+                new_rules.append(rule)
+        return new_rules
+
+    def create_rules(self):
+        """
+        Create full rules which includes:
+        1) All the rules from the "relationshi_rules" file
+        2) SingleMetric rule for all the 'percent' metrics
+
+        Reindex all the rules to avoid repeated RuleIndex
+        """
+        data = self.read_json(self.rulefname)
+        rules = data['RelationshipRules']
+        self.skiplist = set([name.lower() for name in data['SkipList']])
+        self.rules = self.remove_unsupported_rules(rules)
+        pctgrule = {'RuleIndex': 0,
+                    'TestType': 'SingleMetricTest',
+                    'RangeLower': '0',
+                    'RangeUpper': '100',
+                    'ErrorThreshold': self.tolerance,
+                    'Description': 'Metrics in percent unit have value with in [0, 100]',
+                    'Metrics': [{'Name': m.lower()} for m in self.pctgmetrics]}
+        self.rules.append(pctgrule)
+
+        # Re-index all rules to avoid repeated RuleIndex
+        idx = 1
+        for r in self.rules:
+            r['RuleIndex'] = idx
+            idx += 1
+
+        if self.debug:
+            # TODO: need to test and generate file name correctly
+            data = {'RelationshipRules': self.rules, 'SupportedMetrics': [
+                {"MetricName": name} for name in self.metrics]}
+            self.json_dump(data, self.fullrulefname)
+
+        return
+    # End of Rule Generator
+
+    def _storewldata(self, key):
+        '''
+        Store all the data of one workload into the corresponding data structure for all workloads.
+        @param key: key to the dictionaries (index of self.workloads).
+        '''
+        self.allresults[key] = self.results
+        self.alltotalcnt[key] = self.totalcnt
+        self.allpassedcnt[key] = self.passedcnt
+
+    # Initialize data structures before data validation of each workload
+    def _init_data(self):
+
+        testtypes = ['PositiveValueTest',
+                     'RelationshipTest', 'SingleMetricTest']
+        self.results = dict()
+        self.ignoremetrics = set()
+        self.errlist = list()
+        self.totalcnt = 0
+        self.passedcnt = 0
+
+    def test(self):
+        '''
+        The real entry point of the test framework.
+        This function loads the validation rule JSON file and Standard Metric file to create rules for
+        testing and namemap dictionaries.
+        It also reads in result JSON file for testing.
+
+        In the test process, it passes through each rule and launch correct test function bases on the
+        'TestType' field of the rule.
+
+        The final report is written into a JSON file.
+        '''
+        if not self.collectlist:
+            self.parse_perf_metrics()
+        if not self.metrics:
+            print("No metric found for testing")
+            return 0
+        self.create_rules()
+        for i in range(0, len(self.workloads)):
+            self.wlidx = i
+            self._init_data()
+            self.collect_perf(self.workloads[i])
+            # Run positive value test
+            self.pos_val_test()
+            for r in self.rules:
+                # skip rules that uses metrics not exist in this platform
+                testtype = r['TestType']
+                if not self.check_rule(testtype, r['Metrics']):
+                    continue
+                if testtype == 'RelationshipTest':
+                    self.relationship_test(r)
+                elif testtype == 'SingleMetricTest':
+                    self.single_test(r)
+                else:
+                    print("Unsupported Test Type: ", testtype)
+            print("Workload: ", self.workloads[i])
+            print("Total Test Count: ", self.totalcnt)
+            print("Passed Test Count: ", self.passedcnt)
+            self._storewldata(i)
+        self.create_report()
+        return len(self.errlist) > 0
+# End of Class Validator
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(
+        description="Launch metric value validation")
+
+    parser.add_argument(
+        "-rule", help="Base validation rule file", required=True)
+    parser.add_argument(
+        "-output_dir", help="Path for validator output file, report file", required=True)
+    parser.add_argument("-debug", help="Debug run, save intermediate data to files",
+                        action="store_true", default=False)
+    parser.add_argument(
+        "-wl", help="Workload to run while data collection", default="true")
+    parser.add_argument("-m", help="Metric list to validate", default="")
+    parser.add_argument("-cputype", help="Only test metrics for the given CPU/PMU type",
+                        default="cpu")
+    args = parser.parse_args()
+    outpath = Path(args.output_dir)
+    reportf = Path.joinpath(outpath, 'perf_report.json')
+    fullrule = Path.joinpath(outpath, 'full_rule.json')
+    datafile = Path.joinpath(outpath, 'perf_data.json')
+
+    validator = Validator(args.rule, reportf, debug=args.debug,
+                          datafname=datafile, fullrulefname=fullrule, workload=args.wl,
+                          metrics=args.m, cputype=args.cputype)
+    ret = validator.test()
+
+    return ret
+
+
+if __name__ == "__main__":
+    import sys
+    sys.exit(main())
diff --git a/tools/perf/tests/shell/lib/perf_metric_validation_rules.json b/tools/perf/tests/shell/lib/perf_metric_validation_rules.json
new file mode 100644
index 000000000000..eb6f59e018b7
--- /dev/null
+++ b/tools/perf/tests/shell/lib/perf_metric_validation_rules.json
@@ -0,0 +1,398 @@
+{
+    "SkipList": [
+        "tsx_aborted_cycles",
+        "tsx_transactional_cycles",
+        "C2_Pkg_Residency",
+        "C6_Pkg_Residency",
+        "C1_Core_Residency",
+        "C6_Core_Residency",
+        "tma_false_sharing",
+        "tma_remote_cache",
+        "tma_contested_accesses"
+    ],
+    "RelationshipRules": [
+        {
+            "RuleIndex": 1,
+            "Formula": "a+b",
+            "TestType": "RelationshipTest",
+            "RangeLower": "c",
+            "RangeUpper": "c",
+            "ErrorThreshold": 5.0,
+            "Description": "Intel(R) Optane(TM) Persistent Memory(PMEM)  bandwidth total includes Intel(R) Optane(TM) Persistent Memory(PMEM) read bandwidth and Intel(R) Optane(TM) Persistent Memory(PMEM) write bandwidth",
+            "Metrics": [
+                {
+                    "Name": "pmem_memory_bandwidth_read",
+                    "Alias": "a"
+                },
+                {
+                    "Name": "pmem_memory_bandwidth_write",
+                    "Alias": "b"
+                },
+                {
+                    "Name": "pmem_memory_bandwidth_total",
+                    "Alias": "c"
+                }
+            ]
+        },
+        {
+            "RuleIndex": 2,
+            "Formula": "a+b",
+            "TestType": "RelationshipTest",
+            "RangeLower": "c",
+            "RangeUpper": "c",
+            "ErrorThreshold": 5.0,
+            "Description": "DDR memory bandwidth total includes DDR memory read bandwidth and DDR memory write bandwidth",
+            "Metrics": [
+                {
+                    "Name": "memory_bandwidth_read",
+                    "Alias": "a"
+                },
+                {
+                    "Name": "memory_bandwidth_write",
+                    "Alias": "b"
+                },
+                {
+                    "Name": "memory_bandwidth_total",
+                    "Alias": "c"
+                }
+            ]
+        },
+        {
+            "RuleIndex": 3,
+            "Formula": "a+b",
+            "TestType": "RelationshipTest",
+            "RangeLower": "100",
+            "RangeUpper": "100",
+            "ErrorThreshold": 5.0,
+            "Description": "Total memory read accesses includes memory reads from last level cache (LLC) addressed to local DRAM and memory reads from the last level cache (LLC) addressed to remote DRAM.",
+            "Metrics": [
+                {
+                    "Name": "numa_reads_addressed_to_local_dram",
+                    "Alias": "a"
+                },
+                {
+                    "Name": "numa_reads_addressed_to_remote_dram",
+                    "Alias": "b"
+                }
+            ]
+        },
+        {
+            "RuleIndex": 4,
+            "Formula": "a",
+            "TestType": "SingleMetricTest",
+            "RangeLower": "0.125",
+            "RangeUpper": "",
+            "ErrorThreshold": "",
+            "Description": "",
+            "Metrics": [
+                {
+                    "Name": "cpi",
+                    "Alias": "a"
+                }
+            ]
+        },
+        {
+            "RuleIndex": 5,
+            "Formula": "",
+            "TestType": "SingleMetricTest",
+            "RangeLower": "0",
+            "RangeUpper": "1",
+            "ErrorThreshold": 5.0,
+            "Description": "Ratio values should be within value range [0,1)",
+            "Metrics": [
+                {
+                    "Name": "loads_per_instr",
+                    "Alias": ""
+                },
+                {
+                    "Name": "stores_per_instr",
+                    "Alias": ""
+                },
+                {
+                    "Name": "l1d_mpi",
+                    "Alias": ""
+                },
+                {
+                    "Name": "l1d_demand_data_read_hits_per_instr",
+                    "Alias": ""
+                },
+                {
+                    "Name": "l1_i_code_read_misses_with_prefetches_per_instr",
+                    "Alias": ""
+                },
+                {
+                    "Name": "l2_demand_data_read_hits_per_instr",
+                    "Alias": ""
+                },
+                {
+                    "Name": "l2_mpi",
+                    "Alias": ""
+                },
+                {
+                    "Name": "l2_demand_data_read_mpi",
+                    "Alias": ""
+                },
+                {
+                    "Name": "l2_demand_code_mpi",
+                    "Alias": ""
+                }
+            ]
+        },
+        {
+            "RuleIndex": 6,
+            "Formula": "a+b+c+d",
+            "TestType": "RelationshipTest",
+            "RangeLower": "100",
+            "RangeUpper": "100",
+            "ErrorThreshold": 5.0,
+            "Description": "Sum of TMA level 1 metrics should be 100%",
+            "Metrics": [
+                {
+                    "Name": "tma_frontend_bound",
+                    "Alias": "a"
+                },
+                {
+                    "Name": "tma_bad_speculation",
+                    "Alias": "b"
+                },
+                {
+                    "Name": "tma_backend_bound",
+                    "Alias": "c"
+                },
+                {
+                    "Name": "tma_retiring",
+                    "Alias": "d"
+                }
+            ]
+        },
+        {
+            "RuleIndex": 7,
+            "Formula": "a+b",
+            "TestType": "RelationshipTest",
+            "RangeLower": "c",
+            "RangeUpper": "c",
+            "ErrorThreshold": 5.0,
+            "Description": "Sum of the level 2 children should equal level 1 parent",
+            "Metrics": [
+                {
+                    "Name": "tma_fetch_latency",
+                    "Alias": "a"
+                },
+                {
+                    "Name": "tma_fetch_bandwidth",
+                    "Alias": "b"
+                },
+                {
+                    "Name": "tma_frontend_bound",
+                    "Alias": "c"
+                }
+            ]
+        },
+        {
+            "RuleIndex": 8,
+            "Formula": "a+b",
+            "TestType": "RelationshipTest",
+            "RangeLower": "c",
+            "RangeUpper": "c",
+            "ErrorThreshold": 5.0,
+            "Description": "Sum of the level 2 children should equal level 1 parent",
+            "Metrics": [
+                {
+                    "Name": "tma_branch_mispredicts",
+                    "Alias": "a"
+                },
+                {
+                    "Name": "tma_machine_clears",
+                    "Alias": "b"
+                },
+                {
+                    "Name": "tma_bad_speculation",
+                    "Alias": "c"
+                }
+            ]
+        },
+        {
+            "RuleIndex": 9,
+            "Formula": "a+b",
+            "TestType": "RelationshipTest",
+            "RangeLower": "c",
+            "RangeUpper": "c",
+            "ErrorThreshold": 5.0,
+            "Description": "Sum of the level 2 children should equal level 1 parent",
+            "Metrics": [
+                {
+                    "Name": "tma_memory_bound",
+                    "Alias": "a"
+                },
+                {
+                    "Name": "tma_core_bound",
+                    "Alias": "b"
+                },
+                {
+                    "Name": "tma_backend_bound",
+                    "Alias": "c"
+                }
+            ]
+        },
+        {
+            "RuleIndex": 10,
+            "Formula": "a+b",
+            "TestType": "RelationshipTest",
+            "RangeLower": "c",
+            "RangeUpper": "c",
+            "ErrorThreshold": 5.0,
+            "Description": "Sum of the level 2 children should equal level 1 parent",
+            "Metrics": [
+                {
+                    "Name": "tma_light_operations",
+                    "Alias": "a"
+                },
+                {
+                    "Name": "tma_heavy_operations",
+                    "Alias": "b"
+                },
+                {
+                    "Name": "tma_retiring",
+                    "Alias": "c"
+                }
+            ]
+        },
+        {
+            "RuleIndex": 11,
+            "Formula": "a+b+c",
+            "TestType": "RelationshipTest",
+            "RangeLower": "100",
+            "RangeUpper": "100",
+            "ErrorThreshold": 5.0,
+            "Description": "The all_requests includes the memory_page_empty, memory_page_misses, and memory_page_hits equals.",
+            "Metrics": [
+                {
+                    "Name": "memory_page_empty_vs_all_requests",
+                    "Alias": "a"
+                },
+                {
+                    "Name": "memory_page_misses_vs_all_requests",
+                    "Alias": "b"
+                },
+                {
+                    "Name": "memory_page_hits_vs_all_requests",
+                    "Alias": "c"
+                }
+            ]
+        },
+        {
+            "RuleIndex": 12,
+            "Formula": "a-b",
+            "TestType": "RelationshipTest",
+            "RangeLower": "0",
+            "RangeUpper": "",
+            "ErrorThreshold": 5.0,
+            "Description": "CPU utilization in kernel mode should always be <= cpu utilization",
+            "Metrics": [
+                {
+                    "Name": "cpu_utilization",
+                    "Alias": "a"
+                },
+                {
+                    "Name": "cpu_utilization_in_kernel_mode",
+                    "Alias": "b"
+                }
+            ]
+        },
+        {
+            "RuleIndex": 13,
+            "Formula": "a-b",
+            "TestType": "RelationshipTest",
+            "RangeLower": "0",
+            "RangeUpper": "",
+            "ErrorThreshold": 5.0,
+            "Description": "Total L2 misses per instruction should be >= L2 demand data read misses per instruction",
+            "Metrics": [
+                {
+                    "Name": "l2_mpi",
+                    "Alias": "a"
+                },
+                {
+                    "Name": "l2_demand_data_read_mpi",
+                    "Alias": "b"
+                }
+            ]
+        },
+        {
+            "RuleIndex": 14,
+            "Formula": "a-b",
+            "TestType": "RelationshipTest",
+            "RangeLower": "0",
+            "RangeUpper": "",
+            "ErrorThreshold": 5.0,
+            "Description": "Total L2 misses per instruction should be >= L2 demand code misses per instruction",
+            "Metrics": [
+                {
+                    "Name": "l2_mpi",
+                    "Alias": "a"
+                },
+                {
+                    "Name": "l2_demand_code_mpi",
+                    "Alias": "b"
+                }
+            ]
+        },
+        {
+            "RuleIndex": 15,
+            "Formula": "b+c+d",
+            "TestType": "RelationshipTest",
+            "RangeLower": "a",
+            "RangeUpper": "a",
+            "ErrorThreshold": 5.0,
+            "Description": "L3 data read, rfo, code misses per instruction equals total L3 misses per instruction.",
+            "Metrics": [
+                {
+                    "Name": "llc_mpi",
+                    "Alias": "a"
+                },
+                {
+                    "Name": "llc_data_read_mpi_demand_plus_prefetch",
+                    "Alias": "b"
+                },
+                {
+                    "Name": "llc_rfo_read_mpi_demand_plus_prefetch",
+                    "Alias": "c"
+                },
+                {
+                    "Name": "llc_code_read_mpi_demand_plus_prefetch",
+                    "Alias": "d"
+                }
+            ]
+        },
+        {
+            "RuleIndex": 16,
+            "Formula": "a",
+            "TestType": "SingleMetricTest",
+            "RangeLower": "0",
+            "RangeUpper": "8",
+            "ErrorThreshold": 0.0,
+            "Description": "Setting generous range for allowable frequencies",
+            "Metrics": [
+                {
+                    "Name": "uncore_freq",
+                    "Alias": "a"
+                }
+            ]
+        },
+        {
+            "RuleIndex": 17,
+            "Formula": "a",
+            "TestType": "SingleMetricTest",
+            "RangeLower": "0",
+            "RangeUpper": "8",
+            "ErrorThreshold": 0.0,
+            "Description": "Setting generous range for allowable frequencies",
+            "Metrics": [
+                {
+                    "Name": "cpu_operating_frequency",
+                    "Alias": "a"
+                }
+            ]
+        }
+    ]
+}
+\ No newline at end of file
diff --git a/tools/perf/tests/shell/lib/probe.sh b/tools/perf/tests/shell/lib/probe.sh
index 6293cc660947..5aa6e2ec5734 100644
--- a/tools/perf/tests/shell/lib/probe.sh
+++ b/tools/perf/tests/shell/lib/probe.sh
@@ -1,6 +1,13 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
 # Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
 
 skip_if_no_perf_probe() {
 	perf probe 2>&1 | grep -q 'is not a perf-command' && return 2
 	return 0
 }
+
+skip_if_no_perf_trace() {
+	perf trace -h 2>&1 | grep -q -e 'is not a perf-command' -e 'trace command not available' && return 2
+	return 0
+}
diff --git a/tools/perf/tests/shell/lib/probe_vfs_getname.sh b/tools/perf/tests/shell/lib/probe_vfs_getname.sh
index 7cb99b433888..88cd0e26d5f6 100644
--- a/tools/perf/tests/shell/lib/probe_vfs_getname.sh
+++ b/tools/perf/tests/shell/lib/probe_vfs_getname.sh
@@ -1,3 +1,4 @@
+#!/bin/bash
 # Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
 
 perf probe -l 2>&1 | grep -q probe:vfs_getname
@@ -10,15 +11,42 @@ cleanup_probe_vfs_getname() {
 }
 
 add_probe_vfs_getname() {
-	local verbose=$1
+	add_probe_verbose=$1
 	if [ $had_vfs_getname -eq 1 ] ; then
-		line=$(perf probe -L getname_flags 2>&1 | egrep 'result.*=.*filename;' | sed -r 's/[[:space:]]+([[:digit:]]+)[[:space:]]+result->uptr.*/\1/')
+		result_initname_re="[[:space:]]+([[:digit:]]+)[[:space:]]+initname.*"
+		line=$(perf probe -L getname_flags 2>&1 | grep -E "$result_initname_re" | sed -r "s/$result_initname_re/\1/")
+
+		# Search the old regular expressions so that this will
+		# pass on older kernels as well.
+		if [ -z "$line" ] ; then
+			result_filename_re="[[:space:]]+([[:digit:]]+)[[:space:]]+result->uptr.*"
+			line=$(perf probe -L getname_flags 2>&1 | grep -E "$result_filename_re" | sed -r "s/$result_filename_re/\1/")
+		fi
+
+		if [ -z "$line" ] ; then
+			result_aname_re="[[:space:]]+([[:digit:]]+)[[:space:]]+result->aname = NULL;"
+			line=$(perf probe -L getname_flags 2>&1 | grep -E "$result_aname_re" | sed -r "s/$result_aname_re/\1/")
+		fi
+
+		if [ -z "$line" ] ; then
+			echo "Could not find probeable line"
+			return 2
+		fi
+
 		perf probe -q       "vfs_getname=getname_flags:${line} pathname=result->name:string" || \
-		perf probe $verbose "vfs_getname=getname_flags:${line} pathname=filename:string"
+		perf probe $add_probe_verbose "vfs_getname=getname_flags:${line} pathname=filename:ustring" || return 1
 	fi
 }
 
 skip_if_no_debuginfo() {
-	add_probe_vfs_getname -v 2>&1 | egrep -q "^(Failed to find the path for kernel|Debuginfo-analysis is not supported)" && return 2
+	add_probe_vfs_getname -v 2>&1 | grep -E -q "^(Failed to find the path for the kernel|Debuginfo-analysis is not supported)|(file has no debug information)" && return 2
 	return 1
 }
+
+# check if perf is compiled with libtraceevent support
+skip_no_probe_record_support() {
+	if [ $had_vfs_getname -eq 1 ] ; then
+		perf check feature -q libtraceevent && return 1
+		return 2
+	fi
+}
diff --git a/tools/perf/tests/shell/lib/setup_python.sh b/tools/perf/tests/shell/lib/setup_python.sh
new file mode 100644
index 000000000000..a58e5536f2ed
--- /dev/null
+++ b/tools/perf/tests/shell/lib/setup_python.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+if [ "x$PYTHON" = "x" ]
+then
+  python3 --version >/dev/null 2>&1 && PYTHON=python3
+fi
+if [ "x$PYTHON" = "x" ]
+then
+  python --version >/dev/null 2>&1 && PYTHON=python
+fi
+if [ "x$PYTHON" = "x" ]
+then
+  echo Skipping test, python not detected please set environment variable PYTHON.
+  exit 2
+fi
diff --git a/tools/perf/tests/shell/lib/stat_output.sh b/tools/perf/tests/shell/lib/stat_output.sh
new file mode 100644
index 000000000000..3c36e80fe422
--- /dev/null
+++ b/tools/perf/tests/shell/lib/stat_output.sh
@@ -0,0 +1,195 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Return true if perf_event_paranoid is > $1 and not running as root.
+function ParanoidAndNotRoot()
+{
+	 [ "$(id -u)" != 0 ] && [ "$(cat /proc/sys/kernel/perf_event_paranoid)" -gt $1 ]
+}
+
+# $1 name $2 extra_opt
+check_no_args()
+{
+        echo -n "Checking $1 output: no args "
+        perf stat $2 true
+        commachecker --no-args
+        echo "[Success]"
+}
+
+check_system_wide()
+{
+	echo -n "Checking $1 output: system wide "
+	if ParanoidAndNotRoot 0
+	then
+		echo "[Skip] paranoid and not root"
+		return
+	fi
+	perf stat -a $2 true
+	commachecker --system-wide
+	echo "[Success]"
+}
+
+check_system_wide_no_aggr()
+{
+	echo -n "Checking $1 output: system wide no aggregation "
+	if ParanoidAndNotRoot 0
+	then
+		echo "[Skip] paranoid and not root"
+		return
+	fi
+	perf stat -A -a --no-merge $2 true
+	commachecker --system-wide-no-aggr
+	echo "[Success]"
+}
+
+check_interval()
+{
+	echo -n "Checking $1 output: interval "
+	perf stat -I 1000 $2 true
+	commachecker --interval
+	echo "[Success]"
+}
+
+check_event()
+{
+	echo -n "Checking $1 output: event "
+	perf stat -e cpu-clock $2 true
+	commachecker --event
+	echo "[Success]"
+}
+
+check_per_core()
+{
+	echo -n "Checking $1 output: per core "
+	if ParanoidAndNotRoot 0
+	then
+		echo "[Skip] paranoid and not root"
+		return
+	fi
+	perf stat --per-core -a $2 true
+	commachecker --per-core
+	echo "[Success]"
+}
+
+check_per_thread()
+{
+	echo -n "Checking $1 output: per thread "
+	if ParanoidAndNotRoot 0
+	then
+		echo "[Skip] paranoid and not root"
+		return
+	fi
+	perf stat --per-thread -p $$ $2 true
+	commachecker --per-thread
+	echo "[Success]"
+}
+
+check_per_cache_instance()
+{
+	echo -n "Checking $1 output: per cache instance "
+	if ParanoidAndNotRoot 0
+	then
+		echo "[Skip] paranoid and not root"
+		return
+	fi
+	perf stat --per-cache -a $2 true
+	commachecker --per-cache
+	echo "[Success]"
+}
+
+check_per_cluster()
+{
+	echo -n "Checking $1 output: per cluster "
+	if ParanoidAndNotRoot 0
+	then
+		echo "[Skip] paranoid and not root"
+		return
+	fi
+	perf stat --per-cluster -a $2 true
+	echo "[Success]"
+}
+
+check_per_die()
+{
+	echo -n "Checking $1 output: per die "
+	if ParanoidAndNotRoot 0
+	then
+		echo "[Skip] paranoid and not root"
+		return
+	fi
+	perf stat --per-die -a $2 true
+	commachecker --per-die
+	echo "[Success]"
+}
+
+check_per_node()
+{
+	echo -n "Checking $1 output: per node "
+	if ParanoidAndNotRoot 0
+	then
+		echo "[Skip] paranoid and not root"
+		return
+	fi
+	perf stat --per-node -a $2 true
+	commachecker --per-node
+	echo "[Success]"
+}
+
+check_per_socket()
+{
+	echo -n "Checking $1 output: per socket "
+	if ParanoidAndNotRoot 0
+	then
+		echo "[Skip] paranoid and not root"
+		return
+	fi
+	perf stat --per-socket -a $2 true
+	commachecker --per-socket
+	echo "[Success]"
+}
+
+check_metric_only()
+{
+	echo -n "Checking $1 output: metric only "
+	if [ "$(uname -m)" = "s390x" ] && ! grep '^facilities' /proc/cpuinfo  | grep -qw 67
+	then
+		echo "[Skip] CPU-measurement counter facility not installed"
+		return
+	fi
+	perf stat --metric-only $2 -M page_faults_per_second true
+	commachecker --metric-only
+	echo "[Success]"
+}
+
+# The perf stat options for per-socket, per-core, per-die
+# and -A ( no_aggr mode ) uses the info fetched from this
+# directory: "/sys/devices/system/cpu/cpu*/topology". For
+# example, socket value is fetched from "physical_package_id"
+# file in topology directory.
+# Reference: cpu__get_topology_int in util/cpumap.c
+# If the platform doesn't expose topology information, values
+# will be set to -1. For example, incase of pSeries platform
+# of powerpc, value for  "physical_package_id" is restricted
+# and set to -1. Check here validates the socket-id read from
+# topology file before proceeding further
+
+FILE_LOC="/sys/devices/system/cpu/cpu*/topology/"
+FILE_NAME="physical_package_id"
+
+function check_for_topology()
+{
+	if ! ParanoidAndNotRoot 0
+	then
+		socket_file=`ls $FILE_LOC/$FILE_NAME | head -n 1`
+		[ -z $socket_file ] && {
+			echo 0
+			return
+		}
+		socket_id=`cat $socket_file`
+		[ $socket_id == -1 ] && {
+			echo 1
+			return
+		}
+	fi
+	echo 0
+}
diff --git a/tools/perf/tests/shell/lib/waiting.sh b/tools/perf/tests/shell/lib/waiting.sh
new file mode 100644
index 000000000000..3a152892e077
--- /dev/null
+++ b/tools/perf/tests/shell/lib/waiting.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+tenths=date\ +%s%1N
+
+# Wait for PID $1 to have $2 number of threads started
+# Time out after $3 tenths of a second or 5 seconds if $3 is ""
+wait_for_threads()
+{
+	tm_out=$3 ; [ -n "${tm_out}" ] || tm_out=50
+	start_time=$($tenths)
+	while [ -e "/proc/$1/task" ] ; do
+		th_cnt=$(find "/proc/$1/task" -mindepth 1 -maxdepth 1 -printf x | wc -c)
+		if [ "${th_cnt}" -ge "$2" ] ; then
+			return 0
+		fi
+		# Wait at most tm_out tenths of a second
+		if [ $(($($tenths) - start_time)) -ge $tm_out ] ; then
+			echo "PID $1 does not have $2 threads"
+			return 1
+		fi
+	done
+	return 1
+}
+
+# Wait for perf record -vvv 2>$2 with PID $1 to start by looking at file $2
+# It depends on capturing perf record debug message "perf record has started"
+# Time out after $3 tenths of a second or 5 seconds if $3 is ""
+wait_for_perf_to_start()
+{
+	tm_out=$3 ; [ -n "${tm_out}" ] || tm_out=50
+	echo "Waiting for \"perf record has started\" message"
+	start_time=$($tenths)
+	while [ -e "/proc/$1" ] ; do
+		if grep -q "perf record has started" "$2" ; then
+			echo OK
+			break
+		fi
+		# Wait at most tm_out tenths of a second
+		if [ $(($($tenths) - start_time)) -ge $tm_out ] ; then
+			echo "perf recording did not start"
+			return 1
+		fi
+	done
+	return 0
+}
+
+# Wait for process PID %1 to exit
+# Time out after $2 tenths of a second or 5 seconds if $2 is ""
+wait_for_process_to_exit()
+{
+	tm_out=$2 ; [ -n "${tm_out}" ] || tm_out=50
+	start_time=$($tenths)
+	while [ -e "/proc/$1" ] ; do
+		# Wait at most tm_out tenths of a second
+		if [ $(($($tenths) - start_time)) -ge $tm_out ] ; then
+			echo "PID $1 did not exit as expected"
+			return 1
+		fi
+	done
+	return 0
+}
+
+# Check if PID $1 is still running after $2 tenths of a second
+# or 0.3 seconds if $2 is ""
+is_running()
+{
+	tm_out=$2 ; [ -n "${tm_out}" ] || tm_out=3
+	start_time=$($tenths)
+	while [ -e "/proc/$1" ] ; do
+		# Check for at least tm_out tenths of a second
+		if [ $(($($tenths) - start_time)) -gt $tm_out ] ; then
+			return 0
+		fi
+	done
+	echo "PID $1 exited prematurely"
+	return 1
+}