summaryrefslogtreecommitdiff
path: root/tools/perf/util/cpumap.c
diff options
context:
space:
mode:
authorIan Rogers <irogers@google.com>2022-06-14 07:33:51 -0700
committerArnaldo Carvalho de Melo <acme@redhat.com>2022-08-19 15:30:28 -0300
commitb2f10cd4e805eb647773df273eb1a6ff9e6ea45d (patch)
tree0f1d827cedd9d7f752601be9dbffb82d3c86f220 /tools/perf/util/cpumap.c
parent28526478ccae88680645405f4e849d9ed4fbce7f (diff)
perf cpumap: Fix alignment for masks in event encoding
A mask encoding of a cpu map is laid out as: u16 nr u16 long_size unsigned long mask[]; However, the mask may be 8-byte aligned meaning there is a 4-byte pad after long_size. This means 32-bit and 64-bit builds see the mask as being at different offsets. On top of this the structure is in the byte data[] encoded as: u16 type char data[] This means the mask's struct isn't the required 4 or 8 byte aligned, but is offset by 2. Consequently the long reads and writes are causing undefined behavior as the alignment is broken. Fix the mask struct by creating explicit 32 and 64-bit variants, use a union to avoid data[] and casts; the struct must be packed so the layout matches the existing perf.data layout. Taking an address of a member of a packed struct breaks alignment so pass the packed perf_record_cpu_map_data to functions, so they can access variables with the right alignment. As the 64-bit version has 4 bytes of padding, optimizing writing to only write the 32-bit version. Committer notes: Disable warnings about 'packed' that break the build in some arches like riscv64, but just around that specific struct. Signed-off-by: Ian Rogers <irogers@google.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Alexey Bayduraev <alexey.v.bayduraev@linux.intel.com> Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com> Cc: Colin Ian King <colin.king@intel.com> Cc: Dave Marchevsky <davemarchevsky@fb.com> Cc: German Gomez <german.gomez@arm.com> Cc: Gustavo A. R. Silva <gustavoars@kernel.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: James Clark <james.clark@arm.com> Cc: Kees Kook <keescook@chromium.org> Cc: Leo Yan <leo.yan@linaro.org> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Riccardo Mancini <rickyman7@gmail.com> Cc: Song Liu <songliubraving@fb.com> Cc: Stephane Eranian <eranian@google.com> Link: https://lore.kernel.org/r/20220614143353.1559597-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/util/cpumap.c')
-rw-r--r--tools/perf/util/cpumap.c80
1 files changed, 64 insertions, 16 deletions
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 12b2243222b0..ae43fb88f444 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -22,54 +22,102 @@ static int max_node_num;
*/
static int *cpunode_map;
-static struct perf_cpu_map *cpu_map__from_entries(struct cpu_map_entries *cpus)
+bool perf_record_cpu_map_data__test_bit(int i,
+ const struct perf_record_cpu_map_data *data)
+{
+ int bit_word32 = i / 32;
+ __u32 bit_mask32 = 1U << (i & 31);
+ int bit_word64 = i / 64;
+ __u64 bit_mask64 = ((__u64)1) << (i & 63);
+
+ return (data->mask32_data.long_size == 4)
+ ? (bit_word32 < data->mask32_data.nr) &&
+ (data->mask32_data.mask[bit_word32] & bit_mask32) != 0
+ : (bit_word64 < data->mask64_data.nr) &&
+ (data->mask64_data.mask[bit_word64] & bit_mask64) != 0;
+}
+
+/* Read ith mask value from data into the given 64-bit sized bitmap */
+static void perf_record_cpu_map_data__read_one_mask(const struct perf_record_cpu_map_data *data,
+ int i, unsigned long *bitmap)
+{
+#if __SIZEOF_LONG__ == 8
+ if (data->mask32_data.long_size == 4)
+ bitmap[0] = data->mask32_data.mask[i];
+ else
+ bitmap[0] = data->mask64_data.mask[i];
+#else
+ if (data->mask32_data.long_size == 4) {
+ bitmap[0] = data->mask32_data.mask[i];
+ bitmap[1] = 0;
+ } else {
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ bitmap[0] = (unsigned long)(data->mask64_data.mask[i] >> 32);
+ bitmap[1] = (unsigned long)data->mask64_data.mask[i];
+#else
+ bitmap[0] = (unsigned long)data->mask64_data.mask[i];
+ bitmap[1] = (unsigned long)(data->mask64_data.mask[i] >> 32);
+#endif
+ }
+#endif
+}
+static struct perf_cpu_map *cpu_map__from_entries(const struct perf_record_cpu_map_data *data)
{
struct perf_cpu_map *map;
- map = perf_cpu_map__empty_new(cpus->nr);
+ map = perf_cpu_map__empty_new(data->cpus_data.nr);
if (map) {
unsigned i;
- for (i = 0; i < cpus->nr; i++) {
+ for (i = 0; i < data->cpus_data.nr; i++) {
/*
* Special treatment for -1, which is not real cpu number,
* and we need to use (int) -1 to initialize map[i],
* otherwise it would become 65535.
*/
- if (cpus->cpu[i] == (u16) -1)
+ if (data->cpus_data.cpu[i] == (u16) -1)
map->map[i].cpu = -1;
else
- map->map[i].cpu = (int) cpus->cpu[i];
+ map->map[i].cpu = (int) data->cpus_data.cpu[i];
}
}
return map;
}
-static struct perf_cpu_map *cpu_map__from_mask(struct perf_record_record_cpu_map *mask)
+static struct perf_cpu_map *cpu_map__from_mask(const struct perf_record_cpu_map_data *data)
{
+ DECLARE_BITMAP(local_copy, 64);
+ int weight = 0, mask_nr = data->mask32_data.nr;
struct perf_cpu_map *map;
- int nr, nbits = mask->nr * mask->long_size * BITS_PER_BYTE;
- nr = bitmap_weight(mask->mask, nbits);
+ for (int i = 0; i < mask_nr; i++) {
+ perf_record_cpu_map_data__read_one_mask(data, i, local_copy);
+ weight += bitmap_weight(local_copy, 64);
+ }
+
+ map = perf_cpu_map__empty_new(weight);
+ if (!map)
+ return NULL;
- map = perf_cpu_map__empty_new(nr);
- if (map) {
- int cpu, i = 0;
+ for (int i = 0, j = 0; i < mask_nr; i++) {
+ int cpus_per_i = (i * data->mask32_data.long_size * BITS_PER_BYTE);
+ int cpu;
- for_each_set_bit(cpu, mask->mask, nbits)
- map->map[i++].cpu = cpu;
+ perf_record_cpu_map_data__read_one_mask(data, i, local_copy);
+ for_each_set_bit(cpu, local_copy, 64)
+ map->map[j++].cpu = cpu + cpus_per_i;
}
return map;
}
-struct perf_cpu_map *cpu_map__new_data(struct perf_record_cpu_map_data *data)
+struct perf_cpu_map *cpu_map__new_data(const struct perf_record_cpu_map_data *data)
{
if (data->type == PERF_CPU_MAP__CPUS)
- return cpu_map__from_entries((struct cpu_map_entries *)data->data);
+ return cpu_map__from_entries(data);
else
- return cpu_map__from_mask((struct perf_record_record_cpu_map *)data->data);
+ return cpu_map__from_mask(data);
}
size_t cpu_map__fprintf(struct perf_cpu_map *map, FILE *fp)