diff options
Diffstat (limited to 'drivers/gpu/drm/v3d')
-rw-r--r-- | drivers/gpu/drm/v3d/v3d_bo.c | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/v3d/v3d_debugfs.c | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/v3d/v3d_drv.c | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/v3d/v3d_drv.h | 8 | ||||
-rw-r--r-- | drivers/gpu/drm/v3d/v3d_mmu.c | 8 | ||||
-rw-r--r-- | drivers/gpu/drm/v3d/v3d_perfmon.c | 57 | ||||
-rw-r--r-- | drivers/gpu/drm/v3d/v3d_performance_counters.h | 12 | ||||
-rw-r--r-- | drivers/gpu/drm/v3d/v3d_regs.h | 29 | ||||
-rw-r--r-- | drivers/gpu/drm/v3d/v3d_sched.c | 26 | ||||
-rw-r--r-- | drivers/gpu/drm/v3d/v3d_submit.c | 19 |
10 files changed, 119 insertions, 51 deletions
diff --git a/drivers/gpu/drm/v3d/v3d_bo.c b/drivers/gpu/drm/v3d/v3d_bo.c index 73ab7dd31b17..bb7815599435 100644 --- a/drivers/gpu/drm/v3d/v3d_bo.c +++ b/drivers/gpu/drm/v3d/v3d_bo.c @@ -13,10 +13,6 @@ * Display engines requiring physically contiguous allocations should * look into Mesa's "renderonly" support (as used by the Mesa pl111 * driver) for an example of how to integrate with V3D. - * - * Long term, we should support evicting pages from the MMU when under - * memory pressure (thus the v3d_bo_get_pages() refcounting), but - * that's not a high priority since our systems tend to not have swap. */ #include <linux/dma-buf.h> diff --git a/drivers/gpu/drm/v3d/v3d_debugfs.c b/drivers/gpu/drm/v3d/v3d_debugfs.c index 19e3ee7ac897..76816f2551c1 100644 --- a/drivers/gpu/drm/v3d/v3d_debugfs.c +++ b/drivers/gpu/drm/v3d/v3d_debugfs.c @@ -237,8 +237,8 @@ static int v3d_measure_clock(struct seq_file *m, void *unused) if (v3d->ver >= 40) { int cycle_count_reg = V3D_PCTR_CYCLE_COUNT(v3d->ver); V3D_CORE_WRITE(core, V3D_V4_PCTR_0_SRC_0_3, - V3D_SET_FIELD(cycle_count_reg, - V3D_PCTR_S0)); + V3D_SET_FIELD_VER(cycle_count_reg, + V3D_PCTR_S0, v3d->ver)); V3D_CORE_WRITE(core, V3D_V4_PCTR_0_CLR, 1); V3D_CORE_WRITE(core, V3D_V4_PCTR_0_EN, 1); } else { diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c index bee51c942a56..930737a9347b 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.c +++ b/drivers/gpu/drm/v3d/v3d_drv.c @@ -31,7 +31,6 @@ #define DRIVER_NAME "v3d" #define DRIVER_DESC "Broadcom V3D graphics" -#define DRIVER_DATE "20180419" #define DRIVER_MAJOR 1 #define DRIVER_MINOR 0 #define DRIVER_PATCHLEVEL 0 @@ -224,6 +223,7 @@ static const struct drm_ioctl_desc v3d_drm_ioctls[] = { DRM_IOCTL_DEF_DRV(V3D_PERFMON_GET_VALUES, v3d_perfmon_get_values_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CPU, v3d_submit_cpu_ioctl, DRM_RENDER_ALLOW | DRM_AUTH), DRM_IOCTL_DEF_DRV(V3D_PERFMON_GET_COUNTER, v3d_perfmon_get_counter_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(V3D_PERFMON_SET_GLOBAL, v3d_perfmon_set_global_ioctl, DRM_RENDER_ALLOW), }; static const struct drm_driver v3d_drm_driver = { @@ -248,7 +248,6 @@ static const struct drm_driver v3d_drm_driver = { .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = DRIVER_DATE, .major = DRIVER_MAJOR, .minor = DRIVER_MINOR, .patchlevel = DRIVER_PATCHLEVEL, diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index de73eefff9ac..dc1cfe2e14be 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -183,6 +183,12 @@ struct v3d_dev { u32 num_allocated; u32 pages_allocated; } bo_stats; + + /* To support a performance analysis tool in user space, we require + * a single, globally configured performance monitor (perfmon) for + * all jobs. + */ + struct v3d_perfmon *global_perfmon; }; static inline struct v3d_dev * @@ -594,6 +600,8 @@ int v3d_perfmon_get_values_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int v3d_perfmon_get_counter_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int v3d_perfmon_set_global_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); /* v3d_sysfs.c */ int v3d_sysfs_init(struct device *dev); diff --git a/drivers/gpu/drm/v3d/v3d_mmu.c b/drivers/gpu/drm/v3d/v3d_mmu.c index 0f564fd7160c..a25d25a8ae61 100644 --- a/drivers/gpu/drm/v3d/v3d_mmu.c +++ b/drivers/gpu/drm/v3d/v3d_mmu.c @@ -4,7 +4,7 @@ /** * DOC: Broadcom V3D MMU * - * The V3D 3.x hardware (compared to VC4) now includes an MMU. It has + * The V3D 3.x hardware (compared to VC4) now includes an MMU. It has * a single level of page tables for the V3D's 4GB address space to * map to AXI bus addresses, thus it could need up to 4MB of * physically contiguous memory to store the PTEs. @@ -15,14 +15,14 @@ * * To protect clients from each other, we should use the GMP to * quickly mask out (at 128kb granularity) what pages are available to - * each client. This is not yet implemented. + * each client. This is not yet implemented. */ #include "v3d_drv.h" #include "v3d_regs.h" -/* Note: All PTEs for the 1MB superpage must be filled with the - * superpage bit set. +/* Note: All PTEs for the 64KB bigpage or 1MB superpage must be filled + * with the bigpage/superpage bit set. */ #define V3D_PTE_SUPERPAGE BIT(31) #define V3D_PTE_BIGPAGE BIT(30) diff --git a/drivers/gpu/drm/v3d/v3d_perfmon.c b/drivers/gpu/drm/v3d/v3d_perfmon.c index 924814cab46a..3ebda2fa46fc 100644 --- a/drivers/gpu/drm/v3d/v3d_perfmon.c +++ b/drivers/gpu/drm/v3d/v3d_perfmon.c @@ -240,17 +240,18 @@ void v3d_perfmon_start(struct v3d_dev *v3d, struct v3d_perfmon *perfmon) for (i = 0; i < ncounters; i++) { u32 source = i / 4; - u32 channel = V3D_SET_FIELD(perfmon->counters[i], V3D_PCTR_S0); + u32 channel = V3D_SET_FIELD_VER(perfmon->counters[i], V3D_PCTR_S0, + v3d->ver); i++; - channel |= V3D_SET_FIELD(i < ncounters ? perfmon->counters[i] : 0, - V3D_PCTR_S1); + channel |= V3D_SET_FIELD_VER(i < ncounters ? perfmon->counters[i] : 0, + V3D_PCTR_S1, v3d->ver); i++; - channel |= V3D_SET_FIELD(i < ncounters ? perfmon->counters[i] : 0, - V3D_PCTR_S2); + channel |= V3D_SET_FIELD_VER(i < ncounters ? perfmon->counters[i] : 0, + V3D_PCTR_S2, v3d->ver); i++; - channel |= V3D_SET_FIELD(i < ncounters ? perfmon->counters[i] : 0, - V3D_PCTR_S3); + channel |= V3D_SET_FIELD_VER(i < ncounters ? perfmon->counters[i] : 0, + V3D_PCTR_S3, v3d->ver); V3D_CORE_WRITE(0, V3D_V4_PCTR_0_SRC_X(source), channel); } @@ -312,6 +313,9 @@ static int v3d_perfmon_idr_del(int id, void *elem, void *data) if (perfmon == v3d->active_perfmon) v3d_perfmon_stop(v3d, perfmon, false); + /* If the global perfmon is being destroyed, set it to NULL */ + cmpxchg(&v3d->global_perfmon, perfmon, NULL); + v3d_perfmon_put(perfmon); return 0; @@ -383,6 +387,7 @@ int v3d_perfmon_destroy_ioctl(struct drm_device *dev, void *data, { struct v3d_file_priv *v3d_priv = file_priv->driver_priv; struct drm_v3d_perfmon_destroy *req = data; + struct v3d_dev *v3d = v3d_priv->v3d; struct v3d_perfmon *perfmon; mutex_lock(&v3d_priv->perfmon.lock); @@ -392,6 +397,13 @@ int v3d_perfmon_destroy_ioctl(struct drm_device *dev, void *data, if (!perfmon) return -EINVAL; + /* If the active perfmon is being destroyed, stop it first */ + if (perfmon == v3d->active_perfmon) + v3d_perfmon_stop(v3d, perfmon, false); + + /* If the global perfmon is being destroyed, set it to NULL */ + cmpxchg(&v3d->global_perfmon, perfmon, NULL); + v3d_perfmon_put(perfmon); return 0; @@ -451,3 +463,34 @@ int v3d_perfmon_get_counter_ioctl(struct drm_device *dev, void *data, return 0; } + +int v3d_perfmon_set_global_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct v3d_file_priv *v3d_priv = file_priv->driver_priv; + struct drm_v3d_perfmon_set_global *req = data; + struct v3d_dev *v3d = to_v3d_dev(dev); + struct v3d_perfmon *perfmon; + + if (req->flags & ~DRM_V3D_PERFMON_CLEAR_GLOBAL) + return -EINVAL; + + perfmon = v3d_perfmon_find(v3d_priv, req->id); + if (!perfmon) + return -EINVAL; + + /* If the request is to clear the global performance monitor */ + if (req->flags & DRM_V3D_PERFMON_CLEAR_GLOBAL) { + if (!v3d->global_perfmon) + return -EINVAL; + + xchg(&v3d->global_perfmon, NULL); + + return 0; + } + + if (cmpxchg(&v3d->global_perfmon, NULL, perfmon)) + return -EBUSY; + + return 0; +} diff --git a/drivers/gpu/drm/v3d/v3d_performance_counters.h b/drivers/gpu/drm/v3d/v3d_performance_counters.h index d919a2fc9449..2bc4cce0744a 100644 --- a/drivers/gpu/drm/v3d/v3d_performance_counters.h +++ b/drivers/gpu/drm/v3d/v3d_performance_counters.h @@ -2,11 +2,12 @@ /* * Copyright (C) 2024 Raspberry Pi */ + #ifndef V3D_PERFORMANCE_COUNTERS_H #define V3D_PERFORMANCE_COUNTERS_H -/* Holds a description of a given performance counter. The index of performance - * counter is given by the array on v3d_performance_counter.h +/* Holds a description of a given performance counter. The index of + * performance counter is given by the array on `v3d_performance_counter.c`. */ struct v3d_perf_counter_desc { /* Category of the counter */ @@ -20,15 +21,12 @@ struct v3d_perf_counter_desc { }; struct v3d_perfmon_info { - /* - * Different revisions of V3D have different total number of + /* Different revisions of V3D have different total number of * performance counters. */ unsigned int max_counters; - /* - * Array of counters valid for the platform. - */ + /* Array of counters valid for the platform. */ const struct v3d_perf_counter_desc *counters; }; diff --git a/drivers/gpu/drm/v3d/v3d_regs.h b/drivers/gpu/drm/v3d/v3d_regs.h index 1b1a62ad9585..6da3c69082bd 100644 --- a/drivers/gpu/drm/v3d/v3d_regs.h +++ b/drivers/gpu/drm/v3d/v3d_regs.h @@ -15,6 +15,14 @@ fieldval & field##_MASK; \ }) +#define V3D_SET_FIELD_VER(value, field, ver) \ + ({ \ + typeof(ver) _ver = (ver); \ + u32 fieldval = (value) << field##_SHIFT(_ver); \ + WARN_ON((fieldval & ~field##_MASK(_ver)) != 0); \ + fieldval & field##_MASK(_ver); \ + }) + #define V3D_GET_FIELD(word, field) (((word) & field##_MASK) >> \ field##_SHIFT) @@ -354,18 +362,15 @@ #define V3D_V4_PCTR_0_SRC_28_31 0x0067c #define V3D_V4_PCTR_0_SRC_X(x) (V3D_V4_PCTR_0_SRC_0_3 + \ 4 * (x)) -# define V3D_PCTR_S0_MASK V3D_MASK(6, 0) -# define V3D_V7_PCTR_S0_MASK V3D_MASK(7, 0) -# define V3D_PCTR_S0_SHIFT 0 -# define V3D_PCTR_S1_MASK V3D_MASK(14, 8) -# define V3D_V7_PCTR_S1_MASK V3D_MASK(15, 8) -# define V3D_PCTR_S1_SHIFT 8 -# define V3D_PCTR_S2_MASK V3D_MASK(22, 16) -# define V3D_V7_PCTR_S2_MASK V3D_MASK(23, 16) -# define V3D_PCTR_S2_SHIFT 16 -# define V3D_PCTR_S3_MASK V3D_MASK(30, 24) -# define V3D_V7_PCTR_S3_MASK V3D_MASK(31, 24) -# define V3D_PCTR_S3_SHIFT 24 +# define V3D_PCTR_S0_MASK(ver) (((ver) >= 71) ? V3D_MASK(7, 0) : V3D_MASK(6, 0)) +# define V3D_PCTR_S0_SHIFT(ver) 0 +# define V3D_PCTR_S1_MASK(ver) (((ver) >= 71) ? V3D_MASK(15, 8) : V3D_MASK(14, 8)) +# define V3D_PCTR_S1_SHIFT(ver) 8 +# define V3D_PCTR_S2_MASK(ver) (((ver) >= 71) ? V3D_MASK(23, 16) : V3D_MASK(22, 16)) +# define V3D_PCTR_S2_SHIFT(ver) 16 +# define V3D_PCTR_S3_MASK(ver) (((ver) >= 71) ? V3D_MASK(31, 24) : V3D_MASK(30, 24)) +# define V3D_PCTR_S3_SHIFT(ver) 24 + #define V3D_PCTR_CYCLE_COUNT(ver) ((ver >= 71) ? 0 : 32) /* Output values of the counters. */ diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 99ac4995b5a1..da08ddb01d21 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -5,16 +5,16 @@ * DOC: Broadcom V3D scheduling * * The shared DRM GPU scheduler is used to coordinate submitting jobs - * to the hardware. Each DRM fd (roughly a client process) gets its - * own scheduler entity, which will process jobs in order. The GPU - * scheduler will round-robin between clients to submit the next job. + * to the hardware. Each DRM fd (roughly a client process) gets its + * own scheduler entity, which will process jobs in order. The GPU + * scheduler will schedule the clients with a FIFO scheduling algorithm. * * For simplicity, and in order to keep latency low for interactive * jobs when bulk background jobs are queued up, we submit a new job * to the HW only when it has completed the last one, instead of - * filling up the CT[01]Q FIFOs with jobs. Similarly, we use - * drm_sched_job_add_dependency() to manage the dependency between bin and - * render, instead of having the clients submit jobs using the HW's + * filling up the CT[01]Q FIFOs with jobs. Similarly, we use + * `drm_sched_job_add_dependency()` to manage the dependency between bin + * and render, instead of having the clients submit jobs using the HW's * semaphores to interlock between them. */ @@ -120,11 +120,19 @@ v3d_cpu_job_free(struct drm_sched_job *sched_job) static void v3d_switch_perfmon(struct v3d_dev *v3d, struct v3d_job *job) { - if (job->perfmon != v3d->active_perfmon) + struct v3d_perfmon *perfmon = v3d->global_perfmon; + + if (!perfmon) + perfmon = job->perfmon; + + if (perfmon == v3d->active_perfmon) + return; + + if (perfmon != v3d->active_perfmon) v3d_perfmon_stop(v3d, v3d->active_perfmon, true); - if (job->perfmon && v3d->active_perfmon != job->perfmon) - v3d_perfmon_start(v3d, job->perfmon); + if (perfmon && v3d->active_perfmon != perfmon) + v3d_perfmon_start(v3d, perfmon); } static void diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c index d607aa9c4ec2..4ff5de46fb22 100644 --- a/drivers/gpu/drm/v3d/v3d_submit.c +++ b/drivers/gpu/drm/v3d/v3d_submit.c @@ -11,10 +11,11 @@ #include "v3d_trace.h" /* Takes the reservation lock on all the BOs being referenced, so that - * at queue submit time we can update the reservations. + * we can attach fences and update the reservations after pushing the job + * to the queue. * * We don't lock the RCL the tile alloc/state BOs, or overflow memory - * (all of which are on exec->unref_list). They're entirely private + * (all of which are on render->unref_list). They're entirely private * to v3d, so we don't attach dma-buf fences to them. */ static int @@ -55,11 +56,11 @@ fail: * @bo_count: Number of GEM handles passed in * * The command validator needs to reference BOs by their index within - * the submitted job's BO list. This does the validation of the job's + * the submitted job's BO list. This does the validation of the job's * BO list and reference counting for the lifetime of the job. * * Note that this function doesn't need to unreference the BOs on - * failure, because that will happen at v3d_exec_cleanup() time. + * failure, because that will happen at `v3d_job_free()`. */ static int v3d_lookup_bos(struct drm_device *dev, @@ -981,6 +982,11 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, goto fail; if (args->perfmon_id) { + if (v3d->global_perfmon) { + ret = -EAGAIN; + goto fail_perfmon; + } + render->base.perfmon = v3d_perfmon_find(v3d_priv, args->perfmon_id); @@ -1196,6 +1202,11 @@ v3d_submit_csd_ioctl(struct drm_device *dev, void *data, goto fail; if (args->perfmon_id) { + if (v3d->global_perfmon) { + ret = -EAGAIN; + goto fail_perfmon; + } + job->base.perfmon = v3d_perfmon_find(v3d_priv, args->perfmon_id); if (!job->base.perfmon) { |