summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/panfrost/panfrost_drv.c
diff options
context:
space:
mode:
authorAdrián Larumbe <adrian.larumbe@collabora.com>2023-09-29 19:14:28 +0100
committerBoris Brezillon <boris.brezillon@collabora.com>2023-10-04 13:04:15 +0200
commitf11b0417eec21ade937da866363433cc0b8b396c (patch)
tree8c70bc3a8aa70859d7c083646aa9e84fc13cfa77 /drivers/gpu/drm/panfrost/panfrost_drv.c
parent51dcc15b5b0fb548c47cf5188f88d86fe79312bd (diff)
drm/panfrost: Add fdinfo support GPU load metrics
The drm-stats fdinfo tags made available to user space are drm-engine, drm-cycles, drm-max-freq and drm-curfreq, one per job slot. This deviates from standard practice in other DRM drivers, where a single set of key:value pairs is provided for the whole render engine. However, Panfrost has separate queues for fragment and vertex/tiler jobs, so a decision was made to calculate bus cycles and workload times separately. Maximum operating frequency is calculated at devfreq initialisation time. Current frequency is made available to user space because nvtop uses it when performing engine usage calculations. It is important to bear in mind that both GPU cycle and kernel time numbers provided are at best rough estimations, and always reported in excess from the actual figure because of two reasons: - Excess time because of the delay between the end of a job processing, the subsequent job IRQ and the actual time of the sample. - Time spent in the engine queue waiting for the GPU to pick up the next job. To avoid race conditions during enablement/disabling, a reference counting mechanism was introduced, and a job flag that tells us whether a given job increased the refcount. This is necessary, because user space can toggle cycle counting through a debugfs file, and a given job might have been in flight by the time cycle counting was disabled. The main goal of the debugfs cycle counter knob is letting tools like nvtop or IGT's gputop switch it at any time, to avoid power waste in case no engine usage measuring is necessary. Also add a documentation file explaining the possible values for fdinfo's engine keystrings and Panfrost-specific drm-curfreq-<keystr> pairs. Signed-off-by: Adrián Larumbe <adrian.larumbe@collabora.com> Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com> Reviewed-by: Steven Price <steven.price@arm.com> Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com> Link: https://patchwork.freedesktop.org/patch/msgid/20230929181616.2769345-3-adrian.larumbe@collabora.com
Diffstat (limited to 'drivers/gpu/drm/panfrost/panfrost_drv.c')
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_drv.c58
1 files changed, 57 insertions, 1 deletions
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index a2ab99698ca8..97e5bc4a82c8 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -20,6 +20,7 @@
#include "panfrost_job.h"
#include "panfrost_gpu.h"
#include "panfrost_perfcnt.h"
+#include "panfrost_debugfs.h"
static bool unstable_ioctls;
module_param_unsafe(unstable_ioctls, bool, 0600);
@@ -267,6 +268,7 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data,
job->requirements = args->requirements;
job->flush_id = panfrost_gpu_get_latest_flush_id(pfdev);
job->mmu = file_priv->mmu;
+ job->engine_usage = &file_priv->engine_usage;
slot = panfrost_job_get_slot(job);
@@ -523,7 +525,56 @@ static const struct drm_ioctl_desc panfrost_drm_driver_ioctls[] = {
PANFROST_IOCTL(MADVISE, madvise, DRM_RENDER_ALLOW),
};
-DEFINE_DRM_GEM_FOPS(panfrost_drm_driver_fops);
+static void panfrost_gpu_show_fdinfo(struct panfrost_device *pfdev,
+ struct panfrost_file_priv *panfrost_priv,
+ struct drm_printer *p)
+{
+ int i;
+
+ /*
+ * IMPORTANT NOTE: drm-cycles and drm-engine measurements are not
+ * accurate, as they only provide a rough estimation of the number of
+ * GPU cycles and CPU time spent in a given context. This is due to two
+ * different factors:
+ * - Firstly, we must consider the time the CPU and then the kernel
+ * takes to process the GPU interrupt, which means additional time and
+ * GPU cycles will be added in excess to the real figure.
+ * - Secondly, the pipelining done by the Job Manager (2 job slots per
+ * engine) implies there is no way to know exactly how much time each
+ * job spent on the GPU.
+ */
+
+ static const char * const engine_names[] = {
+ "fragment", "vertex-tiler", "compute-only"
+ };
+
+ BUILD_BUG_ON(ARRAY_SIZE(engine_names) != NUM_JOB_SLOTS);
+
+ for (i = 0; i < NUM_JOB_SLOTS - 1; i++) {
+ drm_printf(p, "drm-engine-%s:\t%llu ns\n",
+ engine_names[i], panfrost_priv->engine_usage.elapsed_ns[i]);
+ drm_printf(p, "drm-cycles-%s:\t%llu\n",
+ engine_names[i], panfrost_priv->engine_usage.cycles[i]);
+ drm_printf(p, "drm-maxfreq-%s:\t%lu Hz\n",
+ engine_names[i], pfdev->pfdevfreq.fast_rate);
+ drm_printf(p, "drm-curfreq-%s:\t%lu Hz\n",
+ engine_names[i], pfdev->pfdevfreq.current_frequency);
+ }
+}
+
+static void panfrost_show_fdinfo(struct drm_printer *p, struct drm_file *file)
+{
+ struct drm_device *dev = file->minor->dev;
+ struct panfrost_device *pfdev = dev->dev_private;
+
+ panfrost_gpu_show_fdinfo(pfdev, file->driver_priv, p);
+}
+
+static const struct file_operations panfrost_drm_driver_fops = {
+ .owner = THIS_MODULE,
+ DRM_GEM_FOPS,
+ .show_fdinfo = drm_show_fdinfo,
+};
/*
* Panfrost driver version:
@@ -535,6 +586,7 @@ static const struct drm_driver panfrost_drm_driver = {
.driver_features = DRIVER_RENDER | DRIVER_GEM | DRIVER_SYNCOBJ,
.open = panfrost_open,
.postclose = panfrost_postclose,
+ .show_fdinfo = panfrost_show_fdinfo,
.ioctls = panfrost_drm_driver_ioctls,
.num_ioctls = ARRAY_SIZE(panfrost_drm_driver_ioctls),
.fops = &panfrost_drm_driver_fops,
@@ -546,6 +598,10 @@ static const struct drm_driver panfrost_drm_driver = {
.gem_create_object = panfrost_gem_create_object,
.gem_prime_import_sg_table = panfrost_gem_prime_import_sg_table,
+
+#ifdef CONFIG_DEBUG_FS
+ .debugfs_init = panfrost_debugfs_init,
+#endif
};
static int panfrost_probe(struct platform_device *pdev)