summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/vc4
diff options
context:
space:
mode:
authorSean Paul <seanpaul@chromium.org>2018-04-16 10:47:13 -0400
committerSean Paul <seanpaul@chromium.org>2018-04-16 10:47:13 -0400
commit8089f9f5a32938ddefb1767b8ee14bb7996e5e2f (patch)
treec6c8924fda51c7f54bebffa63a41ae15954995bf /drivers/gpu/drm/vc4
parentc0db1b677e1d584fab5d7ac76a32e1c0157542e0 (diff)
parenta10beabba213924d876f2d10ca9351aeab93f58a (diff)
Merge airlied/drm-next into drm-misc-fixes
Fast forwarding -fixes for 4.17. Signed-off-by: Sean Paul <seanpaul@chromium.org>
Diffstat (limited to 'drivers/gpu/drm/vc4')
-rw-r--r--drivers/gpu/drm/vc4/Makefile1
-rw-r--r--drivers/gpu/drm/vc4/vc4_crtc.c25
-rw-r--r--drivers/gpu/drm/vc4/vc4_drv.c26
-rw-r--r--drivers/gpu/drm/vc4/vc4_drv.h128
-rw-r--r--drivers/gpu/drm/vc4/vc4_gem.c48
-rw-r--r--drivers/gpu/drm/vc4/vc4_hdmi.c2
-rw-r--r--drivers/gpu/drm/vc4/vc4_irq.c40
-rw-r--r--drivers/gpu/drm/vc4/vc4_kms.c1
-rw-r--r--drivers/gpu/drm/vc4/vc4_perfmon.c188
-rw-r--r--drivers/gpu/drm/vc4/vc4_plane.c125
-rw-r--r--drivers/gpu/drm/vc4/vc4_regs.h36
-rw-r--r--drivers/gpu/drm/vc4/vc4_v3d.c64
-rw-r--r--drivers/gpu/drm/vc4/vc4_validate.c2
13 files changed, 544 insertions, 142 deletions
diff --git a/drivers/gpu/drm/vc4/Makefile b/drivers/gpu/drm/vc4/Makefile
index f5500df51686..4a3a868235f8 100644
--- a/drivers/gpu/drm/vc4/Makefile
+++ b/drivers/gpu/drm/vc4/Makefile
@@ -15,6 +15,7 @@ vc4-y := \
vc4_vec.o \
vc4_hvs.o \
vc4_irq.o \
+ vc4_perfmon.o \
vc4_plane.o \
vc4_render_cl.o \
vc4_trace_points.o \
diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index ce1e3b9e14c9..bf4667481935 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -643,9 +643,12 @@ static void vc4_crtc_atomic_flush(struct drm_crtc *crtc,
{
struct drm_device *dev = crtc->dev;
struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state);
struct drm_plane *plane;
+ struct vc4_plane_state *vc4_plane_state;
bool debug_dump_regs = false;
+ bool enable_bg_fill = false;
u32 __iomem *dlist_start = vc4->hvs->dlist + vc4_state->mm.start;
u32 __iomem *dlist_next = dlist_start;
@@ -656,6 +659,20 @@ static void vc4_crtc_atomic_flush(struct drm_crtc *crtc,
/* Copy all the active planes' dlist contents to the hardware dlist. */
drm_atomic_crtc_for_each_plane(plane, crtc) {
+ /* Is this the first active plane? */
+ if (dlist_next == dlist_start) {
+ /* We need to enable background fill when a plane
+ * could be alpha blending from the background, i.e.
+ * where no other plane is underneath. It suffices to
+ * consider the first active plane here since we set
+ * needs_bg_fill such that either the first plane
+ * already needs it or all planes on top blend from
+ * the first or a lower plane.
+ */
+ vc4_plane_state = to_vc4_plane_state(plane->state);
+ enable_bg_fill = vc4_plane_state->needs_bg_fill;
+ }
+
dlist_next += vc4_plane_write_dlist(plane, dlist_next);
}
@@ -664,6 +681,14 @@ static void vc4_crtc_atomic_flush(struct drm_crtc *crtc,
WARN_ON_ONCE(dlist_next - dlist_start != vc4_state->mm.size);
+ if (enable_bg_fill)
+ /* This sets a black background color fill, as is the case
+ * with other DRM drivers.
+ */
+ HVS_WRITE(SCALER_DISPBKGNDX(vc4_crtc->channel),
+ HVS_READ(SCALER_DISPBKGNDX(vc4_crtc->channel)) |
+ SCALER_DISPBKGND_FILL);
+
/* Only update DISPLIST if the CRTC was already running and is not
* being disabled.
* vc4_crtc_enable() takes care of updating the dlist just after
diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
index ceb385fd69c5..94b99c90425a 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -101,6 +101,7 @@ static int vc4_get_param_ioctl(struct drm_device *dev, void *data,
case DRM_VC4_PARAM_SUPPORTS_THREADED_FS:
case DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER:
case DRM_VC4_PARAM_SUPPORTS_MADVISE:
+ case DRM_VC4_PARAM_SUPPORTS_PERFMON:
args->value = true;
break;
default:
@@ -111,6 +112,26 @@ static int vc4_get_param_ioctl(struct drm_device *dev, void *data,
return 0;
}
+static int vc4_open(struct drm_device *dev, struct drm_file *file)
+{
+ struct vc4_file *vc4file;
+
+ vc4file = kzalloc(sizeof(*vc4file), GFP_KERNEL);
+ if (!vc4file)
+ return -ENOMEM;
+
+ vc4_perfmon_open_file(vc4file);
+ file->driver_priv = vc4file;
+ return 0;
+}
+
+static void vc4_close(struct drm_device *dev, struct drm_file *file)
+{
+ /* Drop the perfmons, then free the vc4_file kzalloc'ed by vc4_open(). */
+ vc4_perfmon_close_file(file->driver_priv);
+ kfree(file->driver_priv);
+}
+
static const struct vm_operations_struct vc4_vm_ops = {
.fault = vc4_fault,
.open = drm_gem_vm_open,
@@ -143,6 +164,9 @@ static const struct drm_ioctl_desc vc4_drm_ioctls[] = {
DRM_IOCTL_DEF_DRV(VC4_GET_TILING, vc4_get_tiling_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(VC4_LABEL_BO, vc4_label_bo_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(VC4_GEM_MADVISE, vc4_gem_madvise_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(VC4_PERFMON_CREATE, vc4_perfmon_create_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(VC4_PERFMON_DESTROY, vc4_perfmon_destroy_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(VC4_PERFMON_GET_VALUES, vc4_perfmon_get_values_ioctl, DRM_RENDER_ALLOW),
};
static struct drm_driver vc4_drm_driver = {
@@ -153,6 +177,8 @@ static struct drm_driver vc4_drm_driver = {
DRIVER_RENDER |
DRIVER_PRIME),
.lastclose = drm_fb_helper_lastclose,
+ .open = vc4_open,
+ .postclose = vc4_close,
.irq_handler = vc4_irq,
.irq_preinstall = vc4_irq_preinstall,
.irq_postinstall = vc4_irq_postinstall,
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index 3af22936d9b3..1b4cd1fabf56 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -11,6 +11,8 @@
#include <drm/drm_encoder.h>
#include <drm/drm_gem_cma_helper.h>
+#include "uapi/drm/vc4_drm.h"
+
/* Don't forget to update vc4_bo.c: bo_type_names[] when adding to
* this.
*/
@@ -29,6 +31,36 @@ enum vc4_kernel_bo_type {
VC4_BO_TYPE_COUNT
};
+/* Performance monitor object. The perfmon lifetime is controlled by userspace
+ * using perfmon related ioctls. A perfmon can be attached to a submit_cl
+ * request, and when this is the case, HW perf counters will be activated just
+ * before the submit_cl is submitted to the GPU and disabled when the job is
+ * done. This way, only events related to a specific job will be counted.
+ */
+struct vc4_perfmon {
+ /* Tracks the number of users of the perfmon, when this counter reaches
+ * zero the perfmon is destroyed.
+ */
+ refcount_t refcnt;
+
+ /* Number of counters activated in this perfmon instance
+ * (between 1 and DRM_VC4_MAX_PERF_COUNTERS, inclusive).
+ */
+ u8 ncounters;
+
+ /* Events counted by the HW perf counters. */
+ u8 events[DRM_VC4_MAX_PERF_COUNTERS];
+
+ /* Storage for counter values. Counters are incremented by the HW
+ * perf counter values every time the perfmon is attached to a GPU job.
+ * This way, perfmon users don't have to retrieve the results after
+ * each job if they want to track events covering several submissions.
+ * Note that counter values can't be reset, but you can fake a reset by
+ * destroying the perfmon and creating a new one.
+ */
+ u64 counters[];
+};
+
struct vc4_dev {
struct drm_device *dev;
@@ -121,6 +153,11 @@ struct vc4_dev {
wait_queue_head_t job_wait_queue;
struct work_struct job_done_work;
+ /* Used to track the active perfmon if any. Access to this field is
+ * protected by job_lock.
+ */
+ struct vc4_perfmon *active_perfmon;
+
/* List of struct vc4_seqno_cb for callbacks to be made from a
* workqueue when the given seqno is passed.
*/
@@ -273,6 +310,66 @@ to_vc4_plane(struct drm_plane *plane)
return (struct vc4_plane *)plane;
}
+enum vc4_scaling_mode {
+ VC4_SCALING_NONE,
+ VC4_SCALING_TPZ,
+ VC4_SCALING_PPF,
+};
+
+struct vc4_plane_state {
+ struct drm_plane_state base;
+ /* System memory copy of the display list for this element, computed
+ * at atomic_check time.
+ */
+ u32 *dlist;
+ u32 dlist_size; /* Number of dwords allocated for the display list */
+ u32 dlist_count; /* Number of used dwords in the display list. */
+
+ /* Offset in the dlist to various words, for pageflip or
+ * cursor updates.
+ */
+ u32 pos0_offset;
+ u32 pos2_offset;
+ u32 ptr0_offset;
+
+ /* Offset where the plane's dlist was last stored in the
+ * hardware at vc4_crtc_atomic_flush() time.
+ */
+ u32 __iomem *hw_dlist;
+
+ /* Clipped coordinates of the plane on the display. */
+ int crtc_x, crtc_y, crtc_w, crtc_h;
+ /* Clipped area being scanned from in the FB. */
+ u32 src_x, src_y;
+
+ u32 src_w[2], src_h[2];
+
+ /* Scaling selection for the RGB/Y plane and the Cb/Cr planes. */
+ enum vc4_scaling_mode x_scaling[2], y_scaling[2];
+ bool is_unity;
+ bool is_yuv;
+
+ /* Offset to start scanning out from the start of the plane's
+ * BO.
+ */
+ u32 offsets[3];
+
+ /* Our allocation in LBM for temporary storage during scaling. */
+ struct drm_mm_node lbm;
+
+ /* Set when the plane has per-pixel alpha content or does not cover
+ * the entire screen. This is a hint to the CRTC that it might need
+ * to enable background color fill.
+ */
+ bool needs_bg_fill;
+};
+
+static inline struct vc4_plane_state *
+to_vc4_plane_state(struct drm_plane_state *state)
+{
+ return (struct vc4_plane_state *)state;
+}
+
enum vc4_encoder_type {
VC4_ENCODER_TYPE_NONE,
VC4_ENCODER_TYPE_HDMI,
@@ -406,6 +503,21 @@ struct vc4_exec_info {
void *uniforms_v;
uint32_t uniforms_p;
uint32_t uniforms_size;
+
+ /* Pointer to a performance monitor object if the user requested it,
+ * NULL otherwise.
+ */
+ struct vc4_perfmon *perfmon;
+};
+
+/* Per-open file private data. Any driver-specific resource that has to be
+ * released when the DRM file is closed should be placed here.
+ */
+struct vc4_file {
+ struct {
+ struct idr idr;
+ struct mutex lock;
+ } perfmon;
};
static inline struct vc4_exec_info *
@@ -646,3 +758,19 @@ bool vc4_check_tex_size(struct vc4_exec_info *exec,
/* vc4_validate_shader.c */
struct vc4_validated_shader_info *
vc4_validate_shader(struct drm_gem_cma_object *shader_obj);
+
+/* vc4_perfmon.c */
+void vc4_perfmon_get(struct vc4_perfmon *perfmon);
+void vc4_perfmon_put(struct vc4_perfmon *perfmon);
+void vc4_perfmon_start(struct vc4_dev *vc4, struct vc4_perfmon *perfmon);
+void vc4_perfmon_stop(struct vc4_dev *vc4, struct vc4_perfmon *perfmon,
+ bool capture);
+struct vc4_perfmon *vc4_perfmon_find(struct vc4_file *vc4file, int id);
+void vc4_perfmon_open_file(struct vc4_file *vc4file);
+void vc4_perfmon_close_file(struct vc4_file *vc4file);
+int vc4_perfmon_create_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+int vc4_perfmon_destroy_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+int vc4_perfmon_get_values_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
index c94cce96544c..2107b0daf8ef 100644
--- a/drivers/gpu/drm/vc4/vc4_gem.c
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -467,14 +467,30 @@ again:
vc4_flush_caches(dev);
+ /* Only start the perfmon if it was not already started by a previous
+ * job.
+ */
+ if (exec->perfmon && vc4->active_perfmon != exec->perfmon)
+ vc4_perfmon_start(vc4, exec->perfmon);
+
/* Either put the job in the binner if it uses the binner, or
* immediately move it to the to-be-rendered queue.
*/
if (exec->ct0ca != exec->ct0ea) {
submit_cl(dev, 0, exec->ct0ca, exec->ct0ea);
} else {
+ struct vc4_exec_info *next;
+
vc4_move_job_to_render(dev, exec);
- goto again;
+ next = vc4_first_bin_job(vc4);
+
+ /* We can't start the next bin job if the previous job had a
+ * different perfmon instance attached to it. The same goes
+ * if one of them had a perfmon attached to it and the other
+ * one doesn't.
+ */
+ if (next && next->perfmon == exec->perfmon)
+ goto again;
}
}
@@ -642,6 +658,7 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec,
struct ww_acquire_ctx *acquire_ctx)
{
struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct vc4_exec_info *renderjob;
uint64_t seqno;
unsigned long irqflags;
struct vc4_fence *fence;
@@ -667,11 +684,14 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec,
list_add_tail(&exec->head, &vc4->bin_job_list);
- /* If no job was executing, kick ours off. Otherwise, it'll
- * get started when the previous job's flush done interrupt
- * occurs.
+ /* If no bin job was executing and if the render job (if any) has the
+ * same perfmon as our job attached to it (or if both jobs don't have
+ * perfmon activated), then kick ours off. Otherwise, it'll get
+ * started when the previous job's flush/render done interrupt occurs.
*/
- if (vc4_first_bin_job(vc4) == exec) {
+ renderjob = vc4_first_render_job(vc4);
+ if (vc4_first_bin_job(vc4) == exec &&
+ (!renderjob || renderjob->perfmon == exec->perfmon)) {
vc4_submit_next_bin_job(dev);
vc4_queue_hangcheck(dev);
}
@@ -936,6 +956,9 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
vc4->bin_alloc_used &= ~exec->bin_slots;
spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+ /* Release the reference we had on the perf monitor. */
+ vc4_perfmon_put(exec->perfmon);
+
mutex_lock(&vc4->power_lock);
if (--vc4->power_refcount == 0) {
pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev);
@@ -1088,6 +1111,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv)
{
struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct vc4_file *vc4file = file_priv->driver_priv;
struct drm_vc4_submit_cl *args = data;
struct vc4_exec_info *exec;
struct ww_acquire_ctx acquire_ctx;
@@ -1101,6 +1125,11 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
}
+ if (args->pad2 != 0) {
+ DRM_DEBUG("->pad2 must be set to zero\n");
+ return -EINVAL;
+ }
+
exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
if (!exec) {
DRM_ERROR("malloc failure on exec struct\n");
@@ -1126,6 +1155,15 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
if (ret)
goto fail;
+ if (args->perfmonid) {
+ exec->perfmon = vc4_perfmon_find(vc4file,
+ args->perfmonid);
+ if (!exec->perfmon) {
+ ret = -ENOENT;
+ goto fail;
+ }
+ }
+
if (exec->args->bin_cl_size != 0) {
ret = vc4_get_bcl(dev, exec);
if (ret)
diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
index 984501e3f0b0..1a6db291d48b 100644
--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
+++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
@@ -681,7 +681,7 @@ static void vc4_hdmi_encoder_enable(struct drm_encoder *encoder)
drift & ~VC4_HDMI_FIFO_CTL_RECENTER);
HDMI_WRITE(VC4_HDMI_FIFO_CTL,
drift | VC4_HDMI_FIFO_CTL_RECENTER);
- udelay(1000);
+ usleep_range(1000, 1100);
HDMI_WRITE(VC4_HDMI_FIFO_CTL,
drift & ~VC4_HDMI_FIFO_CTL_RECENTER);
HDMI_WRITE(VC4_HDMI_FIFO_CTL,
diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c
index 3dd62d75f531..4cd2ccfe15f4 100644
--- a/drivers/gpu/drm/vc4/vc4_irq.c
+++ b/drivers/gpu/drm/vc4/vc4_irq.c
@@ -104,13 +104,20 @@ static void
vc4_irq_finish_bin_job(struct drm_device *dev)
{
struct vc4_dev *vc4 = to_vc4_dev(dev);
- struct vc4_exec_info *exec = vc4_first_bin_job(vc4);
+ struct vc4_exec_info *next, *exec = vc4_first_bin_job(vc4);
if (!exec)
return;
vc4_move_job_to_render(dev, exec);
- vc4_submit_next_bin_job(dev);
+ next = vc4_first_bin_job(vc4);
+
+ /* Only submit the next job in the bin list if it matches the perfmon
+ * attached to the one that just finished (or if both jobs don't have
+ * perfmon attached to them).
+ */
+ if (next && next->perfmon == exec->perfmon)
+ vc4_submit_next_bin_job(dev);
}
static void
@@ -122,6 +129,10 @@ vc4_cancel_bin_job(struct drm_device *dev)
if (!exec)
return;
+ /* Stop the perfmon so that the next bin job can be started. */
+ if (exec->perfmon)
+ vc4_perfmon_stop(vc4, exec->perfmon, false);
+
list_move_tail(&exec->head, &vc4->bin_job_list);
vc4_submit_next_bin_job(dev);
}
@@ -131,18 +142,41 @@ vc4_irq_finish_render_job(struct drm_device *dev)
{
struct vc4_dev *vc4 = to_vc4_dev(dev);
struct vc4_exec_info *exec = vc4_first_render_job(vc4);
+ struct vc4_exec_info *nextbin, *nextrender;
if (!exec)
return;
vc4->finished_seqno++;
list_move_tail(&exec->head, &vc4->job_done_list);
+
+ nextbin = vc4_first_bin_job(vc4);
+ nextrender = vc4_first_render_job(vc4);
+
+ /* Only stop the perfmon if following jobs in the queue don't expect it
+ * to be enabled.
+ */
+ if (exec->perfmon && !nextrender &&
+ (!nextbin || nextbin->perfmon != exec->perfmon))
+ vc4_perfmon_stop(vc4, exec->perfmon, true);
+
+ /* If there's a render job waiting, start it. If this is not the case
+ * we may have to unblock the binner if it's been stalled because of
+ * perfmon (this can be checked by comparing the perfmon attached to
+ * the finished renderjob to the one attached to the next bin job: if
+ * they don't match, this means the binner is stalled and should be
+ * restarted).
+ */
+ if (nextrender)
+ vc4_submit_next_render_job(dev);
+ else if (nextbin && nextbin->perfmon != exec->perfmon)
+ vc4_submit_next_bin_job(dev);
+
if (exec->fence) {
dma_fence_signal_locked(exec->fence);
dma_fence_put(exec->fence);
exec->fence = NULL;
}
- vc4_submit_next_render_job(dev);
wake_up_all(&vc4->job_wait_queue);
schedule_work(&vc4->job_done_work);
diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c
index 4256f294c346..ba60153dddb5 100644
--- a/drivers/gpu/drm/vc4/vc4_kms.c
+++ b/drivers/gpu/drm/vc4/vc4_kms.c
@@ -215,6 +215,7 @@ int vc4_kms_load(struct drm_device *dev)
dev->mode_config.funcs = &vc4_mode_funcs;
dev->mode_config.preferred_depth = 24;
dev->mode_config.async_page_flip = true;
+ dev->mode_config.allow_fb_modifiers = true;
drm_mode_config_reset(dev);
diff --git a/drivers/gpu/drm/vc4/vc4_perfmon.c b/drivers/gpu/drm/vc4/vc4_perfmon.c
new file mode 100644
index 000000000000..437e7a27f21d
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_perfmon.c
@@ -0,0 +1,188 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018 Broadcom
+ */
+
+/**
+ * DOC: VC4 V3D performance monitor module
+ *
+ * The V3D block provides 16 hardware counters which can count various events.
+ */
+
+#include "vc4_drv.h"
+#include "vc4_regs.h"
+
+#define VC4_PERFMONID_MIN 1
+#define VC4_PERFMONID_MAX U32_MAX
+
+void vc4_perfmon_get(struct vc4_perfmon *perfmon)
+{
+ if (perfmon)
+ refcount_inc(&perfmon->refcnt);
+}
+
+void vc4_perfmon_put(struct vc4_perfmon *perfmon)
+{
+ if (perfmon && refcount_dec_and_test(&perfmon->refcnt))
+ kfree(perfmon);
+}
+
+void vc4_perfmon_start(struct vc4_dev *vc4, struct vc4_perfmon *perfmon)
+{
+ unsigned int i;
+ u32 mask;
+
+ if (WARN_ON_ONCE(!perfmon || vc4->active_perfmon))
+ return;
+
+ for (i = 0; i < perfmon->ncounters; i++)
+ V3D_WRITE(V3D_PCTRS(i), perfmon->events[i]);
+
+ mask = GENMASK(perfmon->ncounters - 1, 0);
+ V3D_WRITE(V3D_PCTRC, mask);
+ V3D_WRITE(V3D_PCTRE, V3D_PCTRE_EN | mask);
+ vc4->active_perfmon = perfmon;
+}
+
+void vc4_perfmon_stop(struct vc4_dev *vc4, struct vc4_perfmon *perfmon,
+ bool capture)
+{
+ unsigned int i;
+
+ if (WARN_ON_ONCE(!vc4->active_perfmon ||
+ perfmon != vc4->active_perfmon))
+ return;
+
+ if (capture) {
+ for (i = 0; i < perfmon->ncounters; i++)
+ perfmon->counters[i] += V3D_READ(V3D_PCTR(i));
+ }
+
+ V3D_WRITE(V3D_PCTRE, 0);
+ vc4->active_perfmon = NULL;
+}
+
+struct vc4_perfmon *vc4_perfmon_find(struct vc4_file *vc4file, int id)
+{
+ struct vc4_perfmon *perfmon;
+
+ mutex_lock(&vc4file->perfmon.lock);
+ perfmon = idr_find(&vc4file->perfmon.idr, id);
+ vc4_perfmon_get(perfmon);
+ mutex_unlock(&vc4file->perfmon.lock);
+
+ return perfmon;
+}
+
+void vc4_perfmon_open_file(struct vc4_file *vc4file)
+{
+ mutex_init(&vc4file->perfmon.lock);
+ idr_init(&vc4file->perfmon.idr);
+}
+
+static int vc4_perfmon_idr_del(int id, void *elem, void *data)
+{
+ struct vc4_perfmon *perfmon = elem;
+
+ vc4_perfmon_put(perfmon);
+
+ return 0;
+}
+
+void vc4_perfmon_close_file(struct vc4_file *vc4file)
+{
+ mutex_lock(&vc4file->perfmon.lock);
+ idr_for_each(&vc4file->perfmon.idr, vc4_perfmon_idr_del, NULL);
+ idr_destroy(&vc4file->perfmon.idr);
+ mutex_unlock(&vc4file->perfmon.lock);
+}
+
+int vc4_perfmon_create_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct vc4_file *vc4file = file_priv->driver_priv;
+ struct drm_vc4_perfmon_create *req = data;
+ struct vc4_perfmon *perfmon;
+ unsigned int i;
+ int ret;
+
+ /* Number of monitored counters cannot exceed HW limits. */
+ if (req->ncounters > DRM_VC4_MAX_PERF_COUNTERS ||
+ !req->ncounters)
+ return -EINVAL;
+
+ /* Make sure all events are valid. */
+ for (i = 0; i < req->ncounters; i++) {
+ if (req->events[i] >= VC4_PERFCNT_NUM_EVENTS)
+ return -EINVAL;
+ }
+
+ perfmon = kzalloc(sizeof(*perfmon) + (req->ncounters * sizeof(u64)),
+ GFP_KERNEL);
+ if (!perfmon)
+ return -ENOMEM;
+
+ for (i = 0; i < req->ncounters; i++)
+ perfmon->events[i] = req->events[i];
+
+ perfmon->ncounters = req->ncounters;
+
+ refcount_set(&perfmon->refcnt, 1);
+
+ mutex_lock(&vc4file->perfmon.lock);
+ ret = idr_alloc(&vc4file->perfmon.idr, perfmon, VC4_PERFMONID_MIN,
+ VC4_PERFMONID_MAX, GFP_KERNEL);
+ mutex_unlock(&vc4file->perfmon.lock);
+
+ if (ret < 0) {
+ kfree(perfmon);
+ return ret;
+ }
+
+ req->id = ret;
+ return 0;
+}
+
+int vc4_perfmon_destroy_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct vc4_file *vc4file = file_priv->driver_priv;
+ struct drm_vc4_perfmon_destroy *req = data;
+ struct vc4_perfmon *perfmon;
+
+ mutex_lock(&vc4file->perfmon.lock);
+ perfmon = idr_remove(&vc4file->perfmon.idr, req->id);
+ mutex_unlock(&vc4file->perfmon.lock);
+
+ if (!perfmon)
+ return -EINVAL;
+
+ vc4_perfmon_put(perfmon);
+ return 0;
+}
+
+int vc4_perfmon_get_values_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct vc4_file *vc4file = file_priv->driver_priv;
+ struct drm_vc4_perfmon_get_values *req = data;
+ struct vc4_perfmon *perfmon;
+ int ret;
+
+ mutex_lock(&vc4file->perfmon.lock);
+ perfmon = idr_find(&vc4file->perfmon.idr, req->id);
+ vc4_perfmon_get(perfmon);
+ mutex_unlock(&vc4file->perfmon.lock);
+
+ if (!perfmon)
+ return -EINVAL;
+
+ if (copy_to_user(u64_to_user_ptr(req->values_ptr), perfmon->counters,
+ perfmon->ncounters * sizeof(u64)))
+ ret = -EFAULT;
+ else
+ ret = 0;
+
+ vc4_perfmon_put(perfmon);
+ return ret;
+}
diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index 515f97997624..ce39390be389 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -27,105 +27,50 @@
#include "vc4_drv.h"
#include "vc4_regs.h"
-enum vc4_scaling_mode {
- VC4_SCALING_NONE,
- VC4_SCALING_TPZ,
- VC4_SCALING_PPF,
-};
-
-struct vc4_plane_state {
- struct drm_plane_state base;
- /* System memory copy of the display list for this element, computed
- * at atomic_check time.
- */
- u32 *dlist;
- u32 dlist_size; /* Number of dwords allocated for the display list */
- u32 dlist_count; /* Number of used dwords in the display list. */
-
- /* Offset in the dlist to various words, for pageflip or
- * cursor updates.
- */
- u32 pos0_offset;
- u32 pos2_offset;
- u32 ptr0_offset;
-
- /* Offset where the plane's dlist was last stored in the
- * hardware at vc4_crtc_atomic_flush() time.
- */
- u32 __iomem *hw_dlist;
-
- /* Clipped coordinates of the plane on the display. */
- int crtc_x, crtc_y, crtc_w, crtc_h;
- /* Clipped area being scanned from in the FB. */
- u32 src_x, src_y;
-
- u32 src_w[2], src_h[2];
-
- /* Scaling selection for the RGB/Y plane and the Cb/Cr planes. */
- enum vc4_scaling_mode x_scaling[2], y_scaling[2];
- bool is_unity;
- bool is_yuv;
-
- /* Offset to start scanning out from the start of the plane's
- * BO.
- */
- u32 offsets[3];
-
- /* Our allocation in LBM for temporary storage during scaling. */
- struct drm_mm_node lbm;
-};
-
-static inline struct vc4_plane_state *
-to_vc4_plane_state(struct drm_plane_state *state)
-{
- return (struct vc4_plane_state *)state;
-}
-
static const struct hvs_format {
u32 drm; /* DRM_FORMAT_* */
u32 hvs; /* HVS_FORMAT_* */
u32 pixel_order;
- bool has_alpha;
} hvs_formats[] = {
{
.drm = DRM_FORMAT_XRGB8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888,
- .pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = false,
+ .pixel_order = HVS_PIXEL_ORDER_ABGR,
},
{
.drm = DRM_FORMAT_ARGB8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888,
- .pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = true,
+ .pixel_order = HVS_PIXEL_ORDER_ABGR,
},
{
.drm = DRM_FORMAT_ABGR8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888,
- .pixel_order = HVS_PIXEL_ORDER_ARGB, .has_alpha = true,
+ .pixel_order = HVS_PIXEL_ORDER_ARGB,
},
{
.drm = DRM_FORMAT_XBGR8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888,
- .pixel_order = HVS_PIXEL_ORDER_ARGB, .has_alpha = false,
+ .pixel_order = HVS_PIXEL_ORDER_ARGB,
},
{
.drm = DRM_FORMAT_RGB565, .hvs = HVS_PIXEL_FORMAT_RGB565,
- .pixel_order = HVS_PIXEL_ORDER_XRGB, .has_alpha = false,
+ .pixel_order = HVS_PIXEL_ORDER_XRGB,
},
{
.drm = DRM_FORMAT_BGR565, .hvs = HVS_PIXEL_FORMAT_RGB565,
- .pixel_order = HVS_PIXEL_ORDER_XBGR, .has_alpha = false,
+ .pixel_order = HVS_PIXEL_ORDER_XBGR,
},
{
.drm = DRM_FORMAT_ARGB1555, .hvs = HVS_PIXEL_FORMAT_RGBA5551,
- .pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = true,
+ .pixel_order = HVS_PIXEL_ORDER_ABGR,
},
{
.drm = DRM_FORMAT_XRGB1555, .hvs = HVS_PIXEL_FORMAT_RGBA5551,
- .pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = false,
+ .pixel_order = HVS_PIXEL_ORDER_ABGR,
},
{
.drm = DRM_FORMAT_RGB888, .hvs = HVS_PIXEL_FORMAT_RGB888,
- .pixel_order = HVS_PIXEL_ORDER_XRGB, .has_alpha = false,
+ .pixel_order = HVS_PIXEL_ORDER_XRGB,
},
{
.drm = DRM_FORMAT_BGR888, .hvs = HVS_PIXEL_FORMAT_RGB888,
- .pixel_order = HVS_PIXEL_ORDER_XBGR, .has_alpha = false,
+ .pixel_order = HVS_PIXEL_ORDER_XBGR,
},
{
.drm = DRM_FORMAT_YUV422,
@@ -522,6 +467,7 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
u32 ctl0_offset = vc4_state->dlist_count;
const struct hvs_format *format = vc4_get_hvs_format(fb->format->format);
int num_planes = drm_format_num_planes(format->drm);
+ bool covers_screen;
u32 scl0, scl1, pitch0;
u32 lbm_size, tiling;
unsigned long irqflags;
@@ -619,13 +565,14 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
SCALER_POS1_SCL_HEIGHT));
}
- /* Position Word 2: Source Image Size, Alpha Mode */
+ /* Position Word 2: Source Image Size, Alpha */
vc4_state->pos2_offset = vc4_state->dlist_count;
vc4_dlist_write(vc4_state,
- VC4_SET_FIELD(format->has_alpha ?
+ VC4_SET_FIELD(fb->format->has_alpha ?
SCALER_POS2_ALPHA_MODE_PIPELINE :
SCALER_POS2_ALPHA_MODE_FIXED,
SCALER_POS2_ALPHA_MODE) |
+ (fb->format->has_alpha ? SCALER_POS2_ALPHA_PREMULT : 0) |
VC4_SET_FIELD(vc4_state->src_w[0], SCALER_POS2_WIDTH) |
VC4_SET_FIELD(vc4_state->src_h[0], SCALER_POS2_HEIGHT));
@@ -701,6 +648,16 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
vc4_state->dlist[ctl0_offset] |=
VC4_SET_FIELD(vc4_state->dlist_count, SCALER_CTL0_SIZE);
+ /* crtc_* are already clipped coordinates. */
+ covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 &&
+ vc4_state->crtc_w == state->crtc->mode.hdisplay &&
+ vc4_state->crtc_h == state->crtc->mode.vdisplay;
+ /* Background fill might be necessary when the plane has per-pixel
+ * alpha content and blends from the background or does not cover
+ * the entire screen.
+ */
+ vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen;
+
return 0;
}
@@ -907,6 +864,32 @@ out:
ctx);
}
+static bool vc4_format_mod_supported(struct drm_plane *plane,
+ uint32_t format,
+ uint64_t modifier)
+{
+ /* Support T_TILING for RGB formats only. */
+ switch (format) {
+ case DRM_FORMAT_XRGB8888:
+ case DRM_FORMAT_ARGB8888:
+ case DRM_FORMAT_ABGR8888:
+ case DRM_FORMAT_XBGR8888:
+ case DRM_FORMAT_RGB565:
+ case DRM_FORMAT_BGR565:
+ case DRM_FORMAT_ARGB1555:
+ case DRM_FORMAT_XRGB1555:
+ return true;
+ case DRM_FORMAT_YUV422:
+ case DRM_FORMAT_YVU422:
+ case DRM_FORMAT_YUV420:
+ case DRM_FORMAT_YVU420:
+ case DRM_FORMAT_NV12:
+ case DRM_FORMAT_NV16:
+ default:
+ return (modifier == DRM_FORMAT_MOD_LINEAR);
+ }
+}
+
static const struct drm_plane_funcs vc4_plane_funcs = {
.update_plane = vc4_update_plane,
.disable_plane = drm_atomic_helper_disable_plane,
@@ -915,6 +898,7 @@ static const struct drm_plane_funcs vc4_plane_funcs = {
.reset = vc4_plane_reset,
.atomic_duplicate_state = vc4_plane_duplicate_state,
.atomic_destroy_state = vc4_plane_destroy_state,
+ .format_mod_supported = vc4_format_mod_supported,
};
struct drm_plane *vc4_plane_init(struct drm_device *dev,
@@ -926,6 +910,11 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev,
u32 num_formats = 0;
int ret = 0;
unsigned i;
+ static const uint64_t modifiers[] = {
+ DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED,
+ DRM_FORMAT_MOD_LINEAR,
+ DRM_FORMAT_MOD_INVALID
+ };
vc4_plane = devm_kzalloc(dev->dev, sizeof(*vc4_plane),
GFP_KERNEL);
@@ -946,7 +935,7 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev,
ret = drm_universal_plane_init(dev, plane, 0,
&vc4_plane_funcs,
formats, num_formats,
- NULL, type, NULL);
+ modifiers, type, NULL);
drm_plane_helper_add(plane, &vc4_plane_helper_funcs);
diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h
index 55677bd50f66..a141496104a6 100644
--- a/drivers/gpu/drm/vc4/vc4_regs.h
+++ b/drivers/gpu/drm/vc4/vc4_regs.h
@@ -122,38 +122,9 @@
#define V3D_VPMBASE 0x00504
#define V3D_PCTRC 0x00670
#define V3D_PCTRE 0x00674
-#define V3D_PCTR0 0x00680
-#define V3D_PCTRS0 0x00684
-#define V3D_PCTR1 0x00688
-#define V3D_PCTRS1 0x0068c
-#define V3D_PCTR2 0x00690
-#define V3D_PCTRS2 0x00694
-#define V3D_PCTR3 0x00698
-#define V3D_PCTRS3 0x0069c
-#define V3D_PCTR4 0x006a0
-#define V3D_PCTRS4 0x006a4
-#define V3D_PCTR5 0x006a8
-#define V3D_PCTRS5 0x006ac
-#define V3D_PCTR6 0x006b0
-#define V3D_PCTRS6 0x006b4
-#define V3D_PCTR7 0x006b8
-#define V3D_PCTRS7 0x006bc
-#define V3D_PCTR8 0x006c0
-#define V3D_PCTRS8 0x006c4
-#define V3D_PCTR9 0x006c8
-#define V3D_PCTRS9 0x006cc
-#define V3D_PCTR10 0x006d0
-#define V3D_PCTRS10 0x006d4
-#define V3D_PCTR11 0x006d8
-#define V3D_PCTRS11 0x006dc
-#define V3D_PCTR12 0x006e0
-#define V3D_PCTRS12 0x006e4
-#define V3D_PCTR13 0x006e8
-#define V3D_PCTRS13 0x006ec
-#define V3D_PCTR14 0x006f0
-#define V3D_PCTRS14 0x006f4
-#define V3D_PCTR15 0x006f8
-#define V3D_PCTRS15 0x006fc
+# define V3D_PCTRE_EN BIT(31)
+#define V3D_PCTR(x) (0x00680 + ((x) * 8))
+#define V3D_PCTRS(x) (0x00684 + ((x) * 8))
#define V3D_DBGE 0x00f00
#define V3D_FDBGO 0x00f04
#define V3D_FDBGB 0x00f08
@@ -877,6 +848,7 @@ enum hvs_pixel_format {
#define SCALER_POS2_ALPHA_MODE_FIXED 1
#define SCALER_POS2_ALPHA_MODE_FIXED_NONZERO 2
#define SCALER_POS2_ALPHA_MODE_FIXED_OVER_0x07 3
+#define SCALER_POS2_ALPHA_PREMULT BIT(29)
#define SCALER_POS2_HEIGHT_MASK VC4_MASK(27, 16)
#define SCALER_POS2_HEIGHT_SHIFT 16
diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c
index 493f392b3a0a..bfc2fa73d2ae 100644
--- a/drivers/gpu/drm/vc4/vc4_v3d.c
+++ b/drivers/gpu/drm/vc4/vc4_v3d.c
@@ -68,38 +68,38 @@ static const struct {
REGDEF(V3D_VPMBASE),
REGDEF(V3D_PCTRC),
REGDEF(V3D_PCTRE),
- REGDEF(V3D_PCTR0),
- REGDEF(V3D_PCTRS0),
- REGDEF(V3D_PCTR1),
- REGDEF(V3D_PCTRS1),
- REGDEF(V3D_PCTR2),
- REGDEF(V3D_PCTRS2),
- REGDEF(V3D_PCTR3),
- REGDEF(V3D_PCTRS3),
- REGDEF(V3D_PCTR4),
- REGDEF(V3D_PCTRS4),
- REGDEF(V3D_PCTR5),
- REGDEF(V3D_PCTRS5),
- REGDEF(V3D_PCTR6),
- REGDEF(V3D_PCTRS6),
- REGDEF(V3D_PCTR7),
- REGDEF(V3D_PCTRS7),
- REGDEF(V3D_PCTR8),
- REGDEF(V3D_PCTRS8),
- REGDEF(V3D_PCTR9),
- REGDEF(V3D_PCTRS9),
- REGDEF(V3D_PCTR10),
- REGDEF(V3D_PCTRS10),
- REGDEF(V3D_PCTR11),
- REGDEF(V3D_PCTRS11),
- REGDEF(V3D_PCTR12),
- REGDEF(V3D_PCTRS12),
- REGDEF(V3D_PCTR13),
- REGDEF(V3D_PCTRS13),
- REGDEF(V3D_PCTR14),
- REGDEF(V3D_PCTRS14),
- REGDEF(V3D_PCTR15),
- REGDEF(V3D_PCTRS15),
+ REGDEF(V3D_PCTR(0)),
+ REGDEF(V3D_PCTRS(0)),
+ REGDEF(V3D_PCTR(1)),
+ REGDEF(V3D_PCTRS(1)),
+ REGDEF(V3D_PCTR(2)),
+ REGDEF(V3D_PCTRS(2)),
+ REGDEF(V3D_PCTR(3)),
+ REGDEF(V3D_PCTRS(3)),
+ REGDEF(V3D_PCTR(4)),
+ REGDEF(V3D_PCTRS(4)),
+ REGDEF(V3D_PCTR(5)),
+ REGDEF(V3D_PCTRS(5)),
+ REGDEF(V3D_PCTR(6)),
+ REGDEF(V3D_PCTRS(6)),
+ REGDEF(V3D_PCTR(7)),
+ REGDEF(V3D_PCTRS(7)),
+ REGDEF(V3D_PCTR(8)),
+ REGDEF(V3D_PCTRS(8)),
+ REGDEF(V3D_PCTR(9)),
+ REGDEF(V3D_PCTRS(9)),
+ REGDEF(V3D_PCTR(10)),
+ REGDEF(V3D_PCTRS(10)),
+ REGDEF(V3D_PCTR(11)),
+ REGDEF(V3D_PCTRS(11)),
+ REGDEF(V3D_PCTR(12)),
+ REGDEF(V3D_PCTRS(12)),
+ REGDEF(V3D_PCTR(13)),
+ REGDEF(V3D_PCTRS(13)),
+ REGDEF(V3D_PCTR(14)),
+ REGDEF(V3D_PCTRS(14)),
+ REGDEF(V3D_PCTR(15)),
+ REGDEF(V3D_PCTRS(15)),
REGDEF(V3D_DBGE),
REGDEF(V3D_FDBGO),
REGDEF(V3D_FDBGB),
diff --git a/drivers/gpu/drm/vc4/vc4_validate.c b/drivers/gpu/drm/vc4/vc4_validate.c
index 2db485abb186..eec76af49f04 100644
--- a/drivers/gpu/drm/vc4/vc4_validate.c
+++ b/drivers/gpu/drm/vc4/vc4_validate.c
@@ -753,7 +753,7 @@ validate_gl_shader_rec(struct drm_device *dev,
28, /* cs */
};
uint32_t shader_reloc_count = ARRAY_SIZE(shader_reloc_offsets);
- struct drm_gem_cma_object *bo[shader_reloc_count + 8];
+ struct drm_gem_cma_object *bo[ARRAY_SIZE(shader_reloc_offsets) + 8];
uint32_t nr_attributes, nr_relocs, packet_size;
int i;