From 32a3dbeb2b951044111042ce6ccb2dcd7bca35de Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 25 Jan 2016 22:16:55 +0100 Subject: drm/vc4: Nuke preclose hook Again since the drm core takes care of event unlinking/disarming this is now just needless code. v2: Fixup misplaced hunk. Cc: Eric Anholt Acked-by: Daniel Stone Reviewed-by: Alex Deucher (v1) Acked-by: Eric Anholt Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1453756616-28942-14-git-send-email-daniel.vetter@ffwll.ch --- drivers/gpu/drm/vc4/vc4_crtc.c | 20 -------------------- drivers/gpu/drm/vc4/vc4_drv.c | 10 ---------- drivers/gpu/drm/vc4/vc4_drv.h | 1 - 3 files changed, 31 deletions(-) (limited to 'drivers/gpu/drm/vc4') diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index 018145e0b87d..937409792b97 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -593,26 +593,6 @@ static const struct drm_crtc_helper_funcs vc4_crtc_helper_funcs = { .atomic_flush = vc4_crtc_atomic_flush, }; -/* Frees the page flip event when the DRM device is closed with the - * event still outstanding. - */ -void vc4_cancel_page_flip(struct drm_crtc *crtc, struct drm_file *file) -{ - struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc); - struct drm_device *dev = crtc->dev; - unsigned long flags; - - spin_lock_irqsave(&dev->event_lock, flags); - - if (vc4_crtc->event && vc4_crtc->event->base.file_priv == file) { - vc4_crtc->event->base.destroy(&vc4_crtc->event->base); - drm_crtc_vblank_put(crtc); - vc4_crtc->event = NULL; - } - - spin_unlock_irqrestore(&dev->event_lock, flags); -} - static const struct vc4_crtc_data pv0_data = { .hvs_channel = 0, .encoder0_type = VC4_ENCODER_TYPE_DSI0, diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index f1655fff8425..b7d2ff0e6e1f 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -43,14 +43,6 @@ void __iomem *vc4_ioremap_regs(struct platform_device *dev, int index) return map; } -static void vc4_drm_preclose(struct drm_device *dev, struct drm_file *file) -{ - struct drm_crtc *crtc; - - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) - vc4_cancel_page_flip(crtc, file); -} - static void vc4_lastclose(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); @@ -91,8 +83,6 @@ static struct drm_driver vc4_drm_driver = { DRIVER_HAVE_IRQ | DRIVER_PRIME), .lastclose = vc4_lastclose, - .preclose = vc4_drm_preclose, - .irq_handler = vc4_irq, .irq_preinstall = vc4_irq_preinstall, .irq_postinstall = vc4_irq_postinstall, diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index 080865ec2bae..4c734d087d7f 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -376,7 +376,6 @@ int vc4_bo_stats_debugfs(struct seq_file *m, void *arg); extern struct platform_driver vc4_crtc_driver; int vc4_enable_vblank(struct drm_device *dev, unsigned int crtc_id); void vc4_disable_vblank(struct drm_device *dev, unsigned int crtc_id); -void vc4_cancel_page_flip(struct drm_crtc *crtc, struct drm_file *file); int vc4_crtc_debugfs_regs(struct seq_file *m, void *arg); /* vc4_debugfs.c */ -- cgit From f427fb16cf756548c39256b569cf083f39bcc4e9 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 28 Dec 2015 14:14:09 -0800 Subject: drm/vc4: Improve comments on vc4_plane_state members. Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_plane.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/vc4') diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 0addbad15832..45e353d65c3d 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -26,16 +26,19 @@ struct vc4_plane_state { struct drm_plane_state base; + /* System memory copy of the display list for this element, computed + * at atomic_check time. + */ u32 *dlist; - u32 dlist_size; /* Number of dwords in allocated for the display list */ + u32 dlist_size; /* Number of dwords allocated for the display list */ u32 dlist_count; /* Number of used dwords in the display list. */ /* Offset in the dlist to pointer word 0. */ u32 pw0_offset; /* Offset where the plane's dlist was last stored in the - hardware at vc4_crtc_atomic_flush() time. - */ + * hardware at vc4_crtc_atomic_flush() time. + */ u32 *hw_dlist; }; -- cgit From 17eac75111ebda33e13d8d8d98aaedfc1a9c2abf Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 28 Dec 2015 14:14:57 -0800 Subject: drm/vc4: Add missing __iomem annotation to hw_dlist. This is the pointer to the HVS device's memory where we stored the contents of *dlist. Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_plane.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/vc4') diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 45e353d65c3d..ed07ee57e1bf 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -39,7 +39,7 @@ struct vc4_plane_state { /* Offset where the plane's dlist was last stored in the * hardware at vc4_crtc_atomic_flush() time. */ - u32 *hw_dlist; + u32 __iomem *hw_dlist; }; static inline struct vc4_plane_state * -- cgit From 5c6799942003df91801b1d2277bba34d71f99603 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 28 Dec 2015 14:34:44 -0800 Subject: drm/vc4: Move the plane clipping/scaling setup to a separate function. As we add actual scaling, this is going to get way more complicated. Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_plane.c | 78 +++++++++++++++++++++++++++-------------- 1 file changed, 52 insertions(+), 26 deletions(-) (limited to 'drivers/gpu/drm/vc4') diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index ed07ee57e1bf..554ed54cc8a7 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -40,6 +40,14 @@ struct vc4_plane_state { * hardware at vc4_crtc_atomic_flush() time. */ u32 __iomem *hw_dlist; + + /* Clipped coordinates of the plane on the display. */ + int crtc_x, crtc_y, crtc_w, crtc_h; + + /* Offset to start scanning out from the start of the plane's + * BO. + */ + u32 offset; }; static inline struct vc4_plane_state * @@ -151,22 +159,17 @@ static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val) vc4_state->dlist[vc4_state->dlist_count++] = val; } -/* Writes out a full display list for an active plane to the plane's - * private dlist state. - */ -static int vc4_plane_mode_set(struct drm_plane *plane, - struct drm_plane_state *state) +static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) { struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); struct drm_framebuffer *fb = state->fb; - struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0); - u32 ctl0_offset = vc4_state->dlist_count; - const struct hvs_format *format = vc4_get_hvs_format(fb->pixel_format); - uint32_t offset = fb->offsets[0]; - int crtc_x = state->crtc_x; - int crtc_y = state->crtc_y; - int crtc_w = state->crtc_w; - int crtc_h = state->crtc_h; + + vc4_state->offset = fb->offsets[0]; + + vc4_state->crtc_x = state->crtc_x; + vc4_state->crtc_y = state->crtc_y; + vc4_state->crtc_w = state->crtc_w; + vc4_state->crtc_h = state->crtc_h; if (state->crtc_w << 16 != state->src_w || state->crtc_h << 16 != state->src_h) { @@ -178,18 +181,41 @@ static int vc4_plane_mode_set(struct drm_plane *plane, return -EINVAL; } - if (crtc_x < 0) { - offset += drm_format_plane_cpp(fb->pixel_format, 0) * -crtc_x; - crtc_w += crtc_x; - crtc_x = 0; + if (vc4_state->crtc_x < 0) { + vc4_state->offset += (drm_format_plane_cpp(fb->pixel_format, + 0) * + -vc4_state->crtc_x); + vc4_state->crtc_w += vc4_state->crtc_x; + vc4_state->crtc_x = 0; } - if (crtc_y < 0) { - offset += fb->pitches[0] * -crtc_y; - crtc_h += crtc_y; - crtc_y = 0; + if (vc4_state->crtc_y < 0) { + vc4_state->offset += fb->pitches[0] * -vc4_state->crtc_y; + vc4_state->crtc_h += vc4_state->crtc_y; + vc4_state->crtc_y = 0; } + return 0; +} + + +/* Writes out a full display list for an active plane to the plane's + * private dlist state. + */ +static int vc4_plane_mode_set(struct drm_plane *plane, + struct drm_plane_state *state) +{ + struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); + struct drm_framebuffer *fb = state->fb; + struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0); + u32 ctl0_offset = vc4_state->dlist_count; + const struct hvs_format *format = vc4_get_hvs_format(fb->pixel_format); + int ret; + + ret = vc4_plane_setup_clipping_and_scaling(state); + if (ret) + return ret; + vc4_dlist_write(vc4_state, SCALER_CTL0_VALID | (format->pixel_order << SCALER_CTL0_ORDER_SHIFT) | @@ -199,8 +225,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane, /* Position Word 0: Image Positions and Alpha Value */ vc4_dlist_write(vc4_state, VC4_SET_FIELD(0xff, SCALER_POS0_FIXED_ALPHA) | - VC4_SET_FIELD(crtc_x, SCALER_POS0_START_X) | - VC4_SET_FIELD(crtc_y, SCALER_POS0_START_Y)); + VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) | + VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y)); /* Position Word 1: Scaled Image Dimensions. * Skipped due to SCALER_CTL0_UNITY scaling. @@ -212,8 +238,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane, SCALER_POS2_ALPHA_MODE_PIPELINE : SCALER_POS2_ALPHA_MODE_FIXED, SCALER_POS2_ALPHA_MODE) | - VC4_SET_FIELD(crtc_w, SCALER_POS2_WIDTH) | - VC4_SET_FIELD(crtc_h, SCALER_POS2_HEIGHT)); + VC4_SET_FIELD(vc4_state->crtc_w, SCALER_POS2_WIDTH) | + VC4_SET_FIELD(vc4_state->crtc_h, SCALER_POS2_HEIGHT)); /* Position Word 3: Context. Written by the HVS. */ vc4_dlist_write(vc4_state, 0xc0c0c0c0); @@ -221,7 +247,7 @@ static int vc4_plane_mode_set(struct drm_plane *plane, vc4_state->pw0_offset = vc4_state->dlist_count; /* Pointer Word 0: RGB / Y Pointer */ - vc4_dlist_write(vc4_state, bo->paddr + offset); + vc4_dlist_write(vc4_state, bo->paddr + vc4_state->offset); /* Pointer Context Word 0: Written by the HVS */ vc4_dlist_write(vc4_state, 0xc0c0c0c0); -- cgit From 6674a904d68041d982ffb284d2827410765a097a Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 30 Dec 2015 11:50:22 -0800 Subject: drm/vc4: Add a proper short-circut path for legacy cursor updates. Previously, on every modeset we would allocate new display list memory, recompute changed planes, write all of them to the new memory, and pointed scanout at the new list (which will latch approximately at the next line of scanout). We let drm_atomic_helper_wait_for_vblanks() decide whether we needed to wait for a vblank after a modeset before cleaning up the old state and letting the next modeset proceed, and on legacy cursor updates we wouldn't wait. If you moved the cursor fast enough, we could potentially wrap around the display list memory area and overwrite the existing display list while it was still being scanned out, resulting in the HVS scanning out garbage or just halting. Instead of making cursor updates wait for scanout to move to the new display list area (which introduces significant cursor lag in X), we just rewrite our current display list. Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_kms.c | 9 ++++ drivers/gpu/drm/vc4/vc4_plane.c | 94 ++++++++++++++++++++++++++++++++++++++--- 2 files changed, 96 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/vc4') diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index f95f2df5f8d1..4718ae5176cc 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -49,6 +49,15 @@ vc4_atomic_complete_commit(struct vc4_commit *c) drm_atomic_helper_commit_modeset_enables(dev, state); + /* Make sure that drm_atomic_helper_wait_for_vblanks() + * actually waits for vblank. If we're doing a full atomic + * modeset (as opposed to a vc4_update_plane() short circuit), + * then we need to wait for scanout to be done with our + * display lists before we free it and potentially reallocate + * and overwrite the dlist memory with a new modeset. + */ + state->legacy_cursor_update = false; + drm_atomic_helper_wait_for_vblanks(dev, state); drm_atomic_helper_cleanup_planes(dev, state); diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 554ed54cc8a7..713ec006baa9 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -33,8 +33,12 @@ struct vc4_plane_state { u32 dlist_size; /* Number of dwords allocated for the display list */ u32 dlist_count; /* Number of used dwords in the display list. */ - /* Offset in the dlist to pointer word 0. */ - u32 pw0_offset; + /* Offset in the dlist to various words, for pageflip or + * cursor updates. + */ + u32 pos0_offset; + u32 pos2_offset; + u32 ptr0_offset; /* Offset where the plane's dlist was last stored in the * hardware at vc4_crtc_atomic_flush() time. @@ -223,6 +227,7 @@ static int vc4_plane_mode_set(struct drm_plane *plane, SCALER_CTL0_UNITY); /* Position Word 0: Image Positions and Alpha Value */ + vc4_state->pos0_offset = vc4_state->dlist_count; vc4_dlist_write(vc4_state, VC4_SET_FIELD(0xff, SCALER_POS0_FIXED_ALPHA) | VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) | @@ -233,6 +238,7 @@ static int vc4_plane_mode_set(struct drm_plane *plane, */ /* Position Word 2: Source Image Size, Alpha Mode */ + vc4_state->pos2_offset = vc4_state->dlist_count; vc4_dlist_write(vc4_state, VC4_SET_FIELD(format->has_alpha ? SCALER_POS2_ALPHA_MODE_PIPELINE : @@ -244,9 +250,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane, /* Position Word 3: Context. Written by the HVS. */ vc4_dlist_write(vc4_state, 0xc0c0c0c0); - vc4_state->pw0_offset = vc4_state->dlist_count; - /* Pointer Word 0: RGB / Y Pointer */ + vc4_state->ptr0_offset = vc4_state->dlist_count; vc4_dlist_write(vc4_state, bo->paddr + vc4_state->offset); /* Pointer Context Word 0: Written by the HVS */ @@ -332,13 +337,13 @@ void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb) * scanout will start from this address as soon as the FIFO * needs to refill with pixels. */ - writel(addr, &vc4_state->hw_dlist[vc4_state->pw0_offset]); + writel(addr, &vc4_state->hw_dlist[vc4_state->ptr0_offset]); /* Also update the CPU-side dlist copy, so that any later * atomic updates that don't do a new modeset on our plane * also use our updated address. */ - vc4_state->dlist[vc4_state->pw0_offset] = addr; + vc4_state->dlist[vc4_state->ptr0_offset] = addr; } static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = { @@ -354,8 +359,83 @@ static void vc4_plane_destroy(struct drm_plane *plane) drm_plane_cleanup(plane); } +/* Implements immediate (non-vblank-synced) updates of the cursor + * position, or falls back to the atomic helper otherwise. + */ +static int +vc4_update_plane(struct drm_plane *plane, + struct drm_crtc *crtc, + struct drm_framebuffer *fb, + int crtc_x, int crtc_y, + unsigned int crtc_w, unsigned int crtc_h, + uint32_t src_x, uint32_t src_y, + uint32_t src_w, uint32_t src_h) +{ + struct drm_plane_state *plane_state; + struct vc4_plane_state *vc4_state; + + if (plane != crtc->cursor) + goto out; + + plane_state = plane->state; + vc4_state = to_vc4_plane_state(plane_state); + + if (!plane_state) + goto out; + + /* If we're changing the cursor contents, do that in the + * normal vblank-synced atomic path. + */ + if (fb != plane_state->fb) + goto out; + + /* No configuring new scaling in the fast path. */ + if (crtc_w != plane_state->crtc_w || + crtc_h != plane_state->crtc_h || + src_w != plane_state->src_w || + src_h != plane_state->src_h) { + goto out; + } + + /* Set the cursor's position on the screen. This is the + * expected change from the drm_mode_cursor_universal() + * helper. + */ + plane_state->crtc_x = crtc_x; + plane_state->crtc_y = crtc_y; + + /* Allow changing the start position within the cursor BO, if + * that matters. + */ + plane_state->src_x = src_x; + plane_state->src_y = src_y; + + /* Update the display list based on the new crtc_x/y. */ + vc4_plane_atomic_check(plane, plane_state); + + /* Note that we can't just call vc4_plane_write_dlist() + * because that would smash the context data that the HVS is + * currently using. + */ + writel(vc4_state->dlist[vc4_state->pos0_offset], + &vc4_state->hw_dlist[vc4_state->pos0_offset]); + writel(vc4_state->dlist[vc4_state->pos2_offset], + &vc4_state->hw_dlist[vc4_state->pos2_offset]); + writel(vc4_state->dlist[vc4_state->ptr0_offset], + &vc4_state->hw_dlist[vc4_state->ptr0_offset]); + + return 0; + +out: + return drm_atomic_helper_update_plane(plane, crtc, fb, + crtc_x, crtc_y, + crtc_w, crtc_h, + src_x, src_y, + src_w, src_h); +} + static const struct drm_plane_funcs vc4_plane_funcs = { - .update_plane = drm_atomic_helper_update_plane, + .update_plane = vc4_update_plane, .disable_plane = drm_atomic_helper_disable_plane, .destroy = vc4_plane_destroy, .set_property = NULL, -- cgit From d8dbf44f13b91185c618219d912b246817a8d132 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 28 Dec 2015 13:25:41 -0800 Subject: drm/vc4: Make the CRTCs cooperate on allocating display lists. So far, we've only ever lit up one CRTC, so this has been fine. To extend to more displays or more planes, we need to make sure we don't run our display lists into each other. Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_crtc.c | 115 +++++++++++++++++++++++------------------ drivers/gpu/drm/vc4/vc4_drv.h | 8 ++- drivers/gpu/drm/vc4/vc4_hvs.c | 13 +++++ 3 files changed, 84 insertions(+), 52 deletions(-) (limited to 'drivers/gpu/drm/vc4') diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index 937409792b97..6cf931557e97 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -49,22 +49,27 @@ struct vc4_crtc { /* Which HVS channel we're using for our CRTC. */ int channel; - /* Pointer to the actual hardware display list memory for the - * crtc. - */ - u32 __iomem *dlist; - - u32 dlist_size; /* in dwords */ - struct drm_pending_vblank_event *event; }; +struct vc4_crtc_state { + struct drm_crtc_state base; + /* Dlist area for this CRTC configuration. */ + struct drm_mm_node mm; +}; + static inline struct vc4_crtc * to_vc4_crtc(struct drm_crtc *crtc) { return (struct vc4_crtc *)crtc; } +static inline struct vc4_crtc_state * +to_vc4_crtc_state(struct drm_crtc_state *crtc_state) +{ + return (struct vc4_crtc_state *)crtc_state; +} + struct vc4_crtc_data { /* Which channel of the HVS this pixelvalve sources from. */ int hvs_channel; @@ -319,11 +324,13 @@ static void vc4_crtc_enable(struct drm_crtc *crtc) static int vc4_crtc_atomic_check(struct drm_crtc *crtc, struct drm_crtc_state *state) { + struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(state); struct drm_device *dev = crtc->dev; struct vc4_dev *vc4 = to_vc4_dev(dev); struct drm_plane *plane; - struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc); + unsigned long flags; u32 dlist_count = 0; + int ret; /* The pixelvalve can only feed one encoder (and encoders are * 1:1 with connectors.) @@ -346,18 +353,12 @@ static int vc4_crtc_atomic_check(struct drm_crtc *crtc, dlist_count++; /* Account for SCALER_CTL0_END. */ - if (!vc4_crtc->dlist || dlist_count > vc4_crtc->dlist_size) { - vc4_crtc->dlist = ((u32 __iomem *)vc4->hvs->dlist + - HVS_BOOTLOADER_DLIST_END); - vc4_crtc->dlist_size = ((SCALER_DLIST_SIZE >> 2) - - HVS_BOOTLOADER_DLIST_END); - - if (dlist_count > vc4_crtc->dlist_size) { - DRM_DEBUG_KMS("dlist too large for CRTC (%d > %d).\n", - dlist_count, vc4_crtc->dlist_size); - return -EINVAL; - } - } + spin_lock_irqsave(&vc4->hvs->mm_lock, flags); + ret = drm_mm_insert_node(&vc4->hvs->dlist_mm, &vc4_state->mm, + dlist_count, 1, 0); + spin_unlock_irqrestore(&vc4->hvs->mm_lock, flags); + if (ret) + return ret; return 0; } @@ -368,47 +369,29 @@ static void vc4_crtc_atomic_flush(struct drm_crtc *crtc, struct drm_device *dev = crtc->dev; struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc); + struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state); struct drm_plane *plane; bool debug_dump_regs = false; - u32 __iomem *dlist_next = vc4_crtc->dlist; + u32 __iomem *dlist_start = vc4->hvs->dlist + vc4_state->mm.start; + u32 __iomem *dlist_next = dlist_start; if (debug_dump_regs) { DRM_INFO("CRTC %d HVS before:\n", drm_crtc_index(crtc)); vc4_hvs_dump_state(dev); } - /* Copy all the active planes' dlist contents to the hardware dlist. - * - * XXX: If the new display list was large enough that it - * overlapped a currently-read display list, we need to do - * something like disable scanout before putting in the new - * list. For now, we're safe because we only have the two - * planes. - */ + /* Copy all the active planes' dlist contents to the hardware dlist. */ drm_atomic_crtc_for_each_plane(plane, crtc) { dlist_next += vc4_plane_write_dlist(plane, dlist_next); } - if (dlist_next == vc4_crtc->dlist) { - /* If no planes were enabled, use the SCALER_CTL0_END - * at the start of the display list memory (in the - * bootloader section). We'll rewrite that - * SCALER_CTL0_END, just in case, though. - */ - writel(SCALER_CTL0_END, vc4->hvs->dlist); - HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel), 0); - } else { - writel(SCALER_CTL0_END, dlist_next); - dlist_next++; - - HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel), - (u32 __iomem *)vc4_crtc->dlist - - (u32 __iomem *)vc4->hvs->dlist); - - /* Make the next display list start after ours. */ - vc4_crtc->dlist_size -= (dlist_next - vc4_crtc->dlist); - vc4_crtc->dlist = dlist_next; - } + writel(SCALER_CTL0_END, dlist_next); + dlist_next++; + + WARN_ON_ONCE(dlist_next - dlist_start != vc4_state->mm.size); + + HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel), + vc4_state->mm.start); if (debug_dump_regs) { DRM_INFO("CRTC %d HVS after:\n", drm_crtc_index(crtc)); @@ -573,6 +556,36 @@ static int vc4_page_flip(struct drm_crtc *crtc, return drm_atomic_helper_page_flip(crtc, fb, event, flags); } +static struct drm_crtc_state *vc4_crtc_duplicate_state(struct drm_crtc *crtc) +{ + struct vc4_crtc_state *vc4_state; + + vc4_state = kzalloc(sizeof(*vc4_state), GFP_KERNEL); + if (!vc4_state) + return NULL; + + __drm_atomic_helper_crtc_duplicate_state(crtc, &vc4_state->base); + return &vc4_state->base; +} + +static void vc4_crtc_destroy_state(struct drm_crtc *crtc, + struct drm_crtc_state *state) +{ + struct vc4_dev *vc4 = to_vc4_dev(crtc->dev); + struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(state); + + if (vc4_state->mm.allocated) { + unsigned long flags; + + spin_lock_irqsave(&vc4->hvs->mm_lock, flags); + drm_mm_remove_node(&vc4_state->mm); + spin_unlock_irqrestore(&vc4->hvs->mm_lock, flags); + + } + + __drm_atomic_helper_crtc_destroy_state(crtc, state); +} + static const struct drm_crtc_funcs vc4_crtc_funcs = { .set_config = drm_atomic_helper_set_config, .destroy = vc4_crtc_destroy, @@ -581,8 +594,8 @@ static const struct drm_crtc_funcs vc4_crtc_funcs = { .cursor_set = NULL, /* handled by drm_mode_cursor_universal */ .cursor_move = NULL, /* handled by drm_mode_cursor_universal */ .reset = drm_atomic_helper_crtc_reset, - .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state, - .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state, + .atomic_duplicate_state = vc4_crtc_duplicate_state, + .atomic_destroy_state = vc4_crtc_destroy_state, }; static const struct drm_crtc_helper_funcs vc4_crtc_helper_funcs = { diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index 4c734d087d7f..ae9802486080 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -149,7 +149,13 @@ struct vc4_v3d { struct vc4_hvs { struct platform_device *pdev; void __iomem *regs; - void __iomem *dlist; + u32 __iomem *dlist; + + /* Memory manager for CRTCs to allocate space in the display + * list. Units are dwords. + */ + struct drm_mm dlist_mm; + spinlock_t mm_lock; }; struct vc4_plane { diff --git a/drivers/gpu/drm/vc4/vc4_hvs.c b/drivers/gpu/drm/vc4/vc4_hvs.c index 8098c5b21ba4..9e435545b3b6 100644 --- a/drivers/gpu/drm/vc4/vc4_hvs.c +++ b/drivers/gpu/drm/vc4/vc4_hvs.c @@ -119,6 +119,17 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data) hvs->dlist = hvs->regs + SCALER_DLIST_START; + spin_lock_init(&hvs->mm_lock); + + /* Set up the HVS display list memory manager. We never + * overwrite the setup from the bootloader (just 128b out of + * our 16K), since we don't want to scramble the screen when + * transitioning from the firmware's boot setup to runtime. + */ + drm_mm_init(&hvs->dlist_mm, + HVS_BOOTLOADER_DLIST_END, + (SCALER_DLIST_SIZE >> 2) - HVS_BOOTLOADER_DLIST_END); + vc4->hvs = hvs; return 0; } @@ -129,6 +140,8 @@ static void vc4_hvs_unbind(struct device *dev, struct device *master, struct drm_device *drm = dev_get_drvdata(master); struct vc4_dev *vc4 = drm->dev_private; + drm_mm_takedown(&vc4->hvs->dlist_mm); + vc4->hvs = NULL; } -- cgit From fc2d6f1eabee9d971453da2a27a72471c2a347dd Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 20 Oct 2015 14:18:56 +0100 Subject: drm/vc4: Add more display planes to each CRTC. Previously we only did the primary and cursor plane, but overlay planes are useful and just require this setup to add, since all planes go into the HVS display list in the same way. Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_crtc.c | 56 ++++++++++++++++++++++++++++++------------ 1 file changed, 40 insertions(+), 16 deletions(-) (limited to 'drivers/gpu/drm/vc4') diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index 6cf931557e97..619dc781c517 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -657,9 +657,9 @@ static int vc4_crtc_bind(struct device *dev, struct device *master, void *data) struct vc4_dev *vc4 = to_vc4_dev(drm); struct vc4_crtc *vc4_crtc; struct drm_crtc *crtc; - struct drm_plane *primary_plane, *cursor_plane; + struct drm_plane *primary_plane, *cursor_plane, *destroy_plane, *temp; const struct of_device_id *match; - int ret; + int ret, i; vc4_crtc = devm_kzalloc(dev, sizeof(*vc4_crtc), GFP_KERNEL); if (!vc4_crtc) @@ -688,27 +688,49 @@ static int vc4_crtc_bind(struct device *dev, struct device *master, void *data) goto err; } - cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR); - if (IS_ERR(cursor_plane)) { - dev_err(dev, "failed to construct cursor plane\n"); - ret = PTR_ERR(cursor_plane); - goto err_primary; - } - - drm_crtc_init_with_planes(drm, crtc, primary_plane, cursor_plane, + drm_crtc_init_with_planes(drm, crtc, primary_plane, NULL, &vc4_crtc_funcs, NULL); drm_crtc_helper_add(crtc, &vc4_crtc_helper_funcs); primary_plane->crtc = crtc; - cursor_plane->crtc = crtc; vc4->crtc[drm_crtc_index(crtc)] = vc4_crtc; vc4_crtc->channel = vc4_crtc->data->hvs_channel; + /* Set up some arbitrary number of planes. We're not limited + * by a set number of physical registers, just the space in + * the HVS (16k) and how small an plane can be (28 bytes). + * However, each plane we set up takes up some memory, and + * increases the cost of looping over planes, which atomic + * modesetting does quite a bit. As a result, we pick a + * modest number of planes to expose, that should hopefully + * still cover any sane usecase. + */ + for (i = 0; i < 8; i++) { + struct drm_plane *plane = + vc4_plane_init(drm, DRM_PLANE_TYPE_OVERLAY); + + if (IS_ERR(plane)) + continue; + + plane->possible_crtcs = 1 << drm_crtc_index(crtc); + } + + /* Set up the legacy cursor after overlay initialization, + * since we overlay planes on the CRTC in the order they were + * initialized. + */ + cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR); + if (!IS_ERR(cursor_plane)) { + cursor_plane->possible_crtcs = 1 << drm_crtc_index(crtc); + cursor_plane->crtc = crtc; + crtc->cursor = cursor_plane; + } + CRTC_WRITE(PV_INTEN, 0); CRTC_WRITE(PV_INTSTAT, PV_INT_VFP_START); ret = devm_request_irq(dev, platform_get_irq(pdev, 0), vc4_crtc_irq_handler, 0, "vc4 crtc", vc4_crtc); if (ret) - goto err_cursor; + goto err_destroy_planes; vc4_set_crtc_possible_masks(drm, crtc); @@ -716,10 +738,12 @@ static int vc4_crtc_bind(struct device *dev, struct device *master, void *data) return 0; -err_cursor: - cursor_plane->funcs->destroy(cursor_plane); -err_primary: - primary_plane->funcs->destroy(primary_plane); +err_destroy_planes: + list_for_each_entry_safe(destroy_plane, temp, + &drm->mode_config.plane_list, head) { + if (destroy_plane->possible_crtcs == 1 << drm_crtc_index(crtc)) + destroy_plane->funcs->destroy(destroy_plane); + } err: return ret; } -- cgit From f863e356013d628fa65b1cd89aa298eed26fc936 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 28 Dec 2015 14:45:25 -0800 Subject: drm/vc4: Fix which value is being used for source image size. This doesn't matter yet since we only allow 1:1 scaling, but the comment clearly says we should be using the source size. Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_plane.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/vc4') diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 713ec006baa9..d9c929096164 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -47,6 +47,8 @@ struct vc4_plane_state { /* Clipped coordinates of the plane on the display. */ int crtc_x, crtc_y, crtc_w, crtc_h; + /* Clipped size of the area scanned from in the FB. */ + u32 src_w, src_h; /* Offset to start scanning out from the start of the plane's * BO. @@ -170,11 +172,6 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) vc4_state->offset = fb->offsets[0]; - vc4_state->crtc_x = state->crtc_x; - vc4_state->crtc_y = state->crtc_y; - vc4_state->crtc_w = state->crtc_w; - vc4_state->crtc_h = state->crtc_h; - if (state->crtc_w << 16 != state->src_w || state->crtc_h << 16 != state->src_h) { /* We don't support scaling yet, which involves @@ -185,17 +182,25 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) return -EINVAL; } + vc4_state->src_w = state->src_w >> 16; + vc4_state->src_h = state->src_h >> 16; + + vc4_state->crtc_x = state->crtc_x; + vc4_state->crtc_y = state->crtc_y; + vc4_state->crtc_w = state->crtc_w; + vc4_state->crtc_h = state->crtc_h; + if (vc4_state->crtc_x < 0) { vc4_state->offset += (drm_format_plane_cpp(fb->pixel_format, 0) * -vc4_state->crtc_x); - vc4_state->crtc_w += vc4_state->crtc_x; + vc4_state->src_w += vc4_state->crtc_x; vc4_state->crtc_x = 0; } if (vc4_state->crtc_y < 0) { vc4_state->offset += fb->pitches[0] * -vc4_state->crtc_y; - vc4_state->crtc_h += vc4_state->crtc_y; + vc4_state->src_h += vc4_state->crtc_y; vc4_state->crtc_y = 0; } @@ -244,8 +249,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane, SCALER_POS2_ALPHA_MODE_PIPELINE : SCALER_POS2_ALPHA_MODE_FIXED, SCALER_POS2_ALPHA_MODE) | - VC4_SET_FIELD(vc4_state->crtc_w, SCALER_POS2_WIDTH) | - VC4_SET_FIELD(vc4_state->crtc_h, SCALER_POS2_HEIGHT)); + VC4_SET_FIELD(vc4_state->src_w, SCALER_POS2_WIDTH) | + VC4_SET_FIELD(vc4_state->src_h, SCALER_POS2_HEIGHT)); /* Position Word 3: Context. Written by the HVS. */ vc4_dlist_write(vc4_state, 0xc0c0c0c0); -- cgit From 21af94cf1a4c2d3450ab7fead58e6e2291ab92a9 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 20 Oct 2015 16:06:57 +0100 Subject: drm/vc4: Add support for scaling of display planes. This implements a simple policy for choosing scaling modes (trapezoidal for decimation, PPF for magnification), and a single PPF filter (Mitchell/Netravali's recommendation). Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_drv.h | 4 + drivers/gpu/drm/vc4/vc4_hvs.c | 84 +++++++++++++ drivers/gpu/drm/vc4/vc4_plane.c | 253 +++++++++++++++++++++++++++++++++++++--- drivers/gpu/drm/vc4/vc4_regs.h | 46 ++++++++ 4 files changed, 374 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/vc4') diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index ae9802486080..3d1df6b1c4d3 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -155,7 +155,11 @@ struct vc4_hvs { * list. Units are dwords. */ struct drm_mm dlist_mm; + /* Memory manager for the LBM memory used by HVS scaling. */ + struct drm_mm lbm_mm; spinlock_t mm_lock; + + struct drm_mm_node mitchell_netravali_filter; }; struct vc4_plane { diff --git a/drivers/gpu/drm/vc4/vc4_hvs.c b/drivers/gpu/drm/vc4/vc4_hvs.c index 9e435545b3b6..6fbab1c82cb1 100644 --- a/drivers/gpu/drm/vc4/vc4_hvs.c +++ b/drivers/gpu/drm/vc4/vc4_hvs.c @@ -100,12 +100,76 @@ int vc4_hvs_debugfs_regs(struct seq_file *m, void *unused) } #endif +/* The filter kernel is composed of dwords each containing 3 9-bit + * signed integers packed next to each other. + */ +#define VC4_INT_TO_COEFF(coeff) (coeff & 0x1ff) +#define VC4_PPF_FILTER_WORD(c0, c1, c2) \ + ((((c0) & 0x1ff) << 0) | \ + (((c1) & 0x1ff) << 9) | \ + (((c2) & 0x1ff) << 18)) + +/* The whole filter kernel is arranged as the coefficients 0-16 going + * up, then a pad, then 17-31 going down and reversed within the + * dwords. This means that a linear phase kernel (where it's + * symmetrical at the boundary between 15 and 16) has the last 5 + * dwords matching the first 5, but reversed. + */ +#define VC4_LINEAR_PHASE_KERNEL(c0, c1, c2, c3, c4, c5, c6, c7, c8, \ + c9, c10, c11, c12, c13, c14, c15) \ + {VC4_PPF_FILTER_WORD(c0, c1, c2), \ + VC4_PPF_FILTER_WORD(c3, c4, c5), \ + VC4_PPF_FILTER_WORD(c6, c7, c8), \ + VC4_PPF_FILTER_WORD(c9, c10, c11), \ + VC4_PPF_FILTER_WORD(c12, c13, c14), \ + VC4_PPF_FILTER_WORD(c15, c15, 0)} + +#define VC4_LINEAR_PHASE_KERNEL_DWORDS 6 +#define VC4_KERNEL_DWORDS (VC4_LINEAR_PHASE_KERNEL_DWORDS * 2 - 1) + +/* Recommended B=1/3, C=1/3 filter choice from Mitchell/Netravali. + * http://www.cs.utexas.edu/~fussell/courses/cs384g/lectures/mitchell/Mitchell.pdf + */ +static const u32 mitchell_netravali_1_3_1_3_kernel[] = + VC4_LINEAR_PHASE_KERNEL(0, -2, -6, -8, -10, -8, -3, 2, 18, + 50, 82, 119, 155, 187, 213, 227); + +static int vc4_hvs_upload_linear_kernel(struct vc4_hvs *hvs, + struct drm_mm_node *space, + const u32 *kernel) +{ + int ret, i; + u32 __iomem *dst_kernel; + + ret = drm_mm_insert_node(&hvs->dlist_mm, space, VC4_KERNEL_DWORDS, 1, + 0); + if (ret) { + DRM_ERROR("Failed to allocate space for filter kernel: %d\n", + ret); + return ret; + } + + dst_kernel = hvs->dlist + space->start; + + for (i = 0; i < VC4_KERNEL_DWORDS; i++) { + if (i < VC4_LINEAR_PHASE_KERNEL_DWORDS) + writel(kernel[i], &dst_kernel[i]); + else { + writel(kernel[VC4_KERNEL_DWORDS - i - 1], + &dst_kernel[i]); + } + } + + return 0; +} + static int vc4_hvs_bind(struct device *dev, struct device *master, void *data) { struct platform_device *pdev = to_platform_device(dev); struct drm_device *drm = dev_get_drvdata(master); struct vc4_dev *vc4 = drm->dev_private; struct vc4_hvs *hvs = NULL; + int ret; hvs = devm_kzalloc(&pdev->dev, sizeof(*hvs), GFP_KERNEL); if (!hvs) @@ -130,6 +194,22 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data) HVS_BOOTLOADER_DLIST_END, (SCALER_DLIST_SIZE >> 2) - HVS_BOOTLOADER_DLIST_END); + /* Set up the HVS LBM memory manager. We could have some more + * complicated data structure that allowed reuse of LBM areas + * between planes when they don't overlap on the screen, but + * for now we just allocate globally. + */ + drm_mm_init(&hvs->lbm_mm, 0, 96 * 1024); + + /* Upload filter kernels. We only have the one for now, so we + * keep it around for the lifetime of the driver. + */ + ret = vc4_hvs_upload_linear_kernel(hvs, + &hvs->mitchell_netravali_filter, + mitchell_netravali_1_3_1_3_kernel); + if (ret) + return ret; + vc4->hvs = hvs; return 0; } @@ -140,7 +220,11 @@ static void vc4_hvs_unbind(struct device *dev, struct device *master, struct drm_device *drm = dev_get_drvdata(master); struct vc4_dev *vc4 = drm->dev_private; + if (vc4->hvs->mitchell_netravali_filter.allocated) + drm_mm_remove_node(&vc4->hvs->mitchell_netravali_filter); + drm_mm_takedown(&vc4->hvs->dlist_mm); + drm_mm_takedown(&vc4->hvs->lbm_mm); vc4->hvs = NULL; } diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index d9c929096164..7c2d697e8715 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -24,6 +24,12 @@ #include "drm_fb_cma_helper.h" #include "drm_plane_helper.h" +enum vc4_scaling_mode { + VC4_SCALING_NONE, + VC4_SCALING_TPZ, + VC4_SCALING_PPF, +}; + struct vc4_plane_state { struct drm_plane_state base; /* System memory copy of the display list for this element, computed @@ -47,13 +53,19 @@ struct vc4_plane_state { /* Clipped coordinates of the plane on the display. */ int crtc_x, crtc_y, crtc_w, crtc_h; - /* Clipped size of the area scanned from in the FB. */ - u32 src_w, src_h; + /* Clipped area being scanned from in the FB. */ + u32 src_x, src_y, src_w, src_h; + + enum vc4_scaling_mode x_scaling, y_scaling; + bool is_unity; /* Offset to start scanning out from the start of the plane's * BO. */ u32 offset; + + /* Our allocation in LBM for temporary storage during scaling. */ + struct drm_mm_node lbm; }; static inline struct vc4_plane_state * @@ -90,6 +102,16 @@ static const struct hvs_format *vc4_get_hvs_format(u32 drm_format) return NULL; } +static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst) +{ + if (dst > src) + return VC4_SCALING_PPF; + else if (dst < src) + return VC4_SCALING_TPZ; + else + return VC4_SCALING_NONE; +} + static bool plane_enabled(struct drm_plane_state *state) { return state->fb && state->crtc; @@ -106,6 +128,8 @@ static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane if (!vc4_state) return NULL; + memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm)); + __drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base); if (vc4_state->dlist) { @@ -125,8 +149,17 @@ static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane static void vc4_plane_destroy_state(struct drm_plane *plane, struct drm_plane_state *state) { + struct vc4_dev *vc4 = to_vc4_dev(plane->dev); struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); + if (vc4_state->lbm.allocated) { + unsigned long irqflags; + + spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags); + drm_mm_remove_node(&vc4_state->lbm); + spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags); + } + kfree(vc4_state->dlist); __drm_atomic_helper_plane_destroy_state(plane, &vc4_state->base); kfree(state); @@ -165,23 +198,60 @@ static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val) vc4_state->dlist[vc4_state->dlist_count++] = val; } +/* Returns the scl0/scl1 field based on whether the dimensions need to + * be up/down/non-scaled. + * + * This is a replication of a table from the spec. + */ +static u32 vc4_get_scl_field(struct drm_plane_state *state) +{ + struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); + + switch (vc4_state->x_scaling << 2 | vc4_state->y_scaling) { + case VC4_SCALING_PPF << 2 | VC4_SCALING_PPF: + return SCALER_CTL0_SCL_H_PPF_V_PPF; + case VC4_SCALING_TPZ << 2 | VC4_SCALING_PPF: + return SCALER_CTL0_SCL_H_TPZ_V_PPF; + case VC4_SCALING_PPF << 2 | VC4_SCALING_TPZ: + return SCALER_CTL0_SCL_H_PPF_V_TPZ; + case VC4_SCALING_TPZ << 2 | VC4_SCALING_TPZ: + return SCALER_CTL0_SCL_H_TPZ_V_TPZ; + case VC4_SCALING_PPF << 2 | VC4_SCALING_NONE: + return SCALER_CTL0_SCL_H_PPF_V_NONE; + case VC4_SCALING_NONE << 2 | VC4_SCALING_PPF: + return SCALER_CTL0_SCL_H_NONE_V_PPF; + case VC4_SCALING_NONE << 2 | VC4_SCALING_TPZ: + return SCALER_CTL0_SCL_H_NONE_V_TPZ; + case VC4_SCALING_TPZ << 2 | VC4_SCALING_NONE: + return SCALER_CTL0_SCL_H_TPZ_V_NONE; + default: + case VC4_SCALING_NONE << 2 | VC4_SCALING_NONE: + /* The unity case is independently handled by + * SCALER_CTL0_UNITY. + */ + return 0; + } +} + static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) { + struct drm_plane *plane = state->plane; struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); struct drm_framebuffer *fb = state->fb; + u32 subpixel_src_mask = (1 << 16) - 1; vc4_state->offset = fb->offsets[0]; - if (state->crtc_w << 16 != state->src_w || - state->crtc_h << 16 != state->src_h) { - /* We don't support scaling yet, which involves - * allocating the LBM memory for scaling temporary - * storage, and putting filter kernels in the HVS - * context. - */ + /* We don't support subpixel source positioning for scaling. */ + if ((state->src_x & subpixel_src_mask) || + (state->src_y & subpixel_src_mask) || + (state->src_w & subpixel_src_mask) || + (state->src_h & subpixel_src_mask)) { return -EINVAL; } + vc4_state->src_x = state->src_x >> 16; + vc4_state->src_y = state->src_y >> 16; vc4_state->src_w = state->src_w >> 16; vc4_state->src_h = state->src_h >> 16; @@ -190,6 +260,23 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) vc4_state->crtc_w = state->crtc_w; vc4_state->crtc_h = state->crtc_h; + vc4_state->x_scaling = vc4_get_scaling_mode(vc4_state->src_w, + vc4_state->crtc_w); + vc4_state->y_scaling = vc4_get_scaling_mode(vc4_state->src_h, + vc4_state->crtc_h); + vc4_state->is_unity = (vc4_state->x_scaling == VC4_SCALING_NONE && + vc4_state->y_scaling == VC4_SCALING_NONE); + + /* No configuring scaling on the cursor plane, since it gets + non-vblank-synced updates, and scaling requires requires + LBM changes which have to be vblank-synced. + */ + if (plane->type == DRM_PLANE_TYPE_CURSOR && !vc4_state->is_unity) + return -EINVAL; + + /* Clamp the on-screen start x/y to 0. The hardware doesn't + * support negative y, and negative x wastes bandwidth. + */ if (vc4_state->crtc_x < 0) { vc4_state->offset += (drm_format_plane_cpp(fb->pixel_format, 0) * @@ -207,6 +294,87 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) return 0; } +static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst) +{ + u32 scale, recip; + + scale = (1 << 16) * src / dst; + + /* The specs note that while the reciprocal would be defined + * as (1<<32)/scale, ~0 is close enough. + */ + recip = ~0 / scale; + + vc4_dlist_write(vc4_state, + VC4_SET_FIELD(scale, SCALER_TPZ0_SCALE) | + VC4_SET_FIELD(0, SCALER_TPZ0_IPHASE)); + vc4_dlist_write(vc4_state, + VC4_SET_FIELD(recip, SCALER_TPZ1_RECIP)); +} + +static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst) +{ + u32 scale = (1 << 16) * src / dst; + + vc4_dlist_write(vc4_state, + SCALER_PPF_AGC | + VC4_SET_FIELD(scale, SCALER_PPF_SCALE) | + VC4_SET_FIELD(0, SCALER_PPF_IPHASE)); +} + +static u32 vc4_lbm_size(struct drm_plane_state *state) +{ + struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); + /* This is the worst case number. One of the two sizes will + * be used depending on the scaling configuration. + */ + u32 pix_per_line = max(vc4_state->src_w, (u32)vc4_state->crtc_w); + u32 lbm; + + if (vc4_state->is_unity) + return 0; + else if (vc4_state->y_scaling == VC4_SCALING_TPZ) + lbm = pix_per_line * 8; + else { + /* In special cases, this multiplier might be 12. */ + lbm = pix_per_line * 16; + } + + lbm = roundup(lbm, 32); + + return lbm; +} + +static void vc4_write_scaling_parameters(struct drm_plane_state *state) +{ + struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); + + /* Ch0 H-PPF Word 0: Scaling Parameters */ + if (vc4_state->x_scaling == VC4_SCALING_PPF) { + vc4_write_ppf(vc4_state, + vc4_state->src_w, vc4_state->crtc_w); + } + + /* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */ + if (vc4_state->y_scaling == VC4_SCALING_PPF) { + vc4_write_ppf(vc4_state, + vc4_state->src_h, vc4_state->crtc_h); + vc4_dlist_write(vc4_state, 0xc0c0c0c0); + } + + /* Ch0 H-TPZ Words 0-1: Scaling Parameters, Recip */ + if (vc4_state->x_scaling == VC4_SCALING_TPZ) { + vc4_write_tpz(vc4_state, + vc4_state->src_w, vc4_state->crtc_w); + } + + /* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */ + if (vc4_state->y_scaling == VC4_SCALING_TPZ) { + vc4_write_tpz(vc4_state, + vc4_state->src_h, vc4_state->crtc_h); + vc4_dlist_write(vc4_state, 0xc0c0c0c0); + } +} /* Writes out a full display list for an active plane to the plane's * private dlist state. @@ -214,22 +382,50 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) static int vc4_plane_mode_set(struct drm_plane *plane, struct drm_plane_state *state) { + struct vc4_dev *vc4 = to_vc4_dev(plane->dev); struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); struct drm_framebuffer *fb = state->fb; struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0); u32 ctl0_offset = vc4_state->dlist_count; const struct hvs_format *format = vc4_get_hvs_format(fb->pixel_format); + u32 scl; + u32 lbm_size; + unsigned long irqflags; int ret; ret = vc4_plane_setup_clipping_and_scaling(state); if (ret) return ret; + /* Allocate the LBM memory that the HVS will use for temporary + * storage due to our scaling/format conversion. + */ + lbm_size = vc4_lbm_size(state); + if (lbm_size) { + if (!vc4_state->lbm.allocated) { + spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags); + ret = drm_mm_insert_node(&vc4->hvs->lbm_mm, + &vc4_state->lbm, + lbm_size, 32, 0); + spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags); + } else { + WARN_ON_ONCE(lbm_size != vc4_state->lbm.size); + } + } + + if (ret) + return ret; + + scl = vc4_get_scl_field(state); + + /* Control word */ vc4_dlist_write(vc4_state, SCALER_CTL0_VALID | (format->pixel_order << SCALER_CTL0_ORDER_SHIFT) | (format->hvs << SCALER_CTL0_PIXEL_FORMAT_SHIFT) | - SCALER_CTL0_UNITY); + (vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) | + VC4_SET_FIELD(scl, SCALER_CTL0_SCL0) | + VC4_SET_FIELD(scl, SCALER_CTL0_SCL1)); /* Position Word 0: Image Positions and Alpha Value */ vc4_state->pos0_offset = vc4_state->dlist_count; @@ -238,9 +434,14 @@ static int vc4_plane_mode_set(struct drm_plane *plane, VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) | VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y)); - /* Position Word 1: Scaled Image Dimensions. - * Skipped due to SCALER_CTL0_UNITY scaling. - */ + /* Position Word 1: Scaled Image Dimensions. */ + if (!vc4_state->is_unity) { + vc4_dlist_write(vc4_state, + VC4_SET_FIELD(vc4_state->crtc_w, + SCALER_POS1_SCL_WIDTH) | + VC4_SET_FIELD(vc4_state->crtc_h, + SCALER_POS1_SCL_HEIGHT)); + } /* Position Word 2: Source Image Size, Alpha Mode */ vc4_state->pos2_offset = vc4_state->dlist_count; @@ -266,6 +467,32 @@ static int vc4_plane_mode_set(struct drm_plane *plane, vc4_dlist_write(vc4_state, VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH)); + if (!vc4_state->is_unity) { + /* LBM Base Address. */ + if (vc4_state->y_scaling != VC4_SCALING_NONE) + vc4_dlist_write(vc4_state, vc4_state->lbm.start); + + vc4_write_scaling_parameters(state); + + /* If any PPF setup was done, then all the kernel + * pointers get uploaded. + */ + if (vc4_state->x_scaling == VC4_SCALING_PPF || + vc4_state->y_scaling == VC4_SCALING_PPF) { + u32 kernel = VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start, + SCALER_PPF_KERNEL_OFFSET); + + /* HPPF plane 0 */ + vc4_dlist_write(vc4_state, kernel); + /* VPPF plane 0 */ + vc4_dlist_write(vc4_state, kernel); + /* HPPF plane 1 */ + vc4_dlist_write(vc4_state, kernel); + /* VPPF plane 1 */ + vc4_dlist_write(vc4_state, kernel); + } + } + vc4_state->dlist[ctl0_offset] |= VC4_SET_FIELD(vc4_state->dlist_count, SCALER_CTL0_SIZE); diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h index 4e52a0a88551..037c7fe67187 100644 --- a/drivers/gpu/drm/vc4/vc4_regs.h +++ b/drivers/gpu/drm/vc4/vc4_regs.h @@ -536,6 +536,21 @@ enum hvs_pixel_format { #define SCALER_CTL0_ORDER_MASK VC4_MASK(14, 13) #define SCALER_CTL0_ORDER_SHIFT 13 +#define SCALER_CTL0_SCL1_MASK VC4_MASK(10, 8) +#define SCALER_CTL0_SCL1_SHIFT 8 + +#define SCALER_CTL0_SCL0_MASK VC4_MASK(7, 5) +#define SCALER_CTL0_SCL0_SHIFT 5 + +#define SCALER_CTL0_SCL_H_PPF_V_PPF 0 +#define SCALER_CTL0_SCL_H_TPZ_V_PPF 1 +#define SCALER_CTL0_SCL_H_PPF_V_TPZ 2 +#define SCALER_CTL0_SCL_H_TPZ_V_TPZ 3 +#define SCALER_CTL0_SCL_H_PPF_V_NONE 4 +#define SCALER_CTL0_SCL_H_NONE_V_PPF 5 +#define SCALER_CTL0_SCL_H_NONE_V_TPZ 6 +#define SCALER_CTL0_SCL_H_TPZ_V_NONE 7 + /* Set to indicate no scaling. */ #define SCALER_CTL0_UNITY BIT(4) @@ -551,6 +566,12 @@ enum hvs_pixel_format { #define SCALER_POS0_START_X_MASK VC4_MASK(11, 0) #define SCALER_POS0_START_X_SHIFT 0 +#define SCALER_POS1_SCL_HEIGHT_MASK VC4_MASK(27, 16) +#define SCALER_POS1_SCL_HEIGHT_SHIFT 16 + +#define SCALER_POS1_SCL_WIDTH_MASK VC4_MASK(11, 0) +#define SCALER_POS1_SCL_WIDTH_SHIFT 0 + #define SCALER_POS2_ALPHA_MODE_MASK VC4_MASK(31, 30) #define SCALER_POS2_ALPHA_MODE_SHIFT 30 #define SCALER_POS2_ALPHA_MODE_PIPELINE 0 @@ -564,6 +585,31 @@ enum hvs_pixel_format { #define SCALER_POS2_WIDTH_MASK VC4_MASK(11, 0) #define SCALER_POS2_WIDTH_SHIFT 0 +#define SCALER_TPZ0_VERT_RECALC BIT(31) +#define SCALER_TPZ0_SCALE_MASK VC4_MASK(28, 8) +#define SCALER_TPZ0_SCALE_SHIFT 8 +#define SCALER_TPZ0_IPHASE_MASK VC4_MASK(7, 0) +#define SCALER_TPZ0_IPHASE_SHIFT 0 +#define SCALER_TPZ1_RECIP_MASK VC4_MASK(15, 0) +#define SCALER_TPZ1_RECIP_SHIFT 0 + +/* Skips interpolating coefficients to 64 phases, so just 8 are used. + * Required for nearest neighbor. + */ +#define SCALER_PPF_NOINTERP BIT(31) +/* Replaes the highest valued coefficient with one that makes all 4 + * sum to unity. + */ +#define SCALER_PPF_AGC BIT(30) +#define SCALER_PPF_SCALE_MASK VC4_MASK(24, 8) +#define SCALER_PPF_SCALE_SHIFT 8 +#define SCALER_PPF_IPHASE_MASK VC4_MASK(6, 0) +#define SCALER_PPF_IPHASE_SHIFT 0 + +#define SCALER_PPF_KERNEL_OFFSET_MASK VC4_MASK(13, 0) +#define SCALER_PPF_KERNEL_OFFSET_SHIFT 0 +#define SCALER_PPF_KERNEL_UNCACHED BIT(31) + #define SCALER_SRC_PITCH_MASK VC4_MASK(15, 0) #define SCALER_SRC_PITCH_SHIFT 0 -- cgit From fe4cd8476928a66e109ab50a430362fcee8a5716 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 20 Oct 2015 13:59:15 +0100 Subject: drm/vc4: Add support a few more RGB display plane formats. These were all touch-tested with modetest. Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_plane.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'drivers/gpu/drm/vc4') diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 7c2d697e8715..013ebff60fb5 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -88,6 +88,22 @@ static const struct hvs_format { .drm = DRM_FORMAT_ARGB8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888, .pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = true, }, + { + .drm = DRM_FORMAT_RGB565, .hvs = HVS_PIXEL_FORMAT_RGB565, + .pixel_order = HVS_PIXEL_ORDER_XRGB, .has_alpha = false, + }, + { + .drm = DRM_FORMAT_BGR565, .hvs = HVS_PIXEL_FORMAT_RGB565, + .pixel_order = HVS_PIXEL_ORDER_XBGR, .has_alpha = false, + }, + { + .drm = DRM_FORMAT_ARGB1555, .hvs = HVS_PIXEL_FORMAT_RGBA5551, + .pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = true, + }, + { + .drm = DRM_FORMAT_XRGB1555, .hvs = HVS_PIXEL_FORMAT_RGBA5551, + .pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = false, + }, }; static const struct hvs_format *vc4_get_hvs_format(u32 drm_format) -- cgit From fc04023fafecf19ebd09278d8d67dc5ed1f68b46 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 30 Dec 2015 12:25:44 -0800 Subject: drm/vc4: Add support for YUV planes. This supports 420 and 422 subsampling with 2 or 3 planes, tested with modetest. It doesn't set up chroma subsampling position (which it appears KMS doesn't deal with yet). The LBM memory is overallocated in many cases, but apparently the docs aren't quite correct and I'll probably need to look at the hardware source to really figure it out. Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_plane.c | 256 +++++++++++++++++++++++++++++++--------- drivers/gpu/drm/vc4/vc4_regs.h | 56 ++++++++- 2 files changed, 253 insertions(+), 59 deletions(-) (limited to 'drivers/gpu/drm/vc4') diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 013ebff60fb5..7b0c72ae02a0 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -54,15 +54,19 @@ struct vc4_plane_state { /* Clipped coordinates of the plane on the display. */ int crtc_x, crtc_y, crtc_w, crtc_h; /* Clipped area being scanned from in the FB. */ - u32 src_x, src_y, src_w, src_h; + u32 src_x, src_y; - enum vc4_scaling_mode x_scaling, y_scaling; + u32 src_w[2], src_h[2]; + + /* Scaling selection for the RGB/Y plane and the Cb/Cr planes. */ + enum vc4_scaling_mode x_scaling[2], y_scaling[2]; bool is_unity; + bool is_yuv; /* Offset to start scanning out from the start of the plane's * BO. */ - u32 offset; + u32 offsets[3]; /* Our allocation in LBM for temporary storage during scaling. */ struct drm_mm_node lbm; @@ -79,6 +83,7 @@ static const struct hvs_format { u32 hvs; /* HVS_FORMAT_* */ u32 pixel_order; bool has_alpha; + bool flip_cbcr; } hvs_formats[] = { { .drm = DRM_FORMAT_XRGB8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888, @@ -104,6 +109,32 @@ static const struct hvs_format { .drm = DRM_FORMAT_XRGB1555, .hvs = HVS_PIXEL_FORMAT_RGBA5551, .pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = false, }, + { + .drm = DRM_FORMAT_YUV422, + .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE, + }, + { + .drm = DRM_FORMAT_YVU422, + .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE, + .flip_cbcr = true, + }, + { + .drm = DRM_FORMAT_YUV420, + .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE, + }, + { + .drm = DRM_FORMAT_YVU420, + .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE, + .flip_cbcr = true, + }, + { + .drm = DRM_FORMAT_NV12, + .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE, + }, + { + .drm = DRM_FORMAT_NV16, + .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE, + }, }; static const struct hvs_format *vc4_get_hvs_format(u32 drm_format) @@ -219,11 +250,11 @@ static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val) * * This is a replication of a table from the spec. */ -static u32 vc4_get_scl_field(struct drm_plane_state *state) +static u32 vc4_get_scl_field(struct drm_plane_state *state, int plane) { struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); - switch (vc4_state->x_scaling << 2 | vc4_state->y_scaling) { + switch (vc4_state->x_scaling[plane] << 2 | vc4_state->y_scaling[plane]) { case VC4_SCALING_PPF << 2 | VC4_SCALING_PPF: return SCALER_CTL0_SCL_H_PPF_V_PPF; case VC4_SCALING_TPZ << 2 | VC4_SCALING_PPF: @@ -254,9 +285,16 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) struct drm_plane *plane = state->plane; struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); struct drm_framebuffer *fb = state->fb; + struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0); u32 subpixel_src_mask = (1 << 16) - 1; + u32 format = fb->pixel_format; + int num_planes = drm_format_num_planes(format); + u32 h_subsample = 1; + u32 v_subsample = 1; + int i; - vc4_state->offset = fb->offsets[0]; + for (i = 0; i < num_planes; i++) + vc4_state->offsets[i] = bo->paddr + fb->offsets[i]; /* We don't support subpixel source positioning for scaling. */ if ((state->src_x & subpixel_src_mask) || @@ -268,20 +306,48 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) vc4_state->src_x = state->src_x >> 16; vc4_state->src_y = state->src_y >> 16; - vc4_state->src_w = state->src_w >> 16; - vc4_state->src_h = state->src_h >> 16; + vc4_state->src_w[0] = state->src_w >> 16; + vc4_state->src_h[0] = state->src_h >> 16; vc4_state->crtc_x = state->crtc_x; vc4_state->crtc_y = state->crtc_y; vc4_state->crtc_w = state->crtc_w; vc4_state->crtc_h = state->crtc_h; - vc4_state->x_scaling = vc4_get_scaling_mode(vc4_state->src_w, - vc4_state->crtc_w); - vc4_state->y_scaling = vc4_get_scaling_mode(vc4_state->src_h, - vc4_state->crtc_h); - vc4_state->is_unity = (vc4_state->x_scaling == VC4_SCALING_NONE && - vc4_state->y_scaling == VC4_SCALING_NONE); + vc4_state->x_scaling[0] = vc4_get_scaling_mode(vc4_state->src_w[0], + vc4_state->crtc_w); + vc4_state->y_scaling[0] = vc4_get_scaling_mode(vc4_state->src_h[0], + vc4_state->crtc_h); + + if (num_planes > 1) { + vc4_state->is_yuv = true; + + h_subsample = drm_format_horz_chroma_subsampling(format); + v_subsample = drm_format_vert_chroma_subsampling(format); + vc4_state->src_w[1] = vc4_state->src_w[0] / h_subsample; + vc4_state->src_h[1] = vc4_state->src_h[0] / v_subsample; + + vc4_state->x_scaling[1] = + vc4_get_scaling_mode(vc4_state->src_w[1], + vc4_state->crtc_w); + vc4_state->y_scaling[1] = + vc4_get_scaling_mode(vc4_state->src_h[1], + vc4_state->crtc_h); + + /* YUV conversion requires that scaling be enabled, + * even on a plane that's otherwise 1:1. Choose TPZ + * for simplicity. + */ + if (vc4_state->x_scaling[0] == VC4_SCALING_NONE) + vc4_state->x_scaling[0] = VC4_SCALING_TPZ; + if (vc4_state->y_scaling[0] == VC4_SCALING_NONE) + vc4_state->y_scaling[0] = VC4_SCALING_TPZ; + } + + vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE && + vc4_state->y_scaling[0] == VC4_SCALING_NONE && + vc4_state->x_scaling[1] == VC4_SCALING_NONE && + vc4_state->y_scaling[1] == VC4_SCALING_NONE); /* No configuring scaling on the cursor plane, since it gets non-vblank-synced updates, and scaling requires requires @@ -294,16 +360,27 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) * support negative y, and negative x wastes bandwidth. */ if (vc4_state->crtc_x < 0) { - vc4_state->offset += (drm_format_plane_cpp(fb->pixel_format, - 0) * - -vc4_state->crtc_x); - vc4_state->src_w += vc4_state->crtc_x; + for (i = 0; i < num_planes; i++) { + u32 cpp = drm_format_plane_cpp(fb->pixel_format, i); + u32 subs = ((i == 0) ? 1 : h_subsample); + + vc4_state->offsets[i] += (cpp * + (-vc4_state->crtc_x) / subs); + } + vc4_state->src_w[0] += vc4_state->crtc_x; + vc4_state->src_w[1] += vc4_state->crtc_x / h_subsample; vc4_state->crtc_x = 0; } if (vc4_state->crtc_y < 0) { - vc4_state->offset += fb->pitches[0] * -vc4_state->crtc_y; - vc4_state->src_h += vc4_state->crtc_y; + for (i = 0; i < num_planes; i++) { + u32 subs = ((i == 0) ? 1 : v_subsample); + + vc4_state->offsets[i] += (fb->pitches[i] * + (-vc4_state->crtc_y) / subs); + } + vc4_state->src_h[0] += vc4_state->crtc_y; + vc4_state->src_h[1] += vc4_state->crtc_y / v_subsample; vc4_state->crtc_y = 0; } @@ -344,15 +421,23 @@ static u32 vc4_lbm_size(struct drm_plane_state *state) /* This is the worst case number. One of the two sizes will * be used depending on the scaling configuration. */ - u32 pix_per_line = max(vc4_state->src_w, (u32)vc4_state->crtc_w); + u32 pix_per_line = max(vc4_state->src_w[0], (u32)vc4_state->crtc_w); u32 lbm; - if (vc4_state->is_unity) - return 0; - else if (vc4_state->y_scaling == VC4_SCALING_TPZ) - lbm = pix_per_line * 8; - else { - /* In special cases, this multiplier might be 12. */ + if (!vc4_state->is_yuv) { + if (vc4_state->is_unity) + return 0; + else if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ) + lbm = pix_per_line * 8; + else { + /* In special cases, this multiplier might be 12. */ + lbm = pix_per_line * 16; + } + } else { + /* There are cases for this going down to a multiplier + * of 2, but according to the firmware source, the + * table in the docs is somewhat wrong. + */ lbm = pix_per_line * 16; } @@ -361,33 +446,34 @@ static u32 vc4_lbm_size(struct drm_plane_state *state) return lbm; } -static void vc4_write_scaling_parameters(struct drm_plane_state *state) +static void vc4_write_scaling_parameters(struct drm_plane_state *state, + int channel) { struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); /* Ch0 H-PPF Word 0: Scaling Parameters */ - if (vc4_state->x_scaling == VC4_SCALING_PPF) { + if (vc4_state->x_scaling[channel] == VC4_SCALING_PPF) { vc4_write_ppf(vc4_state, - vc4_state->src_w, vc4_state->crtc_w); + vc4_state->src_w[channel], vc4_state->crtc_w); } /* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */ - if (vc4_state->y_scaling == VC4_SCALING_PPF) { + if (vc4_state->y_scaling[channel] == VC4_SCALING_PPF) { vc4_write_ppf(vc4_state, - vc4_state->src_h, vc4_state->crtc_h); + vc4_state->src_h[channel], vc4_state->crtc_h); vc4_dlist_write(vc4_state, 0xc0c0c0c0); } /* Ch0 H-TPZ Words 0-1: Scaling Parameters, Recip */ - if (vc4_state->x_scaling == VC4_SCALING_TPZ) { + if (vc4_state->x_scaling[channel] == VC4_SCALING_TPZ) { vc4_write_tpz(vc4_state, - vc4_state->src_w, vc4_state->crtc_w); + vc4_state->src_w[channel], vc4_state->crtc_w); } /* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */ - if (vc4_state->y_scaling == VC4_SCALING_TPZ) { + if (vc4_state->y_scaling[channel] == VC4_SCALING_TPZ) { vc4_write_tpz(vc4_state, - vc4_state->src_h, vc4_state->crtc_h); + vc4_state->src_h[channel], vc4_state->crtc_h); vc4_dlist_write(vc4_state, 0xc0c0c0c0); } } @@ -401,13 +487,13 @@ static int vc4_plane_mode_set(struct drm_plane *plane, struct vc4_dev *vc4 = to_vc4_dev(plane->dev); struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); struct drm_framebuffer *fb = state->fb; - struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0); u32 ctl0_offset = vc4_state->dlist_count; const struct hvs_format *format = vc4_get_hvs_format(fb->pixel_format); - u32 scl; + int num_planes = drm_format_num_planes(format->drm); + u32 scl0, scl1; u32 lbm_size; unsigned long irqflags; - int ret; + int ret, i; ret = vc4_plane_setup_clipping_and_scaling(state); if (ret) @@ -432,7 +518,19 @@ static int vc4_plane_mode_set(struct drm_plane *plane, if (ret) return ret; - scl = vc4_get_scl_field(state); + /* SCL1 is used for Cb/Cr scaling of planar formats. For RGB + * and 4:4:4, scl1 should be set to scl0 so both channels of + * the scaler do the same thing. For YUV, the Y plane needs + * to be put in channel 1 and Cb/Cr in channel 0, so we swap + * the scl fields here. + */ + if (num_planes == 1) { + scl0 = vc4_get_scl_field(state, 1); + scl1 = scl0; + } else { + scl0 = vc4_get_scl_field(state, 1); + scl1 = vc4_get_scl_field(state, 0); + } /* Control word */ vc4_dlist_write(vc4_state, @@ -440,8 +538,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane, (format->pixel_order << SCALER_CTL0_ORDER_SHIFT) | (format->hvs << SCALER_CTL0_PIXEL_FORMAT_SHIFT) | (vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) | - VC4_SET_FIELD(scl, SCALER_CTL0_SCL0) | - VC4_SET_FIELD(scl, SCALER_CTL0_SCL1)); + VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) | + VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1)); /* Position Word 0: Image Positions and Alpha Value */ vc4_state->pos0_offset = vc4_state->dlist_count; @@ -466,35 +564,68 @@ static int vc4_plane_mode_set(struct drm_plane *plane, SCALER_POS2_ALPHA_MODE_PIPELINE : SCALER_POS2_ALPHA_MODE_FIXED, SCALER_POS2_ALPHA_MODE) | - VC4_SET_FIELD(vc4_state->src_w, SCALER_POS2_WIDTH) | - VC4_SET_FIELD(vc4_state->src_h, SCALER_POS2_HEIGHT)); + VC4_SET_FIELD(vc4_state->src_w[0], SCALER_POS2_WIDTH) | + VC4_SET_FIELD(vc4_state->src_h[0], SCALER_POS2_HEIGHT)); /* Position Word 3: Context. Written by the HVS. */ vc4_dlist_write(vc4_state, 0xc0c0c0c0); - /* Pointer Word 0: RGB / Y Pointer */ + + /* Pointer Word 0/1/2: RGB / Y / Cb / Cr Pointers + * + * The pointers may be any byte address. + */ vc4_state->ptr0_offset = vc4_state->dlist_count; - vc4_dlist_write(vc4_state, bo->paddr + vc4_state->offset); + if (!format->flip_cbcr) { + for (i = 0; i < num_planes; i++) + vc4_dlist_write(vc4_state, vc4_state->offsets[i]); + } else { + WARN_ON_ONCE(num_planes != 3); + vc4_dlist_write(vc4_state, vc4_state->offsets[0]); + vc4_dlist_write(vc4_state, vc4_state->offsets[2]); + vc4_dlist_write(vc4_state, vc4_state->offsets[1]); + } - /* Pointer Context Word 0: Written by the HVS */ - vc4_dlist_write(vc4_state, 0xc0c0c0c0); + /* Pointer Context Word 0/1/2: Written by the HVS */ + for (i = 0; i < num_planes; i++) + vc4_dlist_write(vc4_state, 0xc0c0c0c0); - /* Pitch word 0: Pointer 0 Pitch */ - vc4_dlist_write(vc4_state, - VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH)); + /* Pitch word 0/1/2 */ + for (i = 0; i < num_planes; i++) { + vc4_dlist_write(vc4_state, + VC4_SET_FIELD(fb->pitches[i], SCALER_SRC_PITCH)); + } + + /* Colorspace conversion words */ + if (vc4_state->is_yuv) { + vc4_dlist_write(vc4_state, SCALER_CSC0_ITR_R_601_5); + vc4_dlist_write(vc4_state, SCALER_CSC1_ITR_R_601_5); + vc4_dlist_write(vc4_state, SCALER_CSC2_ITR_R_601_5); + } if (!vc4_state->is_unity) { /* LBM Base Address. */ - if (vc4_state->y_scaling != VC4_SCALING_NONE) + if (vc4_state->y_scaling[0] != VC4_SCALING_NONE || + vc4_state->y_scaling[1] != VC4_SCALING_NONE) { vc4_dlist_write(vc4_state, vc4_state->lbm.start); + } - vc4_write_scaling_parameters(state); + if (num_planes > 1) { + /* Emit Cb/Cr as channel 0 and Y as channel + * 1. This matches how we set up scl0/scl1 + * above. + */ + vc4_write_scaling_parameters(state, 1); + } + vc4_write_scaling_parameters(state, 0); /* If any PPF setup was done, then all the kernel * pointers get uploaded. */ - if (vc4_state->x_scaling == VC4_SCALING_PPF || - vc4_state->y_scaling == VC4_SCALING_PPF) { + if (vc4_state->x_scaling[0] == VC4_SCALING_PPF || + vc4_state->y_scaling[0] == VC4_SCALING_PPF || + vc4_state->x_scaling[1] == VC4_SCALING_PPF || + vc4_state->y_scaling[1] == VC4_SCALING_PPF) { u32 kernel = VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start, SCALER_PPF_KERNEL_OFFSET); @@ -698,6 +829,7 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev, struct drm_plane *plane = NULL; struct vc4_plane *vc4_plane; u32 formats[ARRAY_SIZE(hvs_formats)]; + u32 num_formats = 0; int ret = 0; unsigned i; @@ -708,12 +840,20 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev, goto fail; } - for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) - formats[i] = hvs_formats[i].drm; + for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) { + /* Don't allow YUV in cursor planes, since that means + * tuning on the scaler, which we don't allow for the + * cursor. + */ + if (type != DRM_PLANE_TYPE_CURSOR || + hvs_formats[i].hvs < HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE) { + formats[num_formats++] = hvs_formats[i].drm; + } + } plane = &vc4_plane->base; ret = drm_universal_plane_init(dev, plane, 0xff, &vc4_plane_funcs, - formats, ARRAY_SIZE(formats), + formats, num_formats, type, NULL); drm_plane_helper_add(plane, &vc4_plane_helper_funcs); diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h index 037c7fe67187..25df20ef939c 100644 --- a/drivers/gpu/drm/vc4/vc4_regs.h +++ b/drivers/gpu/drm/vc4/vc4_regs.h @@ -503,7 +503,12 @@ enum hvs_pixel_format { HVS_PIXEL_FORMAT_RGB888 = 5, HVS_PIXEL_FORMAT_RGBA6666 = 6, /* 32bpp */ - HVS_PIXEL_FORMAT_RGBA8888 = 7 + HVS_PIXEL_FORMAT_RGBA8888 = 7, + + HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE = 8, + HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE = 9, + HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE = 10, + HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE = 11, }; /* Note: the LSB is the rightmost character shown. Only valid for @@ -585,6 +590,55 @@ enum hvs_pixel_format { #define SCALER_POS2_WIDTH_MASK VC4_MASK(11, 0) #define SCALER_POS2_WIDTH_SHIFT 0 +/* Color Space Conversion words. Some values are S2.8 signed + * integers, except that the 2 integer bits map as {0x0: 0, 0x1: 1, + * 0x2: 2, 0x3: -1} + */ +/* bottom 8 bits of S2.8 contribution of Cr to Blue */ +#define SCALER_CSC0_COEF_CR_BLU_MASK VC4_MASK(31, 24) +#define SCALER_CSC0_COEF_CR_BLU_SHIFT 24 +/* Signed offset to apply to Y before CSC. (Y' = Y + YY_OFS) */ +#define SCALER_CSC0_COEF_YY_OFS_MASK VC4_MASK(23, 16) +#define SCALER_CSC0_COEF_YY_OFS_SHIFT 16 +/* Signed offset to apply to CB before CSC (Cb' = Cb - 128 + CB_OFS). */ +#define SCALER_CSC0_COEF_CB_OFS_MASK VC4_MASK(15, 8) +#define SCALER_CSC0_COEF_CB_OFS_SHIFT 8 +/* Signed offset to apply to CB before CSC (Cr' = Cr - 128 + CR_OFS). */ +#define SCALER_CSC0_COEF_CR_OFS_MASK VC4_MASK(7, 0) +#define SCALER_CSC0_COEF_CR_OFS_SHIFT 0 +#define SCALER_CSC0_ITR_R_601_5 0x00f00000 +#define SCALER_CSC0_ITR_R_709_3 0x00f00000 +#define SCALER_CSC0_JPEG_JFIF 0x00000000 + +/* S2.8 contribution of Cb to Green */ +#define SCALER_CSC1_COEF_CB_GRN_MASK VC4_MASK(31, 22) +#define SCALER_CSC1_COEF_CB_GRN_SHIFT 22 +/* S2.8 contribution of Cr to Green */ +#define SCALER_CSC1_COEF_CR_GRN_MASK VC4_MASK(21, 12) +#define SCALER_CSC1_COEF_CR_GRN_SHIFT 12 +/* S2.8 contribution of Y to all of RGB */ +#define SCALER_CSC1_COEF_YY_ALL_MASK VC4_MASK(11, 2) +#define SCALER_CSC1_COEF_YY_ALL_SHIFT 2 +/* top 2 bits of S2.8 contribution of Cr to Blue */ +#define SCALER_CSC1_COEF_CR_BLU_MASK VC4_MASK(1, 0) +#define SCALER_CSC1_COEF_CR_BLU_SHIFT 0 +#define SCALER_CSC1_ITR_R_601_5 0xe73304a8 +#define SCALER_CSC1_ITR_R_709_3 0xf2b784a8 +#define SCALER_CSC1_JPEG_JFIF 0xea34a400 + +/* S2.8 contribution of Cb to Red */ +#define SCALER_CSC2_COEF_CB_RED_MASK VC4_MASK(29, 20) +#define SCALER_CSC2_COEF_CB_RED_SHIFT 20 +/* S2.8 contribution of Cr to Red */ +#define SCALER_CSC2_COEF_CR_RED_MASK VC4_MASK(19, 10) +#define SCALER_CSC2_COEF_CR_RED_SHIFT 10 +/* S2.8 contribution of Cb to Blue */ +#define SCALER_CSC2_COEF_CB_BLU_MASK VC4_MASK(19, 10) +#define SCALER_CSC2_COEF_CB_BLU_SHIFT 10 +#define SCALER_CSC2_ITR_R_601_5 0x00066204 +#define SCALER_CSC2_ITR_R_709_3 0x00072a1c +#define SCALER_CSC2_JPEG_JFIF 0x000599c5 + #define SCALER_TPZ0_VERT_RECALC BIT(31) #define SCALER_TPZ0_SCALE_MASK VC4_MASK(28, 8) #define SCALER_TPZ0_SCALE_SHIFT 8 -- cgit From 48627eb8dc55c60d35794105f6f79fb627347dbd Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Feb 2016 15:06:15 -0800 Subject: drm/vc4: Fix a framebuffer reference leak on async flip interrupt. We'd need X to queue up an async pageflip while another is outstanding, and then take a SIGIO. I think X actually avoids sending out the next pageflip while one's already queued, but I'm not sure. Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_crtc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/vc4') diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index 018145e0b87d..989ee728e2b0 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -544,6 +544,7 @@ static int vc4_async_page_flip(struct drm_crtc *crtc, /* Make sure all other async modesetes have landed. */ ret = down_interruptible(&vc4->async_modeset); if (ret) { + drm_framebuffer_unreference(fb); kfree(flip_state); return ret; } -- cgit From 851479ad5927b7b1aa141ca9dedb897a7bce2b1d Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 12 Feb 2016 14:15:14 -0800 Subject: drm/vc4: Bring HDMI up from power off if necessary. If the firmware hadn't brought up HDMI for us, we need to do its power-on reset sequence (reset HD and and clear its STANDBY bits, reset HDMI, and leave the PHY disabled). Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_hdmi.c | 29 ++++++++++++++++++++++++++++- drivers/gpu/drm/vc4/vc4_regs.h | 2 ++ 2 files changed, 30 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/vc4') diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index c69c0460196b..6e55760511bf 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -495,6 +495,16 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) goto err_put_i2c; } + /* This is the rate that is set by the firmware. The number + * needs to be a bit higher than the pixel clock rate + * (generally 148.5Mhz). + */ + ret = clk_set_rate(hdmi->hsm_clock, 163682864); + if (ret) { + DRM_ERROR("Failed to set HSM clock rate: %d\n", ret); + goto err_unprepare_pix; + } + ret = clk_prepare_enable(hdmi->hsm_clock); if (ret) { DRM_ERROR("Failed to turn on HDMI state machine clock: %d\n", @@ -516,7 +526,24 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) vc4->hdmi = hdmi; /* HDMI core must be enabled. */ - WARN_ON_ONCE((HD_READ(VC4_HD_M_CTL) & VC4_HD_M_ENABLE) == 0); + if (!(HD_READ(VC4_HD_M_CTL) & VC4_HD_M_ENABLE)) { + HD_WRITE(VC4_HD_M_CTL, VC4_HD_M_SW_RST); + udelay(1); + HD_WRITE(VC4_HD_M_CTL, 0); + + HD_WRITE(VC4_HD_M_CTL, VC4_HD_M_ENABLE); + + HDMI_WRITE(VC4_HDMI_SW_RESET_CONTROL, + VC4_HDMI_SW_RESET_HDMI | + VC4_HDMI_SW_RESET_FORMAT_DETECT); + + HDMI_WRITE(VC4_HDMI_SW_RESET_CONTROL, 0); + + /* PHY should be in reset, like + * vc4_hdmi_encoder_disable() does. + */ + HDMI_WRITE(VC4_HDMI_TX_PHY_RESET_CTL, 0xf << 16); + } drm_encoder_init(drm, hdmi->encoder, &vc4_hdmi_encoder_funcs, DRM_MODE_ENCODER_TMDS, NULL); diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h index 4e52a0a88551..85c36d238669 100644 --- a/drivers/gpu/drm/vc4/vc4_regs.h +++ b/drivers/gpu/drm/vc4/vc4_regs.h @@ -456,6 +456,8 @@ #define VC4_HDMI_TX_PHY_RESET_CTL 0x2c0 #define VC4_HD_M_CTL 0x00c +# define VC4_HD_M_REGISTER_FILE_STANDBY (3 << 6) +# define VC4_HD_M_RAM_STANDBY (3 << 4) # define VC4_HD_M_SW_RST BIT(2) # define VC4_HD_M_ENABLE BIT(0) -- cgit From 936f1a53f32148cc6164fad7c9a26ebf144e5ffb Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 12 Feb 2016 15:16:56 -0800 Subject: drm/vc4: Add another reg to HDMI debug dumping. This is also involved in the HDMI setup sequence so it's nice to see it. Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_hdmi.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/vc4') diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 6e55760511bf..56272ca98ab7 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -95,6 +95,7 @@ static const struct { HDMI_REG(VC4_HDMI_SW_RESET_CONTROL), HDMI_REG(VC4_HDMI_HOTPLUG_INT), HDMI_REG(VC4_HDMI_HOTPLUG), + HDMI_REG(VC4_HDMI_RAM_PACKET_CONFIG), HDMI_REG(VC4_HDMI_HORZA), HDMI_REG(VC4_HDMI_HORZB), HDMI_REG(VC4_HDMI_FIFO_CTL), -- cgit From c31806fbdda910d337b60896040afa708bdfa2bd Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 15 Feb 2016 17:06:02 -0800 Subject: drm/vc4: Fix the name of the VSYNCD_EVEN register. It's used for delaying vsync in interlaced mode. Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_crtc.c | 2 +- drivers/gpu/drm/vc4/vc4_regs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/vc4') diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index 989ee728e2b0..5e84be2e97d0 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -83,7 +83,7 @@ static const struct { } crtc_regs[] = { CRTC_REG(PV_CONTROL), CRTC_REG(PV_V_CONTROL), - CRTC_REG(PV_VSYNCD), + CRTC_REG(PV_VSYNCD_EVEN), CRTC_REG(PV_HORZA), CRTC_REG(PV_HORZB), CRTC_REG(PV_VERTA), diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h index 85c36d238669..d529665d43cd 100644 --- a/drivers/gpu/drm/vc4/vc4_regs.h +++ b/drivers/gpu/drm/vc4/vc4_regs.h @@ -187,7 +187,7 @@ # define PV_VCONTROL_CONTINUOUS BIT(1) # define PV_VCONTROL_VIDEN BIT(0) -#define PV_VSYNCD 0x08 +#define PV_VSYNCD_EVEN 0x08 #define PV_HORZA 0x0c # define PV_HORZA_HBP_MASK VC4_MASK(31, 16) -- cgit From a7c5047d1ce178dd2b1fa577fc8909ad663d56d5 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 15 Feb 2016 17:31:41 -0800 Subject: drm/vc4: Fix setting of vertical timings in the CRTC. It looks like when I went to add the interlaced bits, I just took the existing PV_VERT* block and indented it, instead of copy and pasting it first. Without this, changing resolution never worked. Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_crtc.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'drivers/gpu/drm/vc4') diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index 5e84be2e97d0..93d53c278486 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -212,6 +212,16 @@ static void vc4_crtc_mode_set_nofb(struct drm_crtc *crtc) PV_HORZB_HFP) | VC4_SET_FIELD(mode->hdisplay, PV_HORZB_HACTIVE)); + CRTC_WRITE(PV_VERTA, + VC4_SET_FIELD(mode->vtotal - mode->vsync_end, + PV_VERTA_VBP) | + VC4_SET_FIELD(mode->vsync_end - mode->vsync_start, + PV_VERTA_VSYNC)); + CRTC_WRITE(PV_VERTB, + VC4_SET_FIELD(mode->vsync_start - mode->vdisplay, + PV_VERTB_VFP) | + VC4_SET_FIELD(vactive, PV_VERTB_VACTIVE)); + if (interlace) { CRTC_WRITE(PV_VERTA_EVEN, VC4_SET_FIELD(mode->vtotal - mode->vsync_end - 1, -- cgit From 6a609209865247cc748e90158c99f374f79b494c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 16 Feb 2016 10:24:08 -0800 Subject: drm/vc4: Initialize scaler DISPBKGND on modeset. We weren't updating the interlaced bit, so we'd scan out incorrectly if the firmware had brought up the TV encoder and we were switching to HDMI. Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_crtc.c | 6 ++++++ drivers/gpu/drm/vc4/vc4_regs.h | 14 ++++++++++++++ 2 files changed, 20 insertions(+) (limited to 'drivers/gpu/drm/vc4') diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index 93d53c278486..6ae5abcabe62 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -183,6 +183,8 @@ static int vc4_get_clock_select(struct drm_crtc *crtc) static void vc4_crtc_mode_set_nofb(struct drm_crtc *crtc) { + struct drm_device *dev = crtc->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc); struct drm_crtc_state *state = crtc->state; struct drm_display_mode *mode = &state->adjusted_mode; @@ -251,6 +253,10 @@ static void vc4_crtc_mode_set_nofb(struct drm_crtc *crtc) PV_CONTROL_FIFO_CLR | PV_CONTROL_EN); + HVS_WRITE(SCALER_DISPBKGNDX(vc4_crtc->channel), + SCALER_DISPBKGND_AUTOHS | + (interlace ? SCALER_DISPBKGND_INTERLACE : 0)); + if (debug_dump_regs) { DRM_INFO("CRTC %d regs after:\n", drm_crtc_index(crtc)); vc4_crtc_dump_regs(vc4_crtc); diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h index d529665d43cd..7c29993a3316 100644 --- a/drivers/gpu/drm/vc4/vc4_regs.h +++ b/drivers/gpu/drm/vc4/vc4_regs.h @@ -350,6 +350,17 @@ # define SCALER_DISPCTRLX_HEIGHT_SHIFT 0 #define SCALER_DISPBKGND0 0x00000044 +# define SCALER_DISPBKGND_AUTOHS BIT(31) +# define SCALER_DISPBKGND_INTERLACE BIT(30) +# define SCALER_DISPBKGND_GAMMA BIT(29) +# define SCALER_DISPBKGND_TESTMODE_MASK VC4_MASK(28, 25) +# define SCALER_DISPBKGND_TESTMODE_SHIFT 25 +/* Enables filling the scaler line with the RGB value in the low 24 + * bits before compositing. Costs cycles, so should be skipped if + * opaque display planes will cover everything. + */ +# define SCALER_DISPBKGND_FILL BIT(24) + #define SCALER_DISPSTAT0 0x00000048 #define SCALER_DISPBASE0 0x0000004c # define SCALER_DISPSTATX_MODE_MASK VC4_MASK(31, 30) @@ -362,6 +373,9 @@ # define SCALER_DISPSTATX_EMPTY BIT(28) #define SCALER_DISPCTRL1 0x00000050 #define SCALER_DISPBKGND1 0x00000054 +#define SCALER_DISPBKGNDX(x) (SCALER_DISPBKGND0 + \ + (x) * (SCALER_DISPBKGND1 - \ + SCALER_DISPBKGND0)) #define SCALER_DISPSTAT1 0x00000058 #define SCALER_DISPSTATX(x) (SCALER_DISPSTAT0 + \ (x) * (SCALER_DISPSTAT1 - \ -- cgit From ca26d28bbaa39f31d5e7e4812603b015c8d54207 Mon Sep 17 00:00:00 2001 From: Varad Gautam Date: Wed, 17 Feb 2016 19:08:21 +0530 Subject: drm/vc4: improve throughput by pipelining binning and rendering jobs The hardware provides us with separate threads for binning and rendering, and the existing model waits for them both to complete before submitting the next job. Splitting the binning and rendering submissions reduces idle time and gives us approx 20-30% speedup with some x11perf tests such as -line10 and -tilerect1. Improves openarena performance by 1.01897% +/- 0.247857% (n=16). Thanks to anholt for suggesting this. v2: Rebase on the spurious resets fix (change by anholt). Signed-off-by: Varad Gautam Reviewed-by: Eric Anholt Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_drv.h | 37 +++++++++---- drivers/gpu/drm/vc4/vc4_gem.c | 123 ++++++++++++++++++++++++++++++------------ drivers/gpu/drm/vc4/vc4_irq.c | 58 ++++++++++++++++---- 3 files changed, 166 insertions(+), 52 deletions(-) (limited to 'drivers/gpu/drm/vc4') diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index f53fe6cd72be..fa2ad15d4f62 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -52,7 +52,7 @@ struct vc4_dev { /* Protects bo_cache and the BO stats. */ struct mutex bo_lock; - /* Sequence number for the last job queued in job_list. + /* Sequence number for the last job queued in bin_job_list. * Starts at 0 (no jobs emitted). */ uint64_t emit_seqno; @@ -62,11 +62,19 @@ struct vc4_dev { */ uint64_t finished_seqno; - /* List of all struct vc4_exec_info for jobs to be executed. - * The first job in the list is the one currently programmed - * into ct0ca/ct1ca for execution. + /* List of all struct vc4_exec_info for jobs to be executed in + * the binner. The first job in the list is the one currently + * programmed into ct0ca for execution. */ - struct list_head job_list; + struct list_head bin_job_list; + + /* List of all struct vc4_exec_info for jobs that have + * completed binning and are ready for rendering. The first + * job in the list is the one currently programmed into ct1ca + * for execution. + */ + struct list_head render_job_list; + /* List of the finished vc4_exec_infos waiting to be freed by * job_done_work. */ @@ -296,11 +304,20 @@ struct vc4_exec_info { }; static inline struct vc4_exec_info * -vc4_first_job(struct vc4_dev *vc4) +vc4_first_bin_job(struct vc4_dev *vc4) +{ + if (list_empty(&vc4->bin_job_list)) + return NULL; + return list_first_entry(&vc4->bin_job_list, struct vc4_exec_info, head); +} + +static inline struct vc4_exec_info * +vc4_first_render_job(struct vc4_dev *vc4) { - if (list_empty(&vc4->job_list)) + if (list_empty(&vc4->render_job_list)) return NULL; - return list_first_entry(&vc4->job_list, struct vc4_exec_info, head); + return list_first_entry(&vc4->render_job_list, + struct vc4_exec_info, head); } /** @@ -414,7 +431,9 @@ int vc4_wait_seqno_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int vc4_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); -void vc4_submit_next_job(struct drm_device *dev); +void vc4_submit_next_bin_job(struct drm_device *dev); +void vc4_submit_next_render_job(struct drm_device *dev); +void vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec); int vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns, bool interruptible); void vc4_job_handle_completed(struct vc4_dev *vc4); diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 202aa1544acc..8d4384f8b78d 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -141,10 +141,10 @@ vc4_save_hang_state(struct drm_device *dev) struct vc4_dev *vc4 = to_vc4_dev(dev); struct drm_vc4_get_hang_state *state; struct vc4_hang_state *kernel_state; - struct vc4_exec_info *exec; + struct vc4_exec_info *exec[2]; struct vc4_bo *bo; unsigned long irqflags; - unsigned int i, unref_list_count; + unsigned int i, j, unref_list_count, prev_idx; kernel_state = kcalloc(1, sizeof(*kernel_state), GFP_KERNEL); if (!kernel_state) @@ -153,37 +153,55 @@ vc4_save_hang_state(struct drm_device *dev) state = &kernel_state->user_state; spin_lock_irqsave(&vc4->job_lock, irqflags); - exec = vc4_first_job(vc4); - if (!exec) { + exec[0] = vc4_first_bin_job(vc4); + exec[1] = vc4_first_render_job(vc4); + if (!exec[0] && !exec[1]) { spin_unlock_irqrestore(&vc4->job_lock, irqflags); return; } - unref_list_count = 0; - list_for_each_entry(bo, &exec->unref_list, unref_head) - unref_list_count++; + /* Get the bos from both binner and renderer into hang state. */ + state->bo_count = 0; + for (i = 0; i < 2; i++) { + if (!exec[i]) + continue; + + unref_list_count = 0; + list_for_each_entry(bo, &exec[i]->unref_list, unref_head) + unref_list_count++; + state->bo_count += exec[i]->bo_count + unref_list_count; + } + + kernel_state->bo = kcalloc(state->bo_count, + sizeof(*kernel_state->bo), GFP_ATOMIC); - state->bo_count = exec->bo_count + unref_list_count; - kernel_state->bo = kcalloc(state->bo_count, sizeof(*kernel_state->bo), - GFP_ATOMIC); if (!kernel_state->bo) { spin_unlock_irqrestore(&vc4->job_lock, irqflags); return; } - for (i = 0; i < exec->bo_count; i++) { - drm_gem_object_reference(&exec->bo[i]->base); - kernel_state->bo[i] = &exec->bo[i]->base; - } + prev_idx = 0; + for (i = 0; i < 2; i++) { + if (!exec[i]) + continue; - list_for_each_entry(bo, &exec->unref_list, unref_head) { - drm_gem_object_reference(&bo->base.base); - kernel_state->bo[i] = &bo->base.base; - i++; + for (j = 0; j < exec[i]->bo_count; j++) { + drm_gem_object_reference(&exec[i]->bo[j]->base); + kernel_state->bo[j + prev_idx] = &exec[i]->bo[j]->base; + } + + list_for_each_entry(bo, &exec[i]->unref_list, unref_head) { + drm_gem_object_reference(&bo->base.base); + kernel_state->bo[j + prev_idx] = &bo->base.base; + j++; + } + prev_idx = j + 1; } - state->start_bin = exec->ct0ca; - state->start_render = exec->ct1ca; + if (exec[0]) + state->start_bin = exec[0]->ct0ca; + if (exec[1]) + state->start_render = exec[1]->ct1ca; spin_unlock_irqrestore(&vc4->job_lock, irqflags); @@ -267,13 +285,15 @@ vc4_hangcheck_elapsed(unsigned long data) struct vc4_dev *vc4 = to_vc4_dev(dev); uint32_t ct0ca, ct1ca; unsigned long irqflags; - struct vc4_exec_info *exec; + struct vc4_exec_info *bin_exec, *render_exec; spin_lock_irqsave(&vc4->job_lock, irqflags); - exec = vc4_first_job(vc4); + + bin_exec = vc4_first_bin_job(vc4); + render_exec = vc4_first_render_job(vc4); /* If idle, we can stop watching for hangs. */ - if (!exec) { + if (!bin_exec && !render_exec) { spin_unlock_irqrestore(&vc4->job_lock, irqflags); return; } @@ -284,9 +304,12 @@ vc4_hangcheck_elapsed(unsigned long data) /* If we've made any progress in execution, rearm the timer * and wait. */ - if (ct0ca != exec->last_ct0ca || ct1ca != exec->last_ct1ca) { - exec->last_ct0ca = ct0ca; - exec->last_ct1ca = ct1ca; + if ((bin_exec && ct0ca != bin_exec->last_ct0ca) || + (render_exec && ct1ca != render_exec->last_ct1ca)) { + if (bin_exec) + bin_exec->last_ct0ca = ct0ca; + if (render_exec) + render_exec->last_ct1ca = ct1ca; spin_unlock_irqrestore(&vc4->job_lock, irqflags); vc4_queue_hangcheck(dev); return; @@ -386,11 +409,13 @@ vc4_flush_caches(struct drm_device *dev) * The job_lock should be held during this. */ void -vc4_submit_next_job(struct drm_device *dev) +vc4_submit_next_bin_job(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); - struct vc4_exec_info *exec = vc4_first_job(vc4); + struct vc4_exec_info *exec; +again: + exec = vc4_first_bin_job(vc4); if (!exec) return; @@ -400,11 +425,40 @@ vc4_submit_next_job(struct drm_device *dev) V3D_WRITE(V3D_BPOA, 0); V3D_WRITE(V3D_BPOS, 0); - if (exec->ct0ca != exec->ct0ea) + /* Either put the job in the binner if it uses the binner, or + * immediately move it to the to-be-rendered queue. + */ + if (exec->ct0ca != exec->ct0ea) { submit_cl(dev, 0, exec->ct0ca, exec->ct0ea); + } else { + vc4_move_job_to_render(dev, exec); + goto again; + } +} + +void +vc4_submit_next_render_job(struct drm_device *dev) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_exec_info *exec = vc4_first_render_job(vc4); + + if (!exec) + return; + submit_cl(dev, 1, exec->ct1ca, exec->ct1ea); } +void +vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + bool was_empty = list_empty(&vc4->render_job_list); + + list_move_tail(&exec->head, &vc4->render_job_list); + if (was_empty) + vc4_submit_next_render_job(dev); +} + static void vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) { @@ -443,14 +497,14 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec) exec->seqno = seqno; vc4_update_bo_seqnos(exec, seqno); - list_add_tail(&exec->head, &vc4->job_list); + list_add_tail(&exec->head, &vc4->bin_job_list); /* If no job was executing, kick ours off. Otherwise, it'll - * get started when the previous job's frame done interrupt + * get started when the previous job's flush done interrupt * occurs. */ - if (vc4_first_job(vc4) == exec) { - vc4_submit_next_job(dev); + if (vc4_first_bin_job(vc4) == exec) { + vc4_submit_next_bin_job(dev); vc4_queue_hangcheck(dev); } @@ -859,7 +913,8 @@ vc4_gem_init(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); - INIT_LIST_HEAD(&vc4->job_list); + INIT_LIST_HEAD(&vc4->bin_job_list); + INIT_LIST_HEAD(&vc4->render_job_list); INIT_LIST_HEAD(&vc4->job_done_list); INIT_LIST_HEAD(&vc4->seqno_cb_list); spin_lock_init(&vc4->job_lock); diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c index 78a21357fb2d..b0104a346a74 100644 --- a/drivers/gpu/drm/vc4/vc4_irq.c +++ b/drivers/gpu/drm/vc4/vc4_irq.c @@ -30,6 +30,10 @@ * disables that specific interrupt, and 0s written are ignored * (reading either one returns the set of enabled interrupts). * + * When we take a binning flush done interrupt, we need to submit the + * next frame for binning and move the finished frame to the render + * thread. + * * When we take a render frame interrupt, we need to wake the * processes waiting for some frame to be done, and get the next frame * submitted ASAP (so the hardware doesn't sit idle when there's work @@ -44,6 +48,7 @@ #include "vc4_regs.h" #define V3D_DRIVER_IRQS (V3D_INT_OUTOMEM | \ + V3D_INT_FLDONE | \ V3D_INT_FRDONE) DECLARE_WAIT_QUEUE_HEAD(render_wait); @@ -77,7 +82,7 @@ vc4_overflow_mem_work(struct work_struct *work) unsigned long irqflags; spin_lock_irqsave(&vc4->job_lock, irqflags); - current_exec = vc4_first_job(vc4); + current_exec = vc4_first_bin_job(vc4); if (current_exec) { vc4->overflow_mem->seqno = vc4->finished_seqno + 1; list_add_tail(&vc4->overflow_mem->unref_head, @@ -98,17 +103,43 @@ vc4_overflow_mem_work(struct work_struct *work) } static void -vc4_irq_finish_job(struct drm_device *dev) +vc4_irq_finish_bin_job(struct drm_device *dev) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_exec_info *exec = vc4_first_bin_job(vc4); + + if (!exec) + return; + + vc4_move_job_to_render(dev, exec); + vc4_submit_next_bin_job(dev); +} + +static void +vc4_cancel_bin_job(struct drm_device *dev) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_exec_info *exec = vc4_first_bin_job(vc4); + + if (!exec) + return; + + list_move_tail(&exec->head, &vc4->bin_job_list); + vc4_submit_next_bin_job(dev); +} + +static void +vc4_irq_finish_render_job(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); - struct vc4_exec_info *exec = vc4_first_job(vc4); + struct vc4_exec_info *exec = vc4_first_render_job(vc4); if (!exec) return; vc4->finished_seqno++; list_move_tail(&exec->head, &vc4->job_done_list); - vc4_submit_next_job(dev); + vc4_submit_next_render_job(dev); wake_up_all(&vc4->job_wait_queue); schedule_work(&vc4->job_done_work); @@ -125,9 +156,10 @@ vc4_irq(int irq, void *arg) barrier(); intctl = V3D_READ(V3D_INTCTL); - /* Acknowledge the interrupts we're handling here. The render - * frame done interrupt will be cleared, while OUTOMEM will - * stay high until the underlying cause is cleared. + /* Acknowledge the interrupts we're handling here. The binner + * last flush / render frame done interrupt will be cleared, + * while OUTOMEM will stay high until the underlying cause is + * cleared. */ V3D_WRITE(V3D_INTCTL, intctl); @@ -138,9 +170,16 @@ vc4_irq(int irq, void *arg) status = IRQ_HANDLED; } + if (intctl & V3D_INT_FLDONE) { + spin_lock(&vc4->job_lock); + vc4_irq_finish_bin_job(dev); + spin_unlock(&vc4->job_lock); + status = IRQ_HANDLED; + } + if (intctl & V3D_INT_FRDONE) { spin_lock(&vc4->job_lock); - vc4_irq_finish_job(dev); + vc4_irq_finish_render_job(dev); spin_unlock(&vc4->job_lock); status = IRQ_HANDLED; } @@ -205,6 +244,7 @@ void vc4_irq_reset(struct drm_device *dev) V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS); spin_lock_irqsave(&vc4->job_lock, irqflags); - vc4_irq_finish_job(dev); + vc4_cancel_bin_job(dev); + vc4_irq_finish_render_job(dev); spin_unlock_irqrestore(&vc4->job_lock, irqflags); } -- cgit From 0e60eab57557bc06bb3a5ef8d5d6dcd9ddd47aff Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 29 Feb 2016 17:53:00 -0800 Subject: drm/vc4: Let gpiolib know that we're OK with sleeping for HPD. Fixes an error thrown every few seconds when we poll HPD when it's on a I2C to GPIO expander. Signed-off-by: Eric Anholt Tested-by: Daniel Stone --- drivers/gpu/drm/vc4/vc4_hdmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/vc4') diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 56272ca98ab7..6bcf51d16a1d 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -166,7 +166,7 @@ vc4_hdmi_connector_detect(struct drm_connector *connector, bool force) struct vc4_dev *vc4 = to_vc4_dev(dev); if (vc4->hdmi->hpd_gpio) { - if (gpio_get_value(vc4->hdmi->hpd_gpio)) + if (gpio_get_value_cansleep(vc4->hdmi->hpd_gpio)) return connector_status_connected; else return connector_status_disconnected; -- cgit From 0b06e0a7945130e6a187f7959529cba7725f573a Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 29 Feb 2016 17:53:01 -0800 Subject: drm/vc4: Respect GPIO_ACTIVE_LOW on HDMI HPD if set in the devicetree. The original Raspberry Pi had the GPIO active high, but the later models are active low. The DT GPIO bindings allow specifying the active flag, except that it doesn't get propagated to the gpiodesc, so you have to handle it yourself. Signed-off-by: Eric Anholt Tested-by: Daniel Stone --- drivers/gpu/drm/vc4/vc4_hdmi.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/vc4') diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 6bcf51d16a1d..d8b864925fd3 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -47,6 +47,7 @@ struct vc4_hdmi { void __iomem *hdmicore_regs; void __iomem *hd_regs; int hpd_gpio; + bool hpd_active_low; struct clk *pixel_clock; struct clk *hsm_clock; @@ -166,7 +167,8 @@ vc4_hdmi_connector_detect(struct drm_connector *connector, bool force) struct vc4_dev *vc4 = to_vc4_dev(dev); if (vc4->hdmi->hpd_gpio) { - if (gpio_get_value_cansleep(vc4->hdmi->hpd_gpio)) + if (gpio_get_value_cansleep(vc4->hdmi->hpd_gpio) ^ + vc4->hdmi->hpd_active_low) return connector_status_connected; else return connector_status_disconnected; @@ -517,11 +519,17 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) * we'll use the HDMI core's register. */ if (of_find_property(dev->of_node, "hpd-gpios", &value)) { - hdmi->hpd_gpio = of_get_named_gpio(dev->of_node, "hpd-gpios", 0); + enum of_gpio_flags hpd_gpio_flags; + + hdmi->hpd_gpio = of_get_named_gpio_flags(dev->of_node, + "hpd-gpios", 0, + &hpd_gpio_flags); if (hdmi->hpd_gpio < 0) { ret = hdmi->hpd_gpio; goto err_unprepare_hsm; } + + hdmi->hpd_active_low = hpd_gpio_flags & OF_GPIO_ACTIVE_LOW; } vc4->hdmi = hdmi; -- cgit From 585cb132a48190b554aecda2ebc3e2911fcbb665 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 8 Mar 2016 15:09:41 +0300 Subject: drm/vc4: Return -EFAULT on copy_from_user() failure The copy_from_user() function returns the number of bytes not copied but we want to return a negative error code. Fixes: 463873d57014 ('drm/vc4: Add an API for creating GPU shaders in GEM BOs.') Cc: stable@vger.kernel.org Signed-off-by: Dan Carpenter Reviewed-by: Eric Anholt Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_bo.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/vc4') diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c index 22278bcfc60e..ac8eafea6361 100644 --- a/drivers/gpu/drm/vc4/vc4_bo.c +++ b/drivers/gpu/drm/vc4/vc4_bo.c @@ -499,11 +499,12 @@ vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data, if (IS_ERR(bo)) return PTR_ERR(bo); - ret = copy_from_user(bo->base.vaddr, + if (copy_from_user(bo->base.vaddr, (void __user *)(uintptr_t)args->data, - args->size); - if (ret != 0) + args->size)) { + ret = -EFAULT; goto fail; + } /* Clear the rest of the memory from allocating from the BO * cache. */ -- cgit From 90d7116061f86c1f8ea460806a0414addea7b58b Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 4 Mar 2016 12:32:07 -0800 Subject: drm/vc4: Recognize a more specific compatible string for V3D. The Raspberry Pi Foundation's firmware updates are shipping device trees using the old string, so we'll keep recognizing that as this rev of V3D. Still, we should use a more specific name in the upstream DT to clarify which board is being supported, in case we do other revs of V3D in the future. Signed-off-by: Eric Anholt Acked-by: Stephen Warren --- drivers/gpu/drm/vc4/vc4_v3d.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/vc4') diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c index 31de5d17bc85..e6d3c6028341 100644 --- a/drivers/gpu/drm/vc4/vc4_v3d.c +++ b/drivers/gpu/drm/vc4/vc4_v3d.c @@ -268,6 +268,7 @@ static int vc4_v3d_dev_remove(struct platform_device *pdev) } static const struct of_device_id vc4_v3d_dt_match[] = { + { .compatible = "brcm,bcm2835-v3d" }, { .compatible = "brcm,vc4-v3d" }, {} }; -- cgit