diff options
author | Thomas Zimmermann <tzimmermann@suse.de> | 2025-03-28 15:15:02 +0100 |
---|---|---|
committer | Thomas Zimmermann <tzimmermann@suse.de> | 2025-04-01 15:35:23 +0200 |
commit | 58523a25cbf728695e374fd90416d9bd4a68d4c0 (patch) | |
tree | 3da388d6207c8d5a1446974938f542acb56ba0e7 | |
parent | a376dcf49c391d06dad31a87e3cdfdf67fecc98d (diff) |
drm/format-helper: Optimize 32-to-24-bpp conversion
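For ease of implementation, the existing line-conversion functions
for 24-bit formats write each output byte individually. Improve
performance by packing each group of 4 pixels (12 bytes) into three
32-bit stores; pixels left over at the end of a line still go through
the per-pixel loop.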
v2:
- simplify address calculation (Jani)
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Jocelyn Falempe <jfalempe@redhat.com>
Link: https://lore.kernel.org/r/20250328141709.217283-7-tzimmermann@suse.de
-rw-r--r-- | drivers/gpu/drm/drm_format_helper.c | 37 |
1 files changed, 36 insertions, 1 deletions
diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c
index a926aa6671fc..daf5a6d4f283 100644
--- a/drivers/gpu/drm/drm_format_helper.c
+++ b/drivers/gpu/drm/drm_format_helper.c
@@ -246,6 +246,9 @@ static int drm_fb_xfrm(struct iosys_map *dst,
 				     xfrm_line);
 }
 
+#define ALIGN_DOWN_PIXELS(end, n, a) \
+	((end) - ((n) & ((a) - 1)))
+
 static __always_inline void drm_fb_xfrm_line_32to8(void *dbuf, const void *sbuf,
 						   unsigned int pixels,
 						   u32 (*xfrm_pixel)(u32))
@@ -274,10 +277,42 @@ static __always_inline void drm_fb_xfrm_line_32to24(void *dbuf, const void *sbuf
 						    unsigned int pixels,
 						    u32 (*xfrm_pixel)(u32))
 {
-	u8 *dbuf8 = dbuf;
+	__le32 *dbuf32 = dbuf;
+	u8 *dbuf8;
 	const __le32 *sbuf32 = sbuf;
 	const __le32 *send32 = sbuf32 + pixels;
 
+	/* write pixels in chunks of 4 */
+	while (sbuf32 < ALIGN_DOWN_PIXELS(send32, pixels, 4)) {
+		u32 val24[4] = {
+			xfrm_pixel(le32_to_cpup(sbuf32++)),
+			xfrm_pixel(le32_to_cpup(sbuf32++)),
+			xfrm_pixel(le32_to_cpup(sbuf32++)),
+			xfrm_pixel(le32_to_cpup(sbuf32++)),
+		};
+		u32 out32[3] = {
+			/* write output bytes in reverse order for little endianness */
+			((val24[0] & 0x000000ff)) |
+			((val24[0] & 0x0000ff00)) |
+			((val24[0] & 0x00ff0000)) |
+			((val24[1] & 0x000000ff) << 24),
+			((val24[1] & 0x0000ff00) >> 8) |
+			((val24[1] & 0x00ff0000) >> 8) |
+			((val24[2] & 0x000000ff) << 16) |
+			((val24[2] & 0x0000ff00) << 16),
+			((val24[2] & 0x00ff0000) >> 16) |
+			((val24[3] & 0x000000ff) << 8) |
+			((val24[3] & 0x0000ff00) << 8) |
+			((val24[3] & 0x00ff0000) << 8),
+		};
+
+		*dbuf32++ = cpu_to_le32(out32[0]);
+		*dbuf32++ = cpu_to_le32(out32[1]);
+		*dbuf32++ = cpu_to_le32(out32[2]);
+	}
+
+	/* write trailing pixel */
+	dbuf8 = (u8 __force *)dbuf32;
 	while (sbuf32 < send32) {
 		u32 val24 = xfrm_pixel(le32_to_cpup(sbuf32++));
 		/* write output in reverse order for little endianness */