diff options
185 files changed, 14502 insertions, 3842 deletions
diff --git a/Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.txt b/Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.txt index 3a72a103a18a..a41d280c3f9f 100644 --- a/Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.txt +++ b/Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.txt @@ -14,6 +14,7 @@ Required properties: - compatible : Shall contain one or more of - "renesas,r8a7795-hdmi" for R8A7795 (R-Car H3) compatible HDMI TX - "renesas,r8a7796-hdmi" for R8A7796 (R-Car M3-W) compatible HDMI TX + - "renesas,r8a77965-hdmi" for R8A77965 (R-Car M3-N) compatible HDMI TX - "renesas,rcar-gen3-hdmi" for the generic R-Car Gen3 compatible HDMI TX When compatible with generic versions, nodes must list the SoC-specific diff --git a/Documentation/devicetree/bindings/display/renesas,du.txt b/Documentation/devicetree/bindings/display/renesas,du.txt index c9cd17f99702..7c6854bd0a04 100644 --- a/Documentation/devicetree/bindings/display/renesas,du.txt +++ b/Documentation/devicetree/bindings/display/renesas,du.txt @@ -13,6 +13,7 @@ Required Properties: - "renesas,du-r8a7794" for R8A7794 (R-Car E2) compatible DU - "renesas,du-r8a7795" for R8A7795 (R-Car H3) compatible DU - "renesas,du-r8a7796" for R8A7796 (R-Car M3-W) compatible DU + - "renesas,du-r8a77965" for R8A77965 (R-Car M3-N) compatible DU - "renesas,du-r8a77970" for R8A77970 (R-Car V3M) compatible DU - "renesas,du-r8a77995" for R8A77995 (R-Car D3) compatible DU @@ -47,20 +48,21 @@ bindings specified in Documentation/devicetree/bindings/graph.txt. The following table lists for each supported model the port number corresponding to each DU output. - Port0 Port1 Port2 Port3 + Port0 Port1 Port2 Port3 ----------------------------------------------------------------------------- - R8A7743 (RZ/G1M) DPAD 0 LVDS 0 - - - R8A7745 (RZ/G1E) DPAD 0 DPAD 1 - - - R8A7779 (R-Car H1) DPAD 0 DPAD 1 - - - R8A7790 (R-Car H2) DPAD 0 LVDS 0 LVDS 1 - - R8A7791 (R-Car M2-W) DPAD 0 LVDS 0 - - - R8A7792 (R-Car V2H) DPAD 0 DPAD 1 - - - R8A7793 (R-Car M2-N) DPAD 0 LVDS 0 - - - R8A7794 (R-Car E2) DPAD 0 DPAD 1 - - - R8A7795 (R-Car H3) DPAD 0 HDMI 0 HDMI 1 LVDS 0 - R8A7796 (R-Car M3-W) DPAD 0 HDMI 0 LVDS 0 - - R8A77970 (R-Car V3M) DPAD 0 LVDS 0 - - - R8A77995 (R-Car D3) DPAD 0 LVDS 0 LVDS 1 - + R8A7743 (RZ/G1M) DPAD 0 LVDS 0 - - + R8A7745 (RZ/G1E) DPAD 0 DPAD 1 - - + R8A7779 (R-Car H1) DPAD 0 DPAD 1 - - + R8A7790 (R-Car H2) DPAD 0 LVDS 0 LVDS 1 - + R8A7791 (R-Car M2-W) DPAD 0 LVDS 0 - - + R8A7792 (R-Car V2H) DPAD 0 DPAD 1 - - + R8A7793 (R-Car M2-N) DPAD 0 LVDS 0 - - + R8A7794 (R-Car E2) DPAD 0 DPAD 1 - - + R8A7795 (R-Car H3) DPAD 0 HDMI 0 HDMI 1 LVDS 0 + R8A7796 (R-Car M3-W) DPAD 0 HDMI 0 LVDS 0 - + R8A77965 (R-Car M3-N) DPAD 0 HDMI 0 LVDS 0 - + R8A77970 (R-Car V3M) DPAD 0 LVDS 0 - - + R8A77995 (R-Car D3) DPAD 0 LVDS 0 LVDS 1 - Example: R8A7795 (R-Car H3) ES2.0 DU diff --git a/Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.txt b/Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.txt new file mode 100644 index 000000000000..c907aa8dd755 --- /dev/null +++ b/Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.txt @@ -0,0 +1,28 @@ +Broadcom V3D GPU + +Only the Broadcom V3D 3.x and newer GPUs are covered by this binding. +For V3D 2.x, see brcm,bcm-vc4.txt. + +Required properties: +- compatible: Should be "brcm,7268-v3d" or "brcm,7278-v3d" +- reg: Physical base addresses and lengths of the register areas +- reg-names: Names for the register areas. The "hub", "bridge", and "core0" + register areas are always required. The "gca" register area + is required if the GCA cache controller is present. +- interrupts: The interrupt numbers. The first interrupt is for the hub, + while the following interrupts are for the cores. + See bindings/interrupt-controller/interrupts.txt + +Optional properties: +- clocks: The core clock the unit runs on + +v3d { + compatible = "brcm,7268-v3d"; + reg = <0xf1204000 0x100>, + <0xf1200000 0x4000>, + <0xf1208000 0x4000>, + <0xf1204100 0x100>; + reg-names = "bridge", "hub", "core0", "gca"; + interrupts = <0 78 4>, + <0 77 4>; +}; diff --git a/Documentation/devicetree/bindings/gpu/samsung-scaler.txt b/Documentation/devicetree/bindings/gpu/samsung-scaler.txt new file mode 100644 index 000000000000..9c3d98105dfd --- /dev/null +++ b/Documentation/devicetree/bindings/gpu/samsung-scaler.txt @@ -0,0 +1,27 @@ +* Samsung Exynos Image Scaler + +Required properties: + - compatible : value should be one of the following: + (a) "samsung,exynos5420-scaler" for Scaler IP in Exynos5420 + (b) "samsung,exynos5433-scaler" for Scaler IP in Exynos5433 + + - reg : Physical base address of the IP registers and length of memory + mapped region. + + - interrupts : Interrupt specifier for scaler interrupt, according to format + specific to interrupt parent. + + - clocks : Clock specifier for scaler clock, according to generic clock + bindings. (See Documentation/devicetree/bindings/clock/exynos*.txt) + + - clock-names : Names of clocks. For exynos scaler, it should be "mscl" + on 5420 and "pclk", "aclk" and "aclk_xiu" on 5433. + +Example: + scaler@12800000 { + compatible = "samsung,exynos5420-scaler"; + reg = <0x12800000 0x1294>; + interrupts = <0 220 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clock CLK_MSCL0>; + clock-names = "mscl"; + }; diff --git a/Documentation/gpu/drivers.rst b/Documentation/gpu/drivers.rst index d3ab6abae838..f982558fc25d 100644 --- a/Documentation/gpu/drivers.rst +++ b/Documentation/gpu/drivers.rst @@ -10,6 +10,7 @@ GPU Driver Documentation tegra tinydrm tve200 + v3d vc4 bridge/dw-hdmi xen-front diff --git a/MAINTAINERS b/MAINTAINERS index 8daa96a99eac..ec7b1d890bf6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -767,12 +767,14 @@ F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c F: drivers/gpu/drm/amd/amdkfd/ F: drivers/gpu/drm/amd/include/cik_structs.h F: drivers/gpu/drm/amd/include/kgd_kfd_interface.h F: drivers/gpu/drm/amd/include/vi_structs.h +F: drivers/gpu/drm/amd/include/v9_structs.h F: include/uapi/linux/kfd_ioctl.h AMD SEATTLE DEVICE TREE SUPPORT @@ -4784,6 +4786,14 @@ S: Maintained F: drivers/gpu/drm/omapdrm/ F: Documentation/devicetree/bindings/display/ti/ +DRM DRIVERS FOR V3D +M: Eric Anholt <eric@anholt.net> +S: Supported +F: drivers/gpu/drm/v3d/ +F: include/uapi/drm/v3d_drm.h +F: Documentation/devicetree/bindings/display/brcm,bcm-v3d.txt +T: git git://anongit.freedesktop.org/drm/drm-misc + DRM DRIVERS FOR VC4 M: Eric Anholt <eric@anholt.net> T: git git://github.com/anholt/linux diff --git a/drivers/dma-buf/sync_debug.h b/drivers/dma-buf/sync_debug.h index d615a89f774c..05e33f937ad0 100644 --- a/drivers/dma-buf/sync_debug.h +++ b/drivers/dma-buf/sync_debug.h @@ -62,8 +62,6 @@ struct sync_pt { struct rb_node node; }; -#ifdef CONFIG_SW_SYNC - extern const struct file_operations sw_sync_debugfs_fops; void sync_timeline_debug_add(struct sync_timeline *obj); @@ -72,12 +70,4 @@ void sync_file_debug_add(struct sync_file *fence); void sync_file_debug_remove(struct sync_file *fence); void sync_dump(void); -#else -# define sync_timeline_debug_add(obj) -# define sync_timeline_debug_remove(obj) -# define sync_file_debug_add(fence) -# define sync_file_debug_remove(fence) -# define sync_dump() -#endif - #endif /* _LINUX_SYNC_H */ diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 757825ac60df..2a72d2feb76d 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -49,16 +49,17 @@ config DRM_DEBUG_MM If in doubt, say "N". -config DRM_DEBUG_MM_SELFTEST - tristate "kselftests for DRM range manager (struct drm_mm)" +config DRM_DEBUG_SELFTEST + tristate "kselftests for DRM" depends on DRM depends on DEBUG_KERNEL select PRIME_NUMBERS select DRM_LIB_RANDOM + select DRM_KMS_HELPER default n help - This option provides a kernel module that can be used to test - the DRM range manager (drm_mm) and its API. This option is not + This option provides kernel modules that can be used to run + various selftests on parts of the DRM api. This option is not useful for distributions or general kernels, but only for kernel developers working on DRM and associated drivers. @@ -267,6 +268,8 @@ source "drivers/gpu/drm/amd/amdkfd/Kconfig" source "drivers/gpu/drm/imx/Kconfig" +source "drivers/gpu/drm/v3d/Kconfig" + source "drivers/gpu/drm/vc4/Kconfig" source "drivers/gpu/drm/etnaviv/Kconfig" diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 9d66657ea117..ef9f3dab287f 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -43,7 +43,7 @@ drm_kms_helper-$(CONFIG_DRM_KMS_CMA_HELPER) += drm_fb_cma_helper.o drm_kms_helper-$(CONFIG_DRM_DP_AUX_CHARDEV) += drm_dp_aux_dev.o obj-$(CONFIG_DRM_KMS_HELPER) += drm_kms_helper.o -obj-$(CONFIG_DRM_DEBUG_MM_SELFTEST) += selftests/ +obj-$(CONFIG_DRM_DEBUG_SELFTEST) += selftests/ obj-$(CONFIG_DRM) += drm.o obj-$(CONFIG_DRM_MIPI_DSI) += drm_mipi_dsi.o @@ -61,6 +61,7 @@ obj-$(CONFIG_DRM_MGA) += mga/ obj-$(CONFIG_DRM_I810) += i810/ obj-$(CONFIG_DRM_I915) += i915/ obj-$(CONFIG_DRM_MGAG200) += mgag200/ +obj-$(CONFIG_DRM_V3D) += v3d/ obj-$(CONFIG_DRM_VC4) += vc4/ obj-$(CONFIG_DRM_CIRRUS_QEMU) += cirrus/ obj-$(CONFIG_DRM_SIS) += sis/ diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 2ca2b5154d52..f3002020df6c 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -130,7 +130,8 @@ amdgpu-y += \ amdgpu_amdkfd.o \ amdgpu_amdkfd_fence.o \ amdgpu_amdkfd_gpuvm.o \ - amdgpu_amdkfd_gfx_v8.o + amdgpu_amdkfd_gfx_v8.o \ + amdgpu_amdkfd_gfx_v9.o # add cgs amdgpu-y += amdgpu_cgs.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 4d36203ffb11..cd0e8f192e6a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -92,6 +92,10 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) case CHIP_POLARIS11: kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions(); break; + case CHIP_VEGA10: + case CHIP_RAVEN: + kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions(); + break; default: dev_dbg(adev->dev, "kfd not supported on this ASIC\n"); return; @@ -175,6 +179,28 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) &gpu_resources.doorbell_physical_address, &gpu_resources.doorbell_aperture_size, &gpu_resources.doorbell_start_offset); + if (adev->asic_type >= CHIP_VEGA10) { + /* On SOC15 the BIF is involved in routing + * doorbells using the low 12 bits of the + * address. Communicate the assignments to + * KFD. KFD uses two doorbell pages per + * process in case of 64-bit doorbells so we + * can use each doorbell assignment twice. + */ + gpu_resources.sdma_doorbell[0][0] = + AMDGPU_DOORBELL64_sDMA_ENGINE0; + gpu_resources.sdma_doorbell[0][1] = + AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200; + gpu_resources.sdma_doorbell[1][0] = + AMDGPU_DOORBELL64_sDMA_ENGINE1; + gpu_resources.sdma_doorbell[1][1] = + AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200; + /* Doorbells 0x0f0-0ff and 0x2f0-2ff are reserved for + * SDMA, IH and VCN. So don't use them for the CP. + */ + gpu_resources.reserved_doorbell_mask = 0x1f0; + gpu_resources.reserved_doorbell_val = 0x0f0; + } kgd2kfd->device_init(adev->kfd, &gpu_resources); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index c2c2bea731e0..12367a9951e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -28,6 +28,7 @@ #include <linux/types.h> #include <linux/mm.h> #include <linux/mmu_context.h> +#include <linux/workqueue.h> #include <kgd_kfd_interface.h> #include <drm/ttm/ttm_execbuf_util.h> #include "amdgpu_sync.h" @@ -59,7 +60,9 @@ struct kgd_mem { uint32_t mapping_flags; + atomic_t invalid; struct amdkfd_process_info *process_info; + struct page **user_pages; struct amdgpu_sync sync; @@ -84,6 +87,9 @@ struct amdkfd_process_info { struct list_head vm_list_head; /* List head for all KFD BOs that belong to a KFD process. */ struct list_head kfd_bo_list; + /* List of userptr BOs that are valid or invalid */ + struct list_head userptr_valid_list; + struct list_head userptr_inval_list; /* Lock to protect kfd_bo_list */ struct mutex lock; @@ -91,6 +97,11 @@ struct amdkfd_process_info { unsigned int n_vms; /* Eviction Fence */ struct amdgpu_amdkfd_fence *eviction_fence; + + /* MMU-notifier related fields */ + atomic_t evicted_bos; + struct delayed_work restore_userptr_work; + struct pid *pid; }; int amdgpu_amdkfd_init(void); @@ -104,12 +115,14 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); void amdgpu_amdkfd_device_init(struct amdgpu_device *adev); void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev); +int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm); int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, uint32_t vmid, uint64_t gpu_addr, uint32_t *ib_cmd, uint32_t ib_len); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void); bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index ea54e53172b9..0ff36d45a597 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -98,8 +98,6 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, unsigned int vmid); -static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, - uint32_t hpd_size, uint64_t hpd_gpu_addr); static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, @@ -183,7 +181,6 @@ static const struct kfd2kgd_calls kfd2kgd = { .free_pasid = amdgpu_pasid_free, .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, - .init_pipeline = kgd_init_pipeline, .init_interrupts = kgd_init_interrupts, .hqd_load = kgd_hqd_load, .hqd_sdma_load = kgd_hqd_sdma_load, @@ -309,13 +306,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, return 0; } -static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, - uint32_t hpd_size, uint64_t hpd_gpu_addr) -{ - /* amdgpu owns the per-pipe state */ - return 0; -} - static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) { struct amdgpu_device *adev = get_amdgpu_device(kgd); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 89264c9a5e9f..6ef9762b4b00 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -57,8 +57,6 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, uint32_t sh_mem_bases); static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, unsigned int vmid); -static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, - uint32_t hpd_size, uint64_t hpd_gpu_addr); static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, @@ -141,7 +139,6 @@ static const struct kfd2kgd_calls kfd2kgd = { .free_pasid = amdgpu_pasid_free, .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, - .init_pipeline = kgd_init_pipeline, .init_interrupts = kgd_init_interrupts, .hqd_load = kgd_hqd_load, .hqd_sdma_load = kgd_hqd_sdma_load, @@ -270,13 +267,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, return 0; } -static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, - uint32_t hpd_size, uint64_t hpd_gpu_addr) -{ - /* amdgpu owns the per-pipe state */ - return 0; -} - static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) { struct amdgpu_device *adev = get_amdgpu_device(kgd); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c new file mode 100644 index 000000000000..8f37991df61b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -0,0 +1,1043 @@ +/* + * Copyright 2014-2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#define pr_fmt(fmt) "kfd2kgd: " fmt + +#include <linux/module.h> +#include <linux/fdtable.h> +#include <linux/uaccess.h> +#include <linux/firmware.h> +#include <drm/drmP.h> +#include "amdgpu.h" +#include "amdgpu_amdkfd.h" +#include "amdgpu_ucode.h" +#include "soc15_hw_ip.h" +#include "gc/gc_9_0_offset.h" +#include "gc/gc_9_0_sh_mask.h" +#include "vega10_enum.h" +#include "sdma0/sdma0_4_0_offset.h" +#include "sdma0/sdma0_4_0_sh_mask.h" +#include "sdma1/sdma1_4_0_offset.h" +#include "sdma1/sdma1_4_0_sh_mask.h" +#include "athub/athub_1_0_offset.h" +#include "athub/athub_1_0_sh_mask.h" +#include "oss/osssys_4_0_offset.h" +#include "oss/osssys_4_0_sh_mask.h" +#include "soc15_common.h" +#include "v9_structs.h" +#include "soc15.h" +#include "soc15d.h" + +/* HACK: MMHUB and GC both have VM-related register with the same + * names but different offsets. Define the MMHUB register we need here + * with a prefix. A proper solution would be to move the functions + * programming these registers into gfx_v9_0.c and mmhub_v1_0.c + * respectively. + */ +#define mmMMHUB_VM_INVALIDATE_ENG16_REQ 0x06f3 +#define mmMMHUB_VM_INVALIDATE_ENG16_REQ_BASE_IDX 0 + +#define mmMMHUB_VM_INVALIDATE_ENG16_ACK 0x0705 +#define mmMMHUB_VM_INVALIDATE_ENG16_ACK_BASE_IDX 0 + +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32 0x072b +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32_BASE_IDX 0 +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32 0x072c +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32_BASE_IDX 0 + +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32 0x074b +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32_BASE_IDX 0 +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32 0x074c +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32_BASE_IDX 0 + +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32 0x076b +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32_BASE_IDX 0 +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32 0x076c +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32_BASE_IDX 0 + +#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32 0x0727 +#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32_BASE_IDX 0 +#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32 0x0728 +#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32_BASE_IDX 0 + +#define V9_PIPE_PER_MEC (4) +#define V9_QUEUES_PER_PIPE_MEC (8) + +enum hqd_dequeue_request_type { + NO_ACTION = 0, + DRAIN_PIPE, + RESET_WAVES +}; + +/* + * Register access functions + */ + +static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, + uint32_t sh_mem_config, + uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, + uint32_t sh_mem_bases); +static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid); +static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); +static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr, + uint32_t wptr_shift, uint32_t wptr_mask, + struct mm_struct *mm); +static int kgd_hqd_dump(struct kgd_dev *kgd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs); +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, + uint32_t __user *wptr, struct mm_struct *mm); +static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, + uint32_t engine_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs); +static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, + uint32_t pipe_id, uint32_t queue_id); +static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, + enum kfd_preempt_type reset_type, + unsigned int utimeout, uint32_t pipe_id, + uint32_t queue_id); +static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, + unsigned int utimeout); +static int kgd_address_watch_disable(struct kgd_dev *kgd); +static int kgd_address_watch_execute(struct kgd_dev *kgd, + unsigned int watch_point_id, + uint32_t cntl_val, + uint32_t addr_hi, + uint32_t addr_lo); +static int kgd_wave_control_execute(struct kgd_dev *kgd, + uint32_t gfx_index_val, + uint32_t sq_cmd); +static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, + unsigned int watch_point_id, + unsigned int reg_offset); + +static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, + uint8_t vmid); +static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + uint8_t vmid); +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint32_t page_table_base); +static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); +static void set_scratch_backing_va(struct kgd_dev *kgd, + uint64_t va, uint32_t vmid); +static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); +static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); + +/* Because of REG_GET_FIELD() being used, we put this function in the + * asic specific file. + */ +static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, + struct tile_config *config) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + config->gb_addr_config = adev->gfx.config.gb_addr_config; + + config->tile_config_ptr = adev->gfx.config.tile_mode_array; + config->num_tile_configs = + ARRAY_SIZE(adev->gfx.config.tile_mode_array); + config->macro_tile_config_ptr = + adev->gfx.config.macrotile_mode_array; + config->num_macro_tile_configs = + ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); + + return 0; +} + +static const struct kfd2kgd_calls kfd2kgd = { + .init_gtt_mem_allocation = alloc_gtt_mem, + .free_gtt_mem = free_gtt_mem, + .get_local_mem_info = get_local_mem_info, + .get_gpu_clock_counter = get_gpu_clock_counter, + .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, + .alloc_pasid = amdgpu_pasid_alloc, + .free_pasid = amdgpu_pasid_free, + .program_sh_mem_settings = kgd_program_sh_mem_settings, + .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, + .init_interrupts = kgd_init_interrupts, + .hqd_load = kgd_hqd_load, + .hqd_sdma_load = kgd_hqd_sdma_load, + .hqd_dump = kgd_hqd_dump, + .hqd_sdma_dump = kgd_hqd_sdma_dump, + .hqd_is_occupied = kgd_hqd_is_occupied, + .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, + .hqd_destroy = kgd_hqd_destroy, + .hqd_sdma_destroy = kgd_hqd_sdma_destroy, + .address_watch_disable = kgd_address_watch_disable, + .address_watch_execute = kgd_address_watch_execute, + .wave_control_execute = kgd_wave_control_execute, + .address_watch_get_offset = kgd_address_watch_get_offset, + .get_atc_vmid_pasid_mapping_pasid = + get_atc_vmid_pasid_mapping_pasid, + .get_atc_vmid_pasid_mapping_valid = + get_atc_vmid_pasid_mapping_valid, + .get_fw_version = get_fw_version, + .set_scratch_backing_va = set_scratch_backing_va, + .get_tile_config = amdgpu_amdkfd_get_tile_config, + .get_cu_info = get_cu_info, + .get_vram_usage = amdgpu_amdkfd_get_vram_usage, + .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, + .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm, + .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, + .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, + .set_vm_context_page_table_base = set_vm_context_page_table_base, + .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, + .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu, + .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, + .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, + .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory, + .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel, + .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos, + .invalidate_tlbs = invalidate_tlbs, + .invalidate_tlbs_vmid = invalidate_tlbs_vmid, + .submit_ib = amdgpu_amdkfd_submit_ib, +}; + +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) +{ + return (struct kfd2kgd_calls *)&kfd2kgd; +} + +static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) +{ + return (struct amdgpu_device *)kgd; +} + +static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe, + uint32_t queue, uint32_t vmid) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, mec, pipe, queue, vmid); +} + +static void unlock_srbm(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + soc15_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); +} + +static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t queue_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + lock_srbm(kgd, mec, pipe, queue_id, 0); +} + +static uint32_t get_queue_mask(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id) +{ + unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe + + queue_id) & 31; + + return ((uint32_t)1) << bit; +} + +static void release_queue(struct kgd_dev *kgd) +{ + unlock_srbm(kgd); +} + +static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, + uint32_t sh_mem_config, + uint32_t sh_mem_ape1_base, + uint32_t sh_mem_ape1_limit, + uint32_t sh_mem_bases) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + lock_srbm(kgd, 0, 0, 0, vmid); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases); + /* APE1 no longer exists on GFX9 */ + + unlock_srbm(kgd); +} + +static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + /* + * We have to assume that there is no outstanding mapping. + * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because + * a mapping is in progress or because a mapping finished + * and the SW cleared it. + * So the protocol is to always wait & clear. + */ + uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid | + ATC_VMID0_PASID_MAPPING__VALID_MASK; + + /* + * need to do this twice, once for gfx and once for mmhub + * for ATC add 16 to VMID for mmhub, for IH different registers. + * ATC_VMID0..15 registers are separate from ATC_VMID16..31. + */ + + WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid, + pasid_mapping); + + while (!(RREG32(SOC15_REG_OFFSET( + ATHUB, 0, + mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) & + (1U << vmid))) + cpu_relax(); + + WREG32(SOC15_REG_OFFSET(ATHUB, 0, + mmATC_VMID_PASID_MAPPING_UPDATE_STATUS), + 1U << vmid); + + /* Mapping vmid to pasid also for IH block */ + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid, + pasid_mapping); + + WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID16_PASID_MAPPING) + vmid, + pasid_mapping); + + while (!(RREG32(SOC15_REG_OFFSET( + ATHUB, 0, + mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) & + (1U << (vmid + 16)))) + cpu_relax(); + + WREG32(SOC15_REG_OFFSET(ATHUB, 0, + mmATC_VMID_PASID_MAPPING_UPDATE_STATUS), + 1U << (vmid + 16)); + + /* Mapping vmid to pasid also for IH block */ + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid, + pasid_mapping); + return 0; +} + +/* TODO - RING0 form of field is obsolete, seems to date back to SI + * but still works + */ + +static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t mec; + uint32_t pipe; + + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + lock_srbm(kgd, mec, pipe, 0, 0); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL), + CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK | + CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK); + + unlock_srbm(kgd); + + return 0; +} + +static uint32_t get_sdma_base_addr(struct amdgpu_device *adev, + unsigned int engine_id, + unsigned int queue_id) +{ + uint32_t base[2] = { + SOC15_REG_OFFSET(SDMA0, 0, + mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL, + SOC15_REG_OFFSET(SDMA1, 0, + mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL + }; + uint32_t retval; + + retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL - + mmSDMA0_RLC0_RB_CNTL); + + pr_debug("sdma base address: 0x%x\n", retval); + + return retval; +} + +static inline struct v9_mqd *get_mqd(void *mqd) +{ + return (struct v9_mqd *)mqd; +} + +static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) +{ + return (struct v9_sdma_mqd *)mqd; +} + +static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr, + uint32_t wptr_shift, uint32_t wptr_mask, + struct mm_struct *mm) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v9_mqd *m; + uint32_t *mqd_hqd; + uint32_t reg, hqd_base, data; + + m = get_mqd(mqd); + + acquire_queue(kgd, pipe_id, queue_id); + + /* HIQ is set during driver init period with vmid set to 0*/ + if (m->cp_hqd_vmid == 0) { + uint32_t value, mec, pipe; + + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", + mec, pipe, queue_id); + value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS)); + value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, + ((mec << 5) | (pipe << 3) | queue_id | 0x80)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value); + } + + /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ + mqd_hqd = &m->cp_mqd_base_addr_lo; + hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); + + for (reg = hqd_base; + reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) + WREG32(reg, mqd_hqd[reg - hqd_base]); + + + /* Activate doorbell logic before triggering WPTR poll. */ + data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, + CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data); + + if (wptr) { + /* Don't read wptr with get_user because the user + * context may not be accessible (if this function + * runs in a work queue). Instead trigger a one-shot + * polling read from memory in the CP. This assumes + * that wptr is GPU-accessible in the queue's VMID via + * ATC or SVM. WPTR==RPTR before starting the poll so + * the CP starts fetching new commands from the right + * place. + * + * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit + * tricky. Assume that the queue didn't overflow. The + * number of valid bits in the 32-bit RPTR depends on + * the queue size. The remaining bits are taken from + * the saved 64-bit WPTR. If the WPTR wrapped, add the + * queue size. + */ + uint32_t queue_size = + 2 << REG_GET_FIELD(m->cp_hqd_pq_control, + CP_HQD_PQ_CONTROL, QUEUE_SIZE); + uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1); + + if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr) + guessed_wptr += queue_size; + guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); + guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), + lower_32_bits(guessed_wptr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), + upper_32_bits(guessed_wptr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), + lower_32_bits((uint64_t)wptr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), + upper_32_bits((uint64_t)wptr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1), + get_queue_mask(adev, pipe_id, queue_id)); + } + + /* Start the EOP fetcher */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR), + REG_SET_FIELD(m->cp_hqd_eop_rptr, + CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); + + data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data); + + release_queue(kgd); + + return 0; +} + +static int kgd_hqd_dump(struct kgd_dev *kgd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t i = 0, reg; +#define HQD_N_REGS 56 +#define DUMP_REG(addr) do { \ + if (WARN_ON_ONCE(i >= HQD_N_REGS)) \ + break; \ + (*dump)[i][0] = (addr) << 2; \ + (*dump)[i++][1] = RREG32(addr); \ + } while (0) + + *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + if (*dump == NULL) + return -ENOMEM; + + acquire_queue(kgd, pipe_id, queue_id); + + for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); + reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) + DUMP_REG(reg); + + release_queue(kgd); + + WARN_ON_ONCE(i != HQD_N_REGS); + *n_regs = i; + + return 0; +} + +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, + uint32_t __user *wptr, struct mm_struct *mm) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v9_sdma_mqd *m; + uint32_t sdma_base_addr, sdmax_gfx_context_cntl; + unsigned long end_jiffies; + uint32_t data; + uint64_t data64; + uint64_t __user *wptr64 = (uint64_t __user *)wptr; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + m->sdma_queue_id); + sdmax_gfx_context_cntl = m->sdma_engine_id ? + SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_CONTEXT_CNTL) : + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_CONTEXT_CNTL); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); + + end_jiffies = msecs_to_jiffies(2000) + jiffies; + while (true) { + data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) + break; + if (time_after(jiffies, end_jiffies)) + return -ETIME; + usleep_range(500, 1000); + } + data = RREG32(sdmax_gfx_context_cntl); + data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, + RESUME_CTX, 0); + WREG32(sdmax_gfx_context_cntl, data); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET, + m->sdmax_rlcx_doorbell_offset); + + data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, + ENABLE, 1); + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI, + m->sdmax_rlcx_rb_rptr_hi); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); + if (read_user_wptr(mm, wptr64, data64)) { + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + lower_32_bits(data64)); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, + upper_32_bits(data64)); + } else { + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + m->sdmax_rlcx_rb_rptr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, + m->sdmax_rlcx_rb_rptr_hi); + } + WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, + m->sdmax_rlcx_rb_base_hi); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + m->sdmax_rlcx_rb_rptr_addr_lo); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + m->sdmax_rlcx_rb_rptr_addr_hi); + + data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, + RB_ENABLE, 1); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); + + return 0; +} + +static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, + uint32_t engine_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id); + uint32_t i = 0, reg; +#undef HQD_N_REGS +#define HQD_N_REGS (19+6+7+10) + + *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + if (*dump == NULL) + return -ENOMEM; + + for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++) + DUMP_REG(sdma_base_addr + reg); + for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++) + DUMP_REG(sdma_base_addr + reg); + for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; + reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++) + DUMP_REG(sdma_base_addr + reg); + for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; + reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++) + DUMP_REG(sdma_base_addr + reg); + + WARN_ON_ONCE(i != HQD_N_REGS); + *n_regs = i; + + return 0; +} + +static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, + uint32_t pipe_id, uint32_t queue_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t act; + bool retval = false; + uint32_t low, high; + + acquire_queue(kgd, pipe_id, queue_id); + act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)); + if (act) { + low = lower_32_bits(queue_address >> 8); + high = upper_32_bits(queue_address >> 8); + + if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) && + high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI))) + retval = true; + } + release_queue(kgd); + return retval; +} + +static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v9_sdma_mqd *m; + uint32_t sdma_base_addr; + uint32_t sdma_rlc_rb_cntl; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + m->sdma_queue_id); + + sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + + if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) + return true; + + return false; +} + +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, + enum kfd_preempt_type reset_type, + unsigned int utimeout, uint32_t pipe_id, + uint32_t queue_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + enum hqd_dequeue_request_type type; + unsigned long end_jiffies; + uint32_t temp; + struct v9_mqd *m = get_mqd(mqd); + + acquire_queue(kgd, pipe_id, queue_id); + + if (m->cp_hqd_vmid == 0) + WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0); + + switch (reset_type) { + case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN: + type = DRAIN_PIPE; + break; + case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: + type = RESET_WAVES; + break; + default: + type = DRAIN_PIPE; + break; + } + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type); + + end_jiffies = (utimeout * HZ / 1000) + jiffies; + while (true) { + temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)); + if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) + break; + if (time_after(jiffies, end_jiffies)) { + pr_err("cp queue preemption time out.\n"); + release_queue(kgd); + return -ETIME; + } + usleep_range(500, 1000); + } + + release_queue(kgd); + return 0; +} + +static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, + unsigned int utimeout) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v9_sdma_mqd *m; + uint32_t sdma_base_addr; + uint32_t temp; + unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + m->sdma_queue_id); + + temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp); + + while (true) { + temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) + break; + if (time_after(jiffies, end_jiffies)) + return -ETIME; + usleep_range(500, 1000); + } + + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | + SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); + + m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR); + m->sdmax_rlcx_rb_rptr_hi = + RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI); + + return 0; +} + +static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, + uint8_t vmid) +{ + uint32_t reg; + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + + vmid); + return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; +} + +static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + uint8_t vmid) +{ + uint32_t reg; + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + + vmid); + return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; +} + +static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + uint32_t req = (1 << vmid) | + (0 << VM_INVALIDATE_ENG16_REQ__FLUSH_TYPE__SHIFT) | /* legacy */ + VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PTES_MASK | + VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE0_MASK | + VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE1_MASK | + VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE2_MASK | + VM_INVALIDATE_ENG16_REQ__INVALIDATE_L1_PTES_MASK; + + mutex_lock(&adev->srbm_mutex); + + /* Use legacy mode tlb invalidation. + * + * Currently on Raven the code below is broken for anything but + * legacy mode due to a MMHUB power gating problem. A workaround + * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ + * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack + * bit. + * + * TODO 1: agree on the right set of invalidation registers for + * KFD use. Use the last one for now. Invalidate both GC and + * MMHUB. + * + * TODO 2: support range-based invalidation, requires kfg2kgd + * interface change + */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_LO32), + 0xffffffff); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_HI32), + 0x0000001f); + + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32), + 0xffffffff); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32), + 0x0000001f); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_REQ), req); + + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_REQ), + req); + + while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ACK)) & + (1 << vmid))) + cpu_relax(); + + while (!(RREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmMMHUB_VM_INVALIDATE_ENG16_ACK)) & + (1 << vmid))) + cpu_relax(); + + mutex_unlock(&adev->srbm_mutex); + +} + +static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) +{ + signed long r; + uint32_t seq; + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; + + spin_lock(&adev->gfx.kiq.ring_lock); + amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/ + amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); + amdgpu_ring_write(ring, + PACKET3_INVALIDATE_TLBS_DST_SEL(1) | + PACKET3_INVALIDATE_TLBS_ALL_HUB(1) | + PACKET3_INVALIDATE_TLBS_PASID(pasid) | + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(0)); /* legacy */ + amdgpu_fence_emit_polling(ring, &seq); + amdgpu_ring_commit(ring); + spin_unlock(&adev->gfx.kiq.ring_lock); + + r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); + if (r < 1) { + DRM_ERROR("wait for kiq fence error: %ld.\n", r); + return -ETIME; + } + + return 0; +} + +static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + int vmid; + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; + + if (ring->ready) + return invalidate_tlbs_with_kiq(adev, pasid); + + for (vmid = 0; vmid < 16; vmid++) { + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) + continue; + if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { + if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid) + == pasid) { + write_vmid_invalidate_request(kgd, vmid); + break; + } + } + } + + return 0; +} + +static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("non kfd vmid %d\n", vmid); + return 0; + } + + write_vmid_invalidate_request(kgd, vmid); + return 0; +} + +static int kgd_address_watch_disable(struct kgd_dev *kgd) +{ + return 0; +} + +static int kgd_address_watch_execute(struct kgd_dev *kgd, + unsigned int watch_point_id, + uint32_t cntl_val, + uint32_t addr_hi, + uint32_t addr_lo) +{ + return 0; +} + +static int kgd_wave_control_execute(struct kgd_dev *kgd, + uint32_t gfx_index_val, + uint32_t sq_cmd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t data = 0; + + mutex_lock(&adev->grbm_idx_mutex); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd); + + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, + INSTANCE_BROADCAST_WRITES, 1); + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, + SH_BROADCAST_WRITES, 1); + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, + SE_BROADCAST_WRITES, 1); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data); + mutex_unlock(&adev->grbm_idx_mutex); + + return 0; +} + +static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, + unsigned int watch_point_id, + unsigned int reg_offset) +{ + return 0; +} + +static void set_scratch_backing_va(struct kgd_dev *kgd, + uint64_t va, uint32_t vmid) +{ + /* No longer needed on GFXv9. The scratch base address is + * passed to the shader by the CP. It's the user mode driver's + * responsibility. + */ +} + +/* FIXME: Does this need to be ASIC-specific code? */ +static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + const union amdgpu_firmware_header *hdr; + + switch (type) { + case KGD_ENGINE_PFP: + hdr = (const union amdgpu_firmware_header *)adev->gfx.pfp_fw->data; + break; + + case KGD_ENGINE_ME: + hdr = (const union amdgpu_firmware_header *)adev->gfx.me_fw->data; + break; + + case KGD_ENGINE_CE: + hdr = (const union amdgpu_firmware_header *)adev->gfx.ce_fw->data; + break; + + case KGD_ENGINE_MEC1: + hdr = (const union amdgpu_firmware_header *)adev->gfx.mec_fw->data; + break; + + case KGD_ENGINE_MEC2: + hdr = (const union amdgpu_firmware_header *)adev->gfx.mec2_fw->data; + break; + + case KGD_ENGINE_RLC: + hdr = (const union amdgpu_firmware_header *)adev->gfx.rlc_fw->data; + break; + + case KGD_ENGINE_SDMA1: + hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[0].fw->data; + break; + + case KGD_ENGINE_SDMA2: + hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[1].fw->data; + break; + + default: + return 0; + } + + if (hdr == NULL) + return 0; + + /* Only 12 bit in use*/ + return hdr->common.ucode_version; +} + +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint32_t page_table_base) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint64_t base = (uint64_t)page_table_base << PAGE_SHIFT | + AMDGPU_PTE_VALID; + + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("trying to set page table base for wrong VMID %u\n", + vmid); + return; + } + + /* TODO: take advantage of per-process address space size. For + * now, all processes share the same address space size, like + * on GFX8 and older. + */ + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0); + + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2), + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2), + upper_32_bits(adev->vm_manager.max_pfn - 1)); + + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base)); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base)); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2), + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2), + upper_32_bits(adev->vm_manager.max_pfn - 1)); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base)); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 1d6e1479da38..5296e24fd662 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -23,6 +23,7 @@ #define pr_fmt(fmt) "kfd2kgd: " fmt #include <linux/list.h> +#include <linux/sched/mm.h> #include <drm/drmP.h> #include "amdgpu_object.h" #include "amdgpu_vm.h" @@ -33,10 +34,20 @@ */ #define VI_BO_SIZE_ALIGN (0x8000) +/* BO flag to indicate a KFD userptr BO */ +#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63) + +/* Userptr restore delay, just long enough to allow consecutive VM + * changes to accumulate + */ +#define AMDGPU_USERPTR_RESTORE_DELAY_MS 1 + /* Impose limit on how much memory KFD can use */ static struct { uint64_t max_system_mem_limit; + uint64_t max_userptr_mem_limit; int64_t system_mem_used; + int64_t userptr_mem_used; spinlock_t mem_limit_lock; } kfd_mem_limit; @@ -57,6 +68,7 @@ static const char * const domain_bit_to_string[] = { #define domain_string(domain) domain_bit_to_string[ffs(domain)-1] +static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work); static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) @@ -78,6 +90,7 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm, /* Set memory usage limits. Current, limits are * System (kernel) memory - 3/8th System RAM + * Userptr memory - 3/4th System RAM */ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) { @@ -90,8 +103,10 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) spin_lock_init(&kfd_mem_limit.mem_limit_lock); kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3); - pr_debug("Kernel memory limit %lluM\n", - (kfd_mem_limit.max_system_mem_limit >> 20)); + kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2); + pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n", + (kfd_mem_limit.max_system_mem_limit >> 20), + (kfd_mem_limit.max_userptr_mem_limit >> 20)); } static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev, @@ -111,6 +126,16 @@ static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev, goto err_no_mem; } kfd_mem_limit.system_mem_used += (acc_size + size); + } else if (domain == AMDGPU_GEM_DOMAIN_CPU) { + if ((kfd_mem_limit.system_mem_used + acc_size > + kfd_mem_limit.max_system_mem_limit) || + (kfd_mem_limit.userptr_mem_used + (size + acc_size) > + kfd_mem_limit.max_userptr_mem_limit)) { + ret = -ENOMEM; + goto err_no_mem; + } + kfd_mem_limit.system_mem_used += acc_size; + kfd_mem_limit.userptr_mem_used += size; } err_no_mem: spin_unlock(&kfd_mem_limit.mem_limit_lock); @@ -126,10 +151,16 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev, sizeof(struct amdgpu_bo)); spin_lock(&kfd_mem_limit.mem_limit_lock); - if (domain == AMDGPU_GEM_DOMAIN_GTT) + if (domain == AMDGPU_GEM_DOMAIN_GTT) { kfd_mem_limit.system_mem_used -= (acc_size + size); + } else if (domain == AMDGPU_GEM_DOMAIN_CPU) { + kfd_mem_limit.system_mem_used -= acc_size; + kfd_mem_limit.userptr_mem_used -= size; + } WARN_ONCE(kfd_mem_limit.system_mem_used < 0, "kfd system memory accounting unbalanced"); + WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0, + "kfd userptr memory accounting unbalanced"); spin_unlock(&kfd_mem_limit.mem_limit_lock); } @@ -138,12 +169,17 @@ void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo) { spin_lock(&kfd_mem_limit.mem_limit_lock); - if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) { + if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) { + kfd_mem_limit.system_mem_used -= bo->tbo.acc_size; + kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo); + } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) { kfd_mem_limit.system_mem_used -= (bo->tbo.acc_size + amdgpu_bo_size(bo)); } WARN_ONCE(kfd_mem_limit.system_mem_used < 0, "kfd system memory accounting unbalanced"); + WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0, + "kfd userptr memory accounting unbalanced"); spin_unlock(&kfd_mem_limit.mem_limit_lock); } @@ -506,7 +542,8 @@ static void remove_bo_from_vm(struct amdgpu_device *adev, } static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, - struct amdkfd_process_info *process_info) + struct amdkfd_process_info *process_info, + bool userptr) { struct ttm_validate_buffer *entry = &mem->validate_list; struct amdgpu_bo *bo = mem->bo; @@ -515,10 +552,95 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, entry->shared = true; entry->bo = &bo->tbo; mutex_lock(&process_info->lock); - list_add_tail(&entry->head, &process_info->kfd_bo_list); + if (userptr) + list_add_tail(&entry->head, &process_info->userptr_valid_list); + else + list_add_tail(&entry->head, &process_info->kfd_bo_list); mutex_unlock(&process_info->lock); } +/* Initializes user pages. It registers the MMU notifier and validates + * the userptr BO in the GTT domain. + * + * The BO must already be on the userptr_valid_list. Otherwise an + * eviction and restore may happen that leaves the new BO unmapped + * with the user mode queues running. + * + * Takes the process_info->lock to protect against concurrent restore + * workers. + * + * Returns 0 for success, negative errno for errors. + */ +static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, + uint64_t user_addr) +{ + struct amdkfd_process_info *process_info = mem->process_info; + struct amdgpu_bo *bo = mem->bo; + struct ttm_operation_ctx ctx = { true, false }; + int ret = 0; + + mutex_lock(&process_info->lock); + + ret = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, user_addr, 0); + if (ret) { + pr_err("%s: Failed to set userptr: %d\n", __func__, ret); + goto out; + } + + ret = amdgpu_mn_register(bo, user_addr); + if (ret) { + pr_err("%s: Failed to register MMU notifier: %d\n", + __func__, ret); + goto out; + } + + /* If no restore worker is running concurrently, user_pages + * should not be allocated + */ + WARN(mem->user_pages, "Leaking user_pages array"); + + mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, + sizeof(struct page *), + GFP_KERNEL | __GFP_ZERO); + if (!mem->user_pages) { + pr_err("%s: Failed to allocate pages array\n", __func__); + ret = -ENOMEM; + goto unregister_out; + } + + ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages); + if (ret) { + pr_err("%s: Failed to get user pages: %d\n", __func__, ret); + goto free_out; + } + + amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages); + + ret = amdgpu_bo_reserve(bo, true); + if (ret) { + pr_err("%s: Failed to reserve BO\n", __func__); + goto release_out; + } + amdgpu_ttm_placement_from_domain(bo, mem->domain); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (ret) + pr_err("%s: failed to validate BO\n", __func__); + amdgpu_bo_unreserve(bo); + +release_out: + if (ret) + release_pages(mem->user_pages, bo->tbo.ttm->num_pages); +free_out: + kvfree(mem->user_pages); + mem->user_pages = NULL; +unregister_out: + if (ret) + amdgpu_mn_unregister(bo); +out: + mutex_unlock(&process_info->lock); + return ret; +} + /* Reserving a BO and its page table BOs must happen atomically to * avoid deadlocks. Some operations update multiple VMs at once. Track * all the reservation info in a context structure. Optionally a sync @@ -748,7 +870,8 @@ static int update_gpuvm_pte(struct amdgpu_device *adev, } static int map_bo_to_gpuvm(struct amdgpu_device *adev, - struct kfd_bo_va_list *entry, struct amdgpu_sync *sync) + struct kfd_bo_va_list *entry, struct amdgpu_sync *sync, + bool no_update_pte) { int ret; @@ -762,6 +885,9 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev, return ret; } + if (no_update_pte) + return 0; + ret = update_gpuvm_pte(adev, entry, sync); if (ret) { pr_err("update_gpuvm_pte() failed\n"); @@ -820,6 +946,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, mutex_init(&info->lock); INIT_LIST_HEAD(&info->vm_list_head); INIT_LIST_HEAD(&info->kfd_bo_list); + INIT_LIST_HEAD(&info->userptr_valid_list); + INIT_LIST_HEAD(&info->userptr_inval_list); info->eviction_fence = amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), @@ -830,6 +958,11 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, goto create_evict_fence_fail; } + info->pid = get_task_pid(current->group_leader, PIDTYPE_PID); + atomic_set(&info->evicted_bos, 0); + INIT_DELAYED_WORK(&info->restore_userptr_work, + amdgpu_amdkfd_restore_userptr_worker); + *process_info = info; *ef = dma_fence_get(&info->eviction_fence->base); } @@ -872,6 +1005,7 @@ reserve_pd_fail: dma_fence_put(*ef); *ef = NULL; *process_info = NULL; + put_pid(info->pid); create_evict_fence_fail: mutex_destroy(&info->lock); kfree(info); @@ -967,8 +1101,12 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, /* Release per-process resources when last compute VM is destroyed */ if (!process_info->n_vms) { WARN_ON(!list_empty(&process_info->kfd_bo_list)); + WARN_ON(!list_empty(&process_info->userptr_valid_list)); + WARN_ON(!list_empty(&process_info->userptr_inval_list)); dma_fence_put(&process_info->eviction_fence->base); + cancel_delayed_work_sync(&process_info->restore_userptr_work); + put_pid(process_info->pid); mutex_destroy(&process_info->lock); kfree(process_info); } @@ -1003,9 +1141,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + uint64_t user_addr = 0; struct amdgpu_bo *bo; int byte_align; - u32 alloc_domain; + u32 domain, alloc_domain; u64 alloc_flags; uint32_t mapping_flags; int ret; @@ -1014,14 +1153,21 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( * Check on which domain to allocate BO */ if (flags & ALLOC_MEM_FLAGS_VRAM) { - alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; + domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED; alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ? AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : AMDGPU_GEM_CREATE_NO_CPU_ACCESS; } else if (flags & ALLOC_MEM_FLAGS_GTT) { - alloc_domain = AMDGPU_GEM_DOMAIN_GTT; + domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT; + alloc_flags = 0; + } else if (flags & ALLOC_MEM_FLAGS_USERPTR) { + domain = AMDGPU_GEM_DOMAIN_GTT; + alloc_domain = AMDGPU_GEM_DOMAIN_CPU; alloc_flags = 0; + if (!offset || !*offset) + return -EINVAL; + user_addr = *offset; } else { return -EINVAL; } @@ -1078,18 +1224,34 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( } bo->kfd_bo = *mem; (*mem)->bo = bo; + if (user_addr) + bo->flags |= AMDGPU_AMDKFD_USERPTR_BO; (*mem)->va = va; - (*mem)->domain = alloc_domain; + (*mem)->domain = domain; (*mem)->mapped_to_gpu_memory = 0; (*mem)->process_info = avm->process_info; - add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info); + add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr); + + if (user_addr) { + ret = init_user_pages(*mem, current->mm, user_addr); + if (ret) { + mutex_lock(&avm->process_info->lock); + list_del(&(*mem)->validate_list.head); + mutex_unlock(&avm->process_info->lock); + goto allocate_init_user_pages_failed; + } + } if (offset) *offset = amdgpu_bo_mmap_offset(bo); return 0; +allocate_init_user_pages_failed: + amdgpu_bo_unref(&bo); + /* Don't unreserve system mem limit twice */ + goto err_reserve_system_mem; err_bo_create: unreserve_system_mem_limit(adev, size, alloc_domain); err_reserve_system_mem: @@ -1122,12 +1284,24 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( * be freed anyway */ + /* No more MMU notifiers */ + amdgpu_mn_unregister(mem->bo); + /* Make sure restore workers don't access the BO any more */ bo_list_entry = &mem->validate_list; mutex_lock(&process_info->lock); list_del(&bo_list_entry->head); mutex_unlock(&process_info->lock); + /* Free user pages if necessary */ + if (mem->user_pages) { + pr_debug("%s: Freeing user_pages array\n", __func__); + if (mem->user_pages[0]) + release_pages(mem->user_pages, + mem->bo->tbo.ttm->num_pages); + kvfree(mem->user_pages); + } + ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); if (unlikely(ret)) return ret; @@ -1173,21 +1347,32 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( struct kfd_bo_va_list *bo_va_entry = NULL; struct kfd_bo_va_list *bo_va_entry_aql = NULL; unsigned long bo_size; - - /* Make sure restore is not running concurrently. - */ - mutex_lock(&mem->process_info->lock); - - mutex_lock(&mem->lock); + bool is_invalid_userptr = false; bo = mem->bo; - if (!bo) { pr_err("Invalid BO when mapping memory to GPU\n"); - ret = -EINVAL; - goto out; + return -EINVAL; } + /* Make sure restore is not running concurrently. Since we + * don't map invalid userptr BOs, we rely on the next restore + * worker to do the mapping + */ + mutex_lock(&mem->process_info->lock); + + /* Lock mmap-sem. If we find an invalid userptr BO, we can be + * sure that the MMU notifier is no longer running + * concurrently and the queues are actually stopped + */ + if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { + down_write(¤t->mm->mmap_sem); + is_invalid_userptr = atomic_read(&mem->invalid); + up_write(¤t->mm->mmap_sem); + } + + mutex_lock(&mem->lock); + domain = mem->domain; bo_size = bo->tbo.mem.size; @@ -1200,6 +1385,14 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( if (unlikely(ret)) goto out; + /* Userptr can be marked as "not invalid", but not actually be + * validated yet (still in the system domain). In that case + * the queues are still stopped and we can leave mapping for + * the next restore worker + */ + if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM) + is_invalid_userptr = true; + if (check_if_add_bo_to_vm(avm, mem)) { ret = add_bo_to_vm(adev, mem, avm, false, &bo_va_entry); @@ -1217,7 +1410,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( goto add_bo_to_vm_failed; } - if (mem->mapped_to_gpu_memory == 0) { + if (mem->mapped_to_gpu_memory == 0 && + !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { /* Validate BO only once. The eviction fence gets added to BO * the first time it is mapped. Validate will wait for all * background evictions to complete. @@ -1235,7 +1429,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( entry->va, entry->va + bo_size, entry); - ret = map_bo_to_gpuvm(adev, entry, ctx.sync); + ret = map_bo_to_gpuvm(adev, entry, ctx.sync, + is_invalid_userptr); if (ret) { pr_err("Failed to map radeon bo to gpuvm\n"); goto map_bo_to_gpuvm_failed; @@ -1418,6 +1613,337 @@ bo_reserve_failed: return ret; } +/* Evict a userptr BO by stopping the queues if necessary + * + * Runs in MMU notifier, may be in RECLAIM_FS context. This means it + * cannot do any memory allocations, and cannot take any locks that + * are held elsewhere while allocating memory. Therefore this is as + * simple as possible, using atomic counters. + * + * It doesn't do anything to the BO itself. The real work happens in + * restore, where we get updated page addresses. This function only + * ensures that GPU access to the BO is stopped. + */ +int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, + struct mm_struct *mm) +{ + struct amdkfd_process_info *process_info = mem->process_info; + int invalid, evicted_bos; + int r = 0; + + invalid = atomic_inc_return(&mem->invalid); + evicted_bos = atomic_inc_return(&process_info->evicted_bos); + if (evicted_bos == 1) { + /* First eviction, stop the queues */ + r = kgd2kfd->quiesce_mm(mm); + if (r) + pr_err("Failed to quiesce KFD\n"); + schedule_delayed_work(&process_info->restore_userptr_work, + msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS)); + } + + return r; +} + +/* Update invalid userptr BOs + * + * Moves invalidated (evicted) userptr BOs from userptr_valid_list to + * userptr_inval_list and updates user pages for all BOs that have + * been invalidated since their last update. + */ +static int update_invalid_user_pages(struct amdkfd_process_info *process_info, + struct mm_struct *mm) +{ + struct kgd_mem *mem, *tmp_mem; + struct amdgpu_bo *bo; + struct ttm_operation_ctx ctx = { false, false }; + int invalid, ret; + + /* Move all invalidated BOs to the userptr_inval_list and + * release their user pages by migration to the CPU domain + */ + list_for_each_entry_safe(mem, tmp_mem, + &process_info->userptr_valid_list, + validate_list.head) { + if (!atomic_read(&mem->invalid)) + continue; /* BO is still valid */ + + bo = mem->bo; + + if (amdgpu_bo_reserve(bo, true)) + return -EAGAIN; + amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + amdgpu_bo_unreserve(bo); + if (ret) { + pr_err("%s: Failed to invalidate userptr BO\n", + __func__); + return -EAGAIN; + } + + list_move_tail(&mem->validate_list.head, + &process_info->userptr_inval_list); + } + + if (list_empty(&process_info->userptr_inval_list)) + return 0; /* All evicted userptr BOs were freed */ + + /* Go through userptr_inval_list and update any invalid user_pages */ + list_for_each_entry(mem, &process_info->userptr_inval_list, + validate_list.head) { + invalid = atomic_read(&mem->invalid); + if (!invalid) + /* BO hasn't been invalidated since the last + * revalidation attempt. Keep its BO list. + */ + continue; + + bo = mem->bo; + + if (!mem->user_pages) { + mem->user_pages = + kvmalloc_array(bo->tbo.ttm->num_pages, + sizeof(struct page *), + GFP_KERNEL | __GFP_ZERO); + if (!mem->user_pages) { + pr_err("%s: Failed to allocate pages array\n", + __func__); + return -ENOMEM; + } + } else if (mem->user_pages[0]) { + release_pages(mem->user_pages, bo->tbo.ttm->num_pages); + } + + /* Get updated user pages */ + ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, + mem->user_pages); + if (ret) { + mem->user_pages[0] = NULL; + pr_info("%s: Failed to get user pages: %d\n", + __func__, ret); + /* Pretend it succeeded. It will fail later + * with a VM fault if the GPU tries to access + * it. Better than hanging indefinitely with + * stalled user mode queues. + */ + } + + /* Mark the BO as valid unless it was invalidated + * again concurrently + */ + if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) + return -EAGAIN; + } + + return 0; +} + +/* Validate invalid userptr BOs + * + * Validates BOs on the userptr_inval_list, and moves them back to the + * userptr_valid_list. Also updates GPUVM page tables with new page + * addresses and waits for the page table updates to complete. + */ +static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) +{ + struct amdgpu_bo_list_entry *pd_bo_list_entries; + struct list_head resv_list, duplicates; + struct ww_acquire_ctx ticket; + struct amdgpu_sync sync; + + struct amdgpu_vm *peer_vm; + struct kgd_mem *mem, *tmp_mem; + struct amdgpu_bo *bo; + struct ttm_operation_ctx ctx = { false, false }; + int i, ret; + + pd_bo_list_entries = kcalloc(process_info->n_vms, + sizeof(struct amdgpu_bo_list_entry), + GFP_KERNEL); + if (!pd_bo_list_entries) { + pr_err("%s: Failed to allocate PD BO list entries\n", __func__); + return -ENOMEM; + } + + INIT_LIST_HEAD(&resv_list); + INIT_LIST_HEAD(&duplicates); + + /* Get all the page directory BOs that need to be reserved */ + i = 0; + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) + amdgpu_vm_get_pd_bo(peer_vm, &resv_list, + &pd_bo_list_entries[i++]); + /* Add the userptr_inval_list entries to resv_list */ + list_for_each_entry(mem, &process_info->userptr_inval_list, + validate_list.head) { + list_add_tail(&mem->resv_list.head, &resv_list); + mem->resv_list.bo = mem->validate_list.bo; + mem->resv_list.shared = mem->validate_list.shared; + } + + /* Reserve all BOs and page tables for validation */ + ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates); + WARN(!list_empty(&duplicates), "Duplicates should be empty"); + if (ret) + goto out; + + amdgpu_sync_create(&sync); + + /* Avoid triggering eviction fences when unmapping invalid + * userptr BOs (waits for all fences, doesn't use + * FENCE_OWNER_VM) + */ + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) + amdgpu_amdkfd_remove_eviction_fence(peer_vm->root.base.bo, + process_info->eviction_fence, + NULL, NULL); + + ret = process_validate_vms(process_info); + if (ret) + goto unreserve_out; + + /* Validate BOs and update GPUVM page tables */ + list_for_each_entry_safe(mem, tmp_mem, + &process_info->userptr_inval_list, + validate_list.head) { + struct kfd_bo_va_list *bo_va_entry; + + bo = mem->bo; + + /* Copy pages array and validate the BO if we got user pages */ + if (mem->user_pages[0]) { + amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, + mem->user_pages); + amdgpu_ttm_placement_from_domain(bo, mem->domain); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (ret) { + pr_err("%s: failed to validate BO\n", __func__); + goto unreserve_out; + } + } + + /* Validate succeeded, now the BO owns the pages, free + * our copy of the pointer array. Put this BO back on + * the userptr_valid_list. If we need to revalidate + * it, we need to start from scratch. + */ + kvfree(mem->user_pages); + mem->user_pages = NULL; + list_move_tail(&mem->validate_list.head, + &process_info->userptr_valid_list); + + /* Update mapping. If the BO was not validated + * (because we couldn't get user pages), this will + * clear the page table entries, which will result in + * VM faults if the GPU tries to access the invalid + * memory. + */ + list_for_each_entry(bo_va_entry, &mem->bo_va_list, bo_list) { + if (!bo_va_entry->is_mapped) + continue; + + ret = update_gpuvm_pte((struct amdgpu_device *) + bo_va_entry->kgd_dev, + bo_va_entry, &sync); + if (ret) { + pr_err("%s: update PTE failed\n", __func__); + /* make sure this gets validated again */ + atomic_inc(&mem->invalid); + goto unreserve_out; + } + } + } + + /* Update page directories */ + ret = process_update_pds(process_info, &sync); + +unreserve_out: + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) + amdgpu_bo_fence(peer_vm->root.base.bo, + &process_info->eviction_fence->base, true); + ttm_eu_backoff_reservation(&ticket, &resv_list); + amdgpu_sync_wait(&sync, false); + amdgpu_sync_free(&sync); +out: + kfree(pd_bo_list_entries); + + return ret; +} + +/* Worker callback to restore evicted userptr BOs + * + * Tries to update and validate all userptr BOs. If successful and no + * concurrent evictions happened, the queues are restarted. Otherwise, + * reschedule for another attempt later. + */ +static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct amdkfd_process_info *process_info = + container_of(dwork, struct amdkfd_process_info, + restore_userptr_work); + struct task_struct *usertask; + struct mm_struct *mm; + int evicted_bos; + + evicted_bos = atomic_read(&process_info->evicted_bos); + if (!evicted_bos) + return; + + /* Reference task and mm in case of concurrent process termination */ + usertask = get_pid_task(process_info->pid, PIDTYPE_PID); + if (!usertask) + return; + mm = get_task_mm(usertask); + if (!mm) { + put_task_struct(usertask); + return; + } + + mutex_lock(&process_info->lock); + + if (update_invalid_user_pages(process_info, mm)) + goto unlock_out; + /* userptr_inval_list can be empty if all evicted userptr BOs + * have been freed. In that case there is nothing to validate + * and we can just restart the queues. + */ + if (!list_empty(&process_info->userptr_inval_list)) { + if (atomic_read(&process_info->evicted_bos) != evicted_bos) + goto unlock_out; /* Concurrent eviction, try again */ + + if (validate_invalid_user_pages(process_info)) + goto unlock_out; + } + /* Final check for concurrent evicton and atomic update. If + * another eviction happens after successful update, it will + * be a first eviction that calls quiesce_mm. The eviction + * reference counting inside KFD will handle this case. + */ + if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) != + evicted_bos) + goto unlock_out; + evicted_bos = 0; + if (kgd2kfd->resume_mm(mm)) { + pr_err("%s: Failed to resume KFD\n", __func__); + /* No recovery from this failure. Probably the CP is + * hanging. No point trying again. + */ + } +unlock_out: + mutex_unlock(&process_info->lock); + mmput(mm); + put_task_struct(usertask); + + /* If validation failed, reschedule another attempt */ + if (evicted_bos) + schedule_delayed_work(&process_info->restore_userptr_work, + msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS)); +} + /** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given * KFD process identified by process_info * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index dc34b50e6b29..8e66f3702b7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -536,7 +536,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, if (p->bo_list) { amdgpu_bo_list_get_list(p->bo_list, &p->validated); if (p->bo_list->first_userptr != p->bo_list->num_entries) - p->mn = amdgpu_mn_get(p->adev); + p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX); } INIT_LIST_HEAD(&duplicates); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index bd67f4cb8e6c..83e344fbb50a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -36,12 +36,14 @@ #include <drm/drm.h> #include "amdgpu.h" +#include "amdgpu_amdkfd.h" struct amdgpu_mn { /* constant after initialisation */ struct amdgpu_device *adev; struct mm_struct *mm; struct mmu_notifier mn; + enum amdgpu_mn_type type; /* only used on destruction */ struct work_struct work; @@ -185,7 +187,7 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, } /** - * amdgpu_mn_invalidate_range_start - callback to notify about mm change + * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change * * @mn: our notifier * @mn: the mm this callback is about @@ -195,10 +197,10 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, * We block for all BOs between start and end to be idle and * unmap them by move them into system domain again. */ -static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long start, - unsigned long end) +static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) { struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn); struct interval_tree_node *it; @@ -220,6 +222,49 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn, } /** + * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change + * + * @mn: our notifier + * @mn: the mm this callback is about + * @start: start of updated range + * @end: end of updated range + * + * We temporarily evict all BOs between start and end. This + * necessitates evicting all user-mode queues of the process. The BOs + * are restorted in amdgpu_mn_invalidate_range_end_hsa. + */ +static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn); + struct interval_tree_node *it; + + /* notification is exclusive, but interval is inclusive */ + end -= 1; + + amdgpu_mn_read_lock(rmn); + + it = interval_tree_iter_first(&rmn->objects, start, end); + while (it) { + struct amdgpu_mn_node *node; + struct amdgpu_bo *bo; + + node = container_of(it, struct amdgpu_mn_node, it); + it = interval_tree_iter_next(it, start, end); + + list_for_each_entry(bo, &node->bos, mn_list) { + struct kgd_mem *mem = bo->kfd_bo; + + if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, + start, end)) + amdgpu_amdkfd_evict_userptr(mem, mm); + } + } +} + +/** * amdgpu_mn_invalidate_range_end - callback to notify about mm change * * @mn: our notifier @@ -239,23 +284,39 @@ static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn, amdgpu_mn_read_unlock(rmn); } -static const struct mmu_notifier_ops amdgpu_mn_ops = { - .release = amdgpu_mn_release, - .invalidate_range_start = amdgpu_mn_invalidate_range_start, - .invalidate_range_end = amdgpu_mn_invalidate_range_end, +static const struct mmu_notifier_ops amdgpu_mn_ops[] = { + [AMDGPU_MN_TYPE_GFX] = { + .release = amdgpu_mn_release, + .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx, + .invalidate_range_end = amdgpu_mn_invalidate_range_end, + }, + [AMDGPU_MN_TYPE_HSA] = { + .release = amdgpu_mn_release, + .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa, + .invalidate_range_end = amdgpu_mn_invalidate_range_end, + }, }; +/* Low bits of any reasonable mm pointer will be unused due to struct + * alignment. Use these bits to make a unique key from the mm pointer + * and notifier type. + */ +#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type)) + /** * amdgpu_mn_get - create notifier context * * @adev: amdgpu device pointer + * @type: type of MMU notifier context * * Creates a notifier context for current->mm. */ -struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) +struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, + enum amdgpu_mn_type type) { struct mm_struct *mm = current->mm; struct amdgpu_mn *rmn; + unsigned long key = AMDGPU_MN_KEY(mm, type); int r; mutex_lock(&adev->mn_lock); @@ -264,8 +325,8 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) return ERR_PTR(-EINTR); } - hash_for_each_possible(adev->mn_hash, rmn, node, (unsigned long)mm) - if (rmn->mm == mm) + hash_for_each_possible(adev->mn_hash, rmn, node, key) + if (AMDGPU_MN_KEY(rmn->mm, rmn->type) == key) goto release_locks; rmn = kzalloc(sizeof(*rmn), GFP_KERNEL); @@ -276,8 +337,9 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) rmn->adev = adev; rmn->mm = mm; - rmn->mn.ops = &amdgpu_mn_ops; init_rwsem(&rmn->lock); + rmn->type = type; + rmn->mn.ops = &amdgpu_mn_ops[type]; rmn->objects = RB_ROOT_CACHED; mutex_init(&rmn->read_lock); atomic_set(&rmn->recursion, 0); @@ -286,7 +348,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) if (r) goto free_rmn; - hash_add(adev->mn_hash, &rmn->node, (unsigned long)mm); + hash_add(adev->mn_hash, &rmn->node, AMDGPU_MN_KEY(mm, type)); release_locks: up_write(&mm->mmap_sem); @@ -315,15 +377,21 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) { unsigned long end = addr + amdgpu_bo_size(bo) - 1; struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + enum amdgpu_mn_type type = + bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX; struct amdgpu_mn *rmn; - struct amdgpu_mn_node *node = NULL; + struct amdgpu_mn_node *node = NULL, *new_node; struct list_head bos; struct interval_tree_node *it; - rmn = amdgpu_mn_get(adev); + rmn = amdgpu_mn_get(adev, type); if (IS_ERR(rmn)) return PTR_ERR(rmn); + new_node = kmalloc(sizeof(*new_node), GFP_KERNEL); + if (!new_node) + return -ENOMEM; + INIT_LIST_HEAD(&bos); down_write(&rmn->lock); @@ -337,13 +405,10 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) list_splice(&node->bos, &bos); } - if (!node) { - node = kmalloc(sizeof(struct amdgpu_mn_node), GFP_KERNEL); - if (!node) { - up_write(&rmn->lock); - return -ENOMEM; - } - } + if (!node) + node = new_node; + else + kfree(new_node); bo->mn = rmn; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h index d0095a3793b8..eb0f432f78fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h @@ -29,16 +29,23 @@ */ struct amdgpu_mn; +enum amdgpu_mn_type { + AMDGPU_MN_TYPE_GFX, + AMDGPU_MN_TYPE_HSA, +}; + #if defined(CONFIG_MMU_NOTIFIER) void amdgpu_mn_lock(struct amdgpu_mn *mn); void amdgpu_mn_unlock(struct amdgpu_mn *mn); -struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev); +struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, + enum amdgpu_mn_type type); int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr); void amdgpu_mn_unregister(struct amdgpu_bo *bo); #else static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {} static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {} -static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) +static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, + enum amdgpu_mn_type type) { return NULL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 205da3ff9cd0..c713d30cba86 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -695,7 +695,7 @@ struct amdgpu_ttm_tt { struct ttm_dma_tt ttm; u64 offset; uint64_t userptr; - struct mm_struct *usermm; + struct task_struct *usertask; uint32_t userflags; spinlock_t guptasklock; struct list_head guptasks; @@ -706,14 +706,18 @@ struct amdgpu_ttm_tt { int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) { struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct mm_struct *mm = gtt->usertask->mm; unsigned int flags = 0; unsigned pinned = 0; int r; + if (!mm) /* Happens during process shutdown */ + return -ESRCH; + if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) flags |= FOLL_WRITE; - down_read(¤t->mm->mmap_sem); + down_read(&mm->mmap_sem); if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { /* check that we only use anonymous memory @@ -721,9 +725,9 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; struct vm_area_struct *vma; - vma = find_vma(gtt->usermm, gtt->userptr); + vma = find_vma(mm, gtt->userptr); if (!vma || vma->vm_file || vma->vm_end < end) { - up_read(¤t->mm->mmap_sem); + up_read(&mm->mmap_sem); return -EPERM; } } @@ -739,7 +743,12 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) list_add(&guptask.list, >t->guptasks); spin_unlock(>t->guptasklock); - r = get_user_pages(userptr, num_pages, flags, p, NULL); + if (mm == current->mm) + r = get_user_pages(userptr, num_pages, flags, p, NULL); + else + r = get_user_pages_remote(gtt->usertask, + mm, userptr, num_pages, + flags, p, NULL, NULL); spin_lock(>t->guptasklock); list_del(&guptask.list); @@ -752,12 +761,12 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) } while (pinned < ttm->num_pages); - up_read(¤t->mm->mmap_sem); + up_read(&mm->mmap_sem); return 0; release_pages: release_pages(pages, pinned); - up_read(¤t->mm->mmap_sem); + up_read(&mm->mmap_sem); return r; } @@ -978,6 +987,9 @@ static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm) { struct amdgpu_ttm_tt *gtt = (void *)ttm; + if (gtt->usertask) + put_task_struct(gtt->usertask); + ttm_dma_tt_fini(>t->ttm); kfree(gtt); } @@ -1079,8 +1091,13 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, return -EINVAL; gtt->userptr = addr; - gtt->usermm = current->mm; gtt->userflags = flags; + + if (gtt->usertask) + put_task_struct(gtt->usertask); + gtt->usertask = current->group_leader; + get_task_struct(gtt->usertask); + spin_lock_init(>t->guptasklock); INIT_LIST_HEAD(>t->guptasks); atomic_set(>t->mmu_invalidations, 0); @@ -1096,7 +1113,10 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm) if (gtt == NULL) return NULL; - return gtt->usermm; + if (gtt->usertask == NULL) + return NULL; + + return gtt->usertask->mm; } bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 9d39fd5b1822..e5962e61beb5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4686,6 +4686,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, cu_info->number = active_cu_number; cu_info->ao_cu_mask = ao_cu_mask; + cu_info->simd_per_cu = NUM_SIMD_PER_CU; return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index 7f408f85fdb6..f22f7a88ce0f 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h @@ -268,6 +268,11 @@ * x=1: tmz_end */ +#define PACKET3_INVALIDATE_TLBS 0x98 +# define PACKET3_INVALIDATE_TLBS_DST_SEL(x) ((x) << 0) +# define PACKET3_INVALIDATE_TLBS_ALL_HUB(x) ((x) << 4) +# define PACKET3_INVALIDATE_TLBS_PASID(x) ((x) << 5) +# define PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(x) ((x) << 29) #define PACKET3_SET_RESOURCES 0xA0 /* 1. header * 2. CONTROL diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index 0d0242240c47..ffd096fffc1c 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -30,12 +30,14 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \ kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \ kfd_process.o kfd_queue.o kfd_mqd_manager.o \ kfd_mqd_manager_cik.o kfd_mqd_manager_vi.o \ + kfd_mqd_manager_v9.o \ kfd_kernel_queue.o kfd_kernel_queue_cik.o \ - kfd_kernel_queue_vi.o kfd_packet_manager.o \ - kfd_process_queue_manager.o kfd_device_queue_manager.o \ - kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \ + kfd_kernel_queue_vi.o kfd_kernel_queue_v9.o \ + kfd_packet_manager.o kfd_process_queue_manager.o \ + kfd_device_queue_manager.o kfd_device_queue_manager_cik.o \ + kfd_device_queue_manager_vi.o kfd_device_queue_manager_v9.o \ kfd_interrupt.o kfd_events.o cik_event_interrupt.o \ - kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o + kfd_int_process_v9.o kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o ifneq ($(CONFIG_AMD_IOMMU_V2),) amdkfd-y += kfd_iommu.o diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c index 3d5ccb3755d4..49df6c791cfc 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c @@ -27,18 +27,28 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev, const uint32_t *ih_ring_entry) { - unsigned int pasid; const struct cik_ih_ring_entry *ihre = (const struct cik_ih_ring_entry *)ih_ring_entry; + unsigned int vmid, pasid; + + /* Only handle interrupts from KFD VMIDs */ + vmid = (ihre->ring_id & 0x0000ff00) >> 8; + if (vmid < dev->vm_info.first_vmid_kfd || + vmid > dev->vm_info.last_vmid_kfd) + return 0; + /* If there is no valid PASID, it's likely a firmware bug */ pasid = (ihre->ring_id & 0xffff0000) >> 16; + if (WARN_ONCE(pasid == 0, "FW bug: No PASID in KFD interrupt")) + return 0; - /* Do not process in ISR, just request it to be forwarded to WQ. */ - return (pasid != 0) && - (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE || + /* Interrupt types we care about: various signals and faults. + * They will be forwarded to a work queue (see below). + */ + return ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE || ihre->source_id == CIK_INTSRC_SDMA_TRAP || ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG || - ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE); + ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE; } static void cik_event_interrupt_wq(struct kfd_dev *dev, diff --git a/drivers/gpu/drm/amd/amdkfd/cik_regs.h b/drivers/gpu/drm/amd/amdkfd/cik_regs.h index 48769d12dd7b..37ce6dd65391 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_regs.h +++ b/drivers/gpu/drm/amd/amdkfd/cik_regs.h @@ -33,7 +33,8 @@ #define APE1_MTYPE(x) ((x) << 7) /* valid for both DEFAULT_MTYPE and APE1_MTYPE */ -#define MTYPE_CACHED 0 +#define MTYPE_CACHED_NV 0 +#define MTYPE_CACHED 1 #define MTYPE_NONCACHED 3 #define DEFAULT_CP_HQD_PERSISTENT_STATE (0x33U << 8) diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h new file mode 100644 index 000000000000..f68aef02fc1f --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -0,0 +1,560 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +static const uint32_t cwsr_trap_gfx8_hex[] = { + 0xbf820001, 0xbf820125, + 0xb8f4f802, 0x89748674, + 0xb8f5f803, 0x8675ff75, + 0x00000400, 0xbf850011, + 0xc00a1e37, 0x00000000, + 0xbf8c007f, 0x87777978, + 0xbf840002, 0xb974f802, + 0xbe801d78, 0xb8f5f803, + 0x8675ff75, 0x000001ff, + 0xbf850002, 0x80708470, + 0x82718071, 0x8671ff71, + 0x0000ffff, 0xb974f802, + 0xbe801f70, 0xb8f5f803, + 0x8675ff75, 0x00000100, + 0xbf840006, 0xbefa0080, + 0xb97a0203, 0x8671ff71, + 0x0000ffff, 0x80f08870, + 0x82f18071, 0xbefa0080, + 0xb97a0283, 0xbef60068, + 0xbef70069, 0xb8fa1c07, + 0x8e7a9c7a, 0x87717a71, + 0xb8fa03c7, 0x8e7a9b7a, + 0x87717a71, 0xb8faf807, + 0x867aff7a, 0x00007fff, + 0xb97af807, 0xbef2007e, + 0xbef3007f, 0xbefe0180, + 0xbf900004, 0x877a8474, + 0xb97af802, 0xbf8e0002, + 0xbf88fffe, 0xbef8007e, + 0x8679ff7f, 0x0000ffff, + 0x8779ff79, 0x00040000, + 0xbefa0080, 0xbefb00ff, + 0x00807fac, 0x867aff7f, + 0x08000000, 0x8f7a837a, + 0x877b7a7b, 0x867aff7f, + 0x70000000, 0x8f7a817a, + 0x877b7a7b, 0xbeef007c, + 0xbeee0080, 0xb8ee2a05, + 0x806e816e, 0x8e6e8a6e, + 0xb8fa1605, 0x807a817a, + 0x8e7a867a, 0x806e7a6e, + 0xbefa0084, 0xbefa00ff, + 0x01000000, 0xbefe007c, + 0xbefc006e, 0xc0611bfc, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611c3c, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611c7c, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611cbc, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611cfc, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611d3c, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xb8f5f803, + 0xbefe007c, 0xbefc006e, + 0xc0611d7c, 0x0000007c, + 0x806e846e, 0xbefc007e, + 0xbefe007c, 0xbefc006e, + 0xc0611dbc, 0x0000007c, + 0x806e846e, 0xbefc007e, + 0xbefe007c, 0xbefc006e, + 0xc0611dfc, 0x0000007c, + 0x806e846e, 0xbefc007e, + 0xb8eff801, 0xbefe007c, + 0xbefc006e, 0xc0611bfc, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611b3c, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611b7c, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0x867aff7f, + 0x04000000, 0xbef30080, + 0x8773737a, 0xb8ee2a05, + 0x806e816e, 0x8e6e8a6e, + 0xb8f51605, 0x80758175, + 0x8e758475, 0x8e7a8275, + 0xbefa00ff, 0x01000000, + 0xbef60178, 0x80786e78, + 0x82798079, 0xbefc0080, + 0xbe802b00, 0xbe822b02, + 0xbe842b04, 0xbe862b06, + 0xbe882b08, 0xbe8a2b0a, + 0xbe8c2b0c, 0xbe8e2b0e, + 0xc06b003c, 0x00000000, + 0xc06b013c, 0x00000010, + 0xc06b023c, 0x00000020, + 0xc06b033c, 0x00000030, + 0x8078c078, 0x82798079, + 0x807c907c, 0xbf0a757c, + 0xbf85ffeb, 0xbef80176, + 0xbeee0080, 0xbefe00c1, + 0xbeff00c1, 0xbefa00ff, + 0x01000000, 0xe0724000, + 0x6e1e0000, 0xe0724100, + 0x6e1e0100, 0xe0724200, + 0x6e1e0200, 0xe0724300, + 0x6e1e0300, 0xbefe00c1, + 0xbeff00c1, 0xb8f54306, + 0x8675c175, 0xbf84002c, + 0xbf8a0000, 0x867aff73, + 0x04000000, 0xbf840028, + 0x8e758675, 0x8e758275, + 0xbefa0075, 0xb8ee2a05, + 0x806e816e, 0x8e6e8a6e, + 0xb8fa1605, 0x807a817a, + 0x8e7a867a, 0x806e7a6e, + 0x806eff6e, 0x00000080, + 0xbefa00ff, 0x01000000, + 0xbefc0080, 0xd28c0002, + 0x000100c1, 0xd28d0003, + 0x000204c1, 0xd1060002, + 0x00011103, 0x7e0602ff, + 0x00000200, 0xbefc00ff, + 0x00010000, 0xbe80007b, + 0x867bff7b, 0xff7fffff, + 0x877bff7b, 0x00058000, + 0xd8ec0000, 0x00000002, + 0xbf8c007f, 0xe0765000, + 0x6e1e0002, 0x32040702, + 0xd0c9006a, 0x0000eb02, + 0xbf87fff7, 0xbefb0000, + 0xbeee00ff, 0x00000400, + 0xbefe00c1, 0xbeff00c1, + 0xb8f52a05, 0x80758175, + 0x8e758275, 0x8e7a8875, + 0xbefa00ff, 0x01000000, + 0xbefc0084, 0xbf0a757c, + 0xbf840015, 0xbf11017c, + 0x8075ff75, 0x00001000, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, + 0xe0724000, 0x6e1e0000, + 0xe0724100, 0x6e1e0100, + 0xe0724200, 0x6e1e0200, + 0xe0724300, 0x6e1e0300, + 0x807c847c, 0x806eff6e, + 0x00000400, 0xbf0a757c, + 0xbf85ffef, 0xbf9c0000, + 0xbf8200ca, 0xbef8007e, + 0x8679ff7f, 0x0000ffff, + 0x8779ff79, 0x00040000, + 0xbefa0080, 0xbefb00ff, + 0x00807fac, 0x8676ff7f, + 0x08000000, 0x8f768376, + 0x877b767b, 0x8676ff7f, + 0x70000000, 0x8f768176, + 0x877b767b, 0x8676ff7f, + 0x04000000, 0xbf84001e, + 0xbefe00c1, 0xbeff00c1, + 0xb8f34306, 0x8673c173, + 0xbf840019, 0x8e738673, + 0x8e738273, 0xbefa0073, + 0xb8f22a05, 0x80728172, + 0x8e728a72, 0xb8f61605, + 0x80768176, 0x8e768676, + 0x80727672, 0x8072ff72, + 0x00000080, 0xbefa00ff, + 0x01000000, 0xbefc0080, + 0xe0510000, 0x721e0000, + 0xe0510100, 0x721e0000, + 0x807cff7c, 0x00000200, + 0x8072ff72, 0x00000200, + 0xbf0a737c, 0xbf85fff6, + 0xbef20080, 0xbefe00c1, + 0xbeff00c1, 0xb8f32a05, + 0x80738173, 0x8e738273, + 0x8e7a8873, 0xbefa00ff, + 0x01000000, 0xbef60072, + 0x8072ff72, 0x00000400, + 0xbefc0084, 0xbf11087c, + 0x8073ff73, 0x00008000, + 0xe0524000, 0x721e0000, + 0xe0524100, 0x721e0100, + 0xe0524200, 0x721e0200, + 0xe0524300, 0x721e0300, + 0xbf8c0f70, 0x7e000300, + 0x7e020301, 0x7e040302, + 0x7e060303, 0x807c847c, + 0x8072ff72, 0x00000400, + 0xbf0a737c, 0xbf85ffee, + 0xbf9c0000, 0xe0524000, + 0x761e0000, 0xe0524100, + 0x761e0100, 0xe0524200, + 0x761e0200, 0xe0524300, + 0x761e0300, 0xb8f22a05, + 0x80728172, 0x8e728a72, + 0xb8f61605, 0x80768176, + 0x8e768676, 0x80727672, + 0x80f2c072, 0xb8f31605, + 0x80738173, 0x8e738473, + 0x8e7a8273, 0xbefa00ff, + 0x01000000, 0xbefc0073, + 0xc031003c, 0x00000072, + 0x80f2c072, 0xbf8c007f, + 0x80fc907c, 0xbe802d00, + 0xbe822d02, 0xbe842d04, + 0xbe862d06, 0xbe882d08, + 0xbe8a2d0a, 0xbe8c2d0c, + 0xbe8e2d0e, 0xbf06807c, + 0xbf84fff1, 0xb8f22a05, + 0x80728172, 0x8e728a72, + 0xb8f61605, 0x80768176, + 0x8e768676, 0x80727672, + 0xbefa0084, 0xbefa00ff, + 0x01000000, 0xc0211cfc, + 0x00000072, 0x80728472, + 0xc0211c3c, 0x00000072, + 0x80728472, 0xc0211c7c, + 0x00000072, 0x80728472, + 0xc0211bbc, 0x00000072, + 0x80728472, 0xc0211bfc, + 0x00000072, 0x80728472, + 0xc0211d3c, 0x00000072, + 0x80728472, 0xc0211d7c, + 0x00000072, 0x80728472, + 0xc0211a3c, 0x00000072, + 0x80728472, 0xc0211a7c, + 0x00000072, 0x80728472, + 0xc0211dfc, 0x00000072, + 0x80728472, 0xc0211b3c, + 0x00000072, 0x80728472, + 0xc0211b7c, 0x00000072, + 0x80728472, 0xbf8c007f, + 0xbefc0073, 0xbefe006e, + 0xbeff006f, 0x867375ff, + 0x000003ff, 0xb9734803, + 0x867375ff, 0xfffff800, + 0x8f738b73, 0xb973a2c3, + 0xb977f801, 0x8673ff71, + 0xf0000000, 0x8f739c73, + 0x8e739073, 0xbef60080, + 0x87767376, 0x8673ff71, + 0x08000000, 0x8f739b73, + 0x8e738f73, 0x87767376, + 0x8673ff74, 0x00800000, + 0x8f739773, 0xb976f807, + 0x8671ff71, 0x0000ffff, + 0x86fe7e7e, 0x86ea6a6a, + 0xb974f802, 0xbf8a0000, + 0x95807370, 0xbf810000, +}; + + +static const uint32_t cwsr_trap_gfx9_hex[] = { + 0xbf820001, 0xbf82015a, + 0xb8f8f802, 0x89788678, + 0xb8f1f803, 0x866eff71, + 0x00000400, 0xbf850034, + 0x866eff71, 0x00000800, + 0xbf850003, 0x866eff71, + 0x00000100, 0xbf840008, + 0x866eff78, 0x00002000, + 0xbf840001, 0xbf810000, + 0x8778ff78, 0x00002000, + 0x80ec886c, 0x82ed806d, + 0xb8eef807, 0x866fff6e, + 0x001f8000, 0x8e6f8b6f, + 0x8977ff77, 0xfc000000, + 0x87776f77, 0x896eff6e, + 0x001f8000, 0xb96ef807, + 0xb8f0f812, 0xb8f1f813, + 0x8ef08870, 0xc0071bb8, + 0x00000000, 0xbf8cc07f, + 0xc0071c38, 0x00000008, + 0xbf8cc07f, 0x86ee6e6e, + 0xbf840001, 0xbe801d6e, + 0xb8f1f803, 0x8671ff71, + 0x000001ff, 0xbf850002, + 0x806c846c, 0x826d806d, + 0x866dff6d, 0x0000ffff, + 0x8f6e8b77, 0x866eff6e, + 0x001f8000, 0xb96ef807, + 0x86fe7e7e, 0x86ea6a6a, + 0xb978f802, 0xbe801f6c, + 0x866dff6d, 0x0000ffff, + 0xbef00080, 0xb9700283, + 0xb8f02407, 0x8e709c70, + 0x876d706d, 0xb8f003c7, + 0x8e709b70, 0x876d706d, + 0xb8f0f807, 0x8670ff70, + 0x00007fff, 0xb970f807, + 0xbeee007e, 0xbeef007f, + 0xbefe0180, 0xbf900004, + 0x87708478, 0xb970f802, + 0xbf8e0002, 0xbf88fffe, + 0xb8f02a05, 0x80708170, + 0x8e708a70, 0xb8f11605, + 0x80718171, 0x8e718671, + 0x80707170, 0x80707e70, + 0x8271807f, 0x8671ff71, + 0x0000ffff, 0xc0471cb8, + 0x00000040, 0xbf8cc07f, + 0xc04b1d38, 0x00000048, + 0xbf8cc07f, 0xc0431e78, + 0x00000058, 0xbf8cc07f, + 0xc0471eb8, 0x0000005c, + 0xbf8cc07f, 0xbef4007e, + 0x8675ff7f, 0x0000ffff, + 0x8775ff75, 0x00040000, + 0xbef60080, 0xbef700ff, + 0x00807fac, 0x8670ff7f, + 0x08000000, 0x8f708370, + 0x87777077, 0x8670ff7f, + 0x70000000, 0x8f708170, + 0x87777077, 0xbefb007c, + 0xbefa0080, 0xb8fa2a05, + 0x807a817a, 0x8e7a8a7a, + 0xb8f01605, 0x80708170, + 0x8e708670, 0x807a707a, + 0xbef60084, 0xbef600ff, + 0x01000000, 0xbefe007c, + 0xbefc007a, 0xc0611efa, + 0x0000007c, 0xbf8cc07f, + 0x807a847a, 0xbefc007e, + 0xbefe007c, 0xbefc007a, + 0xc0611b3a, 0x0000007c, + 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0xbefe007c, + 0xbefc007a, 0xc0611b7a, + 0x0000007c, 0xbf8cc07f, + 0x807a847a, 0xbefc007e, + 0xbefe007c, 0xbefc007a, + 0xc0611bba, 0x0000007c, + 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0xbefe007c, + 0xbefc007a, 0xc0611bfa, + 0x0000007c, 0xbf8cc07f, + 0x807a847a, 0xbefc007e, + 0xbefe007c, 0xbefc007a, + 0xc0611e3a, 0x0000007c, + 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0xb8f1f803, + 0xbefe007c, 0xbefc007a, + 0xc0611c7a, 0x0000007c, + 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0xbefe007c, + 0xbefc007a, 0xc0611a3a, + 0x0000007c, 0xbf8cc07f, + 0x807a847a, 0xbefc007e, + 0xbefe007c, 0xbefc007a, + 0xc0611a7a, 0x0000007c, + 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0xb8fbf801, + 0xbefe007c, 0xbefc007a, + 0xc0611efa, 0x0000007c, + 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0x8670ff7f, + 0x04000000, 0xbeef0080, + 0x876f6f70, 0xb8fa2a05, + 0x807a817a, 0x8e7a8a7a, + 0xb8f11605, 0x80718171, + 0x8e718471, 0x8e768271, + 0xbef600ff, 0x01000000, + 0xbef20174, 0x80747a74, + 0x82758075, 0xbefc0080, + 0xbf800000, 0xbe802b00, + 0xbe822b02, 0xbe842b04, + 0xbe862b06, 0xbe882b08, + 0xbe8a2b0a, 0xbe8c2b0c, + 0xbe8e2b0e, 0xc06b003a, + 0x00000000, 0xbf8cc07f, + 0xc06b013a, 0x00000010, + 0xbf8cc07f, 0xc06b023a, + 0x00000020, 0xbf8cc07f, + 0xc06b033a, 0x00000030, + 0xbf8cc07f, 0x8074c074, + 0x82758075, 0x807c907c, + 0xbf0a717c, 0xbf85ffe7, + 0xbef40172, 0xbefa0080, + 0xbefe00c1, 0xbeff00c1, + 0xbee80080, 0xbee90080, + 0xbef600ff, 0x01000000, + 0xe0724000, 0x7a1d0000, + 0xe0724100, 0x7a1d0100, + 0xe0724200, 0x7a1d0200, + 0xe0724300, 0x7a1d0300, + 0xbefe00c1, 0xbeff00c1, + 0xb8f14306, 0x8671c171, + 0xbf84002c, 0xbf8a0000, + 0x8670ff6f, 0x04000000, + 0xbf840028, 0x8e718671, + 0x8e718271, 0xbef60071, + 0xb8fa2a05, 0x807a817a, + 0x8e7a8a7a, 0xb8f01605, + 0x80708170, 0x8e708670, + 0x807a707a, 0x807aff7a, + 0x00000080, 0xbef600ff, + 0x01000000, 0xbefc0080, + 0xd28c0002, 0x000100c1, + 0xd28d0003, 0x000204c1, + 0xd1060002, 0x00011103, + 0x7e0602ff, 0x00000200, + 0xbefc00ff, 0x00010000, + 0xbe800077, 0x8677ff77, + 0xff7fffff, 0x8777ff77, + 0x00058000, 0xd8ec0000, + 0x00000002, 0xbf8cc07f, + 0xe0765000, 0x7a1d0002, + 0x68040702, 0xd0c9006a, + 0x0000e302, 0xbf87fff7, + 0xbef70000, 0xbefa00ff, + 0x00000400, 0xbefe00c1, + 0xbeff00c1, 0xb8f12a05, + 0x80718171, 0x8e718271, + 0x8e768871, 0xbef600ff, + 0x01000000, 0xbefc0084, + 0xbf0a717c, 0xbf840015, + 0xbf11017c, 0x8071ff71, + 0x00001000, 0x7e000300, + 0x7e020301, 0x7e040302, + 0x7e060303, 0xe0724000, + 0x7a1d0000, 0xe0724100, + 0x7a1d0100, 0xe0724200, + 0x7a1d0200, 0xe0724300, + 0x7a1d0300, 0x807c847c, + 0x807aff7a, 0x00000400, + 0xbf0a717c, 0xbf85ffef, + 0xbf9c0000, 0xbf8200d9, + 0xbef4007e, 0x8675ff7f, + 0x0000ffff, 0x8775ff75, + 0x00040000, 0xbef60080, + 0xbef700ff, 0x00807fac, + 0x866eff7f, 0x08000000, + 0x8f6e836e, 0x87776e77, + 0x866eff7f, 0x70000000, + 0x8f6e816e, 0x87776e77, + 0x866eff7f, 0x04000000, + 0xbf84001e, 0xbefe00c1, + 0xbeff00c1, 0xb8ef4306, + 0x866fc16f, 0xbf840019, + 0x8e6f866f, 0x8e6f826f, + 0xbef6006f, 0xb8f82a05, + 0x80788178, 0x8e788a78, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0x8078ff78, 0x00000080, + 0xbef600ff, 0x01000000, + 0xbefc0080, 0xe0510000, + 0x781d0000, 0xe0510100, + 0x781d0000, 0x807cff7c, + 0x00000200, 0x8078ff78, + 0x00000200, 0xbf0a6f7c, + 0xbf85fff6, 0xbef80080, + 0xbefe00c1, 0xbeff00c1, + 0xb8ef2a05, 0x806f816f, + 0x8e6f826f, 0x8e76886f, + 0xbef600ff, 0x01000000, + 0xbeee0078, 0x8078ff78, + 0x00000400, 0xbefc0084, + 0xbf11087c, 0x806fff6f, + 0x00008000, 0xe0524000, + 0x781d0000, 0xe0524100, + 0x781d0100, 0xe0524200, + 0x781d0200, 0xe0524300, + 0x781d0300, 0xbf8c0f70, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, + 0x807c847c, 0x8078ff78, + 0x00000400, 0xbf0a6f7c, + 0xbf85ffee, 0xbf9c0000, + 0xe0524000, 0x6e1d0000, + 0xe0524100, 0x6e1d0100, + 0xe0524200, 0x6e1d0200, + 0xe0524300, 0x6e1d0300, + 0xb8f82a05, 0x80788178, + 0x8e788a78, 0xb8ee1605, + 0x806e816e, 0x8e6e866e, + 0x80786e78, 0x80f8c078, + 0xb8ef1605, 0x806f816f, + 0x8e6f846f, 0x8e76826f, + 0xbef600ff, 0x01000000, + 0xbefc006f, 0xc031003a, + 0x00000078, 0x80f8c078, + 0xbf8cc07f, 0x80fc907c, + 0xbf800000, 0xbe802d00, + 0xbe822d02, 0xbe842d04, + 0xbe862d06, 0xbe882d08, + 0xbe8a2d0a, 0xbe8c2d0c, + 0xbe8e2d0e, 0xbf06807c, + 0xbf84fff0, 0xb8f82a05, + 0x80788178, 0x8e788a78, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0xbef60084, 0xbef600ff, + 0x01000000, 0xc0211bfa, + 0x00000078, 0x80788478, + 0xc0211b3a, 0x00000078, + 0x80788478, 0xc0211b7a, + 0x00000078, 0x80788478, + 0xc0211eba, 0x00000078, + 0x80788478, 0xc0211efa, + 0x00000078, 0x80788478, + 0xc0211c3a, 0x00000078, + 0x80788478, 0xc0211c7a, + 0x00000078, 0x80788478, + 0xc0211a3a, 0x00000078, + 0x80788478, 0xc0211a7a, + 0x00000078, 0x80788478, + 0xc0211cfa, 0x00000078, + 0x80788478, 0xbf8cc07f, + 0xbefc006f, 0xbefe007a, + 0xbeff007b, 0x866f71ff, + 0x000003ff, 0xb96f4803, + 0x866f71ff, 0xfffff800, + 0x8f6f8b6f, 0xb96fa2c3, + 0xb973f801, 0xb8ee2a05, + 0x806e816e, 0x8e6e8a6e, + 0xb8ef1605, 0x806f816f, + 0x8e6f866f, 0x806e6f6e, + 0x806e746e, 0x826f8075, + 0x866fff6f, 0x0000ffff, + 0xc0071cb7, 0x00000040, + 0xc00b1d37, 0x00000048, + 0xc0031e77, 0x00000058, + 0xc0071eb7, 0x0000005c, + 0xbf8cc07f, 0x866fff6d, + 0xf0000000, 0x8f6f9c6f, + 0x8e6f906f, 0xbeee0080, + 0x876e6f6e, 0x866fff6d, + 0x08000000, 0x8f6f9b6f, + 0x8e6f8f6f, 0x876e6f6e, + 0x866fff70, 0x00800000, + 0x8f6f976f, 0xb96ef807, + 0x866dff6d, 0x0000ffff, + 0x86fe7e7e, 0x86ea6a6a, + 0xb970f802, 0xbf8a0000, + 0x95806f6c, 0xbf810000, +}; diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm index 997a383dcb8b..a2a04bb64096 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm @@ -20,9 +20,12 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#if 0 -HW (VI) source code for CWSR trap handler -#Version 18 + multiple trap handler +/* To compile this assembly code: + * PROJECT=vi ./sp3 cwsr_trap_handler_gfx8.asm -hex tmp.hex + */ + +/* HW (VI) source code for CWSR trap handler */ +/* Version 18 + multiple trap handler */ // this performance-optimal version was originally from Seven Xu at SRDC @@ -98,6 +101,7 @@ var SWIZZLE_EN = 0 //whether we use swi /**************************************************************************/ var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23 var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000 +var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1 var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 @@ -149,7 +153,7 @@ var s_save_spi_init_lo = exec_lo var s_save_spi_init_hi = exec_hi //tba_lo and tba_hi need to be saved/restored -var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3??h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]} +var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3'h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]} var s_save_pc_hi = ttmp1 var s_save_exec_lo = ttmp2 var s_save_exec_hi = ttmp3 @@ -319,6 +323,10 @@ end s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC end + // Set SPI_PRIO=2 to avoid starving instruction fetch in the waves we're waiting for. + s_or_b32 s_save_tmp, s_save_status, (2 << SQ_WAVE_STATUS_SPI_PRIO_SHIFT) + s_setreg_b32 hwreg(HW_REG_STATUS), s_save_tmp + L_SLEEP: s_sleep 0x2 // sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause SQ hang, since the 7,8th wave could not get arbit to exec inst, while other waves are stuck into the sleep-loop and waiting for wrexec!=0 @@ -1007,8 +1015,6 @@ end s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS - s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS - //for normal save & restore, the saved PC points to the next inst to execute, no adjustment needs to be made, otherwise: if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 8 //pc[31:0]+8 //two back-to-back s_trap are used (first for save and second for restore) @@ -1044,6 +1050,7 @@ end s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp + s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu @@ -1127,258 +1134,3 @@ end function get_hwreg_size_bytes return 128 //HWREG size 128 bytes end - - -#endif - -static const uint32_t cwsr_trap_gfx8_hex[] = { - 0xbf820001, 0xbf820123, - 0xb8f4f802, 0x89748674, - 0xb8f5f803, 0x8675ff75, - 0x00000400, 0xbf850011, - 0xc00a1e37, 0x00000000, - 0xbf8c007f, 0x87777978, - 0xbf840002, 0xb974f802, - 0xbe801d78, 0xb8f5f803, - 0x8675ff75, 0x000001ff, - 0xbf850002, 0x80708470, - 0x82718071, 0x8671ff71, - 0x0000ffff, 0xb974f802, - 0xbe801f70, 0xb8f5f803, - 0x8675ff75, 0x00000100, - 0xbf840006, 0xbefa0080, - 0xb97a0203, 0x8671ff71, - 0x0000ffff, 0x80f08870, - 0x82f18071, 0xbefa0080, - 0xb97a0283, 0xbef60068, - 0xbef70069, 0xb8fa1c07, - 0x8e7a9c7a, 0x87717a71, - 0xb8fa03c7, 0x8e7a9b7a, - 0x87717a71, 0xb8faf807, - 0x867aff7a, 0x00007fff, - 0xb97af807, 0xbef2007e, - 0xbef3007f, 0xbefe0180, - 0xbf900004, 0xbf8e0002, - 0xbf88fffe, 0xbef8007e, - 0x8679ff7f, 0x0000ffff, - 0x8779ff79, 0x00040000, - 0xbefa0080, 0xbefb00ff, - 0x00807fac, 0x867aff7f, - 0x08000000, 0x8f7a837a, - 0x877b7a7b, 0x867aff7f, - 0x70000000, 0x8f7a817a, - 0x877b7a7b, 0xbeef007c, - 0xbeee0080, 0xb8ee2a05, - 0x806e816e, 0x8e6e8a6e, - 0xb8fa1605, 0x807a817a, - 0x8e7a867a, 0x806e7a6e, - 0xbefa0084, 0xbefa00ff, - 0x01000000, 0xbefe007c, - 0xbefc006e, 0xc0611bfc, - 0x0000007c, 0x806e846e, - 0xbefc007e, 0xbefe007c, - 0xbefc006e, 0xc0611c3c, - 0x0000007c, 0x806e846e, - 0xbefc007e, 0xbefe007c, - 0xbefc006e, 0xc0611c7c, - 0x0000007c, 0x806e846e, - 0xbefc007e, 0xbefe007c, - 0xbefc006e, 0xc0611cbc, - 0x0000007c, 0x806e846e, - 0xbefc007e, 0xbefe007c, - 0xbefc006e, 0xc0611cfc, - 0x0000007c, 0x806e846e, - 0xbefc007e, 0xbefe007c, - 0xbefc006e, 0xc0611d3c, - 0x0000007c, 0x806e846e, - 0xbefc007e, 0xb8f5f803, - 0xbefe007c, 0xbefc006e, - 0xc0611d7c, 0x0000007c, - 0x806e846e, 0xbefc007e, - 0xbefe007c, 0xbefc006e, - 0xc0611dbc, 0x0000007c, - 0x806e846e, 0xbefc007e, - 0xbefe007c, 0xbefc006e, - 0xc0611dfc, 0x0000007c, - 0x806e846e, 0xbefc007e, - 0xb8eff801, 0xbefe007c, - 0xbefc006e, 0xc0611bfc, - 0x0000007c, 0x806e846e, - 0xbefc007e, 0xbefe007c, - 0xbefc006e, 0xc0611b3c, - 0x0000007c, 0x806e846e, - 0xbefc007e, 0xbefe007c, - 0xbefc006e, 0xc0611b7c, - 0x0000007c, 0x806e846e, - 0xbefc007e, 0x867aff7f, - 0x04000000, 0xbef30080, - 0x8773737a, 0xb8ee2a05, - 0x806e816e, 0x8e6e8a6e, - 0xb8f51605, 0x80758175, - 0x8e758475, 0x8e7a8275, - 0xbefa00ff, 0x01000000, - 0xbef60178, 0x80786e78, - 0x82798079, 0xbefc0080, - 0xbe802b00, 0xbe822b02, - 0xbe842b04, 0xbe862b06, - 0xbe882b08, 0xbe8a2b0a, - 0xbe8c2b0c, 0xbe8e2b0e, - 0xc06b003c, 0x00000000, - 0xc06b013c, 0x00000010, - 0xc06b023c, 0x00000020, - 0xc06b033c, 0x00000030, - 0x8078c078, 0x82798079, - 0x807c907c, 0xbf0a757c, - 0xbf85ffeb, 0xbef80176, - 0xbeee0080, 0xbefe00c1, - 0xbeff00c1, 0xbefa00ff, - 0x01000000, 0xe0724000, - 0x6e1e0000, 0xe0724100, - 0x6e1e0100, 0xe0724200, - 0x6e1e0200, 0xe0724300, - 0x6e1e0300, 0xbefe00c1, - 0xbeff00c1, 0xb8f54306, - 0x8675c175, 0xbf84002c, - 0xbf8a0000, 0x867aff73, - 0x04000000, 0xbf840028, - 0x8e758675, 0x8e758275, - 0xbefa0075, 0xb8ee2a05, - 0x806e816e, 0x8e6e8a6e, - 0xb8fa1605, 0x807a817a, - 0x8e7a867a, 0x806e7a6e, - 0x806eff6e, 0x00000080, - 0xbefa00ff, 0x01000000, - 0xbefc0080, 0xd28c0002, - 0x000100c1, 0xd28d0003, - 0x000204c1, 0xd1060002, - 0x00011103, 0x7e0602ff, - 0x00000200, 0xbefc00ff, - 0x00010000, 0xbe80007b, - 0x867bff7b, 0xff7fffff, - 0x877bff7b, 0x00058000, - 0xd8ec0000, 0x00000002, - 0xbf8c007f, 0xe0765000, - 0x6e1e0002, 0x32040702, - 0xd0c9006a, 0x0000eb02, - 0xbf87fff7, 0xbefb0000, - 0xbeee00ff, 0x00000400, - 0xbefe00c1, 0xbeff00c1, - 0xb8f52a05, 0x80758175, - 0x8e758275, 0x8e7a8875, - 0xbefa00ff, 0x01000000, - 0xbefc0084, 0xbf0a757c, - 0xbf840015, 0xbf11017c, - 0x8075ff75, 0x00001000, - 0x7e000300, 0x7e020301, - 0x7e040302, 0x7e060303, - 0xe0724000, 0x6e1e0000, - 0xe0724100, 0x6e1e0100, - 0xe0724200, 0x6e1e0200, - 0xe0724300, 0x6e1e0300, - 0x807c847c, 0x806eff6e, - 0x00000400, 0xbf0a757c, - 0xbf85ffef, 0xbf9c0000, - 0xbf8200ca, 0xbef8007e, - 0x8679ff7f, 0x0000ffff, - 0x8779ff79, 0x00040000, - 0xbefa0080, 0xbefb00ff, - 0x00807fac, 0x8676ff7f, - 0x08000000, 0x8f768376, - 0x877b767b, 0x8676ff7f, - 0x70000000, 0x8f768176, - 0x877b767b, 0x8676ff7f, - 0x04000000, 0xbf84001e, - 0xbefe00c1, 0xbeff00c1, - 0xb8f34306, 0x8673c173, - 0xbf840019, 0x8e738673, - 0x8e738273, 0xbefa0073, - 0xb8f22a05, 0x80728172, - 0x8e728a72, 0xb8f61605, - 0x80768176, 0x8e768676, - 0x80727672, 0x8072ff72, - 0x00000080, 0xbefa00ff, - 0x01000000, 0xbefc0080, - 0xe0510000, 0x721e0000, - 0xe0510100, 0x721e0000, - 0x807cff7c, 0x00000200, - 0x8072ff72, 0x00000200, - 0xbf0a737c, 0xbf85fff6, - 0xbef20080, 0xbefe00c1, - 0xbeff00c1, 0xb8f32a05, - 0x80738173, 0x8e738273, - 0x8e7a8873, 0xbefa00ff, - 0x01000000, 0xbef60072, - 0x8072ff72, 0x00000400, - 0xbefc0084, 0xbf11087c, - 0x8073ff73, 0x00008000, - 0xe0524000, 0x721e0000, - 0xe0524100, 0x721e0100, - 0xe0524200, 0x721e0200, - 0xe0524300, 0x721e0300, - 0xbf8c0f70, 0x7e000300, - 0x7e020301, 0x7e040302, - 0x7e060303, 0x807c847c, - 0x8072ff72, 0x00000400, - 0xbf0a737c, 0xbf85ffee, - 0xbf9c0000, 0xe0524000, - 0x761e0000, 0xe0524100, - 0x761e0100, 0xe0524200, - 0x761e0200, 0xe0524300, - 0x761e0300, 0xb8f22a05, - 0x80728172, 0x8e728a72, - 0xb8f61605, 0x80768176, - 0x8e768676, 0x80727672, - 0x80f2c072, 0xb8f31605, - 0x80738173, 0x8e738473, - 0x8e7a8273, 0xbefa00ff, - 0x01000000, 0xbefc0073, - 0xc031003c, 0x00000072, - 0x80f2c072, 0xbf8c007f, - 0x80fc907c, 0xbe802d00, - 0xbe822d02, 0xbe842d04, - 0xbe862d06, 0xbe882d08, - 0xbe8a2d0a, 0xbe8c2d0c, - 0xbe8e2d0e, 0xbf06807c, - 0xbf84fff1, 0xb8f22a05, - 0x80728172, 0x8e728a72, - 0xb8f61605, 0x80768176, - 0x8e768676, 0x80727672, - 0xbefa0084, 0xbefa00ff, - 0x01000000, 0xc0211cfc, - 0x00000072, 0x80728472, - 0xc0211c3c, 0x00000072, - 0x80728472, 0xc0211c7c, - 0x00000072, 0x80728472, - 0xc0211bbc, 0x00000072, - 0x80728472, 0xc0211bfc, - 0x00000072, 0x80728472, - 0xc0211d3c, 0x00000072, - 0x80728472, 0xc0211d7c, - 0x00000072, 0x80728472, - 0xc0211a3c, 0x00000072, - 0x80728472, 0xc0211a7c, - 0x00000072, 0x80728472, - 0xc0211dfc, 0x00000072, - 0x80728472, 0xc0211b3c, - 0x00000072, 0x80728472, - 0xc0211b7c, 0x00000072, - 0x80728472, 0xbf8c007f, - 0x8671ff71, 0x0000ffff, - 0xbefc0073, 0xbefe006e, - 0xbeff006f, 0x867375ff, - 0x000003ff, 0xb9734803, - 0x867375ff, 0xfffff800, - 0x8f738b73, 0xb973a2c3, - 0xb977f801, 0x8673ff71, - 0xf0000000, 0x8f739c73, - 0x8e739073, 0xbef60080, - 0x87767376, 0x8673ff71, - 0x08000000, 0x8f739b73, - 0x8e738f73, 0x87767376, - 0x8673ff74, 0x00800000, - 0x8f739773, 0xb976f807, - 0x86fe7e7e, 0x86ea6a6a, - 0xb974f802, 0xbf8a0000, - 0x95807370, 0xbf810000, -}; - diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm new file mode 100644 index 000000000000..998be96be736 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm @@ -0,0 +1,1214 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* To compile this assembly code: + * PROJECT=greenland ./sp3 cwsr_trap_handler_gfx9.asm -hex tmp.hex + */ + +/* HW (GFX9) source code for CWSR trap handler */ +/* Version 18 + multiple trap handler */ + +// this performance-optimal version was originally from Seven Xu at SRDC + +// Revison #18 --... +/* Rev History +** #1. Branch from gc dv. //gfxip/gfx9/main/src/test/suites/block/cs/sr/cs_trap_handler.sp3#1,#50, #51, #52-53(Skip, Already Fixed by PV), #54-56(merged),#57-58(mergerd, skiped-already fixed by PV) +** #4. SR Memory Layout: +** 1. VGPR-SGPR-HWREG-{LDS} +** 2. tba_hi.bits.26 - reconfigured as the first wave in tg bits, for defer Save LDS for a threadgroup.. performance concern.. +** #5. Update: 1. Accurate g8sr_ts_save_d timestamp +** #6. Update: 1. Fix s_barrier usage; 2. VGPR s/r using swizzle buffer?(NoNeed, already matched the swizzle pattern, more investigation) +** #7. Update: 1. don't barrier if noLDS +** #8. Branch: 1. Branch to ver#0, which is very similar to gc dv version +** 2. Fix SQ issue by s_sleep 2 +** #9. Update: 1. Fix scc restore failed issue, restore wave_status at last +** 2. optimize s_buffer save by burst 16sgprs... +** #10. Update 1. Optimize restore sgpr by busrt 16 sgprs. +** #11. Update 1. Add 2 more timestamp for debug version +** #12. Update 1. Add VGPR SR using DWx4, some case improve and some case drop performance +** #13. Integ 1. Always use MUBUF for PV trap shader... +** #14. Update 1. s_buffer_store soft clause... +** #15. Update 1. PERF - sclar write with glc:0/mtype0 to allow L2 combine. perf improvement a lot. +** #16. Update 1. PRRF - UNROLL LDS_DMA got 2500cycle save in IP tree +** #17. Update 1. FUNC - LDS_DMA has issues while ATC, replace with ds_read/buffer_store for save part[TODO restore part] +** 2. PERF - Save LDS before save VGPR to cover LDS save long latency... +** #18. Update 1. FUNC - Implicitly estore STATUS.VCCZ, which is not writable by s_setreg_b32 +** 2. FUNC - Handle non-CWSR traps +*/ + +var G8SR_WDMEM_HWREG_OFFSET = 0 +var G8SR_WDMEM_SGPR_OFFSET = 128 // in bytes + +// Keep definition same as the app shader, These 2 time stamps are part of the app shader... Should before any Save and after restore. + +var G8SR_DEBUG_TIMESTAMP = 0 +var G8SR_DEBUG_TS_SAVE_D_OFFSET = 40*4 // ts_save_d timestamp offset relative to SGPR_SR_memory_offset +var s_g8sr_ts_save_s = s[34:35] // save start +var s_g8sr_ts_sq_save_msg = s[36:37] // The save shader send SAVEWAVE msg to spi +var s_g8sr_ts_spi_wrexec = s[38:39] // the SPI write the sr address to SQ +var s_g8sr_ts_save_d = s[40:41] // save end +var s_g8sr_ts_restore_s = s[42:43] // restore start +var s_g8sr_ts_restore_d = s[44:45] // restore end + +var G8SR_VGPR_SR_IN_DWX4 = 0 +var G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 = 0x00100000 // DWx4 stride is 4*4Bytes +var G8SR_RESTORE_BUF_RSRC_WORD1_STRIDE_DWx4 = G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 + + +/*************************************************************************/ +/* control on how to run the shader */ +/*************************************************************************/ +//any hack that needs to be made to run this code in EMU (either because various EMU code are not ready or no compute save & restore in EMU run) +var EMU_RUN_HACK = 0 +var EMU_RUN_HACK_RESTORE_NORMAL = 0 +var EMU_RUN_HACK_SAVE_NORMAL_EXIT = 0 +var EMU_RUN_HACK_SAVE_SINGLE_WAVE = 0 +var EMU_RUN_HACK_SAVE_FIRST_TIME = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK +var SAVE_LDS = 1 +var WG_BASE_ADDR_LO = 0x9000a000 +var WG_BASE_ADDR_HI = 0x0 +var WAVE_SPACE = 0x5000 //memory size that each wave occupies in workgroup state mem +var CTX_SAVE_CONTROL = 0x0 +var CTX_RESTORE_CONTROL = CTX_SAVE_CONTROL +var SIM_RUN_HACK = 0 //any hack that needs to be made to run this code in SIM (either because various RTL code are not ready or no compute save & restore in RTL run) +var SGPR_SAVE_USE_SQC = 1 //use SQC D$ to do the write +var USE_MTBUF_INSTEAD_OF_MUBUF = 0 //because TC EMU currently asserts on 0 of // overload DFMT field to carry 4 more bits of stride for MUBUF opcodes +var SWIZZLE_EN = 0 //whether we use swizzled buffer addressing +var ACK_SQC_STORE = 1 //workaround for suspected SQC store bug causing incorrect stores under concurrency + +/**************************************************************************/ +/* variables */ +/**************************************************************************/ +var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23 +var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000 +var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1 +var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 +var SQ_WAVE_STATUS_HALT_MASK = 0x2000 + +var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 +var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 +var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8 +var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 6 +var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT = 24 +var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE = 3 //FIXME sq.blk still has 4 bits at this time while SQ programming guide has 3 bits + +var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400 +var SQ_WAVE_TRAPSTS_EXCE_MASK = 0x1FF // Exception mask +var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10 +var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100 +var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT = 8 +var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK = 0x3FF +var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT = 0x0 +var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE = 10 +var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK = 0xFFFFF800 +var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11 +var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21 +var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK = 0x800 + +var SQ_WAVE_IB_STS_RCNT_SHIFT = 16 //FIXME +var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15 //FIXME +var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x1F8000 +var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG = 0x00007FFF //FIXME + +var SQ_BUF_RSRC_WORD1_ATC_SHIFT = 24 +var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT = 27 + +var TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT = 26 // bits [31:26] unused by SPI debug data +var TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK = 0xFC000000 + +/* Save */ +var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000 //stride is 4 bytes +var S_SAVE_BUF_RSRC_WORD3_MISC = 0x00807FAC //SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE + +var S_SAVE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit +var S_SAVE_SPI_INIT_ATC_SHIFT = 27 +var S_SAVE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype +var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28 +var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG +var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26 + +var S_SAVE_PC_HI_RCNT_SHIFT = 28 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used +var S_SAVE_PC_HI_RCNT_MASK = 0xF0000000 //FIXME +var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 27 //FIXME +var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x08000000 //FIXME + +var s_save_spi_init_lo = exec_lo +var s_save_spi_init_hi = exec_hi + +var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3'h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]} +var s_save_pc_hi = ttmp1 +var s_save_exec_lo = ttmp2 +var s_save_exec_hi = ttmp3 +var s_save_tmp = ttmp4 +var s_save_trapsts = ttmp5 //not really used until the end of the SAVE routine +var s_save_xnack_mask_lo = ttmp6 +var s_save_xnack_mask_hi = ttmp7 +var s_save_buf_rsrc0 = ttmp8 +var s_save_buf_rsrc1 = ttmp9 +var s_save_buf_rsrc2 = ttmp10 +var s_save_buf_rsrc3 = ttmp11 +var s_save_status = ttmp12 +var s_save_mem_offset = ttmp14 +var s_save_alloc_size = s_save_trapsts //conflict +var s_save_m0 = ttmp15 +var s_save_ttmps_lo = s_save_tmp //no conflict +var s_save_ttmps_hi = s_save_trapsts //no conflict + +/* Restore */ +var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE +var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC + +var S_RESTORE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit +var S_RESTORE_SPI_INIT_ATC_SHIFT = 27 +var S_RESTORE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype +var S_RESTORE_SPI_INIT_MTYPE_SHIFT = 28 +var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG +var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26 + +var S_RESTORE_PC_HI_RCNT_SHIFT = S_SAVE_PC_HI_RCNT_SHIFT +var S_RESTORE_PC_HI_RCNT_MASK = S_SAVE_PC_HI_RCNT_MASK +var S_RESTORE_PC_HI_FIRST_REPLAY_SHIFT = S_SAVE_PC_HI_FIRST_REPLAY_SHIFT +var S_RESTORE_PC_HI_FIRST_REPLAY_MASK = S_SAVE_PC_HI_FIRST_REPLAY_MASK + +var s_restore_spi_init_lo = exec_lo +var s_restore_spi_init_hi = exec_hi + +var s_restore_mem_offset = ttmp12 +var s_restore_alloc_size = ttmp3 +var s_restore_tmp = ttmp2 +var s_restore_mem_offset_save = s_restore_tmp //no conflict + +var s_restore_m0 = s_restore_alloc_size //no conflict + +var s_restore_mode = ttmp7 + +var s_restore_pc_lo = ttmp0 +var s_restore_pc_hi = ttmp1 +var s_restore_exec_lo = ttmp14 +var s_restore_exec_hi = ttmp15 +var s_restore_status = ttmp4 +var s_restore_trapsts = ttmp5 +var s_restore_xnack_mask_lo = xnack_mask_lo +var s_restore_xnack_mask_hi = xnack_mask_hi +var s_restore_buf_rsrc0 = ttmp8 +var s_restore_buf_rsrc1 = ttmp9 +var s_restore_buf_rsrc2 = ttmp10 +var s_restore_buf_rsrc3 = ttmp11 +var s_restore_ttmps_lo = s_restore_tmp //no conflict +var s_restore_ttmps_hi = s_restore_alloc_size //no conflict + +/**************************************************************************/ +/* trap handler entry points */ +/**************************************************************************/ +/* Shader Main*/ + +shader main + asic(GFX9) + type(CS) + + + if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) //hack to use trap_id for determining save/restore + //FIXME VCCZ un-init assertion s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC + s_and_b32 s_save_tmp, s_save_pc_hi, 0xffff0000 //change SCC + s_cmp_eq_u32 s_save_tmp, 0x007e0000 //Save: trap_id = 0x7e. Restore: trap_id = 0x7f. + s_cbranch_scc0 L_JUMP_TO_RESTORE //do not need to recover STATUS here since we are going to RESTORE + //FIXME s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //need to recover STATUS since we are going to SAVE + s_branch L_SKIP_RESTORE //NOT restore, SAVE actually + else + s_branch L_SKIP_RESTORE //NOT restore. might be a regular trap or save + end + +L_JUMP_TO_RESTORE: + s_branch L_RESTORE //restore + +L_SKIP_RESTORE: + + s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC + s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK //check whether this is for save + s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) + s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save + s_cbranch_scc1 L_SAVE //this is the operation for save + + // ********* Handle non-CWSR traps ******************* +if (!EMU_RUN_HACK) + // Illegal instruction is a non-maskable exception which blocks context save. + // Halt the wavefront and return from the trap. + s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK + s_cbranch_scc1 L_HALT_WAVE + + // If STATUS.MEM_VIOL is asserted then we cannot fetch from the TMA. + // Instead, halt the wavefront and return from the trap. + s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK + s_cbranch_scc0 L_FETCH_2ND_TRAP + +L_HALT_WAVE: + // If STATUS.HALT is set then this fault must come from SQC instruction fetch. + // We cannot prevent further faults so just terminate the wavefront. + s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK + s_cbranch_scc0 L_NOT_ALREADY_HALTED + s_endpgm +L_NOT_ALREADY_HALTED: + s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK + + // If the PC points to S_ENDPGM then context save will fail if STATUS.HALT is set. + // Rewind the PC to prevent this from occurring. The debugger compensates for this. + s_sub_u32 ttmp0, ttmp0, 0x8 + s_subb_u32 ttmp1, ttmp1, 0x0 + +L_FETCH_2ND_TRAP: + // Preserve and clear scalar XNACK state before issuing scalar reads. + // Save IB_STS.FIRST_REPLAY[15] and IB_STS.RCNT[20:16] into unused space ttmp11[31:26]. + s_getreg_b32 ttmp2, hwreg(HW_REG_IB_STS) + s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK + s_lshl_b32 ttmp3, ttmp3, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) + s_andn2_b32 ttmp11, ttmp11, TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK + s_or_b32 ttmp11, ttmp11, ttmp3 + + s_andn2_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK + s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2 + + // Read second-level TBA/TMA from first-level TMA and jump if available. + // ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data) + // ttmp12 holds SQ_WAVE_STATUS + s_getreg_b32 ttmp4, hwreg(HW_REG_SQ_SHADER_TMA_LO) + s_getreg_b32 ttmp5, hwreg(HW_REG_SQ_SHADER_TMA_HI) + s_lshl_b64 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 + s_load_dwordx2 [ttmp2, ttmp3], [ttmp4, ttmp5], 0x0 glc:1 // second-level TBA + s_waitcnt lgkmcnt(0) + s_load_dwordx2 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 glc:1 // second-level TMA + s_waitcnt lgkmcnt(0) + s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3] + s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set + s_setpc_b64 [ttmp2, ttmp3] // jump to second-level trap handler + +L_NO_NEXT_TRAP: + s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) + s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCE_MASK // Check whether it is an exception + s_cbranch_scc1 L_EXCP_CASE // Exception, jump back to the shader program directly. + s_add_u32 ttmp0, ttmp0, 4 // S_TRAP case, add 4 to ttmp0 + s_addc_u32 ttmp1, ttmp1, 0 +L_EXCP_CASE: + s_and_b32 ttmp1, ttmp1, 0xFFFF + + // Restore SQ_WAVE_IB_STS. + s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) + s_and_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK + s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2 + + // Restore SQ_WAVE_STATUS. + s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 + s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 + s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status + + s_rfe_b64 [ttmp0, ttmp1] +end + // ********* End handling of non-CWSR traps ******************* + +/**************************************************************************/ +/* save routine */ +/**************************************************************************/ + +L_SAVE: + +if G8SR_DEBUG_TIMESTAMP + s_memrealtime s_g8sr_ts_save_s + s_waitcnt lgkmcnt(0) //FIXME, will cause xnack?? +end + + s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32] + + s_mov_b32 s_save_tmp, 0 //clear saveCtx bit + s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit + + s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE) //save RCNT + s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT + s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp + s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT, SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE) //save FIRST_REPLAY + s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT + s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp + s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS) //clear RCNT and FIRST_REPLAY in IB_STS + s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG + + s_setreg_b32 hwreg(HW_REG_IB_STS), s_save_tmp + + /* inform SPI the readiness and wait for SPI's go signal */ + s_mov_b32 s_save_exec_lo, exec_lo //save EXEC and use EXEC for the go signal from SPI + s_mov_b32 s_save_exec_hi, exec_hi + s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive + +if G8SR_DEBUG_TIMESTAMP + s_memrealtime s_g8sr_ts_sq_save_msg + s_waitcnt lgkmcnt(0) +end + + if (EMU_RUN_HACK) + + else + s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC + end + + // Set SPI_PRIO=2 to avoid starving instruction fetch in the waves we're waiting for. + s_or_b32 s_save_tmp, s_save_status, (2 << SQ_WAVE_STATUS_SPI_PRIO_SHIFT) + s_setreg_b32 hwreg(HW_REG_STATUS), s_save_tmp + + L_SLEEP: + s_sleep 0x2 // sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause SQ hang, since the 7,8th wave could not get arbit to exec inst, while other waves are stuck into the sleep-loop and waiting for wrexec!=0 + + if (EMU_RUN_HACK) + + else + s_cbranch_execz L_SLEEP + end + +if G8SR_DEBUG_TIMESTAMP + s_memrealtime s_g8sr_ts_spi_wrexec + s_waitcnt lgkmcnt(0) +end + + if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_SINGLE_WAVE)) + //calculate wd_addr using absolute thread id + v_readlane_b32 s_save_tmp, v9, 0 + s_lshr_b32 s_save_tmp, s_save_tmp, 6 + s_mul_i32 s_save_tmp, s_save_tmp, WAVE_SPACE + s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO + s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI + s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL + else + end + if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_SINGLE_WAVE)) + s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO + s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI + s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL + else + end + + // Save trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic + // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40 + get_vgpr_size_bytes(s_save_ttmps_lo) + get_sgpr_size_bytes(s_save_ttmps_hi) + s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_ttmps_hi + s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo + s_addc_u32 s_save_ttmps_hi, s_save_spi_init_hi, 0x0 + s_and_b32 s_save_ttmps_hi, s_save_ttmps_hi, 0xFFFF + s_store_dwordx2 [ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x40 glc:1 + ack_sqc_store_workaround() + s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x48 glc:1 + ack_sqc_store_workaround() + s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x58 glc:1 + ack_sqc_store_workaround() + s_store_dwordx2 [ttmp14, ttmp15], [s_save_ttmps_lo, s_save_ttmps_hi], 0x5C glc:1 + ack_sqc_store_workaround() + + /* setup Resource Contants */ + s_mov_b32 s_save_buf_rsrc0, s_save_spi_init_lo //base_addr_lo + s_and_b32 s_save_buf_rsrc1, s_save_spi_init_hi, 0x0000FFFF //base_addr_hi + s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE + s_mov_b32 s_save_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) although not neccessarily inited + s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC + s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_ATC_MASK + s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position + s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or ATC + s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_MTYPE_MASK + s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position + s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or MTYPE + + //FIXME right now s_save_m0/s_save_mem_offset use tma_lo/tma_hi (might need to save them before using them?) + s_mov_b32 s_save_m0, m0 //save M0 + + /* global mem offset */ + s_mov_b32 s_save_mem_offset, 0x0 //mem offset initial value = 0 + + + + + /* save HW registers */ + ////////////////////////////// + + L_SAVE_HWREG: + // HWREG SR memory offset : size(VGPR)+size(SGPR) + get_vgpr_size_bytes(s_save_mem_offset) + get_sgpr_size_bytes(s_save_tmp) + s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp + + + s_mov_b32 s_save_buf_rsrc2, 0x4 //NUM_RECORDS in bytes + if (SWIZZLE_EN) + s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + + write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) //M0 + + if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_FIRST_TIME)) + s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4 + s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over + end + + write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset) //PC + write_hwreg_to_mem(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset) + write_hwreg_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset) //EXEC + write_hwreg_to_mem(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset) + write_hwreg_to_mem(s_save_status, s_save_buf_rsrc0, s_save_mem_offset) //STATUS + + //s_save_trapsts conflicts with s_save_alloc_size + s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) + write_hwreg_to_mem(s_save_trapsts, s_save_buf_rsrc0, s_save_mem_offset) //TRAPSTS + + write_hwreg_to_mem(xnack_mask_lo, s_save_buf_rsrc0, s_save_mem_offset) //XNACK_MASK_LO + write_hwreg_to_mem(xnack_mask_hi, s_save_buf_rsrc0, s_save_mem_offset) //XNACK_MASK_HI + + //use s_save_tmp would introduce conflict here between s_save_tmp and s_save_buf_rsrc2 + s_getreg_b32 s_save_m0, hwreg(HW_REG_MODE) //MODE + write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) + + + + /* the first wave in the threadgroup */ + s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK // extract fisrt wave bit + s_mov_b32 s_save_exec_hi, 0x0 + s_or_b32 s_save_exec_hi, s_save_tmp, s_save_exec_hi // save first wave bit to s_save_exec_hi.bits[26] + + + /* save SGPRs */ + // Save SGPR before LDS save, then the s0 to s4 can be used during LDS save... + ////////////////////////////// + + // SGPR SR memory offset : size(VGPR) + get_vgpr_size_bytes(s_save_mem_offset) + // TODO, change RSRC word to rearrange memory layout for SGPRS + + s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE) //spgr_size + s_add_u32 s_save_alloc_size, s_save_alloc_size, 1 + s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value) + + if (SGPR_SAVE_USE_SQC) + s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 2 //NUM_RECORDS in bytes + else + s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads) + end + + if (SWIZZLE_EN) + s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + + // backup s_save_buf_rsrc0,1 to s_save_pc_lo/hi, since write_16sgpr_to_mem function will change the rsrc0 + //s_mov_b64 s_save_pc_lo, s_save_buf_rsrc0 + s_mov_b64 s_save_xnack_mask_lo, s_save_buf_rsrc0 + s_add_u32 s_save_buf_rsrc0, s_save_buf_rsrc0, s_save_mem_offset + s_addc_u32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0 + + s_mov_b32 m0, 0x0 //SGPR initial index value =0 + s_nop 0x0 //Manually inserted wait states + L_SAVE_SGPR_LOOP: + // SGPR is allocated in 16 SGPR granularity + s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0] + s_movrels_b64 s2, s2 //s2 = s[2+m0], s3 = s[3+m0] + s_movrels_b64 s4, s4 //s4 = s[4+m0], s5 = s[5+m0] + s_movrels_b64 s6, s6 //s6 = s[6+m0], s7 = s[7+m0] + s_movrels_b64 s8, s8 //s8 = s[8+m0], s9 = s[9+m0] + s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0] + s_movrels_b64 s12, s12 //s12 = s[12+m0], s13 = s[13+m0] + s_movrels_b64 s14, s14 //s14 = s[14+m0], s15 = s[15+m0] + + write_16sgpr_to_mem(s0, s_save_buf_rsrc0, s_save_mem_offset) //PV: the best performance should be using s_buffer_store_dwordx4 + s_add_u32 m0, m0, 16 //next sgpr index + s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_SAVE_SGPR_LOOP //SGPR save is complete? + // restore s_save_buf_rsrc0,1 + //s_mov_b64 s_save_buf_rsrc0, s_save_pc_lo + s_mov_b64 s_save_buf_rsrc0, s_save_xnack_mask_lo + + + + + /* save first 4 VGPR, then LDS save could use */ + // each wave will alloc 4 vgprs at least... + ///////////////////////////////////////////////////////////////////////////////////// + + s_mov_b32 s_save_mem_offset, 0 + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on + s_mov_b32 exec_hi, 0xFFFFFFFF + s_mov_b32 xnack_mask_lo, 0x0 + s_mov_b32 xnack_mask_hi, 0x0 + + if (SWIZZLE_EN) + s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + + // VGPR Allocated in 4-GPR granularity + +if G8SR_VGPR_SR_IN_DWX4 + // the const stride for DWx4 is 4*4 bytes + s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 + s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes + + buffer_store_dwordx4 v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 + + s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 + s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE // reset const stride to 4 bytes +else + buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 + buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256 + buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2 + buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3 +end + + + + /* save LDS */ + ////////////////////////////// + + L_SAVE_LDS: + + // Change EXEC to all threads... + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on + s_mov_b32 exec_hi, 0xFFFFFFFF + + s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) //lds_size + s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //lds_size is zero? + s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_DONE + + s_barrier //LDS is used? wait for other waves in the same TG + s_and_b32 s_save_tmp, s_save_exec_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK //exec is still used here + s_cbranch_scc0 L_SAVE_LDS_DONE + + // first wave do LDS save; + + s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 6 //LDS size in dwords = lds_size * 64dw + s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //LDS size in bytes + s_mov_b32 s_save_buf_rsrc2, s_save_alloc_size //NUM_RECORDS in bytes + + // LDS at offset: size(VGPR)+SIZE(SGPR)+SIZE(HWREG) + // + get_vgpr_size_bytes(s_save_mem_offset) + get_sgpr_size_bytes(s_save_tmp) + s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp + s_add_u32 s_save_mem_offset, s_save_mem_offset, get_hwreg_size_bytes() + + + if (SWIZZLE_EN) + s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + s_mov_b32 m0, 0x0 //lds_offset initial value = 0 + + +var LDS_DMA_ENABLE = 0 +var UNROLL = 0 +if UNROLL==0 && LDS_DMA_ENABLE==1 + s_mov_b32 s3, 256*2 + s_nop 0 + s_nop 0 + s_nop 0 + L_SAVE_LDS_LOOP: + //TODO: looks the 2 buffer_store/load clause for s/r will hurt performance.??? + if (SAVE_LDS) //SPI always alloc LDS space in 128DW granularity + buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 // first 64DW + buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:256 // second 64DW + end + + s_add_u32 m0, m0, s3 //every buffer_store_lds does 256 bytes + s_add_u32 s_save_mem_offset, s_save_mem_offset, s3 //mem offset increased by 256 bytes + s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_SAVE_LDS_LOOP //LDS save is complete? + +elsif LDS_DMA_ENABLE==1 && UNROLL==1 // UNROOL , has ichace miss + // store from higest LDS address to lowest + s_mov_b32 s3, 256*2 + s_sub_u32 m0, s_save_alloc_size, s3 + s_add_u32 s_save_mem_offset, s_save_mem_offset, m0 + s_lshr_b32 s_save_alloc_size, s_save_alloc_size, 9 // how many 128 trunks... + s_sub_u32 s_save_alloc_size, 128, s_save_alloc_size // store from higheset addr to lowest + s_mul_i32 s_save_alloc_size, s_save_alloc_size, 6*4 // PC offset increment, each LDS save block cost 6*4 Bytes instruction + s_add_u32 s_save_alloc_size, s_save_alloc_size, 3*4 //2is the below 2 inst...//s_addc and s_setpc + s_nop 0 + s_nop 0 + s_nop 0 //pad 3 dw to let LDS_DMA align with 64Bytes + s_getpc_b64 s[0:1] // reuse s[0:1], since s[0:1] already saved + s_add_u32 s0, s0,s_save_alloc_size + s_addc_u32 s1, s1, 0 + s_setpc_b64 s[0:1] + + + for var i =0; i< 128; i++ + // be careful to make here a 64Byte aligned address, which could improve performance... + buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:0 // first 64DW + buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:256 // second 64DW + + if i!=127 + s_sub_u32 m0, m0, s3 // use a sgpr to shrink 2DW-inst to 1DW inst to improve performance , i.e. pack more LDS_DMA inst to one Cacheline + s_sub_u32 s_save_mem_offset, s_save_mem_offset, s3 + end + end + +else // BUFFER_STORE + v_mbcnt_lo_u32_b32 v2, 0xffffffff, 0x0 + v_mbcnt_hi_u32_b32 v3, 0xffffffff, v2 // tid + v_mul_i32_i24 v2, v3, 8 // tid*8 + v_mov_b32 v3, 256*2 + s_mov_b32 m0, 0x10000 + s_mov_b32 s0, s_save_buf_rsrc3 + s_and_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, 0xFF7FFFFF // disable add_tid + s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, 0x58000 //DFMT + +L_SAVE_LDS_LOOP_VECTOR: + ds_read_b64 v[0:1], v2 //x =LDS[a], byte address + s_waitcnt lgkmcnt(0) + buffer_store_dwordx2 v[0:1], v2, s_save_buf_rsrc0, s_save_mem_offset offen:1 glc:1 slc:1 +// s_waitcnt vmcnt(0) +// v_add_u32 v2, vcc[0:1], v2, v3 + v_add_u32 v2, v2, v3 + v_cmp_lt_u32 vcc[0:1], v2, s_save_alloc_size + s_cbranch_vccnz L_SAVE_LDS_LOOP_VECTOR + + // restore rsrc3 + s_mov_b32 s_save_buf_rsrc3, s0 + +end + +L_SAVE_LDS_DONE: + + + /* save VGPRs - set the Rest VGPRs */ + ////////////////////////////////////////////////////////////////////////////////////// + L_SAVE_VGPR: + // VGPR SR memory offset: 0 + // TODO rearrange the RSRC words to use swizzle for VGPR save... + + s_mov_b32 s_save_mem_offset, (0+256*4) // for the rest VGPRs + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on + s_mov_b32 exec_hi, 0xFFFFFFFF + + s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vpgr_size + s_add_u32 s_save_alloc_size, s_save_alloc_size, 1 + s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) //FIXME for GFX, zero is possible + s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4) + if (SWIZZLE_EN) + s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + + // VGPR Allocated in 4-GPR granularity + +if G8SR_VGPR_SR_IN_DWX4 + // the const stride for DWx4 is 4*4 bytes + s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 + s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes + + s_mov_b32 m0, 4 // skip first 4 VGPRs + s_cmp_lt_u32 m0, s_save_alloc_size + s_cbranch_scc0 L_SAVE_VGPR_LOOP_END // no more vgprs + + s_set_gpr_idx_on m0, 0x1 // This will change M0 + s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 // because above inst change m0 +L_SAVE_VGPR_LOOP: + v_mov_b32 v0, v0 // v0 = v[0+m0] + v_mov_b32 v1, v1 + v_mov_b32 v2, v2 + v_mov_b32 v3, v3 + + + buffer_store_dwordx4 v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 + s_add_u32 m0, m0, 4 + s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 + s_cmp_lt_u32 m0, s_save_alloc_size + s_cbranch_scc1 L_SAVE_VGPR_LOOP //VGPR save is complete? + s_set_gpr_idx_off +L_SAVE_VGPR_LOOP_END: + + s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 + s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE // reset const stride to 4 bytes +else + // VGPR store using dw burst + s_mov_b32 m0, 0x4 //VGPR initial index value =0 + s_cmp_lt_u32 m0, s_save_alloc_size + s_cbranch_scc0 L_SAVE_VGPR_END + + + s_set_gpr_idx_on m0, 0x1 //M0[7:0] = M0[7:0] and M0[15:12] = 0x1 + s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 //add 0x1000 since we compare m0 against it later + + L_SAVE_VGPR_LOOP: + v_mov_b32 v0, v0 //v0 = v[0+m0] + v_mov_b32 v1, v1 //v0 = v[0+m0] + v_mov_b32 v2, v2 //v0 = v[0+m0] + v_mov_b32 v3, v3 //v0 = v[0+m0] + + if(USE_MTBUF_INSTEAD_OF_MUBUF) + tbuffer_store_format_x v0, v0, s_save_buf_rsrc0, s_save_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 + else + buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 + buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256 + buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2 + buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3 + end + + s_add_u32 m0, m0, 4 //next vgpr index + s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 //every buffer_store_dword does 256 bytes + s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_SAVE_VGPR_LOOP //VGPR save is complete? + s_set_gpr_idx_off +end + +L_SAVE_VGPR_END: + + + + + + + /* S_PGM_END_SAVED */ //FIXME graphics ONLY + if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_NORMAL_EXIT)) + s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32] + s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4 + s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over + s_rfe_b64 s_save_pc_lo //Return to the main shader program + else + end + +// Save Done timestamp +if G8SR_DEBUG_TIMESTAMP + s_memrealtime s_g8sr_ts_save_d + // SGPR SR memory offset : size(VGPR) + get_vgpr_size_bytes(s_save_mem_offset) + s_add_u32 s_save_mem_offset, s_save_mem_offset, G8SR_DEBUG_TS_SAVE_D_OFFSET + s_waitcnt lgkmcnt(0) //FIXME, will cause xnack?? + // Need reset rsrc2?? + s_mov_b32 m0, s_save_mem_offset + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + s_buffer_store_dwordx2 s_g8sr_ts_save_d, s_save_buf_rsrc0, m0 glc:1 +end + + + s_branch L_END_PGM + + + +/**************************************************************************/ +/* restore routine */ +/**************************************************************************/ + +L_RESTORE: + /* Setup Resource Contants */ + if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) + //calculate wd_addr using absolute thread id + v_readlane_b32 s_restore_tmp, v9, 0 + s_lshr_b32 s_restore_tmp, s_restore_tmp, 6 + s_mul_i32 s_restore_tmp, s_restore_tmp, WAVE_SPACE + s_add_i32 s_restore_spi_init_lo, s_restore_tmp, WG_BASE_ADDR_LO + s_mov_b32 s_restore_spi_init_hi, WG_BASE_ADDR_HI + s_and_b32 s_restore_spi_init_hi, s_restore_spi_init_hi, CTX_RESTORE_CONTROL + else + end + +if G8SR_DEBUG_TIMESTAMP + s_memrealtime s_g8sr_ts_restore_s + s_waitcnt lgkmcnt(0) //FIXME, will cause xnack?? + // tma_lo/hi are sgpr 110, 111, which will not used for 112 SGPR allocated case... + s_mov_b32 s_restore_pc_lo, s_g8sr_ts_restore_s[0] + s_mov_b32 s_restore_pc_hi, s_g8sr_ts_restore_s[1] //backup ts to ttmp0/1, sicne exec will be finally restored.. +end + + + + s_mov_b32 s_restore_buf_rsrc0, s_restore_spi_init_lo //base_addr_lo + s_and_b32 s_restore_buf_rsrc1, s_restore_spi_init_hi, 0x0000FFFF //base_addr_hi + s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE + s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) + s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC + s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_ATC_MASK + s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position + s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or ATC + s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_MTYPE_MASK + s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position + s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or MTYPE + + /* global mem offset */ +// s_mov_b32 s_restore_mem_offset, 0x0 //mem offset initial value = 0 + + /* the first wave in the threadgroup */ + s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK + s_cbranch_scc0 L_RESTORE_VGPR + + /* restore LDS */ + ////////////////////////////// + L_RESTORE_LDS: + + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on //be consistent with SAVE although can be moved ahead + s_mov_b32 exec_hi, 0xFFFFFFFF + + s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) //lds_size + s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //lds_size is zero? + s_cbranch_scc0 L_RESTORE_VGPR //no lds used? jump to L_RESTORE_VGPR + s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 6 //LDS size in dwords = lds_size * 64dw + s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //LDS size in bytes + s_mov_b32 s_restore_buf_rsrc2, s_restore_alloc_size //NUM_RECORDS in bytes + + // LDS at offset: size(VGPR)+SIZE(SGPR)+SIZE(HWREG) + // + get_vgpr_size_bytes(s_restore_mem_offset) + get_sgpr_size_bytes(s_restore_tmp) + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_hwreg_size_bytes() //FIXME, Check if offset overflow??? + + + if (SWIZZLE_EN) + s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + s_mov_b32 m0, 0x0 //lds_offset initial value = 0 + + L_RESTORE_LDS_LOOP: + if (SAVE_LDS) + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:256 // second 64DW + end + s_add_u32 m0, m0, 256*2 // 128 DW + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*2 //mem offset increased by 128DW + s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_RESTORE_LDS_LOOP //LDS restore is complete? + + + /* restore VGPRs */ + ////////////////////////////// + L_RESTORE_VGPR: + // VGPR SR memory offset : 0 + s_mov_b32 s_restore_mem_offset, 0x0 + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on //be consistent with SAVE although can be moved ahead + s_mov_b32 exec_hi, 0xFFFFFFFF + + s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vpgr_size + s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1 + s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) + s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4) + if (SWIZZLE_EN) + s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + +if G8SR_VGPR_SR_IN_DWX4 + get_vgpr_size_bytes(s_restore_mem_offset) + s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 + + // the const stride for DWx4 is 4*4 bytes + s_and_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, 0x0000FFFF // reset const stride to 0 + s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, G8SR_RESTORE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes + + s_mov_b32 m0, s_restore_alloc_size + s_set_gpr_idx_on m0, 0x8 // Note.. This will change m0 + +L_RESTORE_VGPR_LOOP: + buffer_load_dwordx4 v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 + s_waitcnt vmcnt(0) + s_sub_u32 m0, m0, 4 + v_mov_b32 v0, v0 // v[0+m0] = v0 + v_mov_b32 v1, v1 + v_mov_b32 v2, v2 + v_mov_b32 v3, v3 + s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 + s_cmp_eq_u32 m0, 0x8000 + s_cbranch_scc0 L_RESTORE_VGPR_LOOP + s_set_gpr_idx_off + + s_and_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, 0x0000FFFF // reset const stride to 0 + s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE // const stride to 4*4 bytes + +else + // VGPR load using dw burst + s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be the last + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 + s_mov_b32 m0, 4 //VGPR initial index value = 1 + s_set_gpr_idx_on m0, 0x8 //M0[7:0] = M0[7:0] and M0[15:12] = 0x8 + s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 0x8000 //add 0x8000 since we compare m0 against it later + + L_RESTORE_VGPR_LOOP: + if(USE_MTBUF_INSTEAD_OF_MUBUF) + tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 + else + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 + buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256 + buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*2 + buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*3 + end + s_waitcnt vmcnt(0) //ensure data ready + v_mov_b32 v0, v0 //v[0+m0] = v0 + v_mov_b32 v1, v1 + v_mov_b32 v2, v2 + v_mov_b32 v3, v3 + s_add_u32 m0, m0, 4 //next vgpr index + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 //every buffer_load_dword does 256 bytes + s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_RESTORE_VGPR_LOOP //VGPR restore (except v0) is complete? + s_set_gpr_idx_off + /* VGPR restore on v0 */ + if(USE_MTBUF_INSTEAD_OF_MUBUF) + tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 + else + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 + buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256 + buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*2 + buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*3 + end + +end + + /* restore SGPRs */ + ////////////////////////////// + + // SGPR SR memory offset : size(VGPR) + get_vgpr_size_bytes(s_restore_mem_offset) + get_sgpr_size_bytes(s_restore_tmp) + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp + s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 16*4 // restore SGPR from S[n] to S[0], by 16 sgprs group + // TODO, change RSRC word to rearrange memory layout for SGPRS + + s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE) //spgr_size + s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1 + s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value) + + if (SGPR_SAVE_USE_SQC) + s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 2 //NUM_RECORDS in bytes + else + s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads) + end + if (SWIZZLE_EN) + s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + s_mov_b32 m0, s_restore_alloc_size + + L_RESTORE_SGPR_LOOP: + read_16sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset) //PV: further performance improvement can be made + s_waitcnt lgkmcnt(0) //ensure data ready + + s_sub_u32 m0, m0, 16 // Restore from S[n] to S[0] + s_nop 0 // hazard SALU M0=> S_MOVREL + + s_movreld_b64 s0, s0 //s[0+m0] = s0 + s_movreld_b64 s2, s2 + s_movreld_b64 s4, s4 + s_movreld_b64 s6, s6 + s_movreld_b64 s8, s8 + s_movreld_b64 s10, s10 + s_movreld_b64 s12, s12 + s_movreld_b64 s14, s14 + + s_cmp_eq_u32 m0, 0 //scc = (m0 < s_restore_alloc_size) ? 1 : 0 + s_cbranch_scc0 L_RESTORE_SGPR_LOOP //SGPR restore (except s0) is complete? + + /* restore HW registers */ + ////////////////////////////// + L_RESTORE_HWREG: + + +if G8SR_DEBUG_TIMESTAMP + s_mov_b32 s_g8sr_ts_restore_s[0], s_restore_pc_lo + s_mov_b32 s_g8sr_ts_restore_s[1], s_restore_pc_hi +end + + // HWREG SR memory offset : size(VGPR)+size(SGPR) + get_vgpr_size_bytes(s_restore_mem_offset) + get_sgpr_size_bytes(s_restore_tmp) + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp + + + s_mov_b32 s_restore_buf_rsrc2, 0x4 //NUM_RECORDS in bytes + if (SWIZZLE_EN) + s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset) //M0 + read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //PC + read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_exec_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //EXEC + read_hwreg_from_mem(s_restore_exec_hi, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_status, s_restore_buf_rsrc0, s_restore_mem_offset) //STATUS + read_hwreg_from_mem(s_restore_trapsts, s_restore_buf_rsrc0, s_restore_mem_offset) //TRAPSTS + read_hwreg_from_mem(xnack_mask_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //XNACK_MASK_LO + read_hwreg_from_mem(xnack_mask_hi, s_restore_buf_rsrc0, s_restore_mem_offset) //XNACK_MASK_HI + read_hwreg_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset) //MODE + + s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS + + //for normal save & restore, the saved PC points to the next inst to execute, no adjustment needs to be made, otherwise: + if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) + s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 8 //pc[31:0]+8 //two back-to-back s_trap are used (first for save and second for restore) + s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over + end + if ((EMU_RUN_HACK) && (EMU_RUN_HACK_RESTORE_NORMAL)) + s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 4 //pc[31:0]+4 // save is hack through s_trap but restore is normal + s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over + end + + s_mov_b32 m0, s_restore_m0 + s_mov_b32 exec_lo, s_restore_exec_lo + s_mov_b32 exec_hi, s_restore_exec_hi + + s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK, s_restore_trapsts + s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE), s_restore_m0 + s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK, s_restore_trapsts + s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT + s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE), s_restore_m0 + //s_setreg_b32 hwreg(HW_REG_TRAPSTS), s_restore_trapsts //don't overwrite SAVECTX bit as it may be set through external SAVECTX during restore + s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode + + // Restore trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic + // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40 + get_vgpr_size_bytes(s_restore_ttmps_lo) + get_sgpr_size_bytes(s_restore_ttmps_hi) + s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_ttmps_hi + s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0 + s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0 + s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF + s_load_dwordx2 [ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x40 glc:1 + s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x48 glc:1 + s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x58 glc:1 + s_load_dwordx2 [ttmp14, ttmp15], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x5C glc:1 + s_waitcnt lgkmcnt(0) + + //reuse s_restore_m0 as a temp register + s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_RCNT_MASK + s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT + s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_RCNT_SHIFT + s_mov_b32 s_restore_tmp, 0x0 //IB_STS is zero + s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0 + s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_FIRST_REPLAY_MASK + s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT + s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT + s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0 + s_and_b32 s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK + s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT + s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp + + s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS + s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 + s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 + s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu + + s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time + +if G8SR_DEBUG_TIMESTAMP + s_memrealtime s_g8sr_ts_restore_d + s_waitcnt lgkmcnt(0) +end + +// s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution + s_rfe_restore_b64 s_restore_pc_lo, s_restore_m0 // s_restore_m0[0] is used to set STATUS.inst_atc + + +/**************************************************************************/ +/* the END */ +/**************************************************************************/ +L_END_PGM: + s_endpgm + +end + + +/**************************************************************************/ +/* the helper functions */ +/**************************************************************************/ + +//Only for save hwreg to mem +function write_hwreg_to_mem(s, s_rsrc, s_mem_offset) + s_mov_b32 exec_lo, m0 //assuming exec_lo is not needed anymore from this point on + s_mov_b32 m0, s_mem_offset + s_buffer_store_dword s, s_rsrc, m0 glc:1 + ack_sqc_store_workaround() + s_add_u32 s_mem_offset, s_mem_offset, 4 + s_mov_b32 m0, exec_lo +end + + +// HWREG are saved before SGPRs, so all HWREG could be use. +function write_16sgpr_to_mem(s, s_rsrc, s_mem_offset) + + s_buffer_store_dwordx4 s[0], s_rsrc, 0 glc:1 + ack_sqc_store_workaround() + s_buffer_store_dwordx4 s[4], s_rsrc, 16 glc:1 + ack_sqc_store_workaround() + s_buffer_store_dwordx4 s[8], s_rsrc, 32 glc:1 + ack_sqc_store_workaround() + s_buffer_store_dwordx4 s[12], s_rsrc, 48 glc:1 + ack_sqc_store_workaround() + s_add_u32 s_rsrc[0], s_rsrc[0], 4*16 + s_addc_u32 s_rsrc[1], s_rsrc[1], 0x0 // +scc +end + + +function read_hwreg_from_mem(s, s_rsrc, s_mem_offset) + s_buffer_load_dword s, s_rsrc, s_mem_offset glc:1 + s_add_u32 s_mem_offset, s_mem_offset, 4 +end + +function read_16sgpr_from_mem(s, s_rsrc, s_mem_offset) + s_buffer_load_dwordx16 s, s_rsrc, s_mem_offset glc:1 + s_sub_u32 s_mem_offset, s_mem_offset, 4*16 +end + + + +function get_lds_size_bytes(s_lds_size_byte) + // SQ LDS granularity is 64DW, while PGM_RSRC2.lds_size is in granularity 128DW + s_getreg_b32 s_lds_size_byte, hwreg(HW_REG_LDS_ALLOC, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) // lds_size + s_lshl_b32 s_lds_size_byte, s_lds_size_byte, 8 //LDS size in dwords = lds_size * 64 *4Bytes // granularity 64DW +end + +function get_vgpr_size_bytes(s_vgpr_size_byte) + s_getreg_b32 s_vgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vpgr_size + s_add_u32 s_vgpr_size_byte, s_vgpr_size_byte, 1 + s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+8) //Number of VGPRs = (vgpr_size + 1) * 4 * 64 * 4 (non-zero value) //FIXME for GFX, zero is possible +end + +function get_sgpr_size_bytes(s_sgpr_size_byte) + s_getreg_b32 s_sgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE) //spgr_size + s_add_u32 s_sgpr_size_byte, s_sgpr_size_byte, 1 + s_lshl_b32 s_sgpr_size_byte, s_sgpr_size_byte, 6 //Number of SGPRs = (sgpr_size + 1) * 16 *4 (non-zero value) +end + +function get_hwreg_size_bytes + return 128 //HWREG size 128 bytes +end + +function ack_sqc_store_workaround + if ACK_SQC_STORE + s_waitcnt lgkmcnt(0) + end +end diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 59808a39ecf4..f64c5551cdba 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -233,7 +233,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, pr_debug("Queue Size: 0x%llX, %u\n", q_properties->queue_size, args->ring_size); - pr_debug("Queue r/w Pointers: %p, %p\n", + pr_debug("Queue r/w Pointers: %px, %px\n", q_properties->read_ptr, q_properties->write_ptr); @@ -292,8 +292,16 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, /* Return gpu_id as doorbell offset for mmap usage */ - args->doorbell_offset = (KFD_MMAP_DOORBELL_MASK | args->gpu_id); + args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL; + args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id); args->doorbell_offset <<= PAGE_SHIFT; + if (KFD_IS_SOC15(dev->device_info->asic_family)) + /* On SOC15 ASICs, doorbell allocation must be + * per-device, and independent from the per-process + * queue_id. Return the doorbell offset within the + * doorbell aperture to user mode. + */ + args->doorbell_offset |= q_properties.doorbell_off; mutex_unlock(&p->mutex); @@ -1296,8 +1304,8 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, return -EINVAL; } - devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr), - GFP_KERNEL); + devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr), + GFP_KERNEL); if (!devices_arr) return -ENOMEM; @@ -1405,8 +1413,8 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, return -EINVAL; } - devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr), - GFP_KERNEL); + devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr), + GFP_KERNEL); if (!devices_arr) return -ENOMEM; @@ -1645,23 +1653,33 @@ err_i1: static int kfd_mmap(struct file *filp, struct vm_area_struct *vma) { struct kfd_process *process; + struct kfd_dev *dev = NULL; + unsigned long vm_pgoff; + unsigned int gpu_id; process = kfd_get_process(current); if (IS_ERR(process)) return PTR_ERR(process); - if ((vma->vm_pgoff & KFD_MMAP_DOORBELL_MASK) == - KFD_MMAP_DOORBELL_MASK) { - vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_DOORBELL_MASK; - return kfd_doorbell_mmap(process, vma); - } else if ((vma->vm_pgoff & KFD_MMAP_EVENTS_MASK) == - KFD_MMAP_EVENTS_MASK) { - vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_EVENTS_MASK; + vm_pgoff = vma->vm_pgoff; + vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff); + gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff); + if (gpu_id) + dev = kfd_device_by_id(gpu_id); + + switch (vm_pgoff & KFD_MMAP_TYPE_MASK) { + case KFD_MMAP_TYPE_DOORBELL: + if (!dev) + return -ENODEV; + return kfd_doorbell_mmap(dev, process, vma); + + case KFD_MMAP_TYPE_EVENTS: return kfd_event_mmap(process, vma); - } else if ((vma->vm_pgoff & KFD_MMAP_RESERVED_MEM_MASK) == - KFD_MMAP_RESERVED_MEM_MASK) { - vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_RESERVED_MEM_MASK; - return kfd_reserved_mem_mmap(process, vma); + + case KFD_MMAP_TYPE_RESERVED_MEM: + if (!dev) + return -ENODEV; + return kfd_reserved_mem_mmap(dev, process, vma); } return -EFAULT; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 4f126ef6139b..296b3f230280 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -132,6 +132,9 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = { #define fiji_cache_info carrizo_cache_info #define polaris10_cache_info carrizo_cache_info #define polaris11_cache_info carrizo_cache_info +/* TODO - check & update Vega10 cache details */ +#define vega10_cache_info carrizo_cache_info +#define raven_cache_info carrizo_cache_info static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev, struct crat_subtype_computeunit *cu) @@ -603,6 +606,14 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, pcache_info = polaris11_cache_info; num_of_cache_types = ARRAY_SIZE(polaris11_cache_info); break; + case CHIP_VEGA10: + pcache_info = vega10_cache_info; + num_of_cache_types = ARRAY_SIZE(vega10_cache_info); + break; + case CHIP_RAVEN: + pcache_info = raven_cache_info; + num_of_cache_types = ARRAY_SIZE(raven_cache_info); + break; default: return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 3346699960dd..7ee6cec2c060 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -20,16 +20,13 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2) -#include <linux/amd-iommu.h> -#endif #include <linux/bsearch.h> #include <linux/pci.h> #include <linux/slab.h> #include "kfd_priv.h" #include "kfd_device_queue_manager.h" #include "kfd_pm4_headers_vi.h" -#include "cwsr_trap_handler_gfx8.asm" +#include "cwsr_trap_handler.h" #include "kfd_iommu.h" #define MQD_SIZE_ALIGNED 768 @@ -41,6 +38,7 @@ static const struct kfd_device_info kaveri_device_info = { .max_pasid_bits = 16, /* max num of queues for KV.TODO should be a dynamic value */ .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -55,6 +53,7 @@ static const struct kfd_device_info carrizo_device_info = { .max_pasid_bits = 16, /* max num of queues for CZ.TODO should be a dynamic value */ .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -70,6 +69,7 @@ static const struct kfd_device_info hawaii_device_info = { .max_pasid_bits = 16, /* max num of queues for KV.TODO should be a dynamic value */ .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -83,6 +83,7 @@ static const struct kfd_device_info tonga_device_info = { .asic_family = CHIP_TONGA, .max_pasid_bits = 16, .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -96,6 +97,7 @@ static const struct kfd_device_info tonga_vf_device_info = { .asic_family = CHIP_TONGA, .max_pasid_bits = 16, .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -109,6 +111,7 @@ static const struct kfd_device_info fiji_device_info = { .asic_family = CHIP_FIJI, .max_pasid_bits = 16, .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -122,6 +125,7 @@ static const struct kfd_device_info fiji_vf_device_info = { .asic_family = CHIP_FIJI, .max_pasid_bits = 16, .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -136,6 +140,7 @@ static const struct kfd_device_info polaris10_device_info = { .asic_family = CHIP_POLARIS10, .max_pasid_bits = 16, .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -149,6 +154,7 @@ static const struct kfd_device_info polaris10_vf_device_info = { .asic_family = CHIP_POLARIS10, .max_pasid_bits = 16, .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -162,6 +168,7 @@ static const struct kfd_device_info polaris11_device_info = { .asic_family = CHIP_POLARIS11, .max_pasid_bits = 16, .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -171,6 +178,34 @@ static const struct kfd_device_info polaris11_device_info = { .needs_pci_atomics = true, }; +static const struct kfd_device_info vega10_device_info = { + .asic_family = CHIP_VEGA10, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .doorbell_size = 8, + .ih_ring_entry_size = 8 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_v9, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = false, + .needs_pci_atomics = false, +}; + +static const struct kfd_device_info vega10_vf_device_info = { + .asic_family = CHIP_VEGA10, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .doorbell_size = 8, + .ih_ring_entry_size = 8 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_v9, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = false, + .needs_pci_atomics = false, +}; + struct kfd_deviceid { unsigned short did; @@ -250,6 +285,15 @@ static const struct kfd_deviceid supported_devices[] = { { 0x67EB, &polaris11_device_info }, /* Polaris11 */ { 0x67EF, &polaris11_device_info }, /* Polaris11 */ { 0x67FF, &polaris11_device_info }, /* Polaris11 */ + { 0x6860, &vega10_device_info }, /* Vega10 */ + { 0x6861, &vega10_device_info }, /* Vega10 */ + { 0x6862, &vega10_device_info }, /* Vega10 */ + { 0x6863, &vega10_device_info }, /* Vega10 */ + { 0x6864, &vega10_device_info }, /* Vega10 */ + { 0x6867, &vega10_device_info }, /* Vega10 */ + { 0x6868, &vega10_device_info }, /* Vega10 */ + { 0x686C, &vega10_vf_device_info }, /* Vega10 vf*/ + { 0x687F, &vega10_device_info }, /* Vega10 */ }; static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, @@ -279,7 +323,7 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev, const struct kfd2kgd_calls *f2g) { struct kfd_dev *kfd; - + int ret; const struct kfd_device_info *device_info = lookup_device_info(pdev->device); @@ -288,19 +332,18 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, return NULL; } - if (device_info->needs_pci_atomics) { - /* Allow BIF to recode atomics to PCIe 3.0 - * AtomicOps. 32 and 64-bit requests are possible and - * must be supported. - */ - if (pci_enable_atomic_ops_to_root(pdev, - PCI_EXP_DEVCAP2_ATOMIC_COMP32 | - PCI_EXP_DEVCAP2_ATOMIC_COMP64) < 0) { - dev_info(kfd_device, - "skipped device %x:%x, PCI rejects atomics", - pdev->vendor, pdev->device); - return NULL; - } + /* Allow BIF to recode atomics to PCIe 3.0 AtomicOps. + * 32 and 64-bit requests are possible and must be + * supported. + */ + ret = pci_enable_atomic_ops_to_root(pdev, + PCI_EXP_DEVCAP2_ATOMIC_COMP32 | + PCI_EXP_DEVCAP2_ATOMIC_COMP64); + if (device_info->needs_pci_atomics && ret < 0) { + dev_info(kfd_device, + "skipped device %x:%x, PCI rejects atomics\n", + pdev->vendor, pdev->device); + return NULL; } kfd = kzalloc(sizeof(*kfd), GFP_KERNEL); @@ -323,10 +366,16 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, static void kfd_cwsr_init(struct kfd_dev *kfd) { if (cwsr_enable && kfd->device_info->supports_cwsr) { - BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE); + if (kfd->device_info->asic_family < CHIP_VEGA10) { + BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE); + kfd->cwsr_isa = cwsr_trap_gfx8_hex; + kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex); + } else { + BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE); + kfd->cwsr_isa = cwsr_trap_gfx9_hex; + kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex); + } - kfd->cwsr_isa = cwsr_trap_gfx8_hex; - kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex); kfd->cwsr_enabled = true; } } @@ -541,6 +590,44 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) spin_unlock(&kfd->interrupt_lock); } +int kgd2kfd_quiesce_mm(struct mm_struct *mm) +{ + struct kfd_process *p; + int r; + + /* Because we are called from arbitrary context (workqueue) as opposed + * to process context, kfd_process could attempt to exit while we are + * running so the lookup function increments the process ref count. + */ + p = kfd_lookup_process_by_mm(mm); + if (!p) + return -ESRCH; + + r = kfd_process_evict_queues(p); + + kfd_unref_process(p); + return r; +} + +int kgd2kfd_resume_mm(struct mm_struct *mm) +{ + struct kfd_process *p; + int r; + + /* Because we are called from arbitrary context (workqueue) as opposed + * to process context, kfd_process could attempt to exit while we are + * running so the lookup function increments the process ref count. + */ + p = kfd_lookup_process_by_mm(mm); + if (!p) + return -ESRCH; + + r = kfd_process_restore_queues(p); + + kfd_unref_process(p); + return r; +} + /** kgd2kfd_schedule_evict_and_restore_process - Schedules work queue that will * prepare for safe eviction of KFD BOs that belong to the specified * process. @@ -652,7 +739,7 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size) return -ENOMEM; - *mem_obj = kmalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); + *mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO); if ((*mem_obj) == NULL) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index d55d29d31da4..668ad07ebe1f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -110,6 +110,57 @@ void program_sh_mem_settings(struct device_queue_manager *dqm, qpd->sh_mem_bases); } +static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q) +{ + struct kfd_dev *dev = qpd->dqm->dev; + + if (!KFD_IS_SOC15(dev->device_info->asic_family)) { + /* On pre-SOC15 chips we need to use the queue ID to + * preserve the user mode ABI. + */ + q->doorbell_id = q->properties.queue_id; + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { + /* For SDMA queues on SOC15, use static doorbell + * assignments based on the engine and queue. + */ + q->doorbell_id = dev->shared_resources.sdma_doorbell + [q->properties.sdma_engine_id] + [q->properties.sdma_queue_id]; + } else { + /* For CP queues on SOC15 reserve a free doorbell ID */ + unsigned int found; + + found = find_first_zero_bit(qpd->doorbell_bitmap, + KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); + if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) { + pr_debug("No doorbells available"); + return -EBUSY; + } + set_bit(found, qpd->doorbell_bitmap); + q->doorbell_id = found; + } + + q->properties.doorbell_off = + kfd_doorbell_id_to_offset(dev, q->process, + q->doorbell_id); + + return 0; +} + +static void deallocate_doorbell(struct qcm_process_device *qpd, + struct queue *q) +{ + unsigned int old; + struct kfd_dev *dev = qpd->dqm->dev; + + if (!KFD_IS_SOC15(dev->device_info->asic_family) || + q->properties.type == KFD_QUEUE_TYPE_SDMA) + return; + + old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap); + WARN_ON(!old); +} + static int allocate_vmid(struct device_queue_manager *dqm, struct qcm_process_device *qpd, struct queue *q) @@ -145,15 +196,19 @@ static int allocate_vmid(struct device_queue_manager *dqm, static int flush_texture_cache_nocpsch(struct kfd_dev *kdev, struct qcm_process_device *qpd) { - uint32_t len; + const struct packet_manager_funcs *pmf = qpd->dqm->packets.pmf; + int ret; if (!qpd->ib_kaddr) return -ENOMEM; - len = pm_create_release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr); + ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr); + if (ret) + return ret; return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid, - qpd->ib_base, (uint32_t *)qpd->ib_kaddr, len); + qpd->ib_base, (uint32_t *)qpd->ib_kaddr, + pmf->release_mem_size / sizeof(uint32_t)); } static void deallocate_vmid(struct device_queue_manager *dqm, @@ -301,10 +356,14 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, if (retval) return retval; + retval = allocate_doorbell(qpd, q); + if (retval) + goto out_deallocate_hqd; + retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); if (retval) - goto out_deallocate_hqd; + goto out_deallocate_doorbell; pr_debug("Loading mqd to hqd on pipe %d, queue %d\n", q->pipe, q->queue); @@ -324,6 +383,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, out_uninit_mqd: mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); +out_deallocate_doorbell: + deallocate_doorbell(qpd, q); out_deallocate_hqd: deallocate_hqd(dqm, q); @@ -357,6 +418,8 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, } dqm->total_queue_count--; + deallocate_doorbell(qpd, q); + retval = mqd->destroy_mqd(mqd, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT_RESET, KFD_UNMAP_LATENCY_MS, @@ -861,6 +924,10 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, q->properties.sdma_queue_id = q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE; q->properties.sdma_engine_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE; + retval = allocate_doorbell(qpd, q); + if (retval) + goto out_deallocate_sdma_queue; + pr_debug("SDMA id is: %d\n", q->sdma_id); pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); @@ -869,7 +936,7 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); if (retval) - goto out_deallocate_sdma_queue; + goto out_deallocate_doorbell; retval = mqd->load_mqd(mqd, q->mqd, 0, 0, &q->properties, NULL); if (retval) @@ -879,6 +946,8 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, out_uninit_mqd: mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); +out_deallocate_doorbell: + deallocate_doorbell(qpd, q); out_deallocate_sdma_queue: deallocate_sdma_queue(dqm, q->sdma_id); @@ -1070,12 +1139,17 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, q->properties.sdma_engine_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE; } + + retval = allocate_doorbell(qpd, q); + if (retval) + goto out_deallocate_sdma_queue; + mqd = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); if (!mqd) { retval = -ENOMEM; - goto out_deallocate_sdma_queue; + goto out_deallocate_doorbell; } /* * Eviction state logic: we only mark active queues as evicted @@ -1093,7 +1167,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); if (retval) - goto out_deallocate_sdma_queue; + goto out_deallocate_doorbell; list_add(&q->list, &qpd->queues_list); qpd->queue_count++; @@ -1117,6 +1191,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, mutex_unlock(&dqm->lock); return retval; +out_deallocate_doorbell: + deallocate_doorbell(qpd, q); out_deallocate_sdma_queue: if (q->properties.type == KFD_QUEUE_TYPE_SDMA) deallocate_sdma_queue(dqm, q->sdma_id); @@ -1257,6 +1333,8 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, goto failed; } + deallocate_doorbell(qpd, q); + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { dqm->sdma_queue_count--; deallocate_sdma_queue(dqm, q->sdma_id); @@ -1308,7 +1386,10 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, void __user *alternate_aperture_base, uint64_t alternate_aperture_size) { - bool retval; + bool retval = true; + + if (!dqm->asic_ops.set_cache_memory_policy) + return retval; mutex_lock(&dqm->lock); @@ -1577,6 +1658,11 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) case CHIP_POLARIS11: device_queue_manager_init_vi_tonga(&dqm->asic_ops); break; + + case CHIP_VEGA10: + case CHIP_RAVEN: + device_queue_manager_init_v9(&dqm->asic_ops); + break; default: WARN(1, "Unexpected ASIC family %u", dev->device_info->asic_family); @@ -1627,6 +1713,18 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data) int pipe, queue; int r = 0; + r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->kgd, + KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE, &dump, &n_regs); + if (!r) { + seq_printf(m, " HIQ on MEC %d Pipe %d Queue %d\n", + KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1, + KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm), + KFD_CIK_HIQ_QUEUE); + seq_reg_dump(m, dump, n_regs); + + kfree(dump); + } + for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { int pipe_offset = pipe * get_queues_per_pipe(dqm); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 412beff3281d..59a6b1956932 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -200,6 +200,8 @@ void device_queue_manager_init_vi( struct device_queue_manager_asic_ops *asic_ops); void device_queue_manager_init_vi_tonga( struct device_queue_manager_asic_ops *asic_ops); +void device_queue_manager_init_v9( + struct device_queue_manager_asic_ops *asic_ops); void program_sh_mem_settings(struct device_queue_manager *dqm, struct qcm_process_device *qpd); unsigned int get_queues_num(struct device_queue_manager *dqm); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c new file mode 100644 index 000000000000..79e5bcf6367c --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c @@ -0,0 +1,84 @@ +/* + * Copyright 2016-2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "kfd_device_queue_manager.h" +#include "vega10_enum.h" +#include "gc/gc_9_0_offset.h" +#include "gc/gc_9_0_sh_mask.h" +#include "sdma0/sdma0_4_0_sh_mask.h" + +static int update_qpd_v9(struct device_queue_manager *dqm, + struct qcm_process_device *qpd); +static void init_sdma_vm_v9(struct device_queue_manager *dqm, struct queue *q, + struct qcm_process_device *qpd); + +void device_queue_manager_init_v9( + struct device_queue_manager_asic_ops *asic_ops) +{ + asic_ops->update_qpd = update_qpd_v9; + asic_ops->init_sdma_vm = init_sdma_vm_v9; +} + +static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd) +{ + uint32_t shared_base = pdd->lds_base >> 48; + uint32_t private_base = pdd->scratch_base >> 48; + + return (shared_base << SH_MEM_BASES__SHARED_BASE__SHIFT) | + private_base; +} + +static int update_qpd_v9(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + struct kfd_process_device *pdd; + + pdd = qpd_to_pdd(qpd); + + /* check if sh_mem_config register already configured */ + if (qpd->sh_mem_config == 0) { + qpd->sh_mem_config = + SH_MEM_ALIGNMENT_MODE_UNALIGNED << + SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; + if (vega10_noretry && + !dqm->dev->device_info->needs_iommu_device) + qpd->sh_mem_config |= + 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT; + + qpd->sh_mem_ape1_limit = 0; + qpd->sh_mem_ape1_base = 0; + } + + qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd); + + pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases); + + return 0; +} + +static void init_sdma_vm_v9(struct device_queue_manager *dqm, struct queue *q, + struct qcm_process_device *qpd) +{ + /* Not needed on SDMAv4 any more */ + q->properties.sdma_vm_addr = 0; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c index ebb4da14e3df..c3744d89352c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -33,7 +33,6 @@ static DEFINE_IDA(doorbell_ida); static unsigned int max_doorbell_slices; -#define KFD_SIZE_OF_DOORBELL_IN_BYTES 4 /* * Each device exposes a doorbell aperture, a PCI MMIO aperture that @@ -50,9 +49,9 @@ static unsigned int max_doorbell_slices; */ /* # of doorbell bytes allocated for each process. */ -static inline size_t doorbell_process_allocation(void) +size_t kfd_doorbell_process_slice(struct kfd_dev *kfd) { - return roundup(KFD_SIZE_OF_DOORBELL_IN_BYTES * + return roundup(kfd->device_info->doorbell_size * KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, PAGE_SIZE); } @@ -72,16 +71,16 @@ int kfd_doorbell_init(struct kfd_dev *kfd) doorbell_start_offset = roundup(kfd->shared_resources.doorbell_start_offset, - doorbell_process_allocation()); + kfd_doorbell_process_slice(kfd)); doorbell_aperture_size = rounddown(kfd->shared_resources.doorbell_aperture_size, - doorbell_process_allocation()); + kfd_doorbell_process_slice(kfd)); if (doorbell_aperture_size > doorbell_start_offset) doorbell_process_limit = (doorbell_aperture_size - doorbell_start_offset) / - doorbell_process_allocation(); + kfd_doorbell_process_slice(kfd); else return -ENOSPC; @@ -95,7 +94,7 @@ int kfd_doorbell_init(struct kfd_dev *kfd) kfd->doorbell_id_offset = doorbell_start_offset / sizeof(u32); kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base, - doorbell_process_allocation()); + kfd_doorbell_process_slice(kfd)); if (!kfd->doorbell_kernel_ptr) return -ENOMEM; @@ -127,21 +126,16 @@ void kfd_doorbell_fini(struct kfd_dev *kfd) iounmap(kfd->doorbell_kernel_ptr); } -int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma) +int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process, + struct vm_area_struct *vma) { phys_addr_t address; - struct kfd_dev *dev; /* * For simplicitly we only allow mapping of the entire doorbell * allocation of a single device & process. */ - if (vma->vm_end - vma->vm_start != doorbell_process_allocation()) - return -EINVAL; - - /* Find kfd device according to gpu id */ - dev = kfd_device_by_id(vma->vm_pgoff); - if (!dev) + if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev)) return -EINVAL; /* Calculate physical address of doorbell */ @@ -158,19 +152,19 @@ int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma) " vm_flags == 0x%04lX\n" " size == 0x%04lX\n", (unsigned long long) vma->vm_start, address, vma->vm_flags, - doorbell_process_allocation()); + kfd_doorbell_process_slice(dev)); return io_remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT, - doorbell_process_allocation(), + kfd_doorbell_process_slice(dev), vma->vm_page_prot); } /* get kernel iomem pointer for a doorbell */ -u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, +void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, unsigned int *doorbell_off) { u32 inx; @@ -185,6 +179,8 @@ u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) return NULL; + inx *= kfd->device_info->doorbell_size / sizeof(u32); + /* * Calculating the kernel doorbell offset using the first * doorbell page. @@ -210,7 +206,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr) mutex_unlock(&kfd->doorbell_mutex); } -inline void write_kernel_doorbell(u32 __iomem *db, u32 value) +void write_kernel_doorbell(void __iomem *db, u32 value) { if (db) { writel(value, db); @@ -218,30 +214,37 @@ inline void write_kernel_doorbell(u32 __iomem *db, u32 value) } } -/* - * queue_ids are in the range [0,MAX_PROCESS_QUEUES) and are mapped 1:1 - * to doorbells with the process's doorbell page - */ -unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd, +void write_kernel_doorbell64(void __iomem *db, u64 value) +{ + if (db) { + WARN(((unsigned long)db & 7) != 0, + "Unaligned 64-bit doorbell"); + writeq(value, (u64 __iomem *)db); + pr_debug("writing %llu to doorbell address %p\n", value, db); + } +} + +unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd, struct kfd_process *process, - unsigned int queue_id) + unsigned int doorbell_id) { /* * doorbell_id_offset accounts for doorbells taken by KGD. - * index * doorbell_process_allocation/sizeof(u32) adjusts to - * the process's doorbells. + * index * kfd_doorbell_process_slice/sizeof(u32) adjusts to + * the process's doorbells. The offset returned is in dword + * units regardless of the ASIC-dependent doorbell size. */ return kfd->doorbell_id_offset + process->doorbell_index - * doorbell_process_allocation() / sizeof(u32) + - queue_id; + * kfd_doorbell_process_slice(kfd) / sizeof(u32) + + doorbell_id * kfd->device_info->doorbell_size / sizeof(u32); } uint64_t kfd_get_number_elems(struct kfd_dev *kfd) { uint64_t num_of_elems = (kfd->shared_resources.doorbell_aperture_size - kfd->shared_resources.doorbell_start_offset) / - doorbell_process_allocation() + 1; + kfd_doorbell_process_slice(kfd) + 1; return num_of_elems; @@ -251,7 +254,7 @@ phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev, struct kfd_process *process) { return dev->doorbell_base + - process->doorbell_index * doorbell_process_allocation(); + process->doorbell_index * kfd_doorbell_process_slice(dev); } int kfd_alloc_process_doorbells(struct kfd_process *process) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 4890a90f1e44..5562e94e786a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -345,7 +345,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, case KFD_EVENT_TYPE_DEBUG: ret = create_signal_event(devkfd, p, ev); if (!ret) { - *event_page_offset = KFD_MMAP_EVENTS_MASK; + *event_page_offset = KFD_MMAP_TYPE_EVENTS; *event_page_offset <<= PAGE_SHIFT; *event_slot_index = ev->event_id; } @@ -496,7 +496,7 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, pr_debug_ratelimited("Partial ID invalid: %u (%u valid bits)\n", partial_id, valid_id_bits); - if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT/2) { + if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT / 64) { /* With relatively few events, it's faster to * iterate over the event IDR */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index 66852de410c8..97d5423c5673 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -275,23 +275,35 @@ * for FLAT_* / S_LOAD operations. */ -#define MAKE_GPUVM_APP_BASE(gpu_num) \ +#define MAKE_GPUVM_APP_BASE_VI(gpu_num) \ (((uint64_t)(gpu_num) << 61) + 0x1000000000000L) #define MAKE_GPUVM_APP_LIMIT(base, size) \ (((uint64_t)(base) & 0xFFFFFF0000000000UL) + (size) - 1) -#define MAKE_SCRATCH_APP_BASE() \ +#define MAKE_SCRATCH_APP_BASE_VI() \ (((uint64_t)(0x1UL) << 61) + 0x100000000L) #define MAKE_SCRATCH_APP_LIMIT(base) \ (((uint64_t)base & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF) -#define MAKE_LDS_APP_BASE() \ +#define MAKE_LDS_APP_BASE_VI() \ (((uint64_t)(0x1UL) << 61) + 0x0) #define MAKE_LDS_APP_LIMIT(base) \ (((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF) +/* On GFXv9 the LDS and scratch apertures are programmed independently + * using the high 16 bits of the 64-bit virtual address. They must be + * in the hole, which will be the case as long as the high 16 bits are + * not 0. + * + * The aperture sizes are still 4GB implicitly. + * + * A GPUVM aperture is not applicable on GFXv9. + */ +#define MAKE_LDS_APP_BASE_V9() ((uint64_t)(0x1UL) << 48) +#define MAKE_SCRATCH_APP_BASE_V9() ((uint64_t)(0x2UL) << 48) + /* User mode manages most of the SVM aperture address space. The low * 16MB are reserved for kernel use (CWSR trap handler and kernel IB * for now). @@ -300,6 +312,55 @@ #define SVM_CWSR_BASE (SVM_USER_BASE - KFD_CWSR_TBA_TMA_SIZE) #define SVM_IB_BASE (SVM_CWSR_BASE - PAGE_SIZE) +static void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id) +{ + /* + * node id couldn't be 0 - the three MSB bits of + * aperture shoudn't be 0 + */ + pdd->lds_base = MAKE_LDS_APP_BASE_VI(); + pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base); + + if (!pdd->dev->device_info->needs_iommu_device) { + /* dGPUs: SVM aperture starting at 0 + * with small reserved space for kernel. + * Set them to CANONICAL addresses. + */ + pdd->gpuvm_base = SVM_USER_BASE; + pdd->gpuvm_limit = + pdd->dev->shared_resources.gpuvm_size - 1; + } else { + /* set them to non CANONICAL addresses, and no SVM is + * allocated. + */ + pdd->gpuvm_base = MAKE_GPUVM_APP_BASE_VI(id + 1); + pdd->gpuvm_limit = MAKE_GPUVM_APP_LIMIT(pdd->gpuvm_base, + pdd->dev->shared_resources.gpuvm_size); + } + + pdd->scratch_base = MAKE_SCRATCH_APP_BASE_VI(); + pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); +} + +static void kfd_init_apertures_v9(struct kfd_process_device *pdd, uint8_t id) +{ + pdd->lds_base = MAKE_LDS_APP_BASE_V9(); + pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base); + + /* Raven needs SVM to support graphic handle, etc. Leave the small + * reserved space before SVM on Raven as well, even though we don't + * have to. + * Set gpuvm_base and gpuvm_limit to CANONICAL addresses so that they + * are used in Thunk to reserve SVM. + */ + pdd->gpuvm_base = SVM_USER_BASE; + pdd->gpuvm_limit = + pdd->dev->shared_resources.gpuvm_size - 1; + + pdd->scratch_base = MAKE_SCRATCH_APP_BASE_V9(); + pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); +} + int kfd_init_apertures(struct kfd_process *process) { uint8_t id = 0; @@ -307,9 +368,7 @@ int kfd_init_apertures(struct kfd_process *process) struct kfd_process_device *pdd; /*Iterating over all devices*/ - while (kfd_topology_enum_kfd_devices(id, &dev) == 0 && - id < NUM_OF_SUPPORTED_GPUS) { - + while (kfd_topology_enum_kfd_devices(id, &dev) == 0) { if (!dev) { id++; /* Skip non GPU devices */ continue; @@ -318,7 +377,7 @@ int kfd_init_apertures(struct kfd_process *process) pdd = kfd_create_process_device_data(dev, process); if (!pdd) { pr_err("Failed to create process device data\n"); - return -1; + return -ENOMEM; } /* * For 64 bit process apertures will be statically reserved in @@ -330,32 +389,30 @@ int kfd_init_apertures(struct kfd_process *process) pdd->gpuvm_base = pdd->gpuvm_limit = 0; pdd->scratch_base = pdd->scratch_limit = 0; } else { - /* Same LDS and scratch apertures can be used - * on all GPUs. This allows using more dGPUs - * than placement options for apertures. - */ - pdd->lds_base = MAKE_LDS_APP_BASE(); - pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base); - - pdd->scratch_base = MAKE_SCRATCH_APP_BASE(); - pdd->scratch_limit = - MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); + switch (dev->device_info->asic_family) { + case CHIP_KAVERI: + case CHIP_HAWAII: + case CHIP_CARRIZO: + case CHIP_TONGA: + case CHIP_FIJI: + case CHIP_POLARIS10: + case CHIP_POLARIS11: + kfd_init_apertures_vi(pdd, id); + break; + case CHIP_VEGA10: + case CHIP_RAVEN: + kfd_init_apertures_v9(pdd, id); + break; + default: + WARN(1, "Unexpected ASIC family %u", + dev->device_info->asic_family); + return -EINVAL; + } - if (dev->device_info->needs_iommu_device) { - /* APUs: GPUVM aperture in - * non-canonical address space - */ - pdd->gpuvm_base = MAKE_GPUVM_APP_BASE(id + 1); - pdd->gpuvm_limit = MAKE_GPUVM_APP_LIMIT( - pdd->gpuvm_base, - dev->shared_resources.gpuvm_size); - } else { - /* dGPUs: SVM aperture starting at 0 - * with small reserved space for kernel + if (!dev->device_info->needs_iommu_device) { + /* dGPUs: the reserved space for kernel + * before SVM */ - pdd->gpuvm_base = SVM_USER_BASE; - pdd->gpuvm_limit = - dev->shared_resources.gpuvm_size - 1; pdd->qpd.cwsr_base = SVM_CWSR_BASE; pdd->qpd.ib_base = SVM_IB_BASE; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c new file mode 100644 index 000000000000..37029baa3346 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -0,0 +1,92 @@ +/* + * Copyright 2016-2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "kfd_priv.h" +#include "kfd_events.h" +#include "soc15_int.h" + + +static bool event_interrupt_isr_v9(struct kfd_dev *dev, + const uint32_t *ih_ring_entry) +{ + uint16_t source_id, client_id, pasid, vmid; + const uint32_t *data = ih_ring_entry; + + /* Only handle interrupts from KFD VMIDs */ + vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry); + if (vmid < dev->vm_info.first_vmid_kfd || + vmid > dev->vm_info.last_vmid_kfd) + return 0; + + /* If there is no valid PASID, it's likely a firmware bug */ + pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry); + if (WARN_ONCE(pasid == 0, "FW bug: No PASID in KFD interrupt")) + return 0; + + source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry); + client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry); + + pr_debug("client id 0x%x, source id %d, pasid 0x%x. raw data:\n", + client_id, source_id, pasid); + pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n", + data[0], data[1], data[2], data[3], + data[4], data[5], data[6], data[7]); + + /* Interrupt types we care about: various signals and faults. + * They will be forwarded to a work queue (see below). + */ + return source_id == SOC15_INTSRC_CP_END_OF_PIPE || + source_id == SOC15_INTSRC_SDMA_TRAP || + source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG || + source_id == SOC15_INTSRC_CP_BAD_OPCODE; +} + +static void event_interrupt_wq_v9(struct kfd_dev *dev, + const uint32_t *ih_ring_entry) +{ + uint16_t source_id, client_id, pasid, vmid; + uint32_t context_id; + + source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry); + client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry); + pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry); + vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry); + context_id = SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry); + + if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) + kfd_signal_event_interrupt(pasid, context_id, 32); + else if (source_id == SOC15_INTSRC_SDMA_TRAP) + kfd_signal_event_interrupt(pasid, context_id & 0xfffffff, 28); + else if (source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG) + kfd_signal_event_interrupt(pasid, context_id & 0xffffff, 24); + else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) + kfd_signal_hw_exception_event(pasid); + else if (client_id == SOC15_IH_CLIENTID_VMC || + client_id == SOC15_IH_CLIENTID_UTCL2) { + /* TODO */ + } +} + +const struct kfd_event_interrupt_class event_interrupt_class_v9 = { + .interrupt_isr = event_interrupt_isr_v9, + .interrupt_wq = event_interrupt_wq_v9, +}; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c index 035c351f47c5..db6d9336b80d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c @@ -139,10 +139,12 @@ static void interrupt_wq(struct work_struct *work) { struct kfd_dev *dev = container_of(work, struct kfd_dev, interrupt_work); + uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE]; - uint32_t ih_ring_entry[DIV_ROUND_UP( - dev->device_info->ih_ring_entry_size, - sizeof(uint32_t))]; + if (dev->device_info->ih_ring_entry_size > sizeof(ih_ring_entry)) { + dev_err_once(kfd_chardev(), "Ring entry too small\n"); + return; + } while (dequeue_ih_ring_entry(dev, ih_ring_entry)) dev->device_info->event_interrupt_class->interrupt_wq(dev, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 69f496485331..476951d8c91c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -99,7 +99,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, kq->rptr_kernel = kq->rptr_mem->cpu_ptr; kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr; - retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->wptr_kernel), + retval = kfd_gtt_sa_allocate(dev, dev->device_info->doorbell_size, &kq->wptr_mem); if (retval != 0) @@ -208,6 +208,7 @@ static int acquire_packet_buffer(struct kernel_queue *kq, size_t available_size; size_t queue_size_dwords; uint32_t wptr, rptr; + uint64_t wptr64; unsigned int *queue_address; /* When rptr == wptr, the buffer is empty. @@ -216,7 +217,8 @@ static int acquire_packet_buffer(struct kernel_queue *kq, * the opposite. So we can only use up to queue_size_dwords - 1 dwords. */ rptr = *kq->rptr_kernel; - wptr = *kq->wptr_kernel; + wptr = kq->pending_wptr; + wptr64 = kq->pending_wptr64; queue_address = (unsigned int *)kq->pq_kernel_addr; queue_size_dwords = kq->queue->properties.queue_size / 4; @@ -232,29 +234,33 @@ static int acquire_packet_buffer(struct kernel_queue *kq, * make sure calling functions know * acquire_packet_buffer() failed */ - *buffer_ptr = NULL; - return -ENOMEM; + goto err_no_space; } if (wptr + packet_size_in_dwords >= queue_size_dwords) { /* make sure after rolling back to position 0, there is * still enough space. */ - if (packet_size_in_dwords >= rptr) { - *buffer_ptr = NULL; - return -ENOMEM; - } + if (packet_size_in_dwords >= rptr) + goto err_no_space; + /* fill nops, roll back and start at position 0 */ while (wptr > 0) { queue_address[wptr] = kq->nop_packet; wptr = (wptr + 1) % queue_size_dwords; + wptr64++; } } *buffer_ptr = &queue_address[wptr]; kq->pending_wptr = wptr + packet_size_in_dwords; + kq->pending_wptr64 = wptr64 + packet_size_in_dwords; return 0; + +err_no_space: + *buffer_ptr = NULL; + return -ENOMEM; } static void submit_packet(struct kernel_queue *kq) @@ -270,14 +276,18 @@ static void submit_packet(struct kernel_queue *kq) pr_debug("\n"); #endif - *kq->wptr_kernel = kq->pending_wptr; - write_kernel_doorbell(kq->queue->properties.doorbell_ptr, - kq->pending_wptr); + kq->ops_asic_specific.submit_packet(kq); } static void rollback_packet(struct kernel_queue *kq) { - kq->pending_wptr = *kq->queue->properties.write_ptr; + if (kq->dev->device_info->doorbell_size == 8) { + kq->pending_wptr64 = *kq->wptr64_kernel; + kq->pending_wptr = *kq->wptr_kernel % + (kq->queue->properties.queue_size / 4); + } else { + kq->pending_wptr = *kq->wptr_kernel; + } } struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, @@ -308,6 +318,11 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, case CHIP_HAWAII: kernel_queue_init_cik(&kq->ops_asic_specific); break; + + case CHIP_VEGA10: + case CHIP_RAVEN: + kernel_queue_init_v9(&kq->ops_asic_specific); + break; default: WARN(1, "Unexpected ASIC family %u", dev->device_info->asic_family); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h index 594053136ee4..97aff2041a5d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h @@ -72,6 +72,7 @@ struct kernel_queue { struct kfd_dev *dev; struct mqd_manager *mqd; struct queue *queue; + uint64_t pending_wptr64; uint32_t pending_wptr; unsigned int nop_packet; @@ -79,7 +80,10 @@ struct kernel_queue { uint32_t *rptr_kernel; uint64_t rptr_gpu_addr; struct kfd_mem_obj *wptr_mem; - uint32_t *wptr_kernel; + union { + uint64_t *wptr64_kernel; + uint32_t *wptr_kernel; + }; uint64_t wptr_gpu_addr; struct kfd_mem_obj *pq; uint64_t pq_gpu_addr; @@ -97,5 +101,6 @@ struct kernel_queue { void kernel_queue_init_cik(struct kernel_queue_ops *ops); void kernel_queue_init_vi(struct kernel_queue_ops *ops); +void kernel_queue_init_v9(struct kernel_queue_ops *ops); #endif /* KFD_KERNEL_QUEUE_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c index a90eb440b1fb..19e54acb4125 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c @@ -26,11 +26,13 @@ static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev, enum kfd_queue_type type, unsigned int queue_size); static void uninitialize_cik(struct kernel_queue *kq); +static void submit_packet_cik(struct kernel_queue *kq); void kernel_queue_init_cik(struct kernel_queue_ops *ops) { ops->initialize = initialize_cik; ops->uninitialize = uninitialize_cik; + ops->submit_packet = submit_packet_cik; } static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev, @@ -42,3 +44,10 @@ static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev, static void uninitialize_cik(struct kernel_queue *kq) { } + +static void submit_packet_cik(struct kernel_queue *kq) +{ + *kq->wptr_kernel = kq->pending_wptr; + write_kernel_doorbell(kq->queue->properties.doorbell_ptr, + kq->pending_wptr); +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c new file mode 100644 index 000000000000..684a3bf07efd --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c @@ -0,0 +1,340 @@ +/* + * Copyright 2016-2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "kfd_kernel_queue.h" +#include "kfd_device_queue_manager.h" +#include "kfd_pm4_headers_ai.h" +#include "kfd_pm4_opcodes.h" + +static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev, + enum kfd_queue_type type, unsigned int queue_size); +static void uninitialize_v9(struct kernel_queue *kq); +static void submit_packet_v9(struct kernel_queue *kq); + +void kernel_queue_init_v9(struct kernel_queue_ops *ops) +{ + ops->initialize = initialize_v9; + ops->uninitialize = uninitialize_v9; + ops->submit_packet = submit_packet_v9; +} + +static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev, + enum kfd_queue_type type, unsigned int queue_size) +{ + int retval; + + retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem); + if (retval) + return false; + + kq->eop_gpu_addr = kq->eop_mem->gpu_addr; + kq->eop_kernel_addr = kq->eop_mem->cpu_ptr; + + memset(kq->eop_kernel_addr, 0, PAGE_SIZE); + + return true; +} + +static void uninitialize_v9(struct kernel_queue *kq) +{ + kfd_gtt_sa_free(kq->dev, kq->eop_mem); +} + +static void submit_packet_v9(struct kernel_queue *kq) +{ + *kq->wptr64_kernel = kq->pending_wptr64; + write_kernel_doorbell64(kq->queue->properties.doorbell_ptr, + kq->pending_wptr64); +} + +static int pm_map_process_v9(struct packet_manager *pm, + uint32_t *buffer, struct qcm_process_device *qpd) +{ + struct pm4_mes_map_process *packet; + uint64_t vm_page_table_base_addr = + (uint64_t)(qpd->page_table_base) << 12; + + packet = (struct pm4_mes_map_process *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_map_process)); + + packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, + sizeof(struct pm4_mes_map_process)); + packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; + packet->bitfields2.process_quantum = 1; + packet->bitfields2.pasid = qpd->pqm->process->pasid; + packet->bitfields14.gds_size = qpd->gds_size; + packet->bitfields14.num_gws = qpd->num_gws; + packet->bitfields14.num_oac = qpd->num_oac; + packet->bitfields14.sdma_enable = 1; + packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count; + + packet->sh_mem_config = qpd->sh_mem_config; + packet->sh_mem_bases = qpd->sh_mem_bases; + packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8); + packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8); + packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8); + packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8); + + packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area); + packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area); + + packet->vm_context_page_table_base_addr_lo32 = + lower_32_bits(vm_page_table_base_addr); + packet->vm_context_page_table_base_addr_hi32 = + upper_32_bits(vm_page_table_base_addr); + + return 0; +} + +static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer, + uint64_t ib, size_t ib_size_in_dwords, bool chain) +{ + struct pm4_mes_runlist *packet; + + int concurrent_proc_cnt = 0; + struct kfd_dev *kfd = pm->dqm->dev; + + /* Determine the number of processes to map together to HW: + * it can not exceed the number of VMIDs available to the + * scheduler, and it is determined by the smaller of the number + * of processes in the runlist and kfd module parameter + * hws_max_conc_proc. + * Note: the arbitration between the number of VMIDs and + * hws_max_conc_proc has been done in + * kgd2kfd_device_init(). + */ + concurrent_proc_cnt = min(pm->dqm->processes_count, + kfd->max_proc_per_quantum); + + packet = (struct pm4_mes_runlist *)buffer; + + memset(buffer, 0, sizeof(struct pm4_mes_runlist)); + packet->header.u32All = pm_build_pm4_header(IT_RUN_LIST, + sizeof(struct pm4_mes_runlist)); + + packet->bitfields4.ib_size = ib_size_in_dwords; + packet->bitfields4.chain = chain ? 1 : 0; + packet->bitfields4.offload_polling = 0; + packet->bitfields4.valid = 1; + packet->bitfields4.process_cnt = concurrent_proc_cnt; + packet->ordinal2 = lower_32_bits(ib); + packet->ib_base_hi = upper_32_bits(ib); + + return 0; +} + +static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer, + struct queue *q, bool is_static) +{ + struct pm4_mes_map_queues *packet; + bool use_static = is_static; + + packet = (struct pm4_mes_map_queues *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_map_queues)); + + packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES, + sizeof(struct pm4_mes_map_queues)); + packet->bitfields2.alloc_format = + alloc_format__mes_map_queues__one_per_pipe_vi; + packet->bitfields2.num_queues = 1; + packet->bitfields2.queue_sel = + queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi; + + packet->bitfields2.engine_sel = + engine_sel__mes_map_queues__compute_vi; + packet->bitfields2.queue_type = + queue_type__mes_map_queues__normal_compute_vi; + + switch (q->properties.type) { + case KFD_QUEUE_TYPE_COMPUTE: + if (use_static) + packet->bitfields2.queue_type = + queue_type__mes_map_queues__normal_latency_static_queue_vi; + break; + case KFD_QUEUE_TYPE_DIQ: + packet->bitfields2.queue_type = + queue_type__mes_map_queues__debug_interface_queue_vi; + break; + case KFD_QUEUE_TYPE_SDMA: + packet->bitfields2.engine_sel = q->properties.sdma_engine_id + + engine_sel__mes_map_queues__sdma0_vi; + use_static = false; /* no static queues under SDMA */ + break; + default: + WARN(1, "queue type %d", q->properties.type); + return -EINVAL; + } + packet->bitfields3.doorbell_offset = + q->properties.doorbell_off; + + packet->mqd_addr_lo = + lower_32_bits(q->gart_mqd_addr); + + packet->mqd_addr_hi = + upper_32_bits(q->gart_mqd_addr); + + packet->wptr_addr_lo = + lower_32_bits((uint64_t)q->properties.write_ptr); + + packet->wptr_addr_hi = + upper_32_bits((uint64_t)q->properties.write_ptr); + + return 0; +} + +static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer, + enum kfd_queue_type type, + enum kfd_unmap_queues_filter filter, + uint32_t filter_param, bool reset, + unsigned int sdma_engine) +{ + struct pm4_mes_unmap_queues *packet; + + packet = (struct pm4_mes_unmap_queues *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues)); + + packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES, + sizeof(struct pm4_mes_unmap_queues)); + switch (type) { + case KFD_QUEUE_TYPE_COMPUTE: + case KFD_QUEUE_TYPE_DIQ: + packet->bitfields2.engine_sel = + engine_sel__mes_unmap_queues__compute; + break; + case KFD_QUEUE_TYPE_SDMA: + packet->bitfields2.engine_sel = + engine_sel__mes_unmap_queues__sdma0 + sdma_engine; + break; + default: + WARN(1, "queue type %d", type); + return -EINVAL; + } + + if (reset) + packet->bitfields2.action = + action__mes_unmap_queues__reset_queues; + else + packet->bitfields2.action = + action__mes_unmap_queues__preempt_queues; + + switch (filter) { + case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE: + packet->bitfields2.queue_sel = + queue_sel__mes_unmap_queues__perform_request_on_specified_queues; + packet->bitfields2.num_queues = 1; + packet->bitfields3b.doorbell_offset0 = filter_param; + break; + case KFD_UNMAP_QUEUES_FILTER_BY_PASID: + packet->bitfields2.queue_sel = + queue_sel__mes_unmap_queues__perform_request_on_pasid_queues; + packet->bitfields3a.pasid = filter_param; + break; + case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES: + packet->bitfields2.queue_sel = + queue_sel__mes_unmap_queues__unmap_all_queues; + break; + case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES: + /* in this case, we do not preempt static queues */ + packet->bitfields2.queue_sel = + queue_sel__mes_unmap_queues__unmap_all_non_static_queues; + break; + default: + WARN(1, "filter %d", filter); + return -EINVAL; + } + + return 0; + +} + +static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer, + uint64_t fence_address, uint32_t fence_value) +{ + struct pm4_mes_query_status *packet; + + packet = (struct pm4_mes_query_status *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_query_status)); + + + packet->header.u32All = pm_build_pm4_header(IT_QUERY_STATUS, + sizeof(struct pm4_mes_query_status)); + + packet->bitfields2.context_id = 0; + packet->bitfields2.interrupt_sel = + interrupt_sel__mes_query_status__completion_status; + packet->bitfields2.command = + command__mes_query_status__fence_only_after_write_ack; + + packet->addr_hi = upper_32_bits((uint64_t)fence_address); + packet->addr_lo = lower_32_bits((uint64_t)fence_address); + packet->data_hi = upper_32_bits((uint64_t)fence_value); + packet->data_lo = lower_32_bits((uint64_t)fence_value); + + return 0; +} + + +static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer) +{ + struct pm4_mec_release_mem *packet; + + packet = (struct pm4_mec_release_mem *)buffer; + memset(buffer, 0, sizeof(struct pm4_mec_release_mem)); + + packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM, + sizeof(struct pm4_mec_release_mem)); + + packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; + packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe; + packet->bitfields2.tcl1_action_ena = 1; + packet->bitfields2.tc_action_ena = 1; + packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru; + + packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low; + packet->bitfields3.int_sel = + int_sel__mec_release_mem__send_interrupt_after_write_confirm; + + packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2; + packet->address_hi = upper_32_bits(gpu_addr); + + packet->data_lo = 0; + + return 0; +} + +const struct packet_manager_funcs kfd_v9_pm_funcs = { + .map_process = pm_map_process_v9, + .runlist = pm_runlist_v9, + .set_resources = pm_set_resources_vi, + .map_queues = pm_map_queues_v9, + .unmap_queues = pm_unmap_queues_v9, + .query_status = pm_query_status_v9, + .release_mem = pm_release_mem_v9, + .map_process_size = sizeof(struct pm4_mes_map_process), + .runlist_size = sizeof(struct pm4_mes_runlist), + .set_resources_size = sizeof(struct pm4_mes_set_resources), + .map_queues_size = sizeof(struct pm4_mes_map_queues), + .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), + .query_status_size = sizeof(struct pm4_mes_query_status), + .release_mem_size = sizeof(struct pm4_mec_release_mem) +}; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c index f1d48281e322..bf20c6d32ef3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c @@ -22,15 +22,20 @@ */ #include "kfd_kernel_queue.h" +#include "kfd_device_queue_manager.h" +#include "kfd_pm4_headers_vi.h" +#include "kfd_pm4_opcodes.h" static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev, enum kfd_queue_type type, unsigned int queue_size); static void uninitialize_vi(struct kernel_queue *kq); +static void submit_packet_vi(struct kernel_queue *kq); void kernel_queue_init_vi(struct kernel_queue_ops *ops) { ops->initialize = initialize_vi; ops->uninitialize = uninitialize_vi; + ops->submit_packet = submit_packet_vi; } static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev, @@ -54,3 +59,317 @@ static void uninitialize_vi(struct kernel_queue *kq) { kfd_gtt_sa_free(kq->dev, kq->eop_mem); } + +static void submit_packet_vi(struct kernel_queue *kq) +{ + *kq->wptr_kernel = kq->pending_wptr; + write_kernel_doorbell(kq->queue->properties.doorbell_ptr, + kq->pending_wptr); +} + +unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size) +{ + union PM4_MES_TYPE_3_HEADER header; + + header.u32All = 0; + header.opcode = opcode; + header.count = packet_size / 4 - 2; + header.type = PM4_TYPE_3; + + return header.u32All; +} + +static int pm_map_process_vi(struct packet_manager *pm, uint32_t *buffer, + struct qcm_process_device *qpd) +{ + struct pm4_mes_map_process *packet; + + packet = (struct pm4_mes_map_process *)buffer; + + memset(buffer, 0, sizeof(struct pm4_mes_map_process)); + + packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, + sizeof(struct pm4_mes_map_process)); + packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; + packet->bitfields2.process_quantum = 1; + packet->bitfields2.pasid = qpd->pqm->process->pasid; + packet->bitfields3.page_table_base = qpd->page_table_base; + packet->bitfields10.gds_size = qpd->gds_size; + packet->bitfields10.num_gws = qpd->num_gws; + packet->bitfields10.num_oac = qpd->num_oac; + packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count; + + packet->sh_mem_config = qpd->sh_mem_config; + packet->sh_mem_bases = qpd->sh_mem_bases; + packet->sh_mem_ape1_base = qpd->sh_mem_ape1_base; + packet->sh_mem_ape1_limit = qpd->sh_mem_ape1_limit; + + packet->sh_hidden_private_base_vmid = qpd->sh_hidden_private_base; + + packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area); + packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area); + + return 0; +} + +static int pm_runlist_vi(struct packet_manager *pm, uint32_t *buffer, + uint64_t ib, size_t ib_size_in_dwords, bool chain) +{ + struct pm4_mes_runlist *packet; + int concurrent_proc_cnt = 0; + struct kfd_dev *kfd = pm->dqm->dev; + + if (WARN_ON(!ib)) + return -EFAULT; + + /* Determine the number of processes to map together to HW: + * it can not exceed the number of VMIDs available to the + * scheduler, and it is determined by the smaller of the number + * of processes in the runlist and kfd module parameter + * hws_max_conc_proc. + * Note: the arbitration between the number of VMIDs and + * hws_max_conc_proc has been done in + * kgd2kfd_device_init(). + */ + concurrent_proc_cnt = min(pm->dqm->processes_count, + kfd->max_proc_per_quantum); + + packet = (struct pm4_mes_runlist *)buffer; + + memset(buffer, 0, sizeof(struct pm4_mes_runlist)); + packet->header.u32All = pm_build_pm4_header(IT_RUN_LIST, + sizeof(struct pm4_mes_runlist)); + + packet->bitfields4.ib_size = ib_size_in_dwords; + packet->bitfields4.chain = chain ? 1 : 0; + packet->bitfields4.offload_polling = 0; + packet->bitfields4.valid = 1; + packet->bitfields4.process_cnt = concurrent_proc_cnt; + packet->ordinal2 = lower_32_bits(ib); + packet->bitfields3.ib_base_hi = upper_32_bits(ib); + + return 0; +} + +int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer, + struct scheduling_resources *res) +{ + struct pm4_mes_set_resources *packet; + + packet = (struct pm4_mes_set_resources *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_set_resources)); + + packet->header.u32All = pm_build_pm4_header(IT_SET_RESOURCES, + sizeof(struct pm4_mes_set_resources)); + + packet->bitfields2.queue_type = + queue_type__mes_set_resources__hsa_interface_queue_hiq; + packet->bitfields2.vmid_mask = res->vmid_mask; + packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100; + packet->bitfields7.oac_mask = res->oac_mask; + packet->bitfields8.gds_heap_base = res->gds_heap_base; + packet->bitfields8.gds_heap_size = res->gds_heap_size; + + packet->gws_mask_lo = lower_32_bits(res->gws_mask); + packet->gws_mask_hi = upper_32_bits(res->gws_mask); + + packet->queue_mask_lo = lower_32_bits(res->queue_mask); + packet->queue_mask_hi = upper_32_bits(res->queue_mask); + + return 0; +} + +static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer, + struct queue *q, bool is_static) +{ + struct pm4_mes_map_queues *packet; + bool use_static = is_static; + + packet = (struct pm4_mes_map_queues *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_map_queues)); + + packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES, + sizeof(struct pm4_mes_map_queues)); + packet->bitfields2.alloc_format = + alloc_format__mes_map_queues__one_per_pipe_vi; + packet->bitfields2.num_queues = 1; + packet->bitfields2.queue_sel = + queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi; + + packet->bitfields2.engine_sel = + engine_sel__mes_map_queues__compute_vi; + packet->bitfields2.queue_type = + queue_type__mes_map_queues__normal_compute_vi; + + switch (q->properties.type) { + case KFD_QUEUE_TYPE_COMPUTE: + if (use_static) + packet->bitfields2.queue_type = + queue_type__mes_map_queues__normal_latency_static_queue_vi; + break; + case KFD_QUEUE_TYPE_DIQ: + packet->bitfields2.queue_type = + queue_type__mes_map_queues__debug_interface_queue_vi; + break; + case KFD_QUEUE_TYPE_SDMA: + packet->bitfields2.engine_sel = q->properties.sdma_engine_id + + engine_sel__mes_map_queues__sdma0_vi; + use_static = false; /* no static queues under SDMA */ + break; + default: + WARN(1, "queue type %d", q->properties.type); + return -EINVAL; + } + packet->bitfields3.doorbell_offset = + q->properties.doorbell_off; + + packet->mqd_addr_lo = + lower_32_bits(q->gart_mqd_addr); + + packet->mqd_addr_hi = + upper_32_bits(q->gart_mqd_addr); + + packet->wptr_addr_lo = + lower_32_bits((uint64_t)q->properties.write_ptr); + + packet->wptr_addr_hi = + upper_32_bits((uint64_t)q->properties.write_ptr); + + return 0; +} + +static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer, + enum kfd_queue_type type, + enum kfd_unmap_queues_filter filter, + uint32_t filter_param, bool reset, + unsigned int sdma_engine) +{ + struct pm4_mes_unmap_queues *packet; + + packet = (struct pm4_mes_unmap_queues *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues)); + + packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES, + sizeof(struct pm4_mes_unmap_queues)); + switch (type) { + case KFD_QUEUE_TYPE_COMPUTE: + case KFD_QUEUE_TYPE_DIQ: + packet->bitfields2.engine_sel = + engine_sel__mes_unmap_queues__compute; + break; + case KFD_QUEUE_TYPE_SDMA: + packet->bitfields2.engine_sel = + engine_sel__mes_unmap_queues__sdma0 + sdma_engine; + break; + default: + WARN(1, "queue type %d", type); + return -EINVAL; + } + + if (reset) + packet->bitfields2.action = + action__mes_unmap_queues__reset_queues; + else + packet->bitfields2.action = + action__mes_unmap_queues__preempt_queues; + + switch (filter) { + case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE: + packet->bitfields2.queue_sel = + queue_sel__mes_unmap_queues__perform_request_on_specified_queues; + packet->bitfields2.num_queues = 1; + packet->bitfields3b.doorbell_offset0 = filter_param; + break; + case KFD_UNMAP_QUEUES_FILTER_BY_PASID: + packet->bitfields2.queue_sel = + queue_sel__mes_unmap_queues__perform_request_on_pasid_queues; + packet->bitfields3a.pasid = filter_param; + break; + case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES: + packet->bitfields2.queue_sel = + queue_sel__mes_unmap_queues__unmap_all_queues; + break; + case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES: + /* in this case, we do not preempt static queues */ + packet->bitfields2.queue_sel = + queue_sel__mes_unmap_queues__unmap_all_non_static_queues; + break; + default: + WARN(1, "filter %d", filter); + return -EINVAL; + } + + return 0; + +} + +static int pm_query_status_vi(struct packet_manager *pm, uint32_t *buffer, + uint64_t fence_address, uint32_t fence_value) +{ + struct pm4_mes_query_status *packet; + + packet = (struct pm4_mes_query_status *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_query_status)); + + packet->header.u32All = pm_build_pm4_header(IT_QUERY_STATUS, + sizeof(struct pm4_mes_query_status)); + + packet->bitfields2.context_id = 0; + packet->bitfields2.interrupt_sel = + interrupt_sel__mes_query_status__completion_status; + packet->bitfields2.command = + command__mes_query_status__fence_only_after_write_ack; + + packet->addr_hi = upper_32_bits((uint64_t)fence_address); + packet->addr_lo = lower_32_bits((uint64_t)fence_address); + packet->data_hi = upper_32_bits((uint64_t)fence_value); + packet->data_lo = lower_32_bits((uint64_t)fence_value); + + return 0; +} + +static int pm_release_mem_vi(uint64_t gpu_addr, uint32_t *buffer) +{ + struct pm4_mec_release_mem *packet; + + packet = (struct pm4_mec_release_mem *)buffer; + memset(buffer, 0, sizeof(*packet)); + + packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM, + sizeof(*packet)); + + packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; + packet->bitfields2.event_index = event_index___release_mem__end_of_pipe; + packet->bitfields2.tcl1_action_ena = 1; + packet->bitfields2.tc_action_ena = 1; + packet->bitfields2.cache_policy = cache_policy___release_mem__lru; + packet->bitfields2.atc = 0; + + packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low; + packet->bitfields3.int_sel = + int_sel___release_mem__send_interrupt_after_write_confirm; + + packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2; + packet->address_hi = upper_32_bits(gpu_addr); + + packet->data_lo = 0; + + return 0; +} + +const struct packet_manager_funcs kfd_vi_pm_funcs = { + .map_process = pm_map_process_vi, + .runlist = pm_runlist_vi, + .set_resources = pm_set_resources_vi, + .map_queues = pm_map_queues_vi, + .unmap_queues = pm_unmap_queues_vi, + .query_status = pm_query_status_vi, + .release_mem = pm_release_mem_vi, + .map_process_size = sizeof(struct pm4_mes_map_process), + .runlist_size = sizeof(struct pm4_mes_runlist), + .set_resources_size = sizeof(struct pm4_mes_set_resources), + .map_queues_size = sizeof(struct pm4_mes_map_queues), + .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), + .query_status_size = sizeof(struct pm4_mes_query_status), + .release_mem_size = sizeof(struct pm4_mec_release_mem) +}; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index e0c07d24d251..76bf2dc8aec4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -43,6 +43,8 @@ static const struct kgd2kfd_calls kgd2kfd = { .interrupt = kgd2kfd_interrupt, .suspend = kgd2kfd_suspend, .resume = kgd2kfd_resume, + .quiesce_mm = kgd2kfd_quiesce_mm, + .resume_mm = kgd2kfd_resume_mm, .schedule_evict_and_restore_process = kgd2kfd_schedule_evict_and_restore_process, }; @@ -81,6 +83,11 @@ module_param(ignore_crat, int, 0444); MODULE_PARM_DESC(ignore_crat, "Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)"); +int vega10_noretry; +module_param_named(noretry, vega10_noretry, int, 0644); +MODULE_PARM_DESC(noretry, + "Set sh_mem_config.retry_disable on Vega10 (0 = retry enabled (default), 1 = retry disabled)"); + static int amdkfd_init_completed; int kgd2kfd_init(unsigned int interface_version, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index ee7061e1c466..4b8eb506642b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -38,6 +38,9 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, case CHIP_POLARIS10: case CHIP_POLARIS11: return mqd_manager_init_vi_tonga(type, dev); + case CHIP_VEGA10: + case CHIP_RAVEN: + return mqd_manager_init_v9(type, dev); default: WARN(1, "Unexpected ASIC family %u", dev->device_info->asic_family); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index c00c325ed3c9..06eaa218eba6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -79,10 +79,6 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, m->cp_mqd_base_addr_lo = lower_32_bits(addr); m->cp_mqd_base_addr_hi = upper_32_bits(addr); - m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE | IB_ATC_EN; - /* Although WinKFD writes this, I suspect it should not be necessary */ - m->cp_hqd_ib_control = IB_ATC_EN | DEFAULT_MIN_IB_AVAIL_SIZE; - m->cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS | QUANTUM_DURATION(10); @@ -412,7 +408,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) return NULL; - mqd = kzalloc(sizeof(*mqd), GFP_KERNEL); + mqd = kzalloc(sizeof(*mqd), GFP_NOIO); if (!mqd) return NULL; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c new file mode 100644 index 000000000000..684054ff02cd --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -0,0 +1,443 @@ +/* + * Copyright 2016-2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/printk.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include "kfd_priv.h" +#include "kfd_mqd_manager.h" +#include "v9_structs.h" +#include "gc/gc_9_0_offset.h" +#include "gc/gc_9_0_sh_mask.h" +#include "sdma0/sdma0_4_0_sh_mask.h" + +static inline struct v9_mqd *get_mqd(void *mqd) +{ + return (struct v9_mqd *)mqd; +} + +static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) +{ + return (struct v9_sdma_mqd *)mqd; +} + +static int init_mqd(struct mqd_manager *mm, void **mqd, + struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, + struct queue_properties *q) +{ + int retval; + uint64_t addr; + struct v9_mqd *m; + struct kfd_dev *kfd = mm->dev; + + /* From V9, for CWSR, the control stack is located on the next page + * boundary after the mqd, we will use the gtt allocation function + * instead of sub-allocation function. + */ + if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) { + *mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO); + if (!*mqd_mem_obj) + return -ENOMEM; + retval = kfd->kfd2kgd->init_gtt_mem_allocation(kfd->kgd, + ALIGN(q->ctl_stack_size, PAGE_SIZE) + + ALIGN(sizeof(struct v9_mqd), PAGE_SIZE), + &((*mqd_mem_obj)->gtt_mem), + &((*mqd_mem_obj)->gpu_addr), + (void *)&((*mqd_mem_obj)->cpu_ptr)); + } else + retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct v9_mqd), + mqd_mem_obj); + if (retval != 0) + return -ENOMEM; + + m = (struct v9_mqd *) (*mqd_mem_obj)->cpu_ptr; + addr = (*mqd_mem_obj)->gpu_addr; + + memset(m, 0, sizeof(struct v9_mqd)); + + m->header = 0xC0310800; + m->compute_pipelinestat_enable = 1; + m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF; + + m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK | + 0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT; + + m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT; + + m->cp_mqd_base_addr_lo = lower_32_bits(addr); + m->cp_mqd_base_addr_hi = upper_32_bits(addr); + + m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT | + 1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT | + 10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT; + + m->cp_hqd_pipe_priority = 1; + m->cp_hqd_queue_priority = 15; + + if (q->format == KFD_QUEUE_FORMAT_AQL) { + m->cp_hqd_aql_control = + 1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT; + } + + if (q->tba_addr) { + m->compute_pgm_rsrc2 |= + (1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT); + } + + if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) { + m->cp_hqd_persistent_state |= + (1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT); + m->cp_hqd_ctx_save_base_addr_lo = + lower_32_bits(q->ctx_save_restore_area_address); + m->cp_hqd_ctx_save_base_addr_hi = + upper_32_bits(q->ctx_save_restore_area_address); + m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size; + m->cp_hqd_cntl_stack_size = q->ctl_stack_size; + m->cp_hqd_cntl_stack_offset = q->ctl_stack_size; + m->cp_hqd_wg_state_offset = q->ctl_stack_size; + } + + *mqd = m; + if (gart_addr) + *gart_addr = addr; + retval = mm->update_mqd(mm, m, q); + + return retval; +} + +static int load_mqd(struct mqd_manager *mm, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + struct queue_properties *p, struct mm_struct *mms) +{ + /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */ + uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0); + + return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, + (uint32_t __user *)p->write_ptr, + wptr_shift, 0, mms); +} + +static int update_mqd(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + struct v9_mqd *m; + + m = get_mqd(mqd); + + m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; + m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1; + pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); + + m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); + m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); + + m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); + m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); + m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr); + m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr); + + m->cp_hqd_pq_doorbell_control = + q->doorbell_off << + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT; + pr_debug("cp_hqd_pq_doorbell_control 0x%x\n", + m->cp_hqd_pq_doorbell_control); + + m->cp_hqd_ib_control = + 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT | + 1 << CP_HQD_IB_CONTROL__IB_EXE_DISABLE__SHIFT; + + /* + * HW does not clamp this field correctly. Maximum EOP queue size + * is constrained by per-SE EOP done signal count, which is 8-bit. + * Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit + * more than (EOP entry count - 1) so a queue size of 0x800 dwords + * is safe, giving a maximum field value of 0xA. + */ + m->cp_hqd_eop_control = min(0xA, + order_base_2(q->eop_ring_buffer_size / 4) - 1); + m->cp_hqd_eop_base_addr_lo = + lower_32_bits(q->eop_ring_buffer_address >> 8); + m->cp_hqd_eop_base_addr_hi = + upper_32_bits(q->eop_ring_buffer_address >> 8); + + m->cp_hqd_iq_timer = 0; + + m->cp_hqd_vmid = q->vmid; + + if (q->format == KFD_QUEUE_FORMAT_AQL) { + m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK | + 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT | + 1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT | + 1 << CP_HQD_PQ_CONTROL__WPP_CLAMP_EN__SHIFT; + m->cp_hqd_pq_doorbell_control |= 1 << + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT; + } + if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) + m->cp_hqd_ctx_save_control = 0; + + q->is_active = (q->queue_size > 0 && + q->queue_address != 0 && + q->queue_percent > 0 && + !q->is_evicted); + + return 0; +} + + +static int destroy_mqd(struct mqd_manager *mm, void *mqd, + enum kfd_preempt_type type, + unsigned int timeout, uint32_t pipe_id, + uint32_t queue_id) +{ + return mm->dev->kfd2kgd->hqd_destroy + (mm->dev->kgd, mqd, type, timeout, + pipe_id, queue_id); +} + +static void uninit_mqd(struct mqd_manager *mm, void *mqd, + struct kfd_mem_obj *mqd_mem_obj) +{ + struct kfd_dev *kfd = mm->dev; + + if (mqd_mem_obj->gtt_mem) { + kfd->kfd2kgd->free_gtt_mem(kfd->kgd, mqd_mem_obj->gtt_mem); + kfree(mqd_mem_obj); + } else { + kfd_gtt_sa_free(mm->dev, mqd_mem_obj); + } +} + +static bool is_occupied(struct mqd_manager *mm, void *mqd, + uint64_t queue_address, uint32_t pipe_id, + uint32_t queue_id) +{ + return mm->dev->kfd2kgd->hqd_is_occupied( + mm->dev->kgd, queue_address, + pipe_id, queue_id); +} + +static int init_mqd_hiq(struct mqd_manager *mm, void **mqd, + struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, + struct queue_properties *q) +{ + struct v9_mqd *m; + int retval = init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q); + + if (retval != 0) + return retval; + + m = get_mqd(*mqd); + + m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT | + 1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT; + + return retval; +} + +static int update_mqd_hiq(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + struct v9_mqd *m; + int retval = update_mqd(mm, mqd, q); + + if (retval != 0) + return retval; + + /* TODO: what's the point? update_mqd already does this. */ + m = get_mqd(mqd); + m->cp_hqd_vmid = q->vmid; + return retval; +} + +static int init_mqd_sdma(struct mqd_manager *mm, void **mqd, + struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, + struct queue_properties *q) +{ + int retval; + struct v9_sdma_mqd *m; + + + retval = kfd_gtt_sa_allocate(mm->dev, + sizeof(struct v9_sdma_mqd), + mqd_mem_obj); + + if (retval != 0) + return -ENOMEM; + + m = (struct v9_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr; + + memset(m, 0, sizeof(struct v9_sdma_mqd)); + + *mqd = m; + if (gart_addr) + *gart_addr = (*mqd_mem_obj)->gpu_addr; + + retval = mm->update_mqd(mm, m, q); + + return retval; +} + +static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd, + struct kfd_mem_obj *mqd_mem_obj) +{ + kfd_gtt_sa_free(mm->dev, mqd_mem_obj); +} + +static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + struct queue_properties *p, struct mm_struct *mms) +{ + return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd, + (uint32_t __user *)p->write_ptr, + mms); +} + +#define SDMA_RLC_DUMMY_DEFAULT 0xf + +static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + struct v9_sdma_mqd *m; + + m = get_sdma_mqd(mqd); + m->sdmax_rlcx_rb_cntl = order_base_2(q->queue_size / 4) + << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT | + q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT | + 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT | + 6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT; + + m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8); + m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8); + m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr); + m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr); + m->sdmax_rlcx_doorbell_offset = + q->doorbell_off << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT; + + m->sdma_engine_id = q->sdma_engine_id; + m->sdma_queue_id = q->sdma_queue_id; + m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT; + + q->is_active = (q->queue_size > 0 && + q->queue_address != 0 && + q->queue_percent > 0 && + !q->is_evicted); + + return 0; +} + +/* + * * preempt type here is ignored because there is only one way + * * to preempt sdma queue + */ +static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd, + enum kfd_preempt_type type, + unsigned int timeout, uint32_t pipe_id, + uint32_t queue_id) +{ + return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout); +} + +static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd, + uint64_t queue_address, uint32_t pipe_id, + uint32_t queue_id) +{ + return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd); +} + +#if defined(CONFIG_DEBUG_FS) + +static int debugfs_show_mqd(struct seq_file *m, void *data) +{ + seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4, + data, sizeof(struct v9_mqd), false); + return 0; +} + +static int debugfs_show_mqd_sdma(struct seq_file *m, void *data) +{ + seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4, + data, sizeof(struct v9_sdma_mqd), false); + return 0; +} + +#endif + +struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, + struct kfd_dev *dev) +{ + struct mqd_manager *mqd; + + if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) + return NULL; + + mqd = kzalloc(sizeof(*mqd), GFP_NOIO); + if (!mqd) + return NULL; + + mqd->dev = dev; + + switch (type) { + case KFD_MQD_TYPE_CP: + case KFD_MQD_TYPE_COMPUTE: + mqd->init_mqd = init_mqd; + mqd->uninit_mqd = uninit_mqd; + mqd->load_mqd = load_mqd; + mqd->update_mqd = update_mqd; + mqd->destroy_mqd = destroy_mqd; + mqd->is_occupied = is_occupied; +#if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd; +#endif + break; + case KFD_MQD_TYPE_HIQ: + mqd->init_mqd = init_mqd_hiq; + mqd->uninit_mqd = uninit_mqd; + mqd->load_mqd = load_mqd; + mqd->update_mqd = update_mqd_hiq; + mqd->destroy_mqd = destroy_mqd; + mqd->is_occupied = is_occupied; +#if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd; +#endif + break; + case KFD_MQD_TYPE_SDMA: + mqd->init_mqd = init_mqd_sdma; + mqd->uninit_mqd = uninit_mqd_sdma; + mqd->load_mqd = load_mqd_sdma; + mqd->update_mqd = update_mqd_sdma; + mqd->destroy_mqd = destroy_mqd_sdma; + mqd->is_occupied = is_occupied_sdma; +#if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd_sdma; +#endif + break; + default: + kfree(mqd); + return NULL; + } + + return mqd; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index 89e4242e43e7..481307b8b4db 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -394,7 +394,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) return NULL; - mqd = kzalloc(sizeof(*mqd), GFP_KERNEL); + mqd = kzalloc(sizeof(*mqd), GFP_NOIO); if (!mqd) return NULL; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 89ba4c670ec5..c317feb43f69 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -26,8 +26,6 @@ #include "kfd_device_queue_manager.h" #include "kfd_kernel_queue.h" #include "kfd_priv.h" -#include "kfd_pm4_headers_vi.h" -#include "kfd_pm4_opcodes.h" static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes, unsigned int buffer_size_bytes) @@ -39,18 +37,6 @@ static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes, *wptr = temp; } -static unsigned int build_pm4_header(unsigned int opcode, size_t packet_size) -{ - union PM4_MES_TYPE_3_HEADER header; - - header.u32All = 0; - header.opcode = opcode; - header.count = packet_size / 4 - 2; - header.type = PM4_TYPE_3; - - return header.u32All; -} - static void pm_calc_rlib_size(struct packet_manager *pm, unsigned int *rlib_size, bool *over_subscription) @@ -80,9 +66,9 @@ static void pm_calc_rlib_size(struct packet_manager *pm, pr_debug("Over subscribed runlist\n"); } - map_queue_size = sizeof(struct pm4_mes_map_queues); + map_queue_size = pm->pmf->map_queues_size; /* calculate run list ib allocation size */ - *rlib_size = process_count * sizeof(struct pm4_mes_map_process) + + *rlib_size = process_count * pm->pmf->map_process_size + queue_count * map_queue_size; /* @@ -90,7 +76,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm, * when over subscription */ if (*over_subscription) - *rlib_size += sizeof(struct pm4_mes_runlist); + *rlib_size += pm->pmf->runlist_size; pr_debug("runlist ib size %d\n", *rlib_size); } @@ -108,12 +94,14 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm, pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription); + mutex_lock(&pm->lock); + retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size, &pm->ib_buffer_obj); if (retval) { pr_err("Failed to allocate runlist IB\n"); - return retval; + goto out; } *(void **)rl_buffer = pm->ib_buffer_obj->cpu_ptr; @@ -121,138 +109,10 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm, memset(*rl_buffer, 0, *rl_buffer_size); pm->allocated = true; - return retval; -} - -static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer, - uint64_t ib, size_t ib_size_in_dwords, bool chain) -{ - struct pm4_mes_runlist *packet; - int concurrent_proc_cnt = 0; - struct kfd_dev *kfd = pm->dqm->dev; - - if (WARN_ON(!ib)) - return -EFAULT; - - /* Determine the number of processes to map together to HW: - * it can not exceed the number of VMIDs available to the - * scheduler, and it is determined by the smaller of the number - * of processes in the runlist and kfd module parameter - * hws_max_conc_proc. - * Note: the arbitration between the number of VMIDs and - * hws_max_conc_proc has been done in - * kgd2kfd_device_init(). - */ - concurrent_proc_cnt = min(pm->dqm->processes_count, - kfd->max_proc_per_quantum); - - packet = (struct pm4_mes_runlist *)buffer; - - memset(buffer, 0, sizeof(struct pm4_mes_runlist)); - packet->header.u32All = build_pm4_header(IT_RUN_LIST, - sizeof(struct pm4_mes_runlist)); - - packet->bitfields4.ib_size = ib_size_in_dwords; - packet->bitfields4.chain = chain ? 1 : 0; - packet->bitfields4.offload_polling = 0; - packet->bitfields4.valid = 1; - packet->bitfields4.process_cnt = concurrent_proc_cnt; - packet->ordinal2 = lower_32_bits(ib); - packet->bitfields3.ib_base_hi = upper_32_bits(ib); - - return 0; -} - -static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer, - struct qcm_process_device *qpd) -{ - struct pm4_mes_map_process *packet; - - packet = (struct pm4_mes_map_process *)buffer; - memset(buffer, 0, sizeof(struct pm4_mes_map_process)); - - packet->header.u32All = build_pm4_header(IT_MAP_PROCESS, - sizeof(struct pm4_mes_map_process)); - packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; - packet->bitfields2.process_quantum = 1; - packet->bitfields2.pasid = qpd->pqm->process->pasid; - packet->bitfields3.page_table_base = qpd->page_table_base; - packet->bitfields10.gds_size = qpd->gds_size; - packet->bitfields10.num_gws = qpd->num_gws; - packet->bitfields10.num_oac = qpd->num_oac; - packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count; - - packet->sh_mem_config = qpd->sh_mem_config; - packet->sh_mem_bases = qpd->sh_mem_bases; - packet->sh_mem_ape1_base = qpd->sh_mem_ape1_base; - packet->sh_mem_ape1_limit = qpd->sh_mem_ape1_limit; - - packet->sh_hidden_private_base_vmid = qpd->sh_hidden_private_base; - - packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area); - packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area); - - return 0; -} - -static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer, - struct queue *q, bool is_static) -{ - struct pm4_mes_map_queues *packet; - bool use_static = is_static; - - packet = (struct pm4_mes_map_queues *)buffer; - memset(buffer, 0, sizeof(struct pm4_mes_map_queues)); - - packet->header.u32All = build_pm4_header(IT_MAP_QUEUES, - sizeof(struct pm4_mes_map_queues)); - packet->bitfields2.alloc_format = - alloc_format__mes_map_queues__one_per_pipe_vi; - packet->bitfields2.num_queues = 1; - packet->bitfields2.queue_sel = - queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi; - - packet->bitfields2.engine_sel = - engine_sel__mes_map_queues__compute_vi; - packet->bitfields2.queue_type = - queue_type__mes_map_queues__normal_compute_vi; - - switch (q->properties.type) { - case KFD_QUEUE_TYPE_COMPUTE: - if (use_static) - packet->bitfields2.queue_type = - queue_type__mes_map_queues__normal_latency_static_queue_vi; - break; - case KFD_QUEUE_TYPE_DIQ: - packet->bitfields2.queue_type = - queue_type__mes_map_queues__debug_interface_queue_vi; - break; - case KFD_QUEUE_TYPE_SDMA: - packet->bitfields2.engine_sel = q->properties.sdma_engine_id + - engine_sel__mes_map_queues__sdma0_vi; - use_static = false; /* no static queues under SDMA */ - break; - default: - WARN(1, "queue type %d", q->properties.type); - return -EINVAL; - } - packet->bitfields3.doorbell_offset = - q->properties.doorbell_off; - - packet->mqd_addr_lo = - lower_32_bits(q->gart_mqd_addr); - - packet->mqd_addr_hi = - upper_32_bits(q->gart_mqd_addr); - - packet->wptr_addr_lo = - lower_32_bits((uint64_t)q->properties.write_ptr); - - packet->wptr_addr_hi = - upper_32_bits((uint64_t)q->properties.write_ptr); - - return 0; +out: + mutex_unlock(&pm->lock); + return retval; } static int pm_create_runlist_ib(struct packet_manager *pm, @@ -292,12 +152,12 @@ static int pm_create_runlist_ib(struct packet_manager *pm, return -ENOMEM; } - retval = pm_create_map_process(pm, &rl_buffer[rl_wptr], qpd); + retval = pm->pmf->map_process(pm, &rl_buffer[rl_wptr], qpd); if (retval) return retval; proccesses_mapped++; - inc_wptr(&rl_wptr, sizeof(struct pm4_mes_map_process), + inc_wptr(&rl_wptr, pm->pmf->map_process_size, alloc_size_bytes); list_for_each_entry(kq, &qpd->priv_queue_list, list) { @@ -307,7 +167,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm, pr_debug("static_queue, mapping kernel q %d, is debug status %d\n", kq->queue->queue, qpd->is_debug); - retval = pm_create_map_queue(pm, + retval = pm->pmf->map_queues(pm, &rl_buffer[rl_wptr], kq->queue, qpd->is_debug); @@ -315,7 +175,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm, return retval; inc_wptr(&rl_wptr, - sizeof(struct pm4_mes_map_queues), + pm->pmf->map_queues_size, alloc_size_bytes); } @@ -326,7 +186,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm, pr_debug("static_queue, mapping user queue %d, is debug status %d\n", q->queue, qpd->is_debug); - retval = pm_create_map_queue(pm, + retval = pm->pmf->map_queues(pm, &rl_buffer[rl_wptr], q, qpd->is_debug); @@ -335,7 +195,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm, return retval; inc_wptr(&rl_wptr, - sizeof(struct pm4_mes_map_queues), + pm->pmf->map_queues_size, alloc_size_bytes); } } @@ -343,7 +203,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm, pr_debug("Finished map process and queues to runlist\n"); if (is_over_subscription) - retval = pm_create_runlist(pm, &rl_buffer[rl_wptr], + retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr], *rl_gpu_addr, alloc_size_bytes / sizeof(uint32_t), true); @@ -355,45 +215,29 @@ static int pm_create_runlist_ib(struct packet_manager *pm, return retval; } -/* pm_create_release_mem - Create a RELEASE_MEM packet and return the size - * of this packet - * @gpu_addr - GPU address of the packet. It's a virtual address. - * @buffer - buffer to fill up with the packet. It's a CPU kernel pointer - * Return - length of the packet - */ -uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer) -{ - struct pm4_mec_release_mem *packet; - - WARN_ON(!buffer); - - packet = (struct pm4_mec_release_mem *)buffer; - memset(buffer, 0, sizeof(*packet)); - - packet->header.u32All = build_pm4_header(IT_RELEASE_MEM, - sizeof(*packet)); - - packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; - packet->bitfields2.event_index = event_index___release_mem__end_of_pipe; - packet->bitfields2.tcl1_action_ena = 1; - packet->bitfields2.tc_action_ena = 1; - packet->bitfields2.cache_policy = cache_policy___release_mem__lru; - packet->bitfields2.atc = 0; - - packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low; - packet->bitfields3.int_sel = - int_sel___release_mem__send_interrupt_after_write_confirm; - - packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2; - packet->address_hi = upper_32_bits(gpu_addr); - - packet->data_lo = 0; - - return sizeof(*packet) / sizeof(unsigned int); -} - int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) { + switch (dqm->dev->device_info->asic_family) { + case CHIP_KAVERI: + case CHIP_HAWAII: + /* PM4 packet structures on CIK are the same as on VI */ + case CHIP_CARRIZO: + case CHIP_TONGA: + case CHIP_FIJI: + case CHIP_POLARIS10: + case CHIP_POLARIS11: + pm->pmf = &kfd_vi_pm_funcs; + break; + case CHIP_VEGA10: + case CHIP_RAVEN: + pm->pmf = &kfd_v9_pm_funcs; + break; + default: + WARN(1, "Unexpected ASIC family %u", + dqm->dev->device_info->asic_family); + return -EINVAL; + } + pm->dqm = dqm; mutex_init(&pm->lock); pm->priv_queue = kernel_queue_init(dqm->dev, KFD_QUEUE_TYPE_HIQ); @@ -415,38 +259,25 @@ void pm_uninit(struct packet_manager *pm) int pm_send_set_resources(struct packet_manager *pm, struct scheduling_resources *res) { - struct pm4_mes_set_resources *packet; + uint32_t *buffer, size; int retval = 0; + size = pm->pmf->set_resources_size; mutex_lock(&pm->lock); pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, - sizeof(*packet) / sizeof(uint32_t), - (unsigned int **)&packet); - if (!packet) { + size / sizeof(uint32_t), + (unsigned int **)&buffer); + if (!buffer) { pr_err("Failed to allocate buffer on kernel queue\n"); retval = -ENOMEM; goto out; } - memset(packet, 0, sizeof(struct pm4_mes_set_resources)); - packet->header.u32All = build_pm4_header(IT_SET_RESOURCES, - sizeof(struct pm4_mes_set_resources)); - - packet->bitfields2.queue_type = - queue_type__mes_set_resources__hsa_interface_queue_hiq; - packet->bitfields2.vmid_mask = res->vmid_mask; - packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100; - packet->bitfields7.oac_mask = res->oac_mask; - packet->bitfields8.gds_heap_base = res->gds_heap_base; - packet->bitfields8.gds_heap_size = res->gds_heap_size; - - packet->gws_mask_lo = lower_32_bits(res->gws_mask); - packet->gws_mask_hi = upper_32_bits(res->gws_mask); - - packet->queue_mask_lo = lower_32_bits(res->queue_mask); - packet->queue_mask_hi = upper_32_bits(res->queue_mask); - - pm->priv_queue->ops.submit_packet(pm->priv_queue); + retval = pm->pmf->set_resources(pm, buffer, res); + if (!retval) + pm->priv_queue->ops.submit_packet(pm->priv_queue); + else + pm->priv_queue->ops.rollback_packet(pm->priv_queue); out: mutex_unlock(&pm->lock); @@ -468,7 +299,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr); - packet_size_dwords = sizeof(struct pm4_mes_runlist) / sizeof(uint32_t); + packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t); mutex_lock(&pm->lock); retval = pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, @@ -476,7 +307,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) if (retval) goto fail_acquire_packet_buffer; - retval = pm_create_runlist(pm, rl_buffer, rl_gpu_ib_addr, + retval = pm->pmf->runlist(pm, rl_buffer, rl_gpu_ib_addr, rl_ib_size / sizeof(uint32_t), false); if (retval) goto fail_create_runlist; @@ -499,37 +330,29 @@ fail_create_runlist_ib: int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, uint32_t fence_value) { - int retval; - struct pm4_mes_query_status *packet; + uint32_t *buffer, size; + int retval = 0; if (WARN_ON(!fence_address)) return -EFAULT; + size = pm->pmf->query_status_size; mutex_lock(&pm->lock); - retval = pm->priv_queue->ops.acquire_packet_buffer( - pm->priv_queue, - sizeof(struct pm4_mes_query_status) / sizeof(uint32_t), - (unsigned int **)&packet); - if (retval) - goto fail_acquire_packet_buffer; - - packet->header.u32All = build_pm4_header(IT_QUERY_STATUS, - sizeof(struct pm4_mes_query_status)); - - packet->bitfields2.context_id = 0; - packet->bitfields2.interrupt_sel = - interrupt_sel__mes_query_status__completion_status; - packet->bitfields2.command = - command__mes_query_status__fence_only_after_write_ack; - - packet->addr_hi = upper_32_bits((uint64_t)fence_address); - packet->addr_lo = lower_32_bits((uint64_t)fence_address); - packet->data_hi = upper_32_bits((uint64_t)fence_value); - packet->data_lo = lower_32_bits((uint64_t)fence_value); + pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, + size / sizeof(uint32_t), (unsigned int **)&buffer); + if (!buffer) { + pr_err("Failed to allocate buffer on kernel queue\n"); + retval = -ENOMEM; + goto out; + } - pm->priv_queue->ops.submit_packet(pm->priv_queue); + retval = pm->pmf->query_status(pm, buffer, fence_address, fence_value); + if (!retval) + pm->priv_queue->ops.submit_packet(pm->priv_queue); + else + pm->priv_queue->ops.rollback_packet(pm->priv_queue); -fail_acquire_packet_buffer: +out: mutex_unlock(&pm->lock); return retval; } @@ -539,82 +362,27 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, uint32_t filter_param, bool reset, unsigned int sdma_engine) { - int retval; - uint32_t *buffer; - struct pm4_mes_unmap_queues *packet; + uint32_t *buffer, size; + int retval = 0; + size = pm->pmf->unmap_queues_size; mutex_lock(&pm->lock); - retval = pm->priv_queue->ops.acquire_packet_buffer( - pm->priv_queue, - sizeof(struct pm4_mes_unmap_queues) / sizeof(uint32_t), - &buffer); - if (retval) - goto err_acquire_packet_buffer; - - packet = (struct pm4_mes_unmap_queues *)buffer; - memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues)); - pr_debug("static_queue: unmapping queues: filter is %d , reset is %d , type is %d\n", - filter, reset, type); - packet->header.u32All = build_pm4_header(IT_UNMAP_QUEUES, - sizeof(struct pm4_mes_unmap_queues)); - switch (type) { - case KFD_QUEUE_TYPE_COMPUTE: - case KFD_QUEUE_TYPE_DIQ: - packet->bitfields2.engine_sel = - engine_sel__mes_unmap_queues__compute; - break; - case KFD_QUEUE_TYPE_SDMA: - packet->bitfields2.engine_sel = - engine_sel__mes_unmap_queues__sdma0 + sdma_engine; - break; - default: - WARN(1, "queue type %d", type); - retval = -EINVAL; - goto err_invalid; + pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, + size / sizeof(uint32_t), (unsigned int **)&buffer); + if (!buffer) { + pr_err("Failed to allocate buffer on kernel queue\n"); + retval = -ENOMEM; + goto out; } - if (reset) - packet->bitfields2.action = - action__mes_unmap_queues__reset_queues; + retval = pm->pmf->unmap_queues(pm, buffer, type, filter, filter_param, + reset, sdma_engine); + if (!retval) + pm->priv_queue->ops.submit_packet(pm->priv_queue); else - packet->bitfields2.action = - action__mes_unmap_queues__preempt_queues; - - switch (filter) { - case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE: - packet->bitfields2.queue_sel = - queue_sel__mes_unmap_queues__perform_request_on_specified_queues; - packet->bitfields2.num_queues = 1; - packet->bitfields3b.doorbell_offset0 = filter_param; - break; - case KFD_UNMAP_QUEUES_FILTER_BY_PASID: - packet->bitfields2.queue_sel = - queue_sel__mes_unmap_queues__perform_request_on_pasid_queues; - packet->bitfields3a.pasid = filter_param; - break; - case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES: - packet->bitfields2.queue_sel = - queue_sel__mes_unmap_queues__unmap_all_queues; - break; - case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES: - /* in this case, we do not preempt static queues */ - packet->bitfields2.queue_sel = - queue_sel__mes_unmap_queues__unmap_all_non_static_queues; - break; - default: - WARN(1, "filter %d", filter); - retval = -EINVAL; - goto err_invalid; - } + pm->priv_queue->ops.rollback_packet(pm->priv_queue); - pm->priv_queue->ops.submit_packet(pm->priv_queue); - - mutex_unlock(&pm->lock); - return 0; - -err_invalid: - pm->priv_queue->ops.rollback_packet(pm->priv_queue); -err_acquire_packet_buffer: +out: mutex_unlock(&pm->lock); return retval; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h new file mode 100644 index 000000000000..f2bcf5c092ea --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h @@ -0,0 +1,583 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef F32_MES_PM4_PACKETS_H +#define F32_MES_PM4_PACKETS_H + +#ifndef PM4_MES_HEADER_DEFINED +#define PM4_MES_HEADER_DEFINED +union PM4_MES_TYPE_3_HEADER { + struct { + uint32_t reserved1 : 8; /* < reserved */ + uint32_t opcode : 8; /* < IT opcode */ + uint32_t count : 14;/* < number of DWORDs - 1 in the + * information body. + */ + uint32_t type : 2; /* < packet identifier. + * It should be 3 for type 3 packets + */ + }; + uint32_t u32All; +}; +#endif /* PM4_MES_HEADER_DEFINED */ + +/*--------------------MES_SET_RESOURCES--------------------*/ + +#ifndef PM4_MES_SET_RESOURCES_DEFINED +#define PM4_MES_SET_RESOURCES_DEFINED +enum mes_set_resources_queue_type_enum { + queue_type__mes_set_resources__kernel_interface_queue_kiq = 0, + queue_type__mes_set_resources__hsa_interface_queue_hiq = 1, + queue_type__mes_set_resources__hsa_debug_interface_queue = 4 +}; + + +struct pm4_mes_set_resources { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + uint32_t vmid_mask:16; + uint32_t unmap_latency:8; + uint32_t reserved1:5; + enum mes_set_resources_queue_type_enum queue_type:3; + } bitfields2; + uint32_t ordinal2; + }; + + uint32_t queue_mask_lo; + uint32_t queue_mask_hi; + uint32_t gws_mask_lo; + uint32_t gws_mask_hi; + + union { + struct { + uint32_t oac_mask:16; + uint32_t reserved2:16; + } bitfields7; + uint32_t ordinal7; + }; + + union { + struct { + uint32_t gds_heap_base:6; + uint32_t reserved3:5; + uint32_t gds_heap_size:6; + uint32_t reserved4:15; + } bitfields8; + uint32_t ordinal8; + }; + +}; +#endif + +/*--------------------MES_RUN_LIST--------------------*/ + +#ifndef PM4_MES_RUN_LIST_DEFINED +#define PM4_MES_RUN_LIST_DEFINED + +struct pm4_mes_runlist { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + uint32_t reserved1:2; + uint32_t ib_base_lo:30; + } bitfields2; + uint32_t ordinal2; + }; + + uint32_t ib_base_hi; + + union { + struct { + uint32_t ib_size:20; + uint32_t chain:1; + uint32_t offload_polling:1; + uint32_t reserved2:1; + uint32_t valid:1; + uint32_t process_cnt:4; + uint32_t reserved3:4; + } bitfields4; + uint32_t ordinal4; + }; + +}; +#endif + +/*--------------------MES_MAP_PROCESS--------------------*/ + +#ifndef PM4_MES_MAP_PROCESS_DEFINED +#define PM4_MES_MAP_PROCESS_DEFINED + +struct pm4_mes_map_process { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + uint32_t pasid:16; + uint32_t reserved1:8; + uint32_t diq_enable:1; + uint32_t process_quantum:7; + } bitfields2; + uint32_t ordinal2; + }; + + uint32_t vm_context_page_table_base_addr_lo32; + + uint32_t vm_context_page_table_base_addr_hi32; + + uint32_t sh_mem_bases; + + uint32_t sh_mem_config; + + uint32_t sq_shader_tba_lo; + + uint32_t sq_shader_tba_hi; + + uint32_t sq_shader_tma_lo; + + uint32_t sq_shader_tma_hi; + + uint32_t reserved6; + + uint32_t gds_addr_lo; + + uint32_t gds_addr_hi; + + union { + struct { + uint32_t num_gws:6; + uint32_t reserved7:1; + uint32_t sdma_enable:1; + uint32_t num_oac:4; + uint32_t reserved8:4; + uint32_t gds_size:6; + uint32_t num_queues:10; + } bitfields14; + uint32_t ordinal14; + }; + + uint32_t completion_signal_lo; + + uint32_t completion_signal_hi; + +}; + +#endif + +/*--------------------MES_MAP_PROCESS_VM--------------------*/ + +#ifndef PM4_MES_MAP_PROCESS_VM_DEFINED +#define PM4_MES_MAP_PROCESS_VM_DEFINED + +struct PM4_MES_MAP_PROCESS_VM { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + uint32_t reserved1; + + uint32_t vm_context_cntl; + + uint32_t reserved2; + + uint32_t vm_context_page_table_end_addr_lo32; + + uint32_t vm_context_page_table_end_addr_hi32; + + uint32_t vm_context_page_table_start_addr_lo32; + + uint32_t vm_context_page_table_start_addr_hi32; + + uint32_t reserved3; + + uint32_t reserved4; + + uint32_t reserved5; + + uint32_t reserved6; + + uint32_t reserved7; + + uint32_t reserved8; + + uint32_t completion_signal_lo32; + + uint32_t completion_signal_hi32; + +}; +#endif + +/*--------------------MES_MAP_QUEUES--------------------*/ + +#ifndef PM4_MES_MAP_QUEUES_VI_DEFINED +#define PM4_MES_MAP_QUEUES_VI_DEFINED +enum mes_map_queues_queue_sel_enum { + queue_sel__mes_map_queues__map_to_specified_queue_slots_vi = 0, +queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi = 1 +}; + +enum mes_map_queues_queue_type_enum { + queue_type__mes_map_queues__normal_compute_vi = 0, + queue_type__mes_map_queues__debug_interface_queue_vi = 1, + queue_type__mes_map_queues__normal_latency_static_queue_vi = 2, +queue_type__mes_map_queues__low_latency_static_queue_vi = 3 +}; + +enum mes_map_queues_alloc_format_enum { + alloc_format__mes_map_queues__one_per_pipe_vi = 0, +alloc_format__mes_map_queues__all_on_one_pipe_vi = 1 +}; + +enum mes_map_queues_engine_sel_enum { + engine_sel__mes_map_queues__compute_vi = 0, + engine_sel__mes_map_queues__sdma0_vi = 2, + engine_sel__mes_map_queues__sdma1_vi = 3 +}; + + +struct pm4_mes_map_queues { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + uint32_t reserved1:4; + enum mes_map_queues_queue_sel_enum queue_sel:2; + uint32_t reserved2:15; + enum mes_map_queues_queue_type_enum queue_type:3; + enum mes_map_queues_alloc_format_enum alloc_format:2; + enum mes_map_queues_engine_sel_enum engine_sel:3; + uint32_t num_queues:3; + } bitfields2; + uint32_t ordinal2; + }; + + union { + struct { + uint32_t reserved3:1; + uint32_t check_disable:1; + uint32_t doorbell_offset:26; + uint32_t reserved4:4; + } bitfields3; + uint32_t ordinal3; + }; + + uint32_t mqd_addr_lo; + uint32_t mqd_addr_hi; + uint32_t wptr_addr_lo; + uint32_t wptr_addr_hi; +}; +#endif + +/*--------------------MES_QUERY_STATUS--------------------*/ + +#ifndef PM4_MES_QUERY_STATUS_DEFINED +#define PM4_MES_QUERY_STATUS_DEFINED +enum mes_query_status_interrupt_sel_enum { + interrupt_sel__mes_query_status__completion_status = 0, + interrupt_sel__mes_query_status__process_status = 1, + interrupt_sel__mes_query_status__queue_status = 2 +}; + +enum mes_query_status_command_enum { + command__mes_query_status__interrupt_only = 0, + command__mes_query_status__fence_only_immediate = 1, + command__mes_query_status__fence_only_after_write_ack = 2, + command__mes_query_status__fence_wait_for_write_ack_send_interrupt = 3 +}; + +enum mes_query_status_engine_sel_enum { + engine_sel__mes_query_status__compute = 0, + engine_sel__mes_query_status__sdma0_queue = 2, + engine_sel__mes_query_status__sdma1_queue = 3 +}; + +struct pm4_mes_query_status { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + uint32_t context_id:28; + enum mes_query_status_interrupt_sel_enum interrupt_sel:2; + enum mes_query_status_command_enum command:2; + } bitfields2; + uint32_t ordinal2; + }; + + union { + struct { + uint32_t pasid:16; + uint32_t reserved1:16; + } bitfields3a; + struct { + uint32_t reserved2:2; + uint32_t doorbell_offset:26; + enum mes_query_status_engine_sel_enum engine_sel:3; + uint32_t reserved3:1; + } bitfields3b; + uint32_t ordinal3; + }; + + uint32_t addr_lo; + uint32_t addr_hi; + uint32_t data_lo; + uint32_t data_hi; +}; +#endif + +/*--------------------MES_UNMAP_QUEUES--------------------*/ + +#ifndef PM4_MES_UNMAP_QUEUES_DEFINED +#define PM4_MES_UNMAP_QUEUES_DEFINED +enum mes_unmap_queues_action_enum { + action__mes_unmap_queues__preempt_queues = 0, + action__mes_unmap_queues__reset_queues = 1, + action__mes_unmap_queues__disable_process_queues = 2, + action__mes_unmap_queues__reserved = 3 +}; + +enum mes_unmap_queues_queue_sel_enum { + queue_sel__mes_unmap_queues__perform_request_on_specified_queues = 0, + queue_sel__mes_unmap_queues__perform_request_on_pasid_queues = 1, + queue_sel__mes_unmap_queues__unmap_all_queues = 2, + queue_sel__mes_unmap_queues__unmap_all_non_static_queues = 3 +}; + +enum mes_unmap_queues_engine_sel_enum { + engine_sel__mes_unmap_queues__compute = 0, + engine_sel__mes_unmap_queues__sdma0 = 2, + engine_sel__mes_unmap_queues__sdmal = 3 +}; + +struct pm4_mes_unmap_queues { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + enum mes_unmap_queues_action_enum action:2; + uint32_t reserved1:2; + enum mes_unmap_queues_queue_sel_enum queue_sel:2; + uint32_t reserved2:20; + enum mes_unmap_queues_engine_sel_enum engine_sel:3; + uint32_t num_queues:3; + } bitfields2; + uint32_t ordinal2; + }; + + union { + struct { + uint32_t pasid:16; + uint32_t reserved3:16; + } bitfields3a; + struct { + uint32_t reserved4:2; + uint32_t doorbell_offset0:26; + int32_t reserved5:4; + } bitfields3b; + uint32_t ordinal3; + }; + + union { + struct { + uint32_t reserved6:2; + uint32_t doorbell_offset1:26; + uint32_t reserved7:4; + } bitfields4; + uint32_t ordinal4; + }; + + union { + struct { + uint32_t reserved8:2; + uint32_t doorbell_offset2:26; + uint32_t reserved9:4; + } bitfields5; + uint32_t ordinal5; + }; + + union { + struct { + uint32_t reserved10:2; + uint32_t doorbell_offset3:26; + uint32_t reserved11:4; + } bitfields6; + uint32_t ordinal6; + }; +}; +#endif + +#ifndef PM4_MEC_RELEASE_MEM_DEFINED +#define PM4_MEC_RELEASE_MEM_DEFINED + +enum mec_release_mem_event_index_enum { + event_index__mec_release_mem__end_of_pipe = 5, + event_index__mec_release_mem__shader_done = 6 +}; + +enum mec_release_mem_cache_policy_enum { + cache_policy__mec_release_mem__lru = 0, + cache_policy__mec_release_mem__stream = 1 +}; + +enum mec_release_mem_pq_exe_status_enum { + pq_exe_status__mec_release_mem__default = 0, + pq_exe_status__mec_release_mem__phase_update = 1 +}; + +enum mec_release_mem_dst_sel_enum { + dst_sel__mec_release_mem__memory_controller = 0, + dst_sel__mec_release_mem__tc_l2 = 1, + dst_sel__mec_release_mem__queue_write_pointer_register = 2, + dst_sel__mec_release_mem__queue_write_pointer_poll_mask_bit = 3 +}; + +enum mec_release_mem_int_sel_enum { + int_sel__mec_release_mem__none = 0, + int_sel__mec_release_mem__send_interrupt_only = 1, + int_sel__mec_release_mem__send_interrupt_after_write_confirm = 2, + int_sel__mec_release_mem__send_data_after_write_confirm = 3, + int_sel__mec_release_mem__unconditionally_send_int_ctxid = 4, + int_sel__mec_release_mem__conditionally_send_int_ctxid_based_on_32_bit_compare = 5, + int_sel__mec_release_mem__conditionally_send_int_ctxid_based_on_64_bit_compare = 6 +}; + +enum mec_release_mem_data_sel_enum { + data_sel__mec_release_mem__none = 0, + data_sel__mec_release_mem__send_32_bit_low = 1, + data_sel__mec_release_mem__send_64_bit_data = 2, + data_sel__mec_release_mem__send_gpu_clock_counter = 3, + data_sel__mec_release_mem__send_cp_perfcounter_hi_lo = 4, + data_sel__mec_release_mem__store_gds_data_to_memory = 5 +}; + +struct pm4_mec_release_mem { + union { + union PM4_MES_TYPE_3_HEADER header; /*header */ + unsigned int ordinal1; + }; + + union { + struct { + unsigned int event_type:6; + unsigned int reserved1:2; + enum mec_release_mem_event_index_enum event_index:4; + unsigned int tcl1_vol_action_ena:1; + unsigned int tc_vol_action_ena:1; + unsigned int reserved2:1; + unsigned int tc_wb_action_ena:1; + unsigned int tcl1_action_ena:1; + unsigned int tc_action_ena:1; + uint32_t reserved3:1; + uint32_t tc_nc_action_ena:1; + uint32_t tc_wc_action_ena:1; + uint32_t tc_md_action_ena:1; + uint32_t reserved4:3; + enum mec_release_mem_cache_policy_enum cache_policy:2; + uint32_t reserved5:2; + enum mec_release_mem_pq_exe_status_enum pq_exe_status:1; + uint32_t reserved6:2; + } bitfields2; + unsigned int ordinal2; + }; + + union { + struct { + uint32_t reserved7:16; + enum mec_release_mem_dst_sel_enum dst_sel:2; + uint32_t reserved8:6; + enum mec_release_mem_int_sel_enum int_sel:3; + uint32_t reserved9:2; + enum mec_release_mem_data_sel_enum data_sel:3; + } bitfields3; + unsigned int ordinal3; + }; + + union { + struct { + uint32_t reserved10:2; + unsigned int address_lo_32b:30; + } bitfields4; + struct { + uint32_t reserved11:3; + uint32_t address_lo_64b:29; + } bitfields4b; + uint32_t reserved12; + unsigned int ordinal4; + }; + + union { + uint32_t address_hi; + uint32_t reserved13; + uint32_t ordinal5; + }; + + union { + uint32_t data_lo; + uint32_t cmp_data_lo; + struct { + uint32_t dw_offset:16; + uint32_t num_dwords:16; + } bitfields6c; + uint32_t reserved14; + uint32_t ordinal6; + }; + + union { + uint32_t data_hi; + uint32_t cmp_data_hi; + uint32_t reserved15; + uint32_t reserved16; + uint32_t ordinal7; + }; + + uint32_t int_ctxid; + +}; + +#endif + +enum { + CACHE_FLUSH_AND_INV_TS_EVENT = 0x00000014 +}; +#endif + diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 96a9cc0f02c9..5e3990bb4c4b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -39,11 +39,37 @@ #include "amd_shared.h" +#define KFD_MAX_RING_ENTRY_SIZE 8 + #define KFD_SYSFS_FILE_MODE 0444 -#define KFD_MMAP_DOORBELL_MASK 0x8000000000000ull -#define KFD_MMAP_EVENTS_MASK 0x4000000000000ull -#define KFD_MMAP_RESERVED_MEM_MASK 0x2000000000000ull +/* GPU ID hash width in bits */ +#define KFD_GPU_ID_HASH_WIDTH 16 + +/* Use upper bits of mmap offset to store KFD driver specific information. + * BITS[63:62] - Encode MMAP type + * BITS[61:46] - Encode gpu_id. To identify to which GPU the offset belongs to + * BITS[45:0] - MMAP offset value + * + * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these + * defines are w.r.t to PAGE_SIZE + */ +#define KFD_MMAP_TYPE_SHIFT (62 - PAGE_SHIFT) +#define KFD_MMAP_TYPE_MASK (0x3ULL << KFD_MMAP_TYPE_SHIFT) +#define KFD_MMAP_TYPE_DOORBELL (0x3ULL << KFD_MMAP_TYPE_SHIFT) +#define KFD_MMAP_TYPE_EVENTS (0x2ULL << KFD_MMAP_TYPE_SHIFT) +#define KFD_MMAP_TYPE_RESERVED_MEM (0x1ULL << KFD_MMAP_TYPE_SHIFT) + +#define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT) +#define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \ + << KFD_MMAP_GPU_ID_SHIFT) +#define KFD_MMAP_GPU_ID(gpu_id) ((((uint64_t)gpu_id) << KFD_MMAP_GPU_ID_SHIFT)\ + & KFD_MMAP_GPU_ID_MASK) +#define KFD_MMAP_GPU_ID_GET(offset) ((offset & KFD_MMAP_GPU_ID_MASK) \ + >> KFD_MMAP_GPU_ID_SHIFT) + +#define KFD_MMAP_OFFSET_VALUE_MASK (0x3FFFFFFFFFFFULL >> PAGE_SHIFT) +#define KFD_MMAP_OFFSET_VALUE_GET(offset) (offset & KFD_MMAP_OFFSET_VALUE_MASK) /* * When working with cp scheduler we should assign the HIQ manually or via @@ -55,9 +81,6 @@ #define KFD_CIK_HIQ_PIPE 4 #define KFD_CIK_HIQ_QUEUE 0 -/* GPU ID hash width in bits */ -#define KFD_GPU_ID_HASH_WIDTH 16 - /* Macro for allocating structures */ #define kfd_alloc_struct(ptr_to_struct) \ ((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL)) @@ -116,6 +139,11 @@ extern int debug_largebar; */ extern int ignore_crat; +/* + * Set sh_mem_config.retry_disable on Vega10 + */ +extern int vega10_noretry; + /** * enum kfd_sched_policy * @@ -148,6 +176,8 @@ enum cache_policy { cache_policy_noncoherent }; +#define KFD_IS_SOC15(chip) ((chip) >= CHIP_VEGA10) + struct kfd_event_interrupt_class { bool (*interrupt_isr)(struct kfd_dev *dev, const uint32_t *ih_ring_entry); @@ -160,6 +190,7 @@ struct kfd_device_info { const struct kfd_event_interrupt_class *event_interrupt_class; unsigned int max_pasid_bits; unsigned int max_no_of_hqd; + unsigned int doorbell_size; size_t ih_ring_entry_size; uint8_t num_of_watch_points; uint16_t mqd_size_aligned; @@ -173,6 +204,7 @@ struct kfd_mem_obj { uint32_t range_end; uint64_t gpu_addr; uint32_t *cpu_ptr; + void *gtt_mem; }; struct kfd_vmid_info { @@ -364,7 +396,7 @@ struct queue_properties { uint32_t queue_percent; uint32_t *read_ptr; uint32_t *write_ptr; - uint32_t __iomem *doorbell_ptr; + void __iomem *doorbell_ptr; uint32_t doorbell_off; bool is_interop; bool is_evicted; @@ -427,6 +459,7 @@ struct queue { uint32_t queue; unsigned int sdma_id; + unsigned int doorbell_id; struct kfd_process *process; struct kfd_dev *device; @@ -501,6 +534,9 @@ struct qcm_process_device { /* IB memory */ uint64_t ib_base; void *ib_kaddr; + + /* doorbell resources per process per device */ + unsigned long *doorbell_bitmap; }; /* KFD Memory Eviction */ @@ -512,6 +548,8 @@ struct qcm_process_device { /* Approx. time before evicting the process again */ #define PROCESS_ACTIVE_TIME_MS 10 +int kgd2kfd_quiesce_mm(struct mm_struct *mm); +int kgd2kfd_resume_mm(struct mm_struct *mm); int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, struct dma_fence *fence); @@ -681,6 +719,8 @@ struct kfd_process *kfd_get_process(const struct task_struct *); struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid); struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm); void kfd_unref_process(struct kfd_process *p); +int kfd_process_evict_queues(struct kfd_process *p); +int kfd_process_restore_queues(struct kfd_process *p); void kfd_suspend_all_processes(void); int kfd_resume_all_processes(void); @@ -693,7 +733,7 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, struct kfd_process *p); -int kfd_reserved_mem_mmap(struct kfd_process *process, +int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process, struct vm_area_struct *vma); /* KFD process API for creating and translating handles */ @@ -721,17 +761,20 @@ unsigned int kfd_pasid_alloc(void); void kfd_pasid_free(unsigned int pasid); /* Doorbells */ +size_t kfd_doorbell_process_slice(struct kfd_dev *kfd); int kfd_doorbell_init(struct kfd_dev *kfd); void kfd_doorbell_fini(struct kfd_dev *kfd); -int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma); -u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, +int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process, + struct vm_area_struct *vma); +void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, unsigned int *doorbell_off); void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr); u32 read_kernel_doorbell(u32 __iomem *db); -void write_kernel_doorbell(u32 __iomem *db, u32 value); -unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd, +void write_kernel_doorbell(void __iomem *db, u32 value); +void write_kernel_doorbell64(void __iomem *db, u64 value); +unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd, struct kfd_process *process, - unsigned int queue_id); + unsigned int doorbell_id); phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev, struct kfd_process *process); int kfd_alloc_process_doorbells(struct kfd_process *process); @@ -788,6 +831,8 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, struct kfd_dev *dev); struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type, struct kfd_dev *dev); +struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, + struct kfd_dev *dev); struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev); void device_queue_manager_uninit(struct device_queue_manager *dqm); struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, @@ -832,8 +877,42 @@ struct packet_manager { bool allocated; struct kfd_mem_obj *ib_buffer_obj; unsigned int ib_size_bytes; + + const struct packet_manager_funcs *pmf; +}; + +struct packet_manager_funcs { + /* Support ASIC-specific packet formats for PM4 packets */ + int (*map_process)(struct packet_manager *pm, uint32_t *buffer, + struct qcm_process_device *qpd); + int (*runlist)(struct packet_manager *pm, uint32_t *buffer, + uint64_t ib, size_t ib_size_in_dwords, bool chain); + int (*set_resources)(struct packet_manager *pm, uint32_t *buffer, + struct scheduling_resources *res); + int (*map_queues)(struct packet_manager *pm, uint32_t *buffer, + struct queue *q, bool is_static); + int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer, + enum kfd_queue_type type, + enum kfd_unmap_queues_filter mode, + uint32_t filter_param, bool reset, + unsigned int sdma_engine); + int (*query_status)(struct packet_manager *pm, uint32_t *buffer, + uint64_t fence_address, uint32_t fence_value); + int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer); + + /* Packet sizes */ + int map_process_size; + int runlist_size; + int set_resources_size; + int map_queues_size; + int unmap_queues_size; + int query_status_size; + int release_mem_size; }; +extern const struct packet_manager_funcs kfd_vi_pm_funcs; +extern const struct packet_manager_funcs kfd_v9_pm_funcs; + int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm); void pm_uninit(struct packet_manager *pm); int pm_send_set_resources(struct packet_manager *pm, @@ -849,12 +928,17 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, void pm_release_ib(struct packet_manager *pm); -uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer); +/* Following PM funcs can be shared among VI and AI */ +unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size); +int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer, + struct scheduling_resources *res); uint64_t kfd_get_number_elems(struct kfd_dev *kfd); /* Events */ extern const struct kfd_event_interrupt_class event_interrupt_class_cik; +extern const struct kfd_event_interrupt_class event_interrupt_class_v9; + extern const struct kfd_device_global_init_class device_global_init_class_cik; void kfd_event_init_process(struct kfd_process *p); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 1711ad0642f7..1d80b4f7c681 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -332,6 +332,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) free_pages((unsigned long)pdd->qpd.cwsr_kaddr, get_order(KFD_CWSR_TBA_TMA_SIZE)); + kfree(pdd->qpd.doorbell_bitmap); idr_destroy(&pdd->alloc_idr); kfree(pdd); @@ -451,7 +452,8 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep) if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base) continue; - offset = (dev->id | KFD_MMAP_RESERVED_MEM_MASK) << PAGE_SHIFT; + offset = (KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id)) + << PAGE_SHIFT; qpd->tba_addr = (int64_t)vm_mmap(filep, 0, KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC, MAP_SHARED, offset); @@ -585,6 +587,31 @@ err_alloc_process: return ERR_PTR(err); } +static int init_doorbell_bitmap(struct qcm_process_device *qpd, + struct kfd_dev *dev) +{ + unsigned int i; + + if (!KFD_IS_SOC15(dev->device_info->asic_family)) + return 0; + + qpd->doorbell_bitmap = + kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, + BITS_PER_BYTE), GFP_KERNEL); + if (!qpd->doorbell_bitmap) + return -ENOMEM; + + /* Mask out any reserved doorbells */ + for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS; i++) + if ((dev->shared_resources.reserved_doorbell_mask & i) == + dev->shared_resources.reserved_doorbell_val) { + set_bit(i, qpd->doorbell_bitmap); + pr_debug("reserved doorbell 0x%03x\n", i); + } + + return 0; +} + struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, struct kfd_process *p) { @@ -606,6 +633,12 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, if (!pdd) return NULL; + if (init_doorbell_bitmap(&pdd->qpd, dev)) { + pr_err("Failed to init doorbell for process\n"); + kfree(pdd); + return NULL; + } + pdd->dev = dev; INIT_LIST_HEAD(&pdd->qpd.queues_list); INIT_LIST_HEAD(&pdd->qpd.priv_queue_list); @@ -808,7 +841,7 @@ struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm) * Eviction is reference-counted per process-device. This means multiple * evictions from different sources can be nested safely. */ -static int process_evict_queues(struct kfd_process *p) +int kfd_process_evict_queues(struct kfd_process *p) { struct kfd_process_device *pdd; int r = 0; @@ -844,7 +877,7 @@ fail: } /* process_restore_queues - Restore all user queues of a process */ -static int process_restore_queues(struct kfd_process *p) +int kfd_process_restore_queues(struct kfd_process *p) { struct kfd_process_device *pdd; int r, ret = 0; @@ -886,7 +919,7 @@ static void evict_process_worker(struct work_struct *work) flush_delayed_work(&p->restore_work); pr_debug("Started evicting pasid %d\n", p->pasid); - ret = process_evict_queues(p); + ret = kfd_process_evict_queues(p); if (!ret) { dma_fence_signal(p->ef); dma_fence_put(p->ef); @@ -946,7 +979,7 @@ static void restore_process_worker(struct work_struct *work) return; } - ret = process_restore_queues(p); + ret = kfd_process_restore_queues(p); if (!ret) pr_debug("Finished restoring pasid %d\n", p->pasid); else @@ -963,7 +996,7 @@ void kfd_suspend_all_processes(void) cancel_delayed_work_sync(&p->eviction_work); cancel_delayed_work_sync(&p->restore_work); - if (process_evict_queues(p)) + if (kfd_process_evict_queues(p)) pr_err("Failed to suspend process %d\n", p->pasid); dma_fence_signal(p->ef); dma_fence_put(p->ef); @@ -989,15 +1022,12 @@ int kfd_resume_all_processes(void) return ret; } -int kfd_reserved_mem_mmap(struct kfd_process *process, +int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process, struct vm_area_struct *vma) { - struct kfd_dev *dev = kfd_device_by_id(vma->vm_pgoff); struct kfd_process_device *pdd; struct qcm_process_device *qpd; - if (!dev) - return -EINVAL; if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) { pr_err("Incorrect CWSR mapping size.\n"); return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 7817e327ea6d..d65ce0436b31 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -119,9 +119,6 @@ static int create_cp_queue(struct process_queue_manager *pqm, /* Doorbell initialized in user space*/ q_properties->doorbell_ptr = NULL; - q_properties->doorbell_off = - kfd_queue_id_to_doorbell(dev, pqm->process, qid); - /* let DQM handle it*/ q_properties->vmid = 0; q_properties->queue_id = qid; @@ -244,10 +241,20 @@ int pqm_create_queue(struct process_queue_manager *pqm, } if (retval != 0) { - pr_err("DQM create queue failed\n"); + pr_err("Pasid %d DQM create queue %d failed. ret %d\n", + pqm->process->pasid, type, retval); goto err_create_queue; } + if (q) + /* Return the doorbell offset within the doorbell page + * to the caller so it can be passed up to user mode + * (in bytes). + */ + properties->doorbell_off = + (q->properties.doorbell_off * sizeof(uint32_t)) & + (kfd_doorbell_process_slice(dev) - 1); + pr_debug("PQM After DQM create queue\n"); list_add(&pqn->process_queue_list, &pqm->queues); @@ -313,8 +320,11 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) dqm = pqn->q->device->dqm; retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q); if (retval) { - pr_debug("Destroy queue failed, returned %d\n", retval); - goto err_destroy_queue; + pr_err("Pasid %d destroy queue %d failed, ret %d\n", + pqm->process->pasid, + pqn->q->properties.queue_id, retval); + if (retval != -ETIME) + goto err_destroy_queue; } uninit_queue(pqn->q); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index a5315d4f1c95..6dcd621e5b71 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -36,8 +36,8 @@ void print_queue_properties(struct queue_properties *q) pr_debug("Queue Address: 0x%llX\n", q->queue_address); pr_debug("Queue Id: %u\n", q->queue_id); pr_debug("Queue Process Vmid: %u\n", q->vmid); - pr_debug("Queue Read Pointer: 0x%p\n", q->read_ptr); - pr_debug("Queue Write Pointer: 0x%p\n", q->write_ptr); + pr_debug("Queue Read Pointer: 0x%px\n", q->read_ptr); + pr_debug("Queue Write Pointer: 0x%px\n", q->write_ptr); pr_debug("Queue Doorbell Pointer: 0x%p\n", q->doorbell_ptr); pr_debug("Queue Doorbell Offset: %u\n", q->doorbell_off); } @@ -53,8 +53,8 @@ void print_queue(struct queue *q) pr_debug("Queue Address: 0x%llX\n", q->properties.queue_address); pr_debug("Queue Id: %u\n", q->properties.queue_id); pr_debug("Queue Process Vmid: %u\n", q->properties.vmid); - pr_debug("Queue Read Pointer: 0x%p\n", q->properties.read_ptr); - pr_debug("Queue Write Pointer: 0x%p\n", q->properties.write_ptr); + pr_debug("Queue Read Pointer: 0x%px\n", q->properties.read_ptr); + pr_debug("Queue Write Pointer: 0x%px\n", q->properties.write_ptr); pr_debug("Queue Doorbell Pointer: 0x%p\n", q->properties.doorbell_ptr); pr_debug("Queue Doorbell Offset: %u\n", q->properties.doorbell_off); pr_debug("Queue MQD Address: 0x%p\n", q->mqd); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index ac28abc94e57..bc95d4dfee2e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1239,6 +1239,12 @@ int kfd_topology_add_device(struct kfd_dev *gpu) HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); break; + case CHIP_VEGA10: + case CHIP_RAVEN: + dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 << + HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & + HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); + break; default: WARN(1, "Unexpected ASIC family %u", dev->gpu->device_info->asic_family); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index eb54cfcaf039..7d9c3f948dff 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -45,6 +45,7 @@ #define HSA_CAP_DOORBELL_TYPE_PRE_1_0 0x0 #define HSA_CAP_DOORBELL_TYPE_1_0 0x1 +#define HSA_CAP_DOORBELL_TYPE_2_0 0x2 #define HSA_CAP_AQL_QUEUE_DOUBLE_MAP 0x00004000 struct kfd_node_properties { diff --git a/drivers/gpu/drm/amd/amdkfd/soc15_int.h b/drivers/gpu/drm/amd/amdkfd/soc15_int.h new file mode 100644 index 000000000000..0bc0b25cb410 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/soc15_int.h @@ -0,0 +1,47 @@ +/* + * Copyright 2016-2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef HSA_SOC15_INT_H_INCLUDED +#define HSA_SOC15_INT_H_INCLUDED + +#include "soc15_ih_clientid.h" + +#define SOC15_INTSRC_CP_END_OF_PIPE 181 +#define SOC15_INTSRC_CP_BAD_OPCODE 183 +#define SOC15_INTSRC_SQ_INTERRUPT_MSG 239 +#define SOC15_INTSRC_VMC_FAULT 0 +#define SOC15_INTSRC_SDMA_TRAP 224 + + +#define SOC15_CLIENT_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) & 0xff) +#define SOC15_SOURCE_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 8 & 0xff) +#define SOC15_RING_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 16 & 0xff) +#define SOC15_VMID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 24 & 0xf) +#define SOC15_VMID_TYPE_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 31 & 0x1) +#define SOC15_PASID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[3]) & 0xffff) +#define SOC15_CONTEXT_ID0_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[4])) +#define SOC15_CONTEXT_ID1_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[5])) +#define SOC15_CONTEXT_ID2_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[6])) +#define SOC15_CONTEXT_ID3_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[7])) + +#endif + diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 237289a72bb7..5733fbee07f7 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -100,6 +100,21 @@ struct kgd2kfd_shared_resources { /* Bit n == 1 means Queue n is available for KFD */ DECLARE_BITMAP(queue_bitmap, KGD_MAX_QUEUES); + /* Doorbell assignments (SOC15 and later chips only). Only + * specific doorbells are routed to each SDMA engine. Others + * are routed to IH and VCN. They are not usable by the CP. + * + * Any doorbell number D that satisfies the following condition + * is reserved: (D & reserved_doorbell_mask) == reserved_doorbell_val + * + * KFD currently uses 1024 (= 0x3ff) doorbells per process. If + * doorbells 0x0f0-0x0f7 and 0x2f-0x2f7 are reserved, that means + * mask would be set to 0x1f8 and val set to 0x0f0. + */ + unsigned int sdma_doorbell[2][2]; + unsigned int reserved_doorbell_mask; + unsigned int reserved_doorbell_val; + /* Base address of doorbell aperture. */ phys_addr_t doorbell_physical_address; @@ -173,8 +188,6 @@ struct tile_config { * @set_pasid_vmid_mapping: Exposes pasid/vmid pair to the H/W for no cp * scheduling mode. Only used for no cp scheduling mode. * - * @init_pipeline: Initialized the compute pipelines. - * * @hqd_load: Loads the mqd structure to a H/W hqd slot. used only for no cp * sceduling mode. * @@ -274,9 +287,6 @@ struct kfd2kgd_calls { int (*set_pasid_vmid_mapping)(struct kgd_dev *kgd, unsigned int pasid, unsigned int vmid); - int (*init_pipeline)(struct kgd_dev *kgd, uint32_t pipe_id, - uint32_t hpd_size, uint64_t hpd_gpu_addr); - int (*init_interrupts)(struct kgd_dev *kgd, uint32_t pipe_id); int (*hqd_load)(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, @@ -382,6 +392,10 @@ struct kfd2kgd_calls { * * @resume: Notifies amdkfd about a resume action done to a kgd device * + * @quiesce_mm: Quiesce all user queue access to specified MM address space + * + * @resume_mm: Resume user queue access to specified MM address space + * * @schedule_evict_and_restore_process: Schedules work queue that will prepare * for safe eviction of KFD BOs that belong to the specified process. * @@ -399,6 +413,8 @@ struct kgd2kfd_calls { void (*interrupt)(struct kfd_dev *kfd, const void *ih_ring_entry); void (*suspend)(struct kfd_dev *kfd); int (*resume)(struct kfd_dev *kfd); + int (*quiesce_mm)(struct mm_struct *mm); + int (*resume_mm)(struct mm_struct *mm); int (*schedule_evict_and_restore_process)(struct mm_struct *mm, struct dma_fence *fence); }; diff --git a/drivers/gpu/drm/amd/include/v9_structs.h b/drivers/gpu/drm/amd/include/v9_structs.h index 2fb25abaf7c8..ceaf4932258d 100644 --- a/drivers/gpu/drm/amd/include/v9_structs.h +++ b/drivers/gpu/drm/amd/include/v9_structs.h @@ -29,10 +29,10 @@ struct v9_sdma_mqd { uint32_t sdmax_rlcx_rb_base; uint32_t sdmax_rlcx_rb_base_hi; uint32_t sdmax_rlcx_rb_rptr; + uint32_t sdmax_rlcx_rb_rptr_hi; uint32_t sdmax_rlcx_rb_wptr; + uint32_t sdmax_rlcx_rb_wptr_hi; uint32_t sdmax_rlcx_rb_wptr_poll_cntl; - uint32_t sdmax_rlcx_rb_wptr_poll_addr_hi; - uint32_t sdmax_rlcx_rb_wptr_poll_addr_lo; uint32_t sdmax_rlcx_rb_rptr_addr_hi; uint32_t sdmax_rlcx_rb_rptr_addr_lo; uint32_t sdmax_rlcx_ib_cntl; @@ -44,29 +44,29 @@ struct v9_sdma_mqd { uint32_t sdmax_rlcx_skip_cntl; uint32_t sdmax_rlcx_context_status; uint32_t sdmax_rlcx_doorbell; - uint32_t sdmax_rlcx_virtual_addr; - uint32_t sdmax_rlcx_ape1_cntl; + uint32_t sdmax_rlcx_status; uint32_t sdmax_rlcx_doorbell_log; - uint32_t reserved_22; - uint32_t reserved_23; - uint32_t reserved_24; - uint32_t reserved_25; - uint32_t reserved_26; - uint32_t reserved_27; - uint32_t reserved_28; - uint32_t reserved_29; - uint32_t reserved_30; - uint32_t reserved_31; - uint32_t reserved_32; - uint32_t reserved_33; - uint32_t reserved_34; - uint32_t reserved_35; - uint32_t reserved_36; - uint32_t reserved_37; - uint32_t reserved_38; - uint32_t reserved_39; - uint32_t reserved_40; - uint32_t reserved_41; + uint32_t sdmax_rlcx_watermark; + uint32_t sdmax_rlcx_doorbell_offset; + uint32_t sdmax_rlcx_csa_addr_lo; + uint32_t sdmax_rlcx_csa_addr_hi; + uint32_t sdmax_rlcx_ib_sub_remain; + uint32_t sdmax_rlcx_preempt; + uint32_t sdmax_rlcx_dummy_reg; + uint32_t sdmax_rlcx_rb_wptr_poll_addr_hi; + uint32_t sdmax_rlcx_rb_wptr_poll_addr_lo; + uint32_t sdmax_rlcx_rb_aql_cntl; + uint32_t sdmax_rlcx_minor_ptr_update; + uint32_t sdmax_rlcx_midcmd_data0; + uint32_t sdmax_rlcx_midcmd_data1; + uint32_t sdmax_rlcx_midcmd_data2; + uint32_t sdmax_rlcx_midcmd_data3; + uint32_t sdmax_rlcx_midcmd_data4; + uint32_t sdmax_rlcx_midcmd_data5; + uint32_t sdmax_rlcx_midcmd_data6; + uint32_t sdmax_rlcx_midcmd_data7; + uint32_t sdmax_rlcx_midcmd_data8; + uint32_t sdmax_rlcx_midcmd_cntl; uint32_t reserved_42; uint32_t reserved_43; uint32_t reserved_44; diff --git a/drivers/gpu/drm/bridge/adv7511/Kconfig b/drivers/gpu/drm/bridge/adv7511/Kconfig index 592b9d2ec034..944e440c4fde 100644 --- a/drivers/gpu/drm/bridge/adv7511/Kconfig +++ b/drivers/gpu/drm/bridge/adv7511/Kconfig @@ -1,5 +1,5 @@ config DRM_I2C_ADV7511 - tristate "AV7511 encoder" + tristate "ADV7511 encoder" depends on OF select DRM_KMS_HELPER select REGMAP_I2C diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c index 2614cea538e2..73021b388e12 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c @@ -1127,7 +1127,7 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id) } if (adv7511->gpio_pd) { - mdelay(5); + usleep_range(5000, 6000); gpiod_set_value_cansleep(adv7511->gpio_pd, 0); } diff --git a/drivers/gpu/drm/bridge/dumb-vga-dac.c b/drivers/gpu/drm/bridge/dumb-vga-dac.c index 498d5948d1a8..9837c8d69e69 100644 --- a/drivers/gpu/drm/bridge/dumb-vga-dac.c +++ b/drivers/gpu/drm/bridge/dumb-vga-dac.c @@ -56,7 +56,9 @@ static int dumb_vga_get_modes(struct drm_connector *connector) } drm_mode_connector_update_edid_property(connector, edid); - return drm_add_edid_modes(connector, edid); + ret = drm_add_edid_modes(connector, edid); + kfree(edid); + return ret; fallback: /* diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 3c181d217f8b..26a22f529e9b 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -1425,7 +1425,9 @@ drm_atomic_set_crtc_for_plane(struct drm_plane_state *plane_state, { struct drm_plane *plane = plane_state->plane; struct drm_crtc_state *crtc_state; - + /* Nothing to do for same crtc*/ + if (plane_state->crtc == crtc) + return 0; if (plane_state->crtc) { crtc_state = drm_atomic_get_crtc_state(plane_state->state, plane_state->crtc); diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 9cb2209f6fc8..130da5195f3b 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -766,7 +766,7 @@ int drm_atomic_helper_check_plane_state(struct drm_plane_state *plane_state, if (crtc_state->enable) drm_mode_get_hv_timing(&crtc_state->mode, &clip.x2, &clip.y2); - plane_state->visible = drm_rect_clip_scaled(src, dst, &clip, hscale, vscale); + plane_state->visible = drm_rect_clip_scaled(src, dst, &clip); drm_rect_rotate_inv(src, fb->width << 16, fb->height << 16, rotation); diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index b3cde897cd80..9b9ba5d5ec0c 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -1069,7 +1069,7 @@ int drm_mode_create_tv_properties(struct drm_device *dev, goto nomem; for (i = 0; i < num_modes; i++) - drm_property_add_enum(dev->mode_config.tv_mode_property, i, + drm_property_add_enum(dev->mode_config.tv_mode_property, i, modes[i]); dev->mode_config.tv_brightness_property = @@ -1156,7 +1156,7 @@ int drm_connector_attach_scaling_mode_property(struct drm_connector *connector, { struct drm_device *dev = connector->dev; struct drm_property *scaling_mode_property; - int i, j = 0; + int i; const unsigned valid_scaling_mode_mask = (1U << ARRAY_SIZE(drm_scaling_mode_enum_list)) - 1; @@ -1177,7 +1177,7 @@ int drm_connector_attach_scaling_mode_property(struct drm_connector *connector, if (!(BIT(i) & scaling_mode_mask)) continue; - ret = drm_property_add_enum(scaling_mode_property, j++, + ret = drm_property_add_enum(scaling_mode_property, drm_scaling_mode_enum_list[i].type, drm_scaling_mode_enum_list[i].name); @@ -1531,8 +1531,10 @@ static struct drm_encoder *drm_connector_get_encoder(struct drm_connector *conne return connector->encoder; } -static bool drm_mode_expose_to_userspace(const struct drm_display_mode *mode, - const struct drm_file *file_priv) +static bool +drm_mode_expose_to_userspace(const struct drm_display_mode *mode, + const struct list_head *export_list, + const struct drm_file *file_priv) { /* * If user-space hasn't configured the driver to expose the stereo 3D @@ -1540,6 +1542,23 @@ static bool drm_mode_expose_to_userspace(const struct drm_display_mode *mode, */ if (!file_priv->stereo_allowed && drm_mode_is_stereo(mode)) return false; + /* + * If user-space hasn't configured the driver to expose the modes + * with aspect-ratio, don't expose them. However if such a mode + * is unique, let it be exposed, but reset the aspect-ratio flags + * while preparing the list of user-modes. + */ + if (!file_priv->aspect_ratio_allowed) { + struct drm_display_mode *mode_itr; + + list_for_each_entry(mode_itr, export_list, export_head) + if (drm_mode_match(mode_itr, mode, + DRM_MODE_MATCH_TIMINGS | + DRM_MODE_MATCH_CLOCK | + DRM_MODE_MATCH_FLAGS | + DRM_MODE_MATCH_3D_FLAGS)) + return false; + } return true; } @@ -1559,6 +1578,7 @@ int drm_mode_getconnector(struct drm_device *dev, void *data, struct drm_mode_modeinfo u_mode; struct drm_mode_modeinfo __user *mode_ptr; uint32_t __user *encoder_ptr; + LIST_HEAD(export_list); if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EINVAL; @@ -1607,21 +1627,31 @@ int drm_mode_getconnector(struct drm_device *dev, void *data, /* delayed so we get modes regardless of pre-fill_modes state */ list_for_each_entry(mode, &connector->modes, head) - if (drm_mode_expose_to_userspace(mode, file_priv)) + if (drm_mode_expose_to_userspace(mode, &export_list, + file_priv)) { + list_add_tail(&mode->export_head, &export_list); mode_count++; + } /* * This ioctl is called twice, once to determine how much space is * needed, and the 2nd time to fill it. + * The modes that need to be exposed to the user are maintained in the + * 'export_list'. When the ioctl is called first time to determine the, + * space, the export_list gets filled, to find the no.of modes. In the + * 2nd time, the user modes are filled, one by one from the export_list. */ if ((out_resp->count_modes >= mode_count) && mode_count) { copied = 0; mode_ptr = (struct drm_mode_modeinfo __user *)(unsigned long)out_resp->modes_ptr; - list_for_each_entry(mode, &connector->modes, head) { - if (!drm_mode_expose_to_userspace(mode, file_priv)) - continue; - + list_for_each_entry(mode, &export_list, export_head) { drm_mode_convert_to_umode(&u_mode, mode); + /* + * Reset aspect ratio flags of user-mode, if modes with + * aspect-ratio are not supported. + */ + if (!file_priv->aspect_ratio_allowed) + u_mode.flags &= ~DRM_MODE_FLAG_PIC_AR_MASK; if (copy_to_user(mode_ptr + copied, &u_mode, sizeof(u_mode))) { ret = -EFAULT; diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index a231dd5dce16..98a36e6c69ad 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -449,6 +449,8 @@ int drm_mode_getcrtc(struct drm_device *dev, crtc_resp->mode_valid = 0; } } + if (!file_priv->aspect_ratio_allowed) + crtc_resp->mode.flags &= ~DRM_MODE_FLAG_PIC_AR_MASK; drm_modeset_unlock(&crtc->mutex); return 0; @@ -628,6 +630,13 @@ retry: ret = -ENOMEM; goto out; } + if (!file_priv->aspect_ratio_allowed && + (crtc_req->mode.flags & DRM_MODE_FLAG_PIC_AR_MASK) != DRM_MODE_FLAG_PIC_AR_NONE) { + DRM_DEBUG_KMS("Unexpected aspect-ratio flag bits\n"); + ret = -EINVAL; + goto out; + } + ret = drm_mode_convert_umode(dev, mode, &crtc_req->mode); if (ret) { diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c index ffe14ec3e7f2..36c7609a4bd5 100644 --- a/drivers/gpu/drm/drm_dp_helper.c +++ b/drivers/gpu/drm/drm_dp_helper.c @@ -119,18 +119,32 @@ u8 drm_dp_get_adjust_request_pre_emphasis(const u8 link_status[DP_LINK_STATUS_SI EXPORT_SYMBOL(drm_dp_get_adjust_request_pre_emphasis); void drm_dp_link_train_clock_recovery_delay(const u8 dpcd[DP_RECEIVER_CAP_SIZE]) { - if (dpcd[DP_TRAINING_AUX_RD_INTERVAL] == 0) + int rd_interval = dpcd[DP_TRAINING_AUX_RD_INTERVAL] & + DP_TRAINING_AUX_RD_MASK; + + if (rd_interval > 4) + DRM_DEBUG_KMS("AUX interval %d, out of range (max 4)\n", + rd_interval); + + if (rd_interval == 0 || dpcd[DP_DPCD_REV] >= DP_DPCD_REV_14) udelay(100); else - mdelay(dpcd[DP_TRAINING_AUX_RD_INTERVAL] * 4); + mdelay(rd_interval * 4); } EXPORT_SYMBOL(drm_dp_link_train_clock_recovery_delay); void drm_dp_link_train_channel_eq_delay(const u8 dpcd[DP_RECEIVER_CAP_SIZE]) { - if (dpcd[DP_TRAINING_AUX_RD_INTERVAL] == 0) + int rd_interval = dpcd[DP_TRAINING_AUX_RD_INTERVAL] & + DP_TRAINING_AUX_RD_MASK; + + if (rd_interval > 4) + DRM_DEBUG_KMS("AUX interval %d, out of range (max 4)\n", + rd_interval); + + if (rd_interval == 0) udelay(400); else - mdelay(dpcd[DP_TRAINING_AUX_RD_INTERVAL] * 4); + mdelay(rd_interval * 4); } EXPORT_SYMBOL(drm_dp_link_train_channel_eq_delay); diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 32a83b41ab61..f6910ebe4d0e 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -99,8 +99,6 @@ static struct drm_minor **drm_minor_get_slot(struct drm_device *dev, return &dev->primary; case DRM_MINOR_RENDER: return &dev->render; - case DRM_MINOR_CONTROL: - return &dev->control; default: BUG(); } @@ -567,7 +565,6 @@ err_ctxbitmap: err_minors: drm_minor_free(dev, DRM_MINOR_PRIMARY); drm_minor_free(dev, DRM_MINOR_RENDER); - drm_minor_free(dev, DRM_MINOR_CONTROL); drm_fs_inode_free(dev->anon_inode); err_free: mutex_destroy(&dev->master_mutex); @@ -603,7 +600,6 @@ void drm_dev_fini(struct drm_device *dev) drm_minor_free(dev, DRM_MINOR_PRIMARY); drm_minor_free(dev, DRM_MINOR_RENDER); - drm_minor_free(dev, DRM_MINOR_CONTROL); mutex_destroy(&dev->master_mutex); mutex_destroy(&dev->ctxlist_mutex); @@ -796,10 +792,6 @@ int drm_dev_register(struct drm_device *dev, unsigned long flags) mutex_lock(&drm_global_mutex); - ret = drm_minor_register(dev, DRM_MINOR_CONTROL); - if (ret) - goto err_minors; - ret = drm_minor_register(dev, DRM_MINOR_RENDER); if (ret) goto err_minors; @@ -837,7 +829,6 @@ err_minors: remove_compat_control_link(dev); drm_minor_unregister(dev, DRM_MINOR_PRIMARY); drm_minor_unregister(dev, DRM_MINOR_RENDER); - drm_minor_unregister(dev, DRM_MINOR_CONTROL); out_unlock: mutex_unlock(&drm_global_mutex); return ret; @@ -882,7 +873,6 @@ void drm_dev_unregister(struct drm_device *dev) remove_compat_control_link(dev); drm_minor_unregister(dev, DRM_MINOR_PRIMARY); drm_minor_unregister(dev, DRM_MINOR_RENDER); - drm_minor_unregister(dev, DRM_MINOR_CONTROL); } EXPORT_SYMBOL(drm_dev_unregister); diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 08d33b48b14a..40e1e24f2ff0 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -2930,11 +2930,15 @@ cea_mode_alternate_timings(u8 vic, struct drm_display_mode *mode) static u8 drm_match_cea_mode_clock_tolerance(const struct drm_display_mode *to_match, unsigned int clock_tolerance) { + unsigned int match_flags = DRM_MODE_MATCH_TIMINGS | DRM_MODE_MATCH_FLAGS; u8 vic; if (!to_match->clock) return 0; + if (to_match->picture_aspect_ratio) + match_flags |= DRM_MODE_MATCH_ASPECT_RATIO; + for (vic = 1; vic < ARRAY_SIZE(edid_cea_modes); vic++) { struct drm_display_mode cea_mode = edid_cea_modes[vic]; unsigned int clock1, clock2; @@ -2948,7 +2952,7 @@ static u8 drm_match_cea_mode_clock_tolerance(const struct drm_display_mode *to_m continue; do { - if (drm_mode_equal_no_clocks_no_stereo(to_match, &cea_mode)) + if (drm_mode_match(to_match, &cea_mode, match_flags)) return vic; } while (cea_mode_alternate_timings(vic, &cea_mode)); } @@ -2965,11 +2969,15 @@ static u8 drm_match_cea_mode_clock_tolerance(const struct drm_display_mode *to_m */ u8 drm_match_cea_mode(const struct drm_display_mode *to_match) { + unsigned int match_flags = DRM_MODE_MATCH_TIMINGS | DRM_MODE_MATCH_FLAGS; u8 vic; if (!to_match->clock) return 0; + if (to_match->picture_aspect_ratio) + match_flags |= DRM_MODE_MATCH_ASPECT_RATIO; + for (vic = 1; vic < ARRAY_SIZE(edid_cea_modes); vic++) { struct drm_display_mode cea_mode = edid_cea_modes[vic]; unsigned int clock1, clock2; @@ -2983,7 +2991,7 @@ u8 drm_match_cea_mode(const struct drm_display_mode *to_match) continue; do { - if (drm_mode_equal_no_clocks_no_stereo(to_match, &cea_mode)) + if (drm_mode_match(to_match, &cea_mode, match_flags)) return vic; } while (cea_mode_alternate_timings(vic, &cea_mode)); } @@ -3030,6 +3038,7 @@ hdmi_mode_alternate_clock(const struct drm_display_mode *hdmi_mode) static u8 drm_match_hdmi_mode_clock_tolerance(const struct drm_display_mode *to_match, unsigned int clock_tolerance) { + unsigned int match_flags = DRM_MODE_MATCH_TIMINGS | DRM_MODE_MATCH_FLAGS; u8 vic; if (!to_match->clock) @@ -3047,7 +3056,7 @@ static u8 drm_match_hdmi_mode_clock_tolerance(const struct drm_display_mode *to_ abs(to_match->clock - clock2) > clock_tolerance) continue; - if (drm_mode_equal_no_clocks(to_match, hdmi_mode)) + if (drm_mode_match(to_match, hdmi_mode, match_flags)) return vic; } @@ -3064,6 +3073,7 @@ static u8 drm_match_hdmi_mode_clock_tolerance(const struct drm_display_mode *to_ */ static u8 drm_match_hdmi_mode(const struct drm_display_mode *to_match) { + unsigned int match_flags = DRM_MODE_MATCH_TIMINGS | DRM_MODE_MATCH_FLAGS; u8 vic; if (!to_match->clock) @@ -3079,7 +3089,7 @@ static u8 drm_match_hdmi_mode(const struct drm_display_mode *to_match) if ((KHZ2PICOS(to_match->clock) == KHZ2PICOS(clock1) || KHZ2PICOS(to_match->clock) == KHZ2PICOS(clock2)) && - drm_mode_equal_no_clocks_no_stereo(to_match, hdmi_mode)) + drm_mode_match(to_match, hdmi_mode, match_flags)) return vic; } return 0; @@ -4823,6 +4833,7 @@ drm_hdmi_avi_infoframe_from_display_mode(struct hdmi_avi_infoframe *frame, const struct drm_display_mode *mode, bool is_hdmi2_sink) { + enum hdmi_picture_aspect picture_aspect; int err; if (!frame || !mode) @@ -4865,13 +4876,23 @@ drm_hdmi_avi_infoframe_from_display_mode(struct hdmi_avi_infoframe *frame, * Populate picture aspect ratio from either * user input (if specified) or from the CEA mode list. */ - if (mode->picture_aspect_ratio == HDMI_PICTURE_ASPECT_4_3 || - mode->picture_aspect_ratio == HDMI_PICTURE_ASPECT_16_9) - frame->picture_aspect = mode->picture_aspect_ratio; - else if (frame->video_code > 0) - frame->picture_aspect = drm_get_cea_aspect_ratio( - frame->video_code); + picture_aspect = mode->picture_aspect_ratio; + if (picture_aspect == HDMI_PICTURE_ASPECT_NONE) + picture_aspect = drm_get_cea_aspect_ratio(frame->video_code); + + /* + * The infoframe can't convey anything but none, 4:3 + * and 16:9, so if the user has asked for anything else + * we can only satisfy it by specifying the right VIC. + */ + if (picture_aspect > HDMI_PICTURE_ASPECT_16_9) { + if (picture_aspect != + drm_get_cea_aspect_ratio(frame->video_code)) + return -EINVAL; + picture_aspect = HDMI_PICTURE_ASPECT_NONE; + } + frame->picture_aspect = picture_aspect; frame->active_aspect = HDMI_ACTIVE_ASPECT_PICTURE; frame->scan_mode = HDMI_SCAN_MODE_UNDERSCAN; diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 0646b108030b..2ee1eaa66188 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -2183,7 +2183,11 @@ static bool drm_target_cloned(struct drm_fb_helper *fb_helper, for (j = 0; j < i; j++) { if (!enabled[j]) continue; - if (!drm_mode_equal(modes[j], modes[i])) + if (!drm_mode_match(modes[j], modes[i], + DRM_MODE_MATCH_TIMINGS | + DRM_MODE_MATCH_CLOCK | + DRM_MODE_MATCH_FLAGS | + DRM_MODE_MATCH_3D_FLAGS)) can_clone = false; } } @@ -2203,7 +2207,11 @@ static bool drm_target_cloned(struct drm_fb_helper *fb_helper, fb_helper_conn = fb_helper->connector_info[i]; list_for_each_entry(mode, &fb_helper_conn->connector->modes, head) { - if (drm_mode_equal(mode, dmt_mode)) + if (drm_mode_match(mode, dmt_mode, + DRM_MODE_MATCH_TIMINGS | + DRM_MODE_MATCH_CLOCK | + DRM_MODE_MATCH_FLAGS | + DRM_MODE_MATCH_3D_FLAGS)) modes[i] = mode; } if (!modes[i]) diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c index 8c4d32adcc17..bfedceff87bb 100644 --- a/drivers/gpu/drm/drm_framebuffer.c +++ b/drivers/gpu/drm/drm_framebuffer.c @@ -484,8 +484,7 @@ int drm_mode_getfb(struct drm_device *dev, * backwards-compatibility reasons, we cannot make GET_FB() privileged, * so just return an invalid handle for non-masters. */ - if (!drm_is_current_master(file_priv) && !capable(CAP_SYS_ADMIN) && - !drm_is_control_client(file_priv)) { + if (!drm_is_current_master(file_priv) && !capable(CAP_SYS_ADMIN)) { r->handle = 0; ret = 0; goto out; diff --git a/drivers/gpu/drm/drm_ioc32.c b/drivers/gpu/drm/drm_ioc32.c index f8e96e648acf..67b1fca39aa6 100644 --- a/drivers/gpu/drm/drm_ioc32.c +++ b/drivers/gpu/drm/drm_ioc32.c @@ -105,7 +105,7 @@ static int compat_drm_version(struct file *file, unsigned int cmd, .desc = compat_ptr(v32.desc), }; err = drm_ioctl_kernel(file, drm_version, &v, - DRM_UNLOCKED|DRM_RENDER_ALLOW|DRM_CONTROL_ALLOW); + DRM_UNLOCKED|DRM_RENDER_ALLOW); if (err) return err; @@ -885,7 +885,7 @@ static int compat_drm_mode_addfb2(struct file *file, unsigned int cmd, return -EFAULT; err = drm_ioctl_kernel(file, drm_mode_addfb2, &req64, - DRM_CONTROL_ALLOW|DRM_UNLOCKED); + DRM_UNLOCKED); if (err) return err; diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index af782911c505..0d4cfb232576 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -324,6 +324,15 @@ drm_setclientcap(struct drm_device *dev, void *data, struct drm_file *file_priv) return -EINVAL; file_priv->atomic = req->value; file_priv->universal_planes = req->value; + /* + * No atomic user-space blows up on aspect ratio mode bits. + */ + file_priv->aspect_ratio_allowed = req->value; + break; + case DRM_CLIENT_CAP_ASPECT_RATIO: + if (req->value > 1) + return -EINVAL; + file_priv->aspect_ratio_allowed = req->value; break; default: return -EINVAL; @@ -510,13 +519,7 @@ int drm_ioctl_permit(u32 flags, struct drm_file *file_priv) /* MASTER is only for master or control clients */ if (unlikely((flags & DRM_MASTER) && - !drm_is_current_master(file_priv) && - !drm_is_control_client(file_priv))) - return -EACCES; - - /* Control clients must be explicitly allowed */ - if (unlikely(!(flags & DRM_CONTROL_ALLOW) && - drm_is_control_client(file_priv))) + !drm_is_current_master(file_priv))) return -EACCES; /* Render clients must be explicitly allowed */ @@ -539,7 +542,7 @@ EXPORT_SYMBOL(drm_ioctl_permit); /* Ioctl table */ static const struct drm_ioctl_desc drm_ioctls[] = { DRM_IOCTL_DEF(DRM_IOCTL_VERSION, drm_version, - DRM_UNLOCKED|DRM_RENDER_ALLOW|DRM_CONTROL_ALLOW), + DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_GET_UNIQUE, drm_getunique, DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_GET_MAGIC, drm_getmagic, DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_IRQ_BUSID, drm_irq_by_busid, DRM_MASTER|DRM_ROOT_ONLY), @@ -613,41 +616,41 @@ static const struct drm_ioctl_desc drm_ioctls[] = { DRM_IOCTL_DEF(DRM_IOCTL_GEM_FLINK, drm_gem_flink_ioctl, DRM_AUTH|DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_GEM_OPEN, drm_gem_open_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETRESOURCES, drm_mode_getresources, DRM_CONTROL_ALLOW|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETRESOURCES, drm_mode_getresources, DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_PRIME_HANDLE_TO_FD, drm_prime_handle_to_fd_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_PRIME_FD_TO_HANDLE, drm_prime_fd_to_handle_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPLANERESOURCES, drm_mode_getplane_res, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCRTC, drm_mode_getcrtc, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETCRTC, drm_mode_setcrtc, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPLANE, drm_mode_getplane, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPLANE, drm_mode_setplane, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR, drm_mode_cursor_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPLANERESOURCES, drm_mode_getplane_res, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCRTC, drm_mode_getcrtc, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETCRTC, drm_mode_setcrtc, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPLANE, drm_mode_getplane, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPLANE, drm_mode_setplane, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR, drm_mode_cursor_ioctl, DRM_MASTER|DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETGAMMA, drm_mode_gamma_get_ioctl, DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETGAMMA, drm_mode_gamma_set_ioctl, DRM_MASTER|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETENCODER, drm_mode_getencoder, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCONNECTOR, drm_mode_getconnector, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATTACHMODE, drm_noop, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_DETACHMODE, drm_noop, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPERTY, drm_mode_getproperty_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPROPERTY, drm_mode_connector_property_set_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPBLOB, drm_mode_getblob_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETFB, drm_mode_getfb, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB, drm_mode_addfb, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB2, drm_mode_addfb2, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_RMFB, drm_mode_rmfb, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_PAGE_FLIP, drm_mode_page_flip_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_DIRTYFB, drm_mode_dirtyfb_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_DUMB, drm_mode_create_dumb_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_MAP_DUMB, drm_mode_mmap_dumb_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROY_DUMB, drm_mode_destroy_dumb_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_GETPROPERTIES, drm_mode_obj_get_properties_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_SETPROPERTY, drm_mode_obj_set_property_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR2, drm_mode_cursor2_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATOMIC, drm_mode_atomic_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATEPROPBLOB, drm_mode_createblob_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROYPROPBLOB, drm_mode_destroyblob_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETENCODER, drm_mode_getencoder, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCONNECTOR, drm_mode_getconnector, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATTACHMODE, drm_noop, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_DETACHMODE, drm_noop, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPERTY, drm_mode_getproperty_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPROPERTY, drm_mode_connector_property_set_ioctl, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPBLOB, drm_mode_getblob_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETFB, drm_mode_getfb, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB, drm_mode_addfb, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB2, drm_mode_addfb2, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_RMFB, drm_mode_rmfb, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_PAGE_FLIP, drm_mode_page_flip_ioctl, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_DIRTYFB, drm_mode_dirtyfb_ioctl, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_DUMB, drm_mode_create_dumb_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_MAP_DUMB, drm_mode_mmap_dumb_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROY_DUMB, drm_mode_destroy_dumb_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_GETPROPERTIES, drm_mode_obj_get_properties_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_SETPROPERTY, drm_mode_obj_set_property_ioctl, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR2, drm_mode_cursor2_ioctl, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATOMIC, drm_mode_atomic_ioctl, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATEPROPBLOB, drm_mode_createblob_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROYPROPBLOB, drm_mode_destroyblob_ioctl, DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_CREATE, drm_syncobj_create_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), @@ -665,10 +668,10 @@ static const struct drm_ioctl_desc drm_ioctls[] = { DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, drm_crtc_get_sequence_ioctl, DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE, drm_crtc_queue_sequence_ioctl, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE, drm_mode_create_lease_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_LIST_LESSEES, drm_mode_list_lessees_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_GET_LEASE, drm_mode_get_lease_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_REVOKE_LEASE, drm_mode_revoke_lease_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE, drm_mode_create_lease_ioctl, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_LIST_LESSEES, drm_mode_list_lessees_ioctl, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_GET_LEASE, drm_mode_get_lease_ioctl, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_REVOKE_LEASE, drm_mode_revoke_lease_ioctl, DRM_MASTER|DRM_UNLOCKED), }; #define DRM_CORE_IOCTL_COUNT ARRAY_SIZE( drm_ioctls ) diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index e82b61e08f8c..c78ca0e84ffd 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -939,17 +939,68 @@ struct drm_display_mode *drm_mode_duplicate(struct drm_device *dev, } EXPORT_SYMBOL(drm_mode_duplicate); +static bool drm_mode_match_timings(const struct drm_display_mode *mode1, + const struct drm_display_mode *mode2) +{ + return mode1->hdisplay == mode2->hdisplay && + mode1->hsync_start == mode2->hsync_start && + mode1->hsync_end == mode2->hsync_end && + mode1->htotal == mode2->htotal && + mode1->hskew == mode2->hskew && + mode1->vdisplay == mode2->vdisplay && + mode1->vsync_start == mode2->vsync_start && + mode1->vsync_end == mode2->vsync_end && + mode1->vtotal == mode2->vtotal && + mode1->vscan == mode2->vscan; +} + +static bool drm_mode_match_clock(const struct drm_display_mode *mode1, + const struct drm_display_mode *mode2) +{ + /* + * do clock check convert to PICOS + * so fb modes get matched the same + */ + if (mode1->clock && mode2->clock) + return KHZ2PICOS(mode1->clock) == KHZ2PICOS(mode2->clock); + else + return mode1->clock == mode2->clock; +} + +static bool drm_mode_match_flags(const struct drm_display_mode *mode1, + const struct drm_display_mode *mode2) +{ + return (mode1->flags & ~DRM_MODE_FLAG_3D_MASK) == + (mode2->flags & ~DRM_MODE_FLAG_3D_MASK); +} + +static bool drm_mode_match_3d_flags(const struct drm_display_mode *mode1, + const struct drm_display_mode *mode2) +{ + return (mode1->flags & DRM_MODE_FLAG_3D_MASK) == + (mode2->flags & DRM_MODE_FLAG_3D_MASK); +} + +static bool drm_mode_match_aspect_ratio(const struct drm_display_mode *mode1, + const struct drm_display_mode *mode2) +{ + return mode1->picture_aspect_ratio == mode2->picture_aspect_ratio; +} + /** - * drm_mode_equal - test modes for equality + * drm_mode_match - test modes for (partial) equality * @mode1: first mode * @mode2: second mode + * @match_flags: which parts need to match (DRM_MODE_MATCH_*) * * Check to see if @mode1 and @mode2 are equivalent. * * Returns: - * True if the modes are equal, false otherwise. + * True if the modes are (partially) equal, false otherwise. */ -bool drm_mode_equal(const struct drm_display_mode *mode1, const struct drm_display_mode *mode2) +bool drm_mode_match(const struct drm_display_mode *mode1, + const struct drm_display_mode *mode2, + unsigned int match_flags) { if (!mode1 && !mode2) return true; @@ -957,15 +1008,49 @@ bool drm_mode_equal(const struct drm_display_mode *mode1, const struct drm_displ if (!mode1 || !mode2) return false; - /* do clock check convert to PICOS so fb modes get matched - * the same */ - if (mode1->clock && mode2->clock) { - if (KHZ2PICOS(mode1->clock) != KHZ2PICOS(mode2->clock)) - return false; - } else if (mode1->clock != mode2->clock) + if (match_flags & DRM_MODE_MATCH_TIMINGS && + !drm_mode_match_timings(mode1, mode2)) + return false; + + if (match_flags & DRM_MODE_MATCH_CLOCK && + !drm_mode_match_clock(mode1, mode2)) + return false; + + if (match_flags & DRM_MODE_MATCH_FLAGS && + !drm_mode_match_flags(mode1, mode2)) + return false; + + if (match_flags & DRM_MODE_MATCH_3D_FLAGS && + !drm_mode_match_3d_flags(mode1, mode2)) return false; - return drm_mode_equal_no_clocks(mode1, mode2); + if (match_flags & DRM_MODE_MATCH_ASPECT_RATIO && + !drm_mode_match_aspect_ratio(mode1, mode2)) + return false; + + return true; +} +EXPORT_SYMBOL(drm_mode_match); + +/** + * drm_mode_equal - test modes for equality + * @mode1: first mode + * @mode2: second mode + * + * Check to see if @mode1 and @mode2 are equivalent. + * + * Returns: + * True if the modes are equal, false otherwise. + */ +bool drm_mode_equal(const struct drm_display_mode *mode1, + const struct drm_display_mode *mode2) +{ + return drm_mode_match(mode1, mode2, + DRM_MODE_MATCH_TIMINGS | + DRM_MODE_MATCH_CLOCK | + DRM_MODE_MATCH_FLAGS | + DRM_MODE_MATCH_3D_FLAGS| + DRM_MODE_MATCH_ASPECT_RATIO); } EXPORT_SYMBOL(drm_mode_equal); @@ -980,13 +1065,13 @@ EXPORT_SYMBOL(drm_mode_equal); * Returns: * True if the modes are equal, false otherwise. */ -bool drm_mode_equal_no_clocks(const struct drm_display_mode *mode1, const struct drm_display_mode *mode2) +bool drm_mode_equal_no_clocks(const struct drm_display_mode *mode1, + const struct drm_display_mode *mode2) { - if ((mode1->flags & DRM_MODE_FLAG_3D_MASK) != - (mode2->flags & DRM_MODE_FLAG_3D_MASK)) - return false; - - return drm_mode_equal_no_clocks_no_stereo(mode1, mode2); + return drm_mode_match(mode1, mode2, + DRM_MODE_MATCH_TIMINGS | + DRM_MODE_MATCH_FLAGS | + DRM_MODE_MATCH_3D_FLAGS); } EXPORT_SYMBOL(drm_mode_equal_no_clocks); @@ -1004,21 +1089,9 @@ EXPORT_SYMBOL(drm_mode_equal_no_clocks); bool drm_mode_equal_no_clocks_no_stereo(const struct drm_display_mode *mode1, const struct drm_display_mode *mode2) { - if (mode1->hdisplay == mode2->hdisplay && - mode1->hsync_start == mode2->hsync_start && - mode1->hsync_end == mode2->hsync_end && - mode1->htotal == mode2->htotal && - mode1->hskew == mode2->hskew && - mode1->vdisplay == mode2->vdisplay && - mode1->vsync_start == mode2->vsync_start && - mode1->vsync_end == mode2->vsync_end && - mode1->vtotal == mode2->vtotal && - mode1->vscan == mode2->vscan && - (mode1->flags & ~DRM_MODE_FLAG_3D_MASK) == - (mode2->flags & ~DRM_MODE_FLAG_3D_MASK)) - return true; - - return false; + return drm_mode_match(mode1, mode2, + DRM_MODE_MATCH_TIMINGS | + DRM_MODE_MATCH_FLAGS); } EXPORT_SYMBOL(drm_mode_equal_no_clocks_no_stereo); @@ -1575,6 +1648,26 @@ void drm_mode_convert_to_umode(struct drm_mode_modeinfo *out, out->vrefresh = in->vrefresh; out->flags = in->flags; out->type = in->type; + + switch (in->picture_aspect_ratio) { + case HDMI_PICTURE_ASPECT_4_3: + out->flags |= DRM_MODE_FLAG_PIC_AR_4_3; + break; + case HDMI_PICTURE_ASPECT_16_9: + out->flags |= DRM_MODE_FLAG_PIC_AR_16_9; + break; + case HDMI_PICTURE_ASPECT_64_27: + out->flags |= DRM_MODE_FLAG_PIC_AR_64_27; + break; + case HDMI_PICTURE_ASPECT_256_135: + out->flags |= DRM_MODE_FLAG_PIC_AR_256_135; + break; + case HDMI_PICTURE_ASPECT_RESERVED: + default: + out->flags |= DRM_MODE_FLAG_PIC_AR_NONE; + break; + } + strncpy(out->name, in->name, DRM_DISPLAY_MODE_LEN); out->name[DRM_DISPLAY_MODE_LEN-1] = 0; } @@ -1621,6 +1714,30 @@ int drm_mode_convert_umode(struct drm_device *dev, strncpy(out->name, in->name, DRM_DISPLAY_MODE_LEN); out->name[DRM_DISPLAY_MODE_LEN-1] = 0; + /* Clearing picture aspect ratio bits from out flags, + * as the aspect-ratio information is not stored in + * flags for kernel-mode, but in picture_aspect_ratio. + */ + out->flags &= ~DRM_MODE_FLAG_PIC_AR_MASK; + + switch (in->flags & DRM_MODE_FLAG_PIC_AR_MASK) { + case DRM_MODE_FLAG_PIC_AR_4_3: + out->picture_aspect_ratio |= HDMI_PICTURE_ASPECT_4_3; + break; + case DRM_MODE_FLAG_PIC_AR_16_9: + out->picture_aspect_ratio |= HDMI_PICTURE_ASPECT_16_9; + break; + case DRM_MODE_FLAG_PIC_AR_64_27: + out->picture_aspect_ratio |= HDMI_PICTURE_ASPECT_64_27; + break; + case DRM_MODE_FLAG_PIC_AR_256_135: + out->picture_aspect_ratio |= HDMI_PICTURE_ASPECT_256_135; + break; + default: + out->picture_aspect_ratio = HDMI_PICTURE_ASPECT_NONE; + break; + } + out->status = drm_mode_validate_driver(dev, out); if (out->status != MODE_OK) return -EINVAL; diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index caebddda8bce..fe9c6c731e87 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -172,10 +172,9 @@ int drm_get_panel_orientation_quirk(int width, int height) if (!bios_date) continue; - for (i = 0; data->bios_dates[i]; i++) { - if (!strcmp(data->bios_dates[i], bios_date)) - return data->orientation; - } + i = match_string(data->bios_dates, -1, bios_date); + if (i >= 0) + return data->orientation; } return DRM_MODE_PANEL_ORIENTATION_UNKNOWN; diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index caf675e3e692..397b46b33739 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -409,7 +409,10 @@ void *drm_gem_dmabuf_vmap(struct dma_buf *dma_buf) struct drm_gem_object *obj = dma_buf->priv; struct drm_device *dev = obj->dev; - return dev->driver->gem_prime_vmap(obj); + if (dev->driver->gem_prime_vmap) + return dev->driver->gem_prime_vmap(obj); + else + return NULL; } EXPORT_SYMBOL(drm_gem_dmabuf_vmap); @@ -426,7 +429,8 @@ void drm_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr) struct drm_gem_object *obj = dma_buf->priv; struct drm_device *dev = obj->dev; - dev->driver->gem_prime_vunmap(obj, vaddr); + if (dev->driver->gem_prime_vunmap) + dev->driver->gem_prime_vunmap(obj, vaddr); } EXPORT_SYMBOL(drm_gem_dmabuf_vunmap); diff --git a/drivers/gpu/drm/drm_property.c b/drivers/gpu/drm/drm_property.c index 8f4672daac7f..1f8031e30f53 100644 --- a/drivers/gpu/drm/drm_property.c +++ b/drivers/gpu/drm/drm_property.c @@ -169,9 +169,9 @@ struct drm_property *drm_property_create_enum(struct drm_device *dev, return NULL; for (i = 0; i < num_values; i++) { - ret = drm_property_add_enum(property, i, - props[i].type, - props[i].name); + ret = drm_property_add_enum(property, + props[i].type, + props[i].name); if (ret) { drm_property_destroy(dev, property); return NULL; @@ -209,7 +209,7 @@ struct drm_property *drm_property_create_bitmask(struct drm_device *dev, uint64_t supported_bits) { struct drm_property *property; - int i, ret, index = 0; + int i, ret; int num_values = hweight64(supported_bits); flags |= DRM_MODE_PROP_BITMASK; @@ -221,14 +221,9 @@ struct drm_property *drm_property_create_bitmask(struct drm_device *dev, if (!(supported_bits & (1ULL << props[i].type))) continue; - if (WARN_ON(index >= num_values)) { - drm_property_destroy(dev, property); - return NULL; - } - - ret = drm_property_add_enum(property, index++, - props[i].type, - props[i].name); + ret = drm_property_add_enum(property, + props[i].type, + props[i].name); if (ret) { drm_property_destroy(dev, property); return NULL; @@ -376,7 +371,6 @@ EXPORT_SYMBOL(drm_property_create_bool); /** * drm_property_add_enum - add a possible value to an enumeration property * @property: enumeration property to change - * @index: index of the new enumeration * @value: value of the new enumeration * @name: symbolic name of the new enumeration * @@ -388,10 +382,11 @@ EXPORT_SYMBOL(drm_property_create_bool); * Returns: * Zero on success, error code on failure. */ -int drm_property_add_enum(struct drm_property *property, int index, +int drm_property_add_enum(struct drm_property *property, uint64_t value, const char *name) { struct drm_property_enum *prop_enum; + int index = 0; if (WARN_ON(strlen(name) >= DRM_PROP_NAME_LEN)) return -EINVAL; @@ -411,8 +406,12 @@ int drm_property_add_enum(struct drm_property *property, int index, list_for_each_entry(prop_enum, &property->enum_list, head) { if (WARN_ON(prop_enum->value == value)) return -EINVAL; + index++; } + if (WARN_ON(index >= property->num_values)) + return -EINVAL; + prop_enum = kzalloc(sizeof(struct drm_property_enum), GFP_KERNEL); if (!prop_enum) return -ENOMEM; diff --git a/drivers/gpu/drm/drm_rect.c b/drivers/gpu/drm/drm_rect.c index 9817c1445ba9..8c057829b804 100644 --- a/drivers/gpu/drm/drm_rect.c +++ b/drivers/gpu/drm/drm_rect.c @@ -50,13 +50,25 @@ bool drm_rect_intersect(struct drm_rect *r1, const struct drm_rect *r2) } EXPORT_SYMBOL(drm_rect_intersect); +static u32 clip_scaled(u32 src, u32 dst, u32 clip) +{ + u64 tmp = mul_u32_u32(src, dst - clip); + + /* + * Round toward 1.0 when clipping so that we don't accidentally + * change upscaling to downscaling or vice versa. + */ + if (src < (dst << 16)) + return DIV_ROUND_UP_ULL(tmp, dst); + else + return DIV_ROUND_DOWN_ULL(tmp, dst); +} + /** * drm_rect_clip_scaled - perform a scaled clip operation * @src: source window rectangle * @dst: destination window rectangle * @clip: clip rectangle - * @hscale: horizontal scaling factor - * @vscale: vertical scaling factor * * Clip rectangle @dst by rectangle @clip. Clip rectangle @src by the * same amounts multiplied by @hscale and @vscale. @@ -66,33 +78,44 @@ EXPORT_SYMBOL(drm_rect_intersect); * %false otherwise */ bool drm_rect_clip_scaled(struct drm_rect *src, struct drm_rect *dst, - const struct drm_rect *clip, - int hscale, int vscale) + const struct drm_rect *clip) { int diff; diff = clip->x1 - dst->x1; if (diff > 0) { - int64_t tmp = src->x1 + (int64_t) diff * hscale; - src->x1 = clamp_t(int64_t, tmp, INT_MIN, INT_MAX); + u32 new_src_w = clip_scaled(drm_rect_width(src), + drm_rect_width(dst), diff); + + src->x1 = clamp_t(int64_t, src->x2 - new_src_w, INT_MIN, INT_MAX); + dst->x1 = clip->x1; } diff = clip->y1 - dst->y1; if (diff > 0) { - int64_t tmp = src->y1 + (int64_t) diff * vscale; - src->y1 = clamp_t(int64_t, tmp, INT_MIN, INT_MAX); + u32 new_src_h = clip_scaled(drm_rect_height(src), + drm_rect_height(dst), diff); + + src->y1 = clamp_t(int64_t, src->y2 - new_src_h, INT_MIN, INT_MAX); + dst->y1 = clip->y1; } diff = dst->x2 - clip->x2; if (diff > 0) { - int64_t tmp = src->x2 - (int64_t) diff * hscale; - src->x2 = clamp_t(int64_t, tmp, INT_MIN, INT_MAX); + u32 new_src_w = clip_scaled(drm_rect_width(src), + drm_rect_width(dst), diff); + + src->x2 = clamp_t(int64_t, src->x1 + new_src_w, INT_MIN, INT_MAX); + dst->x2 = clip->x2; } diff = dst->y2 - clip->y2; if (diff > 0) { - int64_t tmp = src->y2 - (int64_t) diff * vscale; - src->y2 = clamp_t(int64_t, tmp, INT_MIN, INT_MAX); + u32 new_src_h = clip_scaled(drm_rect_height(src), + drm_rect_height(dst), diff); + + src->y2 = clamp_t(int64_t, src->y1 + new_src_h, INT_MIN, INT_MAX); + dst->y2 = clip->y2; } - return drm_rect_intersect(dst, clip); + return drm_rect_visible(dst); } EXPORT_SYMBOL(drm_rect_clip_scaled); @@ -106,7 +129,10 @@ static int drm_calc_scale(int src, int dst) if (dst == 0) return 0; - scale = src / dst; + if (src > (dst << 16)) + return DIV_ROUND_UP(src, dst); + else + scale = src / dst; return scale; } @@ -121,6 +147,10 @@ static int drm_calc_scale(int src, int dst) * Calculate the horizontal scaling factor as * (@src width) / (@dst width). * + * If the scale is below 1 << 16, round down. If the scale is above + * 1 << 16, round up. This will calculate the scale with the most + * pessimistic limit calculation. + * * RETURNS: * The horizontal scaling factor, or errno of out of limits. */ @@ -152,6 +182,10 @@ EXPORT_SYMBOL(drm_rect_calc_hscale); * Calculate the vertical scaling factor as * (@src height) / (@dst height). * + * If the scale is below 1 << 16, round down. If the scale is above + * 1 << 16, round up. This will calculate the scale with the most + * pessimistic limit calculation. + * * RETURNS: * The vertical scaling factor, or errno of out of limits. */ @@ -189,6 +223,10 @@ EXPORT_SYMBOL(drm_rect_calc_vscale); * If the calculated scaling factor is above @max_vscale, * decrease the height of rectangle @src to compensate. * + * If the scale is below 1 << 16, round down. If the scale is above + * 1 << 16, round up. This will calculate the scale with the most + * pessimistic limit calculation. + * * RETURNS: * The horizontal scaling factor. */ @@ -239,6 +277,10 @@ EXPORT_SYMBOL(drm_rect_calc_hscale_relaxed); * If the calculated scaling factor is above @max_vscale, * decrease the height of rectangle @src to compensate. * + * If the scale is below 1 << 16, round down. If the scale is above + * 1 << 16, round up. This will calculate the scale with the most + * pessimistic limit calculation. + * * RETURNS: * The vertical scaling factor. */ @@ -373,8 +415,8 @@ EXPORT_SYMBOL(drm_rect_rotate); * them when doing a rotatation and its inverse. * That is, if you do :: * - * DRM_MODE_PROP_ROTATE(&r, width, height, rotation); - * DRM_MODE_ROTATE_inv(&r, width, height, rotation); + * drm_rect_rotate(&r, width, height, rotation); + * drm_rect_rotate_inv(&r, width, height, rotation); * * you will always get back the original rectangle. */ diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c index 1c5b5ce1fd7f..b3c1daad1169 100644 --- a/drivers/gpu/drm/drm_sysfs.c +++ b/drivers/gpu/drm/drm_sysfs.c @@ -331,9 +331,7 @@ struct device *drm_sysfs_minor_alloc(struct drm_minor *minor) struct device *kdev; int r; - if (minor->type == DRM_MINOR_CONTROL) - minor_str = "controlD%d"; - else if (minor->type == DRM_MINOR_RENDER) + if (minor->type == DRM_MINOR_RENDER) minor_str = "renderD%d"; else minor_str = "card%d"; diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig index 735ce47688f9..208bc27be3cc 100644 --- a/drivers/gpu/drm/exynos/Kconfig +++ b/drivers/gpu/drm/exynos/Kconfig @@ -1,6 +1,6 @@ config DRM_EXYNOS tristate "DRM Support for Samsung SoC EXYNOS Series" - depends on OF && DRM && (ARCH_S3C64XX || ARCH_EXYNOS || ARCH_MULTIPLATFORM) + depends on OF && DRM && (ARCH_S3C64XX || ARCH_S5PV210 || ARCH_EXYNOS || ARCH_MULTIPLATFORM) select DRM_KMS_HELPER select VIDEOMODE_HELPERS select SND_SOC_HDMI_CODEC if SND_SOC @@ -95,21 +95,31 @@ config DRM_EXYNOS_G2D help Choose this option if you want to use Exynos G2D for DRM. +config DRM_EXYNOS_IPP + bool + config DRM_EXYNOS_FIMC bool "FIMC" - depends on BROKEN && MFD_SYSCON + select DRM_EXYNOS_IPP help Choose this option if you want to use Exynos FIMC for DRM. config DRM_EXYNOS_ROTATOR bool "Rotator" - depends on BROKEN + select DRM_EXYNOS_IPP help Choose this option if you want to use Exynos Rotator for DRM. +config DRM_EXYNOS_SCALER + bool "Scaler" + select DRM_EXYNOS_IPP + help + Choose this option if you want to use Exynos Scaler for DRM. + config DRM_EXYNOS_GSC bool "GScaler" - depends on BROKEN && ARCH_EXYNOS5 && VIDEO_SAMSUNG_EXYNOS_GSC=n + depends on VIDEO_SAMSUNG_EXYNOS_GSC=n + select DRM_EXYNOS_IPP help Choose this option if you want to use Exynos GSC for DRM. diff --git a/drivers/gpu/drm/exynos/Makefile b/drivers/gpu/drm/exynos/Makefile index a51c5459bb13..3b323f1e0475 100644 --- a/drivers/gpu/drm/exynos/Makefile +++ b/drivers/gpu/drm/exynos/Makefile @@ -18,8 +18,10 @@ exynosdrm-$(CONFIG_DRM_EXYNOS_MIXER) += exynos_mixer.o exynosdrm-$(CONFIG_DRM_EXYNOS_HDMI) += exynos_hdmi.o exynosdrm-$(CONFIG_DRM_EXYNOS_VIDI) += exynos_drm_vidi.o exynosdrm-$(CONFIG_DRM_EXYNOS_G2D) += exynos_drm_g2d.o +exynosdrm-$(CONFIG_DRM_EXYNOS_IPP) += exynos_drm_ipp.o exynosdrm-$(CONFIG_DRM_EXYNOS_FIMC) += exynos_drm_fimc.o exynosdrm-$(CONFIG_DRM_EXYNOS_ROTATOR) += exynos_drm_rotator.o +exynosdrm-$(CONFIG_DRM_EXYNOS_SCALER) += exynos_drm_scaler.o exynosdrm-$(CONFIG_DRM_EXYNOS_GSC) += exynos_drm_gsc.o exynosdrm-$(CONFIG_DRM_EXYNOS_MIC) += exynos_drm_mic.o diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index 39284bb7c2c2..a81b4a5e24a7 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -27,15 +27,23 @@ #include "exynos_drm_fb.h" #include "exynos_drm_gem.h" #include "exynos_drm_plane.h" +#include "exynos_drm_ipp.h" #include "exynos_drm_vidi.h" #include "exynos_drm_g2d.h" #include "exynos_drm_iommu.h" #define DRIVER_NAME "exynos" #define DRIVER_DESC "Samsung SoC DRM" -#define DRIVER_DATE "20110530" +#define DRIVER_DATE "20180330" + +/* + * Interface history: + * + * 1.0 - Original version + * 1.1 - Upgrade IPP driver to version 2.0 + */ #define DRIVER_MAJOR 1 -#define DRIVER_MINOR 0 +#define DRIVER_MINOR 1 static int exynos_drm_open(struct drm_device *dev, struct drm_file *file) { @@ -88,6 +96,16 @@ static const struct drm_ioctl_desc exynos_ioctls[] = { DRM_AUTH | DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(EXYNOS_G2D_EXEC, exynos_g2d_exec_ioctl, DRM_AUTH | DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(EXYNOS_IPP_GET_RESOURCES, + exynos_drm_ipp_get_res_ioctl, + DRM_AUTH | DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(EXYNOS_IPP_GET_CAPS, exynos_drm_ipp_get_caps_ioctl, + DRM_AUTH | DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(EXYNOS_IPP_GET_LIMITS, + exynos_drm_ipp_get_limits_ioctl, + DRM_AUTH | DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(EXYNOS_IPP_COMMIT, exynos_drm_ipp_commit_ioctl, + DRM_AUTH | DRM_RENDER_ALLOW), }; static const struct file_operations exynos_drm_driver_fops = { @@ -184,6 +202,7 @@ struct exynos_drm_driver_info { #define DRM_COMPONENT_DRIVER BIT(0) /* supports component framework */ #define DRM_VIRTUAL_DEVICE BIT(1) /* create virtual platform device */ #define DRM_DMA_DEVICE BIT(2) /* can be used for dma allocations */ +#define DRM_FIMC_DEVICE BIT(3) /* devices shared with V4L2 subsystem */ #define DRV_PTR(drv, cond) (IS_ENABLED(cond) ? &drv : NULL) @@ -223,10 +242,16 @@ static struct exynos_drm_driver_info exynos_drm_drivers[] = { DRV_PTR(g2d_driver, CONFIG_DRM_EXYNOS_G2D), }, { DRV_PTR(fimc_driver, CONFIG_DRM_EXYNOS_FIMC), + DRM_COMPONENT_DRIVER | DRM_FIMC_DEVICE, }, { DRV_PTR(rotator_driver, CONFIG_DRM_EXYNOS_ROTATOR), + DRM_COMPONENT_DRIVER + }, { + DRV_PTR(scaler_driver, CONFIG_DRM_EXYNOS_SCALER), + DRM_COMPONENT_DRIVER }, { DRV_PTR(gsc_driver, CONFIG_DRM_EXYNOS_GSC), + DRM_COMPONENT_DRIVER }, { &exynos_drm_platform_driver, DRM_VIRTUAL_DEVICE @@ -254,7 +279,11 @@ static struct component_match *exynos_drm_match_add(struct device *dev) &info->driver->driver, (void *)platform_bus_type.match))) { put_device(p); - component_match_add(dev, &match, compare_dev, d); + + if (!(info->flags & DRM_FIMC_DEVICE) || + exynos_drm_check_fimc_device(d) == 0) + component_match_add(dev, &match, + compare_dev, d); p = d; } put_device(p); diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h index 075957cb6ba1..0f6d079a55c9 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.h +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h @@ -273,6 +273,15 @@ static inline int exynos_dpi_bind(struct drm_device *dev, } #endif +#ifdef CONFIG_DRM_EXYNOS_FIMC +int exynos_drm_check_fimc_device(struct device *dev); +#else +static inline int exynos_drm_check_fimc_device(struct device *dev) +{ + return 0; +} +#endif + int exynos_atomic_commit(struct drm_device *dev, struct drm_atomic_state *state, bool nonblock); @@ -288,6 +297,7 @@ extern struct platform_driver vidi_driver; extern struct platform_driver g2d_driver; extern struct platform_driver fimc_driver; extern struct platform_driver rotator_driver; +extern struct platform_driver scaler_driver; extern struct platform_driver gsc_driver; extern struct platform_driver mic_driver; #endif diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c index 7904ffa9abfb..eae44fd714f0 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c @@ -270,7 +270,6 @@ struct exynos_dsi { u32 lanes; u32 mode_flags; u32 format; - struct videomode vm; int state; struct drm_property *brightness; @@ -881,30 +880,30 @@ static int exynos_dsi_init_link(struct exynos_dsi *dsi) static void exynos_dsi_set_display_mode(struct exynos_dsi *dsi) { - struct videomode *vm = &dsi->vm; + struct drm_display_mode *m = &dsi->encoder.crtc->state->adjusted_mode; unsigned int num_bits_resol = dsi->driver_data->num_bits_resol; u32 reg; if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO) { reg = DSIM_CMD_ALLOW(0xf) - | DSIM_STABLE_VFP(vm->vfront_porch) - | DSIM_MAIN_VBP(vm->vback_porch); + | DSIM_STABLE_VFP(m->vsync_start - m->vdisplay) + | DSIM_MAIN_VBP(m->vtotal - m->vsync_end); exynos_dsi_write(dsi, DSIM_MVPORCH_REG, reg); - reg = DSIM_MAIN_HFP(vm->hfront_porch) - | DSIM_MAIN_HBP(vm->hback_porch); + reg = DSIM_MAIN_HFP(m->hsync_start - m->hdisplay) + | DSIM_MAIN_HBP(m->htotal - m->hsync_end); exynos_dsi_write(dsi, DSIM_MHPORCH_REG, reg); - reg = DSIM_MAIN_VSA(vm->vsync_len) - | DSIM_MAIN_HSA(vm->hsync_len); + reg = DSIM_MAIN_VSA(m->vsync_end - m->vsync_start) + | DSIM_MAIN_HSA(m->hsync_end - m->hsync_start); exynos_dsi_write(dsi, DSIM_MSYNC_REG, reg); } - reg = DSIM_MAIN_HRESOL(vm->hactive, num_bits_resol) | - DSIM_MAIN_VRESOL(vm->vactive, num_bits_resol); + reg = DSIM_MAIN_HRESOL(m->hdisplay, num_bits_resol) | + DSIM_MAIN_VRESOL(m->vdisplay, num_bits_resol); exynos_dsi_write(dsi, DSIM_MDRESOL_REG, reg); - dev_dbg(dsi->dev, "LCD size = %dx%d\n", vm->hactive, vm->vactive); + dev_dbg(dsi->dev, "LCD size = %dx%d\n", m->hdisplay, m->vdisplay); } static void exynos_dsi_set_display_enable(struct exynos_dsi *dsi, bool enable) @@ -1485,26 +1484,7 @@ static int exynos_dsi_create_connector(struct drm_encoder *encoder) return 0; } -static void exynos_dsi_mode_set(struct drm_encoder *encoder, - struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) -{ - struct exynos_dsi *dsi = encoder_to_dsi(encoder); - struct videomode *vm = &dsi->vm; - struct drm_display_mode *m = adjusted_mode; - - vm->hactive = m->hdisplay; - vm->vactive = m->vdisplay; - vm->vfront_porch = m->vsync_start - m->vdisplay; - vm->vback_porch = m->vtotal - m->vsync_end; - vm->vsync_len = m->vsync_end - m->vsync_start; - vm->hfront_porch = m->hsync_start - m->hdisplay; - vm->hback_porch = m->htotal - m->hsync_end; - vm->hsync_len = m->hsync_end - m->hsync_start; -} - static const struct drm_encoder_helper_funcs exynos_dsi_encoder_helper_funcs = { - .mode_set = exynos_dsi_mode_set, .enable = exynos_dsi_enable, .disable = exynos_dsi_disable, }; diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c index 5b18b5c5fdf2..4dfbfc7f3b84 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c @@ -12,6 +12,7 @@ * */ #include <linux/kernel.h> +#include <linux/component.h> #include <linux/platform_device.h> #include <linux/mfd/syscon.h> #include <linux/regmap.h> @@ -24,8 +25,8 @@ #include <drm/exynos_drm.h> #include "regs-fimc.h" #include "exynos_drm_drv.h" +#include "exynos_drm_iommu.h" #include "exynos_drm_ipp.h" -#include "exynos_drm_fimc.h" /* * FIMC stands for Fully Interactive Mobile Camera and @@ -33,23 +34,6 @@ * input DMA reads image data from the memory. * output DMA writes image data to memory. * FIMC supports image rotation and image effect functions. - * - * M2M operation : supports crop/scale/rotation/csc so on. - * Memory ----> FIMC H/W ----> Memory. - * Writeback operation : supports cloned screen with FIMD. - * FIMD ----> FIMC H/W ----> Memory. - * Output operation : supports direct display using local path. - * Memory ----> FIMC H/W ----> FIMD. - */ - -/* - * TODO - * 1. check suspend/resume api if needed. - * 2. need to check use case platform_device_id. - * 3. check src/dst size with, height. - * 4. added check_prepare api for right register. - * 5. need to add supported list in prop_list. - * 6. check prescaler/scaler optimization. */ #define FIMC_MAX_DEVS 4 @@ -59,29 +43,19 @@ #define FIMC_BUF_STOP 1 #define FIMC_BUF_START 2 #define FIMC_WIDTH_ITU_709 1280 -#define FIMC_REFRESH_MAX 60 -#define FIMC_REFRESH_MIN 12 -#define FIMC_CROP_MAX 8192 -#define FIMC_CROP_MIN 32 -#define FIMC_SCALE_MAX 4224 -#define FIMC_SCALE_MIN 32 +#define FIMC_AUTOSUSPEND_DELAY 2000 + +static unsigned int fimc_mask = 0xc; +module_param_named(fimc_devs, fimc_mask, uint, 0644); +MODULE_PARM_DESC(fimc_devs, "Alias mask for assigning FIMC devices to Exynos DRM"); #define get_fimc_context(dev) platform_get_drvdata(to_platform_device(dev)) -#define get_ctx_from_ippdrv(ippdrv) container_of(ippdrv,\ - struct fimc_context, ippdrv); -enum fimc_wb { - FIMC_WB_NONE, - FIMC_WB_A, - FIMC_WB_B, -}; enum { FIMC_CLK_LCLK, FIMC_CLK_GATE, FIMC_CLK_WB_A, FIMC_CLK_WB_B, - FIMC_CLK_MUX, - FIMC_CLK_PARENT, FIMC_CLKS_MAX }; @@ -90,12 +64,8 @@ static const char * const fimc_clock_names[] = { [FIMC_CLK_GATE] = "fimc", [FIMC_CLK_WB_A] = "pxl_async0", [FIMC_CLK_WB_B] = "pxl_async1", - [FIMC_CLK_MUX] = "mux", - [FIMC_CLK_PARENT] = "parent", }; -#define FIMC_DEFAULT_LCLK_FREQUENCY 133000000UL - /* * A structure of scaler. * @@ -107,7 +77,7 @@ static const char * const fimc_clock_names[] = { * @vratio: vertical ratio. */ struct fimc_scaler { - bool range; + bool range; bool bypass; bool up_h; bool up_v; @@ -116,56 +86,32 @@ struct fimc_scaler { }; /* - * A structure of scaler capability. - * - * find user manual table 43-1. - * @in_hori: scaler input horizontal size. - * @bypass: scaler bypass mode. - * @dst_h_wo_rot: target horizontal size without output rotation. - * @dst_h_rot: target horizontal size with output rotation. - * @rl_w_wo_rot: real width without input rotation. - * @rl_h_rot: real height without output rotation. - */ -struct fimc_capability { - /* scaler */ - u32 in_hori; - u32 bypass; - /* output rotator */ - u32 dst_h_wo_rot; - u32 dst_h_rot; - /* input rotator */ - u32 rl_w_wo_rot; - u32 rl_h_rot; -}; - -/* * A structure of fimc context. * - * @ippdrv: prepare initialization using ippdrv. * @regs_res: register resources. * @regs: memory mapped io registers. * @lock: locking of operations. * @clocks: fimc clocks. - * @clk_frequency: LCLK clock frequency. - * @sysreg: handle to SYSREG block regmap. * @sc: scaler infomations. * @pol: porarity of writeback. * @id: fimc id. * @irq: irq number. - * @suspended: qos operations. */ struct fimc_context { - struct exynos_drm_ippdrv ippdrv; + struct exynos_drm_ipp ipp; + struct drm_device *drm_dev; + struct device *dev; + struct exynos_drm_ipp_task *task; + struct exynos_drm_ipp_formats *formats; + unsigned int num_formats; + struct resource *regs_res; void __iomem *regs; spinlock_t lock; struct clk *clocks[FIMC_CLKS_MAX]; - u32 clk_frequency; - struct regmap *sysreg; struct fimc_scaler sc; int id; int irq; - bool suspended; }; static u32 fimc_read(struct fimc_context *ctx, u32 reg) @@ -217,19 +163,10 @@ static void fimc_sw_reset(struct fimc_context *ctx) fimc_write(ctx, 0x0, EXYNOS_CIFCNTSEQ); } -static int fimc_set_camblk_fimd0_wb(struct fimc_context *ctx) -{ - return regmap_update_bits(ctx->sysreg, SYSREG_CAMERA_BLK, - SYSREG_FIMD0WB_DEST_MASK, - ctx->id << SYSREG_FIMD0WB_DEST_SHIFT); -} - -static void fimc_set_type_ctrl(struct fimc_context *ctx, enum fimc_wb wb) +static void fimc_set_type_ctrl(struct fimc_context *ctx) { u32 cfg; - DRM_DEBUG_KMS("wb[%d]\n", wb); - cfg = fimc_read(ctx, EXYNOS_CIGCTRL); cfg &= ~(EXYNOS_CIGCTRL_TESTPATTERN_MASK | EXYNOS_CIGCTRL_SELCAM_ITU_MASK | @@ -238,23 +175,10 @@ static void fimc_set_type_ctrl(struct fimc_context *ctx, enum fimc_wb wb) EXYNOS_CIGCTRL_SELWB_CAMIF_MASK | EXYNOS_CIGCTRL_SELWRITEBACK_MASK); - switch (wb) { - case FIMC_WB_A: - cfg |= (EXYNOS_CIGCTRL_SELWRITEBACK_A | - EXYNOS_CIGCTRL_SELWB_CAMIF_WRITEBACK); - break; - case FIMC_WB_B: - cfg |= (EXYNOS_CIGCTRL_SELWRITEBACK_B | - EXYNOS_CIGCTRL_SELWB_CAMIF_WRITEBACK); - break; - case FIMC_WB_NONE: - default: - cfg |= (EXYNOS_CIGCTRL_SELCAM_ITU_A | - EXYNOS_CIGCTRL_SELWRITEBACK_A | - EXYNOS_CIGCTRL_SELCAM_MIPI_A | - EXYNOS_CIGCTRL_SELCAM_FIMC_ITU); - break; - } + cfg |= (EXYNOS_CIGCTRL_SELCAM_ITU_A | + EXYNOS_CIGCTRL_SELWRITEBACK_A | + EXYNOS_CIGCTRL_SELCAM_MIPI_A | + EXYNOS_CIGCTRL_SELCAM_FIMC_ITU); fimc_write(ctx, cfg, EXYNOS_CIGCTRL); } @@ -296,7 +220,6 @@ static void fimc_clear_irq(struct fimc_context *ctx) static bool fimc_check_ovf(struct fimc_context *ctx) { - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; u32 status, flag; status = fimc_read(ctx, EXYNOS_CISTATUS); @@ -310,7 +233,7 @@ static bool fimc_check_ovf(struct fimc_context *ctx) EXYNOS_CIWDOFST_CLROVFIY | EXYNOS_CIWDOFST_CLROVFICB | EXYNOS_CIWDOFST_CLROVFICR); - dev_err(ippdrv->dev, "occurred overflow at %d, status 0x%x.\n", + dev_err(ctx->dev, "occurred overflow at %d, status 0x%x.\n", ctx->id, status); return true; } @@ -376,10 +299,8 @@ static void fimc_handle_lastend(struct fimc_context *ctx, bool enable) fimc_write(ctx, cfg, EXYNOS_CIOCTRL); } - -static int fimc_src_set_fmt_order(struct fimc_context *ctx, u32 fmt) +static void fimc_src_set_fmt_order(struct fimc_context *ctx, u32 fmt) { - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; u32 cfg; DRM_DEBUG_KMS("fmt[0x%x]\n", fmt); @@ -392,12 +313,12 @@ static int fimc_src_set_fmt_order(struct fimc_context *ctx, u32 fmt) case DRM_FORMAT_RGB565: cfg |= EXYNOS_CISCCTRL_INRGB_FMT_RGB565; fimc_write(ctx, cfg, EXYNOS_CISCCTRL); - return 0; + return; case DRM_FORMAT_RGB888: case DRM_FORMAT_XRGB8888: cfg |= EXYNOS_CISCCTRL_INRGB_FMT_RGB888; fimc_write(ctx, cfg, EXYNOS_CISCCTRL); - return 0; + return; default: /* bypass */ break; @@ -438,20 +359,13 @@ static int fimc_src_set_fmt_order(struct fimc_context *ctx, u32 fmt) cfg |= (EXYNOS_MSCTRL_ORDER2P_LSB_CBCR | EXYNOS_MSCTRL_C_INT_IN_2PLANE); break; - default: - dev_err(ippdrv->dev, "invalid source yuv order 0x%x.\n", fmt); - return -EINVAL; } fimc_write(ctx, cfg, EXYNOS_MSCTRL); - - return 0; } -static int fimc_src_set_fmt(struct device *dev, u32 fmt) +static void fimc_src_set_fmt(struct fimc_context *ctx, u32 fmt, bool tiled) { - struct fimc_context *ctx = get_fimc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; u32 cfg; DRM_DEBUG_KMS("fmt[0x%x]\n", fmt); @@ -485,9 +399,6 @@ static int fimc_src_set_fmt(struct device *dev, u32 fmt) case DRM_FORMAT_NV21: cfg |= EXYNOS_MSCTRL_INFORMAT_YCBCR420; break; - default: - dev_err(ippdrv->dev, "invalid source format 0x%x.\n", fmt); - return -EINVAL; } fimc_write(ctx, cfg, EXYNOS_MSCTRL); @@ -495,22 +406,22 @@ static int fimc_src_set_fmt(struct device *dev, u32 fmt) cfg = fimc_read(ctx, EXYNOS_CIDMAPARAM); cfg &= ~EXYNOS_CIDMAPARAM_R_MODE_MASK; - cfg |= EXYNOS_CIDMAPARAM_R_MODE_LINEAR; + if (tiled) + cfg |= EXYNOS_CIDMAPARAM_R_MODE_64X32; + else + cfg |= EXYNOS_CIDMAPARAM_R_MODE_LINEAR; fimc_write(ctx, cfg, EXYNOS_CIDMAPARAM); - return fimc_src_set_fmt_order(ctx, fmt); + fimc_src_set_fmt_order(ctx, fmt); } -static int fimc_src_set_transf(struct device *dev, - enum drm_exynos_degree degree, - enum drm_exynos_flip flip, bool *swap) +static void fimc_src_set_transf(struct fimc_context *ctx, unsigned int rotation) { - struct fimc_context *ctx = get_fimc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; + unsigned int degree = rotation & DRM_MODE_ROTATE_MASK; u32 cfg1, cfg2; - DRM_DEBUG_KMS("degree[%d]flip[0x%x]\n", degree, flip); + DRM_DEBUG_KMS("rotation[%x]\n", rotation); cfg1 = fimc_read(ctx, EXYNOS_MSCTRL); cfg1 &= ~(EXYNOS_MSCTRL_FLIP_X_MIRROR | @@ -520,61 +431,56 @@ static int fimc_src_set_transf(struct device *dev, cfg2 &= ~EXYNOS_CITRGFMT_INROT90_CLOCKWISE; switch (degree) { - case EXYNOS_DRM_DEGREE_0: - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + case DRM_MODE_ROTATE_0: + if (rotation & DRM_MODE_REFLECT_X) cfg1 |= EXYNOS_MSCTRL_FLIP_X_MIRROR; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg1 |= EXYNOS_MSCTRL_FLIP_Y_MIRROR; break; - case EXYNOS_DRM_DEGREE_90: + case DRM_MODE_ROTATE_90: cfg2 |= EXYNOS_CITRGFMT_INROT90_CLOCKWISE; - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + if (rotation & DRM_MODE_REFLECT_X) cfg1 |= EXYNOS_MSCTRL_FLIP_X_MIRROR; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg1 |= EXYNOS_MSCTRL_FLIP_Y_MIRROR; break; - case EXYNOS_DRM_DEGREE_180: + case DRM_MODE_ROTATE_180: cfg1 |= (EXYNOS_MSCTRL_FLIP_X_MIRROR | EXYNOS_MSCTRL_FLIP_Y_MIRROR); - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + if (rotation & DRM_MODE_REFLECT_X) cfg1 &= ~EXYNOS_MSCTRL_FLIP_X_MIRROR; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg1 &= ~EXYNOS_MSCTRL_FLIP_Y_MIRROR; break; - case EXYNOS_DRM_DEGREE_270: + case DRM_MODE_ROTATE_270: cfg1 |= (EXYNOS_MSCTRL_FLIP_X_MIRROR | EXYNOS_MSCTRL_FLIP_Y_MIRROR); cfg2 |= EXYNOS_CITRGFMT_INROT90_CLOCKWISE; - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + if (rotation & DRM_MODE_REFLECT_X) cfg1 &= ~EXYNOS_MSCTRL_FLIP_X_MIRROR; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg1 &= ~EXYNOS_MSCTRL_FLIP_Y_MIRROR; break; - default: - dev_err(ippdrv->dev, "invalid degree value %d.\n", degree); - return -EINVAL; } fimc_write(ctx, cfg1, EXYNOS_MSCTRL); fimc_write(ctx, cfg2, EXYNOS_CITRGFMT); - *swap = (cfg2 & EXYNOS_CITRGFMT_INROT90_CLOCKWISE) ? 1 : 0; - - return 0; } -static int fimc_set_window(struct fimc_context *ctx, - struct drm_exynos_pos *pos, struct drm_exynos_sz *sz) +static void fimc_set_window(struct fimc_context *ctx, + struct exynos_drm_ipp_buffer *buf) { u32 cfg, h1, h2, v1, v2; /* cropped image */ - h1 = pos->x; - h2 = sz->hsize - pos->w - pos->x; - v1 = pos->y; - v2 = sz->vsize - pos->h - pos->y; + h1 = buf->rect.x; + h2 = buf->buf.width - buf->rect.w - buf->rect.x; + v1 = buf->rect.y; + v2 = buf->buf.height - buf->rect.h - buf->rect.y; DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]hsize[%d]vsize[%d]\n", - pos->x, pos->y, pos->w, pos->h, sz->hsize, sz->vsize); + buf->rect.x, buf->rect.y, buf->rect.w, buf->rect.h, + buf->buf.width, buf->buf.height); DRM_DEBUG_KMS("h1[%d]h2[%d]v1[%d]v2[%d]\n", h1, h2, v1, v2); /* @@ -592,42 +498,30 @@ static int fimc_set_window(struct fimc_context *ctx, cfg = (EXYNOS_CIWDOFST2_WINHOROFST2(h2) | EXYNOS_CIWDOFST2_WINVEROFST2(v2)); fimc_write(ctx, cfg, EXYNOS_CIWDOFST2); - - return 0; } -static int fimc_src_set_size(struct device *dev, int swap, - struct drm_exynos_pos *pos, struct drm_exynos_sz *sz) +static void fimc_src_set_size(struct fimc_context *ctx, + struct exynos_drm_ipp_buffer *buf) { - struct fimc_context *ctx = get_fimc_context(dev); - struct drm_exynos_pos img_pos = *pos; - struct drm_exynos_sz img_sz = *sz; u32 cfg; - DRM_DEBUG_KMS("swap[%d]hsize[%d]vsize[%d]\n", - swap, sz->hsize, sz->vsize); + DRM_DEBUG_KMS("hsize[%d]vsize[%d]\n", buf->buf.width, buf->buf.height); /* original size */ - cfg = (EXYNOS_ORGISIZE_HORIZONTAL(img_sz.hsize) | - EXYNOS_ORGISIZE_VERTICAL(img_sz.vsize)); + cfg = (EXYNOS_ORGISIZE_HORIZONTAL(buf->buf.width) | + EXYNOS_ORGISIZE_VERTICAL(buf->buf.height)); fimc_write(ctx, cfg, EXYNOS_ORGISIZE); - DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]\n", pos->x, pos->y, pos->w, pos->h); - - if (swap) { - img_pos.w = pos->h; - img_pos.h = pos->w; - img_sz.hsize = sz->vsize; - img_sz.vsize = sz->hsize; - } + DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]\n", buf->rect.x, buf->rect.y, + buf->rect.w, buf->rect.h); /* set input DMA image size */ cfg = fimc_read(ctx, EXYNOS_CIREAL_ISIZE); cfg &= ~(EXYNOS_CIREAL_ISIZE_HEIGHT_MASK | EXYNOS_CIREAL_ISIZE_WIDTH_MASK); - cfg |= (EXYNOS_CIREAL_ISIZE_WIDTH(img_pos.w) | - EXYNOS_CIREAL_ISIZE_HEIGHT(img_pos.h)); + cfg |= (EXYNOS_CIREAL_ISIZE_WIDTH(buf->rect.w) | + EXYNOS_CIREAL_ISIZE_HEIGHT(buf->rect.h)); fimc_write(ctx, cfg, EXYNOS_CIREAL_ISIZE); /* @@ -635,91 +529,34 @@ static int fimc_src_set_size(struct device *dev, int swap, * for now, we support only ITU601 8 bit mode */ cfg = (EXYNOS_CISRCFMT_ITU601_8BIT | - EXYNOS_CISRCFMT_SOURCEHSIZE(img_sz.hsize) | - EXYNOS_CISRCFMT_SOURCEVSIZE(img_sz.vsize)); + EXYNOS_CISRCFMT_SOURCEHSIZE(buf->buf.width) | + EXYNOS_CISRCFMT_SOURCEVSIZE(buf->buf.height)); fimc_write(ctx, cfg, EXYNOS_CISRCFMT); /* offset Y(RGB), Cb, Cr */ - cfg = (EXYNOS_CIIYOFF_HORIZONTAL(img_pos.x) | - EXYNOS_CIIYOFF_VERTICAL(img_pos.y)); + cfg = (EXYNOS_CIIYOFF_HORIZONTAL(buf->rect.x) | + EXYNOS_CIIYOFF_VERTICAL(buf->rect.y)); fimc_write(ctx, cfg, EXYNOS_CIIYOFF); - cfg = (EXYNOS_CIICBOFF_HORIZONTAL(img_pos.x) | - EXYNOS_CIICBOFF_VERTICAL(img_pos.y)); + cfg = (EXYNOS_CIICBOFF_HORIZONTAL(buf->rect.x) | + EXYNOS_CIICBOFF_VERTICAL(buf->rect.y)); fimc_write(ctx, cfg, EXYNOS_CIICBOFF); - cfg = (EXYNOS_CIICROFF_HORIZONTAL(img_pos.x) | - EXYNOS_CIICROFF_VERTICAL(img_pos.y)); + cfg = (EXYNOS_CIICROFF_HORIZONTAL(buf->rect.x) | + EXYNOS_CIICROFF_VERTICAL(buf->rect.y)); fimc_write(ctx, cfg, EXYNOS_CIICROFF); - return fimc_set_window(ctx, &img_pos, &img_sz); + fimc_set_window(ctx, buf); } -static int fimc_src_set_addr(struct device *dev, - struct drm_exynos_ipp_buf_info *buf_info, u32 buf_id, - enum drm_exynos_ipp_buf_type buf_type) +static void fimc_src_set_addr(struct fimc_context *ctx, + struct exynos_drm_ipp_buffer *buf) { - struct fimc_context *ctx = get_fimc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node; - struct drm_exynos_ipp_property *property; - struct drm_exynos_ipp_config *config; - - if (!c_node) { - DRM_ERROR("failed to get c_node.\n"); - return -EINVAL; - } - - property = &c_node->property; - - DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]buf_type[%d]\n", - property->prop_id, buf_id, buf_type); - - if (buf_id > FIMC_MAX_SRC) { - dev_info(ippdrv->dev, "invalid buf_id %d.\n", buf_id); - return -ENOMEM; - } - - /* address register set */ - switch (buf_type) { - case IPP_BUF_ENQUEUE: - config = &property->config[EXYNOS_DRM_OPS_SRC]; - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_Y], - EXYNOS_CIIYSA0); - - if (config->fmt == DRM_FORMAT_YVU420) { - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CR], - EXYNOS_CIICBSA0); - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CB], - EXYNOS_CIICRSA0); - } else { - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CB], - EXYNOS_CIICBSA0); - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CR], - EXYNOS_CIICRSA0); - } - break; - case IPP_BUF_DEQUEUE: - fimc_write(ctx, 0x0, EXYNOS_CIIYSA0); - fimc_write(ctx, 0x0, EXYNOS_CIICBSA0); - fimc_write(ctx, 0x0, EXYNOS_CIICRSA0); - break; - default: - /* bypass */ - break; - } - - return 0; + fimc_write(ctx, buf->dma_addr[0], EXYNOS_CIIYSA(0)); + fimc_write(ctx, buf->dma_addr[1], EXYNOS_CIICBSA(0)); + fimc_write(ctx, buf->dma_addr[2], EXYNOS_CIICRSA(0)); } -static struct exynos_drm_ipp_ops fimc_src_ops = { - .set_fmt = fimc_src_set_fmt, - .set_transf = fimc_src_set_transf, - .set_size = fimc_src_set_size, - .set_addr = fimc_src_set_addr, -}; - -static int fimc_dst_set_fmt_order(struct fimc_context *ctx, u32 fmt) +static void fimc_dst_set_fmt_order(struct fimc_context *ctx, u32 fmt) { - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; u32 cfg; DRM_DEBUG_KMS("fmt[0x%x]\n", fmt); @@ -732,11 +569,11 @@ static int fimc_dst_set_fmt_order(struct fimc_context *ctx, u32 fmt) case DRM_FORMAT_RGB565: cfg |= EXYNOS_CISCCTRL_OUTRGB_FMT_RGB565; fimc_write(ctx, cfg, EXYNOS_CISCCTRL); - return 0; + return; case DRM_FORMAT_RGB888: cfg |= EXYNOS_CISCCTRL_OUTRGB_FMT_RGB888; fimc_write(ctx, cfg, EXYNOS_CISCCTRL); - return 0; + return; case DRM_FORMAT_XRGB8888: cfg |= (EXYNOS_CISCCTRL_OUTRGB_FMT_RGB888 | EXYNOS_CISCCTRL_EXTRGB_EXTENSION); @@ -784,20 +621,13 @@ static int fimc_dst_set_fmt_order(struct fimc_context *ctx, u32 fmt) cfg |= EXYNOS_CIOCTRL_ORDER2P_LSB_CBCR; cfg |= EXYNOS_CIOCTRL_YCBCR_2PLANE; break; - default: - dev_err(ippdrv->dev, "invalid target yuv order 0x%x.\n", fmt); - return -EINVAL; } fimc_write(ctx, cfg, EXYNOS_CIOCTRL); - - return 0; } -static int fimc_dst_set_fmt(struct device *dev, u32 fmt) +static void fimc_dst_set_fmt(struct fimc_context *ctx, u32 fmt, bool tiled) { - struct fimc_context *ctx = get_fimc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; u32 cfg; DRM_DEBUG_KMS("fmt[0x%x]\n", fmt); @@ -837,10 +667,6 @@ static int fimc_dst_set_fmt(struct device *dev, u32 fmt) case DRM_FORMAT_NV21: cfg |= EXYNOS_CITRGFMT_OUTFORMAT_YCBCR420; break; - default: - dev_err(ippdrv->dev, "invalid target format 0x%x.\n", - fmt); - return -EINVAL; } fimc_write(ctx, cfg, EXYNOS_CITRGFMT); @@ -849,73 +675,67 @@ static int fimc_dst_set_fmt(struct device *dev, u32 fmt) cfg = fimc_read(ctx, EXYNOS_CIDMAPARAM); cfg &= ~EXYNOS_CIDMAPARAM_W_MODE_MASK; - cfg |= EXYNOS_CIDMAPARAM_W_MODE_LINEAR; + if (tiled) + cfg |= EXYNOS_CIDMAPARAM_W_MODE_64X32; + else + cfg |= EXYNOS_CIDMAPARAM_W_MODE_LINEAR; fimc_write(ctx, cfg, EXYNOS_CIDMAPARAM); - return fimc_dst_set_fmt_order(ctx, fmt); + fimc_dst_set_fmt_order(ctx, fmt); } -static int fimc_dst_set_transf(struct device *dev, - enum drm_exynos_degree degree, - enum drm_exynos_flip flip, bool *swap) +static void fimc_dst_set_transf(struct fimc_context *ctx, unsigned int rotation) { - struct fimc_context *ctx = get_fimc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; + unsigned int degree = rotation & DRM_MODE_ROTATE_MASK; u32 cfg; - DRM_DEBUG_KMS("degree[%d]flip[0x%x]\n", degree, flip); + DRM_DEBUG_KMS("rotation[0x%x]\n", rotation); cfg = fimc_read(ctx, EXYNOS_CITRGFMT); cfg &= ~EXYNOS_CITRGFMT_FLIP_MASK; cfg &= ~EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE; switch (degree) { - case EXYNOS_DRM_DEGREE_0: - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + case DRM_MODE_ROTATE_0: + if (rotation & DRM_MODE_REFLECT_X) cfg |= EXYNOS_CITRGFMT_FLIP_X_MIRROR; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg |= EXYNOS_CITRGFMT_FLIP_Y_MIRROR; break; - case EXYNOS_DRM_DEGREE_90: + case DRM_MODE_ROTATE_90: cfg |= EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE; - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + if (rotation & DRM_MODE_REFLECT_X) cfg |= EXYNOS_CITRGFMT_FLIP_X_MIRROR; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg |= EXYNOS_CITRGFMT_FLIP_Y_MIRROR; break; - case EXYNOS_DRM_DEGREE_180: + case DRM_MODE_ROTATE_180: cfg |= (EXYNOS_CITRGFMT_FLIP_X_MIRROR | EXYNOS_CITRGFMT_FLIP_Y_MIRROR); - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + if (rotation & DRM_MODE_REFLECT_X) cfg &= ~EXYNOS_CITRGFMT_FLIP_X_MIRROR; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg &= ~EXYNOS_CITRGFMT_FLIP_Y_MIRROR; break; - case EXYNOS_DRM_DEGREE_270: + case DRM_MODE_ROTATE_270: cfg |= (EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE | EXYNOS_CITRGFMT_FLIP_X_MIRROR | EXYNOS_CITRGFMT_FLIP_Y_MIRROR); - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + if (rotation & DRM_MODE_REFLECT_X) cfg &= ~EXYNOS_CITRGFMT_FLIP_X_MIRROR; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg &= ~EXYNOS_CITRGFMT_FLIP_Y_MIRROR; break; - default: - dev_err(ippdrv->dev, "invalid degree value %d.\n", degree); - return -EINVAL; } fimc_write(ctx, cfg, EXYNOS_CITRGFMT); - *swap = (cfg & EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE) ? 1 : 0; - - return 0; } static int fimc_set_prescaler(struct fimc_context *ctx, struct fimc_scaler *sc, - struct drm_exynos_pos *src, struct drm_exynos_pos *dst) + struct drm_exynos_ipp_task_rect *src, + struct drm_exynos_ipp_task_rect *dst) { - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; u32 cfg, cfg_ext, shfactor; u32 pre_dst_width, pre_dst_height; u32 hfactor, vfactor; @@ -942,13 +762,13 @@ static int fimc_set_prescaler(struct fimc_context *ctx, struct fimc_scaler *sc, /* fimc_ippdrv_check_property assures that dividers are not null */ hfactor = fls(src_w / dst_w / 2); if (hfactor > FIMC_SHFACTOR / 2) { - dev_err(ippdrv->dev, "failed to get ratio horizontal.\n"); + dev_err(ctx->dev, "failed to get ratio horizontal.\n"); return -EINVAL; } vfactor = fls(src_h / dst_h / 2); if (vfactor > FIMC_SHFACTOR / 2) { - dev_err(ippdrv->dev, "failed to get ratio vertical.\n"); + dev_err(ctx->dev, "failed to get ratio vertical.\n"); return -EINVAL; } @@ -1019,83 +839,77 @@ static void fimc_set_scaler(struct fimc_context *ctx, struct fimc_scaler *sc) fimc_write(ctx, cfg_ext, EXYNOS_CIEXTEN); } -static int fimc_dst_set_size(struct device *dev, int swap, - struct drm_exynos_pos *pos, struct drm_exynos_sz *sz) +static void fimc_dst_set_size(struct fimc_context *ctx, + struct exynos_drm_ipp_buffer *buf) { - struct fimc_context *ctx = get_fimc_context(dev); - struct drm_exynos_pos img_pos = *pos; - struct drm_exynos_sz img_sz = *sz; - u32 cfg; + u32 cfg, cfg_ext; - DRM_DEBUG_KMS("swap[%d]hsize[%d]vsize[%d]\n", - swap, sz->hsize, sz->vsize); + DRM_DEBUG_KMS("hsize[%d]vsize[%d]\n", buf->buf.width, buf->buf.height); /* original size */ - cfg = (EXYNOS_ORGOSIZE_HORIZONTAL(img_sz.hsize) | - EXYNOS_ORGOSIZE_VERTICAL(img_sz.vsize)); + cfg = (EXYNOS_ORGOSIZE_HORIZONTAL(buf->buf.width) | + EXYNOS_ORGOSIZE_VERTICAL(buf->buf.height)); fimc_write(ctx, cfg, EXYNOS_ORGOSIZE); - DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]\n", pos->x, pos->y, pos->w, pos->h); + DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]\n", buf->rect.x, buf->rect.y, + buf->rect.w, buf->rect.h); /* CSC ITU */ cfg = fimc_read(ctx, EXYNOS_CIGCTRL); cfg &= ~EXYNOS_CIGCTRL_CSC_MASK; - if (sz->hsize >= FIMC_WIDTH_ITU_709) + if (buf->buf.width >= FIMC_WIDTH_ITU_709) cfg |= EXYNOS_CIGCTRL_CSC_ITU709; else cfg |= EXYNOS_CIGCTRL_CSC_ITU601; fimc_write(ctx, cfg, EXYNOS_CIGCTRL); - if (swap) { - img_pos.w = pos->h; - img_pos.h = pos->w; - img_sz.hsize = sz->vsize; - img_sz.vsize = sz->hsize; - } + cfg_ext = fimc_read(ctx, EXYNOS_CITRGFMT); /* target image size */ cfg = fimc_read(ctx, EXYNOS_CITRGFMT); cfg &= ~(EXYNOS_CITRGFMT_TARGETH_MASK | EXYNOS_CITRGFMT_TARGETV_MASK); - cfg |= (EXYNOS_CITRGFMT_TARGETHSIZE(img_pos.w) | - EXYNOS_CITRGFMT_TARGETVSIZE(img_pos.h)); + if (cfg_ext & EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE) + cfg |= (EXYNOS_CITRGFMT_TARGETHSIZE(buf->rect.h) | + EXYNOS_CITRGFMT_TARGETVSIZE(buf->rect.w)); + else + cfg |= (EXYNOS_CITRGFMT_TARGETHSIZE(buf->rect.w) | + EXYNOS_CITRGFMT_TARGETVSIZE(buf->rect.h)); fimc_write(ctx, cfg, EXYNOS_CITRGFMT); /* target area */ - cfg = EXYNOS_CITAREA_TARGET_AREA(img_pos.w * img_pos.h); + cfg = EXYNOS_CITAREA_TARGET_AREA(buf->rect.w * buf->rect.h); fimc_write(ctx, cfg, EXYNOS_CITAREA); /* offset Y(RGB), Cb, Cr */ - cfg = (EXYNOS_CIOYOFF_HORIZONTAL(img_pos.x) | - EXYNOS_CIOYOFF_VERTICAL(img_pos.y)); + cfg = (EXYNOS_CIOYOFF_HORIZONTAL(buf->rect.x) | + EXYNOS_CIOYOFF_VERTICAL(buf->rect.y)); fimc_write(ctx, cfg, EXYNOS_CIOYOFF); - cfg = (EXYNOS_CIOCBOFF_HORIZONTAL(img_pos.x) | - EXYNOS_CIOCBOFF_VERTICAL(img_pos.y)); + cfg = (EXYNOS_CIOCBOFF_HORIZONTAL(buf->rect.x) | + EXYNOS_CIOCBOFF_VERTICAL(buf->rect.y)); fimc_write(ctx, cfg, EXYNOS_CIOCBOFF); - cfg = (EXYNOS_CIOCROFF_HORIZONTAL(img_pos.x) | - EXYNOS_CIOCROFF_VERTICAL(img_pos.y)); + cfg = (EXYNOS_CIOCROFF_HORIZONTAL(buf->rect.x) | + EXYNOS_CIOCROFF_VERTICAL(buf->rect.y)); fimc_write(ctx, cfg, EXYNOS_CIOCROFF); - - return 0; } static void fimc_dst_set_buf_seq(struct fimc_context *ctx, u32 buf_id, - enum drm_exynos_ipp_buf_type buf_type) + bool enqueue) { unsigned long flags; u32 buf_num; u32 cfg; - DRM_DEBUG_KMS("buf_id[%d]buf_type[%d]\n", buf_id, buf_type); + DRM_DEBUG_KMS("buf_id[%d]enqueu[%d]\n", buf_id, enqueue); spin_lock_irqsave(&ctx->lock, flags); cfg = fimc_read(ctx, EXYNOS_CIFCNTSEQ); - if (buf_type == IPP_BUF_ENQUEUE) + if (enqueue) cfg |= (1 << buf_id); else cfg &= ~(1 << buf_id); @@ -1104,88 +918,29 @@ static void fimc_dst_set_buf_seq(struct fimc_context *ctx, u32 buf_id, buf_num = hweight32(cfg); - if (buf_type == IPP_BUF_ENQUEUE && buf_num >= FIMC_BUF_START) + if (enqueue && buf_num >= FIMC_BUF_START) fimc_mask_irq(ctx, true); - else if (buf_type == IPP_BUF_DEQUEUE && buf_num <= FIMC_BUF_STOP) + else if (!enqueue && buf_num <= FIMC_BUF_STOP) fimc_mask_irq(ctx, false); spin_unlock_irqrestore(&ctx->lock, flags); } -static int fimc_dst_set_addr(struct device *dev, - struct drm_exynos_ipp_buf_info *buf_info, u32 buf_id, - enum drm_exynos_ipp_buf_type buf_type) +static void fimc_dst_set_addr(struct fimc_context *ctx, + struct exynos_drm_ipp_buffer *buf) { - struct fimc_context *ctx = get_fimc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node; - struct drm_exynos_ipp_property *property; - struct drm_exynos_ipp_config *config; - - if (!c_node) { - DRM_ERROR("failed to get c_node.\n"); - return -EINVAL; - } - - property = &c_node->property; - - DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]buf_type[%d]\n", - property->prop_id, buf_id, buf_type); + fimc_write(ctx, buf->dma_addr[0], EXYNOS_CIOYSA(0)); + fimc_write(ctx, buf->dma_addr[1], EXYNOS_CIOCBSA(0)); + fimc_write(ctx, buf->dma_addr[2], EXYNOS_CIOCRSA(0)); - if (buf_id > FIMC_MAX_DST) { - dev_info(ippdrv->dev, "invalid buf_id %d.\n", buf_id); - return -ENOMEM; - } - - /* address register set */ - switch (buf_type) { - case IPP_BUF_ENQUEUE: - config = &property->config[EXYNOS_DRM_OPS_DST]; - - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_Y], - EXYNOS_CIOYSA(buf_id)); - - if (config->fmt == DRM_FORMAT_YVU420) { - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CR], - EXYNOS_CIOCBSA(buf_id)); - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CB], - EXYNOS_CIOCRSA(buf_id)); - } else { - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CB], - EXYNOS_CIOCBSA(buf_id)); - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CR], - EXYNOS_CIOCRSA(buf_id)); - } - break; - case IPP_BUF_DEQUEUE: - fimc_write(ctx, 0x0, EXYNOS_CIOYSA(buf_id)); - fimc_write(ctx, 0x0, EXYNOS_CIOCBSA(buf_id)); - fimc_write(ctx, 0x0, EXYNOS_CIOCRSA(buf_id)); - break; - default: - /* bypass */ - break; - } - - fimc_dst_set_buf_seq(ctx, buf_id, buf_type); - - return 0; + fimc_dst_set_buf_seq(ctx, 0, true); } -static struct exynos_drm_ipp_ops fimc_dst_ops = { - .set_fmt = fimc_dst_set_fmt, - .set_transf = fimc_dst_set_transf, - .set_size = fimc_dst_set_size, - .set_addr = fimc_dst_set_addr, -}; +static void fimc_stop(struct fimc_context *ctx); static irqreturn_t fimc_irq_handler(int irq, void *dev_id) { struct fimc_context *ctx = dev_id; - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node; - struct drm_exynos_ipp_event_work *event_work = - c_node->event_work; int buf_id; DRM_DEBUG_KMS("fimc id[%d]\n", ctx->id); @@ -1203,170 +958,19 @@ static irqreturn_t fimc_irq_handler(int irq, void *dev_id) DRM_DEBUG_KMS("buf_id[%d]\n", buf_id); - fimc_dst_set_buf_seq(ctx, buf_id, IPP_BUF_DEQUEUE); - - event_work->ippdrv = ippdrv; - event_work->buf_id[EXYNOS_DRM_OPS_DST] = buf_id; - queue_work(ippdrv->event_workq, &event_work->work); - - return IRQ_HANDLED; -} - -static int fimc_init_prop_list(struct exynos_drm_ippdrv *ippdrv) -{ - struct drm_exynos_ipp_prop_list *prop_list = &ippdrv->prop_list; - - prop_list->version = 1; - prop_list->writeback = 1; - prop_list->refresh_min = FIMC_REFRESH_MIN; - prop_list->refresh_max = FIMC_REFRESH_MAX; - prop_list->flip = (1 << EXYNOS_DRM_FLIP_NONE) | - (1 << EXYNOS_DRM_FLIP_VERTICAL) | - (1 << EXYNOS_DRM_FLIP_HORIZONTAL); - prop_list->degree = (1 << EXYNOS_DRM_DEGREE_0) | - (1 << EXYNOS_DRM_DEGREE_90) | - (1 << EXYNOS_DRM_DEGREE_180) | - (1 << EXYNOS_DRM_DEGREE_270); - prop_list->csc = 1; - prop_list->crop = 1; - prop_list->crop_max.hsize = FIMC_CROP_MAX; - prop_list->crop_max.vsize = FIMC_CROP_MAX; - prop_list->crop_min.hsize = FIMC_CROP_MIN; - prop_list->crop_min.vsize = FIMC_CROP_MIN; - prop_list->scale = 1; - prop_list->scale_max.hsize = FIMC_SCALE_MAX; - prop_list->scale_max.vsize = FIMC_SCALE_MAX; - prop_list->scale_min.hsize = FIMC_SCALE_MIN; - prop_list->scale_min.vsize = FIMC_SCALE_MIN; - - return 0; -} - -static inline bool fimc_check_drm_flip(enum drm_exynos_flip flip) -{ - switch (flip) { - case EXYNOS_DRM_FLIP_NONE: - case EXYNOS_DRM_FLIP_VERTICAL: - case EXYNOS_DRM_FLIP_HORIZONTAL: - case EXYNOS_DRM_FLIP_BOTH: - return true; - default: - DRM_DEBUG_KMS("invalid flip\n"); - return false; - } -} - -static int fimc_ippdrv_check_property(struct device *dev, - struct drm_exynos_ipp_property *property) -{ - struct fimc_context *ctx = get_fimc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_prop_list *pp = &ippdrv->prop_list; - struct drm_exynos_ipp_config *config; - struct drm_exynos_pos *pos; - struct drm_exynos_sz *sz; - bool swap; - int i; - - for_each_ipp_ops(i) { - if ((i == EXYNOS_DRM_OPS_SRC) && - (property->cmd == IPP_CMD_WB)) - continue; - - config = &property->config[i]; - pos = &config->pos; - sz = &config->sz; - - /* check for flip */ - if (!fimc_check_drm_flip(config->flip)) { - DRM_ERROR("invalid flip.\n"); - goto err_property; - } - - /* check for degree */ - switch (config->degree) { - case EXYNOS_DRM_DEGREE_90: - case EXYNOS_DRM_DEGREE_270: - swap = true; - break; - case EXYNOS_DRM_DEGREE_0: - case EXYNOS_DRM_DEGREE_180: - swap = false; - break; - default: - DRM_ERROR("invalid degree.\n"); - goto err_property; - } - - /* check for buffer bound */ - if ((pos->x + pos->w > sz->hsize) || - (pos->y + pos->h > sz->vsize)) { - DRM_ERROR("out of buf bound.\n"); - goto err_property; - } + if (ctx->task) { + struct exynos_drm_ipp_task *task = ctx->task; - /* check for crop */ - if ((i == EXYNOS_DRM_OPS_SRC) && (pp->crop)) { - if (swap) { - if ((pos->h < pp->crop_min.hsize) || - (sz->vsize > pp->crop_max.hsize) || - (pos->w < pp->crop_min.vsize) || - (sz->hsize > pp->crop_max.vsize)) { - DRM_ERROR("out of crop size.\n"); - goto err_property; - } - } else { - if ((pos->w < pp->crop_min.hsize) || - (sz->hsize > pp->crop_max.hsize) || - (pos->h < pp->crop_min.vsize) || - (sz->vsize > pp->crop_max.vsize)) { - DRM_ERROR("out of crop size.\n"); - goto err_property; - } - } - } - - /* check for scale */ - if ((i == EXYNOS_DRM_OPS_DST) && (pp->scale)) { - if (swap) { - if ((pos->h < pp->scale_min.hsize) || - (sz->vsize > pp->scale_max.hsize) || - (pos->w < pp->scale_min.vsize) || - (sz->hsize > pp->scale_max.vsize)) { - DRM_ERROR("out of scale size.\n"); - goto err_property; - } - } else { - if ((pos->w < pp->scale_min.hsize) || - (sz->hsize > pp->scale_max.hsize) || - (pos->h < pp->scale_min.vsize) || - (sz->vsize > pp->scale_max.vsize)) { - DRM_ERROR("out of scale size.\n"); - goto err_property; - } - } - } + ctx->task = NULL; + pm_runtime_mark_last_busy(ctx->dev); + pm_runtime_put_autosuspend(ctx->dev); + exynos_drm_ipp_task_done(task, 0); } - return 0; + fimc_dst_set_buf_seq(ctx, buf_id, false); + fimc_stop(ctx); -err_property: - for_each_ipp_ops(i) { - if ((i == EXYNOS_DRM_OPS_SRC) && - (property->cmd == IPP_CMD_WB)) - continue; - - config = &property->config[i]; - pos = &config->pos; - sz = &config->sz; - - DRM_ERROR("[%s]f[%d]r[%d]pos[%d %d %d %d]sz[%d %d]\n", - i ? "dst" : "src", config->flip, config->degree, - pos->x, pos->y, pos->w, pos->h, - sz->hsize, sz->vsize); - } - - return -EINVAL; + return IRQ_HANDLED; } static void fimc_clear_addr(struct fimc_context *ctx) @@ -1386,10 +990,8 @@ static void fimc_clear_addr(struct fimc_context *ctx) } } -static int fimc_ippdrv_reset(struct device *dev) +static void fimc_reset(struct fimc_context *ctx) { - struct fimc_context *ctx = get_fimc_context(dev); - /* reset h/w block */ fimc_sw_reset(ctx); @@ -1397,82 +999,26 @@ static int fimc_ippdrv_reset(struct device *dev) memset(&ctx->sc, 0x0, sizeof(ctx->sc)); fimc_clear_addr(ctx); - - return 0; } -static int fimc_ippdrv_start(struct device *dev, enum drm_exynos_ipp_cmd cmd) +static void fimc_start(struct fimc_context *ctx) { - struct fimc_context *ctx = get_fimc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node; - struct drm_exynos_ipp_property *property; - struct drm_exynos_ipp_config *config; - struct drm_exynos_pos img_pos[EXYNOS_DRM_OPS_MAX]; - struct drm_exynos_ipp_set_wb set_wb; - int ret, i; u32 cfg0, cfg1; - DRM_DEBUG_KMS("cmd[%d]\n", cmd); - - if (!c_node) { - DRM_ERROR("failed to get c_node.\n"); - return -EINVAL; - } - - property = &c_node->property; - fimc_mask_irq(ctx, true); - for_each_ipp_ops(i) { - config = &property->config[i]; - img_pos[i] = config->pos; - } - - ret = fimc_set_prescaler(ctx, &ctx->sc, - &img_pos[EXYNOS_DRM_OPS_SRC], - &img_pos[EXYNOS_DRM_OPS_DST]); - if (ret) { - dev_err(dev, "failed to set prescaler.\n"); - return ret; - } - - /* If set ture, we can save jpeg about screen */ + /* If set true, we can save jpeg about screen */ fimc_handle_jpeg(ctx, false); fimc_set_scaler(ctx, &ctx->sc); - switch (cmd) { - case IPP_CMD_M2M: - fimc_set_type_ctrl(ctx, FIMC_WB_NONE); - fimc_handle_lastend(ctx, false); - - /* setup dma */ - cfg0 = fimc_read(ctx, EXYNOS_MSCTRL); - cfg0 &= ~EXYNOS_MSCTRL_INPUT_MASK; - cfg0 |= EXYNOS_MSCTRL_INPUT_MEMORY; - fimc_write(ctx, cfg0, EXYNOS_MSCTRL); - break; - case IPP_CMD_WB: - fimc_set_type_ctrl(ctx, FIMC_WB_A); - fimc_handle_lastend(ctx, true); - - /* setup FIMD */ - ret = fimc_set_camblk_fimd0_wb(ctx); - if (ret < 0) { - dev_err(dev, "camblk setup failed.\n"); - return ret; - } + fimc_set_type_ctrl(ctx); + fimc_handle_lastend(ctx, false); - set_wb.enable = 1; - set_wb.refresh = property->refresh_rate; - exynos_drm_ippnb_send_event(IPP_SET_WRITEBACK, (void *)&set_wb); - break; - case IPP_CMD_OUTPUT: - default: - ret = -EINVAL; - dev_err(dev, "invalid operations.\n"); - return ret; - } + /* setup dma */ + cfg0 = fimc_read(ctx, EXYNOS_MSCTRL); + cfg0 &= ~EXYNOS_MSCTRL_INPUT_MASK; + cfg0 |= EXYNOS_MSCTRL_INPUT_MEMORY; + fimc_write(ctx, cfg0, EXYNOS_MSCTRL); /* Reset status */ fimc_write(ctx, 0x0, EXYNOS_CISTATUS); @@ -1498,36 +1044,18 @@ static int fimc_ippdrv_start(struct device *dev, enum drm_exynos_ipp_cmd cmd) fimc_clear_bits(ctx, EXYNOS_CIOCTRL, EXYNOS_CIOCTRL_WEAVE_MASK); - if (cmd == IPP_CMD_M2M) - fimc_set_bits(ctx, EXYNOS_MSCTRL, EXYNOS_MSCTRL_ENVID); - - return 0; + fimc_set_bits(ctx, EXYNOS_MSCTRL, EXYNOS_MSCTRL_ENVID); } -static void fimc_ippdrv_stop(struct device *dev, enum drm_exynos_ipp_cmd cmd) +static void fimc_stop(struct fimc_context *ctx) { - struct fimc_context *ctx = get_fimc_context(dev); - struct drm_exynos_ipp_set_wb set_wb = {0, 0}; u32 cfg; - DRM_DEBUG_KMS("cmd[%d]\n", cmd); - - switch (cmd) { - case IPP_CMD_M2M: - /* Source clear */ - cfg = fimc_read(ctx, EXYNOS_MSCTRL); - cfg &= ~EXYNOS_MSCTRL_INPUT_MASK; - cfg &= ~EXYNOS_MSCTRL_ENVID; - fimc_write(ctx, cfg, EXYNOS_MSCTRL); - break; - case IPP_CMD_WB: - exynos_drm_ippnb_send_event(IPP_SET_WRITEBACK, (void *)&set_wb); - break; - case IPP_CMD_OUTPUT: - default: - dev_err(dev, "invalid operations.\n"); - break; - } + /* Source clear */ + cfg = fimc_read(ctx, EXYNOS_MSCTRL); + cfg &= ~EXYNOS_MSCTRL_INPUT_MASK; + cfg &= ~EXYNOS_MSCTRL_ENVID; + fimc_write(ctx, cfg, EXYNOS_MSCTRL); fimc_mask_irq(ctx, false); @@ -1545,6 +1073,87 @@ static void fimc_ippdrv_stop(struct device *dev, enum drm_exynos_ipp_cmd cmd) fimc_set_bits(ctx, EXYNOS_CIGCTRL, EXYNOS_CIGCTRL_IRQ_END_DISABLE); } +static int fimc_commit(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task) +{ + struct fimc_context *ctx = + container_of(ipp, struct fimc_context, ipp); + + pm_runtime_get_sync(ctx->dev); + ctx->task = task; + + fimc_src_set_fmt(ctx, task->src.buf.fourcc, task->src.buf.modifier); + fimc_src_set_size(ctx, &task->src); + fimc_src_set_transf(ctx, DRM_MODE_ROTATE_0); + fimc_src_set_addr(ctx, &task->src); + fimc_dst_set_fmt(ctx, task->dst.buf.fourcc, task->dst.buf.modifier); + fimc_dst_set_transf(ctx, task->transform.rotation); + fimc_dst_set_size(ctx, &task->dst); + fimc_dst_set_addr(ctx, &task->dst); + fimc_set_prescaler(ctx, &ctx->sc, &task->src.rect, &task->dst.rect); + fimc_start(ctx); + + return 0; +} + +static void fimc_abort(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task) +{ + struct fimc_context *ctx = + container_of(ipp, struct fimc_context, ipp); + + fimc_reset(ctx); + + if (ctx->task) { + struct exynos_drm_ipp_task *task = ctx->task; + + ctx->task = NULL; + pm_runtime_mark_last_busy(ctx->dev); + pm_runtime_put_autosuspend(ctx->dev); + exynos_drm_ipp_task_done(task, -EIO); + } +} + +static struct exynos_drm_ipp_funcs ipp_funcs = { + .commit = fimc_commit, + .abort = fimc_abort, +}; + +static int fimc_bind(struct device *dev, struct device *master, void *data) +{ + struct fimc_context *ctx = dev_get_drvdata(dev); + struct drm_device *drm_dev = data; + struct exynos_drm_ipp *ipp = &ctx->ipp; + + ctx->drm_dev = drm_dev; + drm_iommu_attach_device(drm_dev, dev); + + exynos_drm_ipp_register(drm_dev, ipp, &ipp_funcs, + DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE | + DRM_EXYNOS_IPP_CAP_SCALE | DRM_EXYNOS_IPP_CAP_CONVERT, + ctx->formats, ctx->num_formats, "fimc"); + + dev_info(dev, "The exynos fimc has been probed successfully\n"); + + return 0; +} + +static void fimc_unbind(struct device *dev, struct device *master, + void *data) +{ + struct fimc_context *ctx = dev_get_drvdata(dev); + struct drm_device *drm_dev = data; + struct exynos_drm_ipp *ipp = &ctx->ipp; + + exynos_drm_ipp_unregister(drm_dev, ipp); + drm_iommu_detach_device(drm_dev, dev); +} + +static const struct component_ops fimc_component_ops = { + .bind = fimc_bind, + .unbind = fimc_unbind, +}; + static void fimc_put_clocks(struct fimc_context *ctx) { int i; @@ -1559,7 +1168,7 @@ static void fimc_put_clocks(struct fimc_context *ctx) static int fimc_setup_clocks(struct fimc_context *ctx) { - struct device *fimc_dev = ctx->ippdrv.dev; + struct device *fimc_dev = ctx->dev; struct device *dev; int ret, i; @@ -1574,8 +1183,6 @@ static int fimc_setup_clocks(struct fimc_context *ctx) ctx->clocks[i] = clk_get(dev, fimc_clock_names[i]); if (IS_ERR(ctx->clocks[i])) { - if (i >= FIMC_CLK_MUX) - break; ret = PTR_ERR(ctx->clocks[i]); dev_err(fimc_dev, "failed to get clock: %s\n", fimc_clock_names[i]); @@ -1583,20 +1190,6 @@ static int fimc_setup_clocks(struct fimc_context *ctx) } } - /* Optional FIMC LCLK parent clock setting */ - if (!IS_ERR(ctx->clocks[FIMC_CLK_PARENT])) { - ret = clk_set_parent(ctx->clocks[FIMC_CLK_MUX], - ctx->clocks[FIMC_CLK_PARENT]); - if (ret < 0) { - dev_err(fimc_dev, "failed to set parent.\n"); - goto e_clk_free; - } - } - - ret = clk_set_rate(ctx->clocks[FIMC_CLK_LCLK], ctx->clk_frequency); - if (ret < 0) - goto e_clk_free; - ret = clk_prepare_enable(ctx->clocks[FIMC_CLK_LCLK]); if (!ret) return ret; @@ -1605,57 +1198,118 @@ e_clk_free: return ret; } -static int fimc_parse_dt(struct fimc_context *ctx) +int exynos_drm_check_fimc_device(struct device *dev) { - struct device_node *node = ctx->ippdrv.dev->of_node; + unsigned int id = of_alias_get_id(dev->of_node, "fimc"); - /* Handle only devices that support the LCD Writeback data path */ - if (!of_property_read_bool(node, "samsung,lcd-wb")) - return -ENODEV; + if (id >= 0 && (BIT(id) & fimc_mask)) + return 0; + return -ENODEV; +} - if (of_property_read_u32(node, "clock-frequency", - &ctx->clk_frequency)) - ctx->clk_frequency = FIMC_DEFAULT_LCLK_FREQUENCY; +static const unsigned int fimc_formats[] = { + DRM_FORMAT_XRGB8888, DRM_FORMAT_RGB565, + DRM_FORMAT_NV12, DRM_FORMAT_NV16, DRM_FORMAT_NV21, DRM_FORMAT_NV61, + DRM_FORMAT_UYVY, DRM_FORMAT_VYUY, DRM_FORMAT_YUYV, DRM_FORMAT_YVYU, + DRM_FORMAT_YUV420, DRM_FORMAT_YVU420, DRM_FORMAT_YUV422, + DRM_FORMAT_YUV444, +}; - ctx->id = of_alias_get_id(node, "fimc"); +static const unsigned int fimc_tiled_formats[] = { + DRM_FORMAT_NV12, DRM_FORMAT_NV21, +}; - if (ctx->id < 0) { - dev_err(ctx->ippdrv.dev, "failed to get node alias id.\n"); - return -EINVAL; - } +static const struct drm_exynos_ipp_limit fimc_4210_limits_v1[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 16, 8192, 8 }, .v = { 16, 8192, 2 }) }, + { IPP_SIZE_LIMIT(AREA, .h = { 16, 4224, 2 }, .v = { 16, 0, 2 }) }, + { IPP_SIZE_LIMIT(ROTATED, .h = { 128, 1920 }, .v = { 128, 0 }) }, + { IPP_SCALE_LIMIT(.h = { (1 << 16) / 64, (1 << 16) * 64 }, + .v = { (1 << 16) / 64, (1 << 16) * 64 }) }, +}; - return 0; -} +static const struct drm_exynos_ipp_limit fimc_4210_limits_v2[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 16, 8192, 8 }, .v = { 16, 8192, 2 }) }, + { IPP_SIZE_LIMIT(AREA, .h = { 16, 1920, 2 }, .v = { 16, 0, 2 }) }, + { IPP_SIZE_LIMIT(ROTATED, .h = { 128, 1366 }, .v = { 128, 0 }) }, + { IPP_SCALE_LIMIT(.h = { (1 << 16) / 64, (1 << 16) * 64 }, + .v = { (1 << 16) / 64, (1 << 16) * 64 }) }, +}; + +static const struct drm_exynos_ipp_limit fimc_4210_limits_tiled_v1[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 128, 1920, 128 }, .v = { 32, 1920, 32 }) }, + { IPP_SIZE_LIMIT(AREA, .h = { 128, 1920, 2 }, .v = { 128, 0, 2 }) }, + { IPP_SCALE_LIMIT(.h = { (1 << 16) / 64, (1 << 16) * 64 }, + .v = { (1 << 16) / 64, (1 << 16) * 64 }) }, +}; + +static const struct drm_exynos_ipp_limit fimc_4210_limits_tiled_v2[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 128, 1920, 128 }, .v = { 32, 1920, 32 }) }, + { IPP_SIZE_LIMIT(AREA, .h = { 128, 1366, 2 }, .v = { 128, 0, 2 }) }, + { IPP_SCALE_LIMIT(.h = { (1 << 16) / 64, (1 << 16) * 64 }, + .v = { (1 << 16) / 64, (1 << 16) * 64 }) }, +}; static int fimc_probe(struct platform_device *pdev) { + const struct drm_exynos_ipp_limit *limits; + struct exynos_drm_ipp_formats *formats; struct device *dev = &pdev->dev; struct fimc_context *ctx; struct resource *res; - struct exynos_drm_ippdrv *ippdrv; int ret; + int i, j, num_limits, num_formats; - if (!dev->of_node) { - dev_err(dev, "device tree node not found.\n"); + if (exynos_drm_check_fimc_device(dev) != 0) return -ENODEV; - } ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; - ctx->ippdrv.dev = dev; + ctx->dev = dev; + ctx->id = of_alias_get_id(dev->of_node, "fimc"); - ret = fimc_parse_dt(ctx); - if (ret < 0) - return ret; + /* construct formats/limits array */ + num_formats = ARRAY_SIZE(fimc_formats) + ARRAY_SIZE(fimc_tiled_formats); + formats = devm_kzalloc(dev, sizeof(*formats) * num_formats, GFP_KERNEL); + if (!formats) + return -ENOMEM; + + /* linear formats */ + if (ctx->id < 3) { + limits = fimc_4210_limits_v1; + num_limits = ARRAY_SIZE(fimc_4210_limits_v1); + } else { + limits = fimc_4210_limits_v2; + num_limits = ARRAY_SIZE(fimc_4210_limits_v2); + } + for (i = 0; i < ARRAY_SIZE(fimc_formats); i++) { + formats[i].fourcc = fimc_formats[i]; + formats[i].type = DRM_EXYNOS_IPP_FORMAT_SOURCE | + DRM_EXYNOS_IPP_FORMAT_DESTINATION; + formats[i].limits = limits; + formats[i].num_limits = num_limits; + } - ctx->sysreg = syscon_regmap_lookup_by_phandle(dev->of_node, - "samsung,sysreg"); - if (IS_ERR(ctx->sysreg)) { - dev_err(dev, "syscon regmap lookup failed.\n"); - return PTR_ERR(ctx->sysreg); + /* tiled formats */ + if (ctx->id < 3) { + limits = fimc_4210_limits_tiled_v1; + num_limits = ARRAY_SIZE(fimc_4210_limits_tiled_v1); + } else { + limits = fimc_4210_limits_tiled_v2; + num_limits = ARRAY_SIZE(fimc_4210_limits_tiled_v2); } + for (j = i, i = 0; i < ARRAY_SIZE(fimc_tiled_formats); j++, i++) { + formats[j].fourcc = fimc_tiled_formats[i]; + formats[j].modifier = DRM_FORMAT_MOD_SAMSUNG_64_32_TILE; + formats[j].type = DRM_EXYNOS_IPP_FORMAT_SOURCE | + DRM_EXYNOS_IPP_FORMAT_DESTINATION; + formats[j].limits = limits; + formats[j].num_limits = num_limits; + } + + ctx->formats = formats; + ctx->num_formats = num_formats; /* resource memory */ ctx->regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -1670,9 +1324,8 @@ static int fimc_probe(struct platform_device *pdev) return -ENOENT; } - ctx->irq = res->start; - ret = devm_request_threaded_irq(dev, ctx->irq, NULL, fimc_irq_handler, - IRQF_ONESHOT, "drm_fimc", ctx); + ret = devm_request_irq(dev, res->start, fimc_irq_handler, + 0, dev_name(dev), ctx); if (ret < 0) { dev_err(dev, "failed to request irq.\n"); return ret; @@ -1682,39 +1335,24 @@ static int fimc_probe(struct platform_device *pdev) if (ret < 0) return ret; - ippdrv = &ctx->ippdrv; - ippdrv->ops[EXYNOS_DRM_OPS_SRC] = &fimc_src_ops; - ippdrv->ops[EXYNOS_DRM_OPS_DST] = &fimc_dst_ops; - ippdrv->check_property = fimc_ippdrv_check_property; - ippdrv->reset = fimc_ippdrv_reset; - ippdrv->start = fimc_ippdrv_start; - ippdrv->stop = fimc_ippdrv_stop; - ret = fimc_init_prop_list(ippdrv); - if (ret < 0) { - dev_err(dev, "failed to init property list.\n"); - goto err_put_clk; - } - - DRM_DEBUG_KMS("id[%d]ippdrv[%pK]\n", ctx->id, ippdrv); - spin_lock_init(&ctx->lock); platform_set_drvdata(pdev, ctx); + pm_runtime_use_autosuspend(dev); + pm_runtime_set_autosuspend_delay(dev, FIMC_AUTOSUSPEND_DELAY); pm_runtime_enable(dev); - ret = exynos_drm_ippdrv_register(ippdrv); - if (ret < 0) { - dev_err(dev, "failed to register drm fimc device.\n"); + ret = component_add(dev, &fimc_component_ops); + if (ret) goto err_pm_dis; - } dev_info(dev, "drm fimc registered successfully.\n"); return 0; err_pm_dis: + pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); -err_put_clk: fimc_put_clocks(ctx); return ret; @@ -1724,42 +1362,24 @@ static int fimc_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct fimc_context *ctx = get_fimc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - exynos_drm_ippdrv_unregister(ippdrv); + component_del(dev, &fimc_component_ops); + pm_runtime_dont_use_autosuspend(dev); + pm_runtime_disable(dev); fimc_put_clocks(ctx); - pm_runtime_set_suspended(dev); - pm_runtime_disable(dev); return 0; } #ifdef CONFIG_PM -static int fimc_clk_ctrl(struct fimc_context *ctx, bool enable) -{ - DRM_DEBUG_KMS("enable[%d]\n", enable); - - if (enable) { - clk_prepare_enable(ctx->clocks[FIMC_CLK_GATE]); - clk_prepare_enable(ctx->clocks[FIMC_CLK_WB_A]); - ctx->suspended = false; - } else { - clk_disable_unprepare(ctx->clocks[FIMC_CLK_GATE]); - clk_disable_unprepare(ctx->clocks[FIMC_CLK_WB_A]); - ctx->suspended = true; - } - - return 0; -} - static int fimc_runtime_suspend(struct device *dev) { struct fimc_context *ctx = get_fimc_context(dev); DRM_DEBUG_KMS("id[%d]\n", ctx->id); - - return fimc_clk_ctrl(ctx, false); + clk_disable_unprepare(ctx->clocks[FIMC_CLK_GATE]); + return 0; } static int fimc_runtime_resume(struct device *dev) @@ -1767,8 +1387,7 @@ static int fimc_runtime_resume(struct device *dev) struct fimc_context *ctx = get_fimc_context(dev); DRM_DEBUG_KMS("id[%d]\n", ctx->id); - - return fimc_clk_ctrl(ctx, true); + return clk_prepare_enable(ctx->clocks[FIMC_CLK_GATE]); } #endif @@ -1795,4 +1414,3 @@ struct platform_driver fimc_driver = { .pm = &fimc_pm_ops, }, }; - diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.h b/drivers/gpu/drm/exynos/exynos_drm_fimc.h deleted file mode 100644 index 127a424c5fdf..000000000000 --- a/drivers/gpu/drm/exynos/exynos_drm_fimc.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2012 Samsung Electronics Co., Ltd. - * - * Authors: - * Eunchul Kim <chulspro.kim@samsung.com> - * Jinyoung Jeon <jy0.jeon@samsung.com> - * Sangmin Lee <lsmin.lee@samsung.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#ifndef _EXYNOS_DRM_FIMC_H_ -#define _EXYNOS_DRM_FIMC_H_ - -/* - * TODO - * FIMD output interface notifier callback. - */ - -#endif /* _EXYNOS_DRM_FIMC_H_ */ diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index d42ae2bc3e56..01b1570d0c3a 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -121,6 +121,12 @@ static struct fimd_driver_data s3c64xx_fimd_driver_data = { .has_limited_fmt = 1, }; +static struct fimd_driver_data s5pv210_fimd_driver_data = { + .timing_base = 0x0, + .has_shadowcon = 1, + .has_clksel = 1, +}; + static struct fimd_driver_data exynos3_fimd_driver_data = { .timing_base = 0x20000, .lcdblk_offset = 0x210, @@ -193,6 +199,8 @@ struct fimd_context { static const struct of_device_id fimd_driver_dt_match[] = { { .compatible = "samsung,s3c6400-fimd", .data = &s3c64xx_fimd_driver_data }, + { .compatible = "samsung,s5pv210-fimd", + .data = &s5pv210_fimd_driver_data }, { .compatible = "samsung,exynos3250-fimd", .data = &exynos3_fimd_driver_data }, { .compatible = "samsung,exynos4210-fimd", diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index 11cc01b47bc0..6e1494fa71b4 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -431,37 +431,24 @@ int exynos_drm_gem_dumb_create(struct drm_file *file_priv, return 0; } -int exynos_drm_gem_fault(struct vm_fault *vmf) +vm_fault_t exynos_drm_gem_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct drm_gem_object *obj = vma->vm_private_data; struct exynos_drm_gem *exynos_gem = to_exynos_gem(obj); unsigned long pfn; pgoff_t page_offset; - int ret; page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT; if (page_offset >= (exynos_gem->size >> PAGE_SHIFT)) { DRM_ERROR("invalid page offset\n"); - ret = -EINVAL; - goto out; + return VM_FAULT_SIGBUS; } pfn = page_to_pfn(exynos_gem->pages[page_offset]); - ret = vm_insert_mixed(vma, vmf->address, __pfn_to_pfn_t(pfn, PFN_DEV)); - -out: - switch (ret) { - case 0: - case -ERESTARTSYS: - case -EINTR: - return VM_FAULT_NOPAGE; - case -ENOMEM: - return VM_FAULT_OOM; - default: - return VM_FAULT_SIGBUS; - } + return vmf_insert_mixed(vma, vmf->address, + __pfn_to_pfn_t(pfn, PFN_DEV)); } static int exynos_drm_gem_mmap_obj(struct drm_gem_object *obj, diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.h b/drivers/gpu/drm/exynos/exynos_drm_gem.h index 5a4c7de80f65..9057d7f1d6ed 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.h +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.h @@ -13,6 +13,7 @@ #define _EXYNOS_DRM_GEM_H_ #include <drm/drm_gem.h> +#include <linux/mm_types.h> #define to_exynos_gem(x) container_of(x, struct exynos_drm_gem, base) @@ -111,7 +112,7 @@ int exynos_drm_gem_dumb_create(struct drm_file *file_priv, struct drm_mode_create_dumb *args); /* page fault handler and mmap fault address(virtual) to physical memory. */ -int exynos_drm_gem_fault(struct vm_fault *vmf); +vm_fault_t exynos_drm_gem_fault(struct vm_fault *vmf); /* set vm_flags and we can change the vm attribute to other one at here. */ int exynos_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma); diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c index 0506b2b17ac1..e99dd1e4ba65 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c @@ -12,18 +12,20 @@ * */ #include <linux/kernel.h> +#include <linux/component.h> #include <linux/platform_device.h> #include <linux/clk.h> #include <linux/pm_runtime.h> #include <linux/mfd/syscon.h> +#include <linux/of_device.h> #include <linux/regmap.h> #include <drm/drmP.h> #include <drm/exynos_drm.h> #include "regs-gsc.h" #include "exynos_drm_drv.h" +#include "exynos_drm_iommu.h" #include "exynos_drm_ipp.h" -#include "exynos_drm_gsc.h" /* * GSC stands for General SCaler and @@ -31,26 +33,10 @@ * input DMA reads image data from the memory. * output DMA writes image data to memory. * GSC supports image rotation and image effect functions. - * - * M2M operation : supports crop/scale/rotation/csc so on. - * Memory ----> GSC H/W ----> Memory. - * Writeback operation : supports cloned screen with FIMD. - * FIMD ----> GSC H/W ----> Memory. - * Output operation : supports direct display using local path. - * Memory ----> GSC H/W ----> FIMD, Mixer. */ -/* - * TODO - * 1. check suspend/resume api if needed. - * 2. need to check use case platform_device_id. - * 3. check src/dst size with, height. - * 4. added check_prepare api for right register. - * 5. need to add supported list in prop_list. - * 6. check prescaler/scaler optimization. - */ -#define GSC_MAX_DEVS 4 +#define GSC_MAX_CLOCKS 8 #define GSC_MAX_SRC 4 #define GSC_MAX_DST 16 #define GSC_RESET_TIMEOUT 50 @@ -65,8 +51,6 @@ #define GSC_SC_DOWN_RATIO_4_8 131072 #define GSC_SC_DOWN_RATIO_3_8 174762 #define GSC_SC_DOWN_RATIO_2_8 262144 -#define GSC_REFRESH_MIN 12 -#define GSC_REFRESH_MAX 60 #define GSC_CROP_MAX 8192 #define GSC_CROP_MIN 32 #define GSC_SCALE_MAX 4224 @@ -77,10 +61,9 @@ #define GSC_COEF_H_8T 8 #define GSC_COEF_V_4T 4 #define GSC_COEF_DEPTH 3 +#define GSC_AUTOSUSPEND_DELAY 2000 #define get_gsc_context(dev) platform_get_drvdata(to_platform_device(dev)) -#define get_ctx_from_ippdrv(ippdrv) container_of(ippdrv,\ - struct gsc_context, ippdrv); #define gsc_read(offset) readl(ctx->regs + (offset)) #define gsc_write(cfg, offset) writel(cfg, ctx->regs + (offset)) @@ -104,50 +87,47 @@ struct gsc_scaler { }; /* - * A structure of scaler capability. - * - * find user manual 49.2 features. - * @tile_w: tile mode or rotation width. - * @tile_h: tile mode or rotation height. - * @w: other cases width. - * @h: other cases height. - */ -struct gsc_capability { - /* tile or rotation */ - u32 tile_w; - u32 tile_h; - /* other cases */ - u32 w; - u32 h; -}; - -/* * A structure of gsc context. * - * @ippdrv: prepare initialization using ippdrv. * @regs_res: register resources. * @regs: memory mapped io registers. - * @sysreg: handle to SYSREG block regmap. - * @lock: locking of operations. * @gsc_clk: gsc gate clock. * @sc: scaler infomations. * @id: gsc id. * @irq: irq number. * @rotation: supports rotation of src. - * @suspended: qos operations. */ struct gsc_context { - struct exynos_drm_ippdrv ippdrv; + struct exynos_drm_ipp ipp; + struct drm_device *drm_dev; + struct device *dev; + struct exynos_drm_ipp_task *task; + struct exynos_drm_ipp_formats *formats; + unsigned int num_formats; + struct resource *regs_res; void __iomem *regs; - struct regmap *sysreg; - struct mutex lock; - struct clk *gsc_clk; + const char **clk_names; + struct clk *clocks[GSC_MAX_CLOCKS]; + int num_clocks; struct gsc_scaler sc; int id; int irq; bool rotation; - bool suspended; +}; + +/** + * struct gsc_driverdata - per device type driver data for init time. + * + * @limits: picture size limits array + * @clk_names: names of clocks needed by this variant + * @num_clocks: the number of clocks needed by this variant + */ +struct gsc_driverdata { + const struct drm_exynos_ipp_limit *limits; + int num_limits; + const char *clk_names[GSC_MAX_CLOCKS]; + int num_clocks; }; /* 8-tap Filter Coefficient */ @@ -438,25 +418,6 @@ static int gsc_sw_reset(struct gsc_context *ctx) return 0; } -static void gsc_set_gscblk_fimd_wb(struct gsc_context *ctx, bool enable) -{ - unsigned int gscblk_cfg; - - if (!ctx->sysreg) - return; - - regmap_read(ctx->sysreg, SYSREG_GSCBLK_CFG1, &gscblk_cfg); - - if (enable) - gscblk_cfg |= GSC_BLK_DISP1WB_DEST(ctx->id) | - GSC_BLK_GSCL_WB_IN_SRC_SEL(ctx->id) | - GSC_BLK_SW_RESET_WB_DEST(ctx->id); - else - gscblk_cfg |= GSC_BLK_PXLASYNC_LO_MASK_WB(ctx->id); - - regmap_write(ctx->sysreg, SYSREG_GSCBLK_CFG1, gscblk_cfg); -} - static void gsc_handle_irq(struct gsc_context *ctx, bool enable, bool overflow, bool done) { @@ -487,10 +448,8 @@ static void gsc_handle_irq(struct gsc_context *ctx, bool enable, } -static int gsc_src_set_fmt(struct device *dev, u32 fmt) +static void gsc_src_set_fmt(struct gsc_context *ctx, u32 fmt) { - struct gsc_context *ctx = get_gsc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; u32 cfg; DRM_DEBUG_KMS("fmt[0x%x]\n", fmt); @@ -506,6 +465,7 @@ static int gsc_src_set_fmt(struct device *dev, u32 fmt) cfg |= GSC_IN_RGB565; break; case DRM_FORMAT_XRGB8888: + case DRM_FORMAT_ARGB8888: cfg |= GSC_IN_XRGB8888; break; case DRM_FORMAT_BGRX8888: @@ -548,115 +508,84 @@ static int gsc_src_set_fmt(struct device *dev, u32 fmt) cfg |= (GSC_IN_CHROMA_ORDER_CBCR | GSC_IN_YUV420_2P); break; - default: - dev_err(ippdrv->dev, "invalid target yuv order 0x%x.\n", fmt); - return -EINVAL; } gsc_write(cfg, GSC_IN_CON); - - return 0; } -static int gsc_src_set_transf(struct device *dev, - enum drm_exynos_degree degree, - enum drm_exynos_flip flip, bool *swap) +static void gsc_src_set_transf(struct gsc_context *ctx, unsigned int rotation) { - struct gsc_context *ctx = get_gsc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; + unsigned int degree = rotation & DRM_MODE_ROTATE_MASK; u32 cfg; - DRM_DEBUG_KMS("degree[%d]flip[0x%x]\n", degree, flip); - cfg = gsc_read(GSC_IN_CON); cfg &= ~GSC_IN_ROT_MASK; switch (degree) { - case EXYNOS_DRM_DEGREE_0: - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + case DRM_MODE_ROTATE_0: + if (rotation & DRM_MODE_REFLECT_Y) cfg |= GSC_IN_ROT_XFLIP; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_X) cfg |= GSC_IN_ROT_YFLIP; break; - case EXYNOS_DRM_DEGREE_90: - if (flip & EXYNOS_DRM_FLIP_VERTICAL) - cfg |= GSC_IN_ROT_90_XFLIP; - else if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) - cfg |= GSC_IN_ROT_90_YFLIP; - else - cfg |= GSC_IN_ROT_90; + case DRM_MODE_ROTATE_90: + cfg |= GSC_IN_ROT_90; + if (rotation & DRM_MODE_REFLECT_Y) + cfg |= GSC_IN_ROT_XFLIP; + if (rotation & DRM_MODE_REFLECT_X) + cfg |= GSC_IN_ROT_YFLIP; break; - case EXYNOS_DRM_DEGREE_180: + case DRM_MODE_ROTATE_180: cfg |= GSC_IN_ROT_180; - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg &= ~GSC_IN_ROT_XFLIP; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_X) cfg &= ~GSC_IN_ROT_YFLIP; break; - case EXYNOS_DRM_DEGREE_270: + case DRM_MODE_ROTATE_270: cfg |= GSC_IN_ROT_270; - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg &= ~GSC_IN_ROT_XFLIP; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_X) cfg &= ~GSC_IN_ROT_YFLIP; break; - default: - dev_err(ippdrv->dev, "invalid degree value %d.\n", degree); - return -EINVAL; } gsc_write(cfg, GSC_IN_CON); ctx->rotation = (cfg & GSC_IN_ROT_90) ? 1 : 0; - *swap = ctx->rotation; - - return 0; } -static int gsc_src_set_size(struct device *dev, int swap, - struct drm_exynos_pos *pos, struct drm_exynos_sz *sz) +static void gsc_src_set_size(struct gsc_context *ctx, + struct exynos_drm_ipp_buffer *buf) { - struct gsc_context *ctx = get_gsc_context(dev); - struct drm_exynos_pos img_pos = *pos; struct gsc_scaler *sc = &ctx->sc; u32 cfg; - DRM_DEBUG_KMS("swap[%d]x[%d]y[%d]w[%d]h[%d]\n", - swap, pos->x, pos->y, pos->w, pos->h); - - if (swap) { - img_pos.w = pos->h; - img_pos.h = pos->w; - } - /* pixel offset */ - cfg = (GSC_SRCIMG_OFFSET_X(img_pos.x) | - GSC_SRCIMG_OFFSET_Y(img_pos.y)); + cfg = (GSC_SRCIMG_OFFSET_X(buf->rect.x) | + GSC_SRCIMG_OFFSET_Y(buf->rect.y)); gsc_write(cfg, GSC_SRCIMG_OFFSET); /* cropped size */ - cfg = (GSC_CROPPED_WIDTH(img_pos.w) | - GSC_CROPPED_HEIGHT(img_pos.h)); + cfg = (GSC_CROPPED_WIDTH(buf->rect.w) | + GSC_CROPPED_HEIGHT(buf->rect.h)); gsc_write(cfg, GSC_CROPPED_SIZE); - DRM_DEBUG_KMS("hsize[%d]vsize[%d]\n", sz->hsize, sz->vsize); - /* original size */ cfg = gsc_read(GSC_SRCIMG_SIZE); cfg &= ~(GSC_SRCIMG_HEIGHT_MASK | GSC_SRCIMG_WIDTH_MASK); - cfg |= (GSC_SRCIMG_WIDTH(sz->hsize) | - GSC_SRCIMG_HEIGHT(sz->vsize)); + cfg |= (GSC_SRCIMG_WIDTH(buf->buf.width) | + GSC_SRCIMG_HEIGHT(buf->buf.height)); gsc_write(cfg, GSC_SRCIMG_SIZE); cfg = gsc_read(GSC_IN_CON); cfg &= ~GSC_IN_RGB_TYPE_MASK; - DRM_DEBUG_KMS("width[%d]range[%d]\n", pos->w, sc->range); - - if (pos->w >= GSC_WIDTH_ITU_709) + if (buf->rect.w >= GSC_WIDTH_ITU_709) if (sc->range) cfg |= GSC_IN_RGB_HD_WIDE; else @@ -668,103 +597,39 @@ static int gsc_src_set_size(struct device *dev, int swap, cfg |= GSC_IN_RGB_SD_NARROW; gsc_write(cfg, GSC_IN_CON); - - return 0; } -static int gsc_src_set_buf_seq(struct gsc_context *ctx, u32 buf_id, - enum drm_exynos_ipp_buf_type buf_type) +static void gsc_src_set_buf_seq(struct gsc_context *ctx, u32 buf_id, + bool enqueue) { - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - bool masked; + bool masked = !enqueue; u32 cfg; u32 mask = 0x00000001 << buf_id; - DRM_DEBUG_KMS("buf_id[%d]buf_type[%d]\n", buf_id, buf_type); - /* mask register set */ cfg = gsc_read(GSC_IN_BASE_ADDR_Y_MASK); - switch (buf_type) { - case IPP_BUF_ENQUEUE: - masked = false; - break; - case IPP_BUF_DEQUEUE: - masked = true; - break; - default: - dev_err(ippdrv->dev, "invalid buf ctrl parameter.\n"); - return -EINVAL; - } - /* sequence id */ cfg &= ~mask; cfg |= masked << buf_id; gsc_write(cfg, GSC_IN_BASE_ADDR_Y_MASK); gsc_write(cfg, GSC_IN_BASE_ADDR_CB_MASK); gsc_write(cfg, GSC_IN_BASE_ADDR_CR_MASK); - - return 0; } -static int gsc_src_set_addr(struct device *dev, - struct drm_exynos_ipp_buf_info *buf_info, u32 buf_id, - enum drm_exynos_ipp_buf_type buf_type) +static void gsc_src_set_addr(struct gsc_context *ctx, u32 buf_id, + struct exynos_drm_ipp_buffer *buf) { - struct gsc_context *ctx = get_gsc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node; - struct drm_exynos_ipp_property *property; - - if (!c_node) { - DRM_ERROR("failed to get c_node.\n"); - return -EFAULT; - } - - property = &c_node->property; - - DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]buf_type[%d]\n", - property->prop_id, buf_id, buf_type); - - if (buf_id > GSC_MAX_SRC) { - dev_info(ippdrv->dev, "invalid buf_id %d.\n", buf_id); - return -EINVAL; - } - /* address register set */ - switch (buf_type) { - case IPP_BUF_ENQUEUE: - gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_Y], - GSC_IN_BASE_ADDR_Y(buf_id)); - gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_CB], - GSC_IN_BASE_ADDR_CB(buf_id)); - gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_CR], - GSC_IN_BASE_ADDR_CR(buf_id)); - break; - case IPP_BUF_DEQUEUE: - gsc_write(0x0, GSC_IN_BASE_ADDR_Y(buf_id)); - gsc_write(0x0, GSC_IN_BASE_ADDR_CB(buf_id)); - gsc_write(0x0, GSC_IN_BASE_ADDR_CR(buf_id)); - break; - default: - /* bypass */ - break; - } + gsc_write(buf->dma_addr[0], GSC_IN_BASE_ADDR_Y(buf_id)); + gsc_write(buf->dma_addr[1], GSC_IN_BASE_ADDR_CB(buf_id)); + gsc_write(buf->dma_addr[2], GSC_IN_BASE_ADDR_CR(buf_id)); - return gsc_src_set_buf_seq(ctx, buf_id, buf_type); + gsc_src_set_buf_seq(ctx, buf_id, true); } -static struct exynos_drm_ipp_ops gsc_src_ops = { - .set_fmt = gsc_src_set_fmt, - .set_transf = gsc_src_set_transf, - .set_size = gsc_src_set_size, - .set_addr = gsc_src_set_addr, -}; - -static int gsc_dst_set_fmt(struct device *dev, u32 fmt) +static void gsc_dst_set_fmt(struct gsc_context *ctx, u32 fmt) { - struct gsc_context *ctx = get_gsc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; u32 cfg; DRM_DEBUG_KMS("fmt[0x%x]\n", fmt); @@ -779,8 +644,9 @@ static int gsc_dst_set_fmt(struct device *dev, u32 fmt) case DRM_FORMAT_RGB565: cfg |= GSC_OUT_RGB565; break; + case DRM_FORMAT_ARGB8888: case DRM_FORMAT_XRGB8888: - cfg |= GSC_OUT_XRGB8888; + cfg |= (GSC_OUT_XRGB8888 | GSC_OUT_GLOBAL_ALPHA(0xff)); break; case DRM_FORMAT_BGRX8888: cfg |= (GSC_OUT_XRGB8888 | GSC_OUT_RB_SWAP); @@ -819,69 +685,9 @@ static int gsc_dst_set_fmt(struct device *dev, u32 fmt) cfg |= (GSC_OUT_CHROMA_ORDER_CBCR | GSC_OUT_YUV420_2P); break; - default: - dev_err(ippdrv->dev, "invalid target yuv order 0x%x.\n", fmt); - return -EINVAL; } gsc_write(cfg, GSC_OUT_CON); - - return 0; -} - -static int gsc_dst_set_transf(struct device *dev, - enum drm_exynos_degree degree, - enum drm_exynos_flip flip, bool *swap) -{ - struct gsc_context *ctx = get_gsc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - u32 cfg; - - DRM_DEBUG_KMS("degree[%d]flip[0x%x]\n", degree, flip); - - cfg = gsc_read(GSC_IN_CON); - cfg &= ~GSC_IN_ROT_MASK; - - switch (degree) { - case EXYNOS_DRM_DEGREE_0: - if (flip & EXYNOS_DRM_FLIP_VERTICAL) - cfg |= GSC_IN_ROT_XFLIP; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) - cfg |= GSC_IN_ROT_YFLIP; - break; - case EXYNOS_DRM_DEGREE_90: - if (flip & EXYNOS_DRM_FLIP_VERTICAL) - cfg |= GSC_IN_ROT_90_XFLIP; - else if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) - cfg |= GSC_IN_ROT_90_YFLIP; - else - cfg |= GSC_IN_ROT_90; - break; - case EXYNOS_DRM_DEGREE_180: - cfg |= GSC_IN_ROT_180; - if (flip & EXYNOS_DRM_FLIP_VERTICAL) - cfg &= ~GSC_IN_ROT_XFLIP; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) - cfg &= ~GSC_IN_ROT_YFLIP; - break; - case EXYNOS_DRM_DEGREE_270: - cfg |= GSC_IN_ROT_270; - if (flip & EXYNOS_DRM_FLIP_VERTICAL) - cfg &= ~GSC_IN_ROT_XFLIP; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) - cfg &= ~GSC_IN_ROT_YFLIP; - break; - default: - dev_err(ippdrv->dev, "invalid degree value %d.\n", degree); - return -EINVAL; - } - - gsc_write(cfg, GSC_IN_CON); - - ctx->rotation = (cfg & GSC_IN_ROT_90) ? 1 : 0; - *swap = ctx->rotation; - - return 0; } static int gsc_get_ratio_shift(u32 src, u32 dst, u32 *ratio) @@ -919,9 +725,9 @@ static void gsc_get_prescaler_shfactor(u32 hratio, u32 vratio, u32 *shfactor) } static int gsc_set_prescaler(struct gsc_context *ctx, struct gsc_scaler *sc, - struct drm_exynos_pos *src, struct drm_exynos_pos *dst) + struct drm_exynos_ipp_task_rect *src, + struct drm_exynos_ipp_task_rect *dst) { - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; u32 cfg; u32 src_w, src_h, dst_w, dst_h; int ret = 0; @@ -939,13 +745,13 @@ static int gsc_set_prescaler(struct gsc_context *ctx, struct gsc_scaler *sc, ret = gsc_get_ratio_shift(src_w, dst_w, &sc->pre_hratio); if (ret) { - dev_err(ippdrv->dev, "failed to get ratio horizontal.\n"); + dev_err(ctx->dev, "failed to get ratio horizontal.\n"); return ret; } ret = gsc_get_ratio_shift(src_h, dst_h, &sc->pre_vratio); if (ret) { - dev_err(ippdrv->dev, "failed to get ratio vertical.\n"); + dev_err(ctx->dev, "failed to get ratio vertical.\n"); return ret; } @@ -1039,47 +845,37 @@ static void gsc_set_scaler(struct gsc_context *ctx, struct gsc_scaler *sc) gsc_write(cfg, GSC_MAIN_V_RATIO); } -static int gsc_dst_set_size(struct device *dev, int swap, - struct drm_exynos_pos *pos, struct drm_exynos_sz *sz) +static void gsc_dst_set_size(struct gsc_context *ctx, + struct exynos_drm_ipp_buffer *buf) { - struct gsc_context *ctx = get_gsc_context(dev); - struct drm_exynos_pos img_pos = *pos; struct gsc_scaler *sc = &ctx->sc; u32 cfg; - DRM_DEBUG_KMS("swap[%d]x[%d]y[%d]w[%d]h[%d]\n", - swap, pos->x, pos->y, pos->w, pos->h); - - if (swap) { - img_pos.w = pos->h; - img_pos.h = pos->w; - } - /* pixel offset */ - cfg = (GSC_DSTIMG_OFFSET_X(pos->x) | - GSC_DSTIMG_OFFSET_Y(pos->y)); + cfg = (GSC_DSTIMG_OFFSET_X(buf->rect.x) | + GSC_DSTIMG_OFFSET_Y(buf->rect.y)); gsc_write(cfg, GSC_DSTIMG_OFFSET); /* scaled size */ - cfg = (GSC_SCALED_WIDTH(img_pos.w) | GSC_SCALED_HEIGHT(img_pos.h)); + if (ctx->rotation) + cfg = (GSC_SCALED_WIDTH(buf->rect.h) | + GSC_SCALED_HEIGHT(buf->rect.w)); + else + cfg = (GSC_SCALED_WIDTH(buf->rect.w) | + GSC_SCALED_HEIGHT(buf->rect.h)); gsc_write(cfg, GSC_SCALED_SIZE); - DRM_DEBUG_KMS("hsize[%d]vsize[%d]\n", sz->hsize, sz->vsize); - /* original size */ cfg = gsc_read(GSC_DSTIMG_SIZE); - cfg &= ~(GSC_DSTIMG_HEIGHT_MASK | - GSC_DSTIMG_WIDTH_MASK); - cfg |= (GSC_DSTIMG_WIDTH(sz->hsize) | - GSC_DSTIMG_HEIGHT(sz->vsize)); + cfg &= ~(GSC_DSTIMG_HEIGHT_MASK | GSC_DSTIMG_WIDTH_MASK); + cfg |= GSC_DSTIMG_WIDTH(buf->buf.width) | + GSC_DSTIMG_HEIGHT(buf->buf.height); gsc_write(cfg, GSC_DSTIMG_SIZE); cfg = gsc_read(GSC_OUT_CON); cfg &= ~GSC_OUT_RGB_TYPE_MASK; - DRM_DEBUG_KMS("width[%d]range[%d]\n", pos->w, sc->range); - - if (pos->w >= GSC_WIDTH_ITU_709) + if (buf->rect.w >= GSC_WIDTH_ITU_709) if (sc->range) cfg |= GSC_OUT_RGB_HD_WIDE; else @@ -1091,8 +887,6 @@ static int gsc_dst_set_size(struct device *dev, int swap, cfg |= GSC_OUT_RGB_SD_NARROW; gsc_write(cfg, GSC_OUT_CON); - - return 0; } static int gsc_dst_get_buf_seq(struct gsc_context *ctx) @@ -1111,35 +905,16 @@ static int gsc_dst_get_buf_seq(struct gsc_context *ctx) return buf_num; } -static int gsc_dst_set_buf_seq(struct gsc_context *ctx, u32 buf_id, - enum drm_exynos_ipp_buf_type buf_type) +static void gsc_dst_set_buf_seq(struct gsc_context *ctx, u32 buf_id, + bool enqueue) { - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - bool masked; + bool masked = !enqueue; u32 cfg; u32 mask = 0x00000001 << buf_id; - int ret = 0; - - DRM_DEBUG_KMS("buf_id[%d]buf_type[%d]\n", buf_id, buf_type); - - mutex_lock(&ctx->lock); /* mask register set */ cfg = gsc_read(GSC_OUT_BASE_ADDR_Y_MASK); - switch (buf_type) { - case IPP_BUF_ENQUEUE: - masked = false; - break; - case IPP_BUF_DEQUEUE: - masked = true; - break; - default: - dev_err(ippdrv->dev, "invalid buf ctrl parameter.\n"); - ret = -EINVAL; - goto err_unlock; - } - /* sequence id */ cfg &= ~mask; cfg |= masked << buf_id; @@ -1148,94 +923,29 @@ static int gsc_dst_set_buf_seq(struct gsc_context *ctx, u32 buf_id, gsc_write(cfg, GSC_OUT_BASE_ADDR_CR_MASK); /* interrupt enable */ - if (buf_type == IPP_BUF_ENQUEUE && - gsc_dst_get_buf_seq(ctx) >= GSC_BUF_START) + if (enqueue && gsc_dst_get_buf_seq(ctx) >= GSC_BUF_START) gsc_handle_irq(ctx, true, false, true); /* interrupt disable */ - if (buf_type == IPP_BUF_DEQUEUE && - gsc_dst_get_buf_seq(ctx) <= GSC_BUF_STOP) + if (!enqueue && gsc_dst_get_buf_seq(ctx) <= GSC_BUF_STOP) gsc_handle_irq(ctx, false, false, true); - -err_unlock: - mutex_unlock(&ctx->lock); - return ret; } -static int gsc_dst_set_addr(struct device *dev, - struct drm_exynos_ipp_buf_info *buf_info, u32 buf_id, - enum drm_exynos_ipp_buf_type buf_type) +static void gsc_dst_set_addr(struct gsc_context *ctx, + u32 buf_id, struct exynos_drm_ipp_buffer *buf) { - struct gsc_context *ctx = get_gsc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node; - struct drm_exynos_ipp_property *property; - - if (!c_node) { - DRM_ERROR("failed to get c_node.\n"); - return -EFAULT; - } - - property = &c_node->property; - - DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]buf_type[%d]\n", - property->prop_id, buf_id, buf_type); - - if (buf_id > GSC_MAX_DST) { - dev_info(ippdrv->dev, "invalid buf_id %d.\n", buf_id); - return -EINVAL; - } - /* address register set */ - switch (buf_type) { - case IPP_BUF_ENQUEUE: - gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_Y], - GSC_OUT_BASE_ADDR_Y(buf_id)); - gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_CB], - GSC_OUT_BASE_ADDR_CB(buf_id)); - gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_CR], - GSC_OUT_BASE_ADDR_CR(buf_id)); - break; - case IPP_BUF_DEQUEUE: - gsc_write(0x0, GSC_OUT_BASE_ADDR_Y(buf_id)); - gsc_write(0x0, GSC_OUT_BASE_ADDR_CB(buf_id)); - gsc_write(0x0, GSC_OUT_BASE_ADDR_CR(buf_id)); - break; - default: - /* bypass */ - break; - } + gsc_write(buf->dma_addr[0], GSC_OUT_BASE_ADDR_Y(buf_id)); + gsc_write(buf->dma_addr[1], GSC_OUT_BASE_ADDR_CB(buf_id)); + gsc_write(buf->dma_addr[2], GSC_OUT_BASE_ADDR_CR(buf_id)); - return gsc_dst_set_buf_seq(ctx, buf_id, buf_type); -} - -static struct exynos_drm_ipp_ops gsc_dst_ops = { - .set_fmt = gsc_dst_set_fmt, - .set_transf = gsc_dst_set_transf, - .set_size = gsc_dst_set_size, - .set_addr = gsc_dst_set_addr, -}; - -static int gsc_clk_ctrl(struct gsc_context *ctx, bool enable) -{ - DRM_DEBUG_KMS("enable[%d]\n", enable); - - if (enable) { - clk_prepare_enable(ctx->gsc_clk); - ctx->suspended = false; - } else { - clk_disable_unprepare(ctx->gsc_clk); - ctx->suspended = true; - } - - return 0; + gsc_dst_set_buf_seq(ctx, buf_id, true); } static int gsc_get_src_buf_index(struct gsc_context *ctx) { u32 cfg, curr_index, i; u32 buf_id = GSC_MAX_SRC; - int ret; DRM_DEBUG_KMS("gsc id[%d]\n", ctx->id); @@ -1249,19 +959,15 @@ static int gsc_get_src_buf_index(struct gsc_context *ctx) } } + DRM_DEBUG_KMS("cfg[0x%x]curr_index[%d]buf_id[%d]\n", cfg, + curr_index, buf_id); + if (buf_id == GSC_MAX_SRC) { DRM_ERROR("failed to get in buffer index.\n"); return -EINVAL; } - ret = gsc_src_set_buf_seq(ctx, buf_id, IPP_BUF_DEQUEUE); - if (ret < 0) { - DRM_ERROR("failed to dequeue.\n"); - return ret; - } - - DRM_DEBUG_KMS("cfg[0x%x]curr_index[%d]buf_id[%d]\n", cfg, - curr_index, buf_id); + gsc_src_set_buf_seq(ctx, buf_id, false); return buf_id; } @@ -1270,7 +976,6 @@ static int gsc_get_dst_buf_index(struct gsc_context *ctx) { u32 cfg, curr_index, i; u32 buf_id = GSC_MAX_DST; - int ret; DRM_DEBUG_KMS("gsc id[%d]\n", ctx->id); @@ -1289,11 +994,7 @@ static int gsc_get_dst_buf_index(struct gsc_context *ctx) return -EINVAL; } - ret = gsc_dst_set_buf_seq(ctx, buf_id, IPP_BUF_DEQUEUE); - if (ret < 0) { - DRM_ERROR("failed to dequeue.\n"); - return ret; - } + gsc_dst_set_buf_seq(ctx, buf_id, false); DRM_DEBUG_KMS("cfg[0x%x]curr_index[%d]buf_id[%d]\n", cfg, curr_index, buf_id); @@ -1304,215 +1005,55 @@ static int gsc_get_dst_buf_index(struct gsc_context *ctx) static irqreturn_t gsc_irq_handler(int irq, void *dev_id) { struct gsc_context *ctx = dev_id; - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node; - struct drm_exynos_ipp_event_work *event_work = - c_node->event_work; u32 status; - int buf_id[EXYNOS_DRM_OPS_MAX]; + int err = 0; DRM_DEBUG_KMS("gsc id[%d]\n", ctx->id); status = gsc_read(GSC_IRQ); if (status & GSC_IRQ_STATUS_OR_IRQ) { - dev_err(ippdrv->dev, "occurred overflow at %d, status 0x%x.\n", + dev_err(ctx->dev, "occurred overflow at %d, status 0x%x.\n", ctx->id, status); - return IRQ_NONE; + err = -EINVAL; } if (status & GSC_IRQ_STATUS_OR_FRM_DONE) { - dev_dbg(ippdrv->dev, "occurred frame done at %d, status 0x%x.\n", - ctx->id, status); - - buf_id[EXYNOS_DRM_OPS_SRC] = gsc_get_src_buf_index(ctx); - if (buf_id[EXYNOS_DRM_OPS_SRC] < 0) - return IRQ_HANDLED; - - buf_id[EXYNOS_DRM_OPS_DST] = gsc_get_dst_buf_index(ctx); - if (buf_id[EXYNOS_DRM_OPS_DST] < 0) - return IRQ_HANDLED; - - DRM_DEBUG_KMS("buf_id_src[%d]buf_id_dst[%d]\n", - buf_id[EXYNOS_DRM_OPS_SRC], buf_id[EXYNOS_DRM_OPS_DST]); - - event_work->ippdrv = ippdrv; - event_work->buf_id[EXYNOS_DRM_OPS_SRC] = - buf_id[EXYNOS_DRM_OPS_SRC]; - event_work->buf_id[EXYNOS_DRM_OPS_DST] = - buf_id[EXYNOS_DRM_OPS_DST]; - queue_work(ippdrv->event_workq, &event_work->work); - } - - return IRQ_HANDLED; -} - -static int gsc_init_prop_list(struct exynos_drm_ippdrv *ippdrv) -{ - struct drm_exynos_ipp_prop_list *prop_list = &ippdrv->prop_list; - - prop_list->version = 1; - prop_list->writeback = 1; - prop_list->refresh_min = GSC_REFRESH_MIN; - prop_list->refresh_max = GSC_REFRESH_MAX; - prop_list->flip = (1 << EXYNOS_DRM_FLIP_VERTICAL) | - (1 << EXYNOS_DRM_FLIP_HORIZONTAL); - prop_list->degree = (1 << EXYNOS_DRM_DEGREE_0) | - (1 << EXYNOS_DRM_DEGREE_90) | - (1 << EXYNOS_DRM_DEGREE_180) | - (1 << EXYNOS_DRM_DEGREE_270); - prop_list->csc = 1; - prop_list->crop = 1; - prop_list->crop_max.hsize = GSC_CROP_MAX; - prop_list->crop_max.vsize = GSC_CROP_MAX; - prop_list->crop_min.hsize = GSC_CROP_MIN; - prop_list->crop_min.vsize = GSC_CROP_MIN; - prop_list->scale = 1; - prop_list->scale_max.hsize = GSC_SCALE_MAX; - prop_list->scale_max.vsize = GSC_SCALE_MAX; - prop_list->scale_min.hsize = GSC_SCALE_MIN; - prop_list->scale_min.vsize = GSC_SCALE_MIN; - - return 0; -} - -static inline bool gsc_check_drm_flip(enum drm_exynos_flip flip) -{ - switch (flip) { - case EXYNOS_DRM_FLIP_NONE: - case EXYNOS_DRM_FLIP_VERTICAL: - case EXYNOS_DRM_FLIP_HORIZONTAL: - case EXYNOS_DRM_FLIP_BOTH: - return true; - default: - DRM_DEBUG_KMS("invalid flip\n"); - return false; - } -} - -static int gsc_ippdrv_check_property(struct device *dev, - struct drm_exynos_ipp_property *property) -{ - struct gsc_context *ctx = get_gsc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_prop_list *pp = &ippdrv->prop_list; - struct drm_exynos_ipp_config *config; - struct drm_exynos_pos *pos; - struct drm_exynos_sz *sz; - bool swap; - int i; - - for_each_ipp_ops(i) { - if ((i == EXYNOS_DRM_OPS_SRC) && - (property->cmd == IPP_CMD_WB)) - continue; + int src_buf_id, dst_buf_id; - config = &property->config[i]; - pos = &config->pos; - sz = &config->sz; - - /* check for flip */ - if (!gsc_check_drm_flip(config->flip)) { - DRM_ERROR("invalid flip.\n"); - goto err_property; - } - - /* check for degree */ - switch (config->degree) { - case EXYNOS_DRM_DEGREE_90: - case EXYNOS_DRM_DEGREE_270: - swap = true; - break; - case EXYNOS_DRM_DEGREE_0: - case EXYNOS_DRM_DEGREE_180: - swap = false; - break; - default: - DRM_ERROR("invalid degree.\n"); - goto err_property; - } + dev_dbg(ctx->dev, "occurred frame done at %d, status 0x%x.\n", + ctx->id, status); - /* check for buffer bound */ - if ((pos->x + pos->w > sz->hsize) || - (pos->y + pos->h > sz->vsize)) { - DRM_ERROR("out of buf bound.\n"); - goto err_property; - } + src_buf_id = gsc_get_src_buf_index(ctx); + dst_buf_id = gsc_get_dst_buf_index(ctx); - /* check for crop */ - if ((i == EXYNOS_DRM_OPS_SRC) && (pp->crop)) { - if (swap) { - if ((pos->h < pp->crop_min.hsize) || - (sz->vsize > pp->crop_max.hsize) || - (pos->w < pp->crop_min.vsize) || - (sz->hsize > pp->crop_max.vsize)) { - DRM_ERROR("out of crop size.\n"); - goto err_property; - } - } else { - if ((pos->w < pp->crop_min.hsize) || - (sz->hsize > pp->crop_max.hsize) || - (pos->h < pp->crop_min.vsize) || - (sz->vsize > pp->crop_max.vsize)) { - DRM_ERROR("out of crop size.\n"); - goto err_property; - } - } - } + DRM_DEBUG_KMS("buf_id_src[%d]buf_id_dst[%d]\n", src_buf_id, + dst_buf_id); - /* check for scale */ - if ((i == EXYNOS_DRM_OPS_DST) && (pp->scale)) { - if (swap) { - if ((pos->h < pp->scale_min.hsize) || - (sz->vsize > pp->scale_max.hsize) || - (pos->w < pp->scale_min.vsize) || - (sz->hsize > pp->scale_max.vsize)) { - DRM_ERROR("out of scale size.\n"); - goto err_property; - } - } else { - if ((pos->w < pp->scale_min.hsize) || - (sz->hsize > pp->scale_max.hsize) || - (pos->h < pp->scale_min.vsize) || - (sz->vsize > pp->scale_max.vsize)) { - DRM_ERROR("out of scale size.\n"); - goto err_property; - } - } - } + if (src_buf_id < 0 || dst_buf_id < 0) + err = -EINVAL; } - return 0; - -err_property: - for_each_ipp_ops(i) { - if ((i == EXYNOS_DRM_OPS_SRC) && - (property->cmd == IPP_CMD_WB)) - continue; + if (ctx->task) { + struct exynos_drm_ipp_task *task = ctx->task; - config = &property->config[i]; - pos = &config->pos; - sz = &config->sz; - - DRM_ERROR("[%s]f[%d]r[%d]pos[%d %d %d %d]sz[%d %d]\n", - i ? "dst" : "src", config->flip, config->degree, - pos->x, pos->y, pos->w, pos->h, - sz->hsize, sz->vsize); + ctx->task = NULL; + pm_runtime_mark_last_busy(ctx->dev); + pm_runtime_put_autosuspend(ctx->dev); + exynos_drm_ipp_task_done(task, err); } - return -EINVAL; + return IRQ_HANDLED; } - -static int gsc_ippdrv_reset(struct device *dev) +static int gsc_reset(struct gsc_context *ctx) { - struct gsc_context *ctx = get_gsc_context(dev); struct gsc_scaler *sc = &ctx->sc; int ret; /* reset h/w block */ ret = gsc_sw_reset(ctx); if (ret < 0) { - dev_err(dev, "failed to reset hardware.\n"); + dev_err(ctx->dev, "failed to reset hardware.\n"); return ret; } @@ -1523,166 +1064,172 @@ static int gsc_ippdrv_reset(struct device *dev) return 0; } -static int gsc_ippdrv_start(struct device *dev, enum drm_exynos_ipp_cmd cmd) +static void gsc_start(struct gsc_context *ctx) { - struct gsc_context *ctx = get_gsc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node; - struct drm_exynos_ipp_property *property; - struct drm_exynos_ipp_config *config; - struct drm_exynos_pos img_pos[EXYNOS_DRM_OPS_MAX]; - struct drm_exynos_ipp_set_wb set_wb; u32 cfg; - int ret, i; - - DRM_DEBUG_KMS("cmd[%d]\n", cmd); - - if (!c_node) { - DRM_ERROR("failed to get c_node.\n"); - return -EINVAL; - } - - property = &c_node->property; gsc_handle_irq(ctx, true, false, true); - for_each_ipp_ops(i) { - config = &property->config[i]; - img_pos[i] = config->pos; - } + /* enable one shot */ + cfg = gsc_read(GSC_ENABLE); + cfg &= ~(GSC_ENABLE_ON_CLEAR_MASK | + GSC_ENABLE_CLK_GATE_MODE_MASK); + cfg |= GSC_ENABLE_ON_CLEAR_ONESHOT; + gsc_write(cfg, GSC_ENABLE); - switch (cmd) { - case IPP_CMD_M2M: - /* enable one shot */ - cfg = gsc_read(GSC_ENABLE); - cfg &= ~(GSC_ENABLE_ON_CLEAR_MASK | - GSC_ENABLE_CLK_GATE_MODE_MASK); - cfg |= GSC_ENABLE_ON_CLEAR_ONESHOT; - gsc_write(cfg, GSC_ENABLE); - - /* src dma memory */ - cfg = gsc_read(GSC_IN_CON); - cfg &= ~(GSC_IN_PATH_MASK | GSC_IN_LOCAL_SEL_MASK); - cfg |= GSC_IN_PATH_MEMORY; - gsc_write(cfg, GSC_IN_CON); - - /* dst dma memory */ - cfg = gsc_read(GSC_OUT_CON); - cfg |= GSC_OUT_PATH_MEMORY; - gsc_write(cfg, GSC_OUT_CON); - break; - case IPP_CMD_WB: - set_wb.enable = 1; - set_wb.refresh = property->refresh_rate; - gsc_set_gscblk_fimd_wb(ctx, set_wb.enable); - exynos_drm_ippnb_send_event(IPP_SET_WRITEBACK, (void *)&set_wb); - - /* src local path */ - cfg = gsc_read(GSC_IN_CON); - cfg &= ~(GSC_IN_PATH_MASK | GSC_IN_LOCAL_SEL_MASK); - cfg |= (GSC_IN_PATH_LOCAL | GSC_IN_LOCAL_FIMD_WB); - gsc_write(cfg, GSC_IN_CON); - - /* dst dma memory */ - cfg = gsc_read(GSC_OUT_CON); - cfg |= GSC_OUT_PATH_MEMORY; - gsc_write(cfg, GSC_OUT_CON); - break; - case IPP_CMD_OUTPUT: - /* src dma memory */ - cfg = gsc_read(GSC_IN_CON); - cfg &= ~(GSC_IN_PATH_MASK | GSC_IN_LOCAL_SEL_MASK); - cfg |= GSC_IN_PATH_MEMORY; - gsc_write(cfg, GSC_IN_CON); - - /* dst local path */ - cfg = gsc_read(GSC_OUT_CON); - cfg |= GSC_OUT_PATH_MEMORY; - gsc_write(cfg, GSC_OUT_CON); - break; - default: - ret = -EINVAL; - dev_err(dev, "invalid operations.\n"); - return ret; - } + /* src dma memory */ + cfg = gsc_read(GSC_IN_CON); + cfg &= ~(GSC_IN_PATH_MASK | GSC_IN_LOCAL_SEL_MASK); + cfg |= GSC_IN_PATH_MEMORY; + gsc_write(cfg, GSC_IN_CON); - ret = gsc_set_prescaler(ctx, &ctx->sc, - &img_pos[EXYNOS_DRM_OPS_SRC], - &img_pos[EXYNOS_DRM_OPS_DST]); - if (ret) { - dev_err(dev, "failed to set prescaler.\n"); - return ret; - } + /* dst dma memory */ + cfg = gsc_read(GSC_OUT_CON); + cfg |= GSC_OUT_PATH_MEMORY; + gsc_write(cfg, GSC_OUT_CON); gsc_set_scaler(ctx, &ctx->sc); cfg = gsc_read(GSC_ENABLE); cfg |= GSC_ENABLE_ON; gsc_write(cfg, GSC_ENABLE); +} + +static int gsc_commit(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task) +{ + struct gsc_context *ctx = container_of(ipp, struct gsc_context, ipp); + int ret; + + pm_runtime_get_sync(ctx->dev); + ctx->task = task; + + ret = gsc_reset(ctx); + if (ret) { + pm_runtime_put_autosuspend(ctx->dev); + ctx->task = NULL; + return ret; + } + + gsc_src_set_fmt(ctx, task->src.buf.fourcc); + gsc_src_set_transf(ctx, task->transform.rotation); + gsc_src_set_size(ctx, &task->src); + gsc_src_set_addr(ctx, 0, &task->src); + gsc_dst_set_fmt(ctx, task->dst.buf.fourcc); + gsc_dst_set_size(ctx, &task->dst); + gsc_dst_set_addr(ctx, 0, &task->dst); + gsc_set_prescaler(ctx, &ctx->sc, &task->src.rect, &task->dst.rect); + gsc_start(ctx); return 0; } -static void gsc_ippdrv_stop(struct device *dev, enum drm_exynos_ipp_cmd cmd) +static void gsc_abort(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task) { - struct gsc_context *ctx = get_gsc_context(dev); - struct drm_exynos_ipp_set_wb set_wb = {0, 0}; - u32 cfg; + struct gsc_context *ctx = + container_of(ipp, struct gsc_context, ipp); - DRM_DEBUG_KMS("cmd[%d]\n", cmd); + gsc_reset(ctx); + if (ctx->task) { + struct exynos_drm_ipp_task *task = ctx->task; - switch (cmd) { - case IPP_CMD_M2M: - /* bypass */ - break; - case IPP_CMD_WB: - gsc_set_gscblk_fimd_wb(ctx, set_wb.enable); - exynos_drm_ippnb_send_event(IPP_SET_WRITEBACK, (void *)&set_wb); - break; - case IPP_CMD_OUTPUT: - default: - dev_err(dev, "invalid operations.\n"); - break; + ctx->task = NULL; + pm_runtime_mark_last_busy(ctx->dev); + pm_runtime_put_autosuspend(ctx->dev); + exynos_drm_ipp_task_done(task, -EIO); } +} - gsc_handle_irq(ctx, false, false, true); +static struct exynos_drm_ipp_funcs ipp_funcs = { + .commit = gsc_commit, + .abort = gsc_abort, +}; - /* reset sequence */ - gsc_write(0xff, GSC_OUT_BASE_ADDR_Y_MASK); - gsc_write(0xff, GSC_OUT_BASE_ADDR_CB_MASK); - gsc_write(0xff, GSC_OUT_BASE_ADDR_CR_MASK); +static int gsc_bind(struct device *dev, struct device *master, void *data) +{ + struct gsc_context *ctx = dev_get_drvdata(dev); + struct drm_device *drm_dev = data; + struct exynos_drm_ipp *ipp = &ctx->ipp; - cfg = gsc_read(GSC_ENABLE); - cfg &= ~GSC_ENABLE_ON; - gsc_write(cfg, GSC_ENABLE); + ctx->drm_dev = drm_dev; + drm_iommu_attach_device(drm_dev, dev); + + exynos_drm_ipp_register(drm_dev, ipp, &ipp_funcs, + DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE | + DRM_EXYNOS_IPP_CAP_SCALE | DRM_EXYNOS_IPP_CAP_CONVERT, + ctx->formats, ctx->num_formats, "gsc"); + + dev_info(dev, "The exynos gscaler has been probed successfully\n"); + + return 0; +} + +static void gsc_unbind(struct device *dev, struct device *master, + void *data) +{ + struct gsc_context *ctx = dev_get_drvdata(dev); + struct drm_device *drm_dev = data; + struct exynos_drm_ipp *ipp = &ctx->ipp; + + exynos_drm_ipp_unregister(drm_dev, ipp); + drm_iommu_detach_device(drm_dev, dev); } +static const struct component_ops gsc_component_ops = { + .bind = gsc_bind, + .unbind = gsc_unbind, +}; + +static const unsigned int gsc_formats[] = { + DRM_FORMAT_ARGB8888, + DRM_FORMAT_XRGB8888, DRM_FORMAT_RGB565, DRM_FORMAT_BGRX8888, + DRM_FORMAT_NV12, DRM_FORMAT_NV16, DRM_FORMAT_NV21, DRM_FORMAT_NV61, + DRM_FORMAT_UYVY, DRM_FORMAT_VYUY, DRM_FORMAT_YUYV, DRM_FORMAT_YVYU, + DRM_FORMAT_YUV420, DRM_FORMAT_YVU420, DRM_FORMAT_YUV422, +}; + static int gsc_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; + struct gsc_driverdata *driver_data; + struct exynos_drm_ipp_formats *formats; struct gsc_context *ctx; struct resource *res; - struct exynos_drm_ippdrv *ippdrv; - int ret; + int ret, i; ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; - if (dev->of_node) { - ctx->sysreg = syscon_regmap_lookup_by_phandle(dev->of_node, - "samsung,sysreg"); - if (IS_ERR(ctx->sysreg)) { - dev_warn(dev, "failed to get system register.\n"); - ctx->sysreg = NULL; - } + formats = devm_kzalloc(dev, sizeof(*formats) * + (ARRAY_SIZE(gsc_formats)), GFP_KERNEL); + if (!formats) + return -ENOMEM; + + driver_data = (struct gsc_driverdata *)of_device_get_match_data(dev); + ctx->dev = dev; + ctx->num_clocks = driver_data->num_clocks; + ctx->clk_names = driver_data->clk_names; + + for (i = 0; i < ARRAY_SIZE(gsc_formats); i++) { + formats[i].fourcc = gsc_formats[i]; + formats[i].type = DRM_EXYNOS_IPP_FORMAT_SOURCE | + DRM_EXYNOS_IPP_FORMAT_DESTINATION; + formats[i].limits = driver_data->limits; + formats[i].num_limits = driver_data->num_limits; } + ctx->formats = formats; + ctx->num_formats = ARRAY_SIZE(gsc_formats); /* clock control */ - ctx->gsc_clk = devm_clk_get(dev, "gscl"); - if (IS_ERR(ctx->gsc_clk)) { - dev_err(dev, "failed to get gsc clock.\n"); - return PTR_ERR(ctx->gsc_clk); + for (i = 0; i < ctx->num_clocks; i++) { + ctx->clocks[i] = devm_clk_get(dev, ctx->clk_names[i]); + if (IS_ERR(ctx->clocks[i])) { + dev_err(dev, "failed to get clock: %s\n", + ctx->clk_names[i]); + return PTR_ERR(ctx->clocks[i]); + } } /* resource memory */ @@ -1699,8 +1246,8 @@ static int gsc_probe(struct platform_device *pdev) } ctx->irq = res->start; - ret = devm_request_threaded_irq(dev, ctx->irq, NULL, gsc_irq_handler, - IRQF_ONESHOT, "drm_gsc", ctx); + ret = devm_request_irq(dev, ctx->irq, gsc_irq_handler, 0, + dev_name(dev), ctx); if (ret < 0) { dev_err(dev, "failed to request irq.\n"); return ret; @@ -1709,38 +1256,22 @@ static int gsc_probe(struct platform_device *pdev) /* context initailization */ ctx->id = pdev->id; - ippdrv = &ctx->ippdrv; - ippdrv->dev = dev; - ippdrv->ops[EXYNOS_DRM_OPS_SRC] = &gsc_src_ops; - ippdrv->ops[EXYNOS_DRM_OPS_DST] = &gsc_dst_ops; - ippdrv->check_property = gsc_ippdrv_check_property; - ippdrv->reset = gsc_ippdrv_reset; - ippdrv->start = gsc_ippdrv_start; - ippdrv->stop = gsc_ippdrv_stop; - ret = gsc_init_prop_list(ippdrv); - if (ret < 0) { - dev_err(dev, "failed to init property list.\n"); - return ret; - } - - DRM_DEBUG_KMS("id[%d]ippdrv[%pK]\n", ctx->id, ippdrv); - - mutex_init(&ctx->lock); platform_set_drvdata(pdev, ctx); + pm_runtime_use_autosuspend(dev); + pm_runtime_set_autosuspend_delay(dev, GSC_AUTOSUSPEND_DELAY); pm_runtime_enable(dev); - ret = exynos_drm_ippdrv_register(ippdrv); - if (ret < 0) { - dev_err(dev, "failed to register drm gsc device.\n"); - goto err_ippdrv_register; - } + ret = component_add(dev, &gsc_component_ops); + if (ret) + goto err_pm_dis; dev_info(dev, "drm gsc registered successfully.\n"); return 0; -err_ippdrv_register: +err_pm_dis: + pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); return ret; } @@ -1748,13 +1279,8 @@ err_ippdrv_register: static int gsc_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct gsc_context *ctx = get_gsc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - exynos_drm_ippdrv_unregister(ippdrv); - mutex_destroy(&ctx->lock); - - pm_runtime_set_suspended(dev); + pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); return 0; @@ -1763,19 +1289,32 @@ static int gsc_remove(struct platform_device *pdev) static int __maybe_unused gsc_runtime_suspend(struct device *dev) { struct gsc_context *ctx = get_gsc_context(dev); + int i; DRM_DEBUG_KMS("id[%d]\n", ctx->id); - return gsc_clk_ctrl(ctx, false); + for (i = ctx->num_clocks - 1; i >= 0; i--) + clk_disable_unprepare(ctx->clocks[i]); + + return 0; } static int __maybe_unused gsc_runtime_resume(struct device *dev) { struct gsc_context *ctx = get_gsc_context(dev); + int i, ret; DRM_DEBUG_KMS("id[%d]\n", ctx->id); - return gsc_clk_ctrl(ctx, true); + for (i = 0; i < ctx->num_clocks; i++) { + ret = clk_prepare_enable(ctx->clocks[i]); + if (ret) { + while (--i > 0) + clk_disable_unprepare(ctx->clocks[i]); + return ret; + } + } + return 0; } static const struct dev_pm_ops gsc_pm_ops = { @@ -1784,9 +1323,66 @@ static const struct dev_pm_ops gsc_pm_ops = { SET_RUNTIME_PM_OPS(gsc_runtime_suspend, gsc_runtime_resume, NULL) }; +static const struct drm_exynos_ipp_limit gsc_5250_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 32, 4800, 8 }, .v = { 16, 3344, 8 }) }, + { IPP_SIZE_LIMIT(AREA, .h = { 16, 4800, 2 }, .v = { 8, 3344, 2 }) }, + { IPP_SIZE_LIMIT(ROTATED, .h = { 32, 2048 }, .v = { 16, 2048 }) }, + { IPP_SCALE_LIMIT(.h = { (1 << 16) / 16, (1 << 16) * 8 }, + .v = { (1 << 16) / 16, (1 << 16) * 8 }) }, +}; + +static const struct drm_exynos_ipp_limit gsc_5420_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 32, 4800, 8 }, .v = { 16, 3344, 8 }) }, + { IPP_SIZE_LIMIT(AREA, .h = { 16, 4800, 2 }, .v = { 8, 3344, 2 }) }, + { IPP_SIZE_LIMIT(ROTATED, .h = { 16, 2016 }, .v = { 8, 2016 }) }, + { IPP_SCALE_LIMIT(.h = { (1 << 16) / 16, (1 << 16) * 8 }, + .v = { (1 << 16) / 16, (1 << 16) * 8 }) }, +}; + +static const struct drm_exynos_ipp_limit gsc_5433_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 32, 8191, 2 }, .v = { 16, 8191, 2 }) }, + { IPP_SIZE_LIMIT(AREA, .h = { 16, 4800, 1 }, .v = { 8, 3344, 1 }) }, + { IPP_SIZE_LIMIT(ROTATED, .h = { 32, 2047 }, .v = { 8, 8191 }) }, + { IPP_SCALE_LIMIT(.h = { (1 << 16) / 16, (1 << 16) * 8 }, + .v = { (1 << 16) / 16, (1 << 16) * 8 }) }, +}; + +static struct gsc_driverdata gsc_exynos5250_drvdata = { + .clk_names = {"gscl"}, + .num_clocks = 1, + .limits = gsc_5250_limits, + .num_limits = ARRAY_SIZE(gsc_5250_limits), +}; + +static struct gsc_driverdata gsc_exynos5420_drvdata = { + .clk_names = {"gscl"}, + .num_clocks = 1, + .limits = gsc_5420_limits, + .num_limits = ARRAY_SIZE(gsc_5420_limits), +}; + +static struct gsc_driverdata gsc_exynos5433_drvdata = { + .clk_names = {"pclk", "aclk", "aclk_xiu", "aclk_gsclbend"}, + .num_clocks = 4, + .limits = gsc_5433_limits, + .num_limits = ARRAY_SIZE(gsc_5433_limits), +}; + static const struct of_device_id exynos_drm_gsc_of_match[] = { - { .compatible = "samsung,exynos5-gsc" }, - { }, + { + .compatible = "samsung,exynos5-gsc", + .data = &gsc_exynos5250_drvdata, + }, { + .compatible = "samsung,exynos5250-gsc", + .data = &gsc_exynos5250_drvdata, + }, { + .compatible = "samsung,exynos5420-gsc", + .data = &gsc_exynos5420_drvdata, + }, { + .compatible = "samsung,exynos5433-gsc", + .data = &gsc_exynos5433_drvdata, + }, { + }, }; MODULE_DEVICE_TABLE(of, exynos_drm_gsc_of_match); @@ -1800,4 +1396,3 @@ struct platform_driver gsc_driver = { .of_match_table = of_match_ptr(exynos_drm_gsc_of_match), }, }; - diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.h b/drivers/gpu/drm/exynos/exynos_drm_gsc.h deleted file mode 100644 index 29ec1c5efcf2..000000000000 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (c) 2012 Samsung Electronics Co., Ltd. - * - * Authors: - * Eunchul Kim <chulspro.kim@samsung.com> - * Jinyoung Jeon <jy0.jeon@samsung.com> - * Sangmin Lee <lsmin.lee@samsung.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#ifndef _EXYNOS_DRM_GSC_H_ -#define _EXYNOS_DRM_GSC_H_ - -/* - * TODO - * FIMD output interface notifier callback. - * Mixer output interface notifier callback. - */ - -#endif /* _EXYNOS_DRM_GSC_H_ */ diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.c b/drivers/gpu/drm/exynos/exynos_drm_ipp.c new file mode 100644 index 000000000000..26374e58c557 --- /dev/null +++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.c @@ -0,0 +1,916 @@ +/* + * Copyright (C) 2017 Samsung Electronics Co.Ltd + * Authors: + * Marek Szyprowski <m.szyprowski@samsung.com> + * + * Exynos DRM Image Post Processing (IPP) related functions + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + */ + + +#include <drm/drmP.h> +#include <drm/drm_mode.h> +#include <uapi/drm/exynos_drm.h> + +#include "exynos_drm_drv.h" +#include "exynos_drm_gem.h" +#include "exynos_drm_ipp.h" + +static int num_ipp; +static LIST_HEAD(ipp_list); + +/** + * exynos_drm_ipp_register - Register a new picture processor hardware module + * @dev: DRM device + * @ipp: ipp module to init + * @funcs: callbacks for the new ipp object + * @caps: bitmask of ipp capabilities (%DRM_EXYNOS_IPP_CAP_*) + * @formats: array of supported formats + * @num_formats: size of the supported formats array + * @name: name (for debugging purposes) + * + * Initializes a ipp module. + * + * Returns: + * Zero on success, error code on failure. + */ +int exynos_drm_ipp_register(struct drm_device *dev, struct exynos_drm_ipp *ipp, + const struct exynos_drm_ipp_funcs *funcs, unsigned int caps, + const struct exynos_drm_ipp_formats *formats, + unsigned int num_formats, const char *name) +{ + WARN_ON(!ipp); + WARN_ON(!funcs); + WARN_ON(!formats); + WARN_ON(!num_formats); + + spin_lock_init(&ipp->lock); + INIT_LIST_HEAD(&ipp->todo_list); + init_waitqueue_head(&ipp->done_wq); + ipp->dev = dev; + ipp->funcs = funcs; + ipp->capabilities = caps; + ipp->name = name; + ipp->formats = formats; + ipp->num_formats = num_formats; + + /* ipp_list modification is serialized by component framework */ + list_add_tail(&ipp->head, &ipp_list); + ipp->id = num_ipp++; + + DRM_DEBUG_DRIVER("Registered ipp %d\n", ipp->id); + + return 0; +} + +/** + * exynos_drm_ipp_unregister - Unregister the picture processor module + * @dev: DRM device + * @ipp: ipp module + */ +void exynos_drm_ipp_unregister(struct drm_device *dev, + struct exynos_drm_ipp *ipp) +{ + WARN_ON(ipp->task); + WARN_ON(!list_empty(&ipp->todo_list)); + list_del(&ipp->head); +} + +/** + * exynos_drm_ipp_ioctl_get_res_ioctl - enumerate all ipp modules + * @dev: DRM device + * @data: ioctl data + * @file_priv: DRM file info + * + * Construct a list of ipp ids. + * + * Called by the user via ioctl. + * + * Returns: + * Zero on success, negative errno on failure. + */ +int exynos_drm_ipp_get_res_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_exynos_ioctl_ipp_get_res *resp = data; + struct exynos_drm_ipp *ipp; + uint32_t __user *ipp_ptr = (uint32_t __user *) + (unsigned long)resp->ipp_id_ptr; + unsigned int count = num_ipp, copied = 0; + + /* + * This ioctl is called twice, once to determine how much space is + * needed, and the 2nd time to fill it. + */ + if (count && resp->count_ipps >= count) { + list_for_each_entry(ipp, &ipp_list, head) { + if (put_user(ipp->id, ipp_ptr + copied)) + return -EFAULT; + copied++; + } + } + resp->count_ipps = count; + + return 0; +} + +static inline struct exynos_drm_ipp *__ipp_get(uint32_t id) +{ + struct exynos_drm_ipp *ipp; + + list_for_each_entry(ipp, &ipp_list, head) + if (ipp->id == id) + return ipp; + return NULL; +} + +/** + * exynos_drm_ipp_ioctl_get_caps - get ipp module capabilities and formats + * @dev: DRM device + * @data: ioctl data + * @file_priv: DRM file info + * + * Construct a structure describing ipp module capabilities. + * + * Called by the user via ioctl. + * + * Returns: + * Zero on success, negative errno on failure. + */ +int exynos_drm_ipp_get_caps_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_exynos_ioctl_ipp_get_caps *resp = data; + void __user *ptr = (void __user *)(unsigned long)resp->formats_ptr; + struct exynos_drm_ipp *ipp; + int i; + + ipp = __ipp_get(resp->ipp_id); + if (!ipp) + return -ENOENT; + + resp->ipp_id = ipp->id; + resp->capabilities = ipp->capabilities; + + /* + * This ioctl is called twice, once to determine how much space is + * needed, and the 2nd time to fill it. + */ + if (resp->formats_count >= ipp->num_formats) { + for (i = 0; i < ipp->num_formats; i++) { + struct drm_exynos_ipp_format tmp = { + .fourcc = ipp->formats[i].fourcc, + .type = ipp->formats[i].type, + .modifier = ipp->formats[i].modifier, + }; + + if (copy_to_user(ptr, &tmp, sizeof(tmp))) + return -EFAULT; + ptr += sizeof(tmp); + } + } + resp->formats_count = ipp->num_formats; + + return 0; +} + +static inline const struct exynos_drm_ipp_formats *__ipp_format_get( + struct exynos_drm_ipp *ipp, uint32_t fourcc, + uint64_t mod, unsigned int type) +{ + int i; + + for (i = 0; i < ipp->num_formats; i++) { + if ((ipp->formats[i].type & type) && + ipp->formats[i].fourcc == fourcc && + ipp->formats[i].modifier == mod) + return &ipp->formats[i]; + } + return NULL; +} + +/** + * exynos_drm_ipp_get_limits_ioctl - get ipp module limits + * @dev: DRM device + * @data: ioctl data + * @file_priv: DRM file info + * + * Construct a structure describing ipp module limitations for provided + * picture format. + * + * Called by the user via ioctl. + * + * Returns: + * Zero on success, negative errno on failure. + */ +int exynos_drm_ipp_get_limits_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_exynos_ioctl_ipp_get_limits *resp = data; + void __user *ptr = (void __user *)(unsigned long)resp->limits_ptr; + const struct exynos_drm_ipp_formats *format; + struct exynos_drm_ipp *ipp; + + if (resp->type != DRM_EXYNOS_IPP_FORMAT_SOURCE && + resp->type != DRM_EXYNOS_IPP_FORMAT_DESTINATION) + return -EINVAL; + + ipp = __ipp_get(resp->ipp_id); + if (!ipp) + return -ENOENT; + + format = __ipp_format_get(ipp, resp->fourcc, resp->modifier, + resp->type); + if (!format) + return -EINVAL; + + /* + * This ioctl is called twice, once to determine how much space is + * needed, and the 2nd time to fill it. + */ + if (format->num_limits && resp->limits_count >= format->num_limits) + if (copy_to_user((void __user *)ptr, format->limits, + sizeof(*format->limits) * format->num_limits)) + return -EFAULT; + resp->limits_count = format->num_limits; + + return 0; +} + +struct drm_pending_exynos_ipp_event { + struct drm_pending_event base; + struct drm_exynos_ipp_event event; +}; + +static inline struct exynos_drm_ipp_task * + exynos_drm_ipp_task_alloc(struct exynos_drm_ipp *ipp) +{ + struct exynos_drm_ipp_task *task; + + task = kzalloc(sizeof(*task), GFP_KERNEL); + if (!task) + return NULL; + + task->dev = ipp->dev; + task->ipp = ipp; + + /* some defaults */ + task->src.rect.w = task->dst.rect.w = UINT_MAX; + task->src.rect.h = task->dst.rect.h = UINT_MAX; + task->transform.rotation = DRM_MODE_ROTATE_0; + + DRM_DEBUG_DRIVER("Allocated task %pK\n", task); + + return task; +} + +static const struct exynos_drm_param_map { + unsigned int id; + unsigned int size; + unsigned int offset; +} exynos_drm_ipp_params_maps[] = { + { + DRM_EXYNOS_IPP_TASK_BUFFER | DRM_EXYNOS_IPP_TASK_TYPE_SOURCE, + sizeof(struct drm_exynos_ipp_task_buffer), + offsetof(struct exynos_drm_ipp_task, src.buf), + }, { + DRM_EXYNOS_IPP_TASK_BUFFER | + DRM_EXYNOS_IPP_TASK_TYPE_DESTINATION, + sizeof(struct drm_exynos_ipp_task_buffer), + offsetof(struct exynos_drm_ipp_task, dst.buf), + }, { + DRM_EXYNOS_IPP_TASK_RECTANGLE | DRM_EXYNOS_IPP_TASK_TYPE_SOURCE, + sizeof(struct drm_exynos_ipp_task_rect), + offsetof(struct exynos_drm_ipp_task, src.rect), + }, { + DRM_EXYNOS_IPP_TASK_RECTANGLE | + DRM_EXYNOS_IPP_TASK_TYPE_DESTINATION, + sizeof(struct drm_exynos_ipp_task_rect), + offsetof(struct exynos_drm_ipp_task, dst.rect), + }, { + DRM_EXYNOS_IPP_TASK_TRANSFORM, + sizeof(struct drm_exynos_ipp_task_transform), + offsetof(struct exynos_drm_ipp_task, transform), + }, { + DRM_EXYNOS_IPP_TASK_ALPHA, + sizeof(struct drm_exynos_ipp_task_alpha), + offsetof(struct exynos_drm_ipp_task, alpha), + }, +}; + +static int exynos_drm_ipp_task_set(struct exynos_drm_ipp_task *task, + struct drm_exynos_ioctl_ipp_commit *arg) +{ + const struct exynos_drm_param_map *map = exynos_drm_ipp_params_maps; + void __user *params = (void __user *)(unsigned long)arg->params_ptr; + unsigned int size = arg->params_size; + uint32_t id; + int i; + + while (size) { + if (get_user(id, (uint32_t __user *)params)) + return -EFAULT; + + for (i = 0; i < ARRAY_SIZE(exynos_drm_ipp_params_maps); i++) + if (map[i].id == id) + break; + if (i == ARRAY_SIZE(exynos_drm_ipp_params_maps) || + map[i].size > size) + return -EINVAL; + + if (copy_from_user((void *)task + map[i].offset, params, + map[i].size)) + return -EFAULT; + + params += map[i].size; + size -= map[i].size; + } + + DRM_DEBUG_DRIVER("Got task %pK configuration from userspace\n", task); + return 0; +} + +static int exynos_drm_ipp_task_setup_buffer(struct exynos_drm_ipp_buffer *buf, + struct drm_file *filp) +{ + int ret = 0; + int i; + + /* basic checks */ + if (buf->buf.width == 0 || buf->buf.height == 0) + return -EINVAL; + buf->format = drm_format_info(buf->buf.fourcc); + for (i = 0; i < buf->format->num_planes; i++) { + unsigned int width = (i == 0) ? buf->buf.width : + DIV_ROUND_UP(buf->buf.width, buf->format->hsub); + + if (buf->buf.pitch[i] == 0) + buf->buf.pitch[i] = width * buf->format->cpp[i]; + if (buf->buf.pitch[i] < width * buf->format->cpp[i]) + return -EINVAL; + if (!buf->buf.gem_id[i]) + return -ENOENT; + } + + /* pitch for additional planes must match */ + if (buf->format->num_planes > 2 && + buf->buf.pitch[1] != buf->buf.pitch[2]) + return -EINVAL; + + /* get GEM buffers and check their size */ + for (i = 0; i < buf->format->num_planes; i++) { + unsigned int height = (i == 0) ? buf->buf.height : + DIV_ROUND_UP(buf->buf.height, buf->format->vsub); + unsigned long size = height * buf->buf.pitch[i]; + struct drm_gem_object *obj = drm_gem_object_lookup(filp, + buf->buf.gem_id[i]); + if (!obj) { + ret = -ENOENT; + goto gem_free; + } + buf->exynos_gem[i] = to_exynos_gem(obj); + + if (size + buf->buf.offset[i] > buf->exynos_gem[i]->size) { + i++; + ret = -EINVAL; + goto gem_free; + } + buf->dma_addr[i] = buf->exynos_gem[i]->dma_addr + + buf->buf.offset[i]; + } + + return 0; +gem_free: + while (i--) { + drm_gem_object_put_unlocked(&buf->exynos_gem[i]->base); + buf->exynos_gem[i] = NULL; + } + return ret; +} + +static void exynos_drm_ipp_task_release_buf(struct exynos_drm_ipp_buffer *buf) +{ + int i; + + if (!buf->exynos_gem[0]) + return; + for (i = 0; i < buf->format->num_planes; i++) + drm_gem_object_put_unlocked(&buf->exynos_gem[i]->base); +} + +static void exynos_drm_ipp_task_free(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task) +{ + DRM_DEBUG_DRIVER("Freeing task %pK\n", task); + + exynos_drm_ipp_task_release_buf(&task->src); + exynos_drm_ipp_task_release_buf(&task->dst); + if (task->event) + drm_event_cancel_free(ipp->dev, &task->event->base); + kfree(task); +} + +struct drm_ipp_limit { + struct drm_exynos_ipp_limit_val h; + struct drm_exynos_ipp_limit_val v; +}; + +enum drm_ipp_size_id { + IPP_LIMIT_BUFFER, IPP_LIMIT_AREA, IPP_LIMIT_ROTATED, IPP_LIMIT_MAX +}; + +static const enum drm_ipp_size_id limit_id_fallback[IPP_LIMIT_MAX][4] = { + [IPP_LIMIT_BUFFER] = { DRM_EXYNOS_IPP_LIMIT_SIZE_BUFFER }, + [IPP_LIMIT_AREA] = { DRM_EXYNOS_IPP_LIMIT_SIZE_AREA, + DRM_EXYNOS_IPP_LIMIT_SIZE_BUFFER }, + [IPP_LIMIT_ROTATED] = { DRM_EXYNOS_IPP_LIMIT_SIZE_ROTATED, + DRM_EXYNOS_IPP_LIMIT_SIZE_AREA, + DRM_EXYNOS_IPP_LIMIT_SIZE_BUFFER }, +}; + +static inline void __limit_set_val(unsigned int *ptr, unsigned int val) +{ + if (!*ptr) + *ptr = val; +} + +static void __get_size_limit(const struct drm_exynos_ipp_limit *limits, + unsigned int num_limits, enum drm_ipp_size_id id, + struct drm_ipp_limit *res) +{ + const struct drm_exynos_ipp_limit *l = limits; + int i = 0; + + memset(res, 0, sizeof(*res)); + for (i = 0; limit_id_fallback[id][i]; i++) + for (l = limits; l - limits < num_limits; l++) { + if (((l->type & DRM_EXYNOS_IPP_LIMIT_TYPE_MASK) != + DRM_EXYNOS_IPP_LIMIT_TYPE_SIZE) || + ((l->type & DRM_EXYNOS_IPP_LIMIT_SIZE_MASK) != + limit_id_fallback[id][i])) + continue; + __limit_set_val(&res->h.min, l->h.min); + __limit_set_val(&res->h.max, l->h.max); + __limit_set_val(&res->h.align, l->h.align); + __limit_set_val(&res->v.min, l->v.min); + __limit_set_val(&res->v.max, l->v.max); + __limit_set_val(&res->v.align, l->v.align); + } +} + +static inline bool __align_check(unsigned int val, unsigned int align) +{ + if (align && (val & (align - 1))) { + DRM_DEBUG_DRIVER("Value %d exceeds HW limits (align %d)\n", + val, align); + return false; + } + return true; +} + +static inline bool __size_limit_check(unsigned int val, + struct drm_exynos_ipp_limit_val *l) +{ + if ((l->min && val < l->min) || (l->max && val > l->max)) { + DRM_DEBUG_DRIVER("Value %d exceeds HW limits (min %d, max %d)\n", + val, l->min, l->max); + return false; + } + return __align_check(val, l->align); +} + +static int exynos_drm_ipp_check_size_limits(struct exynos_drm_ipp_buffer *buf, + const struct drm_exynos_ipp_limit *limits, unsigned int num_limits, + bool rotate, bool swap) +{ + enum drm_ipp_size_id id = rotate ? IPP_LIMIT_ROTATED : IPP_LIMIT_AREA; + struct drm_ipp_limit l; + struct drm_exynos_ipp_limit_val *lh = &l.h, *lv = &l.v; + + if (!limits) + return 0; + + __get_size_limit(limits, num_limits, IPP_LIMIT_BUFFER, &l); + if (!__size_limit_check(buf->buf.width, &l.h) || + !__size_limit_check(buf->buf.height, &l.v)) + return -EINVAL; + + if (swap) { + lv = &l.h; + lh = &l.v; + } + __get_size_limit(limits, num_limits, id, &l); + if (!__size_limit_check(buf->rect.w, lh) || + !__align_check(buf->rect.x, lh->align) || + !__size_limit_check(buf->rect.h, lv) || + !__align_check(buf->rect.y, lv->align)) + return -EINVAL; + + return 0; +} + +static inline bool __scale_limit_check(unsigned int src, unsigned int dst, + unsigned int min, unsigned int max) +{ + if ((max && (dst << 16) > src * max) || + (min && (dst << 16) < src * min)) { + DRM_DEBUG_DRIVER("Scale from %d to %d exceeds HW limits (ratio min %d.%05d, max %d.%05d)\n", + src, dst, + min >> 16, 100000 * (min & 0xffff) / (1 << 16), + max >> 16, 100000 * (max & 0xffff) / (1 << 16)); + return false; + } + return true; +} + +static int exynos_drm_ipp_check_scale_limits( + struct drm_exynos_ipp_task_rect *src, + struct drm_exynos_ipp_task_rect *dst, + const struct drm_exynos_ipp_limit *limits, + unsigned int num_limits, bool swap) +{ + const struct drm_exynos_ipp_limit_val *lh, *lv; + int dw, dh; + + for (; num_limits; limits++, num_limits--) + if ((limits->type & DRM_EXYNOS_IPP_LIMIT_TYPE_MASK) == + DRM_EXYNOS_IPP_LIMIT_TYPE_SCALE) + break; + if (!num_limits) + return 0; + + lh = (!swap) ? &limits->h : &limits->v; + lv = (!swap) ? &limits->v : &limits->h; + dw = (!swap) ? dst->w : dst->h; + dh = (!swap) ? dst->h : dst->w; + + if (!__scale_limit_check(src->w, dw, lh->min, lh->max) || + !__scale_limit_check(src->h, dh, lv->min, lv->max)) + return -EINVAL; + + return 0; +} + +static int exynos_drm_ipp_task_check(struct exynos_drm_ipp_task *task) +{ + struct exynos_drm_ipp *ipp = task->ipp; + const struct exynos_drm_ipp_formats *src_fmt, *dst_fmt; + struct exynos_drm_ipp_buffer *src = &task->src, *dst = &task->dst; + unsigned int rotation = task->transform.rotation; + int ret = 0; + bool swap = drm_rotation_90_or_270(rotation); + bool rotate = (rotation != DRM_MODE_ROTATE_0); + bool scale = false; + + DRM_DEBUG_DRIVER("Checking task %pK\n", task); + + if (src->rect.w == UINT_MAX) + src->rect.w = src->buf.width; + if (src->rect.h == UINT_MAX) + src->rect.h = src->buf.height; + if (dst->rect.w == UINT_MAX) + dst->rect.w = dst->buf.width; + if (dst->rect.h == UINT_MAX) + dst->rect.h = dst->buf.height; + + if (src->rect.x + src->rect.w > (src->buf.width) || + src->rect.y + src->rect.h > (src->buf.height) || + dst->rect.x + dst->rect.w > (dst->buf.width) || + dst->rect.y + dst->rect.h > (dst->buf.height)) { + DRM_DEBUG_DRIVER("Task %pK: defined area is outside provided buffers\n", + task); + return -EINVAL; + } + + if ((!swap && (src->rect.w != dst->rect.w || + src->rect.h != dst->rect.h)) || + (swap && (src->rect.w != dst->rect.h || + src->rect.h != dst->rect.w))) + scale = true; + + if ((!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_CROP) && + (src->rect.x || src->rect.y || dst->rect.x || dst->rect.y)) || + (!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_ROTATE) && rotate) || + (!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_SCALE) && scale) || + (!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_CONVERT) && + src->buf.fourcc != dst->buf.fourcc)) { + DRM_DEBUG_DRIVER("Task %pK: hw capabilities exceeded\n", task); + return -EINVAL; + } + + src_fmt = __ipp_format_get(ipp, src->buf.fourcc, src->buf.modifier, + DRM_EXYNOS_IPP_FORMAT_SOURCE); + if (!src_fmt) { + DRM_DEBUG_DRIVER("Task %pK: src format not supported\n", task); + return -EINVAL; + } + ret = exynos_drm_ipp_check_size_limits(src, src_fmt->limits, + src_fmt->num_limits, + rotate, false); + if (ret) + return ret; + ret = exynos_drm_ipp_check_scale_limits(&src->rect, &dst->rect, + src_fmt->limits, + src_fmt->num_limits, swap); + if (ret) + return ret; + + dst_fmt = __ipp_format_get(ipp, dst->buf.fourcc, dst->buf.modifier, + DRM_EXYNOS_IPP_FORMAT_DESTINATION); + if (!dst_fmt) { + DRM_DEBUG_DRIVER("Task %pK: dst format not supported\n", task); + return -EINVAL; + } + ret = exynos_drm_ipp_check_size_limits(dst, dst_fmt->limits, + dst_fmt->num_limits, + false, swap); + if (ret) + return ret; + ret = exynos_drm_ipp_check_scale_limits(&src->rect, &dst->rect, + dst_fmt->limits, + dst_fmt->num_limits, swap); + if (ret) + return ret; + + DRM_DEBUG_DRIVER("Task %pK: all checks done.\n", task); + + return ret; +} + +static int exynos_drm_ipp_task_setup_buffers(struct exynos_drm_ipp_task *task, + struct drm_file *filp) +{ + struct exynos_drm_ipp_buffer *src = &task->src, *dst = &task->dst; + int ret = 0; + + DRM_DEBUG_DRIVER("Setting buffer for task %pK\n", task); + + ret = exynos_drm_ipp_task_setup_buffer(src, filp); + if (ret) { + DRM_DEBUG_DRIVER("Task %pK: src buffer setup failed\n", task); + return ret; + } + ret = exynos_drm_ipp_task_setup_buffer(dst, filp); + if (ret) { + DRM_DEBUG_DRIVER("Task %pK: dst buffer setup failed\n", task); + return ret; + } + + DRM_DEBUG_DRIVER("Task %pK: buffers prepared.\n", task); + + return ret; +} + + +static int exynos_drm_ipp_event_create(struct exynos_drm_ipp_task *task, + struct drm_file *file_priv, uint64_t user_data) +{ + struct drm_pending_exynos_ipp_event *e = NULL; + int ret; + + e = kzalloc(sizeof(*e), GFP_KERNEL); + if (!e) + return -ENOMEM; + + e->event.base.type = DRM_EXYNOS_IPP_EVENT; + e->event.base.length = sizeof(e->event); + e->event.user_data = user_data; + + ret = drm_event_reserve_init(task->dev, file_priv, &e->base, + &e->event.base); + if (ret) + goto free; + + task->event = e; + return 0; +free: + kfree(e); + return ret; +} + +static void exynos_drm_ipp_event_send(struct exynos_drm_ipp_task *task) +{ + struct timespec64 now; + + ktime_get_ts64(&now); + task->event->event.tv_sec = now.tv_sec; + task->event->event.tv_usec = now.tv_nsec / NSEC_PER_USEC; + task->event->event.sequence = atomic_inc_return(&task->ipp->sequence); + + drm_send_event(task->dev, &task->event->base); +} + +static int exynos_drm_ipp_task_cleanup(struct exynos_drm_ipp_task *task) +{ + int ret = task->ret; + + if (ret == 0 && task->event) { + exynos_drm_ipp_event_send(task); + /* ensure event won't be canceled on task free */ + task->event = NULL; + } + + exynos_drm_ipp_task_free(task->ipp, task); + return ret; +} + +static void exynos_drm_ipp_cleanup_work(struct work_struct *work) +{ + struct exynos_drm_ipp_task *task = container_of(work, + struct exynos_drm_ipp_task, cleanup_work); + + exynos_drm_ipp_task_cleanup(task); +} + +static void exynos_drm_ipp_next_task(struct exynos_drm_ipp *ipp); + +/** + * exynos_drm_ipp_task_done - finish given task and set return code + * @task: ipp task to finish + * @ret: error code or 0 if operation has been performed successfully + */ +void exynos_drm_ipp_task_done(struct exynos_drm_ipp_task *task, int ret) +{ + struct exynos_drm_ipp *ipp = task->ipp; + unsigned long flags; + + DRM_DEBUG_DRIVER("ipp: %d, task %pK done: %d\n", ipp->id, task, ret); + + spin_lock_irqsave(&ipp->lock, flags); + if (ipp->task == task) + ipp->task = NULL; + task->flags |= DRM_EXYNOS_IPP_TASK_DONE; + task->ret = ret; + spin_unlock_irqrestore(&ipp->lock, flags); + + exynos_drm_ipp_next_task(ipp); + wake_up(&ipp->done_wq); + + if (task->flags & DRM_EXYNOS_IPP_TASK_ASYNC) { + INIT_WORK(&task->cleanup_work, exynos_drm_ipp_cleanup_work); + schedule_work(&task->cleanup_work); + } +} + +static void exynos_drm_ipp_next_task(struct exynos_drm_ipp *ipp) +{ + struct exynos_drm_ipp_task *task; + unsigned long flags; + int ret; + + DRM_DEBUG_DRIVER("ipp: %d, try to run new task\n", ipp->id); + + spin_lock_irqsave(&ipp->lock, flags); + + if (ipp->task || list_empty(&ipp->todo_list)) { + spin_unlock_irqrestore(&ipp->lock, flags); + return; + } + + task = list_first_entry(&ipp->todo_list, struct exynos_drm_ipp_task, + head); + list_del_init(&task->head); + ipp->task = task; + + spin_unlock_irqrestore(&ipp->lock, flags); + + DRM_DEBUG_DRIVER("ipp: %d, selected task %pK to run\n", ipp->id, task); + + ret = ipp->funcs->commit(ipp, task); + if (ret) + exynos_drm_ipp_task_done(task, ret); +} + +static void exynos_drm_ipp_schedule_task(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task) +{ + unsigned long flags; + + spin_lock_irqsave(&ipp->lock, flags); + list_add(&task->head, &ipp->todo_list); + spin_unlock_irqrestore(&ipp->lock, flags); + + exynos_drm_ipp_next_task(ipp); +} + +static void exynos_drm_ipp_task_abort(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task) +{ + unsigned long flags; + + spin_lock_irqsave(&ipp->lock, flags); + if (task->flags & DRM_EXYNOS_IPP_TASK_DONE) { + /* already completed task */ + exynos_drm_ipp_task_cleanup(task); + } else if (ipp->task != task) { + /* task has not been scheduled for execution yet */ + list_del_init(&task->head); + exynos_drm_ipp_task_cleanup(task); + } else { + /* + * currently processed task, call abort() and perform + * cleanup with async worker + */ + task->flags |= DRM_EXYNOS_IPP_TASK_ASYNC; + spin_unlock_irqrestore(&ipp->lock, flags); + if (ipp->funcs->abort) + ipp->funcs->abort(ipp, task); + return; + } + spin_unlock_irqrestore(&ipp->lock, flags); +} + +/** + * exynos_drm_ipp_commit_ioctl - perform image processing operation + * @dev: DRM device + * @data: ioctl data + * @file_priv: DRM file info + * + * Construct a ipp task from the set of properties provided from the user + * and try to schedule it to framebuffer processor hardware. + * + * Called by the user via ioctl. + * + * Returns: + * Zero on success, negative errno on failure. + */ +int exynos_drm_ipp_commit_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_exynos_ioctl_ipp_commit *arg = data; + struct exynos_drm_ipp *ipp; + struct exynos_drm_ipp_task *task; + int ret = 0; + + if ((arg->flags & ~DRM_EXYNOS_IPP_FLAGS) || arg->reserved) + return -EINVAL; + + /* can't test and expect an event at the same time */ + if ((arg->flags & DRM_EXYNOS_IPP_FLAG_TEST_ONLY) && + (arg->flags & DRM_EXYNOS_IPP_FLAG_EVENT)) + return -EINVAL; + + ipp = __ipp_get(arg->ipp_id); + if (!ipp) + return -ENOENT; + + task = exynos_drm_ipp_task_alloc(ipp); + if (!task) + return -ENOMEM; + + ret = exynos_drm_ipp_task_set(task, arg); + if (ret) + goto free; + + ret = exynos_drm_ipp_task_check(task); + if (ret) + goto free; + + ret = exynos_drm_ipp_task_setup_buffers(task, file_priv); + if (ret || arg->flags & DRM_EXYNOS_IPP_FLAG_TEST_ONLY) + goto free; + + if (arg->flags & DRM_EXYNOS_IPP_FLAG_EVENT) { + ret = exynos_drm_ipp_event_create(task, file_priv, + arg->user_data); + if (ret) + goto free; + } + + /* + * Queue task for processing on the hardware. task object will be + * then freed after exynos_drm_ipp_task_done() + */ + if (arg->flags & DRM_EXYNOS_IPP_FLAG_NONBLOCK) { + DRM_DEBUG_DRIVER("ipp: %d, nonblocking processing task %pK\n", + ipp->id, task); + + task->flags |= DRM_EXYNOS_IPP_TASK_ASYNC; + exynos_drm_ipp_schedule_task(task->ipp, task); + ret = 0; + } else { + DRM_DEBUG_DRIVER("ipp: %d, processing task %pK\n", ipp->id, + task); + exynos_drm_ipp_schedule_task(ipp, task); + ret = wait_event_interruptible(ipp->done_wq, + task->flags & DRM_EXYNOS_IPP_TASK_DONE); + if (ret) + exynos_drm_ipp_task_abort(ipp, task); + else + ret = exynos_drm_ipp_task_cleanup(task); + } + return ret; +free: + exynos_drm_ipp_task_free(ipp, task); + + return ret; +} diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.h b/drivers/gpu/drm/exynos/exynos_drm_ipp.h new file mode 100644 index 000000000000..0b27d4a9bf94 --- /dev/null +++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.h @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2017 Samsung Electronics Co., Ltd. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef _EXYNOS_DRM_IPP_H_ +#define _EXYNOS_DRM_IPP_H_ + +#include <drm/drmP.h> + +struct exynos_drm_ipp; +struct exynos_drm_ipp_task; + +/** + * struct exynos_drm_ipp_funcs - exynos_drm_ipp control functions + */ +struct exynos_drm_ipp_funcs { + /** + * @commit: + * + * This is the main entry point to start framebuffer processing + * in the hardware. The exynos_drm_ipp_task has been already validated. + * This function must not wait until the device finishes processing. + * When the driver finishes processing, it has to call + * exynos_exynos_drm_ipp_task_done() function. + * + * RETURNS: + * + * 0 on success or negative error codes in case of failure. + */ + int (*commit)(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task); + + /** + * @abort: + * + * Informs the driver that it has to abort the currently running + * task as soon as possible (i.e. as soon as it can stop the device + * safely), even if the task would not have been finished by then. + * After the driver performs the necessary steps, it has to call + * exynos_drm_ipp_task_done() (as if the task ended normally). + * This function does not have to (and will usually not) wait + * until the device enters a state when it can be stopped. + */ + void (*abort)(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task); +}; + +/** + * struct exynos_drm_ipp - central picture processor module structure + */ +struct exynos_drm_ipp { + struct drm_device *dev; + struct list_head head; + unsigned int id; + + const char *name; + const struct exynos_drm_ipp_funcs *funcs; + unsigned int capabilities; + const struct exynos_drm_ipp_formats *formats; + unsigned int num_formats; + atomic_t sequence; + + spinlock_t lock; + struct exynos_drm_ipp_task *task; + struct list_head todo_list; + wait_queue_head_t done_wq; +}; + +struct exynos_drm_ipp_buffer { + struct drm_exynos_ipp_task_buffer buf; + struct drm_exynos_ipp_task_rect rect; + + struct exynos_drm_gem *exynos_gem[MAX_FB_BUFFER]; + const struct drm_format_info *format; + dma_addr_t dma_addr[MAX_FB_BUFFER]; +}; + +/** + * struct exynos_drm_ipp_task - a structure describing transformation that + * has to be performed by the picture processor hardware module + */ +struct exynos_drm_ipp_task { + struct drm_device *dev; + struct exynos_drm_ipp *ipp; + struct list_head head; + + struct exynos_drm_ipp_buffer src; + struct exynos_drm_ipp_buffer dst; + + struct drm_exynos_ipp_task_transform transform; + struct drm_exynos_ipp_task_alpha alpha; + + struct work_struct cleanup_work; + unsigned int flags; + int ret; + + struct drm_pending_exynos_ipp_event *event; +}; + +#define DRM_EXYNOS_IPP_TASK_DONE (1 << 0) +#define DRM_EXYNOS_IPP_TASK_ASYNC (1 << 1) + +struct exynos_drm_ipp_formats { + uint32_t fourcc; + uint32_t type; + uint64_t modifier; + const struct drm_exynos_ipp_limit *limits; + unsigned int num_limits; +}; + +/* helper macros to set exynos_drm_ipp_formats structure and limits*/ +#define IPP_SRCDST_MFORMAT(f, m, l) \ + .fourcc = DRM_FORMAT_##f, .modifier = m, .limits = l, \ + .num_limits = ARRAY_SIZE(l), \ + .type = (DRM_EXYNOS_IPP_FORMAT_SOURCE | \ + DRM_EXYNOS_IPP_FORMAT_DESTINATION) + +#define IPP_SRCDST_FORMAT(f, l) IPP_SRCDST_MFORMAT(f, 0, l) + +#define IPP_SIZE_LIMIT(l, val...) \ + .type = (DRM_EXYNOS_IPP_LIMIT_TYPE_SIZE | \ + DRM_EXYNOS_IPP_LIMIT_SIZE_##l), val + +#define IPP_SCALE_LIMIT(val...) \ + .type = (DRM_EXYNOS_IPP_LIMIT_TYPE_SCALE), val + +int exynos_drm_ipp_register(struct drm_device *dev, struct exynos_drm_ipp *ipp, + const struct exynos_drm_ipp_funcs *funcs, unsigned int caps, + const struct exynos_drm_ipp_formats *formats, + unsigned int num_formats, const char *name); +void exynos_drm_ipp_unregister(struct drm_device *dev, + struct exynos_drm_ipp *ipp); + +void exynos_drm_ipp_task_done(struct exynos_drm_ipp_task *task, int ret); + +#ifdef CONFIG_DRM_EXYNOS_IPP +int exynos_drm_ipp_get_res_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int exynos_drm_ipp_get_caps_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int exynos_drm_ipp_get_limits_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int exynos_drm_ipp_commit_ioctl(struct drm_device *dev, + void *data, struct drm_file *file_priv); +#else +static inline int exynos_drm_ipp_get_res_ioctl(struct drm_device *dev, + void *data, struct drm_file *file_priv) +{ + struct drm_exynos_ioctl_ipp_get_res *resp = data; + + resp->count_ipps = 0; + return 0; +} +static inline int exynos_drm_ipp_get_caps_ioctl(struct drm_device *dev, + void *data, struct drm_file *file_priv) +{ + return -ENODEV; +} +static inline int exynos_drm_ipp_get_limits_ioctl(struct drm_device *dev, + void *data, struct drm_file *file_priv) +{ + return -ENODEV; +} +static inline int exynos_drm_ipp_commit_ioctl(struct drm_device *dev, + void *data, struct drm_file *file_priv) +{ + return -ENODEV; +} +#endif +#endif diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c index 79282a820ecc..1a76dd3d52e1 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c +++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c @@ -10,6 +10,7 @@ */ #include <linux/kernel.h> +#include <linux/component.h> #include <linux/err.h> #include <linux/interrupt.h> #include <linux/io.h> @@ -22,29 +23,18 @@ #include <drm/exynos_drm.h> #include "regs-rotator.h" #include "exynos_drm_drv.h" +#include "exynos_drm_iommu.h" #include "exynos_drm_ipp.h" /* * Rotator supports image crop/rotator and input/output DMA operations. * input DMA reads image data from the memory. * output DMA writes image data to memory. - * - * M2M operation : supports crop/scale/rotation/csc so on. - * Memory ----> Rotator H/W ----> Memory. */ -/* - * TODO - * 1. check suspend/resume api if needed. - * 2. need to check use case platform_device_id. - * 3. check src/dst size with, height. - * 4. need to add supported list in prop_list. - */ +#define ROTATOR_AUTOSUSPEND_DELAY 2000 -#define get_rot_context(dev) platform_get_drvdata(to_platform_device(dev)) -#define get_ctx_from_ippdrv(ippdrv) container_of(ippdrv,\ - struct rot_context, ippdrv); -#define rot_read(offset) readl(rot->regs + (offset)) +#define rot_read(offset) readl(rot->regs + (offset)) #define rot_write(cfg, offset) writel(cfg, rot->regs + (offset)) enum rot_irq_status { @@ -52,54 +42,28 @@ enum rot_irq_status { ROT_IRQ_STATUS_ILLEGAL = 9, }; -/* - * A structure of limitation. - * - * @min_w: minimum width. - * @min_h: minimum height. - * @max_w: maximum width. - * @max_h: maximum height. - * @align: align size. - */ -struct rot_limit { - u32 min_w; - u32 min_h; - u32 max_w; - u32 max_h; - u32 align; -}; - -/* - * A structure of limitation table. - * - * @ycbcr420_2p: case of YUV. - * @rgb888: case of RGB. - */ -struct rot_limit_table { - struct rot_limit ycbcr420_2p; - struct rot_limit rgb888; +struct rot_variant { + const struct exynos_drm_ipp_formats *formats; + unsigned int num_formats; }; /* * A structure of rotator context. * @ippdrv: prepare initialization using ippdrv. - * @regs_res: register resources. * @regs: memory mapped io registers. * @clock: rotator gate clock. * @limit_tbl: limitation of rotator. * @irq: irq number. - * @cur_buf_id: current operation buffer id. - * @suspended: suspended state. */ struct rot_context { - struct exynos_drm_ippdrv ippdrv; - struct resource *regs_res; + struct exynos_drm_ipp ipp; + struct drm_device *drm_dev; + struct device *dev; void __iomem *regs; struct clk *clock; - struct rot_limit_table *limit_tbl; - int irq; - int cur_buf_id[EXYNOS_DRM_OPS_MAX]; - bool suspended; + const struct exynos_drm_ipp_formats *formats; + unsigned int num_formats; + struct exynos_drm_ipp_task *task; }; static void rotator_reg_set_irq(struct rot_context *rot, bool enable) @@ -114,15 +78,6 @@ static void rotator_reg_set_irq(struct rot_context *rot, bool enable) rot_write(val, ROT_CONFIG); } -static u32 rotator_reg_get_fmt(struct rot_context *rot) -{ - u32 val = rot_read(ROT_CONTROL); - - val &= ROT_CONTROL_FMT_MASK; - - return val; -} - static enum rot_irq_status rotator_reg_get_irq_status(struct rot_context *rot) { u32 val = rot_read(ROT_STATUS); @@ -138,9 +93,6 @@ static enum rot_irq_status rotator_reg_get_irq_status(struct rot_context *rot) static irqreturn_t rotator_irq_handler(int irq, void *arg) { struct rot_context *rot = arg; - struct exynos_drm_ippdrv *ippdrv = &rot->ippdrv; - struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node; - struct drm_exynos_ipp_event_work *event_work = c_node->event_work; enum rot_irq_status irq_status; u32 val; @@ -152,56 +104,21 @@ static irqreturn_t rotator_irq_handler(int irq, void *arg) val |= ROT_STATUS_IRQ_PENDING((u32)irq_status); rot_write(val, ROT_STATUS); - if (irq_status == ROT_IRQ_STATUS_COMPLETE) { - event_work->ippdrv = ippdrv; - event_work->buf_id[EXYNOS_DRM_OPS_DST] = - rot->cur_buf_id[EXYNOS_DRM_OPS_DST]; - queue_work(ippdrv->event_workq, &event_work->work); - } else { - DRM_ERROR("the SFR is set illegally\n"); + if (rot->task) { + struct exynos_drm_ipp_task *task = rot->task; + + rot->task = NULL; + pm_runtime_mark_last_busy(rot->dev); + pm_runtime_put_autosuspend(rot->dev); + exynos_drm_ipp_task_done(task, + irq_status == ROT_IRQ_STATUS_COMPLETE ? 0 : -EINVAL); } return IRQ_HANDLED; } -static void rotator_align_size(struct rot_context *rot, u32 fmt, u32 *hsize, - u32 *vsize) +static void rotator_src_set_fmt(struct rot_context *rot, u32 fmt) { - struct rot_limit_table *limit_tbl = rot->limit_tbl; - struct rot_limit *limit; - u32 mask, val; - - /* Get size limit */ - if (fmt == ROT_CONTROL_FMT_RGB888) - limit = &limit_tbl->rgb888; - else - limit = &limit_tbl->ycbcr420_2p; - - /* Get mask for rounding to nearest aligned val */ - mask = ~((1 << limit->align) - 1); - - /* Set aligned width */ - val = ROT_ALIGN(*hsize, limit->align, mask); - if (val < limit->min_w) - *hsize = ROT_MIN(limit->min_w, mask); - else if (val > limit->max_w) - *hsize = ROT_MAX(limit->max_w, mask); - else - *hsize = val; - - /* Set aligned height */ - val = ROT_ALIGN(*vsize, limit->align, mask); - if (val < limit->min_h) - *vsize = ROT_MIN(limit->min_h, mask); - else if (val > limit->max_h) - *vsize = ROT_MAX(limit->max_h, mask); - else - *vsize = val; -} - -static int rotator_src_set_fmt(struct device *dev, u32 fmt) -{ - struct rot_context *rot = dev_get_drvdata(dev); u32 val; val = rot_read(ROT_CONTROL); @@ -214,515 +131,176 @@ static int rotator_src_set_fmt(struct device *dev, u32 fmt) case DRM_FORMAT_XRGB8888: val |= ROT_CONTROL_FMT_RGB888; break; - default: - DRM_ERROR("invalid image format\n"); - return -EINVAL; } rot_write(val, ROT_CONTROL); - - return 0; } -static inline bool rotator_check_reg_fmt(u32 fmt) +static void rotator_src_set_buf(struct rot_context *rot, + struct exynos_drm_ipp_buffer *buf) { - if ((fmt == ROT_CONTROL_FMT_YCBCR420_2P) || - (fmt == ROT_CONTROL_FMT_RGB888)) - return true; - - return false; -} - -static int rotator_src_set_size(struct device *dev, int swap, - struct drm_exynos_pos *pos, - struct drm_exynos_sz *sz) -{ - struct rot_context *rot = dev_get_drvdata(dev); - u32 fmt, hsize, vsize; u32 val; - /* Get format */ - fmt = rotator_reg_get_fmt(rot); - if (!rotator_check_reg_fmt(fmt)) { - DRM_ERROR("invalid format.\n"); - return -EINVAL; - } - - /* Align buffer size */ - hsize = sz->hsize; - vsize = sz->vsize; - rotator_align_size(rot, fmt, &hsize, &vsize); - /* Set buffer size configuration */ - val = ROT_SET_BUF_SIZE_H(vsize) | ROT_SET_BUF_SIZE_W(hsize); + val = ROT_SET_BUF_SIZE_H(buf->buf.height) | + ROT_SET_BUF_SIZE_W(buf->buf.pitch[0] / buf->format->cpp[0]); rot_write(val, ROT_SRC_BUF_SIZE); /* Set crop image position configuration */ - val = ROT_CROP_POS_Y(pos->y) | ROT_CROP_POS_X(pos->x); + val = ROT_CROP_POS_Y(buf->rect.y) | ROT_CROP_POS_X(buf->rect.x); rot_write(val, ROT_SRC_CROP_POS); - val = ROT_SRC_CROP_SIZE_H(pos->h) | ROT_SRC_CROP_SIZE_W(pos->w); + val = ROT_SRC_CROP_SIZE_H(buf->rect.h) | + ROT_SRC_CROP_SIZE_W(buf->rect.w); rot_write(val, ROT_SRC_CROP_SIZE); - return 0; + /* Set buffer DMA address */ + rot_write(buf->dma_addr[0], ROT_SRC_BUF_ADDR(0)); + rot_write(buf->dma_addr[1], ROT_SRC_BUF_ADDR(1)); } -static int rotator_src_set_addr(struct device *dev, - struct drm_exynos_ipp_buf_info *buf_info, - u32 buf_id, enum drm_exynos_ipp_buf_type buf_type) +static void rotator_dst_set_transf(struct rot_context *rot, + unsigned int rotation) { - struct rot_context *rot = dev_get_drvdata(dev); - dma_addr_t addr[EXYNOS_DRM_PLANAR_MAX]; - u32 val, fmt, hsize, vsize; - int i; - - /* Set current buf_id */ - rot->cur_buf_id[EXYNOS_DRM_OPS_SRC] = buf_id; - - switch (buf_type) { - case IPP_BUF_ENQUEUE: - /* Set address configuration */ - for_each_ipp_planar(i) - addr[i] = buf_info->base[i]; - - /* Get format */ - fmt = rotator_reg_get_fmt(rot); - if (!rotator_check_reg_fmt(fmt)) { - DRM_ERROR("invalid format.\n"); - return -EINVAL; - } - - /* Re-set cb planar for NV12 format */ - if ((fmt == ROT_CONTROL_FMT_YCBCR420_2P) && - !addr[EXYNOS_DRM_PLANAR_CB]) { - - val = rot_read(ROT_SRC_BUF_SIZE); - hsize = ROT_GET_BUF_SIZE_W(val); - vsize = ROT_GET_BUF_SIZE_H(val); - - /* Set cb planar */ - addr[EXYNOS_DRM_PLANAR_CB] = - addr[EXYNOS_DRM_PLANAR_Y] + hsize * vsize; - } - - for_each_ipp_planar(i) - rot_write(addr[i], ROT_SRC_BUF_ADDR(i)); - break; - case IPP_BUF_DEQUEUE: - for_each_ipp_planar(i) - rot_write(0x0, ROT_SRC_BUF_ADDR(i)); - break; - default: - /* Nothing to do */ - break; - } - - return 0; -} - -static int rotator_dst_set_transf(struct device *dev, - enum drm_exynos_degree degree, - enum drm_exynos_flip flip, bool *swap) -{ - struct rot_context *rot = dev_get_drvdata(dev); u32 val; /* Set transform configuration */ val = rot_read(ROT_CONTROL); val &= ~ROT_CONTROL_FLIP_MASK; - switch (flip) { - case EXYNOS_DRM_FLIP_VERTICAL: - val |= ROT_CONTROL_FLIP_VERTICAL; - break; - case EXYNOS_DRM_FLIP_HORIZONTAL: + if (rotation & DRM_MODE_REFLECT_X) val |= ROT_CONTROL_FLIP_HORIZONTAL; - break; - default: - /* Flip None */ - break; - } + if (rotation & DRM_MODE_REFLECT_Y) + val |= ROT_CONTROL_FLIP_VERTICAL; val &= ~ROT_CONTROL_ROT_MASK; - switch (degree) { - case EXYNOS_DRM_DEGREE_90: + if (rotation & DRM_MODE_ROTATE_90) val |= ROT_CONTROL_ROT_90; - break; - case EXYNOS_DRM_DEGREE_180: + else if (rotation & DRM_MODE_ROTATE_180) val |= ROT_CONTROL_ROT_180; - break; - case EXYNOS_DRM_DEGREE_270: + else if (rotation & DRM_MODE_ROTATE_270) val |= ROT_CONTROL_ROT_270; - break; - default: - /* Rotation 0 Degree */ - break; - } rot_write(val, ROT_CONTROL); - - /* Check degree for setting buffer size swap */ - if ((degree == EXYNOS_DRM_DEGREE_90) || - (degree == EXYNOS_DRM_DEGREE_270)) - *swap = true; - else - *swap = false; - - return 0; } -static int rotator_dst_set_size(struct device *dev, int swap, - struct drm_exynos_pos *pos, - struct drm_exynos_sz *sz) +static void rotator_dst_set_buf(struct rot_context *rot, + struct exynos_drm_ipp_buffer *buf) { - struct rot_context *rot = dev_get_drvdata(dev); - u32 val, fmt, hsize, vsize; - - /* Get format */ - fmt = rotator_reg_get_fmt(rot); - if (!rotator_check_reg_fmt(fmt)) { - DRM_ERROR("invalid format.\n"); - return -EINVAL; - } - - /* Align buffer size */ - hsize = sz->hsize; - vsize = sz->vsize; - rotator_align_size(rot, fmt, &hsize, &vsize); + u32 val; /* Set buffer size configuration */ - val = ROT_SET_BUF_SIZE_H(vsize) | ROT_SET_BUF_SIZE_W(hsize); + val = ROT_SET_BUF_SIZE_H(buf->buf.height) | + ROT_SET_BUF_SIZE_W(buf->buf.pitch[0] / buf->format->cpp[0]); rot_write(val, ROT_DST_BUF_SIZE); /* Set crop image position configuration */ - val = ROT_CROP_POS_Y(pos->y) | ROT_CROP_POS_X(pos->x); + val = ROT_CROP_POS_Y(buf->rect.y) | ROT_CROP_POS_X(buf->rect.x); rot_write(val, ROT_DST_CROP_POS); - return 0; + /* Set buffer DMA address */ + rot_write(buf->dma_addr[0], ROT_DST_BUF_ADDR(0)); + rot_write(buf->dma_addr[1], ROT_DST_BUF_ADDR(1)); } -static int rotator_dst_set_addr(struct device *dev, - struct drm_exynos_ipp_buf_info *buf_info, - u32 buf_id, enum drm_exynos_ipp_buf_type buf_type) +static void rotator_start(struct rot_context *rot) { - struct rot_context *rot = dev_get_drvdata(dev); - dma_addr_t addr[EXYNOS_DRM_PLANAR_MAX]; - u32 val, fmt, hsize, vsize; - int i; - - /* Set current buf_id */ - rot->cur_buf_id[EXYNOS_DRM_OPS_DST] = buf_id; - - switch (buf_type) { - case IPP_BUF_ENQUEUE: - /* Set address configuration */ - for_each_ipp_planar(i) - addr[i] = buf_info->base[i]; - - /* Get format */ - fmt = rotator_reg_get_fmt(rot); - if (!rotator_check_reg_fmt(fmt)) { - DRM_ERROR("invalid format.\n"); - return -EINVAL; - } - - /* Re-set cb planar for NV12 format */ - if ((fmt == ROT_CONTROL_FMT_YCBCR420_2P) && - !addr[EXYNOS_DRM_PLANAR_CB]) { - /* Get buf size */ - val = rot_read(ROT_DST_BUF_SIZE); - - hsize = ROT_GET_BUF_SIZE_W(val); - vsize = ROT_GET_BUF_SIZE_H(val); - - /* Set cb planar */ - addr[EXYNOS_DRM_PLANAR_CB] = - addr[EXYNOS_DRM_PLANAR_Y] + hsize * vsize; - } - - for_each_ipp_planar(i) - rot_write(addr[i], ROT_DST_BUF_ADDR(i)); - break; - case IPP_BUF_DEQUEUE: - for_each_ipp_planar(i) - rot_write(0x0, ROT_DST_BUF_ADDR(i)); - break; - default: - /* Nothing to do */ - break; - } + u32 val; - return 0; + /* Set interrupt enable */ + rotator_reg_set_irq(rot, true); + + val = rot_read(ROT_CONTROL); + val |= ROT_CONTROL_START; + rot_write(val, ROT_CONTROL); } -static struct exynos_drm_ipp_ops rot_src_ops = { - .set_fmt = rotator_src_set_fmt, - .set_size = rotator_src_set_size, - .set_addr = rotator_src_set_addr, -}; +static int rotator_commit(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task) +{ + struct rot_context *rot = + container_of(ipp, struct rot_context, ipp); -static struct exynos_drm_ipp_ops rot_dst_ops = { - .set_transf = rotator_dst_set_transf, - .set_size = rotator_dst_set_size, - .set_addr = rotator_dst_set_addr, -}; + pm_runtime_get_sync(rot->dev); + rot->task = task; -static int rotator_init_prop_list(struct exynos_drm_ippdrv *ippdrv) -{ - struct drm_exynos_ipp_prop_list *prop_list = &ippdrv->prop_list; - - prop_list->version = 1; - prop_list->flip = (1 << EXYNOS_DRM_FLIP_VERTICAL) | - (1 << EXYNOS_DRM_FLIP_HORIZONTAL); - prop_list->degree = (1 << EXYNOS_DRM_DEGREE_0) | - (1 << EXYNOS_DRM_DEGREE_90) | - (1 << EXYNOS_DRM_DEGREE_180) | - (1 << EXYNOS_DRM_DEGREE_270); - prop_list->csc = 0; - prop_list->crop = 0; - prop_list->scale = 0; + rotator_src_set_fmt(rot, task->src.buf.fourcc); + rotator_src_set_buf(rot, &task->src); + rotator_dst_set_transf(rot, task->transform.rotation); + rotator_dst_set_buf(rot, &task->dst); + rotator_start(rot); return 0; } -static inline bool rotator_check_drm_fmt(u32 fmt) -{ - switch (fmt) { - case DRM_FORMAT_XRGB8888: - case DRM_FORMAT_NV12: - return true; - default: - DRM_DEBUG_KMS("not support format\n"); - return false; - } -} - -static inline bool rotator_check_drm_flip(enum drm_exynos_flip flip) -{ - switch (flip) { - case EXYNOS_DRM_FLIP_NONE: - case EXYNOS_DRM_FLIP_VERTICAL: - case EXYNOS_DRM_FLIP_HORIZONTAL: - case EXYNOS_DRM_FLIP_BOTH: - return true; - default: - DRM_DEBUG_KMS("invalid flip\n"); - return false; - } -} +static const struct exynos_drm_ipp_funcs ipp_funcs = { + .commit = rotator_commit, +}; -static int rotator_ippdrv_check_property(struct device *dev, - struct drm_exynos_ipp_property *property) +static int rotator_bind(struct device *dev, struct device *master, void *data) { - struct drm_exynos_ipp_config *src_config = - &property->config[EXYNOS_DRM_OPS_SRC]; - struct drm_exynos_ipp_config *dst_config = - &property->config[EXYNOS_DRM_OPS_DST]; - struct drm_exynos_pos *src_pos = &src_config->pos; - struct drm_exynos_pos *dst_pos = &dst_config->pos; - struct drm_exynos_sz *src_sz = &src_config->sz; - struct drm_exynos_sz *dst_sz = &dst_config->sz; - bool swap = false; - - /* Check format configuration */ - if (src_config->fmt != dst_config->fmt) { - DRM_DEBUG_KMS("not support csc feature\n"); - return -EINVAL; - } - - if (!rotator_check_drm_fmt(dst_config->fmt)) { - DRM_DEBUG_KMS("invalid format\n"); - return -EINVAL; - } - - /* Check transform configuration */ - if (src_config->degree != EXYNOS_DRM_DEGREE_0) { - DRM_DEBUG_KMS("not support source-side rotation\n"); - return -EINVAL; - } - - switch (dst_config->degree) { - case EXYNOS_DRM_DEGREE_90: - case EXYNOS_DRM_DEGREE_270: - swap = true; - case EXYNOS_DRM_DEGREE_0: - case EXYNOS_DRM_DEGREE_180: - /* No problem */ - break; - default: - DRM_DEBUG_KMS("invalid degree\n"); - return -EINVAL; - } - - if (src_config->flip != EXYNOS_DRM_FLIP_NONE) { - DRM_DEBUG_KMS("not support source-side flip\n"); - return -EINVAL; - } + struct rot_context *rot = dev_get_drvdata(dev); + struct drm_device *drm_dev = data; + struct exynos_drm_ipp *ipp = &rot->ipp; - if (!rotator_check_drm_flip(dst_config->flip)) { - DRM_DEBUG_KMS("invalid flip\n"); - return -EINVAL; - } + rot->drm_dev = drm_dev; + drm_iommu_attach_device(drm_dev, dev); - /* Check size configuration */ - if ((src_pos->x + src_pos->w > src_sz->hsize) || - (src_pos->y + src_pos->h > src_sz->vsize)) { - DRM_DEBUG_KMS("out of source buffer bound\n"); - return -EINVAL; - } + exynos_drm_ipp_register(drm_dev, ipp, &ipp_funcs, + DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE, + rot->formats, rot->num_formats, "rotator"); - if (swap) { - if ((dst_pos->x + dst_pos->h > dst_sz->vsize) || - (dst_pos->y + dst_pos->w > dst_sz->hsize)) { - DRM_DEBUG_KMS("out of destination buffer bound\n"); - return -EINVAL; - } - - if ((src_pos->w != dst_pos->h) || (src_pos->h != dst_pos->w)) { - DRM_DEBUG_KMS("not support scale feature\n"); - return -EINVAL; - } - } else { - if ((dst_pos->x + dst_pos->w > dst_sz->hsize) || - (dst_pos->y + dst_pos->h > dst_sz->vsize)) { - DRM_DEBUG_KMS("out of destination buffer bound\n"); - return -EINVAL; - } - - if ((src_pos->w != dst_pos->w) || (src_pos->h != dst_pos->h)) { - DRM_DEBUG_KMS("not support scale feature\n"); - return -EINVAL; - } - } + dev_info(dev, "The exynos rotator has been probed successfully\n"); return 0; } -static int rotator_ippdrv_start(struct device *dev, enum drm_exynos_ipp_cmd cmd) +static void rotator_unbind(struct device *dev, struct device *master, + void *data) { struct rot_context *rot = dev_get_drvdata(dev); - u32 val; - - if (rot->suspended) { - DRM_ERROR("suspended state\n"); - return -EPERM; - } - - if (cmd != IPP_CMD_M2M) { - DRM_ERROR("not support cmd: %d\n", cmd); - return -EINVAL; - } - - /* Set interrupt enable */ - rotator_reg_set_irq(rot, true); - - val = rot_read(ROT_CONTROL); - val |= ROT_CONTROL_START; - - rot_write(val, ROT_CONTROL); + struct drm_device *drm_dev = data; + struct exynos_drm_ipp *ipp = &rot->ipp; - return 0; + exynos_drm_ipp_unregister(drm_dev, ipp); + drm_iommu_detach_device(rot->drm_dev, rot->dev); } -static struct rot_limit_table rot_limit_tbl_4210 = { - .ycbcr420_2p = { - .min_w = 32, - .min_h = 32, - .max_w = SZ_64K, - .max_h = SZ_64K, - .align = 3, - }, - .rgb888 = { - .min_w = 8, - .min_h = 8, - .max_w = SZ_16K, - .max_h = SZ_16K, - .align = 2, - }, -}; - -static struct rot_limit_table rot_limit_tbl_4x12 = { - .ycbcr420_2p = { - .min_w = 32, - .min_h = 32, - .max_w = SZ_32K, - .max_h = SZ_32K, - .align = 3, - }, - .rgb888 = { - .min_w = 8, - .min_h = 8, - .max_w = SZ_8K, - .max_h = SZ_8K, - .align = 2, - }, +static const struct component_ops rotator_component_ops = { + .bind = rotator_bind, + .unbind = rotator_unbind, }; -static struct rot_limit_table rot_limit_tbl_5250 = { - .ycbcr420_2p = { - .min_w = 32, - .min_h = 32, - .max_w = SZ_32K, - .max_h = SZ_32K, - .align = 3, - }, - .rgb888 = { - .min_w = 8, - .min_h = 8, - .max_w = SZ_8K, - .max_h = SZ_8K, - .align = 1, - }, -}; - -static const struct of_device_id exynos_rotator_match[] = { - { - .compatible = "samsung,exynos4210-rotator", - .data = &rot_limit_tbl_4210, - }, - { - .compatible = "samsung,exynos4212-rotator", - .data = &rot_limit_tbl_4x12, - }, - { - .compatible = "samsung,exynos5250-rotator", - .data = &rot_limit_tbl_5250, - }, - {}, -}; -MODULE_DEVICE_TABLE(of, exynos_rotator_match); - static int rotator_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; + struct resource *regs_res; struct rot_context *rot; - struct exynos_drm_ippdrv *ippdrv; + const struct rot_variant *variant; + int irq; int ret; - if (!dev->of_node) { - dev_err(dev, "cannot find of_node.\n"); - return -ENODEV; - } - rot = devm_kzalloc(dev, sizeof(*rot), GFP_KERNEL); if (!rot) return -ENOMEM; - rot->limit_tbl = (struct rot_limit_table *) - of_device_get_match_data(dev); - rot->regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - rot->regs = devm_ioremap_resource(dev, rot->regs_res); + variant = of_device_get_match_data(dev); + rot->formats = variant->formats; + rot->num_formats = variant->num_formats; + rot->dev = dev; + regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + rot->regs = devm_ioremap_resource(dev, regs_res); if (IS_ERR(rot->regs)) return PTR_ERR(rot->regs); - rot->irq = platform_get_irq(pdev, 0); - if (rot->irq < 0) { + irq = platform_get_irq(pdev, 0); + if (irq < 0) { dev_err(dev, "failed to get irq\n"); - return rot->irq; + return irq; } - ret = devm_request_threaded_irq(dev, rot->irq, NULL, - rotator_irq_handler, IRQF_ONESHOT, "drm_rotator", rot); + ret = devm_request_irq(dev, irq, rotator_irq_handler, 0, dev_name(dev), + rot); if (ret < 0) { dev_err(dev, "failed to request irq\n"); return ret; @@ -734,35 +312,19 @@ static int rotator_probe(struct platform_device *pdev) return PTR_ERR(rot->clock); } + pm_runtime_use_autosuspend(dev); + pm_runtime_set_autosuspend_delay(dev, ROTATOR_AUTOSUSPEND_DELAY); pm_runtime_enable(dev); - - ippdrv = &rot->ippdrv; - ippdrv->dev = dev; - ippdrv->ops[EXYNOS_DRM_OPS_SRC] = &rot_src_ops; - ippdrv->ops[EXYNOS_DRM_OPS_DST] = &rot_dst_ops; - ippdrv->check_property = rotator_ippdrv_check_property; - ippdrv->start = rotator_ippdrv_start; - ret = rotator_init_prop_list(ippdrv); - if (ret < 0) { - dev_err(dev, "failed to init property list.\n"); - goto err_ippdrv_register; - } - - DRM_DEBUG_KMS("ippdrv[%pK]\n", ippdrv); - platform_set_drvdata(pdev, rot); - ret = exynos_drm_ippdrv_register(ippdrv); - if (ret < 0) { - dev_err(dev, "failed to register drm rotator device\n"); - goto err_ippdrv_register; - } - - dev_info(dev, "The exynos rotator is probed successfully\n"); + ret = component_add(dev, &rotator_component_ops); + if (ret) + goto err_component; return 0; -err_ippdrv_register: +err_component: + pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); return ret; } @@ -770,45 +332,101 @@ err_ippdrv_register: static int rotator_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct rot_context *rot = dev_get_drvdata(dev); - struct exynos_drm_ippdrv *ippdrv = &rot->ippdrv; - - exynos_drm_ippdrv_unregister(ippdrv); + component_del(dev, &rotator_component_ops); + pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); return 0; } #ifdef CONFIG_PM -static int rotator_clk_crtl(struct rot_context *rot, bool enable) -{ - if (enable) { - clk_prepare_enable(rot->clock); - rot->suspended = false; - } else { - clk_disable_unprepare(rot->clock); - rot->suspended = true; - } - - return 0; -} - static int rotator_runtime_suspend(struct device *dev) { struct rot_context *rot = dev_get_drvdata(dev); - return rotator_clk_crtl(rot, false); + clk_disable_unprepare(rot->clock); + return 0; } static int rotator_runtime_resume(struct device *dev) { struct rot_context *rot = dev_get_drvdata(dev); - return rotator_clk_crtl(rot, true); + return clk_prepare_enable(rot->clock); } #endif +static const struct drm_exynos_ipp_limit rotator_4210_rbg888_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 8, SZ_16K }, .v = { 8, SZ_16K }) }, + { IPP_SIZE_LIMIT(AREA, .h.align = 4, .v.align = 4) }, +}; + +static const struct drm_exynos_ipp_limit rotator_4412_rbg888_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 8, SZ_8K }, .v = { 8, SZ_8K }) }, + { IPP_SIZE_LIMIT(AREA, .h.align = 4, .v.align = 4) }, +}; + +static const struct drm_exynos_ipp_limit rotator_5250_rbg888_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 8, SZ_8K }, .v = { 8, SZ_8K }) }, + { IPP_SIZE_LIMIT(AREA, .h.align = 2, .v.align = 2) }, +}; + +static const struct drm_exynos_ipp_limit rotator_4210_yuv_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 32, SZ_64K }, .v = { 32, SZ_64K }) }, + { IPP_SIZE_LIMIT(AREA, .h.align = 8, .v.align = 8) }, +}; + +static const struct drm_exynos_ipp_limit rotator_4412_yuv_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 32, SZ_32K }, .v = { 32, SZ_32K }) }, + { IPP_SIZE_LIMIT(AREA, .h.align = 8, .v.align = 8) }, +}; + +static const struct exynos_drm_ipp_formats rotator_4210_formats[] = { + { IPP_SRCDST_FORMAT(XRGB8888, rotator_4210_rbg888_limits) }, + { IPP_SRCDST_FORMAT(NV12, rotator_4210_yuv_limits) }, +}; + +static const struct exynos_drm_ipp_formats rotator_4412_formats[] = { + { IPP_SRCDST_FORMAT(XRGB8888, rotator_4412_rbg888_limits) }, + { IPP_SRCDST_FORMAT(NV12, rotator_4412_yuv_limits) }, +}; + +static const struct exynos_drm_ipp_formats rotator_5250_formats[] = { + { IPP_SRCDST_FORMAT(XRGB8888, rotator_5250_rbg888_limits) }, + { IPP_SRCDST_FORMAT(NV12, rotator_4412_yuv_limits) }, +}; + +static const struct rot_variant rotator_4210_data = { + .formats = rotator_4210_formats, + .num_formats = ARRAY_SIZE(rotator_4210_formats), +}; + +static const struct rot_variant rotator_4412_data = { + .formats = rotator_4412_formats, + .num_formats = ARRAY_SIZE(rotator_4412_formats), +}; + +static const struct rot_variant rotator_5250_data = { + .formats = rotator_5250_formats, + .num_formats = ARRAY_SIZE(rotator_5250_formats), +}; + +static const struct of_device_id exynos_rotator_match[] = { + { + .compatible = "samsung,exynos4210-rotator", + .data = &rotator_4210_data, + }, { + .compatible = "samsung,exynos4212-rotator", + .data = &rotator_4412_data, + }, { + .compatible = "samsung,exynos5250-rotator", + .data = &rotator_5250_data, + }, { + }, +}; +MODULE_DEVICE_TABLE(of, exynos_rotator_match); + static const struct dev_pm_ops rotator_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) @@ -820,7 +438,7 @@ struct platform_driver rotator_driver = { .probe = rotator_probe, .remove = rotator_remove, .driver = { - .name = "exynos-rot", + .name = "exynos-rotator", .owner = THIS_MODULE, .pm = &rotator_pm_ops, .of_match_table = exynos_rotator_match, diff --git a/drivers/gpu/drm/exynos/exynos_drm_scaler.c b/drivers/gpu/drm/exynos/exynos_drm_scaler.c new file mode 100644 index 000000000000..63b05b7c846a --- /dev/null +++ b/drivers/gpu/drm/exynos/exynos_drm_scaler.c @@ -0,0 +1,694 @@ +/* + * Copyright (C) 2017 Samsung Electronics Co.Ltd + * Author: + * Andrzej Pietrasiewicz <andrzej.p@samsung.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundationr + */ + +#include <linux/kernel.h> +#include <linux/component.h> +#include <linux/err.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/platform_device.h> +#include <linux/clk.h> +#include <linux/of_device.h> +#include <linux/pm_runtime.h> + +#include <drm/drmP.h> +#include <drm/exynos_drm.h> +#include "regs-scaler.h" +#include "exynos_drm_fb.h" +#include "exynos_drm_drv.h" +#include "exynos_drm_iommu.h" +#include "exynos_drm_ipp.h" + +#define scaler_read(offset) readl(scaler->regs + (offset)) +#define scaler_write(cfg, offset) writel(cfg, scaler->regs + (offset)) +#define SCALER_MAX_CLK 4 +#define SCALER_AUTOSUSPEND_DELAY 2000 + +struct scaler_data { + const char *clk_name[SCALER_MAX_CLK]; + unsigned int num_clk; + const struct exynos_drm_ipp_formats *formats; + unsigned int num_formats; +}; + +struct scaler_context { + struct exynos_drm_ipp ipp; + struct drm_device *drm_dev; + struct device *dev; + void __iomem *regs; + struct clk *clock[SCALER_MAX_CLK]; + struct exynos_drm_ipp_task *task; + const struct scaler_data *scaler_data; +}; + +static u32 scaler_get_format(u32 drm_fmt) +{ + switch (drm_fmt) { + case DRM_FORMAT_NV21: + return SCALER_YUV420_2P_UV; + case DRM_FORMAT_NV12: + return SCALER_YUV420_2P_VU; + case DRM_FORMAT_YUV420: + return SCALER_YUV420_3P; + case DRM_FORMAT_YUYV: + return SCALER_YUV422_1P_YUYV; + case DRM_FORMAT_UYVY: + return SCALER_YUV422_1P_UYVY; + case DRM_FORMAT_YVYU: + return SCALER_YUV422_1P_YVYU; + case DRM_FORMAT_NV61: + return SCALER_YUV422_2P_UV; + case DRM_FORMAT_NV16: + return SCALER_YUV422_2P_VU; + case DRM_FORMAT_YUV422: + return SCALER_YUV422_3P; + case DRM_FORMAT_NV42: + return SCALER_YUV444_2P_UV; + case DRM_FORMAT_NV24: + return SCALER_YUV444_2P_VU; + case DRM_FORMAT_YUV444: + return SCALER_YUV444_3P; + case DRM_FORMAT_RGB565: + return SCALER_RGB_565; + case DRM_FORMAT_XRGB1555: + return SCALER_ARGB1555; + case DRM_FORMAT_ARGB1555: + return SCALER_ARGB1555; + case DRM_FORMAT_XRGB4444: + return SCALER_ARGB4444; + case DRM_FORMAT_ARGB4444: + return SCALER_ARGB4444; + case DRM_FORMAT_XRGB8888: + return SCALER_ARGB8888; + case DRM_FORMAT_ARGB8888: + return SCALER_ARGB8888; + case DRM_FORMAT_RGBX8888: + return SCALER_RGBA8888; + case DRM_FORMAT_RGBA8888: + return SCALER_RGBA8888; + default: + break; + } + + return 0; +} + +static inline void scaler_enable_int(struct scaler_context *scaler) +{ + u32 val; + + val = SCALER_INT_EN_TIMEOUT | + SCALER_INT_EN_ILLEGAL_BLEND | + SCALER_INT_EN_ILLEGAL_RATIO | + SCALER_INT_EN_ILLEGAL_DST_HEIGHT | + SCALER_INT_EN_ILLEGAL_DST_WIDTH | + SCALER_INT_EN_ILLEGAL_DST_V_POS | + SCALER_INT_EN_ILLEGAL_DST_H_POS | + SCALER_INT_EN_ILLEGAL_DST_C_SPAN | + SCALER_INT_EN_ILLEGAL_DST_Y_SPAN | + SCALER_INT_EN_ILLEGAL_DST_CR_BASE | + SCALER_INT_EN_ILLEGAL_DST_CB_BASE | + SCALER_INT_EN_ILLEGAL_DST_Y_BASE | + SCALER_INT_EN_ILLEGAL_DST_COLOR | + SCALER_INT_EN_ILLEGAL_SRC_HEIGHT | + SCALER_INT_EN_ILLEGAL_SRC_WIDTH | + SCALER_INT_EN_ILLEGAL_SRC_CV_POS | + SCALER_INT_EN_ILLEGAL_SRC_CH_POS | + SCALER_INT_EN_ILLEGAL_SRC_YV_POS | + SCALER_INT_EN_ILLEGAL_SRC_YH_POS | + SCALER_INT_EN_ILLEGAL_DST_SPAN | + SCALER_INT_EN_ILLEGAL_SRC_Y_SPAN | + SCALER_INT_EN_ILLEGAL_SRC_CR_BASE | + SCALER_INT_EN_ILLEGAL_SRC_CB_BASE | + SCALER_INT_EN_ILLEGAL_SRC_Y_BASE | + SCALER_INT_EN_ILLEGAL_SRC_COLOR | + SCALER_INT_EN_FRAME_END; + scaler_write(val, SCALER_INT_EN); +} + +static inline void scaler_set_src_fmt(struct scaler_context *scaler, + u32 src_fmt) +{ + u32 val; + + val = SCALER_SRC_CFG_SET_COLOR_FORMAT(src_fmt); + scaler_write(val, SCALER_SRC_CFG); +} + +static inline void scaler_set_src_base(struct scaler_context *scaler, + struct exynos_drm_ipp_buffer *src_buf) +{ + static unsigned int bases[] = { + SCALER_SRC_Y_BASE, + SCALER_SRC_CB_BASE, + SCALER_SRC_CR_BASE, + }; + int i; + + for (i = 0; i < src_buf->format->num_planes; ++i) + scaler_write(src_buf->dma_addr[i], bases[i]); +} + +static inline void scaler_set_src_span(struct scaler_context *scaler, + struct exynos_drm_ipp_buffer *src_buf) +{ + u32 val; + + val = SCALER_SRC_SPAN_SET_Y_SPAN(src_buf->buf.pitch[0] / + src_buf->format->cpp[0]); + + if (src_buf->format->num_planes > 1) + val |= SCALER_SRC_SPAN_SET_C_SPAN(src_buf->buf.pitch[1]); + + scaler_write(val, SCALER_SRC_SPAN); +} + +static inline void scaler_set_src_luma_pos(struct scaler_context *scaler, + struct drm_exynos_ipp_task_rect *src_pos) +{ + u32 val; + + val = SCALER_SRC_Y_POS_SET_YH_POS(src_pos->x << 2); + val |= SCALER_SRC_Y_POS_SET_YV_POS(src_pos->y << 2); + scaler_write(val, SCALER_SRC_Y_POS); + scaler_write(val, SCALER_SRC_C_POS); /* ATTENTION! */ +} + +static inline void scaler_set_src_wh(struct scaler_context *scaler, + struct drm_exynos_ipp_task_rect *src_pos) +{ + u32 val; + + val = SCALER_SRC_WH_SET_WIDTH(src_pos->w); + val |= SCALER_SRC_WH_SET_HEIGHT(src_pos->h); + scaler_write(val, SCALER_SRC_WH); +} + +static inline void scaler_set_dst_fmt(struct scaler_context *scaler, + u32 dst_fmt) +{ + u32 val; + + val = SCALER_DST_CFG_SET_COLOR_FORMAT(dst_fmt); + scaler_write(val, SCALER_DST_CFG); +} + +static inline void scaler_set_dst_base(struct scaler_context *scaler, + struct exynos_drm_ipp_buffer *dst_buf) +{ + static unsigned int bases[] = { + SCALER_DST_Y_BASE, + SCALER_DST_CB_BASE, + SCALER_DST_CR_BASE, + }; + int i; + + for (i = 0; i < dst_buf->format->num_planes; ++i) + scaler_write(dst_buf->dma_addr[i], bases[i]); +} + +static inline void scaler_set_dst_span(struct scaler_context *scaler, + struct exynos_drm_ipp_buffer *dst_buf) +{ + u32 val; + + val = SCALER_DST_SPAN_SET_Y_SPAN(dst_buf->buf.pitch[0] / + dst_buf->format->cpp[0]); + + if (dst_buf->format->num_planes > 1) + val |= SCALER_DST_SPAN_SET_C_SPAN(dst_buf->buf.pitch[1]); + + scaler_write(val, SCALER_DST_SPAN); +} + +static inline void scaler_set_dst_luma_pos(struct scaler_context *scaler, + struct drm_exynos_ipp_task_rect *dst_pos) +{ + u32 val; + + val = SCALER_DST_WH_SET_WIDTH(dst_pos->w); + val |= SCALER_DST_WH_SET_HEIGHT(dst_pos->h); + scaler_write(val, SCALER_DST_WH); +} + +static inline void scaler_set_dst_wh(struct scaler_context *scaler, + struct drm_exynos_ipp_task_rect *dst_pos) +{ + u32 val; + + val = SCALER_DST_POS_SET_H_POS(dst_pos->x); + val |= SCALER_DST_POS_SET_V_POS(dst_pos->y); + scaler_write(val, SCALER_DST_POS); +} + +static inline void scaler_set_hv_ratio(struct scaler_context *scaler, + unsigned int rotation, + struct drm_exynos_ipp_task_rect *src_pos, + struct drm_exynos_ipp_task_rect *dst_pos) +{ + u32 val, h_ratio, v_ratio; + + if (drm_rotation_90_or_270(rotation)) { + h_ratio = (src_pos->h << 16) / dst_pos->w; + v_ratio = (src_pos->w << 16) / dst_pos->h; + } else { + h_ratio = (src_pos->w << 16) / dst_pos->w; + v_ratio = (src_pos->h << 16) / dst_pos->h; + } + + val = SCALER_H_RATIO_SET(h_ratio); + scaler_write(val, SCALER_H_RATIO); + + val = SCALER_V_RATIO_SET(v_ratio); + scaler_write(val, SCALER_V_RATIO); +} + +static inline void scaler_set_rotation(struct scaler_context *scaler, + unsigned int rotation) +{ + u32 val = 0; + + if (rotation & DRM_MODE_ROTATE_90) + val |= SCALER_ROT_CFG_SET_ROTMODE(SCALER_ROT_MODE_90); + else if (rotation & DRM_MODE_ROTATE_180) + val |= SCALER_ROT_CFG_SET_ROTMODE(SCALER_ROT_MODE_180); + else if (rotation & DRM_MODE_ROTATE_270) + val |= SCALER_ROT_CFG_SET_ROTMODE(SCALER_ROT_MODE_270); + if (rotation & DRM_MODE_REFLECT_X) + val |= SCALER_ROT_CFG_FLIP_X_EN; + if (rotation & DRM_MODE_REFLECT_Y) + val |= SCALER_ROT_CFG_FLIP_Y_EN; + scaler_write(val, SCALER_ROT_CFG); +} + +static inline void scaler_set_csc(struct scaler_context *scaler, + const struct drm_format_info *fmt) +{ + static const u32 csc_mtx[2][3][3] = { + { /* YCbCr to RGB */ + {0x254, 0x000, 0x331}, + {0x254, 0xf38, 0xe60}, + {0x254, 0x409, 0x000}, + }, + { /* RGB to YCbCr */ + {0x084, 0x102, 0x032}, + {0xfb4, 0xf6b, 0x0e1}, + {0x0e1, 0xf44, 0xfdc}, + }, + }; + int i, j, dir; + + switch (fmt->format) { + case DRM_FORMAT_RGB565: + case DRM_FORMAT_XRGB1555: + case DRM_FORMAT_ARGB1555: + case DRM_FORMAT_XRGB4444: + case DRM_FORMAT_ARGB4444: + case DRM_FORMAT_XRGB8888: + case DRM_FORMAT_ARGB8888: + case DRM_FORMAT_RGBX8888: + case DRM_FORMAT_RGBA8888: + dir = 1; + break; + default: + dir = 0; + } + + for (i = 0; i < 3; i++) + for (j = 0; j < 3; j++) + scaler_write(csc_mtx[dir][i][j], SCALER_CSC_COEF(j, i)); +} + +static inline void scaler_set_timer(struct scaler_context *scaler, + unsigned int timer, unsigned int divider) +{ + u32 val; + + val = SCALER_TIMEOUT_CTRL_TIMER_ENABLE; + val |= SCALER_TIMEOUT_CTRL_SET_TIMER_VALUE(timer); + val |= SCALER_TIMEOUT_CTRL_SET_TIMER_DIV(divider); + scaler_write(val, SCALER_TIMEOUT_CTRL); +} + +static inline void scaler_start_hw(struct scaler_context *scaler) +{ + scaler_write(SCALER_CFG_START_CMD, SCALER_CFG); +} + +static int scaler_commit(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task) +{ + struct scaler_context *scaler = + container_of(ipp, struct scaler_context, ipp); + + u32 src_fmt = scaler_get_format(task->src.buf.fourcc); + struct drm_exynos_ipp_task_rect *src_pos = &task->src.rect; + + u32 dst_fmt = scaler_get_format(task->dst.buf.fourcc); + struct drm_exynos_ipp_task_rect *dst_pos = &task->dst.rect; + + scaler->task = task; + + pm_runtime_get_sync(scaler->dev); + + scaler_set_src_fmt(scaler, src_fmt); + scaler_set_src_base(scaler, &task->src); + scaler_set_src_span(scaler, &task->src); + scaler_set_src_luma_pos(scaler, src_pos); + scaler_set_src_wh(scaler, src_pos); + + scaler_set_dst_fmt(scaler, dst_fmt); + scaler_set_dst_base(scaler, &task->dst); + scaler_set_dst_span(scaler, &task->dst); + scaler_set_dst_luma_pos(scaler, dst_pos); + scaler_set_dst_wh(scaler, dst_pos); + + scaler_set_hv_ratio(scaler, task->transform.rotation, src_pos, dst_pos); + scaler_set_rotation(scaler, task->transform.rotation); + + scaler_set_csc(scaler, task->src.format); + + scaler_set_timer(scaler, 0xffff, 0xf); + + scaler_enable_int(scaler); + scaler_start_hw(scaler); + + return 0; +} + +static struct exynos_drm_ipp_funcs ipp_funcs = { + .commit = scaler_commit, +}; + +static inline void scaler_disable_int(struct scaler_context *scaler) +{ + scaler_write(0, SCALER_INT_EN); +} + +static inline u32 scaler_get_int_status(struct scaler_context *scaler) +{ + return scaler_read(SCALER_INT_STATUS); +} + +static inline bool scaler_task_done(u32 val) +{ + return val & SCALER_INT_STATUS_FRAME_END ? 0 : -EINVAL; +} + +static irqreturn_t scaler_irq_handler(int irq, void *arg) +{ + struct scaler_context *scaler = arg; + + u32 val = scaler_get_int_status(scaler); + + scaler_disable_int(scaler); + + if (scaler->task) { + struct exynos_drm_ipp_task *task = scaler->task; + + scaler->task = NULL; + pm_runtime_mark_last_busy(scaler->dev); + pm_runtime_put_autosuspend(scaler->dev); + exynos_drm_ipp_task_done(task, scaler_task_done(val)); + } + + return IRQ_HANDLED; +} + +static int scaler_bind(struct device *dev, struct device *master, void *data) +{ + struct scaler_context *scaler = dev_get_drvdata(dev); + struct drm_device *drm_dev = data; + struct exynos_drm_ipp *ipp = &scaler->ipp; + + scaler->drm_dev = drm_dev; + drm_iommu_attach_device(drm_dev, dev); + + exynos_drm_ipp_register(drm_dev, ipp, &ipp_funcs, + DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE | + DRM_EXYNOS_IPP_CAP_SCALE | DRM_EXYNOS_IPP_CAP_CONVERT, + scaler->scaler_data->formats, + scaler->scaler_data->num_formats, "scaler"); + + dev_info(dev, "The exynos scaler has been probed successfully\n"); + + return 0; +} + +static void scaler_unbind(struct device *dev, struct device *master, + void *data) +{ + struct scaler_context *scaler = dev_get_drvdata(dev); + struct drm_device *drm_dev = data; + struct exynos_drm_ipp *ipp = &scaler->ipp; + + exynos_drm_ipp_unregister(drm_dev, ipp); + drm_iommu_detach_device(scaler->drm_dev, scaler->dev); +} + +static const struct component_ops scaler_component_ops = { + .bind = scaler_bind, + .unbind = scaler_unbind, +}; + +static int scaler_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct resource *regs_res; + struct scaler_context *scaler; + int irq; + int ret, i; + + scaler = devm_kzalloc(dev, sizeof(*scaler), GFP_KERNEL); + if (!scaler) + return -ENOMEM; + + scaler->scaler_data = + (struct scaler_data *)of_device_get_match_data(dev); + + scaler->dev = dev; + regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + scaler->regs = devm_ioremap_resource(dev, regs_res); + if (IS_ERR(scaler->regs)) + return PTR_ERR(scaler->regs); + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_err(dev, "failed to get irq\n"); + return irq; + } + + ret = devm_request_threaded_irq(dev, irq, NULL, scaler_irq_handler, + IRQF_ONESHOT, "drm_scaler", scaler); + if (ret < 0) { + dev_err(dev, "failed to request irq\n"); + return ret; + } + + for (i = 0; i < scaler->scaler_data->num_clk; ++i) { + scaler->clock[i] = devm_clk_get(dev, + scaler->scaler_data->clk_name[i]); + if (IS_ERR(scaler->clock[i])) { + dev_err(dev, "failed to get clock\n"); + return PTR_ERR(scaler->clock[i]); + } + } + + pm_runtime_use_autosuspend(dev); + pm_runtime_set_autosuspend_delay(dev, SCALER_AUTOSUSPEND_DELAY); + pm_runtime_enable(dev); + platform_set_drvdata(pdev, scaler); + + ret = component_add(dev, &scaler_component_ops); + if (ret) + goto err_ippdrv_register; + + return 0; + +err_ippdrv_register: + pm_runtime_dont_use_autosuspend(dev); + pm_runtime_disable(dev); + return ret; +} + +static int scaler_remove(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + + component_del(dev, &scaler_component_ops); + pm_runtime_dont_use_autosuspend(dev); + pm_runtime_disable(dev); + + return 0; +} + +#ifdef CONFIG_PM + +static int clk_disable_unprepare_wrapper(struct clk *clk) +{ + clk_disable_unprepare(clk); + + return 0; +} + +static int scaler_clk_ctrl(struct scaler_context *scaler, bool enable) +{ + int (*clk_fun)(struct clk *clk), i; + + clk_fun = enable ? clk_prepare_enable : clk_disable_unprepare_wrapper; + + for (i = 0; i < scaler->scaler_data->num_clk; ++i) + clk_fun(scaler->clock[i]); + + return 0; +} + +static int scaler_runtime_suspend(struct device *dev) +{ + struct scaler_context *scaler = dev_get_drvdata(dev); + + return scaler_clk_ctrl(scaler, false); +} + +static int scaler_runtime_resume(struct device *dev) +{ + struct scaler_context *scaler = dev_get_drvdata(dev); + + return scaler_clk_ctrl(scaler, true); +} +#endif + +static const struct dev_pm_ops scaler_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) + SET_RUNTIME_PM_OPS(scaler_runtime_suspend, scaler_runtime_resume, NULL) +}; + +static const struct drm_exynos_ipp_limit scaler_5420_two_pixel_hv_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 16, SZ_8K }, .v = { 16, SZ_8K }) }, + { IPP_SIZE_LIMIT(AREA, .h.align = 2, .v.align = 2) }, + { IPP_SCALE_LIMIT(.h = { 65536 * 1 / 4, 65536 * 16 }, + .v = { 65536 * 1 / 4, 65536 * 16 }) }, +}; + +static const struct drm_exynos_ipp_limit scaler_5420_two_pixel_h_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 16, SZ_8K }, .v = { 16, SZ_8K }) }, + { IPP_SIZE_LIMIT(AREA, .h.align = 2, .v.align = 1) }, + { IPP_SCALE_LIMIT(.h = { 65536 * 1 / 4, 65536 * 16 }, + .v = { 65536 * 1 / 4, 65536 * 16 }) }, +}; + +static const struct drm_exynos_ipp_limit scaler_5420_one_pixel_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 16, SZ_8K }, .v = { 16, SZ_8K }) }, + { IPP_SCALE_LIMIT(.h = { 65536 * 1 / 4, 65536 * 16 }, + .v = { 65536 * 1 / 4, 65536 * 16 }) }, +}; + +static const struct exynos_drm_ipp_formats exynos5420_formats[] = { + /* SCALER_YUV420_2P_UV */ + { IPP_SRCDST_FORMAT(NV21, scaler_5420_two_pixel_hv_limits) }, + + /* SCALER_YUV420_2P_VU */ + { IPP_SRCDST_FORMAT(NV12, scaler_5420_two_pixel_hv_limits) }, + + /* SCALER_YUV420_3P */ + { IPP_SRCDST_FORMAT(YUV420, scaler_5420_two_pixel_hv_limits) }, + + /* SCALER_YUV422_1P_YUYV */ + { IPP_SRCDST_FORMAT(YUYV, scaler_5420_two_pixel_h_limits) }, + + /* SCALER_YUV422_1P_UYVY */ + { IPP_SRCDST_FORMAT(UYVY, scaler_5420_two_pixel_h_limits) }, + + /* SCALER_YUV422_1P_YVYU */ + { IPP_SRCDST_FORMAT(YVYU, scaler_5420_two_pixel_h_limits) }, + + /* SCALER_YUV422_2P_UV */ + { IPP_SRCDST_FORMAT(NV61, scaler_5420_two_pixel_h_limits) }, + + /* SCALER_YUV422_2P_VU */ + { IPP_SRCDST_FORMAT(NV16, scaler_5420_two_pixel_h_limits) }, + + /* SCALER_YUV422_3P */ + { IPP_SRCDST_FORMAT(YUV422, scaler_5420_two_pixel_h_limits) }, + + /* SCALER_YUV444_2P_UV */ + { IPP_SRCDST_FORMAT(NV42, scaler_5420_one_pixel_limits) }, + + /* SCALER_YUV444_2P_VU */ + { IPP_SRCDST_FORMAT(NV24, scaler_5420_one_pixel_limits) }, + + /* SCALER_YUV444_3P */ + { IPP_SRCDST_FORMAT(YUV444, scaler_5420_one_pixel_limits) }, + + /* SCALER_RGB_565 */ + { IPP_SRCDST_FORMAT(RGB565, scaler_5420_one_pixel_limits) }, + + /* SCALER_ARGB1555 */ + { IPP_SRCDST_FORMAT(XRGB1555, scaler_5420_one_pixel_limits) }, + + /* SCALER_ARGB1555 */ + { IPP_SRCDST_FORMAT(ARGB1555, scaler_5420_one_pixel_limits) }, + + /* SCALER_ARGB4444 */ + { IPP_SRCDST_FORMAT(XRGB4444, scaler_5420_one_pixel_limits) }, + + /* SCALER_ARGB4444 */ + { IPP_SRCDST_FORMAT(ARGB4444, scaler_5420_one_pixel_limits) }, + + /* SCALER_ARGB8888 */ + { IPP_SRCDST_FORMAT(XRGB8888, scaler_5420_one_pixel_limits) }, + + /* SCALER_ARGB8888 */ + { IPP_SRCDST_FORMAT(ARGB8888, scaler_5420_one_pixel_limits) }, + + /* SCALER_RGBA8888 */ + { IPP_SRCDST_FORMAT(RGBX8888, scaler_5420_one_pixel_limits) }, + + /* SCALER_RGBA8888 */ + { IPP_SRCDST_FORMAT(RGBA8888, scaler_5420_one_pixel_limits) }, +}; + +static const struct scaler_data exynos5420_data = { + .clk_name = {"mscl"}, + .num_clk = 1, + .formats = exynos5420_formats, + .num_formats = ARRAY_SIZE(exynos5420_formats), +}; + +static const struct scaler_data exynos5433_data = { + .clk_name = {"pclk", "aclk", "aclk_xiu"}, + .num_clk = 3, + .formats = exynos5420_formats, /* intentional */ + .num_formats = ARRAY_SIZE(exynos5420_formats), +}; + +static const struct of_device_id exynos_scaler_match[] = { + { + .compatible = "samsung,exynos5420-scaler", + .data = &exynos5420_data, + }, { + .compatible = "samsung,exynos5433-scaler", + .data = &exynos5433_data, + }, { + }, +}; +MODULE_DEVICE_TABLE(of, exynos_scaler_match); + +struct platform_driver scaler_driver = { + .probe = scaler_probe, + .remove = scaler_remove, + .driver = { + .name = "exynos-scaler", + .owner = THIS_MODULE, + .pm = &scaler_pm_ops, + .of_match_table = exynos_scaler_match, + }, +}; diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c index abd84cbcf1c2..09c4bc0b1859 100644 --- a/drivers/gpu/drm/exynos/exynos_hdmi.c +++ b/drivers/gpu/drm/exynos/exynos_hdmi.c @@ -954,8 +954,6 @@ static int hdmi_create_connector(struct drm_encoder *encoder) drm_mode_connector_attach_encoder(connector, encoder); if (hdata->bridge) { - encoder->bridge = hdata->bridge; - hdata->bridge->encoder = encoder; ret = drm_bridge_attach(encoder, hdata->bridge, NULL); if (ret) DRM_ERROR("Failed to attach bridge\n"); diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c index 257299ec95c4..272c79f5f5bf 100644 --- a/drivers/gpu/drm/exynos/exynos_mixer.c +++ b/drivers/gpu/drm/exynos/exynos_mixer.c @@ -473,7 +473,7 @@ static void vp_video_buffer(struct mixer_context *ctx, chroma_addr[1] = chroma_addr[0] + 0x40; } else { luma_addr[1] = luma_addr[0] + fb->pitches[0]; - chroma_addr[1] = chroma_addr[0] + fb->pitches[0]; + chroma_addr[1] = chroma_addr[0] + fb->pitches[1]; } } else { luma_addr[1] = 0; @@ -482,6 +482,7 @@ static void vp_video_buffer(struct mixer_context *ctx, spin_lock_irqsave(&ctx->reg_slock, flags); + vp_reg_write(ctx, VP_SHADOW_UPDATE, 1); /* interlace or progressive scan mode */ val = (test_bit(MXR_BIT_INTERLACE, &ctx->flags) ? ~0 : 0); vp_reg_writemask(ctx, VP_MODE, val, VP_MODE_LINE_SKIP); @@ -495,21 +496,23 @@ static void vp_video_buffer(struct mixer_context *ctx, vp_reg_write(ctx, VP_IMG_SIZE_Y, VP_IMG_HSIZE(fb->pitches[0]) | VP_IMG_VSIZE(fb->height)); /* chroma plane for NV12/NV21 is half the height of the luma plane */ - vp_reg_write(ctx, VP_IMG_SIZE_C, VP_IMG_HSIZE(fb->pitches[0]) | + vp_reg_write(ctx, VP_IMG_SIZE_C, VP_IMG_HSIZE(fb->pitches[1]) | VP_IMG_VSIZE(fb->height / 2)); vp_reg_write(ctx, VP_SRC_WIDTH, state->src.w); - vp_reg_write(ctx, VP_SRC_HEIGHT, state->src.h); vp_reg_write(ctx, VP_SRC_H_POSITION, VP_SRC_H_POSITION_VAL(state->src.x)); - vp_reg_write(ctx, VP_SRC_V_POSITION, state->src.y); - vp_reg_write(ctx, VP_DST_WIDTH, state->crtc.w); vp_reg_write(ctx, VP_DST_H_POSITION, state->crtc.x); + if (test_bit(MXR_BIT_INTERLACE, &ctx->flags)) { + vp_reg_write(ctx, VP_SRC_HEIGHT, state->src.h / 2); + vp_reg_write(ctx, VP_SRC_V_POSITION, state->src.y / 2); vp_reg_write(ctx, VP_DST_HEIGHT, state->crtc.h / 2); vp_reg_write(ctx, VP_DST_V_POSITION, state->crtc.y / 2); } else { + vp_reg_write(ctx, VP_SRC_HEIGHT, state->src.h); + vp_reg_write(ctx, VP_SRC_V_POSITION, state->src.y); vp_reg_write(ctx, VP_DST_HEIGHT, state->crtc.h); vp_reg_write(ctx, VP_DST_V_POSITION, state->crtc.y); } @@ -699,6 +702,15 @@ static irqreturn_t mixer_irq_handler(int irq, void *arg) /* interlace scan need to check shadow register */ if (test_bit(MXR_BIT_INTERLACE, &ctx->flags)) { + if (test_bit(MXR_BIT_VP_ENABLED, &ctx->flags) && + vp_reg_read(ctx, VP_SHADOW_UPDATE)) + goto out; + + base = mixer_reg_read(ctx, MXR_CFG); + shadow = mixer_reg_read(ctx, MXR_CFG_S); + if (base != shadow) + goto out; + base = mixer_reg_read(ctx, MXR_GRAPHIC_BASE(0)); shadow = mixer_reg_read(ctx, MXR_GRAPHIC_BASE_S(0)); if (base != shadow) diff --git a/drivers/gpu/drm/exynos/regs-mixer.h b/drivers/gpu/drm/exynos/regs-mixer.h index c311f571bdf9..189cfa2470a8 100644 --- a/drivers/gpu/drm/exynos/regs-mixer.h +++ b/drivers/gpu/drm/exynos/regs-mixer.h @@ -47,6 +47,7 @@ #define MXR_MO 0x0304 #define MXR_RESOLUTION 0x0310 +#define MXR_CFG_S 0x2004 #define MXR_GRAPHIC0_BASE_S 0x2024 #define MXR_GRAPHIC1_BASE_S 0x2044 diff --git a/drivers/gpu/drm/exynos/regs-scaler.h b/drivers/gpu/drm/exynos/regs-scaler.h new file mode 100644 index 000000000000..fc7ccad75e74 --- /dev/null +++ b/drivers/gpu/drm/exynos/regs-scaler.h @@ -0,0 +1,426 @@ +/* drivers/gpu/drm/exynos/regs-scaler.h + * + * Copyright (c) 2017 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * Author: Andrzej Pietrasiewicz <andrzej.p@samsung.com> + * + * Register definition file for Samsung scaler driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef EXYNOS_REGS_SCALER_H +#define EXYNOS_REGS_SCALER_H + +/* Register part */ + +/* Global setting */ +#define SCALER_STATUS 0x0 /* no shadow */ +#define SCALER_CFG 0x4 + +/* Interrupt */ +#define SCALER_INT_EN 0x8 /* no shadow */ +#define SCALER_INT_STATUS 0xc /* no shadow */ + +/* SRC */ +#define SCALER_SRC_CFG 0x10 +#define SCALER_SRC_Y_BASE 0x14 +#define SCALER_SRC_CB_BASE 0x18 +#define SCALER_SRC_CR_BASE 0x294 +#define SCALER_SRC_SPAN 0x1c +#define SCALER_SRC_Y_POS 0x20 +#define SCALER_SRC_WH 0x24 +#define SCALER_SRC_C_POS 0x28 + +/* DST */ +#define SCALER_DST_CFG 0x30 +#define SCALER_DST_Y_BASE 0x34 +#define SCALER_DST_CB_BASE 0x38 +#define SCALER_DST_CR_BASE 0x298 +#define SCALER_DST_SPAN 0x3c +#define SCALER_DST_WH 0x40 +#define SCALER_DST_POS 0x44 + +/* Ratio */ +#define SCALER_H_RATIO 0x50 +#define SCALER_V_RATIO 0x54 + +/* Rotation */ +#define SCALER_ROT_CFG 0x58 + +/* Coefficient */ +/* + * YHCOEF_{x}{A|B|C|D} CHCOEF_{x}{A|B|C|D} + * + * A B C D A B C D + * 0 60 64 68 6c 140 144 148 14c + * 1 70 74 78 7c 150 154 158 15c + * 2 80 84 88 8c 160 164 168 16c + * 3 90 94 98 9c 170 174 178 17c + * 4 a0 a4 a8 ac 180 184 188 18c + * 5 b0 b4 b8 bc 190 194 198 19c + * 6 c0 c4 c8 cc 1a0 1a4 1a8 1ac + * 7 d0 d4 d8 dc 1b0 1b4 1b8 1bc + * 8 e0 e4 e8 ec 1c0 1c4 1c8 1cc + * + * + * YVCOEF_{x}{A|B} CVCOEF_{x}{A|B} + * + * A B A B + * 0 f0 f4 1d0 1d4 + * 1 f8 fc 1d8 1dc + * 2 100 104 1e0 1e4 + * 3 108 10c 1e8 1ec + * 4 110 114 1f0 1f4 + * 5 118 11c 1f8 1fc + * 6 120 124 200 204 + * 7 128 12c 208 20c + * 8 130 134 210 214 + */ +#define _SCALER_HCOEF_DELTA(r, c) ((r) * 0x10 + (c) * 0x4) +#define _SCALER_VCOEF_DELTA(r, c) ((r) * 0x8 + (c) * 0x4) + +#define SCALER_YHCOEF(r, c) (0x60 + _SCALER_HCOEF_DELTA((r), (c))) +#define SCALER_YVCOEF(r, c) (0xf0 + _SCALER_VCOEF_DELTA((r), (c))) +#define SCALER_CHCOEF(r, c) (0x140 + _SCALER_HCOEF_DELTA((r), (c))) +#define SCALER_CVCOEF(r, c) (0x1d0 + _SCALER_VCOEF_DELTA((r), (c))) + + +/* Color Space Conversion */ +#define SCALER_CSC_COEF(x, y) (0x220 + (y) * 0xc + (x) * 0x4) + +/* Dithering */ +#define SCALER_DITH_CFG 0x250 + +/* Version Number */ +#define SCALER_VER 0x260 /* no shadow */ + +/* Cycle count and Timeout */ +#define SCALER_CYCLE_COUNT 0x278 /* no shadow */ +#define SCALER_TIMEOUT_CTRL 0x2c0 /* no shadow */ +#define SCALER_TIMEOUT_CNT 0x2c4 /* no shadow */ + +/* Blending */ +#define SCALER_SRC_BLEND_COLOR 0x280 +#define SCALER_SRC_BLEND_ALPHA 0x284 +#define SCALER_DST_BLEND_COLOR 0x288 +#define SCALER_DST_BLEND_ALPHA 0x28c + +/* Color Fill */ +#define SCALER_FILL_COLOR 0x290 + +/* Multiple Command Queue */ +#define SCALER_ADDR_Q_CONFIG 0x2a0 /* no shadow */ +#define SCALER_SRC_ADDR_Q_STATUS 0x2a4 /* no shadow */ +#define SCALER_SRC_ADDR_Q 0x2a8 /* no shadow */ + +/* CRC */ +#define SCALER_CRC_COLOR00_10 0x2b0 /* no shadow */ +#define SCALER_CRC_COLOR20_30 0x2b4 /* no shadow */ +#define SCALER_CRC_COLOR01_11 0x2b8 /* no shadow */ +#define SCALER_CRC_COLOR21_31 0x2bc /* no shadow */ + +/* Shadow Registers */ +#define SCALER_SHADOW_OFFSET 0x1000 + + +/* Bit definition part */ +#define SCALER_MASK(hi_b, lo_b) ((1 << ((hi_b) - (lo_b) + 1)) - 1) +#define SCALER_GET(reg, hi_b, lo_b) \ + (((reg) >> (lo_b)) & SCALER_MASK(hi_b, lo_b)) +#define SCALER_SET(val, hi_b, lo_b) \ + (((val) & SCALER_MASK(hi_b, lo_b)) << lo_b) + +/* SCALER_STATUS */ +#define SCALER_STATUS_SCALER_RUNNING (1 << 1) +#define SCALER_STATUS_SCALER_READY_CLK_DOWN (1 << 0) + +/* SCALER_CFG */ +#define SCALER_CFG_FILL_EN (1 << 24) +#define SCALER_CFG_BLEND_COLOR_DIVIDE_ALPHA_EN (1 << 17) +#define SCALER_CFG_BLEND_EN (1 << 16) +#define SCALER_CFG_CSC_Y_OFFSET_SRC_EN (1 << 10) +#define SCALER_CFG_CSC_Y_OFFSET_DST_EN (1 << 9) +#define SCALER_CFG_16_BURST_MODE (1 << 8) +#define SCALER_CFG_SOFT_RESET (1 << 1) +#define SCALER_CFG_START_CMD (1 << 0) + +/* SCALER_INT_EN */ +#define SCALER_INT_EN_TIMEOUT (1 << 31) +#define SCALER_INT_EN_ILLEGAL_BLEND (1 << 24) +#define SCALER_INT_EN_ILLEGAL_RATIO (1 << 23) +#define SCALER_INT_EN_ILLEGAL_DST_HEIGHT (1 << 22) +#define SCALER_INT_EN_ILLEGAL_DST_WIDTH (1 << 21) +#define SCALER_INT_EN_ILLEGAL_DST_V_POS (1 << 20) +#define SCALER_INT_EN_ILLEGAL_DST_H_POS (1 << 19) +#define SCALER_INT_EN_ILLEGAL_DST_C_SPAN (1 << 18) +#define SCALER_INT_EN_ILLEGAL_DST_Y_SPAN (1 << 17) +#define SCALER_INT_EN_ILLEGAL_DST_CR_BASE (1 << 16) +#define SCALER_INT_EN_ILLEGAL_DST_CB_BASE (1 << 15) +#define SCALER_INT_EN_ILLEGAL_DST_Y_BASE (1 << 14) +#define SCALER_INT_EN_ILLEGAL_DST_COLOR (1 << 13) +#define SCALER_INT_EN_ILLEGAL_SRC_HEIGHT (1 << 12) +#define SCALER_INT_EN_ILLEGAL_SRC_WIDTH (1 << 11) +#define SCALER_INT_EN_ILLEGAL_SRC_CV_POS (1 << 10) +#define SCALER_INT_EN_ILLEGAL_SRC_CH_POS (1 << 9) +#define SCALER_INT_EN_ILLEGAL_SRC_YV_POS (1 << 8) +#define SCALER_INT_EN_ILLEGAL_SRC_YH_POS (1 << 7) +#define SCALER_INT_EN_ILLEGAL_DST_SPAN (1 << 6) +#define SCALER_INT_EN_ILLEGAL_SRC_Y_SPAN (1 << 5) +#define SCALER_INT_EN_ILLEGAL_SRC_CR_BASE (1 << 4) +#define SCALER_INT_EN_ILLEGAL_SRC_CB_BASE (1 << 3) +#define SCALER_INT_EN_ILLEGAL_SRC_Y_BASE (1 << 2) +#define SCALER_INT_EN_ILLEGAL_SRC_COLOR (1 << 1) +#define SCALER_INT_EN_FRAME_END (1 << 0) + +/* SCALER_INT_STATUS */ +#define SCALER_INT_STATUS_TIMEOUT (1 << 31) +#define SCALER_INT_STATUS_ILLEGAL_BLEND (1 << 24) +#define SCALER_INT_STATUS_ILLEGAL_RATIO (1 << 23) +#define SCALER_INT_STATUS_ILLEGAL_DST_HEIGHT (1 << 22) +#define SCALER_INT_STATUS_ILLEGAL_DST_WIDTH (1 << 21) +#define SCALER_INT_STATUS_ILLEGAL_DST_V_POS (1 << 20) +#define SCALER_INT_STATUS_ILLEGAL_DST_H_POS (1 << 19) +#define SCALER_INT_STATUS_ILLEGAL_DST_C_SPAN (1 << 18) +#define SCALER_INT_STATUS_ILLEGAL_DST_Y_SPAN (1 << 17) +#define SCALER_INT_STATUS_ILLEGAL_DST_CR_BASE (1 << 16) +#define SCALER_INT_STATUS_ILLEGAL_DST_CB_BASE (1 << 15) +#define SCALER_INT_STATUS_ILLEGAL_DST_Y_BASE (1 << 14) +#define SCALER_INT_STATUS_ILLEGAL_DST_COLOR (1 << 13) +#define SCALER_INT_STATUS_ILLEGAL_SRC_HEIGHT (1 << 12) +#define SCALER_INT_STATUS_ILLEGAL_SRC_WIDTH (1 << 11) +#define SCALER_INT_STATUS_ILLEGAL_SRC_CV_POS (1 << 10) +#define SCALER_INT_STATUS_ILLEGAL_SRC_CH_POS (1 << 9) +#define SCALER_INT_STATUS_ILLEGAL_SRC_YV_POS (1 << 8) +#define SCALER_INT_STATUS_ILLEGAL_SRC_YH_POS (1 << 7) +#define SCALER_INT_STATUS_ILLEGAL_DST_SPAN (1 << 6) +#define SCALER_INT_STATUS_ILLEGAL_SRC_Y_SPAN (1 << 5) +#define SCALER_INT_STATUS_ILLEGAL_SRC_CR_BASE (1 << 4) +#define SCALER_INT_STATUS_ILLEGAL_SRC_CB_BASE (1 << 3) +#define SCALER_INT_STATUS_ILLEGAL_SRC_Y_BASE (1 << 2) +#define SCALER_INT_STATUS_ILLEGAL_SRC_COLOR (1 << 1) +#define SCALER_INT_STATUS_FRAME_END (1 << 0) + +/* SCALER_SRC_CFG */ +#define SCALER_SRC_CFG_TILE_EN (1 << 10) +#define SCALER_SRC_CFG_GET_BYTE_SWAP(r) SCALER_GET(r, 6, 5) +#define SCALER_SRC_CFG_SET_BYTE_SWAP(v) SCALER_SET(v, 6, 5) +#define SCALER_SRC_CFG_GET_COLOR_FORMAT(r) SCALER_GET(r, 4, 0) +#define SCALER_SRC_CFG_SET_COLOR_FORMAT(v) SCALER_SET(v, 4, 0) +#define SCALER_YUV420_2P_UV 0 +#define SCALER_YUV422_2P_UV 2 +#define SCALER_YUV444_2P_UV 3 +#define SCALER_RGB_565 4 +#define SCALER_ARGB1555 5 +#define SCALER_ARGB8888 6 +#define SCALER_ARGB8888_PRE 7 +#define SCALER_YUV422_1P_YVYU 9 +#define SCALER_YUV422_1P_YUYV 10 +#define SCALER_YUV422_1P_UYVY 11 +#define SCALER_ARGB4444 12 +#define SCALER_L8A8 13 +#define SCALER_RGBA8888 14 +#define SCALER_L8 15 +#define SCALER_YUV420_2P_VU 16 +#define SCALER_YUV422_2P_VU 18 +#define SCALER_YUV444_2P_VU 19 +#define SCALER_YUV420_3P 20 +#define SCALER_YUV422_3P 22 +#define SCALER_YUV444_3P 23 + +/* SCALER_SRC_SPAN */ +#define SCALER_SRC_SPAN_GET_C_SPAN(r) SCALER_GET(r, 29, 16) +#define SCALER_SRC_SPAN_SET_C_SPAN(v) SCALER_SET(v, 29, 16) +#define SCALER_SRC_SPAN_GET_Y_SPAN(r) SCALER_GET(r, 13, 0) +#define SCALER_SRC_SPAN_SET_Y_SPAN(v) SCALER_SET(v, 13, 0) + +/* SCALER_SRC_Y_POS */ +#define SCALER_SRC_Y_POS_GET_YH_POS(r) SCALER_GET(r, 31, 16) +#define SCALER_SRC_Y_POS_SET_YH_POS(v) SCALER_SET(v, 31, 16) +#define SCALER_SRC_Y_POS_GET_YV_POS(r) SCALER_GET(r, 15, 0) +#define SCALER_SRC_Y_POS_SET_YV_POS(v) SCALER_SET(v, 15, 0) + +/* SCALER_SRC_WH */ +#define SCALER_SRC_WH_GET_WIDTH(r) SCALER_GET(r, 29, 16) +#define SCALER_SRC_WH_SET_WIDTH(v) SCALER_SET(v, 29, 16) +#define SCALER_SRC_WH_GET_HEIGHT(r) SCALER_GET(r, 13, 0) +#define SCALER_SRC_WH_SET_HEIGHT(v) SCALER_SET(v, 13, 0) + +/* SCALER_SRC_C_POS */ +#define SCALER_SRC_C_POS_GET_CH_POS(r) SCALER_GET(r, 31, 16) +#define SCALER_SRC_C_POS_SET_CH_POS(v) SCALER_SET(v, 31, 16) +#define SCALER_SRC_C_POS_GET_CV_POS(r) SCALER_GET(r, 15, 0) +#define SCALER_SRC_C_POS_SET_CV_POS(v) SCALER_SET(v, 15, 0) + +/* SCALER_DST_CFG */ +#define SCALER_DST_CFG_GET_BYTE_SWAP(r) SCALER_GET(r, 6, 5) +#define SCALER_DST_CFG_SET_BYTE_SWAP(v) SCALER_SET(v, 6, 5) +#define SCALER_DST_CFG_GET_COLOR_FORMAT(r) SCALER_GET(r, 4, 0) +#define SCALER_DST_CFG_SET_COLOR_FORMAT(v) SCALER_SET(v, 4, 0) + +/* SCALER_DST_SPAN */ +#define SCALER_DST_SPAN_GET_C_SPAN(r) SCALER_GET(r, 29, 16) +#define SCALER_DST_SPAN_SET_C_SPAN(v) SCALER_SET(v, 29, 16) +#define SCALER_DST_SPAN_GET_Y_SPAN(r) SCALER_GET(r, 13, 0) +#define SCALER_DST_SPAN_SET_Y_SPAN(v) SCALER_SET(v, 13, 0) + +/* SCALER_DST_WH */ +#define SCALER_DST_WH_GET_WIDTH(r) SCALER_GET(r, 29, 16) +#define SCALER_DST_WH_SET_WIDTH(v) SCALER_SET(v, 29, 16) +#define SCALER_DST_WH_GET_HEIGHT(r) SCALER_GET(r, 13, 0) +#define SCALER_DST_WH_SET_HEIGHT(v) SCALER_SET(v, 13, 0) + +/* SCALER_DST_POS */ +#define SCALER_DST_POS_GET_H_POS(r) SCALER_GET(r, 29, 16) +#define SCALER_DST_POS_SET_H_POS(v) SCALER_SET(v, 29, 16) +#define SCALER_DST_POS_GET_V_POS(r) SCALER_GET(r, 13, 0) +#define SCALER_DST_POS_SET_V_POS(v) SCALER_SET(v, 13, 0) + +/* SCALER_H_RATIO */ +#define SCALER_H_RATIO_GET(r) SCALER_GET(r, 18, 0) +#define SCALER_H_RATIO_SET(v) SCALER_SET(v, 18, 0) + +/* SCALER_V_RATIO */ +#define SCALER_V_RATIO_GET(r) SCALER_GET(r, 18, 0) +#define SCALER_V_RATIO_SET(v) SCALER_SET(v, 18, 0) + +/* SCALER_ROT_CFG */ +#define SCALER_ROT_CFG_FLIP_X_EN (1 << 3) +#define SCALER_ROT_CFG_FLIP_Y_EN (1 << 2) +#define SCALER_ROT_CFG_GET_ROTMODE(r) SCALER_GET(r, 1, 0) +#define SCALER_ROT_CFG_SET_ROTMODE(v) SCALER_SET(v, 1, 0) +#define SCALER_ROT_MODE_90 1 +#define SCALER_ROT_MODE_180 2 +#define SCALER_ROT_MODE_270 3 + +/* SCALER_HCOEF, SCALER_VCOEF */ +#define SCALER_COEF_SHIFT(i) (16 * (1 - (i) % 2)) +#define SCALER_COEF_GET(r, i) \ + (((r) >> SCALER_COEF_SHIFT(i)) & 0x1ff) +#define SCALER_COEF_SET(v, i) \ + (((v) & 0x1ff) << SCALER_COEF_SHIFT(i)) + +/* SCALER_CSC_COEFxy */ +#define SCALER_CSC_COEF_GET(r) SCALER_GET(r, 11, 0) +#define SCALER_CSC_COEF_SET(v) SCALER_SET(v, 11, 0) + +/* SCALER_DITH_CFG */ +#define SCALER_DITH_CFG_GET_R_TYPE(r) SCALER_GET(r, 8, 6) +#define SCALER_DITH_CFG_SET_R_TYPE(v) SCALER_SET(v, 8, 6) +#define SCALER_DITH_CFG_GET_G_TYPE(r) SCALER_GET(r, 5, 3) +#define SCALER_DITH_CFG_SET_G_TYPE(v) SCALER_SET(v, 5, 3) +#define SCALER_DITH_CFG_GET_B_TYPE(r) SCALER_GET(r, 2, 0) +#define SCALER_DITH_CFG_SET_B_TYPE(v) SCALER_SET(v, 2, 0) + +/* SCALER_TIMEOUT_CTRL */ +#define SCALER_TIMEOUT_CTRL_GET_TIMER_VALUE(r) SCALER_GET(r, 31, 16) +#define SCALER_TIMEOUT_CTRL_SET_TIMER_VALUE(v) SCALER_SET(v, 31, 16) +#define SCALER_TIMEOUT_CTRL_GET_TIMER_DIV(r) SCALER_GET(r, 7, 4) +#define SCALER_TIMEOUT_CTRL_SET_TIMER_DIV(v) SCALER_SET(v, 7, 4) +#define SCALER_TIMEOUT_CTRL_TIMER_ENABLE (1 << 0) + +/* SCALER_TIMEOUT_CNT */ +#define SCALER_TIMEOUT_CTRL_GET_TIMER_COUNT(r) SCALER_GET(r, 31, 16) + +/* SCALER_SRC_BLEND_COLOR */ +#define SCALER_SRC_BLEND_COLOR_SEL_INV (1 << 31) +#define SCALER_SRC_BLEND_COLOR_GET_SEL(r) SCALER_GET(r, 30, 29) +#define SCALER_SRC_BLEND_COLOR_SET_SEL(v) SCALER_SET(v, 30, 29) +#define SCALER_SRC_BLEND_COLOR_OP_SEL_INV (1 << 28) +#define SCALER_SRC_BLEND_COLOR_GET_OP_SEL(r) SCALER_GET(r, 27, 24) +#define SCALER_SRC_BLEND_COLOR_SET_OP_SEL(v) SCALER_SET(v, 27, 24) +#define SCALER_SRC_BLEND_COLOR_GET_COLOR0(r) SCALER_GET(r, 23, 16) +#define SCALER_SRC_BLEND_COLOR_SET_COLOR0(v) SCALER_SET(v, 23, 16) +#define SCALER_SRC_BLEND_COLOR_GET_COLOR1(r) SCALER_GET(r, 15, 8) +#define SCALER_SRC_BLEND_COLOR_SET_COLOR1(v) SCALER_SET(v, 15, 8) +#define SCALER_SRC_BLEND_COLOR_GET_COLOR2(r) SCALER_GET(r, 7, 0) +#define SCALER_SRC_BLEND_COLOR_SET_COLOR2(v) SCALER_SET(v, 7, 0) + +/* SCALER_SRC_BLEND_ALPHA */ +#define SCALER_SRC_BLEND_ALPHA_SEL_INV (1 << 31) +#define SCALER_SRC_BLEND_ALPHA_GET_SEL(r) SCALER_GET(r, 30, 29) +#define SCALER_SRC_BLEND_ALPHA_SET_SEL(v) SCALER_SET(v, 30, 29) +#define SCALER_SRC_BLEND_ALPHA_OP_SEL_INV (1 << 28) +#define SCALER_SRC_BLEND_ALPHA_GET_OP_SEL(r) SCALER_GET(r, 27, 24) +#define SCALER_SRC_BLEND_ALPHA_SET_OP_SEL(v) SCALER_SET(v, 27, 24) +#define SCALER_SRC_BLEND_ALPHA_GET_ALPHA(r) SCALER_GET(r, 7, 0) +#define SCALER_SRC_BLEND_ALPHA_SET_ALPHA(v) SCALER_SET(v, 7, 0) + +/* SCALER_DST_BLEND_COLOR */ +#define SCALER_DST_BLEND_COLOR_SEL_INV (1 << 31) +#define SCALER_DST_BLEND_COLOR_GET_SEL(r) SCALER_GET(r, 30, 29) +#define SCALER_DST_BLEND_COLOR_SET_SEL(v) SCALER_SET(v, 30, 29) +#define SCALER_DST_BLEND_COLOR_OP_SEL_INV (1 << 28) +#define SCALER_DST_BLEND_COLOR_GET_OP_SEL(r) SCALER_GET(r, 27, 24) +#define SCALER_DST_BLEND_COLOR_SET_OP_SEL(v) SCALER_SET(v, 27, 24) +#define SCALER_DST_BLEND_COLOR_GET_COLOR0(r) SCALER_GET(r, 23, 16) +#define SCALER_DST_BLEND_COLOR_SET_COLOR0(v) SCALER_SET(v, 23, 16) +#define SCALER_DST_BLEND_COLOR_GET_COLOR1(r) SCALER_GET(r, 15, 8) +#define SCALER_DST_BLEND_COLOR_SET_COLOR1(v) SCALER_SET(v, 15, 8) +#define SCALER_DST_BLEND_COLOR_GET_COLOR2(r) SCALER_GET(r, 7, 0) +#define SCALER_DST_BLEND_COLOR_SET_COLOR2(v) SCALER_SET(v, 7, 0) + +/* SCALER_DST_BLEND_ALPHA */ +#define SCALER_DST_BLEND_ALPHA_SEL_INV (1 << 31) +#define SCALER_DST_BLEND_ALPHA_GET_SEL(r) SCALER_GET(r, 30, 29) +#define SCALER_DST_BLEND_ALPHA_SET_SEL(v) SCALER_SET(v, 30, 29) +#define SCALER_DST_BLEND_ALPHA_OP_SEL_INV (1 << 28) +#define SCALER_DST_BLEND_ALPHA_GET_OP_SEL(r) SCALER_GET(r, 27, 24) +#define SCALER_DST_BLEND_ALPHA_SET_OP_SEL(v) SCALER_SET(v, 27, 24) +#define SCALER_DST_BLEND_ALPHA_GET_ALPHA(r) SCALER_GET(r, 7, 0) +#define SCALER_DST_BLEND_ALPHA_SET_ALPHA(v) SCALER_SET(v, 7, 0) + +/* SCALER_FILL_COLOR */ +#define SCALER_FILL_COLOR_GET_ALPHA(r) SCALER_GET(r, 31, 24) +#define SCALER_FILL_COLOR_SET_ALPHA(v) SCALER_SET(v, 31, 24) +#define SCALER_FILL_COLOR_GET_FILL_COLOR0(r) SCALER_GET(r, 23, 16) +#define SCALER_FILL_COLOR_SET_FILL_COLOR0(v) SCALER_SET(v, 23, 16) +#define SCALER_FILL_COLOR_GET_FILL_COLOR1(r) SCALER_GET(r, 15, 8) +#define SCALER_FILL_COLOR_SET_FILL_COLOR1(v) SCALER_SET(v, 15, 8) +#define SCALER_FILL_COLOR_GET_FILL_COLOR2(r) SCALER_GET(r, 7, 0) +#define SCALER_FILL_COLOR_SET_FILL_COLOR2(v) SCALER_SET(v, 7, 0) + +/* SCALER_ADDR_Q_CONFIG */ +#define SCALER_ADDR_Q_CONFIG_RST (1 << 0) + +/* SCALER_SRC_ADDR_Q_STATUS */ +#define SCALER_SRC_ADDR_Q_STATUS_Y_FULL (1 << 23) +#define SCALER_SRC_ADDR_Q_STATUS_Y_EMPTY (1 << 22) +#define SCALER_SRC_ADDR_Q_STATUS_GET_Y_WR_IDX(r) SCALER_GET(r, 21, 16) +#define SCALER_SRC_ADDR_Q_STATUS_CB_FULL (1 << 15) +#define SCALER_SRC_ADDR_Q_STATUS_CB_EMPTY (1 << 14) +#define SCALER_SRC_ADDR_Q_STATUS_GET_CB_WR_IDX(r) SCALER_GET(r, 13, 8) +#define SCALER_SRC_ADDR_Q_STATUS_CR_FULL (1 << 7) +#define SCALER_SRC_ADDR_Q_STATUS_CR_EMPTY (1 << 6) +#define SCALER_SRC_ADDR_Q_STATUS_GET_CR_WR_IDX(r) SCALER_GET(r, 5, 0) + +/* SCALER_DST_ADDR_Q_STATUS */ +#define SCALER_DST_ADDR_Q_STATUS_Y_FULL (1 << 23) +#define SCALER_DST_ADDR_Q_STATUS_Y_EMPTY (1 << 22) +#define SCALER_DST_ADDR_Q_STATUS_GET_Y_WR_IDX(r) SCALER_GET(r, 21, 16) +#define SCALER_DST_ADDR_Q_STATUS_CB_FULL (1 << 15) +#define SCALER_DST_ADDR_Q_STATUS_CB_EMPTY (1 << 14) +#define SCALER_DST_ADDR_Q_STATUS_GET_CB_WR_IDX(r) SCALER_GET(r, 13, 8) +#define SCALER_DST_ADDR_Q_STATUS_CR_FULL (1 << 7) +#define SCALER_DST_ADDR_Q_STATUS_CR_EMPTY (1 << 6) +#define SCALER_DST_ADDR_Q_STATUS_GET_CR_WR_IDX(r) SCALER_GET(r, 5, 0) + +/* SCALER_CRC_COLOR00_10 */ +#define SCALER_CRC_COLOR00_10_GET_00(r) SCALER_GET(r, 31, 16) +#define SCALER_CRC_COLOR00_10_GET_10(r) SCALER_GET(r, 15, 0) + +/* SCALER_CRC_COLOR20_30 */ +#define SCALER_CRC_COLOR20_30_GET_20(r) SCALER_GET(r, 31, 16) +#define SCALER_CRC_COLOR20_30_GET_30(r) SCALER_GET(r, 15, 0) + +/* SCALER_CRC_COLOR01_11 */ +#define SCALER_CRC_COLOR01_11_GET_01(r) SCALER_GET(r, 31, 16) +#define SCALER_CRC_COLOR01_11_GET_11(r) SCALER_GET(r, 15, 0) + +/* SCALER_CRC_COLOR21_31 */ +#define SCALER_CRC_COLOR21_31_GET_21(r) SCALER_GET(r, 31, 16) +#define SCALER_CRC_COLOR21_31_GET_31(r) SCALER_GET(r, 15, 0) + +#endif /* EXYNOS_REGS_SCALER_H */ diff --git a/drivers/gpu/drm/gma500/cdv_device.c b/drivers/gpu/drm/gma500/cdv_device.c index 3a3bf752e03a..34b85767e4da 100644 --- a/drivers/gpu/drm/gma500/cdv_device.c +++ b/drivers/gpu/drm/gma500/cdv_device.c @@ -485,7 +485,7 @@ void cdv_intel_attach_force_audio_property(struct drm_connector *connector) return; for (i = 0; i < ARRAY_SIZE(force_audio_names); i++) - drm_property_add_enum(prop, i, i-1, force_audio_names[i]); + drm_property_add_enum(prop, i-1, force_audio_names[i]); dev_priv->force_audio_property = prop; } @@ -514,7 +514,7 @@ void cdv_intel_attach_broadcast_rgb_property(struct drm_connector *connector) return; for (i = 0; i < ARRAY_SIZE(broadcast_rgb_names); i++) - drm_property_add_enum(prop, i, i, broadcast_rgb_names[i]); + drm_property_add_enum(prop, i, broadcast_rgb_names[i]); dev_priv->broadcast_rgb_property = prop; } diff --git a/drivers/gpu/drm/gma500/psb_intel_sdvo.c b/drivers/gpu/drm/gma500/psb_intel_sdvo.c index 8dc2b19f913b..f2ee6aa10afa 100644 --- a/drivers/gpu/drm/gma500/psb_intel_sdvo.c +++ b/drivers/gpu/drm/gma500/psb_intel_sdvo.c @@ -2281,7 +2281,7 @@ static bool psb_intel_sdvo_tv_create_property(struct psb_intel_sdvo *psb_intel_s for (i = 0; i < psb_intel_sdvo_connector->format_supported_num; i++) drm_property_add_enum( - psb_intel_sdvo_connector->tv_format, i, + psb_intel_sdvo_connector->tv_format, i, tv_format_names[psb_intel_sdvo_connector->tv_format_supported[i]]); psb_intel_sdvo->tv_format_index = psb_intel_sdvo_connector->tv_format_supported[0]; diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 80efee1ff7f3..9de8b1c51a5c 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -26,7 +26,7 @@ config DRM_I915_DEBUG select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks) select DRM_DEBUG_MM if DRM=y select STACKDEPOT if DRM=y # for DRM_DEBUG_MM - select DRM_DEBUG_MM_SELFTEST + select DRM_DEBUG_SELFTEST select SW_SYNC # signaling validation framework (igt/syncobj*) select DRM_I915_SW_FENCE_DEBUG_OBJECTS select DRM_I915_SELFTEST diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 2a96d082addf..9c449b8d8eab 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -2831,10 +2831,10 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_GEM_GET_APERTURE, i915_gem_get_aperture_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GET_PIPE_FROM_CRTC_ID, intel_get_pipe_from_crtc_id_ioctl, 0), DRM_IOCTL_DEF_DRV(I915_GEM_MADVISE, i915_gem_madvise_ioctl, DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(I915_OVERLAY_PUT_IMAGE, intel_overlay_put_image_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW), - DRM_IOCTL_DEF_DRV(I915_OVERLAY_ATTRS, intel_overlay_attrs_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW), - DRM_IOCTL_DEF_DRV(I915_SET_SPRITE_COLORKEY, intel_sprite_set_colorkey_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW), - DRM_IOCTL_DEF_DRV(I915_GET_SPRITE_COLORKEY, drm_noop, DRM_MASTER|DRM_CONTROL_ALLOW), + DRM_IOCTL_DEF_DRV(I915_OVERLAY_PUT_IMAGE, intel_overlay_put_image_ioctl, DRM_MASTER), + DRM_IOCTL_DEF_DRV(I915_OVERLAY_ATTRS, intel_overlay_attrs_ioctl, DRM_MASTER), + DRM_IOCTL_DEF_DRV(I915_SET_SPRITE_COLORKEY, intel_sprite_set_colorkey_ioctl, DRM_MASTER), + DRM_IOCTL_DEF_DRV(I915_GET_SPRITE_COLORKEY, drm_noop, DRM_MASTER), DRM_IOCTL_DEF_DRV(I915_GEM_WAIT, i915_gem_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, i915_gem_context_create_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_RENDER_ALLOW), diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 7c6164d14bd8..15434cad5430 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -799,7 +799,6 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) port++; } - execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER); execlists_user_end(execlists); } diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 96e213ec202d..25005023c243 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -2779,9 +2779,8 @@ static bool intel_sdvo_tv_create_property(struct intel_sdvo *intel_sdvo, return false; for (i = 0; i < intel_sdvo_connector->format_supported_num; i++) - drm_property_add_enum( - intel_sdvo_connector->tv_format, i, - i, tv_format_names[intel_sdvo_connector->tv_format_supported[i]]); + drm_property_add_enum(intel_sdvo_connector->tv_format, i, + tv_format_names[intel_sdvo_connector->tv_format_supported[i]]); intel_sdvo_connector->base.base.state->tv.mode = intel_sdvo_connector->tv_format_supported[0]; drm_object_attach_property(&intel_sdvo_connector->base.base.base, diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 1597938d2451..ee23613f9fd4 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -936,22 +936,12 @@ intel_check_sprite_plane(struct intel_plane *plane, struct drm_i915_private *dev_priv = to_i915(plane->base.dev); struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_framebuffer *fb = state->base.fb; - int crtc_x, crtc_y; - unsigned int crtc_w, crtc_h; - uint32_t src_x, src_y, src_w, src_h; - struct drm_rect *src = &state->base.src; - struct drm_rect *dst = &state->base.dst; - struct drm_rect clip = {}; int max_stride = INTEL_GEN(dev_priv) >= 9 ? 32768 : 16384; - int hscale, vscale; int max_scale, min_scale; bool can_scale; int ret; uint32_t pixel_format = 0; - *src = drm_plane_state_src(&state->base); - *dst = drm_plane_state_dest(&state->base); - if (!fb) { state->base.visible = false; return 0; @@ -990,64 +980,19 @@ intel_check_sprite_plane(struct intel_plane *plane, min_scale = plane->can_scale ? 1 : (1 << 16); } - /* - * FIXME the following code does a bunch of fuzzy adjustments to the - * coordinates and sizes. We probably need some way to decide whether - * more strict checking should be done instead. - */ - drm_rect_rotate(src, fb->width << 16, fb->height << 16, - state->base.rotation); - - hscale = drm_rect_calc_hscale_relaxed(src, dst, min_scale, max_scale); - BUG_ON(hscale < 0); - - vscale = drm_rect_calc_vscale_relaxed(src, dst, min_scale, max_scale); - BUG_ON(vscale < 0); - - if (crtc_state->base.enable) - drm_mode_get_hv_timing(&crtc_state->base.mode, - &clip.x2, &clip.y2); - - state->base.visible = drm_rect_clip_scaled(src, dst, &clip, hscale, vscale); - - crtc_x = dst->x1; - crtc_y = dst->y1; - crtc_w = drm_rect_width(dst); - crtc_h = drm_rect_height(dst); + ret = drm_atomic_helper_check_plane_state(&state->base, + &crtc_state->base, + min_scale, max_scale, + true, true); + if (ret) + return ret; if (state->base.visible) { - /* check again in case clipping clamped the results */ - hscale = drm_rect_calc_hscale(src, dst, min_scale, max_scale); - if (hscale < 0) { - DRM_DEBUG_KMS("Horizontal scaling factor out of limits\n"); - drm_rect_debug_print("src: ", src, true); - drm_rect_debug_print("dst: ", dst, false); - - return hscale; - } - - vscale = drm_rect_calc_vscale(src, dst, min_scale, max_scale); - if (vscale < 0) { - DRM_DEBUG_KMS("Vertical scaling factor out of limits\n"); - drm_rect_debug_print("src: ", src, true); - drm_rect_debug_print("dst: ", dst, false); - - return vscale; - } - - /* Make the source viewport size an exact multiple of the scaling factors. */ - drm_rect_adjust_size(src, - drm_rect_width(dst) * hscale - drm_rect_width(src), - drm_rect_height(dst) * vscale - drm_rect_height(src)); - - drm_rect_rotate_inv(src, fb->width << 16, fb->height << 16, - state->base.rotation); - - /* sanity check to make sure the src viewport wasn't enlarged */ - WARN_ON(src->x1 < (int) state->base.src_x || - src->y1 < (int) state->base.src_y || - src->x2 > (int) state->base.src_x + state->base.src_w || - src->y2 > (int) state->base.src_y + state->base.src_h); + struct drm_rect *src = &state->base.src; + struct drm_rect *dst = &state->base.dst; + unsigned int crtc_w = drm_rect_width(dst); + unsigned int crtc_h = drm_rect_height(dst); + uint32_t src_x, src_y, src_w, src_h; /* * Hardware doesn't handle subpixel coordinates. @@ -1060,59 +1005,40 @@ intel_check_sprite_plane(struct intel_plane *plane, src_y = src->y1 >> 16; src_h = drm_rect_height(src) >> 16; + src->x1 = src_x << 16; + src->x2 = (src_x + src_w) << 16; + src->y1 = src_y << 16; + src->y2 = (src_y + src_h) << 16; + if (intel_format_is_yuv(fb->format->format) && - fb->format->format != DRM_FORMAT_NV12) { - src_x &= ~1; - src_w &= ~1; - - /* - * Must keep src and dst the - * same if we can't scale. - */ - if (!can_scale) - crtc_w &= ~1; - - if (crtc_w == 0) - state->base.visible = false; + fb->format->format != DRM_FORMAT_NV12 && + (src_x % 2 || src_w % 2)) { + DRM_DEBUG_KMS("src x/w (%u, %u) must be a multiple of 2 for YUV planes\n", + src_x, src_w); + return -EINVAL; } - } - /* Check size restrictions when scaling */ - if (state->base.visible && (src_w != crtc_w || src_h != crtc_h)) { - unsigned int width_bytes; - int cpp = fb->format->cpp[0]; + /* Check size restrictions when scaling */ + if (src_w != crtc_w || src_h != crtc_h) { + unsigned int width_bytes; + int cpp = fb->format->cpp[0]; - WARN_ON(!can_scale); + WARN_ON(!can_scale); - /* FIXME interlacing min height is 6 */ + width_bytes = ((src_x * cpp) & 63) + src_w * cpp; - if (crtc_w < 3 || crtc_h < 3) - state->base.visible = false; - - if (src_w < 3 || src_h < 3) - state->base.visible = false; - - width_bytes = ((src_x * cpp) & 63) + src_w * cpp; - - if (INTEL_GEN(dev_priv) < 9 && (src_w > 2048 || src_h > 2048 || - width_bytes > 4096 || fb->pitches[0] > 4096)) { - DRM_DEBUG_KMS("Source dimensions exceed hardware limits\n"); - return -EINVAL; + /* FIXME interlacing min height is 6 */ + if (INTEL_GEN(dev_priv) < 9 && ( + src_w < 3 || src_h < 3 || + src_w > 2048 || src_h > 2048 || + crtc_w < 3 || crtc_h < 3 || + width_bytes > 4096 || fb->pitches[0] > 4096)) { + DRM_DEBUG_KMS("Source dimensions exceed hardware limits\n"); + return -EINVAL; + } } } - if (state->base.visible) { - src->x1 = src_x << 16; - src->x2 = (src_x + src_w) << 16; - src->y1 = src_y << 16; - src->y2 = (src_y + src_h) << 16; - } - - dst->x1 = crtc_x; - dst->x2 = crtc_x + crtc_w; - dst->y1 = crtc_y; - dst->y2 = crtc_y + crtc_h; - if (INTEL_GEN(dev_priv) >= 9) { ret = skl_check_plane_surface(crtc_state, state); if (ret) diff --git a/drivers/gpu/drm/mediatek/Kconfig b/drivers/gpu/drm/mediatek/Kconfig index 294de4549922..119ec0a21de2 100644 --- a/drivers/gpu/drm/mediatek/Kconfig +++ b/drivers/gpu/drm/mediatek/Kconfig @@ -11,6 +11,7 @@ config DRM_MEDIATEK select DRM_PANEL select MEMORY select MTK_SMI + select VIDEOMODE_HELPERS help Choose this option if you have a Mediatek SoCs. The module will be called mediatek-drm diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c b/drivers/gpu/drm/mediatek/mtk_dpi.c index e80a603e5fb0..6c0ea39d5739 100644 --- a/drivers/gpu/drm/mediatek/mtk_dpi.c +++ b/drivers/gpu/drm/mediatek/mtk_dpi.c @@ -22,6 +22,7 @@ #include <linux/interrupt.h> #include <linux/types.h> #include <linux/clk.h> +#include <video/videomode.h> #include "mtk_dpi_regs.h" #include "mtk_drm_ddp_comp.h" @@ -429,34 +430,35 @@ static int mtk_dpi_set_display_mode(struct mtk_dpi *dpi, struct mtk_dpi_sync_param vsync_leven = { 0 }; struct mtk_dpi_sync_param vsync_rodd = { 0 }; struct mtk_dpi_sync_param vsync_reven = { 0 }; - unsigned long pix_rate; + struct videomode vm = { 0 }; unsigned long pll_rate; unsigned int factor; /* let pll_rate can fix the valid range of tvdpll (1G~2GHz) */ - pix_rate = 1000UL * mode->clock; + if (mode->clock <= 27000) - factor = 16 * 3; + factor = 3 << 4; else if (mode->clock <= 84000) - factor = 8 * 3; + factor = 3 << 3; else if (mode->clock <= 167000) - factor = 4 * 3; + factor = 3 << 2; else - factor = 2 * 3; - pll_rate = pix_rate * factor; + factor = 3 << 1; + drm_display_mode_to_videomode(mode, &vm); + pll_rate = vm.pixelclock * factor; dev_dbg(dpi->dev, "Want PLL %lu Hz, pixel clock %lu Hz\n", - pll_rate, pix_rate); + pll_rate, vm.pixelclock); clk_set_rate(dpi->tvd_clk, pll_rate); pll_rate = clk_get_rate(dpi->tvd_clk); - pix_rate = pll_rate / factor; - clk_set_rate(dpi->pixel_clk, pix_rate); - pix_rate = clk_get_rate(dpi->pixel_clk); + vm.pixelclock = pll_rate / factor; + clk_set_rate(dpi->pixel_clk, vm.pixelclock); + vm.pixelclock = clk_get_rate(dpi->pixel_clk); dev_dbg(dpi->dev, "Got PLL %lu Hz, pixel clock %lu Hz\n", - pll_rate, pix_rate); + pll_rate, vm.pixelclock); limit.c_bottom = 0x0010; limit.c_top = 0x0FE0; @@ -465,33 +467,31 @@ static int mtk_dpi_set_display_mode(struct mtk_dpi *dpi, dpi_pol.ck_pol = MTK_DPI_POLARITY_FALLING; dpi_pol.de_pol = MTK_DPI_POLARITY_RISING; - dpi_pol.hsync_pol = mode->flags & DRM_MODE_FLAG_PHSYNC ? + dpi_pol.hsync_pol = vm.flags & DISPLAY_FLAGS_HSYNC_HIGH ? MTK_DPI_POLARITY_FALLING : MTK_DPI_POLARITY_RISING; - dpi_pol.vsync_pol = mode->flags & DRM_MODE_FLAG_PVSYNC ? + dpi_pol.vsync_pol = vm.flags & DISPLAY_FLAGS_VSYNC_HIGH ? MTK_DPI_POLARITY_FALLING : MTK_DPI_POLARITY_RISING; - - hsync.sync_width = mode->hsync_end - mode->hsync_start; - hsync.back_porch = mode->htotal - mode->hsync_end; - hsync.front_porch = mode->hsync_start - mode->hdisplay; + hsync.sync_width = vm.hsync_len; + hsync.back_porch = vm.hback_porch; + hsync.front_porch = vm.hfront_porch; hsync.shift_half_line = false; - - vsync_lodd.sync_width = mode->vsync_end - mode->vsync_start; - vsync_lodd.back_porch = mode->vtotal - mode->vsync_end; - vsync_lodd.front_porch = mode->vsync_start - mode->vdisplay; + vsync_lodd.sync_width = vm.vsync_len; + vsync_lodd.back_porch = vm.vback_porch; + vsync_lodd.front_porch = vm.vfront_porch; vsync_lodd.shift_half_line = false; - if (mode->flags & DRM_MODE_FLAG_INTERLACE && + if (vm.flags & DISPLAY_FLAGS_INTERLACED && mode->flags & DRM_MODE_FLAG_3D_MASK) { vsync_leven = vsync_lodd; vsync_rodd = vsync_lodd; vsync_reven = vsync_lodd; vsync_leven.shift_half_line = true; vsync_reven.shift_half_line = true; - } else if (mode->flags & DRM_MODE_FLAG_INTERLACE && + } else if (vm.flags & DISPLAY_FLAGS_INTERLACED && !(mode->flags & DRM_MODE_FLAG_3D_MASK)) { vsync_leven = vsync_lodd; vsync_leven.shift_half_line = true; - } else if (!(mode->flags & DRM_MODE_FLAG_INTERLACE) && + } else if (!(vm.flags & DISPLAY_FLAGS_INTERLACED) && mode->flags & DRM_MODE_FLAG_3D_MASK) { vsync_rodd = vsync_lodd; } @@ -505,12 +505,12 @@ static int mtk_dpi_set_display_mode(struct mtk_dpi *dpi, mtk_dpi_config_vsync_reven(dpi, &vsync_reven); mtk_dpi_config_3d(dpi, !!(mode->flags & DRM_MODE_FLAG_3D_MASK)); - mtk_dpi_config_interface(dpi, !!(mode->flags & - DRM_MODE_FLAG_INTERLACE)); - if (mode->flags & DRM_MODE_FLAG_INTERLACE) - mtk_dpi_config_fb_size(dpi, mode->hdisplay, mode->vdisplay / 2); + mtk_dpi_config_interface(dpi, !!(vm.flags & + DISPLAY_FLAGS_INTERLACED)); + if (vm.flags & DISPLAY_FLAGS_INTERLACED) + mtk_dpi_config_fb_size(dpi, vm.hactive, vm.vactive >> 1); else - mtk_dpi_config_fb_size(dpi, mode->hdisplay, mode->vdisplay); + mtk_dpi_config_fb_size(dpi, vm.hactive, vm.vactive); mtk_dpi_config_channel_limit(dpi, &limit); mtk_dpi_config_bit_num(dpi, dpi->bit_num); diff --git a/drivers/gpu/drm/mediatek/mtk_drm_gem.c b/drivers/gpu/drm/mediatek/mtk_drm_gem.c index f595ac816b55..259b7b0de1d2 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_gem.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_gem.c @@ -220,7 +220,7 @@ struct drm_gem_object *mtk_gem_prime_import_sg_table(struct drm_device *dev, mtk_gem = mtk_drm_gem_init(dev, attach->dmabuf->size); if (IS_ERR(mtk_gem)) - return ERR_PTR(PTR_ERR(mtk_gem)); + return ERR_CAST(mtk_gem); expected = sg_dma_address(sg->sgl); for_each_sg(sg->sgl, s, sg->nents, i) { diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c index 7e5e24c2152a..aa0943ec32b0 100644 --- a/drivers/gpu/drm/mediatek/mtk_dsi.c +++ b/drivers/gpu/drm/mediatek/mtk_dsi.c @@ -551,13 +551,12 @@ static int mtk_dsi_poweron(struct mtk_dsi *dsi) } /** - * vm.pixelclock is in kHz, pixel_clock unit is Hz, so multiply by 1000 * htotal_time = htotal * byte_per_pixel / num_lanes * overhead_time = lpx + hs_prepare + hs_zero + hs_trail + hs_exit * mipi_ratio = (htotal_time + overhead_time) / htotal_time * data_rate = pixel_clock * bit_per_pixel * mipi_ratio / num_lanes; */ - pixel_clock = dsi->vm.pixelclock * 1000; + pixel_clock = dsi->vm.pixelclock; htotal = dsi->vm.hactive + dsi->vm.hback_porch + dsi->vm.hfront_porch + dsi->vm.hsync_len; htotal_bits = htotal * bit_per_pixel; @@ -725,16 +724,7 @@ static void mtk_dsi_encoder_mode_set(struct drm_encoder *encoder, { struct mtk_dsi *dsi = encoder_to_dsi(encoder); - dsi->vm.pixelclock = adjusted->clock; - dsi->vm.hactive = adjusted->hdisplay; - dsi->vm.hback_porch = adjusted->htotal - adjusted->hsync_end; - dsi->vm.hfront_porch = adjusted->hsync_start - adjusted->hdisplay; - dsi->vm.hsync_len = adjusted->hsync_end - adjusted->hsync_start; - - dsi->vm.vactive = adjusted->vdisplay; - dsi->vm.vback_porch = adjusted->vtotal - adjusted->vsync_end; - dsi->vm.vfront_porch = adjusted->vsync_start - adjusted->vdisplay; - dsi->vm.vsync_len = adjusted->vsync_end - adjusted->vsync_start; + drm_display_mode_to_videomode(adjusted, &dsi->vm); } static void mtk_dsi_encoder_disable(struct drm_encoder *encoder) diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index 8e0cb161754b..0ae5ace65462 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -168,7 +168,6 @@ struct msm_gpu *adreno_load_gpu(struct drm_device *dev) if (gpu->funcs->debugfs_init) { gpu->funcs->debugfs_init(gpu, dev->primary); gpu->funcs->debugfs_init(gpu, dev->render); - gpu->funcs->debugfs_init(gpu, dev->control); } #endif diff --git a/drivers/gpu/drm/msm/msm_debugfs.c b/drivers/gpu/drm/msm/msm_debugfs.c index ba74cb4f94df..1ff3fda245d1 100644 --- a/drivers/gpu/drm/msm/msm_debugfs.c +++ b/drivers/gpu/drm/msm/msm_debugfs.c @@ -140,9 +140,6 @@ int msm_debugfs_late_init(struct drm_device *dev) if (ret) return ret; ret = late_init_minor(dev->render); - if (ret) - return ret; - ret = late_init_minor(dev->control); return ret; } diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index 009713404cc4..7d0bec8dd03d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -338,11 +338,9 @@ static struct nouveau_drm_prop_enum_list dither_depth[] = { if (c) { \ p = drm_property_create(dev, DRM_MODE_PROP_ENUM, n, c); \ l = (list); \ - c = 0; \ while (p && l->gen_mask) { \ if (l->gen_mask & (1 << (gen))) { \ - drm_property_add_enum(p, c, l->type, l->name); \ - c++; \ + drm_property_add_enum(p, l->type, l->name); \ } \ l++; \ } \ diff --git a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c index d964d454e4ae..2c9c9722734f 100644 --- a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c +++ b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c @@ -238,12 +238,6 @@ static void rpi_touchscreen_i2c_write(struct rpi_touchscreen *ts, static int rpi_touchscreen_write(struct rpi_touchscreen *ts, u16 reg, u32 val) { -#if 0 - /* The firmware uses LP DSI transactions like this to bring up - * the hardware, which should be faster than using I2C to then - * pass to the Toshiba. However, I was unable to get it to - * work. - */ u8 msg[] = { reg, reg >> 8, @@ -253,13 +247,7 @@ static int rpi_touchscreen_write(struct rpi_touchscreen *ts, u16 reg, u32 val) val >> 24, }; - mipi_dsi_dcs_write_buffer(ts->dsi, msg, sizeof(msg)); -#else - rpi_touchscreen_i2c_write(ts, REG_WR_ADDRH, reg >> 8); - rpi_touchscreen_i2c_write(ts, REG_WR_ADDRL, reg); - rpi_touchscreen_i2c_write(ts, REG_WRITEH, val >> 8); - rpi_touchscreen_i2c_write(ts, REG_WRITEL, val); -#endif + mipi_dsi_generic_write(ts->dsi, msg, sizeof(msg)); return 0; } diff --git a/drivers/gpu/drm/pl111/Makefile b/drivers/gpu/drm/pl111/Makefile index 9c5e8dba8ac6..19a8189dc54f 100644 --- a/drivers/gpu/drm/pl111/Makefile +++ b/drivers/gpu/drm/pl111/Makefile @@ -3,6 +3,7 @@ pl111_drm-y += pl111_display.o \ pl111_versatile.o \ pl111_drv.o +pl111_drm-$(CONFIG_ARCH_VEXPRESS) += pl111_vexpress.o pl111_drm-$(CONFIG_DEBUG_FS) += pl111_debugfs.o obj-$(CONFIG_DRM_PL111) += pl111_drm.o diff --git a/drivers/gpu/drm/pl111/pl111_drm.h b/drivers/gpu/drm/pl111/pl111_drm.h index 8639b2d4ddf7..ce4501d0ab48 100644 --- a/drivers/gpu/drm/pl111/pl111_drm.h +++ b/drivers/gpu/drm/pl111/pl111_drm.h @@ -79,6 +79,7 @@ struct pl111_drm_dev_private { const struct pl111_variant_data *variant; void (*variant_display_enable) (struct drm_device *drm, u32 format); void (*variant_display_disable) (struct drm_device *drm); + bool use_device_memory; }; int pl111_display_init(struct drm_device *dev); diff --git a/drivers/gpu/drm/pl111/pl111_drv.c b/drivers/gpu/drm/pl111/pl111_drv.c index 4621259d5387..454ff0804642 100644 --- a/drivers/gpu/drm/pl111/pl111_drv.c +++ b/drivers/gpu/drm/pl111/pl111_drv.c @@ -60,6 +60,7 @@ #include <linux/slab.h> #include <linux/of.h> #include <linux/of_graph.h> +#include <linux/of_reserved_mem.h> #include <drm/drmP.h> #include <drm/drm_atomic_helper.h> @@ -207,6 +208,24 @@ finish: return ret; } +static struct drm_gem_object * +pl111_gem_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sgt) +{ + struct pl111_drm_dev_private *priv = dev->dev_private; + + /* + * When using device-specific reserved memory we can't import + * DMA buffers: those are passed by reference in any global + * memory and we can only handle a specific range of memory. + */ + if (priv->use_device_memory) + return ERR_PTR(-EINVAL); + + return drm_gem_cma_prime_import_sg_table(dev, attach, sgt); +} + DEFINE_DRM_GEM_CMA_FOPS(drm_fops); static struct drm_driver pl111_drm_driver = { @@ -227,7 +246,7 @@ static struct drm_driver pl111_drm_driver = { .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_import = drm_gem_prime_import, - .gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table, + .gem_prime_import_sg_table = pl111_gem_import_sg_table, .gem_prime_export = drm_gem_prime_export, .gem_prime_get_sg_table = drm_gem_cma_prime_get_sg_table, @@ -257,6 +276,12 @@ static int pl111_amba_probe(struct amba_device *amba_dev, drm->dev_private = priv; priv->variant = variant; + ret = of_reserved_mem_device_init(dev); + if (!ret) { + dev_info(dev, "using device-specific reserved memory\n"); + priv->use_device_memory = true; + } + if (of_property_read_u32(dev->of_node, "max-memory-bandwidth", &priv->memory_bw)) { dev_info(dev, "no max memory bandwidth specified, assume unlimited\n"); @@ -275,7 +300,8 @@ static int pl111_amba_probe(struct amba_device *amba_dev, priv->regs = devm_ioremap_resource(dev, &amba_dev->res); if (IS_ERR(priv->regs)) { dev_err(dev, "%s failed mmio\n", __func__); - return PTR_ERR(priv->regs); + ret = PTR_ERR(priv->regs); + goto dev_unref; } /* This may override some variant settings */ @@ -305,11 +331,14 @@ static int pl111_amba_probe(struct amba_device *amba_dev, dev_unref: drm_dev_unref(drm); + of_reserved_mem_device_release(dev); + return ret; } static int pl111_amba_remove(struct amba_device *amba_dev) { + struct device *dev = &amba_dev->dev; struct drm_device *drm = amba_get_drvdata(amba_dev); struct pl111_drm_dev_private *priv = drm->dev_private; @@ -319,6 +348,7 @@ static int pl111_amba_remove(struct amba_device *amba_dev) drm_panel_bridge_remove(priv->bridge); drm_mode_config_cleanup(drm); drm_dev_unref(drm); + of_reserved_mem_device_release(dev); return 0; } diff --git a/drivers/gpu/drm/pl111/pl111_versatile.c b/drivers/gpu/drm/pl111/pl111_versatile.c index 9302f516045e..b9baefdba38a 100644 --- a/drivers/gpu/drm/pl111/pl111_versatile.c +++ b/drivers/gpu/drm/pl111/pl111_versatile.c @@ -1,12 +1,14 @@ #include <linux/amba/clcd-regs.h> #include <linux/device.h> #include <linux/of.h> +#include <linux/of_platform.h> #include <linux/regmap.h> #include <linux/mfd/syscon.h> #include <linux/bitops.h> #include <linux/module.h> #include <drm/drmP.h> #include "pl111_versatile.h" +#include "pl111_vexpress.h" #include "pl111_drm.h" static struct regmap *versatile_syscon_map; @@ -22,6 +24,7 @@ enum versatile_clcd { REALVIEW_CLCD_PB11MP, REALVIEW_CLCD_PBA8, REALVIEW_CLCD_PBX, + VEXPRESS_CLCD_V2M, }; static const struct of_device_id versatile_clcd_of_match[] = { @@ -53,6 +56,10 @@ static const struct of_device_id versatile_clcd_of_match[] = { .compatible = "arm,realview-pbx-syscon", .data = (void *)REALVIEW_CLCD_PBX, }, + { + .compatible = "arm,vexpress-muxfpga", + .data = (void *)VEXPRESS_CLCD_V2M, + }, {}, }; @@ -286,12 +293,26 @@ static const struct pl111_variant_data pl111_realview = { .fb_bpp = 16, }; +/* + * Versatile Express PL111 variant, again we just push the maximum + * BPP to 16 to be able to get 1024x768 without saturating the memory + * bus. The clockdivider also seems broken on the Versatile Express. + */ +static const struct pl111_variant_data pl111_vexpress = { + .name = "PL111 Versatile Express", + .formats = pl111_realview_pixel_formats, + .nformats = ARRAY_SIZE(pl111_realview_pixel_formats), + .fb_bpp = 16, + .broken_clockdivider = true, +}; + int pl111_versatile_init(struct device *dev, struct pl111_drm_dev_private *priv) { const struct of_device_id *clcd_id; enum versatile_clcd versatile_clcd_type; struct device_node *np; struct regmap *map; + int ret; np = of_find_matching_node_and_match(NULL, versatile_clcd_of_match, &clcd_id); @@ -301,7 +322,33 @@ int pl111_versatile_init(struct device *dev, struct pl111_drm_dev_private *priv) } versatile_clcd_type = (enum versatile_clcd)clcd_id->data; - map = syscon_node_to_regmap(np); + /* Versatile Express special handling */ + if (versatile_clcd_type == VEXPRESS_CLCD_V2M) { + struct platform_device *pdev; + + /* Registers a driver for the muxfpga */ + ret = vexpress_muxfpga_init(); + if (ret) { + dev_err(dev, "unable to initialize muxfpga driver\n"); + return ret; + } + + /* Call into deep Vexpress configuration API */ + pdev = of_find_device_by_node(np); + if (!pdev) { + dev_err(dev, "can't find the sysreg device, deferring\n"); + return -EPROBE_DEFER; + } + map = dev_get_drvdata(&pdev->dev); + if (!map) { + dev_err(dev, "sysreg has not yet probed\n"); + platform_device_put(pdev); + return -EPROBE_DEFER; + } + } else { + map = syscon_node_to_regmap(np); + } + if (IS_ERR(map)) { dev_err(dev, "no Versatile syscon regmap\n"); return PTR_ERR(map); @@ -340,6 +387,13 @@ int pl111_versatile_init(struct device *dev, struct pl111_drm_dev_private *priv) priv->variant_display_disable = pl111_realview_clcd_disable; dev_info(dev, "set up callbacks for RealView PL111\n"); break; + case VEXPRESS_CLCD_V2M: + priv->variant = &pl111_vexpress; + dev_info(dev, "initializing Versatile Express PL111\n"); + ret = pl111_vexpress_clcd_init(dev, priv, map); + if (ret) + return ret; + break; default: dev_info(dev, "unknown Versatile system controller\n"); break; diff --git a/drivers/gpu/drm/pl111/pl111_vexpress.c b/drivers/gpu/drm/pl111/pl111_vexpress.c new file mode 100644 index 000000000000..a534b225e31b --- /dev/null +++ b/drivers/gpu/drm/pl111/pl111_vexpress.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Versatile Express PL111 handling + * Copyright (C) 2018 Linus Walleij + * + * This module binds to the "arm,vexpress-muxfpga" device on the + * Versatile Express configuration bus and sets up which CLCD instance + * gets muxed out on the DVI bridge. + */ +#include <linux/device.h> +#include <linux/module.h> +#include <linux/regmap.h> +#include <linux/vexpress.h> +#include <linux/platform_device.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/of_platform.h> +#include "pl111_drm.h" +#include "pl111_vexpress.h" + +#define VEXPRESS_FPGAMUX_MOTHERBOARD 0x00 +#define VEXPRESS_FPGAMUX_DAUGHTERBOARD_1 0x01 +#define VEXPRESS_FPGAMUX_DAUGHTERBOARD_2 0x02 + +int pl111_vexpress_clcd_init(struct device *dev, + struct pl111_drm_dev_private *priv, + struct regmap *map) +{ + struct device_node *root; + struct device_node *child; + struct device_node *ct_clcd = NULL; + bool has_coretile_clcd = false; + bool has_coretile_hdlcd = false; + bool mux_motherboard = true; + u32 val; + int ret; + + /* + * Check if we have a CLCD or HDLCD on the core tile by checking if a + * CLCD or HDLCD is available in the root of the device tree. + */ + root = of_find_node_by_path("/"); + if (!root) + return -EINVAL; + + for_each_available_child_of_node(root, child) { + if (of_device_is_compatible(child, "arm,pl111")) { + has_coretile_clcd = true; + ct_clcd = child; + break; + } + if (of_device_is_compatible(child, "arm,hdlcd")) { + has_coretile_hdlcd = true; + break; + } + } + + /* + * If there is a coretile HDLCD and it has a driver, + * do not mux the CLCD on the motherboard to the DVI. + */ + if (has_coretile_hdlcd && IS_ENABLED(CONFIG_DRM_HDLCD)) + mux_motherboard = false; + + /* + * On the Vexpress CA9 we let the CLCD on the coretile + * take precedence, so also in this case do not mux the + * motherboard to the DVI. + */ + if (has_coretile_clcd) + mux_motherboard = false; + + if (mux_motherboard) { + dev_info(dev, "DVI muxed to motherboard CLCD\n"); + val = VEXPRESS_FPGAMUX_MOTHERBOARD; + } else if (ct_clcd == dev->of_node) { + dev_info(dev, + "DVI muxed to daughterboard 1 (core tile) CLCD\n"); + val = VEXPRESS_FPGAMUX_DAUGHTERBOARD_1; + } else { + dev_info(dev, "core tile graphics present\n"); + dev_info(dev, "this device will be deactivated\n"); + return -ENODEV; + } + + ret = regmap_write(map, 0, val); + if (ret) { + dev_err(dev, "error setting DVI muxmode\n"); + return -ENODEV; + } + + return 0; +} + +/* + * This sets up the regmap pointer that will then be retrieved by + * the detection code in pl111_versatile.c and passed in to the + * pl111_vexpress_clcd_init() function above. + */ +static int vexpress_muxfpga_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct regmap *map; + + map = devm_regmap_init_vexpress_config(&pdev->dev); + if (IS_ERR(map)) + return PTR_ERR(map); + dev_set_drvdata(dev, map); + + return 0; +} + +static const struct of_device_id vexpress_muxfpga_match[] = { + { .compatible = "arm,vexpress-muxfpga", } +}; + +static struct platform_driver vexpress_muxfpga_driver = { + .driver = { + .name = "vexpress-muxfpga", + .of_match_table = of_match_ptr(vexpress_muxfpga_match), + }, + .probe = vexpress_muxfpga_probe, +}; + +int vexpress_muxfpga_init(void) +{ + int ret; + + ret = platform_driver_register(&vexpress_muxfpga_driver); + /* -EBUSY just means this driver is already registered */ + if (ret == -EBUSY) + ret = 0; + return ret; +} diff --git a/drivers/gpu/drm/pl111/pl111_vexpress.h b/drivers/gpu/drm/pl111/pl111_vexpress.h new file mode 100644 index 000000000000..5d3681bb4c00 --- /dev/null +++ b/drivers/gpu/drm/pl111/pl111_vexpress.h @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 + +struct device; +struct pl111_drm_dev_private; +struct regmap; + +#ifdef CONFIG_ARCH_VEXPRESS + +int pl111_vexpress_clcd_init(struct device *dev, + struct pl111_drm_dev_private *priv, + struct regmap *map); + +int vexpress_muxfpga_init(void); + +#else + +static inline int pl111_vexpress_clcd_init(struct device *dev, + struct pl111_drm_dev_private *priv, + struct regmap *map) +{ + return -ENODEV; +} + +static inline int vexpress_muxfpga_init(void) +{ + return 0; +} + +#endif diff --git a/drivers/gpu/drm/qxl/qxl_cmd.c b/drivers/gpu/drm/qxl/qxl_cmd.c index 01665b98c57e..208af9f37914 100644 --- a/drivers/gpu/drm/qxl/qxl_cmd.c +++ b/drivers/gpu/drm/qxl/qxl_cmd.c @@ -339,12 +339,9 @@ int qxl_io_update_area(struct qxl_device *qdev, struct qxl_bo *surf, surface_height = surf->surf.height; if (area->left < 0 || area->top < 0 || - area->right > surface_width || area->bottom > surface_height) { - qxl_io_log(qdev, "%s: not doing area update for " - "%d, (%d,%d,%d,%d) (%d,%d)\n", __func__, surface_id, area->left, - area->top, area->right, area->bottom, surface_width, surface_height); + area->right > surface_width || area->bottom > surface_height) return -EINVAL; - } + mutex_lock(&qdev->update_area_mutex); qdev->ram_header->update_area = *area; qdev->ram_header->update_surface = surface_id; @@ -372,6 +369,7 @@ void qxl_io_flush_surfaces(struct qxl_device *qdev) void qxl_io_destroy_primary(struct qxl_device *qdev) { wait_for_io_cmd(qdev, 0, QXL_IO_DESTROY_PRIMARY_ASYNC); + qdev->primary_created = false; } void qxl_io_create_primary(struct qxl_device *qdev, @@ -397,6 +395,7 @@ void qxl_io_create_primary(struct qxl_device *qdev, create->type = QXL_SURF_TYPE_PRIMARY; wait_for_io_cmd(qdev, 0, QXL_IO_CREATE_PRIMARY_ASYNC); + qdev->primary_created = true; } void qxl_io_memslot_add(struct qxl_device *qdev, uint8_t id) @@ -405,20 +404,6 @@ void qxl_io_memslot_add(struct qxl_device *qdev, uint8_t id) wait_for_io_cmd(qdev, id, QXL_IO_MEMSLOT_ADD_ASYNC); } -void qxl_io_log(struct qxl_device *qdev, const char *fmt, ...) -{ - va_list args; - - va_start(args, fmt); - vsnprintf(qdev->ram_header->log_buf, QXL_LOG_BUF_SIZE, fmt, args); - va_end(args); - /* - * DO not do a DRM output here - this will call printk, which will - * call back into qxl for rendering (qxl_fb) - */ - outb(0, qdev->io_base + QXL_IO_LOG); -} - void qxl_io_reset(struct qxl_device *qdev) { outb(0, qdev->io_base + QXL_IO_RESET); @@ -426,19 +411,6 @@ void qxl_io_reset(struct qxl_device *qdev) void qxl_io_monitors_config(struct qxl_device *qdev) { - qxl_io_log(qdev, "%s: %d [%dx%d+%d+%d]\n", __func__, - qdev->monitors_config ? - qdev->monitors_config->count : -1, - qdev->monitors_config && qdev->monitors_config->count ? - qdev->monitors_config->heads[0].width : -1, - qdev->monitors_config && qdev->monitors_config->count ? - qdev->monitors_config->heads[0].height : -1, - qdev->monitors_config && qdev->monitors_config->count ? - qdev->monitors_config->heads[0].x : -1, - qdev->monitors_config && qdev->monitors_config->count ? - qdev->monitors_config->heads[0].y : -1 - ); - wait_for_io_cmd(qdev, 0, QXL_IO_MONITORS_CONFIG_ASYNC); } diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index 820cbca3bf6e..b8cda9449241 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -48,12 +48,8 @@ static void qxl_alloc_client_monitors_config(struct qxl_device *qdev, unsigned c qdev->client_monitors_config = kzalloc( sizeof(struct qxl_monitors_config) + sizeof(struct qxl_head) * count, GFP_KERNEL); - if (!qdev->client_monitors_config) { - qxl_io_log(qdev, - "%s: allocation failure for %u heads\n", - __func__, count); + if (!qdev->client_monitors_config) return; - } } qdev->client_monitors_config->count = count; } @@ -74,12 +70,8 @@ static int qxl_display_copy_rom_client_monitors_config(struct qxl_device *qdev) num_monitors = qdev->rom->client_monitors_config.count; crc = crc32(0, (const uint8_t *)&qdev->rom->client_monitors_config, sizeof(qdev->rom->client_monitors_config)); - if (crc != qdev->rom->client_monitors_config_crc) { - qxl_io_log(qdev, "crc mismatch: have %X (%zd) != %X\n", crc, - sizeof(qdev->rom->client_monitors_config), - qdev->rom->client_monitors_config_crc); + if (crc != qdev->rom->client_monitors_config_crc) return MONITORS_CONFIG_BAD_CRC; - } if (!num_monitors) { DRM_DEBUG_KMS("no client monitors configured\n"); return status; @@ -170,12 +162,10 @@ void qxl_display_read_client_monitors_config(struct qxl_device *qdev) udelay(5); } if (status == MONITORS_CONFIG_BAD_CRC) { - qxl_io_log(qdev, "config: bad crc\n"); DRM_DEBUG_KMS("ignoring client monitors config: bad crc"); return; } if (status == MONITORS_CONFIG_UNCHANGED) { - qxl_io_log(qdev, "config: unchanged\n"); DRM_DEBUG_KMS("ignoring client monitors config: unchanged"); return; } @@ -268,6 +258,89 @@ static int qxl_add_common_modes(struct drm_connector *connector, return i - 1; } +static void qxl_send_monitors_config(struct qxl_device *qdev) +{ + int i; + + BUG_ON(!qdev->ram_header->monitors_config); + + if (qdev->monitors_config->count == 0) + return; + + for (i = 0 ; i < qdev->monitors_config->count ; ++i) { + struct qxl_head *head = &qdev->monitors_config->heads[i]; + + if (head->y > 8192 || head->x > 8192 || + head->width > 8192 || head->height > 8192) { + DRM_ERROR("head %d wrong: %dx%d+%d+%d\n", + i, head->width, head->height, + head->x, head->y); + return; + } + } + qxl_io_monitors_config(qdev); +} + +static void qxl_crtc_update_monitors_config(struct drm_crtc *crtc, + const char *reason) +{ + struct drm_device *dev = crtc->dev; + struct qxl_device *qdev = dev->dev_private; + struct qxl_crtc *qcrtc = to_qxl_crtc(crtc); + struct qxl_head head; + int oldcount, i = qcrtc->index; + + if (!qdev->primary_created) { + DRM_DEBUG_KMS("no primary surface, skip (%s)\n", reason); + return; + } + + if (!qdev->monitors_config || + qdev->monitors_config->max_allowed <= i) + return; + + head.id = i; + head.flags = 0; + oldcount = qdev->monitors_config->count; + if (crtc->state->active) { + struct drm_display_mode *mode = &crtc->mode; + head.width = mode->hdisplay; + head.height = mode->vdisplay; + head.x = crtc->x; + head.y = crtc->y; + if (qdev->monitors_config->count < i + 1) + qdev->monitors_config->count = i + 1; + } else if (i > 0) { + head.width = 0; + head.height = 0; + head.x = 0; + head.y = 0; + if (qdev->monitors_config->count == i + 1) + qdev->monitors_config->count = i; + } else { + DRM_DEBUG_KMS("inactive head 0, skip (%s)\n", reason); + return; + } + + if (head.width == qdev->monitors_config->heads[i].width && + head.height == qdev->monitors_config->heads[i].height && + head.x == qdev->monitors_config->heads[i].x && + head.y == qdev->monitors_config->heads[i].y && + oldcount == qdev->monitors_config->count) + return; + + DRM_DEBUG_KMS("head %d, %dx%d, at +%d+%d, %s (%s)\n", + i, head.width, head.height, head.x, head.y, + crtc->state->active ? "on" : "off", reason); + if (oldcount != qdev->monitors_config->count) + DRM_DEBUG_KMS("active heads %d -> %d (%d total)\n", + oldcount, qdev->monitors_config->count, + qdev->monitors_config->max_allowed); + + qdev->monitors_config->heads[i] = head; + qxl_send_monitors_config(qdev); +} + static void qxl_crtc_atomic_flush(struct drm_crtc *crtc, struct drm_crtc_state *old_crtc_state) { @@ -283,6 +356,8 @@ static void qxl_crtc_atomic_flush(struct drm_crtc *crtc, drm_crtc_send_vblank_event(crtc, event); spin_unlock_irqrestore(&dev->event_lock, flags); } + + qxl_crtc_update_monitors_config(crtc, "flush"); } static void qxl_crtc_destroy(struct drm_crtc *crtc) @@ -381,95 +456,19 @@ qxl_framebuffer_init(struct drm_device *dev, return 0; } -static bool qxl_crtc_mode_fixup(struct drm_crtc *crtc, - const struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) -{ - struct drm_device *dev = crtc->dev; - struct qxl_device *qdev = dev->dev_private; - - qxl_io_log(qdev, "%s: (%d,%d) => (%d,%d)\n", - __func__, - mode->hdisplay, mode->vdisplay, - adjusted_mode->hdisplay, - adjusted_mode->vdisplay); - return true; -} - -static void -qxl_send_monitors_config(struct qxl_device *qdev) -{ - int i; - - BUG_ON(!qdev->ram_header->monitors_config); - - if (qdev->monitors_config->count == 0) { - qxl_io_log(qdev, "%s: 0 monitors??\n", __func__); - return; - } - for (i = 0 ; i < qdev->monitors_config->count ; ++i) { - struct qxl_head *head = &qdev->monitors_config->heads[i]; - - if (head->y > 8192 || head->x > 8192 || - head->width > 8192 || head->height > 8192) { - DRM_ERROR("head %d wrong: %dx%d+%d+%d\n", - i, head->width, head->height, - head->x, head->y); - return; - } - } - qxl_io_monitors_config(qdev); -} - -static void qxl_monitors_config_set(struct qxl_device *qdev, - int index, - unsigned x, unsigned y, - unsigned width, unsigned height, - unsigned surf_id) -{ - DRM_DEBUG_KMS("%d:%dx%d+%d+%d\n", index, width, height, x, y); - qdev->monitors_config->heads[index].x = x; - qdev->monitors_config->heads[index].y = y; - qdev->monitors_config->heads[index].width = width; - qdev->monitors_config->heads[index].height = height; - qdev->monitors_config->heads[index].surface_id = surf_id; - -} - -static void qxl_mode_set_nofb(struct drm_crtc *crtc) -{ - struct qxl_device *qdev = crtc->dev->dev_private; - struct qxl_crtc *qcrtc = to_qxl_crtc(crtc); - struct drm_display_mode *mode = &crtc->mode; - - DRM_DEBUG("Mode set (%d,%d)\n", - mode->hdisplay, mode->vdisplay); - - qxl_monitors_config_set(qdev, qcrtc->index, 0, 0, - mode->hdisplay, mode->vdisplay, 0); - -} - static void qxl_crtc_atomic_enable(struct drm_crtc *crtc, struct drm_crtc_state *old_state) { - DRM_DEBUG("\n"); + qxl_crtc_update_monitors_config(crtc, "enable"); } static void qxl_crtc_atomic_disable(struct drm_crtc *crtc, struct drm_crtc_state *old_state) { - struct qxl_crtc *qcrtc = to_qxl_crtc(crtc); - struct qxl_device *qdev = crtc->dev->dev_private; - - qxl_monitors_config_set(qdev, qcrtc->index, 0, 0, 0, 0, 0); - - qxl_send_monitors_config(qdev); + qxl_crtc_update_monitors_config(crtc, "disable"); } static const struct drm_crtc_helper_funcs qxl_crtc_helper_funcs = { - .mode_fixup = qxl_crtc_mode_fixup, - .mode_set_nofb = qxl_mode_set_nofb, .atomic_flush = qxl_crtc_atomic_flush, .atomic_enable = qxl_crtc_atomic_enable, .atomic_disable = qxl_crtc_atomic_disable, @@ -613,12 +612,6 @@ static void qxl_primary_atomic_disable(struct drm_plane *plane, } } -static int qxl_plane_atomic_check(struct drm_plane *plane, - struct drm_plane_state *state) -{ - return 0; -} - static void qxl_cursor_atomic_update(struct drm_plane *plane, struct drm_plane_state *old_state) { @@ -824,7 +817,6 @@ static const uint32_t qxl_cursor_plane_formats[] = { }; static const struct drm_plane_helper_funcs qxl_cursor_helper_funcs = { - .atomic_check = qxl_plane_atomic_check, .atomic_update = qxl_cursor_atomic_update, .atomic_disable = qxl_cursor_atomic_disable, .prepare_fb = qxl_plane_prepare_fb, @@ -949,81 +941,6 @@ free_mem: return r; } -static void qxl_enc_dpms(struct drm_encoder *encoder, int mode) -{ - DRM_DEBUG("\n"); -} - -static void qxl_enc_prepare(struct drm_encoder *encoder) -{ - DRM_DEBUG("\n"); -} - -static void qxl_write_monitors_config_for_encoder(struct qxl_device *qdev, - struct drm_encoder *encoder) -{ - int i; - struct qxl_output *output = drm_encoder_to_qxl_output(encoder); - struct qxl_head *head; - struct drm_display_mode *mode; - - BUG_ON(!encoder); - /* TODO: ugly, do better */ - i = output->index; - if (!qdev->monitors_config || - qdev->monitors_config->max_allowed <= i) { - DRM_ERROR( - "head number too large or missing monitors config: %p, %d", - qdev->monitors_config, - qdev->monitors_config ? - qdev->monitors_config->max_allowed : -1); - return; - } - if (!encoder->crtc) { - DRM_ERROR("missing crtc on encoder %p\n", encoder); - return; - } - if (i != 0) - DRM_DEBUG("missing for multiple monitors: no head holes\n"); - head = &qdev->monitors_config->heads[i]; - head->id = i; - if (encoder->crtc->enabled) { - mode = &encoder->crtc->mode; - head->width = mode->hdisplay; - head->height = mode->vdisplay; - head->x = encoder->crtc->x; - head->y = encoder->crtc->y; - if (qdev->monitors_config->count < i + 1) - qdev->monitors_config->count = i + 1; - } else { - head->width = 0; - head->height = 0; - head->x = 0; - head->y = 0; - } - DRM_DEBUG_KMS("setting head %d to +%d+%d %dx%d out of %d\n", - i, head->x, head->y, head->width, head->height, qdev->monitors_config->count); - head->flags = 0; - /* TODO - somewhere else to call this for multiple monitors - * (config_commit?) */ - qxl_send_monitors_config(qdev); -} - -static void qxl_enc_commit(struct drm_encoder *encoder) -{ - struct qxl_device *qdev = encoder->dev->dev_private; - - qxl_write_monitors_config_for_encoder(qdev, encoder); - DRM_DEBUG("\n"); -} - -static void qxl_enc_mode_set(struct drm_encoder *encoder, - struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) -{ - DRM_DEBUG("\n"); -} - static int qxl_conn_get_modes(struct drm_connector *connector) { unsigned pwidth = 1024; @@ -1069,10 +986,6 @@ static struct drm_encoder *qxl_best_encoder(struct drm_connector *connector) static const struct drm_encoder_helper_funcs qxl_enc_helper_funcs = { - .dpms = qxl_enc_dpms, - .prepare = qxl_enc_prepare, - .mode_set = qxl_enc_mode_set, - .commit = qxl_enc_commit, }; static const struct drm_connector_helper_funcs qxl_connector_helper_funcs = { @@ -1100,21 +1013,11 @@ static enum drm_connector_status qxl_conn_detect( qxl_head_enabled(&qdev->client_monitors_config->heads[output->index]); DRM_DEBUG("#%d connected: %d\n", output->index, connected); - if (!connected) - qxl_monitors_config_set(qdev, output->index, 0, 0, 0, 0, 0); return connected ? connector_status_connected : connector_status_disconnected; } -static int qxl_conn_set_property(struct drm_connector *connector, - struct drm_property *property, - uint64_t value) -{ - DRM_DEBUG("\n"); - return 0; -} - static void qxl_conn_destroy(struct drm_connector *connector) { struct qxl_output *qxl_output = @@ -1129,7 +1032,6 @@ static const struct drm_connector_funcs qxl_connector_funcs = { .dpms = drm_helper_connector_dpms, .detect = qxl_conn_detect, .fill_modes = drm_helper_probe_single_connector_modes, - .set_property = qxl_conn_set_property, .destroy = qxl_conn_destroy, .reset = drm_atomic_helper_connector_reset, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h index 864b456080c4..01220d386b0a 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.h +++ b/drivers/gpu/drm/qxl/qxl_drv.h @@ -299,9 +299,6 @@ struct qxl_device { int monitors_config_height; }; -/* forward declaration for QXL_INFO_IO */ -__printf(2,3) void qxl_io_log(struct qxl_device *qdev, const char *fmt, ...); - extern const struct drm_ioctl_desc qxl_ioctls[]; extern int qxl_max_ioctl; diff --git a/drivers/gpu/drm/qxl/qxl_fb.c b/drivers/gpu/drm/qxl/qxl_fb.c index 338891401f35..9a6752606079 100644 --- a/drivers/gpu/drm/qxl/qxl_fb.c +++ b/drivers/gpu/drm/qxl/qxl_fb.c @@ -185,8 +185,6 @@ static int qxlfb_framebuffer_dirty(struct drm_framebuffer *fb, /* * we are using a shadow draw buffer, at qdev->surface0_shadow */ - qxl_io_log(qdev, "dirty x[%d, %d], y[%d, %d]\n", clips->x1, clips->x2, - clips->y1, clips->y2); image->dx = clips->x1; image->dy = clips->y1; image->width = clips->x2 - clips->x1; diff --git a/drivers/gpu/drm/qxl/qxl_irq.c b/drivers/gpu/drm/qxl/qxl_irq.c index 23a40106ab53..3bb31add6350 100644 --- a/drivers/gpu/drm/qxl/qxl_irq.c +++ b/drivers/gpu/drm/qxl/qxl_irq.c @@ -57,10 +57,9 @@ irqreturn_t qxl_irq_handler(int irq, void *arg) * to avoid endless loops). */ qdev->irq_received_error++; - qxl_io_log(qdev, "%s: driver is in bug mode.\n", __func__); + DRM_WARN("driver is in bug mode\n"); } if (pending & QXL_INTERRUPT_CLIENT_MONITORS_CONFIG) { - qxl_io_log(qdev, "QXL_INTERRUPT_CLIENT_MONITORS_CONFIG\n"); schedule_work(&qdev->client_monitors_config_work); } qdev->ram_header->int_mask = QXL_INTERRUPT_MASK; diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c index ee2340e31f06..86a1fb32f6db 100644 --- a/drivers/gpu/drm/qxl/qxl_ttm.c +++ b/drivers/gpu/drm/qxl/qxl_ttm.c @@ -105,16 +105,16 @@ static void qxl_ttm_global_fini(struct qxl_device *qdev) static struct vm_operations_struct qxl_ttm_vm_ops; static const struct vm_operations_struct *ttm_vm_ops; -static int qxl_ttm_fault(struct vm_fault *vmf) +static vm_fault_t qxl_ttm_fault(struct vm_fault *vmf) { struct ttm_buffer_object *bo; - int r; + vm_fault_t ret; bo = (struct ttm_buffer_object *)vmf->vma->vm_private_data; if (bo == NULL) return VM_FAULT_NOPAGE; - r = ttm_vm_ops->fault(vmf); - return r; + ret = ttm_vm_ops->fault(vmf); + return ret; } int qxl_mmap(struct file *filp, struct vm_area_struct *vma) diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c index c4420538ec85..f2a0bd1e5119 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c @@ -767,7 +767,8 @@ static irqreturn_t rcar_du_crtc_irq(int irq, void *arg) * Initialization */ -int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index) +int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int swindex, + unsigned int hwindex) { static const unsigned int mmio_offsets[] = { DU0_REG_OFFSET, DU1_REG_OFFSET, DU2_REG_OFFSET, DU3_REG_OFFSET @@ -775,7 +776,7 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index) struct rcar_du_device *rcdu = rgrp->dev; struct platform_device *pdev = to_platform_device(rcdu->dev); - struct rcar_du_crtc *rcrtc = &rcdu->crtcs[index]; + struct rcar_du_crtc *rcrtc = &rcdu->crtcs[swindex]; struct drm_crtc *crtc = &rcrtc->crtc; struct drm_plane *primary; unsigned int irqflags; @@ -787,7 +788,7 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index) /* Get the CRTC clock and the optional external clock. */ if (rcar_du_has(rcdu, RCAR_DU_FEATURE_CRTC_IRQ_CLOCK)) { - sprintf(clk_name, "du.%u", index); + sprintf(clk_name, "du.%u", hwindex); name = clk_name; } else { name = NULL; @@ -795,16 +796,16 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index) rcrtc->clock = devm_clk_get(rcdu->dev, name); if (IS_ERR(rcrtc->clock)) { - dev_err(rcdu->dev, "no clock for CRTC %u\n", index); + dev_err(rcdu->dev, "no clock for DU channel %u\n", hwindex); return PTR_ERR(rcrtc->clock); } - sprintf(clk_name, "dclkin.%u", index); + sprintf(clk_name, "dclkin.%u", hwindex); clk = devm_clk_get(rcdu->dev, clk_name); if (!IS_ERR(clk)) { rcrtc->extclock = clk; } else if (PTR_ERR(rcrtc->clock) == -EPROBE_DEFER) { - dev_info(rcdu->dev, "can't get external clock %u\n", index); + dev_info(rcdu->dev, "can't get external clock %u\n", hwindex); return -EPROBE_DEFER; } @@ -813,13 +814,13 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index) spin_lock_init(&rcrtc->vblank_lock); rcrtc->group = rgrp; - rcrtc->mmio_offset = mmio_offsets[index]; - rcrtc->index = index; + rcrtc->mmio_offset = mmio_offsets[hwindex]; + rcrtc->index = hwindex; if (rcar_du_has(rcdu, RCAR_DU_FEATURE_VSP1_SOURCE)) primary = &rcrtc->vsp->planes[rcrtc->vsp_pipe].plane; else - primary = &rgrp->planes[index % 2].plane; + primary = &rgrp->planes[swindex % 2].plane; ret = drm_crtc_init_with_planes(rcdu->ddev, crtc, primary, NULL, &crtc_funcs, NULL); @@ -833,7 +834,8 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index) /* Register the interrupt handler. */ if (rcar_du_has(rcdu, RCAR_DU_FEATURE_CRTC_IRQ_CLOCK)) { - irq = platform_get_irq(pdev, index); + /* The IRQ's are associated with the CRTC (sw)index. */ + irq = platform_get_irq(pdev, swindex); irqflags = 0; } else { irq = platform_get_irq(pdev, 0); @@ -841,7 +843,7 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index) } if (irq < 0) { - dev_err(rcdu->dev, "no IRQ for CRTC %u\n", index); + dev_err(rcdu->dev, "no IRQ for CRTC %u\n", swindex); return irq; } @@ -849,7 +851,7 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index) dev_name(rcdu->dev), rcrtc); if (ret < 0) { dev_err(rcdu->dev, - "failed to register IRQ for CRTC %u\n", index); + "failed to register IRQ for CRTC %u\n", swindex); return ret; } diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h index fdc2bf99bda1..84b5e23a85b1 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h +++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h @@ -80,7 +80,8 @@ enum rcar_du_output { RCAR_DU_OUTPUT_MAX, }; -int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index); +int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int swindex, + unsigned int hwindex); void rcar_du_crtc_suspend(struct rcar_du_crtc *rcrtc); void rcar_du_crtc_resume(struct rcar_du_crtc *rcrtc); diff --git a/drivers/gpu/drm/rcar-du/rcar_du_drv.c b/drivers/gpu/drm/rcar-du/rcar_du_drv.c index 3917d839c04c..02aee6cb0e53 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_drv.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_drv.c @@ -40,7 +40,7 @@ static const struct rcar_du_device_info rzg1_du_r8a7743_info = { .gen = 2, .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK | RCAR_DU_FEATURE_EXT_CTRL_REGS, - .num_crtcs = 2, + .channels_mask = BIT(1) | BIT(0), .routes = { /* * R8A7743 has one RGB output and one LVDS output @@ -61,7 +61,7 @@ static const struct rcar_du_device_info rzg1_du_r8a7745_info = { .gen = 2, .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK | RCAR_DU_FEATURE_EXT_CTRL_REGS, - .num_crtcs = 2, + .channels_mask = BIT(1) | BIT(0), .routes = { /* * R8A7745 has two RGB outputs @@ -80,7 +80,7 @@ static const struct rcar_du_device_info rzg1_du_r8a7745_info = { static const struct rcar_du_device_info rcar_du_r8a7779_info = { .gen = 2, .features = 0, - .num_crtcs = 2, + .channels_mask = BIT(1) | BIT(0), .routes = { /* * R8A7779 has two RGB outputs and one (currently unsupported) @@ -102,7 +102,7 @@ static const struct rcar_du_device_info rcar_du_r8a7790_info = { .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK | RCAR_DU_FEATURE_EXT_CTRL_REGS, .quirks = RCAR_DU_QUIRK_ALIGN_128B, - .num_crtcs = 3, + .channels_mask = BIT(2) | BIT(1) | BIT(0), .routes = { /* * R8A7790 has one RGB output, two LVDS outputs and one @@ -129,7 +129,7 @@ static const struct rcar_du_device_info rcar_du_r8a7791_info = { .gen = 2, .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK | RCAR_DU_FEATURE_EXT_CTRL_REGS, - .num_crtcs = 2, + .channels_mask = BIT(1) | BIT(0), .routes = { /* * R8A779[13] has one RGB output, one LVDS output and one @@ -151,7 +151,7 @@ static const struct rcar_du_device_info rcar_du_r8a7792_info = { .gen = 2, .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK | RCAR_DU_FEATURE_EXT_CTRL_REGS, - .num_crtcs = 2, + .channels_mask = BIT(1) | BIT(0), .routes = { /* R8A7792 has two RGB outputs. */ [RCAR_DU_OUTPUT_DPAD0] = { @@ -169,7 +169,7 @@ static const struct rcar_du_device_info rcar_du_r8a7794_info = { .gen = 2, .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK | RCAR_DU_FEATURE_EXT_CTRL_REGS, - .num_crtcs = 2, + .channels_mask = BIT(1) | BIT(0), .routes = { /* * R8A7794 has two RGB outputs and one (currently unsupported) @@ -191,7 +191,7 @@ static const struct rcar_du_device_info rcar_du_r8a7795_info = { .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK | RCAR_DU_FEATURE_EXT_CTRL_REGS | RCAR_DU_FEATURE_VSP1_SOURCE, - .num_crtcs = 4, + .channels_mask = BIT(3) | BIT(2) | BIT(1) | BIT(0), .routes = { /* * R8A7795 has one RGB output, two HDMI outputs and one @@ -215,7 +215,7 @@ static const struct rcar_du_device_info rcar_du_r8a7795_info = { }, }, .num_lvds = 1, - .dpll_ch = BIT(1) | BIT(2), + .dpll_ch = BIT(2) | BIT(1), }; static const struct rcar_du_device_info rcar_du_r8a7796_info = { @@ -223,7 +223,7 @@ static const struct rcar_du_device_info rcar_du_r8a7796_info = { .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK | RCAR_DU_FEATURE_EXT_CTRL_REGS | RCAR_DU_FEATURE_VSP1_SOURCE, - .num_crtcs = 3, + .channels_mask = BIT(2) | BIT(1) | BIT(0), .routes = { /* * R8A7796 has one RGB output, one LVDS output and one HDMI @@ -246,12 +246,40 @@ static const struct rcar_du_device_info rcar_du_r8a7796_info = { .dpll_ch = BIT(1), }; +static const struct rcar_du_device_info rcar_du_r8a77965_info = { + .gen = 3, + .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK + | RCAR_DU_FEATURE_EXT_CTRL_REGS + | RCAR_DU_FEATURE_VSP1_SOURCE, + .channels_mask = BIT(3) | BIT(1) | BIT(0), + .routes = { + /* + * R8A77965 has one RGB output, one LVDS output and one HDMI + * output. + */ + [RCAR_DU_OUTPUT_DPAD0] = { + .possible_crtcs = BIT(2), + .port = 0, + }, + [RCAR_DU_OUTPUT_HDMI0] = { + .possible_crtcs = BIT(1), + .port = 1, + }, + [RCAR_DU_OUTPUT_LVDS0] = { + .possible_crtcs = BIT(0), + .port = 2, + }, + }, + .num_lvds = 1, + .dpll_ch = BIT(1), +}; + static const struct rcar_du_device_info rcar_du_r8a77970_info = { .gen = 3, .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK | RCAR_DU_FEATURE_EXT_CTRL_REGS | RCAR_DU_FEATURE_VSP1_SOURCE, - .num_crtcs = 1, + .channels_mask = BIT(0), .routes = { /* R8A77970 has one RGB output and one LVDS output. */ [RCAR_DU_OUTPUT_DPAD0] = { @@ -277,6 +305,7 @@ static const struct of_device_id rcar_du_of_table[] = { { .compatible = "renesas,du-r8a7794", .data = &rcar_du_r8a7794_info }, { .compatible = "renesas,du-r8a7795", .data = &rcar_du_r8a7795_info }, { .compatible = "renesas,du-r8a7796", .data = &rcar_du_r8a7796_info }, + { .compatible = "renesas,du-r8a77965", .data = &rcar_du_r8a77965_info }, { .compatible = "renesas,du-r8a77970", .data = &rcar_du_r8a77970_info }, { } }; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_drv.h b/drivers/gpu/drm/rcar-du/rcar_du_drv.h index 131d8e88b06c..b3a25e8e07d0 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_drv.h +++ b/drivers/gpu/drm/rcar-du/rcar_du_drv.h @@ -52,7 +52,7 @@ struct rcar_du_output_routing { * @gen: device generation (2 or 3) * @features: device features (RCAR_DU_FEATURE_*) * @quirks: device quirks (RCAR_DU_QUIRK_*) - * @num_crtcs: total number of CRTCs + * @channels_mask: bit mask of available DU channels * @routes: array of CRTC to output routes, indexed by output (RCAR_DU_OUTPUT_*) * @num_lvds: number of internal LVDS encoders */ @@ -60,7 +60,7 @@ struct rcar_du_device_info { unsigned int gen; unsigned int features; unsigned int quirks; - unsigned int num_crtcs; + unsigned int channels_mask; struct rcar_du_output_routing routes[RCAR_DU_OUTPUT_MAX]; unsigned int num_lvds; unsigned int dpll_ch; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_group.c b/drivers/gpu/drm/rcar-du/rcar_du_group.c index 2f37ea901873..d539cb290a35 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_group.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_group.c @@ -46,10 +46,13 @@ void rcar_du_group_write(struct rcar_du_group *rgrp, u32 reg, u32 data) static void rcar_du_group_setup_pins(struct rcar_du_group *rgrp) { - u32 defr6 = DEFR6_CODE | DEFR6_ODPM12_DISP; + u32 defr6 = DEFR6_CODE; - if (rgrp->num_crtcs > 1) - defr6 |= DEFR6_ODPM22_DISP; + if (rgrp->channels_mask & BIT(0)) + defr6 |= DEFR6_ODPM02_DISP; + + if (rgrp->channels_mask & BIT(1)) + defr6 |= DEFR6_ODPM12_DISP; rcar_du_group_write(rgrp, DEFR6, defr6); } @@ -80,10 +83,11 @@ static void rcar_du_group_setup_defr8(struct rcar_du_group *rgrp) * On Gen3 VSPD routing can't be configured, but DPAD routing * needs to be set despite having a single option available. */ - u32 crtc = ffs(possible_crtcs) - 1; + unsigned int rgb_crtc = ffs(possible_crtcs) - 1; + struct rcar_du_crtc *crtc = &rcdu->crtcs[rgb_crtc]; - if (crtc / 2 == rgrp->index) - defr8 |= DEFR8_DRGBS_DU(crtc); + if (crtc->index / 2 == rgrp->index) + defr8 |= DEFR8_DRGBS_DU(crtc->index); } rcar_du_group_write(rgrp, DEFR8, defr8); diff --git a/drivers/gpu/drm/rcar-du/rcar_du_group.h b/drivers/gpu/drm/rcar-du/rcar_du_group.h index 5e3adc6b31b5..42105aedecc8 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_group.h +++ b/drivers/gpu/drm/rcar-du/rcar_du_group.h @@ -25,6 +25,7 @@ struct rcar_du_device; * @dev: the DU device * @mmio_offset: registers offset in the device memory map * @index: group index + * @channels_mask: bitmask of populated DU channels in this group * @num_crtcs: number of CRTCs in this group (1 or 2) * @use_count: number of users of the group (rcar_du_group_(get|put)) * @used_crtcs: number of CRTCs currently in use @@ -39,6 +40,7 @@ struct rcar_du_group { unsigned int mmio_offset; unsigned int index; + unsigned int channels_mask; unsigned int num_crtcs; unsigned int use_count; unsigned int used_crtcs; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_kms.c b/drivers/gpu/drm/rcar-du/rcar_du_kms.c index f4ac0f884f00..f0bc7cc0e913 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_kms.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_kms.c @@ -428,7 +428,7 @@ static int rcar_du_vsps_init(struct rcar_du_device *rcdu) struct { struct device_node *np; unsigned int crtcs_mask; - } vsps[RCAR_DU_MAX_VSPS] = { { 0, }, }; + } vsps[RCAR_DU_MAX_VSPS] = { { NULL, }, }; unsigned int vsps_count = 0; unsigned int cells; unsigned int i; @@ -507,6 +507,8 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu) struct drm_fbdev_cma *fbdev; unsigned int num_encoders; unsigned int num_groups; + unsigned int swindex; + unsigned int hwindex; unsigned int i; int ret; @@ -520,7 +522,7 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu) dev->mode_config.funcs = &rcar_du_mode_config_funcs; dev->mode_config.helper_private = &rcar_du_mode_config_helper; - rcdu->num_crtcs = rcdu->info->num_crtcs; + rcdu->num_crtcs = hweight8(rcdu->info->channels_mask); ret = rcar_du_properties_init(rcdu); if (ret < 0) @@ -530,7 +532,7 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu) * Initialize vertical blanking interrupts handling. Start with vblank * disabled for all CRTCs. */ - ret = drm_vblank_init(dev, (1 << rcdu->info->num_crtcs) - 1); + ret = drm_vblank_init(dev, (1 << rcdu->num_crtcs) - 1); if (ret < 0) return ret; @@ -545,7 +547,10 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu) rgrp->dev = rcdu; rgrp->mmio_offset = mmio_offsets[i]; rgrp->index = i; - rgrp->num_crtcs = min(rcdu->num_crtcs - 2 * i, 2U); + /* Extract the channel mask for this group only. */ + rgrp->channels_mask = (rcdu->info->channels_mask >> (2 * i)) + & GENMASK(1, 0); + rgrp->num_crtcs = hweight8(rgrp->channels_mask); /* * If we have more than one CRTCs in this group pre-associate @@ -572,10 +577,16 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu) } /* Create the CRTCs. */ - for (i = 0; i < rcdu->num_crtcs; ++i) { - struct rcar_du_group *rgrp = &rcdu->groups[i / 2]; + for (swindex = 0, hwindex = 0; swindex < rcdu->num_crtcs; ++hwindex) { + struct rcar_du_group *rgrp; + + /* Skip unpopulated DU channels. */ + if (!(rcdu->info->channels_mask & BIT(hwindex))) + continue; + + rgrp = &rcdu->groups[hwindex / 2]; - ret = rcar_du_crtc_create(rgrp, i); + ret = rcar_du_crtc_create(rgrp, swindex++, hwindex); if (ret < 0) return ret; } diff --git a/drivers/gpu/drm/rcar-du/rcar_du_of.c b/drivers/gpu/drm/rcar-du/rcar_du_of.c index 68a0b82cb17e..afef69669bb4 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_of.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_of.c @@ -18,6 +18,7 @@ #include "rcar_du_crtc.h" #include "rcar_du_drv.h" +#include "rcar_du_of.h" /* ----------------------------------------------------------------------------- * Generic Overlay Handling diff --git a/drivers/gpu/drm/rcar-du/rcar_du_regs.h b/drivers/gpu/drm/rcar-du/rcar_du_regs.h index d5bae99d3cfe..9dfd220ceda1 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_regs.h +++ b/drivers/gpu/drm/rcar-du/rcar_du_regs.h @@ -187,14 +187,14 @@ #define DEFR6 0x000e8 #define DEFR6_CODE (0x7778 << 16) -#define DEFR6_ODPM22_DSMR (0 << 10) -#define DEFR6_ODPM22_DISP (2 << 10) -#define DEFR6_ODPM22_CDE (3 << 10) -#define DEFR6_ODPM22_MASK (3 << 10) -#define DEFR6_ODPM12_DSMR (0 << 8) -#define DEFR6_ODPM12_DISP (2 << 8) -#define DEFR6_ODPM12_CDE (3 << 8) -#define DEFR6_ODPM12_MASK (3 << 8) +#define DEFR6_ODPM12_DSMR (0 << 10) +#define DEFR6_ODPM12_DISP (2 << 10) +#define DEFR6_ODPM12_CDE (3 << 10) +#define DEFR6_ODPM12_MASK (3 << 10) +#define DEFR6_ODPM02_DSMR (0 << 8) +#define DEFR6_ODPM02_DISP (2 << 8) +#define DEFR6_ODPM02_CDE (3 << 8) +#define DEFR6_ODPM02_MASK (3 << 8) #define DEFR6_TCNE1 (1 << 6) #define DEFR6_TCNE0 (1 << 4) #define DEFR6_MLOS1 (1 << 2) diff --git a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c index b3bec0125696..27a440886b17 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c @@ -17,6 +17,7 @@ #include <drm/drm_crtc_helper.h> #include <drm/drm_fb_cma_helper.h> #include <drm/drm_gem_cma_helper.h> +#include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_plane_helper.h> #include <linux/bitops.h> @@ -237,6 +238,10 @@ static int rcar_du_vsp_plane_prepare_fb(struct drm_plane *plane, } } + ret = drm_gem_fb_prepare_fb(plane, state); + if (ret) + goto fail; + return 0; fail: @@ -299,18 +304,17 @@ static const struct drm_plane_helper_funcs rcar_du_vsp_plane_helper_funcs = { static struct drm_plane_state * rcar_du_vsp_plane_atomic_duplicate_state(struct drm_plane *plane) { - struct rcar_du_vsp_plane_state *state; struct rcar_du_vsp_plane_state *copy; if (WARN_ON(!plane->state)) return NULL; - state = to_rcar_vsp_plane_state(plane->state); - copy = kmemdup(state, sizeof(*state), GFP_KERNEL); + copy = kzalloc(sizeof(*copy), GFP_KERNEL); if (copy == NULL) return NULL; __drm_atomic_helper_plane_duplicate_state(plane, ©->state); + copy->alpha = to_rcar_vsp_plane_state(plane->state)->alpha; return ©->state; } diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c index fe3faa7c38d9..2121345a61af 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c @@ -76,6 +76,9 @@ #define VOP_WIN_GET_YRGBADDR(vop, win) \ vop_readl(vop, win->base + win->phy->yrgb_mst.offset) +#define VOP_WIN_TO_INDEX(vop_win) \ + ((vop_win) - (vop_win)->vop->win) + #define to_vop(x) container_of(x, struct vop, crtc) #define to_vop_win(x) container_of(x, struct vop_win, base) @@ -708,6 +711,7 @@ static void vop_plane_atomic_update(struct drm_plane *plane, dma_addr_t dma_addr; uint32_t val; bool rb_swap; + int win_index = VOP_WIN_TO_INDEX(vop_win); int format; /* @@ -777,7 +781,14 @@ static void vop_plane_atomic_update(struct drm_plane *plane, rb_swap = has_rb_swapped(fb->format->format); VOP_WIN_SET(vop, win, rb_swap, rb_swap); - if (fb->format->has_alpha) { + /* + * Blending win0 with the background color doesn't seem to work + * correctly. We only get the background color, no matter the contents + * of the win0 framebuffer. However, blending pre-multiplied color + * with the default opaque black default background color is a no-op, + * so we can just disable blending to get the correct result. + */ + if (fb->format->has_alpha && win_index > 0) { VOP_WIN_SET(vop, win, dst_alpha_ctl, DST_FACTOR_M0(ALPHA_SRC_INVERSE)); val = SRC_ALPHA_EN(1) | SRC_COLOR_M0(ALPHA_SRC_PRE_MUL) | diff --git a/drivers/gpu/drm/selftests/Makefile b/drivers/gpu/drm/selftests/Makefile index 4aebfc7f27d4..9fc349fa18e9 100644 --- a/drivers/gpu/drm/selftests/Makefile +++ b/drivers/gpu/drm/selftests/Makefile @@ -1 +1 @@ -obj-$(CONFIG_DRM_DEBUG_MM_SELFTEST) += test-drm_mm.o +obj-$(CONFIG_DRM_DEBUG_SELFTEST) += test-drm_mm.o test-drm-helper.o diff --git a/drivers/gpu/drm/selftests/drm_helper_selftests.h b/drivers/gpu/drm/selftests/drm_helper_selftests.h new file mode 100644 index 000000000000..9771290ed228 --- /dev/null +++ b/drivers/gpu/drm/selftests/drm_helper_selftests.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* List each unit test as selftest(name, function) + * + * The name is used as both an enum and expanded as igt__name to create + * a module parameter. It must be unique and legal for a C identifier. + * + * Tests are executed in order by igt/drm_selftests_helper + */ +selftest(check_plane_state, igt_check_plane_state) diff --git a/drivers/gpu/drm/selftests/test-drm-helper.c b/drivers/gpu/drm/selftests/test-drm-helper.c new file mode 100644 index 000000000000..a015712b43e8 --- /dev/null +++ b/drivers/gpu/drm/selftests/test-drm-helper.c @@ -0,0 +1,247 @@ +/* + * Test cases for the drm_kms_helper functions + */ + +#define pr_fmt(fmt) "drm_kms_helper: " fmt + +#include <linux/module.h> + +#include <drm/drm_atomic_helper.h> +#include <drm/drm_plane_helper.h> +#include <drm/drm_modes.h> + +#define TESTS "drm_helper_selftests.h" +#include "drm_selftest.h" + +#define FAIL(test, msg, ...) \ + do { \ + if (test) { \ + pr_err("%s/%u: " msg, __FUNCTION__, __LINE__, ##__VA_ARGS__); \ + return -EINVAL; \ + } \ + } while (0) + +#define FAIL_ON(x) FAIL((x), "%s", "FAIL_ON(" __stringify(x) ")\n") + +static void set_src(struct drm_plane_state *plane_state, + unsigned src_x, unsigned src_y, + unsigned src_w, unsigned src_h) +{ + plane_state->src_x = src_x; + plane_state->src_y = src_y; + plane_state->src_w = src_w; + plane_state->src_h = src_h; +} + +static bool check_src_eq(struct drm_plane_state *plane_state, + unsigned src_x, unsigned src_y, + unsigned src_w, unsigned src_h) +{ + if (plane_state->src.x1 < 0) { + pr_err("src x coordinate %x should never be below 0.\n", plane_state->src.x1); + drm_rect_debug_print("src: ", &plane_state->src, true); + return false; + } + if (plane_state->src.y1 < 0) { + pr_err("src y coordinate %x should never be below 0.\n", plane_state->src.y1); + drm_rect_debug_print("src: ", &plane_state->src, true); + return false; + } + + if (plane_state->src.x1 != src_x || + plane_state->src.y1 != src_y || + drm_rect_width(&plane_state->src) != src_w || + drm_rect_height(&plane_state->src) != src_h) { + drm_rect_debug_print("src: ", &plane_state->src, true); + return false; + } + + return true; +} + +static void set_crtc(struct drm_plane_state *plane_state, + int crtc_x, int crtc_y, + unsigned crtc_w, unsigned crtc_h) +{ + plane_state->crtc_x = crtc_x; + plane_state->crtc_y = crtc_y; + plane_state->crtc_w = crtc_w; + plane_state->crtc_h = crtc_h; +} + +static bool check_crtc_eq(struct drm_plane_state *plane_state, + int crtc_x, int crtc_y, + unsigned crtc_w, unsigned crtc_h) +{ + if (plane_state->dst.x1 != crtc_x || + plane_state->dst.y1 != crtc_y || + drm_rect_width(&plane_state->dst) != crtc_w || + drm_rect_height(&plane_state->dst) != crtc_h) { + drm_rect_debug_print("dst: ", &plane_state->dst, false); + + return false; + } + + return true; +} + +static int igt_check_plane_state(void *ignored) +{ + int ret; + + const struct drm_crtc_state crtc_state = { + .crtc = ZERO_SIZE_PTR, + .enable = true, + .active = true, + .mode = { + DRM_MODE("1024x768", 0, 65000, 1024, 1048, + 1184, 1344, 0, 768, 771, 777, 806, 0, + DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) + }, + }; + struct drm_framebuffer fb = { + .width = 2048, + .height = 2048 + }; + struct drm_plane_state plane_state = { + .crtc = ZERO_SIZE_PTR, + .fb = &fb, + .rotation = DRM_MODE_ROTATE_0 + }; + + /* Simple clipping, no scaling. */ + set_src(&plane_state, 0, 0, fb.width << 16, fb.height << 16); + set_crtc(&plane_state, 0, 0, fb.width, fb.height); + ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state, + DRM_PLANE_HELPER_NO_SCALING, + DRM_PLANE_HELPER_NO_SCALING, + false, false); + FAIL(ret < 0, "Simple clipping check should pass\n"); + FAIL_ON(!plane_state.visible); + FAIL_ON(!check_src_eq(&plane_state, 0, 0, 1024 << 16, 768 << 16)); + FAIL_ON(!check_crtc_eq(&plane_state, 0, 0, 1024, 768)); + + /* Rotated clipping + reflection, no scaling. */ + plane_state.rotation = DRM_MODE_ROTATE_90 | DRM_MODE_REFLECT_X; + ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state, + DRM_PLANE_HELPER_NO_SCALING, + DRM_PLANE_HELPER_NO_SCALING, + false, false); + FAIL(ret < 0, "Rotated clipping check should pass\n"); + FAIL_ON(!plane_state.visible); + FAIL_ON(!check_src_eq(&plane_state, 0, 0, 768 << 16, 1024 << 16)); + FAIL_ON(!check_crtc_eq(&plane_state, 0, 0, 1024, 768)); + plane_state.rotation = DRM_MODE_ROTATE_0; + + /* Check whether positioning works correctly. */ + set_src(&plane_state, 0, 0, 1023 << 16, 767 << 16); + set_crtc(&plane_state, 0, 0, 1023, 767); + ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state, + DRM_PLANE_HELPER_NO_SCALING, + DRM_PLANE_HELPER_NO_SCALING, + false, false); + FAIL(!ret, "Should not be able to position on the crtc with can_position=false\n"); + + ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state, + DRM_PLANE_HELPER_NO_SCALING, + DRM_PLANE_HELPER_NO_SCALING, + true, false); + FAIL(ret < 0, "Simple positioning should work\n"); + FAIL_ON(!plane_state.visible); + FAIL_ON(!check_src_eq(&plane_state, 0, 0, 1023 << 16, 767 << 16)); + FAIL_ON(!check_crtc_eq(&plane_state, 0, 0, 1023, 767)); + + /* Simple scaling tests. */ + set_src(&plane_state, 0, 0, 512 << 16, 384 << 16); + set_crtc(&plane_state, 0, 0, 1024, 768); + ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state, + 0x8001, + DRM_PLANE_HELPER_NO_SCALING, + false, false); + FAIL(!ret, "Upscaling out of range should fail.\n"); + ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state, + 0x8000, + DRM_PLANE_HELPER_NO_SCALING, + false, false); + FAIL(ret < 0, "Upscaling exactly 2x should work\n"); + FAIL_ON(!plane_state.visible); + FAIL_ON(!check_src_eq(&plane_state, 0, 0, 512 << 16, 384 << 16)); + FAIL_ON(!check_crtc_eq(&plane_state, 0, 0, 1024, 768)); + + set_src(&plane_state, 0, 0, 2048 << 16, 1536 << 16); + ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state, + DRM_PLANE_HELPER_NO_SCALING, + 0x1ffff, false, false); + FAIL(!ret, "Downscaling out of range should fail.\n"); + ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state, + DRM_PLANE_HELPER_NO_SCALING, + 0x20000, false, false); + FAIL(ret < 0, "Should succeed with exact scaling limit\n"); + FAIL_ON(!plane_state.visible); + FAIL_ON(!check_src_eq(&plane_state, 0, 0, 2048 << 16, 1536 << 16)); + FAIL_ON(!check_crtc_eq(&plane_state, 0, 0, 1024, 768)); + + /* Testing rounding errors. */ + set_src(&plane_state, 0, 0, 0x40001, 0x40001); + set_crtc(&plane_state, 1022, 766, 4, 4); + ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state, + DRM_PLANE_HELPER_NO_SCALING, + 0x10001, + true, false); + FAIL(ret < 0, "Should succeed by clipping to exact multiple"); + FAIL_ON(!plane_state.visible); + FAIL_ON(!check_src_eq(&plane_state, 0, 0, 2 << 16, 2 << 16)); + FAIL_ON(!check_crtc_eq(&plane_state, 1022, 766, 2, 2)); + + set_src(&plane_state, 0x20001, 0x20001, 0x4040001, 0x3040001); + set_crtc(&plane_state, -2, -2, 1028, 772); + ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state, + DRM_PLANE_HELPER_NO_SCALING, + 0x10001, + false, false); + FAIL(ret < 0, "Should succeed by clipping to exact multiple"); + FAIL_ON(!plane_state.visible); + FAIL_ON(!check_src_eq(&plane_state, 0x40002, 0x40002, 1024 << 16, 768 << 16)); + FAIL_ON(!check_crtc_eq(&plane_state, 0, 0, 1024, 768)); + + set_src(&plane_state, 0, 0, 0x3ffff, 0x3ffff); + set_crtc(&plane_state, 1022, 766, 4, 4); + ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state, + 0xffff, + DRM_PLANE_HELPER_NO_SCALING, + true, false); + FAIL(ret < 0, "Should succeed by clipping to exact multiple"); + FAIL_ON(!plane_state.visible); + /* Should not be rounded to 0x20001, which would be upscaling. */ + FAIL_ON(!check_src_eq(&plane_state, 0, 0, 2 << 16, 2 << 16)); + FAIL_ON(!check_crtc_eq(&plane_state, 1022, 766, 2, 2)); + + set_src(&plane_state, 0x1ffff, 0x1ffff, 0x403ffff, 0x303ffff); + set_crtc(&plane_state, -2, -2, 1028, 772); + ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state, + 0xffff, + DRM_PLANE_HELPER_NO_SCALING, + false, false); + FAIL(ret < 0, "Should succeed by clipping to exact multiple"); + FAIL_ON(!plane_state.visible); + FAIL_ON(!check_src_eq(&plane_state, 0x3fffe, 0x3fffe, 1024 << 16, 768 << 16)); + FAIL_ON(!check_crtc_eq(&plane_state, 0, 0, 1024, 768)); + + return 0; +} + +#include "drm_selftest.c" + +static int __init test_drm_helper_init(void) +{ + int err; + + err = run_selftests(selftests, ARRAY_SIZE(selftests), NULL); + + return err > 0 ? 0 : err; +} + +module_init(test_drm_helper_init); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/sti/sti_crtc.c b/drivers/gpu/drm/sti/sti_crtc.c index 21e50d7b1f86..5824e6aca8f4 100644 --- a/drivers/gpu/drm/sti/sti_crtc.c +++ b/drivers/gpu/drm/sti/sti_crtc.c @@ -357,7 +357,7 @@ int sti_crtc_init(struct drm_device *drm_dev, struct sti_mixer *mixer, res = drm_crtc_init_with_planes(drm_dev, crtc, primary, cursor, &sti_crtc_funcs, NULL); if (res) { - DRM_ERROR("Can't initialze CRTC\n"); + DRM_ERROR("Can't initialize CRTC\n"); return -EINVAL; } diff --git a/drivers/gpu/drm/stm/ltdc.c b/drivers/gpu/drm/stm/ltdc.c index e3121d9e4230..d997a6014d6c 100644 --- a/drivers/gpu/drm/stm/ltdc.c +++ b/drivers/gpu/drm/stm/ltdc.c @@ -445,6 +445,43 @@ static void ltdc_crtc_atomic_disable(struct drm_crtc *crtc, reg_set(ldev->regs, LTDC_SRCR, SRCR_IMR); } +#define CLK_TOLERANCE_HZ 50 + +static enum drm_mode_status +ltdc_crtc_mode_valid(struct drm_crtc *crtc, + const struct drm_display_mode *mode) +{ + struct ltdc_device *ldev = crtc_to_ltdc(crtc); + int target = mode->clock * 1000; + int target_min = target - CLK_TOLERANCE_HZ; + int target_max = target + CLK_TOLERANCE_HZ; + int result; + + /* + * Accept all "preferred" modes: + * - this is important for panels because panel clock tolerances are + * bigger than hdmi ones and there is no reason to not accept them + * (the fps may vary a little but it is not a problem). + * - the hdmi preferred mode will be accepted too, but userland will + * be able to use others hdmi "valid" modes if necessary. + */ + if (mode->type & DRM_MODE_TYPE_PREFERRED) + return MODE_OK; + + result = clk_round_rate(ldev->pixel_clk, target); + + DRM_DEBUG_DRIVER("clk rate target %d, available %d\n", target, result); + + /* + * Filter modes according to the clock value, particularly useful for + * hdmi modes that require precise pixel clocks. + */ + if (result < target_min || result > target_max) + return MODE_CLOCK_RANGE; + + return MODE_OK; +} + static bool ltdc_crtc_mode_fixup(struct drm_crtc *crtc, const struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) @@ -559,6 +596,7 @@ static void ltdc_crtc_atomic_flush(struct drm_crtc *crtc, } static const struct drm_crtc_helper_funcs ltdc_crtc_helper_funcs = { + .mode_valid = ltdc_crtc_mode_valid, .mode_fixup = ltdc_crtc_mode_fixup, .mode_set_nofb = ltdc_crtc_mode_set_nofb, .atomic_flush = ltdc_crtc_atomic_flush, @@ -822,13 +860,13 @@ static struct drm_plane *ltdc_plane_create(struct drm_device *ddev, plane = devm_kzalloc(dev, sizeof(*plane), GFP_KERNEL); if (!plane) - return 0; + return NULL; ret = drm_universal_plane_init(ddev, plane, possible_crtcs, <dc_plane_funcs, formats, nb_fmt, NULL, type, NULL); if (ret < 0) - return 0; + return NULL; drm_plane_helper_add(plane, <dc_plane_helper_funcs); @@ -987,14 +1025,13 @@ int ltdc_load(struct drm_device *ddev) &bridge[i]); /* - * If at least one endpoint is ready, continue probing, - * else if at least one endpoint is -EPROBE_DEFER and - * there is no previous ready endpoints, defer probing. + * If at least one endpoint is -EPROBE_DEFER, defer probing, + * else if at least one endpoint is ready, continue probing. */ - if (!ret) + if (ret == -EPROBE_DEFER) + return ret; + else if (!ret) endpoint_not_ready = 0; - else if (ret == -EPROBE_DEFER && endpoint_not_ready) - endpoint_not_ready = -EPROBE_DEFER; } if (endpoint_not_ready) diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c index 1b278a22c8b7..1067e702c22c 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c @@ -224,7 +224,7 @@ static void tilcdc_crtc_set_clk(struct drm_crtc *crtc) ret = clk_set_rate(priv->clk, req_rate * clkdiv); clk_rate = clk_get_rate(priv->clk); - if (ret < 0) { + if (ret < 0 || tilcdc_pclk_diff(req_rate, clk_rate) > 5) { /* * If we fail to set the clock rate (some architectures don't * use the common clock framework yet and may not implement diff --git a/drivers/gpu/drm/tinydrm/mi0283qt.c b/drivers/gpu/drm/tinydrm/mi0283qt.c index d5ef65179c16..015d03f2acba 100644 --- a/drivers/gpu/drm/tinydrm/mi0283qt.c +++ b/drivers/gpu/drm/tinydrm/mi0283qt.c @@ -85,24 +85,6 @@ static void mi0283qt_enable(struct drm_simple_display_pipe *pipe, /* Memory Access Control */ mipi_dbi_command(mipi, MIPI_DCS_SET_PIXEL_FORMAT, MIPI_DCS_PIXEL_FMT_16BIT); - switch (mipi->rotation) { - default: - addr_mode = ILI9341_MADCTL_MV | ILI9341_MADCTL_MY | - ILI9341_MADCTL_MX; - break; - case 90: - addr_mode = ILI9341_MADCTL_MY; - break; - case 180: - addr_mode = ILI9341_MADCTL_MV; - break; - case 270: - addr_mode = ILI9341_MADCTL_MX; - break; - } - addr_mode |= ILI9341_MADCTL_BGR; - mipi_dbi_command(mipi, MIPI_DCS_SET_ADDRESS_MODE, addr_mode); - /* Frame Rate */ mipi_dbi_command(mipi, ILI9341_FRMCTR1, 0x00, 0x1b); @@ -128,6 +110,29 @@ static void mi0283qt_enable(struct drm_simple_display_pipe *pipe, msleep(100); out_enable: + /* The PiTFT (ili9340) has a hardware reset circuit that + * resets only on power-on and not on each reboot through + * a gpio like the rpi-display does. + * As a result, we need to always apply the rotation value + * regardless of the display "on/off" state. + */ + switch (mipi->rotation) { + default: + addr_mode = ILI9341_MADCTL_MV | ILI9341_MADCTL_MY | + ILI9341_MADCTL_MX; + break; + case 90: + addr_mode = ILI9341_MADCTL_MY; + break; + case 180: + addr_mode = ILI9341_MADCTL_MV; + break; + case 270: + addr_mode = ILI9341_MADCTL_MX; + break; + } + addr_mode |= ILI9341_MADCTL_BGR; + mipi_dbi_command(mipi, MIPI_DCS_SET_ADDRESS_MODE, addr_mode); mipi_dbi_enable_flush(mipi, crtc_state, plane_state); } diff --git a/drivers/gpu/drm/v3d/Kconfig b/drivers/gpu/drm/v3d/Kconfig new file mode 100644 index 000000000000..a0c0259355bd --- /dev/null +++ b/drivers/gpu/drm/v3d/Kconfig @@ -0,0 +1,9 @@ +config DRM_V3D + tristate "Broadcom V3D 3.x and newer" + depends on ARCH_BCM || ARCH_BCMSTB || COMPILE_TEST + depends on DRM + depends on COMMON_CLK + select DRM_SCHED + help + Choose this option if you have a system that has a Broadcom + V3D 3.x or newer GPU, such as BCM7268. diff --git a/drivers/gpu/drm/v3d/Makefile b/drivers/gpu/drm/v3d/Makefile new file mode 100644 index 000000000000..34446e1de64f --- /dev/null +++ b/drivers/gpu/drm/v3d/Makefile @@ -0,0 +1,18 @@ +# Please keep these build lists sorted! + +# core driver code +v3d-y := \ + v3d_bo.o \ + v3d_drv.o \ + v3d_fence.o \ + v3d_gem.o \ + v3d_irq.o \ + v3d_mmu.o \ + v3d_trace_points.o \ + v3d_sched.o + +v3d-$(CONFIG_DEBUG_FS) += v3d_debugfs.o + +obj-$(CONFIG_DRM_V3D) += v3d.o + +CFLAGS_v3d_trace_points.o := -I$(src) diff --git a/drivers/gpu/drm/v3d/v3d_bo.c b/drivers/gpu/drm/v3d/v3d_bo.c new file mode 100644 index 000000000000..7b1e2a549a71 --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_bo.c @@ -0,0 +1,389 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2015-2018 Broadcom */ + +/** + * DOC: V3D GEM BO management support + * + * Compared to VC4 (V3D 2.x), V3D 3.3 introduces an MMU between the + * GPU and the bus, allowing us to use shmem objects for our storage + * instead of CMA. + * + * Physically contiguous objects may still be imported to V3D, but the + * driver doesn't allocate physically contiguous objects on its own. + * Display engines requiring physically contiguous allocations should + * look into Mesa's "renderonly" support (as used by the Mesa pl111 + * driver) for an example of how to integrate with V3D. + * + * Long term, we should support evicting pages from the MMU when under + * memory pressure (thus the v3d_bo_get_pages() refcounting), but + * that's not a high priority since our systems tend to not have swap. + */ + +#include <linux/dma-buf.h> +#include <linux/pfn_t.h> + +#include "v3d_drv.h" +#include "uapi/drm/v3d_drm.h" + +/* Pins the shmem pages, fills in the .pages and .sgt fields of the BO, and maps + * it for DMA. + */ +static int +v3d_bo_get_pages(struct v3d_bo *bo) +{ + struct drm_gem_object *obj = &bo->base; + struct drm_device *dev = obj->dev; + int npages = obj->size >> PAGE_SHIFT; + int ret = 0; + + mutex_lock(&bo->lock); + if (bo->pages_refcount++ != 0) + goto unlock; + + if (!obj->import_attach) { + bo->pages = drm_gem_get_pages(obj); + if (IS_ERR(bo->pages)) { + ret = PTR_ERR(bo->pages); + goto unlock; + } + + bo->sgt = drm_prime_pages_to_sg(bo->pages, npages); + if (IS_ERR(bo->sgt)) { + ret = PTR_ERR(bo->sgt); + goto put_pages; + } + + /* Map the pages for use by the GPU. */ + dma_map_sg(dev->dev, bo->sgt->sgl, + bo->sgt->nents, DMA_BIDIRECTIONAL); + } else { + bo->pages = kcalloc(npages, sizeof(*bo->pages), GFP_KERNEL); + if (!bo->pages) + goto put_pages; + + drm_prime_sg_to_page_addr_arrays(bo->sgt, bo->pages, + NULL, npages); + + /* Note that dma-bufs come in mapped. */ + } + + mutex_unlock(&bo->lock); + + return 0; + +put_pages: + drm_gem_put_pages(obj, bo->pages, true, true); + bo->pages = NULL; +unlock: + bo->pages_refcount--; + mutex_unlock(&bo->lock); + return ret; +} + +static void +v3d_bo_put_pages(struct v3d_bo *bo) +{ + struct drm_gem_object *obj = &bo->base; + + mutex_lock(&bo->lock); + if (--bo->pages_refcount == 0) { + if (!obj->import_attach) { + dma_unmap_sg(obj->dev->dev, bo->sgt->sgl, + bo->sgt->nents, DMA_BIDIRECTIONAL); + sg_free_table(bo->sgt); + kfree(bo->sgt); + drm_gem_put_pages(obj, bo->pages, true, true); + } else { + kfree(bo->pages); + } + } + mutex_unlock(&bo->lock); +} + +static struct v3d_bo *v3d_bo_create_struct(struct drm_device *dev, + size_t unaligned_size) +{ + struct v3d_dev *v3d = to_v3d_dev(dev); + struct drm_gem_object *obj; + struct v3d_bo *bo; + size_t size = roundup(unaligned_size, PAGE_SIZE); + int ret; + + if (size == 0) + return ERR_PTR(-EINVAL); + + bo = kzalloc(sizeof(*bo), GFP_KERNEL); + if (!bo) + return ERR_PTR(-ENOMEM); + obj = &bo->base; + + INIT_LIST_HEAD(&bo->vmas); + INIT_LIST_HEAD(&bo->unref_head); + mutex_init(&bo->lock); + + ret = drm_gem_object_init(dev, obj, size); + if (ret) + goto free_bo; + + spin_lock(&v3d->mm_lock); + ret = drm_mm_insert_node_generic(&v3d->mm, &bo->node, + obj->size >> PAGE_SHIFT, + GMP_GRANULARITY >> PAGE_SHIFT, 0, 0); + spin_unlock(&v3d->mm_lock); + if (ret) + goto free_obj; + + return bo; + +free_obj: + drm_gem_object_release(obj); +free_bo: + kfree(bo); + return ERR_PTR(ret); +} + +struct v3d_bo *v3d_bo_create(struct drm_device *dev, struct drm_file *file_priv, + size_t unaligned_size) +{ + struct v3d_dev *v3d = to_v3d_dev(dev); + struct drm_gem_object *obj; + struct v3d_bo *bo; + int ret; + + bo = v3d_bo_create_struct(dev, unaligned_size); + if (IS_ERR(bo)) + return bo; + obj = &bo->base; + + bo->resv = &bo->_resv; + reservation_object_init(bo->resv); + + ret = v3d_bo_get_pages(bo); + if (ret) + goto free_mm; + + v3d_mmu_insert_ptes(bo); + + mutex_lock(&v3d->bo_lock); + v3d->bo_stats.num_allocated++; + v3d->bo_stats.pages_allocated += obj->size >> PAGE_SHIFT; + mutex_unlock(&v3d->bo_lock); + + return bo; + +free_mm: + spin_lock(&v3d->mm_lock); + drm_mm_remove_node(&bo->node); + spin_unlock(&v3d->mm_lock); + + drm_gem_object_release(obj); + kfree(bo); + return ERR_PTR(ret); +} + +/* Called DRM core on the last userspace/kernel unreference of the + * BO. + */ +void v3d_free_object(struct drm_gem_object *obj) +{ + struct v3d_dev *v3d = to_v3d_dev(obj->dev); + struct v3d_bo *bo = to_v3d_bo(obj); + + mutex_lock(&v3d->bo_lock); + v3d->bo_stats.num_allocated--; + v3d->bo_stats.pages_allocated -= obj->size >> PAGE_SHIFT; + mutex_unlock(&v3d->bo_lock); + + reservation_object_fini(&bo->_resv); + + v3d_bo_put_pages(bo); + + if (obj->import_attach) + drm_prime_gem_destroy(obj, bo->sgt); + + v3d_mmu_remove_ptes(bo); + spin_lock(&v3d->mm_lock); + drm_mm_remove_node(&bo->node); + spin_unlock(&v3d->mm_lock); + + mutex_destroy(&bo->lock); + + drm_gem_object_release(obj); + kfree(bo); +} + +struct reservation_object *v3d_prime_res_obj(struct drm_gem_object *obj) +{ + struct v3d_bo *bo = to_v3d_bo(obj); + + return bo->resv; +} + +static void +v3d_set_mmap_vma_flags(struct vm_area_struct *vma) +{ + vma->vm_flags &= ~VM_PFNMAP; + vma->vm_flags |= VM_MIXEDMAP; + vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); +} + +int v3d_gem_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct drm_gem_object *obj = vma->vm_private_data; + struct v3d_bo *bo = to_v3d_bo(obj); + unsigned long pfn; + pgoff_t pgoff; + int ret; + + /* We don't use vmf->pgoff since that has the fake offset: */ + pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT; + pfn = page_to_pfn(bo->pages[pgoff]); + + ret = vm_insert_mixed(vma, vmf->address, __pfn_to_pfn_t(pfn, PFN_DEV)); + + switch (ret) { + case -EAGAIN: + case 0: + case -ERESTARTSYS: + case -EINTR: + case -EBUSY: + /* + * EBUSY is ok: this just means that another thread + * already did the job. + */ + return VM_FAULT_NOPAGE; + case -ENOMEM: + return VM_FAULT_OOM; + default: + return VM_FAULT_SIGBUS; + } +} + +int v3d_mmap(struct file *filp, struct vm_area_struct *vma) +{ + int ret; + + ret = drm_gem_mmap(filp, vma); + if (ret) + return ret; + + v3d_set_mmap_vma_flags(vma); + + return ret; +} + +int v3d_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) +{ + int ret; + + ret = drm_gem_mmap_obj(obj, obj->size, vma); + if (ret < 0) + return ret; + + v3d_set_mmap_vma_flags(vma); + + return 0; +} + +struct sg_table * +v3d_prime_get_sg_table(struct drm_gem_object *obj) +{ + struct v3d_bo *bo = to_v3d_bo(obj); + int npages = obj->size >> PAGE_SHIFT; + + return drm_prime_pages_to_sg(bo->pages, npages); +} + +struct drm_gem_object * +v3d_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sgt) +{ + struct drm_gem_object *obj; + struct v3d_bo *bo; + + bo = v3d_bo_create_struct(dev, attach->dmabuf->size); + if (IS_ERR(bo)) + return ERR_CAST(bo); + obj = &bo->base; + + bo->resv = attach->dmabuf->resv; + + bo->sgt = sgt; + v3d_bo_get_pages(bo); + + v3d_mmu_insert_ptes(bo); + + return obj; +} + +int v3d_create_bo_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_v3d_create_bo *args = data; + struct v3d_bo *bo = NULL; + int ret; + + if (args->flags != 0) { + DRM_INFO("unknown create_bo flags: %d\n", args->flags); + return -EINVAL; + } + + bo = v3d_bo_create(dev, file_priv, PAGE_ALIGN(args->size)); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + args->offset = bo->node.start << PAGE_SHIFT; + + ret = drm_gem_handle_create(file_priv, &bo->base, &args->handle); + drm_gem_object_put_unlocked(&bo->base); + + return ret; +} + +int v3d_mmap_bo_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_v3d_mmap_bo *args = data; + struct drm_gem_object *gem_obj; + int ret; + + if (args->flags != 0) { + DRM_INFO("unknown mmap_bo flags: %d\n", args->flags); + return -EINVAL; + } + + gem_obj = drm_gem_object_lookup(file_priv, args->handle); + if (!gem_obj) { + DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle); + return -ENOENT; + } + + ret = drm_gem_create_mmap_offset(gem_obj); + if (ret == 0) + args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node); + drm_gem_object_put_unlocked(gem_obj); + + return ret; +} + +int v3d_get_bo_offset_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_v3d_get_bo_offset *args = data; + struct drm_gem_object *gem_obj; + struct v3d_bo *bo; + + gem_obj = drm_gem_object_lookup(file_priv, args->handle); + if (!gem_obj) { + DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle); + return -ENOENT; + } + bo = to_v3d_bo(gem_obj); + + args->offset = bo->node.start << PAGE_SHIFT; + + drm_gem_object_put_unlocked(gem_obj); + return 0; +} diff --git a/drivers/gpu/drm/v3d/v3d_debugfs.c b/drivers/gpu/drm/v3d/v3d_debugfs.c new file mode 100644 index 000000000000..4db62c545748 --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_debugfs.c @@ -0,0 +1,191 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2014-2018 Broadcom */ + +#include <linux/circ_buf.h> +#include <linux/ctype.h> +#include <linux/debugfs.h> +#include <linux/pm_runtime.h> +#include <linux/seq_file.h> +#include <drm/drmP.h> + +#include "v3d_drv.h" +#include "v3d_regs.h" + +#define REGDEF(reg) { reg, #reg } +struct v3d_reg_def { + u32 reg; + const char *name; +}; + +static const struct v3d_reg_def v3d_hub_reg_defs[] = { + REGDEF(V3D_HUB_AXICFG), + REGDEF(V3D_HUB_UIFCFG), + REGDEF(V3D_HUB_IDENT0), + REGDEF(V3D_HUB_IDENT1), + REGDEF(V3D_HUB_IDENT2), + REGDEF(V3D_HUB_IDENT3), + REGDEF(V3D_HUB_INT_STS), + REGDEF(V3D_HUB_INT_MSK_STS), +}; + +static const struct v3d_reg_def v3d_gca_reg_defs[] = { + REGDEF(V3D_GCA_SAFE_SHUTDOWN), + REGDEF(V3D_GCA_SAFE_SHUTDOWN_ACK), +}; + +static const struct v3d_reg_def v3d_core_reg_defs[] = { + REGDEF(V3D_CTL_IDENT0), + REGDEF(V3D_CTL_IDENT1), + REGDEF(V3D_CTL_IDENT2), + REGDEF(V3D_CTL_MISCCFG), + REGDEF(V3D_CTL_INT_STS), + REGDEF(V3D_CTL_INT_MSK_STS), + REGDEF(V3D_CLE_CT0CS), + REGDEF(V3D_CLE_CT0CA), + REGDEF(V3D_CLE_CT0EA), + REGDEF(V3D_CLE_CT1CS), + REGDEF(V3D_CLE_CT1CA), + REGDEF(V3D_CLE_CT1EA), + + REGDEF(V3D_PTB_BPCA), + REGDEF(V3D_PTB_BPCS), + + REGDEF(V3D_MMU_CTL), + REGDEF(V3D_MMU_VIO_ADDR), + + REGDEF(V3D_GMP_STATUS), + REGDEF(V3D_GMP_CFG), + REGDEF(V3D_GMP_VIO_ADDR), +}; + +static int v3d_v3d_debugfs_regs(struct seq_file *m, void *unused) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_device *dev = node->minor->dev; + struct v3d_dev *v3d = to_v3d_dev(dev); + int i, core; + + for (i = 0; i < ARRAY_SIZE(v3d_hub_reg_defs); i++) { + seq_printf(m, "%s (0x%04x): 0x%08x\n", + v3d_hub_reg_defs[i].name, v3d_hub_reg_defs[i].reg, + V3D_READ(v3d_hub_reg_defs[i].reg)); + } + + for (i = 0; i < ARRAY_SIZE(v3d_gca_reg_defs); i++) { + seq_printf(m, "%s (0x%04x): 0x%08x\n", + v3d_gca_reg_defs[i].name, v3d_gca_reg_defs[i].reg, + V3D_GCA_READ(v3d_gca_reg_defs[i].reg)); + } + + for (core = 0; core < v3d->cores; core++) { + for (i = 0; i < ARRAY_SIZE(v3d_core_reg_defs); i++) { + seq_printf(m, "core %d %s (0x%04x): 0x%08x\n", + core, + v3d_core_reg_defs[i].name, + v3d_core_reg_defs[i].reg, + V3D_CORE_READ(core, + v3d_core_reg_defs[i].reg)); + } + } + + return 0; +} + +static int v3d_v3d_debugfs_ident(struct seq_file *m, void *unused) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_device *dev = node->minor->dev; + struct v3d_dev *v3d = to_v3d_dev(dev); + u32 ident0, ident1, ident2, ident3, cores; + int ret, core; + + ret = pm_runtime_get_sync(v3d->dev); + if (ret < 0) + return ret; + + ident0 = V3D_READ(V3D_HUB_IDENT0); + ident1 = V3D_READ(V3D_HUB_IDENT1); + ident2 = V3D_READ(V3D_HUB_IDENT2); + ident3 = V3D_READ(V3D_HUB_IDENT3); + cores = V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_NCORES); + + seq_printf(m, "Revision: %d.%d.%d.%d\n", + V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_TVER), + V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_REV), + V3D_GET_FIELD(ident3, V3D_HUB_IDENT3_IPREV), + V3D_GET_FIELD(ident3, V3D_HUB_IDENT3_IPIDX)); + seq_printf(m, "MMU: %s\n", + (ident2 & V3D_HUB_IDENT2_WITH_MMU) ? "yes" : "no"); + seq_printf(m, "TFU: %s\n", + (ident1 & V3D_HUB_IDENT1_WITH_TFU) ? "yes" : "no"); + seq_printf(m, "TSY: %s\n", + (ident1 & V3D_HUB_IDENT1_WITH_TSY) ? "yes" : "no"); + seq_printf(m, "MSO: %s\n", + (ident1 & V3D_HUB_IDENT1_WITH_MSO) ? "yes" : "no"); + seq_printf(m, "L3C: %s (%dkb)\n", + (ident1 & V3D_HUB_IDENT1_WITH_L3C) ? "yes" : "no", + V3D_GET_FIELD(ident2, V3D_HUB_IDENT2_L3C_NKB)); + + for (core = 0; core < cores; core++) { + u32 misccfg; + u32 nslc, ntmu, qups; + + ident0 = V3D_CORE_READ(core, V3D_CTL_IDENT0); + ident1 = V3D_CORE_READ(core, V3D_CTL_IDENT1); + ident2 = V3D_CORE_READ(core, V3D_CTL_IDENT2); + misccfg = V3D_CORE_READ(core, V3D_CTL_MISCCFG); + + nslc = V3D_GET_FIELD(ident1, V3D_IDENT1_NSLC); + ntmu = V3D_GET_FIELD(ident1, V3D_IDENT1_NTMU); + qups = V3D_GET_FIELD(ident1, V3D_IDENT1_QUPS); + + seq_printf(m, "Core %d:\n", core); + seq_printf(m, " Revision: %d.%d\n", + V3D_GET_FIELD(ident0, V3D_IDENT0_VER), + V3D_GET_FIELD(ident1, V3D_IDENT1_REV)); + seq_printf(m, " Slices: %d\n", nslc); + seq_printf(m, " TMUs: %d\n", nslc * ntmu); + seq_printf(m, " QPUs: %d\n", nslc * qups); + seq_printf(m, " Semaphores: %d\n", + V3D_GET_FIELD(ident1, V3D_IDENT1_NSEM)); + seq_printf(m, " BCG int: %d\n", + (ident2 & V3D_IDENT2_BCG_INT) != 0); + seq_printf(m, " Override TMU: %d\n", + (misccfg & V3D_MISCCFG_OVRTMUOUT) != 0); + } + + pm_runtime_mark_last_busy(v3d->dev); + pm_runtime_put_autosuspend(v3d->dev); + + return 0; +} + +static int v3d_debugfs_bo_stats(struct seq_file *m, void *unused) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_device *dev = node->minor->dev; + struct v3d_dev *v3d = to_v3d_dev(dev); + + mutex_lock(&v3d->bo_lock); + seq_printf(m, "allocated bos: %d\n", + v3d->bo_stats.num_allocated); + seq_printf(m, "allocated bo size (kb): %ld\n", + (long)v3d->bo_stats.pages_allocated << (PAGE_SHIFT - 10)); + mutex_unlock(&v3d->bo_lock); + + return 0; +} + +static const struct drm_info_list v3d_debugfs_list[] = { + {"v3d_ident", v3d_v3d_debugfs_ident, 0}, + {"v3d_regs", v3d_v3d_debugfs_regs, 0}, + {"bo_stats", v3d_debugfs_bo_stats, 0}, +}; + +int +v3d_debugfs_init(struct drm_minor *minor) +{ + return drm_debugfs_create_files(v3d_debugfs_list, + ARRAY_SIZE(v3d_debugfs_list), + minor->debugfs_root, minor); +} diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c new file mode 100644 index 000000000000..38e8041b5f0c --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_drv.c @@ -0,0 +1,371 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2014-2018 Broadcom */ + +/** + * DOC: Broadcom V3D Graphics Driver + * + * This driver supports the Broadcom V3D 3.3 and 4.1 OpenGL ES GPUs. + * For V3D 2.x support, see the VC4 driver. + * + * Currently only single-core rendering using the binner and renderer + * is supported. The TFU (texture formatting unit) and V3D 4.x's CSD + * (compute shader dispatch) are not yet supported. + */ + +#include <linux/clk.h> +#include <linux/device.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/of_platform.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> +#include <drm/drm_fb_cma_helper.h> +#include <drm/drm_fb_helper.h> + +#include "uapi/drm/v3d_drm.h" +#include "v3d_drv.h" +#include "v3d_regs.h" + +#define DRIVER_NAME "v3d" +#define DRIVER_DESC "Broadcom V3D graphics" +#define DRIVER_DATE "20180419" +#define DRIVER_MAJOR 1 +#define DRIVER_MINOR 0 +#define DRIVER_PATCHLEVEL 0 + +#ifdef CONFIG_PM +static int v3d_runtime_suspend(struct device *dev) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct v3d_dev *v3d = to_v3d_dev(drm); + + v3d_irq_disable(v3d); + + clk_disable_unprepare(v3d->clk); + + return 0; +} + +static int v3d_runtime_resume(struct device *dev) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct v3d_dev *v3d = to_v3d_dev(drm); + int ret; + + ret = clk_prepare_enable(v3d->clk); + if (ret != 0) + return ret; + + /* XXX: VPM base */ + + v3d_mmu_set_page_table(v3d); + v3d_irq_enable(v3d); + + return 0; +} +#endif + +static const struct dev_pm_ops v3d_v3d_pm_ops = { + SET_RUNTIME_PM_OPS(v3d_runtime_suspend, v3d_runtime_resume, NULL) +}; + +static int v3d_get_param_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct v3d_dev *v3d = to_v3d_dev(dev); + struct drm_v3d_get_param *args = data; + int ret; + static const u32 reg_map[] = { + [DRM_V3D_PARAM_V3D_UIFCFG] = V3D_HUB_UIFCFG, + [DRM_V3D_PARAM_V3D_HUB_IDENT1] = V3D_HUB_IDENT1, + [DRM_V3D_PARAM_V3D_HUB_IDENT2] = V3D_HUB_IDENT2, + [DRM_V3D_PARAM_V3D_HUB_IDENT3] = V3D_HUB_IDENT3, + [DRM_V3D_PARAM_V3D_CORE0_IDENT0] = V3D_CTL_IDENT0, + [DRM_V3D_PARAM_V3D_CORE0_IDENT1] = V3D_CTL_IDENT1, + [DRM_V3D_PARAM_V3D_CORE0_IDENT2] = V3D_CTL_IDENT2, + }; + + if (args->pad != 0) + return -EINVAL; + + /* Note that DRM_V3D_PARAM_V3D_CORE0_IDENT0 is 0, so we need + * to explicitly allow it in the "the register in our + * parameter map" check. + */ + if (args->param < ARRAY_SIZE(reg_map) && + (reg_map[args->param] || + args->param == DRM_V3D_PARAM_V3D_CORE0_IDENT0)) { + u32 offset = reg_map[args->param]; + + if (args->value != 0) + return -EINVAL; + + ret = pm_runtime_get_sync(v3d->dev); + if (args->param >= DRM_V3D_PARAM_V3D_CORE0_IDENT0 && + args->param <= DRM_V3D_PARAM_V3D_CORE0_IDENT2) { + args->value = V3D_CORE_READ(0, offset); + } else { + args->value = V3D_READ(offset); + } + pm_runtime_mark_last_busy(v3d->dev); + pm_runtime_put_autosuspend(v3d->dev); + return 0; + } + + /* Any params that aren't just register reads would go here. */ + + DRM_DEBUG("Unknown parameter %d\n", args->param); + return -EINVAL; +} + +static int +v3d_open(struct drm_device *dev, struct drm_file *file) +{ + struct v3d_dev *v3d = to_v3d_dev(dev); + struct v3d_file_priv *v3d_priv; + int i; + + v3d_priv = kzalloc(sizeof(*v3d_priv), GFP_KERNEL); + if (!v3d_priv) + return -ENOMEM; + + v3d_priv->v3d = v3d; + + for (i = 0; i < V3D_MAX_QUEUES; i++) { + drm_sched_entity_init(&v3d->queue[i].sched, + &v3d_priv->sched_entity[i], + &v3d->queue[i].sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL], + 32, NULL); + } + + file->driver_priv = v3d_priv; + + return 0; +} + +static void +v3d_postclose(struct drm_device *dev, struct drm_file *file) +{ + struct v3d_dev *v3d = to_v3d_dev(dev); + struct v3d_file_priv *v3d_priv = file->driver_priv; + enum v3d_queue q; + + for (q = 0; q < V3D_MAX_QUEUES; q++) { + drm_sched_entity_fini(&v3d->queue[q].sched, + &v3d_priv->sched_entity[q]); + } + + kfree(v3d_priv); +} + +static const struct file_operations v3d_drm_fops = { + .owner = THIS_MODULE, + .open = drm_open, + .release = drm_release, + .unlocked_ioctl = drm_ioctl, + .mmap = v3d_mmap, + .poll = drm_poll, + .read = drm_read, + .compat_ioctl = drm_compat_ioctl, + .llseek = noop_llseek, +}; + +/* DRM_AUTH is required on SUBMIT_CL for now, while we don't have GMP + * protection between clients. Note that render nodes would be be + * able to submit CLs that could access BOs from clients authenticated + * with the master node. + */ +static const struct drm_ioctl_desc v3d_drm_ioctls[] = { + DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CL, v3d_submit_cl_ioctl, DRM_RENDER_ALLOW | DRM_AUTH), + DRM_IOCTL_DEF_DRV(V3D_WAIT_BO, v3d_wait_bo_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(V3D_CREATE_BO, v3d_create_bo_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(V3D_MMAP_BO, v3d_mmap_bo_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(V3D_GET_PARAM, v3d_get_param_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(V3D_GET_BO_OFFSET, v3d_get_bo_offset_ioctl, DRM_RENDER_ALLOW), +}; + +static const struct vm_operations_struct v3d_vm_ops = { + .fault = v3d_gem_fault, + .open = drm_gem_vm_open, + .close = drm_gem_vm_close, +}; + +static struct drm_driver v3d_drm_driver = { + .driver_features = (DRIVER_GEM | + DRIVER_RENDER | + DRIVER_PRIME | + DRIVER_SYNCOBJ), + + .open = v3d_open, + .postclose = v3d_postclose, + +#if defined(CONFIG_DEBUG_FS) + .debugfs_init = v3d_debugfs_init, +#endif + + .gem_free_object_unlocked = v3d_free_object, + .gem_vm_ops = &v3d_vm_ops, + + .prime_handle_to_fd = drm_gem_prime_handle_to_fd, + .prime_fd_to_handle = drm_gem_prime_fd_to_handle, + .gem_prime_import = drm_gem_prime_import, + .gem_prime_export = drm_gem_prime_export, + .gem_prime_res_obj = v3d_prime_res_obj, + .gem_prime_get_sg_table = v3d_prime_get_sg_table, + .gem_prime_import_sg_table = v3d_prime_import_sg_table, + .gem_prime_mmap = v3d_prime_mmap, + + .ioctls = v3d_drm_ioctls, + .num_ioctls = ARRAY_SIZE(v3d_drm_ioctls), + .fops = &v3d_drm_fops, + + .name = DRIVER_NAME, + .desc = DRIVER_DESC, + .date = DRIVER_DATE, + .major = DRIVER_MAJOR, + .minor = DRIVER_MINOR, + .patchlevel = DRIVER_PATCHLEVEL, +}; + +static const struct of_device_id v3d_of_match[] = { + { .compatible = "brcm,7268-v3d" }, + { .compatible = "brcm,7278-v3d" }, + {}, +}; +MODULE_DEVICE_TABLE(of, v3d_of_match); + +static int +map_regs(struct v3d_dev *v3d, void __iomem **regs, const char *name) +{ + struct resource *res = + platform_get_resource_byname(v3d->pdev, IORESOURCE_MEM, name); + + *regs = devm_ioremap_resource(v3d->dev, res); + return PTR_ERR_OR_ZERO(*regs); +} + +static int v3d_platform_drm_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct drm_device *drm; + struct v3d_dev *v3d; + int ret; + u32 ident1; + + dev->coherent_dma_mask = DMA_BIT_MASK(36); + + v3d = kzalloc(sizeof(*v3d), GFP_KERNEL); + if (!v3d) + return -ENOMEM; + v3d->dev = dev; + v3d->pdev = pdev; + drm = &v3d->drm; + + ret = map_regs(v3d, &v3d->bridge_regs, "bridge"); + if (ret) + goto dev_free; + + ret = map_regs(v3d, &v3d->hub_regs, "hub"); + if (ret) + goto dev_free; + + ret = map_regs(v3d, &v3d->core_regs[0], "core0"); + if (ret) + goto dev_free; + + ident1 = V3D_READ(V3D_HUB_IDENT1); + v3d->ver = (V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_TVER) * 10 + + V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_REV)); + v3d->cores = V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_NCORES); + WARN_ON(v3d->cores > 1); /* multicore not yet implemented */ + + if (v3d->ver < 41) { + ret = map_regs(v3d, &v3d->gca_regs, "gca"); + if (ret) + goto dev_free; + } + + v3d->mmu_scratch = dma_alloc_wc(dev, 4096, &v3d->mmu_scratch_paddr, + GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO); + if (!v3d->mmu_scratch) { + dev_err(dev, "Failed to allocate MMU scratch page\n"); + ret = -ENOMEM; + goto dev_free; + } + + pm_runtime_use_autosuspend(dev); + pm_runtime_set_autosuspend_delay(dev, 50); + pm_runtime_enable(dev); + + ret = drm_dev_init(&v3d->drm, &v3d_drm_driver, dev); + if (ret) + goto dma_free; + + platform_set_drvdata(pdev, drm); + drm->dev_private = v3d; + + ret = v3d_gem_init(drm); + if (ret) + goto dev_destroy; + + v3d_irq_init(v3d); + + ret = drm_dev_register(drm, 0); + if (ret) + goto gem_destroy; + + return 0; + +gem_destroy: + v3d_gem_destroy(drm); +dev_destroy: + drm_dev_put(drm); +dma_free: + dma_free_wc(dev, 4096, v3d->mmu_scratch, v3d->mmu_scratch_paddr); +dev_free: + kfree(v3d); + return ret; +} + +static int v3d_platform_drm_remove(struct platform_device *pdev) +{ + struct drm_device *drm = platform_get_drvdata(pdev); + struct v3d_dev *v3d = to_v3d_dev(drm); + + drm_dev_unregister(drm); + + v3d_gem_destroy(drm); + + drm_dev_put(drm); + + dma_free_wc(v3d->dev, 4096, v3d->mmu_scratch, v3d->mmu_scratch_paddr); + + return 0; +} + +static struct platform_driver v3d_platform_driver = { + .probe = v3d_platform_drm_probe, + .remove = v3d_platform_drm_remove, + .driver = { + .name = "v3d", + .of_match_table = v3d_of_match, + }, +}; + +static int __init v3d_drm_register(void) +{ + return platform_driver_register(&v3d_platform_driver); +} + +static void __exit v3d_drm_unregister(void) +{ + platform_driver_unregister(&v3d_platform_driver); +} + +module_init(v3d_drm_register); +module_exit(v3d_drm_unregister); + +MODULE_ALIAS("platform:v3d-drm"); +MODULE_DESCRIPTION("Broadcom V3D DRM Driver"); +MODULE_AUTHOR("Eric Anholt <eric@anholt.net>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h new file mode 100644 index 000000000000..a043ac3aae98 --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -0,0 +1,294 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2015-2018 Broadcom */ + +#include <linux/reservation.h> +#include <drm/drmP.h> +#include <drm/drm_encoder.h> +#include <drm/drm_gem.h> +#include <drm/gpu_scheduler.h> + +#define GMP_GRANULARITY (128 * 1024) + +/* Enum for each of the V3D queues. We maintain various queue + * tracking as an array because at some point we'll want to support + * the TFU (texture formatting unit) as another queue. + */ +enum v3d_queue { + V3D_BIN, + V3D_RENDER, +}; + +#define V3D_MAX_QUEUES (V3D_RENDER + 1) + +struct v3d_queue_state { + struct drm_gpu_scheduler sched; + + u64 fence_context; + u64 emit_seqno; + u64 finished_seqno; +}; + +struct v3d_dev { + struct drm_device drm; + + /* Short representation (e.g. 33, 41) of the V3D tech version + * and revision. + */ + int ver; + + struct device *dev; + struct platform_device *pdev; + void __iomem *hub_regs; + void __iomem *core_regs[3]; + void __iomem *bridge_regs; + void __iomem *gca_regs; + struct clk *clk; + + /* Virtual and DMA addresses of the single shared page table. */ + volatile u32 *pt; + dma_addr_t pt_paddr; + + /* Virtual and DMA addresses of the MMU's scratch page. When + * a read or write is invalid in the MMU, it will be + * redirected here. + */ + void *mmu_scratch; + dma_addr_t mmu_scratch_paddr; + + /* Number of V3D cores. */ + u32 cores; + + /* Allocator managing the address space. All units are in + * number of pages. + */ + struct drm_mm mm; + spinlock_t mm_lock; + + struct work_struct overflow_mem_work; + + struct v3d_exec_info *bin_job; + struct v3d_exec_info *render_job; + + struct v3d_queue_state queue[V3D_MAX_QUEUES]; + + /* Spinlock used to synchronize the overflow memory + * management against bin job submission. + */ + spinlock_t job_lock; + + /* Protects bo_stats */ + struct mutex bo_lock; + + /* Lock taken when resetting the GPU, to keep multiple + * processes from trying to park the scheduler threads and + * reset at once. + */ + struct mutex reset_lock; + + struct { + u32 num_allocated; + u32 pages_allocated; + } bo_stats; +}; + +static inline struct v3d_dev * +to_v3d_dev(struct drm_device *dev) +{ + return (struct v3d_dev *)dev->dev_private; +} + +/* The per-fd struct, which tracks the MMU mappings. */ +struct v3d_file_priv { + struct v3d_dev *v3d; + + struct drm_sched_entity sched_entity[V3D_MAX_QUEUES]; +}; + +/* Tracks a mapping of a BO into a per-fd address space */ +struct v3d_vma { + struct v3d_page_table *pt; + struct list_head list; /* entry in v3d_bo.vmas */ +}; + +struct v3d_bo { + struct drm_gem_object base; + + struct mutex lock; + + struct drm_mm_node node; + + u32 pages_refcount; + struct page **pages; + struct sg_table *sgt; + void *vaddr; + + struct list_head vmas; /* list of v3d_vma */ + + /* List entry for the BO's position in + * v3d_exec_info->unref_list + */ + struct list_head unref_head; + + /* normally (resv == &_resv) except for imported bo's */ + struct reservation_object *resv; + struct reservation_object _resv; +}; + +static inline struct v3d_bo * +to_v3d_bo(struct drm_gem_object *bo) +{ + return (struct v3d_bo *)bo; +} + +struct v3d_fence { + struct dma_fence base; + struct drm_device *dev; + /* v3d seqno for signaled() test */ + u64 seqno; + enum v3d_queue queue; +}; + +static inline struct v3d_fence * +to_v3d_fence(struct dma_fence *fence) +{ + return (struct v3d_fence *)fence; +} + +#define V3D_READ(offset) readl(v3d->hub_regs + offset) +#define V3D_WRITE(offset, val) writel(val, v3d->hub_regs + offset) + +#define V3D_BRIDGE_READ(offset) readl(v3d->bridge_regs + offset) +#define V3D_BRIDGE_WRITE(offset, val) writel(val, v3d->bridge_regs + offset) + +#define V3D_GCA_READ(offset) readl(v3d->gca_regs + offset) +#define V3D_GCA_WRITE(offset, val) writel(val, v3d->gca_regs + offset) + +#define V3D_CORE_READ(core, offset) readl(v3d->core_regs[core] + offset) +#define V3D_CORE_WRITE(core, offset, val) writel(val, v3d->core_regs[core] + offset) + +struct v3d_job { + struct drm_sched_job base; + + struct v3d_exec_info *exec; + + /* An optional fence userspace can pass in for the job to depend on. */ + struct dma_fence *in_fence; + + /* v3d fence to be signaled by IRQ handler when the job is complete. */ + struct dma_fence *done_fence; + + /* GPU virtual addresses of the start/end of the CL job. */ + u32 start, end; +}; + +struct v3d_exec_info { + struct v3d_dev *v3d; + + struct v3d_job bin, render; + + /* Fence for when the scheduler considers the binner to be + * done, for render to depend on. + */ + struct dma_fence *bin_done_fence; + + struct kref refcount; + + /* This is the array of BOs that were looked up at the start of exec. */ + struct v3d_bo **bo; + u32 bo_count; + + /* List of overflow BOs used in the job that need to be + * released once the job is complete. + */ + struct list_head unref_list; + + /* Submitted tile memory allocation start/size, tile state. */ + u32 qma, qms, qts; +}; + +/** + * _wait_for - magic (register) wait macro + * + * Does the right thing for modeset paths when run under kdgb or similar atomic + * contexts. Note that it's important that we check the condition again after + * having timed out, since the timeout could be due to preemption or similar and + * we've never had a chance to check the condition before the timeout. + */ +#define wait_for(COND, MS) ({ \ + unsigned long timeout__ = jiffies + msecs_to_jiffies(MS) + 1; \ + int ret__ = 0; \ + while (!(COND)) { \ + if (time_after(jiffies, timeout__)) { \ + if (!(COND)) \ + ret__ = -ETIMEDOUT; \ + break; \ + } \ + msleep(1); \ + } \ + ret__; \ +}) + +static inline unsigned long nsecs_to_jiffies_timeout(const u64 n) +{ + /* nsecs_to_jiffies64() does not guard against overflow */ + if (NSEC_PER_SEC % HZ && + div_u64(n, NSEC_PER_SEC) >= MAX_JIFFY_OFFSET / HZ) + return MAX_JIFFY_OFFSET; + + return min_t(u64, MAX_JIFFY_OFFSET, nsecs_to_jiffies64(n) + 1); +} + +/* v3d_bo.c */ +void v3d_free_object(struct drm_gem_object *gem_obj); +struct v3d_bo *v3d_bo_create(struct drm_device *dev, struct drm_file *file_priv, + size_t size); +int v3d_create_bo_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int v3d_mmap_bo_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int v3d_get_bo_offset_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int v3d_gem_fault(struct vm_fault *vmf); +int v3d_mmap(struct file *filp, struct vm_area_struct *vma); +struct reservation_object *v3d_prime_res_obj(struct drm_gem_object *obj); +int v3d_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); +struct sg_table *v3d_prime_get_sg_table(struct drm_gem_object *obj); +struct drm_gem_object *v3d_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sgt); + +/* v3d_debugfs.c */ +int v3d_debugfs_init(struct drm_minor *minor); + +/* v3d_fence.c */ +extern const struct dma_fence_ops v3d_fence_ops; +struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue queue); + +/* v3d_gem.c */ +int v3d_gem_init(struct drm_device *dev); +void v3d_gem_destroy(struct drm_device *dev); +int v3d_submit_cl_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int v3d_wait_bo_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +void v3d_exec_put(struct v3d_exec_info *exec); +void v3d_reset(struct v3d_dev *v3d); +void v3d_invalidate_caches(struct v3d_dev *v3d); +void v3d_flush_caches(struct v3d_dev *v3d); + +/* v3d_irq.c */ +void v3d_irq_init(struct v3d_dev *v3d); +void v3d_irq_enable(struct v3d_dev *v3d); +void v3d_irq_disable(struct v3d_dev *v3d); +void v3d_irq_reset(struct v3d_dev *v3d); + +/* v3d_mmu.c */ +int v3d_mmu_get_offset(struct drm_file *file_priv, struct v3d_bo *bo, + u32 *offset); +int v3d_mmu_set_page_table(struct v3d_dev *v3d); +void v3d_mmu_insert_ptes(struct v3d_bo *bo); +void v3d_mmu_remove_ptes(struct v3d_bo *bo); + +/* v3d_sched.c */ +int v3d_sched_init(struct v3d_dev *v3d); +void v3d_sched_fini(struct v3d_dev *v3d); diff --git a/drivers/gpu/drm/v3d/v3d_fence.c b/drivers/gpu/drm/v3d/v3d_fence.c new file mode 100644 index 000000000000..087d49c8cb12 --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_fence.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2017-2018 Broadcom */ + +#include "v3d_drv.h" + +struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue queue) +{ + struct v3d_fence *fence; + + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + if (!fence) + return ERR_PTR(-ENOMEM); + + fence->dev = &v3d->drm; + fence->queue = queue; + fence->seqno = ++v3d->queue[queue].emit_seqno; + dma_fence_init(&fence->base, &v3d_fence_ops, &v3d->job_lock, + v3d->queue[queue].fence_context, fence->seqno); + + return &fence->base; +} + +static const char *v3d_fence_get_driver_name(struct dma_fence *fence) +{ + return "v3d"; +} + +static const char *v3d_fence_get_timeline_name(struct dma_fence *fence) +{ + struct v3d_fence *f = to_v3d_fence(fence); + + if (f->queue == V3D_BIN) + return "v3d-bin"; + else + return "v3d-render"; +} + +static bool v3d_fence_enable_signaling(struct dma_fence *fence) +{ + return true; +} + +static bool v3d_fence_signaled(struct dma_fence *fence) +{ + struct v3d_fence *f = to_v3d_fence(fence); + struct v3d_dev *v3d = to_v3d_dev(f->dev); + + return v3d->queue[f->queue].finished_seqno >= f->seqno; +} + +const struct dma_fence_ops v3d_fence_ops = { + .get_driver_name = v3d_fence_get_driver_name, + .get_timeline_name = v3d_fence_get_timeline_name, + .enable_signaling = v3d_fence_enable_signaling, + .signaled = v3d_fence_signaled, + .wait = dma_fence_default_wait, + .release = dma_fence_free, +}; diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c new file mode 100644 index 000000000000..b513f9189caf --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -0,0 +1,668 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2014-2018 Broadcom */ + +#include <drm/drmP.h> +#include <drm/drm_syncobj.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> +#include <linux/device.h> +#include <linux/io.h> +#include <linux/sched/signal.h> + +#include "uapi/drm/v3d_drm.h" +#include "v3d_drv.h" +#include "v3d_regs.h" +#include "v3d_trace.h" + +static void +v3d_init_core(struct v3d_dev *v3d, int core) +{ + /* Set OVRTMUOUT, which means that the texture sampler uniform + * configuration's tmu output type field is used, instead of + * using the hardware default behavior based on the texture + * type. If you want the default behavior, you can still put + * "2" in the indirect texture state's output_type field. + */ + V3D_CORE_WRITE(core, V3D_CTL_MISCCFG, V3D_MISCCFG_OVRTMUOUT); + + /* Whenever we flush the L2T cache, we always want to flush + * the whole thing. + */ + V3D_CORE_WRITE(core, V3D_CTL_L2TFLSTA, 0); + V3D_CORE_WRITE(core, V3D_CTL_L2TFLEND, ~0); +} + +/* Sets invariant state for the HW. */ +static void +v3d_init_hw_state(struct v3d_dev *v3d) +{ + v3d_init_core(v3d, 0); +} + +static void +v3d_idle_axi(struct v3d_dev *v3d, int core) +{ + V3D_CORE_WRITE(core, V3D_GMP_CFG, V3D_GMP_CFG_STOP_REQ); + + if (wait_for((V3D_CORE_READ(core, V3D_GMP_STATUS) & + (V3D_GMP_STATUS_RD_COUNT_MASK | + V3D_GMP_STATUS_WR_COUNT_MASK | + V3D_GMP_STATUS_CFG_BUSY)) == 0, 100)) { + DRM_ERROR("Failed to wait for safe GMP shutdown\n"); + } +} + +static void +v3d_idle_gca(struct v3d_dev *v3d) +{ + if (v3d->ver >= 41) + return; + + V3D_GCA_WRITE(V3D_GCA_SAFE_SHUTDOWN, V3D_GCA_SAFE_SHUTDOWN_EN); + + if (wait_for((V3D_GCA_READ(V3D_GCA_SAFE_SHUTDOWN_ACK) & + V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED) == + V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED, 100)) { + DRM_ERROR("Failed to wait for safe GCA shutdown\n"); + } +} + +static void +v3d_reset_v3d(struct v3d_dev *v3d) +{ + int version = V3D_BRIDGE_READ(V3D_TOP_GR_BRIDGE_REVISION); + + if (V3D_GET_FIELD(version, V3D_TOP_GR_BRIDGE_MAJOR) == 2) { + V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0, + V3D_TOP_GR_BRIDGE_SW_INIT_0_V3D_CLK_108_SW_INIT); + V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0, 0); + + /* GFXH-1383: The SW_INIT may cause a stray write to address 0 + * of the unit, so reset it to its power-on value here. + */ + V3D_WRITE(V3D_HUB_AXICFG, V3D_HUB_AXICFG_MAX_LEN_MASK); + } else { + WARN_ON_ONCE(V3D_GET_FIELD(version, + V3D_TOP_GR_BRIDGE_MAJOR) != 7); + V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1, + V3D_TOP_GR_BRIDGE_SW_INIT_1_V3D_CLK_108_SW_INIT); + V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1, 0); + } + + v3d_init_hw_state(v3d); +} + +void +v3d_reset(struct v3d_dev *v3d) +{ + struct drm_device *dev = &v3d->drm; + + DRM_ERROR("Resetting GPU.\n"); + trace_v3d_reset_begin(dev); + + /* XXX: only needed for safe powerdown, not reset. */ + if (false) + v3d_idle_axi(v3d, 0); + + v3d_idle_gca(v3d); + v3d_reset_v3d(v3d); + + v3d_mmu_set_page_table(v3d); + v3d_irq_reset(v3d); + + trace_v3d_reset_end(dev); +} + +static void +v3d_flush_l3(struct v3d_dev *v3d) +{ + if (v3d->ver < 41) { + u32 gca_ctrl = V3D_GCA_READ(V3D_GCA_CACHE_CTRL); + + V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL, + gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH); + + if (v3d->ver < 33) { + V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL, + gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH); + } + } +} + +/* Invalidates the (read-only) L2 cache. */ +static void +v3d_invalidate_l2(struct v3d_dev *v3d, int core) +{ + V3D_CORE_WRITE(core, V3D_CTL_L2CACTL, + V3D_L2CACTL_L2CCLR | + V3D_L2CACTL_L2CENA); +} + +static void +v3d_invalidate_l1td(struct v3d_dev *v3d, int core) +{ + V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, V3D_L2TCACTL_TMUWCF); + if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) & + V3D_L2TCACTL_L2TFLS), 100)) { + DRM_ERROR("Timeout waiting for L1T write combiner flush\n"); + } +} + +/* Invalidates texture L2 cachelines */ +static void +v3d_flush_l2t(struct v3d_dev *v3d, int core) +{ + v3d_invalidate_l1td(v3d, core); + + V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, + V3D_L2TCACTL_L2TFLS | + V3D_SET_FIELD(V3D_L2TCACTL_FLM_FLUSH, V3D_L2TCACTL_FLM)); + if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) & + V3D_L2TCACTL_L2TFLS), 100)) { + DRM_ERROR("Timeout waiting for L2T flush\n"); + } +} + +/* Invalidates the slice caches. These are read-only caches. */ +static void +v3d_invalidate_slices(struct v3d_dev *v3d, int core) +{ + V3D_CORE_WRITE(core, V3D_CTL_SLCACTL, + V3D_SET_FIELD(0xf, V3D_SLCACTL_TVCCS) | + V3D_SET_FIELD(0xf, V3D_SLCACTL_TDCCS) | + V3D_SET_FIELD(0xf, V3D_SLCACTL_UCC) | + V3D_SET_FIELD(0xf, V3D_SLCACTL_ICC)); +} + +/* Invalidates texture L2 cachelines */ +static void +v3d_invalidate_l2t(struct v3d_dev *v3d, int core) +{ + V3D_CORE_WRITE(core, + V3D_CTL_L2TCACTL, + V3D_L2TCACTL_L2TFLS | + V3D_SET_FIELD(V3D_L2TCACTL_FLM_CLEAR, V3D_L2TCACTL_FLM)); + if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) & + V3D_L2TCACTL_L2TFLS), 100)) { + DRM_ERROR("Timeout waiting for L2T invalidate\n"); + } +} + +void +v3d_invalidate_caches(struct v3d_dev *v3d) +{ + v3d_flush_l3(v3d); + + v3d_invalidate_l2(v3d, 0); + v3d_invalidate_slices(v3d, 0); + v3d_flush_l2t(v3d, 0); +} + +void +v3d_flush_caches(struct v3d_dev *v3d) +{ + v3d_invalidate_l1td(v3d, 0); + v3d_invalidate_l2t(v3d, 0); +} + +static void +v3d_attach_object_fences(struct v3d_exec_info *exec) +{ + struct dma_fence *out_fence = &exec->render.base.s_fence->finished; + struct v3d_bo *bo; + int i; + + for (i = 0; i < exec->bo_count; i++) { + bo = to_v3d_bo(&exec->bo[i]->base); + + /* XXX: Use shared fences for read-only objects. */ + reservation_object_add_excl_fence(bo->resv, out_fence); + } +} + +static void +v3d_unlock_bo_reservations(struct drm_device *dev, + struct v3d_exec_info *exec, + struct ww_acquire_ctx *acquire_ctx) +{ + int i; + + for (i = 0; i < exec->bo_count; i++) { + struct v3d_bo *bo = to_v3d_bo(&exec->bo[i]->base); + + ww_mutex_unlock(&bo->resv->lock); + } + + ww_acquire_fini(acquire_ctx); +} + +/* Takes the reservation lock on all the BOs being referenced, so that + * at queue submit time we can update the reservations. + * + * We don't lock the RCL the tile alloc/state BOs, or overflow memory + * (all of which are on exec->unref_list). They're entirely private + * to v3d, so we don't attach dma-buf fences to them. + */ +static int +v3d_lock_bo_reservations(struct drm_device *dev, + struct v3d_exec_info *exec, + struct ww_acquire_ctx *acquire_ctx) +{ + int contended_lock = -1; + int i, ret; + struct v3d_bo *bo; + + ww_acquire_init(acquire_ctx, &reservation_ww_class); + +retry: + if (contended_lock != -1) { + bo = to_v3d_bo(&exec->bo[contended_lock]->base); + ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock, + acquire_ctx); + if (ret) { + ww_acquire_done(acquire_ctx); + return ret; + } + } + + for (i = 0; i < exec->bo_count; i++) { + if (i == contended_lock) + continue; + + bo = to_v3d_bo(&exec->bo[i]->base); + + ret = ww_mutex_lock_interruptible(&bo->resv->lock, acquire_ctx); + if (ret) { + int j; + + for (j = 0; j < i; j++) { + bo = to_v3d_bo(&exec->bo[j]->base); + ww_mutex_unlock(&bo->resv->lock); + } + + if (contended_lock != -1 && contended_lock >= i) { + bo = to_v3d_bo(&exec->bo[contended_lock]->base); + + ww_mutex_unlock(&bo->resv->lock); + } + + if (ret == -EDEADLK) { + contended_lock = i; + goto retry; + } + + ww_acquire_done(acquire_ctx); + return ret; + } + } + + ww_acquire_done(acquire_ctx); + + /* Reserve space for our shared (read-only) fence references, + * before we commit the CL to the hardware. + */ + for (i = 0; i < exec->bo_count; i++) { + bo = to_v3d_bo(&exec->bo[i]->base); + + ret = reservation_object_reserve_shared(bo->resv); + if (ret) { + v3d_unlock_bo_reservations(dev, exec, acquire_ctx); + return ret; + } + } + + return 0; +} + +/** + * v3d_cl_lookup_bos() - Sets up exec->bo[] with the GEM objects + * referenced by the job. + * @dev: DRM device + * @file_priv: DRM file for this fd + * @exec: V3D job being set up + * + * The command validator needs to reference BOs by their index within + * the submitted job's BO list. This does the validation of the job's + * BO list and reference counting for the lifetime of the job. + * + * Note that this function doesn't need to unreference the BOs on + * failure, because that will happen at v3d_exec_cleanup() time. + */ +static int +v3d_cl_lookup_bos(struct drm_device *dev, + struct drm_file *file_priv, + struct drm_v3d_submit_cl *args, + struct v3d_exec_info *exec) +{ + u32 *handles; + int ret = 0; + int i; + + exec->bo_count = args->bo_handle_count; + + if (!exec->bo_count) { + /* See comment on bo_index for why we have to check + * this. + */ + DRM_DEBUG("Rendering requires BOs\n"); + return -EINVAL; + } + + exec->bo = kvmalloc_array(exec->bo_count, + sizeof(struct drm_gem_cma_object *), + GFP_KERNEL | __GFP_ZERO); + if (!exec->bo) { + DRM_DEBUG("Failed to allocate validated BO pointers\n"); + return -ENOMEM; + } + + handles = kvmalloc_array(exec->bo_count, sizeof(u32), GFP_KERNEL); + if (!handles) { + ret = -ENOMEM; + DRM_DEBUG("Failed to allocate incoming GEM handles\n"); + goto fail; + } + + if (copy_from_user(handles, + (void __user *)(uintptr_t)args->bo_handles, + exec->bo_count * sizeof(u32))) { + ret = -EFAULT; + DRM_DEBUG("Failed to copy in GEM handles\n"); + goto fail; + } + + spin_lock(&file_priv->table_lock); + for (i = 0; i < exec->bo_count; i++) { + struct drm_gem_object *bo = idr_find(&file_priv->object_idr, + handles[i]); + if (!bo) { + DRM_DEBUG("Failed to look up GEM BO %d: %d\n", + i, handles[i]); + ret = -ENOENT; + spin_unlock(&file_priv->table_lock); + goto fail; + } + drm_gem_object_get(bo); + exec->bo[i] = to_v3d_bo(bo); + } + spin_unlock(&file_priv->table_lock); + +fail: + kvfree(handles); + return ret; +} + +static void +v3d_exec_cleanup(struct kref *ref) +{ + struct v3d_exec_info *exec = container_of(ref, struct v3d_exec_info, + refcount); + struct v3d_dev *v3d = exec->v3d; + unsigned int i; + struct v3d_bo *bo, *save; + + dma_fence_put(exec->bin.in_fence); + dma_fence_put(exec->render.in_fence); + + dma_fence_put(exec->bin.done_fence); + dma_fence_put(exec->render.done_fence); + + dma_fence_put(exec->bin_done_fence); + + for (i = 0; i < exec->bo_count; i++) + drm_gem_object_put_unlocked(&exec->bo[i]->base); + kvfree(exec->bo); + + list_for_each_entry_safe(bo, save, &exec->unref_list, unref_head) { + drm_gem_object_put_unlocked(&bo->base); + } + + pm_runtime_mark_last_busy(v3d->dev); + pm_runtime_put_autosuspend(v3d->dev); + + kfree(exec); +} + +void v3d_exec_put(struct v3d_exec_info *exec) +{ + kref_put(&exec->refcount, v3d_exec_cleanup); +} + +int +v3d_wait_bo_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + int ret; + struct drm_v3d_wait_bo *args = data; + struct drm_gem_object *gem_obj; + struct v3d_bo *bo; + ktime_t start = ktime_get(); + u64 delta_ns; + unsigned long timeout_jiffies = + nsecs_to_jiffies_timeout(args->timeout_ns); + + if (args->pad != 0) + return -EINVAL; + + gem_obj = drm_gem_object_lookup(file_priv, args->handle); + if (!gem_obj) { + DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle); + return -EINVAL; + } + bo = to_v3d_bo(gem_obj); + + ret = reservation_object_wait_timeout_rcu(bo->resv, + true, true, + timeout_jiffies); + + if (ret == 0) + ret = -ETIME; + else if (ret > 0) + ret = 0; + + /* Decrement the user's timeout, in case we got interrupted + * such that the ioctl will be restarted. + */ + delta_ns = ktime_to_ns(ktime_sub(ktime_get(), start)); + if (delta_ns < args->timeout_ns) + args->timeout_ns -= delta_ns; + else + args->timeout_ns = 0; + + /* Asked to wait beyond the jiffie/scheduler precision? */ + if (ret == -ETIME && args->timeout_ns) + ret = -EAGAIN; + + drm_gem_object_put_unlocked(gem_obj); + + return ret; +} + +/** + * v3d_submit_cl_ioctl() - Submits a job (frame) to the V3D. + * @dev: DRM device + * @data: ioctl argument + * @file_priv: DRM file for this fd + * + * This is the main entrypoint for userspace to submit a 3D frame to + * the GPU. Userspace provides the binner command list (if + * applicable), and the kernel sets up the render command list to draw + * to the framebuffer described in the ioctl, using the command lists + * that the 3D engine's binner will produce. + */ +int +v3d_submit_cl_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct v3d_dev *v3d = to_v3d_dev(dev); + struct v3d_file_priv *v3d_priv = file_priv->driver_priv; + struct drm_v3d_submit_cl *args = data; + struct v3d_exec_info *exec; + struct ww_acquire_ctx acquire_ctx; + struct drm_syncobj *sync_out; + int ret = 0; + + if (args->pad != 0) { + DRM_INFO("pad must be zero: %d\n", args->pad); + return -EINVAL; + } + + exec = kcalloc(1, sizeof(*exec), GFP_KERNEL); + if (!exec) + return -ENOMEM; + + ret = pm_runtime_get_sync(v3d->dev); + if (ret < 0) { + kfree(exec); + return ret; + } + + kref_init(&exec->refcount); + + ret = drm_syncobj_find_fence(file_priv, args->in_sync_bcl, + &exec->bin.in_fence); + if (ret == -EINVAL) + goto fail; + + ret = drm_syncobj_find_fence(file_priv, args->in_sync_rcl, + &exec->render.in_fence); + if (ret == -EINVAL) + goto fail; + + exec->qma = args->qma; + exec->qms = args->qms; + exec->qts = args->qts; + exec->bin.exec = exec; + exec->bin.start = args->bcl_start; + exec->bin.end = args->bcl_end; + exec->render.exec = exec; + exec->render.start = args->rcl_start; + exec->render.end = args->rcl_end; + exec->v3d = v3d; + INIT_LIST_HEAD(&exec->unref_list); + + ret = v3d_cl_lookup_bos(dev, file_priv, args, exec); + if (ret) + goto fail; + + ret = v3d_lock_bo_reservations(dev, exec, &acquire_ctx); + if (ret) + goto fail; + + if (exec->bin.start != exec->bin.end) { + ret = drm_sched_job_init(&exec->bin.base, + &v3d->queue[V3D_BIN].sched, + &v3d_priv->sched_entity[V3D_BIN], + v3d_priv); + if (ret) + goto fail_unreserve; + + exec->bin_done_fence = + dma_fence_get(&exec->bin.base.s_fence->finished); + + kref_get(&exec->refcount); /* put by scheduler job completion */ + drm_sched_entity_push_job(&exec->bin.base, + &v3d_priv->sched_entity[V3D_BIN]); + } + + ret = drm_sched_job_init(&exec->render.base, + &v3d->queue[V3D_RENDER].sched, + &v3d_priv->sched_entity[V3D_RENDER], + v3d_priv); + if (ret) + goto fail_unreserve; + + kref_get(&exec->refcount); /* put by scheduler job completion */ + drm_sched_entity_push_job(&exec->render.base, + &v3d_priv->sched_entity[V3D_RENDER]); + + v3d_attach_object_fences(exec); + + v3d_unlock_bo_reservations(dev, exec, &acquire_ctx); + + /* Update the return sync object for the */ + sync_out = drm_syncobj_find(file_priv, args->out_sync); + if (sync_out) { + drm_syncobj_replace_fence(sync_out, + &exec->render.base.s_fence->finished); + drm_syncobj_put(sync_out); + } + + v3d_exec_put(exec); + + return 0; + +fail_unreserve: + v3d_unlock_bo_reservations(dev, exec, &acquire_ctx); +fail: + v3d_exec_put(exec); + + return ret; +} + +int +v3d_gem_init(struct drm_device *dev) +{ + struct v3d_dev *v3d = to_v3d_dev(dev); + u32 pt_size = 4096 * 1024; + int ret, i; + + for (i = 0; i < V3D_MAX_QUEUES; i++) + v3d->queue[i].fence_context = dma_fence_context_alloc(1); + + spin_lock_init(&v3d->mm_lock); + spin_lock_init(&v3d->job_lock); + mutex_init(&v3d->bo_lock); + mutex_init(&v3d->reset_lock); + + /* Note: We don't allocate address 0. Various bits of HW + * treat 0 as special, such as the occlusion query counters + * where 0 means "disabled". + */ + drm_mm_init(&v3d->mm, 1, pt_size / sizeof(u32) - 1); + + v3d->pt = dma_alloc_wc(v3d->dev, pt_size, + &v3d->pt_paddr, + GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO); + if (!v3d->pt) { + drm_mm_takedown(&v3d->mm); + dev_err(v3d->dev, + "Failed to allocate page tables. " + "Please ensure you have CMA enabled.\n"); + return -ENOMEM; + } + + v3d_init_hw_state(v3d); + v3d_mmu_set_page_table(v3d); + + ret = v3d_sched_init(v3d); + if (ret) { + drm_mm_takedown(&v3d->mm); + dma_free_coherent(v3d->dev, 4096 * 1024, (void *)v3d->pt, + v3d->pt_paddr); + } + + return 0; +} + +void +v3d_gem_destroy(struct drm_device *dev) +{ + struct v3d_dev *v3d = to_v3d_dev(dev); + enum v3d_queue q; + + v3d_sched_fini(v3d); + + /* Waiting for exec to finish would need to be done before + * unregistering V3D. + */ + for (q = 0; q < V3D_MAX_QUEUES; q++) { + WARN_ON(v3d->queue[q].emit_seqno != + v3d->queue[q].finished_seqno); + } + + drm_mm_takedown(&v3d->mm); + + dma_free_coherent(v3d->dev, 4096 * 1024, (void *)v3d->pt, v3d->pt_paddr); +} diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c new file mode 100644 index 000000000000..77e1fa046c10 --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_irq.c @@ -0,0 +1,206 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2014-2018 Broadcom */ + +/** + * DOC: Interrupt management for the V3D engine + * + * When we take a binning or rendering flush done interrupt, we need + * to signal the fence for that job so that the scheduler can queue up + * the next one and unblock any waiters. + * + * When we take the binner out of memory interrupt, we need to + * allocate some new memory and pass it to the binner so that the + * current job can make progress. + */ + +#include "v3d_drv.h" +#include "v3d_regs.h" + +#define V3D_CORE_IRQS ((u32)(V3D_INT_OUTOMEM | \ + V3D_INT_FLDONE | \ + V3D_INT_FRDONE | \ + V3D_INT_GMPV)) + +#define V3D_HUB_IRQS ((u32)(V3D_HUB_INT_MMU_WRV | \ + V3D_HUB_INT_MMU_PTI | \ + V3D_HUB_INT_MMU_CAP)) + +static void +v3d_overflow_mem_work(struct work_struct *work) +{ + struct v3d_dev *v3d = + container_of(work, struct v3d_dev, overflow_mem_work); + struct drm_device *dev = &v3d->drm; + struct v3d_bo *bo = v3d_bo_create(dev, NULL /* XXX: GMP */, 256 * 1024); + unsigned long irqflags; + + if (IS_ERR(bo)) { + DRM_ERROR("Couldn't allocate binner overflow mem\n"); + return; + } + + /* We lost a race, and our work task came in after the bin job + * completed and exited. This can happen because the HW + * signals OOM before it's fully OOM, so the binner might just + * barely complete. + * + * If we lose the race and our work task comes in after a new + * bin job got scheduled, that's fine. We'll just give them + * some binner pool anyway. + */ + spin_lock_irqsave(&v3d->job_lock, irqflags); + if (!v3d->bin_job) { + spin_unlock_irqrestore(&v3d->job_lock, irqflags); + goto out; + } + + drm_gem_object_get(&bo->base); + list_add_tail(&bo->unref_head, &v3d->bin_job->unref_list); + spin_unlock_irqrestore(&v3d->job_lock, irqflags); + + V3D_CORE_WRITE(0, V3D_PTB_BPOA, bo->node.start << PAGE_SHIFT); + V3D_CORE_WRITE(0, V3D_PTB_BPOS, bo->base.size); + +out: + drm_gem_object_put_unlocked(&bo->base); +} + +static irqreturn_t +v3d_irq(int irq, void *arg) +{ + struct v3d_dev *v3d = arg; + u32 intsts; + irqreturn_t status = IRQ_NONE; + + intsts = V3D_CORE_READ(0, V3D_CTL_INT_STS); + + /* Acknowledge the interrupts we're handling here. */ + V3D_CORE_WRITE(0, V3D_CTL_INT_CLR, intsts); + + if (intsts & V3D_INT_OUTOMEM) { + /* Note that the OOM status is edge signaled, so the + * interrupt won't happen again until the we actually + * add more memory. + */ + schedule_work(&v3d->overflow_mem_work); + status = IRQ_HANDLED; + } + + if (intsts & V3D_INT_FLDONE) { + v3d->queue[V3D_BIN].finished_seqno++; + dma_fence_signal(v3d->bin_job->bin.done_fence); + status = IRQ_HANDLED; + } + + if (intsts & V3D_INT_FRDONE) { + v3d->queue[V3D_RENDER].finished_seqno++; + dma_fence_signal(v3d->render_job->render.done_fence); + + status = IRQ_HANDLED; + } + + /* We shouldn't be triggering these if we have GMP in + * always-allowed mode. + */ + if (intsts & V3D_INT_GMPV) + dev_err(v3d->dev, "GMP violation\n"); + + return status; +} + +static irqreturn_t +v3d_hub_irq(int irq, void *arg) +{ + struct v3d_dev *v3d = arg; + u32 intsts; + irqreturn_t status = IRQ_NONE; + + intsts = V3D_READ(V3D_HUB_INT_STS); + + /* Acknowledge the interrupts we're handling here. */ + V3D_WRITE(V3D_HUB_INT_CLR, intsts); + + if (intsts & (V3D_HUB_INT_MMU_WRV | + V3D_HUB_INT_MMU_PTI | + V3D_HUB_INT_MMU_CAP)) { + u32 axi_id = V3D_READ(V3D_MMU_VIO_ID); + u64 vio_addr = (u64)V3D_READ(V3D_MMU_VIO_ADDR) << 8; + + dev_err(v3d->dev, "MMU error from client %d at 0x%08llx%s%s%s\n", + axi_id, (long long)vio_addr, + ((intsts & V3D_HUB_INT_MMU_WRV) ? + ", write violation" : ""), + ((intsts & V3D_HUB_INT_MMU_PTI) ? + ", pte invalid" : ""), + ((intsts & V3D_HUB_INT_MMU_CAP) ? + ", cap exceeded" : "")); + status = IRQ_HANDLED; + } + + return status; +} + +void +v3d_irq_init(struct v3d_dev *v3d) +{ + int ret, core; + + INIT_WORK(&v3d->overflow_mem_work, v3d_overflow_mem_work); + + /* Clear any pending interrupts someone might have left around + * for us. + */ + for (core = 0; core < v3d->cores; core++) + V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS); + V3D_WRITE(V3D_HUB_INT_CLR, V3D_HUB_IRQS); + + ret = devm_request_irq(v3d->dev, platform_get_irq(v3d->pdev, 0), + v3d_hub_irq, IRQF_SHARED, + "v3d_hub", v3d); + ret = devm_request_irq(v3d->dev, platform_get_irq(v3d->pdev, 1), + v3d_irq, IRQF_SHARED, + "v3d_core0", v3d); + if (ret) + dev_err(v3d->dev, "IRQ setup failed: %d\n", ret); + + v3d_irq_enable(v3d); +} + +void +v3d_irq_enable(struct v3d_dev *v3d) +{ + int core; + + /* Enable our set of interrupts, masking out any others. */ + for (core = 0; core < v3d->cores; core++) { + V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_SET, ~V3D_CORE_IRQS); + V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_CLR, V3D_CORE_IRQS); + } + + V3D_WRITE(V3D_HUB_INT_MSK_SET, ~V3D_HUB_IRQS); + V3D_WRITE(V3D_HUB_INT_MSK_CLR, V3D_HUB_IRQS); +} + +void +v3d_irq_disable(struct v3d_dev *v3d) +{ + int core; + + /* Disable all interrupts. */ + for (core = 0; core < v3d->cores; core++) + V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_SET, ~0); + V3D_WRITE(V3D_HUB_INT_MSK_SET, ~0); + + /* Clear any pending interrupts we might have left. */ + for (core = 0; core < v3d->cores; core++) + V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS); + V3D_WRITE(V3D_HUB_INT_CLR, V3D_HUB_IRQS); + + cancel_work_sync(&v3d->overflow_mem_work); +} + +/** Reinitializes interrupt registers when a GPU reset is performed. */ +void v3d_irq_reset(struct v3d_dev *v3d) +{ + v3d_irq_enable(v3d); +} diff --git a/drivers/gpu/drm/v3d/v3d_mmu.c b/drivers/gpu/drm/v3d/v3d_mmu.c new file mode 100644 index 000000000000..b00f97c31b70 --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_mmu.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2017-2018 Broadcom */ + +/** + * DOC: Broadcom V3D MMU + * + * The V3D 3.x hardware (compared to VC4) now includes an MMU. It has + * a single level of page tables for the V3D's 4GB address space to + * map to AXI bus addresses, thus it could need up to 4MB of + * physically contiguous memory to store the PTEs. + * + * Because the 4MB of contiguous memory for page tables is precious, + * and switching between them is expensive, we load all BOs into the + * same 4GB address space. + * + * To protect clients from each other, we should use the GMP to + * quickly mask out (at 128kb granularity) what pages are available to + * each client. This is not yet implemented. + */ + +#include "v3d_drv.h" +#include "v3d_regs.h" + +#define V3D_MMU_PAGE_SHIFT 12 + +/* Note: All PTEs for the 1MB superpage must be filled with the + * superpage bit set. + */ +#define V3D_PTE_SUPERPAGE BIT(31) +#define V3D_PTE_WRITEABLE BIT(29) +#define V3D_PTE_VALID BIT(28) + +static int v3d_mmu_flush_all(struct v3d_dev *v3d) +{ + int ret; + + /* Make sure that another flush isn't already running when we + * start this one. + */ + ret = wait_for(!(V3D_READ(V3D_MMU_CTL) & + V3D_MMU_CTL_TLB_CLEARING), 100); + if (ret) + dev_err(v3d->dev, "TLB clear wait idle pre-wait failed\n"); + + V3D_WRITE(V3D_MMU_CTL, V3D_READ(V3D_MMU_CTL) | + V3D_MMU_CTL_TLB_CLEAR); + + V3D_WRITE(V3D_MMUC_CONTROL, + V3D_MMUC_CONTROL_FLUSH | + V3D_MMUC_CONTROL_ENABLE); + + ret = wait_for(!(V3D_READ(V3D_MMU_CTL) & + V3D_MMU_CTL_TLB_CLEARING), 100); + if (ret) { + dev_err(v3d->dev, "TLB clear wait idle failed\n"); + return ret; + } + + ret = wait_for(!(V3D_READ(V3D_MMUC_CONTROL) & + V3D_MMUC_CONTROL_FLUSHING), 100); + if (ret) + dev_err(v3d->dev, "MMUC flush wait idle failed\n"); + + return ret; +} + +int v3d_mmu_set_page_table(struct v3d_dev *v3d) +{ + V3D_WRITE(V3D_MMU_PT_PA_BASE, v3d->pt_paddr >> V3D_MMU_PAGE_SHIFT); + V3D_WRITE(V3D_MMU_CTL, + V3D_MMU_CTL_ENABLE | + V3D_MMU_CTL_PT_INVALID | + V3D_MMU_CTL_PT_INVALID_ABORT | + V3D_MMU_CTL_WRITE_VIOLATION_ABORT | + V3D_MMU_CTL_CAP_EXCEEDED_ABORT); + V3D_WRITE(V3D_MMU_ILLEGAL_ADDR, + (v3d->mmu_scratch_paddr >> V3D_MMU_PAGE_SHIFT) | + V3D_MMU_ILLEGAL_ADDR_ENABLE); + V3D_WRITE(V3D_MMUC_CONTROL, V3D_MMUC_CONTROL_ENABLE); + + return v3d_mmu_flush_all(v3d); +} + +void v3d_mmu_insert_ptes(struct v3d_bo *bo) +{ + struct v3d_dev *v3d = to_v3d_dev(bo->base.dev); + u32 page = bo->node.start; + u32 page_prot = V3D_PTE_WRITEABLE | V3D_PTE_VALID; + unsigned int count; + struct scatterlist *sgl; + + for_each_sg(bo->sgt->sgl, sgl, bo->sgt->nents, count) { + u32 page_address = sg_dma_address(sgl) >> V3D_MMU_PAGE_SHIFT; + u32 pte = page_prot | page_address; + u32 i; + + BUG_ON(page_address + (sg_dma_len(sgl) >> V3D_MMU_PAGE_SHIFT) >= + BIT(24)); + + for (i = 0; i < sg_dma_len(sgl) >> V3D_MMU_PAGE_SHIFT; i++) + v3d->pt[page++] = pte + i; + } + + WARN_ON_ONCE(page - bo->node.start != + bo->base.size >> V3D_MMU_PAGE_SHIFT); + + if (v3d_mmu_flush_all(v3d)) + dev_err(v3d->dev, "MMU flush timeout\n"); +} + +void v3d_mmu_remove_ptes(struct v3d_bo *bo) +{ + struct v3d_dev *v3d = to_v3d_dev(bo->base.dev); + u32 npages = bo->base.size >> V3D_MMU_PAGE_SHIFT; + u32 page; + + for (page = bo->node.start; page < bo->node.start + npages; page++) + v3d->pt[page] = 0; + + if (v3d_mmu_flush_all(v3d)) + dev_err(v3d->dev, "MMU flush timeout\n"); +} diff --git a/drivers/gpu/drm/v3d/v3d_regs.h b/drivers/gpu/drm/v3d/v3d_regs.h new file mode 100644 index 000000000000..fc13282dfc2f --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_regs.h @@ -0,0 +1,295 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2017-2018 Broadcom */ + +#ifndef V3D_REGS_H +#define V3D_REGS_H + +#include <linux/bitops.h> + +#define V3D_MASK(high, low) ((u32)GENMASK(high, low)) +/* Using the GNU statement expression extension */ +#define V3D_SET_FIELD(value, field) \ + ({ \ + u32 fieldval = (value) << field##_SHIFT; \ + WARN_ON((fieldval & ~field##_MASK) != 0); \ + fieldval & field##_MASK; \ + }) + +#define V3D_GET_FIELD(word, field) (((word) & field##_MASK) >> \ + field##_SHIFT) + +/* Hub registers for shared hardware between V3D cores. */ + +#define V3D_HUB_AXICFG 0x00000 +# define V3D_HUB_AXICFG_MAX_LEN_MASK V3D_MASK(3, 0) +# define V3D_HUB_AXICFG_MAX_LEN_SHIFT 0 +#define V3D_HUB_UIFCFG 0x00004 +#define V3D_HUB_IDENT0 0x00008 + +#define V3D_HUB_IDENT1 0x0000c +# define V3D_HUB_IDENT1_WITH_MSO BIT(19) +# define V3D_HUB_IDENT1_WITH_TSY BIT(18) +# define V3D_HUB_IDENT1_WITH_TFU BIT(17) +# define V3D_HUB_IDENT1_WITH_L3C BIT(16) +# define V3D_HUB_IDENT1_NHOSTS_MASK V3D_MASK(15, 12) +# define V3D_HUB_IDENT1_NHOSTS_SHIFT 12 +# define V3D_HUB_IDENT1_NCORES_MASK V3D_MASK(11, 8) +# define V3D_HUB_IDENT1_NCORES_SHIFT 8 +# define V3D_HUB_IDENT1_REV_MASK V3D_MASK(7, 4) +# define V3D_HUB_IDENT1_REV_SHIFT 4 +# define V3D_HUB_IDENT1_TVER_MASK V3D_MASK(3, 0) +# define V3D_HUB_IDENT1_TVER_SHIFT 0 + +#define V3D_HUB_IDENT2 0x00010 +# define V3D_HUB_IDENT2_WITH_MMU BIT(8) +# define V3D_HUB_IDENT2_L3C_NKB_MASK V3D_MASK(7, 0) +# define V3D_HUB_IDENT2_L3C_NKB_SHIFT 0 + +#define V3D_HUB_IDENT3 0x00014 +# define V3D_HUB_IDENT3_IPREV_MASK V3D_MASK(15, 8) +# define V3D_HUB_IDENT3_IPREV_SHIFT 8 +# define V3D_HUB_IDENT3_IPIDX_MASK V3D_MASK(7, 0) +# define V3D_HUB_IDENT3_IPIDX_SHIFT 0 + +#define V3D_HUB_INT_STS 0x00050 +#define V3D_HUB_INT_SET 0x00054 +#define V3D_HUB_INT_CLR 0x00058 +#define V3D_HUB_INT_MSK_STS 0x0005c +#define V3D_HUB_INT_MSK_SET 0x00060 +#define V3D_HUB_INT_MSK_CLR 0x00064 +# define V3D_HUB_INT_MMU_WRV BIT(5) +# define V3D_HUB_INT_MMU_PTI BIT(4) +# define V3D_HUB_INT_MMU_CAP BIT(3) +# define V3D_HUB_INT_MSO BIT(2) +# define V3D_HUB_INT_TFUC BIT(1) +# define V3D_HUB_INT_TFUF BIT(0) + +#define V3D_GCA_CACHE_CTRL 0x0000c +# define V3D_GCA_CACHE_CTRL_FLUSH BIT(0) + +#define V3D_GCA_SAFE_SHUTDOWN 0x000b0 +# define V3D_GCA_SAFE_SHUTDOWN_EN BIT(0) + +#define V3D_GCA_SAFE_SHUTDOWN_ACK 0x000b4 +# define V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED 3 + +# define V3D_TOP_GR_BRIDGE_REVISION 0x00000 +# define V3D_TOP_GR_BRIDGE_MAJOR_MASK V3D_MASK(15, 8) +# define V3D_TOP_GR_BRIDGE_MAJOR_SHIFT 8 +# define V3D_TOP_GR_BRIDGE_MINOR_MASK V3D_MASK(7, 0) +# define V3D_TOP_GR_BRIDGE_MINOR_SHIFT 0 + +/* 7268 reset reg */ +# define V3D_TOP_GR_BRIDGE_SW_INIT_0 0x00008 +# define V3D_TOP_GR_BRIDGE_SW_INIT_0_V3D_CLK_108_SW_INIT BIT(0) +/* 7278 reset reg */ +# define V3D_TOP_GR_BRIDGE_SW_INIT_1 0x0000c +# define V3D_TOP_GR_BRIDGE_SW_INIT_1_V3D_CLK_108_SW_INIT BIT(0) + +/* Per-MMU registers. */ + +#define V3D_MMUC_CONTROL 0x01000 +# define V3D_MMUC_CONTROL_CLEAR BIT(3) +# define V3D_MMUC_CONTROL_FLUSHING BIT(2) +# define V3D_MMUC_CONTROL_FLUSH BIT(1) +# define V3D_MMUC_CONTROL_ENABLE BIT(0) + +#define V3D_MMU_CTL 0x01200 +# define V3D_MMU_CTL_CAP_EXCEEDED BIT(27) +# define V3D_MMU_CTL_CAP_EXCEEDED_ABORT BIT(26) +# define V3D_MMU_CTL_CAP_EXCEEDED_INT BIT(25) +# define V3D_MMU_CTL_CAP_EXCEEDED_EXCEPTION BIT(24) +# define V3D_MMU_CTL_PT_INVALID BIT(20) +# define V3D_MMU_CTL_PT_INVALID_ABORT BIT(19) +# define V3D_MMU_CTL_PT_INVALID_INT BIT(18) +# define V3D_MMU_CTL_PT_INVALID_EXCEPTION BIT(17) +# define V3D_MMU_CTL_WRITE_VIOLATION BIT(16) +# define V3D_MMU_CTL_WRITE_VIOLATION_ABORT BIT(11) +# define V3D_MMU_CTL_WRITE_VIOLATION_INT BIT(10) +# define V3D_MMU_CTL_WRITE_VIOLATION_EXCEPTION BIT(9) +# define V3D_MMU_CTL_TLB_CLEARING BIT(7) +# define V3D_MMU_CTL_TLB_STATS_CLEAR BIT(3) +# define V3D_MMU_CTL_TLB_CLEAR BIT(2) +# define V3D_MMU_CTL_TLB_STATS_ENABLE BIT(1) +# define V3D_MMU_CTL_ENABLE BIT(0) + +#define V3D_MMU_PT_PA_BASE 0x01204 +#define V3D_MMU_HIT 0x01208 +#define V3D_MMU_MISSES 0x0120c +#define V3D_MMU_STALLS 0x01210 + +#define V3D_MMU_ADDR_CAP 0x01214 +# define V3D_MMU_ADDR_CAP_ENABLE BIT(31) +# define V3D_MMU_ADDR_CAP_MPAGE_MASK V3D_MASK(11, 0) +# define V3D_MMU_ADDR_CAP_MPAGE_SHIFT 0 + +#define V3D_MMU_SHOOT_DOWN 0x01218 +# define V3D_MMU_SHOOT_DOWN_SHOOTING BIT(29) +# define V3D_MMU_SHOOT_DOWN_SHOOT BIT(28) +# define V3D_MMU_SHOOT_DOWN_PAGE_MASK V3D_MASK(27, 0) +# define V3D_MMU_SHOOT_DOWN_PAGE_SHIFT 0 + +#define V3D_MMU_BYPASS_START 0x0121c +#define V3D_MMU_BYPASS_END 0x01220 + +/* AXI ID of the access that faulted */ +#define V3D_MMU_VIO_ID 0x0122c + +/* Address for illegal PTEs to return */ +#define V3D_MMU_ILLEGAL_ADDR 0x01230 +# define V3D_MMU_ILLEGAL_ADDR_ENABLE BIT(31) + +/* Address that faulted */ +#define V3D_MMU_VIO_ADDR 0x01234 + +/* Per-V3D-core registers */ + +#define V3D_CTL_IDENT0 0x00000 +# define V3D_IDENT0_VER_MASK V3D_MASK(31, 24) +# define V3D_IDENT0_VER_SHIFT 24 + +#define V3D_CTL_IDENT1 0x00004 +/* Multiples of 1kb */ +# define V3D_IDENT1_VPM_SIZE_MASK V3D_MASK(31, 28) +# define V3D_IDENT1_VPM_SIZE_SHIFT 28 +# define V3D_IDENT1_NSEM_MASK V3D_MASK(23, 16) +# define V3D_IDENT1_NSEM_SHIFT 16 +# define V3D_IDENT1_NTMU_MASK V3D_MASK(15, 12) +# define V3D_IDENT1_NTMU_SHIFT 12 +# define V3D_IDENT1_QUPS_MASK V3D_MASK(11, 8) +# define V3D_IDENT1_QUPS_SHIFT 8 +# define V3D_IDENT1_NSLC_MASK V3D_MASK(7, 4) +# define V3D_IDENT1_NSLC_SHIFT 4 +# define V3D_IDENT1_REV_MASK V3D_MASK(3, 0) +# define V3D_IDENT1_REV_SHIFT 0 + +#define V3D_CTL_IDENT2 0x00008 +# define V3D_IDENT2_BCG_INT BIT(28) + +#define V3D_CTL_MISCCFG 0x00018 +# define V3D_MISCCFG_OVRTMUOUT BIT(0) + +#define V3D_CTL_L2CACTL 0x00020 +# define V3D_L2CACTL_L2CCLR BIT(2) +# define V3D_L2CACTL_L2CDIS BIT(1) +# define V3D_L2CACTL_L2CENA BIT(0) + +#define V3D_CTL_SLCACTL 0x00024 +# define V3D_SLCACTL_TVCCS_MASK V3D_MASK(27, 24) +# define V3D_SLCACTL_TVCCS_SHIFT 24 +# define V3D_SLCACTL_TDCCS_MASK V3D_MASK(19, 16) +# define V3D_SLCACTL_TDCCS_SHIFT 16 +# define V3D_SLCACTL_UCC_MASK V3D_MASK(11, 8) +# define V3D_SLCACTL_UCC_SHIFT 8 +# define V3D_SLCACTL_ICC_MASK V3D_MASK(3, 0) +# define V3D_SLCACTL_ICC_SHIFT 0 + +#define V3D_CTL_L2TCACTL 0x00030 +# define V3D_L2TCACTL_TMUWCF BIT(8) +# define V3D_L2TCACTL_L2T_NO_WM BIT(4) +# define V3D_L2TCACTL_FLM_FLUSH 0 +# define V3D_L2TCACTL_FLM_CLEAR 1 +# define V3D_L2TCACTL_FLM_CLEAN 2 +# define V3D_L2TCACTL_FLM_MASK V3D_MASK(2, 1) +# define V3D_L2TCACTL_FLM_SHIFT 1 +# define V3D_L2TCACTL_L2TFLS BIT(0) +#define V3D_CTL_L2TFLSTA 0x00034 +#define V3D_CTL_L2TFLEND 0x00038 + +#define V3D_CTL_INT_STS 0x00050 +#define V3D_CTL_INT_SET 0x00054 +#define V3D_CTL_INT_CLR 0x00058 +#define V3D_CTL_INT_MSK_STS 0x0005c +#define V3D_CTL_INT_MSK_SET 0x00060 +#define V3D_CTL_INT_MSK_CLR 0x00064 +# define V3D_INT_QPU_MASK V3D_MASK(27, 16) +# define V3D_INT_QPU_SHIFT 16 +# define V3D_INT_GMPV BIT(5) +# define V3D_INT_TRFB BIT(4) +# define V3D_INT_SPILLUSE BIT(3) +# define V3D_INT_OUTOMEM BIT(2) +# define V3D_INT_FLDONE BIT(1) +# define V3D_INT_FRDONE BIT(0) + +#define V3D_CLE_CT0CS 0x00100 +#define V3D_CLE_CT1CS 0x00104 +#define V3D_CLE_CTNCS(n) (V3D_CLE_CT0CS + 4 * n) +#define V3D_CLE_CT0EA 0x00108 +#define V3D_CLE_CT1EA 0x0010c +#define V3D_CLE_CTNEA(n) (V3D_CLE_CT0EA + 4 * n) +#define V3D_CLE_CT0CA 0x00110 +#define V3D_CLE_CT1CA 0x00114 +#define V3D_CLE_CTNCA(n) (V3D_CLE_CT0CA + 4 * n) +#define V3D_CLE_CT0RA 0x00118 +#define V3D_CLE_CT1RA 0x0011c +#define V3D_CLE_CT0LC 0x00120 +#define V3D_CLE_CT1LC 0x00124 +#define V3D_CLE_CT0PC 0x00128 +#define V3D_CLE_CT1PC 0x0012c +#define V3D_CLE_PCS 0x00130 +#define V3D_CLE_BFC 0x00134 +#define V3D_CLE_RFC 0x00138 +#define V3D_CLE_TFBC 0x0013c +#define V3D_CLE_TFIT 0x00140 +#define V3D_CLE_CT1CFG 0x00144 +#define V3D_CLE_CT1TILECT 0x00148 +#define V3D_CLE_CT1TSKIP 0x0014c +#define V3D_CLE_CT1PTCT 0x00150 +#define V3D_CLE_CT0SYNC 0x00154 +#define V3D_CLE_CT1SYNC 0x00158 +#define V3D_CLE_CT0QTS 0x0015c +# define V3D_CLE_CT0QTS_ENABLE BIT(1) +#define V3D_CLE_CT0QBA 0x00160 +#define V3D_CLE_CT1QBA 0x00164 +#define V3D_CLE_CTNQBA(n) (V3D_CLE_CT0QBA + 4 * n) +#define V3D_CLE_CT0QEA 0x00168 +#define V3D_CLE_CT1QEA 0x0016c +#define V3D_CLE_CTNQEA(n) (V3D_CLE_CT0QEA + 4 * n) +#define V3D_CLE_CT0QMA 0x00170 +#define V3D_CLE_CT0QMS 0x00174 +#define V3D_CLE_CT1QCFG 0x00178 +/* If set without ETPROC, entirely skip tiles with no primitives. */ +# define V3D_CLE_QCFG_ETFILT BIT(7) +/* If set with ETFILT, just write the clear color to tiles with no + * primitives. + */ +# define V3D_CLE_QCFG_ETPROC BIT(6) +# define V3D_CLE_QCFG_ETSFLUSH BIT(1) +# define V3D_CLE_QCFG_MCDIS BIT(0) + +#define V3D_PTB_BPCA 0x00300 +#define V3D_PTB_BPCS 0x00304 +#define V3D_PTB_BPOA 0x00308 +#define V3D_PTB_BPOS 0x0030c + +#define V3D_PTB_BXCF 0x00310 +# define V3D_PTB_BXCF_RWORDERDISA BIT(1) +# define V3D_PTB_BXCF_CLIPDISA BIT(0) + +#define V3D_GMP_STATUS 0x00800 +# define V3D_GMP_STATUS_GMPRST BIT(31) +# define V3D_GMP_STATUS_WR_COUNT_MASK V3D_MASK(30, 24) +# define V3D_GMP_STATUS_WR_COUNT_SHIFT 24 +# define V3D_GMP_STATUS_RD_COUNT_MASK V3D_MASK(22, 16) +# define V3D_GMP_STATUS_RD_COUNT_SHIFT 16 +# define V3D_GMP_STATUS_WR_ACTIVE BIT(5) +# define V3D_GMP_STATUS_RD_ACTIVE BIT(4) +# define V3D_GMP_STATUS_CFG_BUSY BIT(3) +# define V3D_GMP_STATUS_CNTOVF BIT(2) +# define V3D_GMP_STATUS_INVPROT BIT(1) +# define V3D_GMP_STATUS_VIO BIT(0) + +#define V3D_GMP_CFG 0x00804 +# define V3D_GMP_CFG_LBURSTEN BIT(3) +# define V3D_GMP_CFG_PGCRSEN BIT() +# define V3D_GMP_CFG_STOP_REQ BIT(1) +# define V3D_GMP_CFG_PROT_ENABLE BIT(0) + +#define V3D_GMP_VIO_ADDR 0x00808 +#define V3D_GMP_VIO_TYPE 0x0080c +#define V3D_GMP_TABLE_ADDR 0x00810 +#define V3D_GMP_CLEAR_LOAD 0x00814 +#define V3D_GMP_PRESERVE_LOAD 0x00818 +#define V3D_GMP_VALID_LINES 0x00820 + +#endif /* V3D_REGS_H */ diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c new file mode 100644 index 000000000000..b07bece9417d --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -0,0 +1,228 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2018 Broadcom */ + +/** + * DOC: Broadcom V3D scheduling + * + * The shared DRM GPU scheduler is used to coordinate submitting jobs + * to the hardware. Each DRM fd (roughly a client process) gets its + * own scheduler entity, which will process jobs in order. The GPU + * scheduler will round-robin between clients to submit the next job. + * + * For simplicity, and in order to keep latency low for interactive + * jobs when bulk background jobs are queued up, we submit a new job + * to the HW only when it has completed the last one, instead of + * filling up the CT[01]Q FIFOs with jobs. Similarly, we use + * v3d_job_dependency() to manage the dependency between bin and + * render, instead of having the clients submit jobs with using the + * HW's semaphores to interlock between them. + */ + +#include <linux/kthread.h> + +#include "v3d_drv.h" +#include "v3d_regs.h" +#include "v3d_trace.h" + +static struct v3d_job * +to_v3d_job(struct drm_sched_job *sched_job) +{ + return container_of(sched_job, struct v3d_job, base); +} + +static void +v3d_job_free(struct drm_sched_job *sched_job) +{ + struct v3d_job *job = to_v3d_job(sched_job); + + v3d_exec_put(job->exec); +} + +/** + * Returns the fences that the bin job depends on, one by one. + * v3d_job_run() won't be called until all of them have been signaled. + */ +static struct dma_fence * +v3d_job_dependency(struct drm_sched_job *sched_job, + struct drm_sched_entity *s_entity) +{ + struct v3d_job *job = to_v3d_job(sched_job); + struct v3d_exec_info *exec = job->exec; + enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER; + struct dma_fence *fence; + + fence = job->in_fence; + if (fence) { + job->in_fence = NULL; + return fence; + } + + if (q == V3D_RENDER) { + /* If we had a bin job, the render job definitely depends on + * it. We first have to wait for bin to be scheduled, so that + * its done_fence is created. + */ + fence = exec->bin_done_fence; + if (fence) { + exec->bin_done_fence = NULL; + return fence; + } + } + + /* XXX: Wait on a fence for switching the GMP if necessary, + * and then do so. + */ + + return fence; +} + +static struct dma_fence *v3d_job_run(struct drm_sched_job *sched_job) +{ + struct v3d_job *job = to_v3d_job(sched_job); + struct v3d_exec_info *exec = job->exec; + enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER; + struct v3d_dev *v3d = exec->v3d; + struct drm_device *dev = &v3d->drm; + struct dma_fence *fence; + unsigned long irqflags; + + if (unlikely(job->base.s_fence->finished.error)) + return NULL; + + /* Lock required around bin_job update vs + * v3d_overflow_mem_work(). + */ + spin_lock_irqsave(&v3d->job_lock, irqflags); + if (q == V3D_BIN) { + v3d->bin_job = job->exec; + + /* Clear out the overflow allocation, so we don't + * reuse the overflow attached to a previous job. + */ + V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0); + } else { + v3d->render_job = job->exec; + } + spin_unlock_irqrestore(&v3d->job_lock, irqflags); + + /* Can we avoid this flush when q==RENDER? We need to be + * careful of scheduling, though -- imagine job0 rendering to + * texture and job1 reading, and them being executed as bin0, + * bin1, render0, render1, so that render1's flush at bin time + * wasn't enough. + */ + v3d_invalidate_caches(v3d); + + fence = v3d_fence_create(v3d, q); + if (!fence) + return fence; + + if (job->done_fence) + dma_fence_put(job->done_fence); + job->done_fence = dma_fence_get(fence); + + trace_v3d_submit_cl(dev, q == V3D_RENDER, to_v3d_fence(fence)->seqno, + job->start, job->end); + + if (q == V3D_BIN) { + if (exec->qma) { + V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, exec->qma); + V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, exec->qms); + } + if (exec->qts) { + V3D_CORE_WRITE(0, V3D_CLE_CT0QTS, + V3D_CLE_CT0QTS_ENABLE | + exec->qts); + } + } else { + /* XXX: Set the QCFG */ + } + + /* Set the current and end address of the control list. + * Writing the end register is what starts the job. + */ + V3D_CORE_WRITE(0, V3D_CLE_CTNQBA(q), job->start); + V3D_CORE_WRITE(0, V3D_CLE_CTNQEA(q), job->end); + + return fence; +} + +static void +v3d_job_timedout(struct drm_sched_job *sched_job) +{ + struct v3d_job *job = to_v3d_job(sched_job); + struct v3d_exec_info *exec = job->exec; + struct v3d_dev *v3d = exec->v3d; + enum v3d_queue q; + + mutex_lock(&v3d->reset_lock); + + /* block scheduler */ + for (q = 0; q < V3D_MAX_QUEUES; q++) { + struct drm_gpu_scheduler *sched = &v3d->queue[q].sched; + + kthread_park(sched->thread); + drm_sched_hw_job_reset(sched, (sched_job->sched == sched ? + sched_job : NULL)); + } + + /* get the GPU back into the init state */ + v3d_reset(v3d); + + /* Unblock schedulers and restart their jobs. */ + for (q = 0; q < V3D_MAX_QUEUES; q++) { + drm_sched_job_recovery(&v3d->queue[q].sched); + kthread_unpark(v3d->queue[q].sched.thread); + } + + mutex_unlock(&v3d->reset_lock); +} + +static const struct drm_sched_backend_ops v3d_sched_ops = { + .dependency = v3d_job_dependency, + .run_job = v3d_job_run, + .timedout_job = v3d_job_timedout, + .free_job = v3d_job_free +}; + +int +v3d_sched_init(struct v3d_dev *v3d) +{ + int hw_jobs_limit = 1; + int job_hang_limit = 0; + int hang_limit_ms = 500; + int ret; + + ret = drm_sched_init(&v3d->queue[V3D_BIN].sched, + &v3d_sched_ops, + hw_jobs_limit, job_hang_limit, + msecs_to_jiffies(hang_limit_ms), + "v3d_bin"); + if (ret) { + dev_err(v3d->dev, "Failed to create bin scheduler: %d.", ret); + return ret; + } + + ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched, + &v3d_sched_ops, + hw_jobs_limit, job_hang_limit, + msecs_to_jiffies(hang_limit_ms), + "v3d_render"); + if (ret) { + dev_err(v3d->dev, "Failed to create render scheduler: %d.", + ret); + drm_sched_fini(&v3d->queue[V3D_BIN].sched); + return ret; + } + + return 0; +} + +void +v3d_sched_fini(struct v3d_dev *v3d) +{ + enum v3d_queue q; + + for (q = 0; q < V3D_MAX_QUEUES; q++) + drm_sched_fini(&v3d->queue[q].sched); +} diff --git a/drivers/gpu/drm/v3d/v3d_trace.h b/drivers/gpu/drm/v3d/v3d_trace.h new file mode 100644 index 000000000000..85dd351e1e09 --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_trace.h @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2015-2018 Broadcom */ + +#if !defined(_V3D_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _V3D_TRACE_H_ + +#include <linux/stringify.h> +#include <linux/types.h> +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM v3d +#define TRACE_INCLUDE_FILE v3d_trace + +TRACE_EVENT(v3d_submit_cl, + TP_PROTO(struct drm_device *dev, bool is_render, + uint64_t seqno, + u32 ctnqba, u32 ctnqea), + TP_ARGS(dev, is_render, seqno, ctnqba, ctnqea), + + TP_STRUCT__entry( + __field(u32, dev) + __field(bool, is_render) + __field(u64, seqno) + __field(u32, ctnqba) + __field(u32, ctnqea) + ), + + TP_fast_assign( + __entry->dev = dev->primary->index; + __entry->is_render = is_render; + __entry->seqno = seqno; + __entry->ctnqba = ctnqba; + __entry->ctnqea = ctnqea; + ), + + TP_printk("dev=%u, %s, seqno=%llu, 0x%08x..0x%08x", + __entry->dev, + __entry->is_render ? "RCL" : "BCL", + __entry->seqno, + __entry->ctnqba, + __entry->ctnqea) +); + +TRACE_EVENT(v3d_reset_begin, + TP_PROTO(struct drm_device *dev), + TP_ARGS(dev), + + TP_STRUCT__entry( + __field(u32, dev) + ), + + TP_fast_assign( + __entry->dev = dev->primary->index; + ), + + TP_printk("dev=%u", + __entry->dev) +); + +TRACE_EVENT(v3d_reset_end, + TP_PROTO(struct drm_device *dev), + TP_ARGS(dev), + + TP_STRUCT__entry( + __field(u32, dev) + ), + + TP_fast_assign( + __entry->dev = dev->primary->index; + ), + + TP_printk("dev=%u", + __entry->dev) +); + +#endif /* _V3D_TRACE_H_ */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#include <trace/define_trace.h> diff --git a/drivers/gpu/drm/v3d/v3d_trace_points.c b/drivers/gpu/drm/v3d/v3d_trace_points.c new file mode 100644 index 000000000000..482922d7c7e1 --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_trace_points.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2015 Broadcom */ + +#include "v3d_drv.h" + +#ifndef __CHECKER__ +#define CREATE_TRACE_POINTS +#include "v3d_trace.h" +#endif diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index 83d3b7912fc2..c8650bbcbcb3 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -741,6 +741,7 @@ static irqreturn_t vc4_crtc_irq_handler(int irq, void *data) struct vc4_async_flip_state { struct drm_crtc *crtc; struct drm_framebuffer *fb; + struct drm_framebuffer *old_fb; struct drm_pending_vblank_event *event; struct vc4_seqno_cb cb; @@ -770,6 +771,23 @@ vc4_async_page_flip_complete(struct vc4_seqno_cb *cb) drm_crtc_vblank_put(crtc); drm_framebuffer_put(flip_state->fb); + + /* Decrement the BO usecnt in order to keep the inc/dec calls balanced + * when the planes are updated through the async update path. + * FIXME: we should move to generic async-page-flip when it's + * available, so that we can get rid of this hand-made cleanup_fb() + * logic. + */ + if (flip_state->old_fb) { + struct drm_gem_cma_object *cma_bo; + struct vc4_bo *bo; + + cma_bo = drm_fb_cma_get_gem_obj(flip_state->old_fb, 0); + bo = to_vc4_bo(&cma_bo->base); + vc4_bo_dec_usecnt(bo); + drm_framebuffer_put(flip_state->old_fb); + } + kfree(flip_state); up(&vc4->async_modeset); @@ -794,9 +812,22 @@ static int vc4_async_page_flip(struct drm_crtc *crtc, struct drm_gem_cma_object *cma_bo = drm_fb_cma_get_gem_obj(fb, 0); struct vc4_bo *bo = to_vc4_bo(&cma_bo->base); + /* Increment the BO usecnt here, so that we never end up with an + * unbalanced number of vc4_bo_{dec,inc}_usecnt() calls when the + * plane is later updated through the non-async path. + * FIXME: we should move to generic async-page-flip when it's + * available, so that we can get rid of this hand-made prepare_fb() + * logic. + */ + ret = vc4_bo_inc_usecnt(bo); + if (ret) + return ret; + flip_state = kzalloc(sizeof(*flip_state), GFP_KERNEL); - if (!flip_state) + if (!flip_state) { + vc4_bo_dec_usecnt(bo); return -ENOMEM; + } drm_framebuffer_get(fb); flip_state->fb = fb; @@ -807,10 +838,23 @@ static int vc4_async_page_flip(struct drm_crtc *crtc, ret = down_interruptible(&vc4->async_modeset); if (ret) { drm_framebuffer_put(fb); + vc4_bo_dec_usecnt(bo); kfree(flip_state); return ret; } + /* Save the current FB before it's replaced by the new one in + * drm_atomic_set_fb_for_plane(). We'll need the old FB in + * vc4_async_page_flip_complete() to decrement the BO usecnt and keep + * it consistent. + * FIXME: we should move to generic async-page-flip when it's + * available, so that we can get rid of this hand-made cleanup_fb() + * logic. + */ + flip_state->old_fb = plane->state->fb; + if (flip_state->old_fb) + drm_framebuffer_get(flip_state->old_fb); + WARN_ON(drm_crtc_vblank_get(crtc) != 0); /* Immediately update the plane's legacy fb pointer, so that later diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index 40ddeaafd65f..d9b8b701d2ce 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -175,7 +175,8 @@ static struct drm_driver vc4_drm_driver = { DRIVER_GEM | DRIVER_HAVE_IRQ | DRIVER_RENDER | - DRIVER_PRIME), + DRIVER_PRIME | + DRIVER_SYNCOBJ), .lastclose = drm_fb_helper_lastclose, .open = vc4_open, .postclose = vc4_close, diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index 22589d39083c..554a4e810d5b 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -11,6 +11,7 @@ #include <drm/drm_encoder.h> #include <drm/drm_gem_cma_helper.h> #include <drm/drm_atomic.h> +#include <drm/drm_syncobj.h> #include "uapi/drm/vc4_drm.h" diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c index 94085f8bcd68..8aa897835118 100644 --- a/drivers/gpu/drm/vc4/vc4_dsi.c +++ b/drivers/gpu/drm/vc4/vc4_dsi.c @@ -753,6 +753,11 @@ static void vc4_dsi_ulps(struct vc4_dsi *dsi, bool ulps) (dsi->lanes > 2 ? DSI1_STAT_PHY_D2_STOP : 0) | (dsi->lanes > 3 ? DSI1_STAT_PHY_D3_STOP : 0)); int ret; + bool ulps_currently_enabled = (DSI_PORT_READ(PHY_AFEC0) & + DSI_PORT_BIT(PHY_AFEC0_LATCH_ULPS)); + + if (ulps == ulps_currently_enabled) + return; DSI_PORT_WRITE(STAT, stat_ulps); DSI_PORT_WRITE(PHYC, DSI_PORT_READ(PHYC) | phyc_ulps); diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 2107b0daf8ef..7910b9acedd6 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -27,6 +27,7 @@ #include <linux/device.h> #include <linux/io.h> #include <linux/sched/signal.h> +#include <linux/dma-fence-array.h> #include "uapi/drm/vc4_drm.h" #include "vc4_drv.h" @@ -655,7 +656,8 @@ retry: */ static int vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, - struct ww_acquire_ctx *acquire_ctx) + struct ww_acquire_ctx *acquire_ctx, + struct drm_syncobj *out_sync) { struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_exec_info *renderjob; @@ -678,6 +680,9 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, fence->seqno = exec->seqno; exec->fence = &fence->base; + if (out_sync) + drm_syncobj_replace_fence(out_sync, exec->fence); + vc4_update_bo_seqnos(exec, seqno); vc4_unlock_bo_reservations(dev, exec, acquire_ctx); @@ -1113,8 +1118,10 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_file *vc4file = file_priv->driver_priv; struct drm_vc4_submit_cl *args = data; + struct drm_syncobj *out_sync = NULL; struct vc4_exec_info *exec; struct ww_acquire_ctx acquire_ctx; + struct dma_fence *in_fence; int ret = 0; if ((args->flags & ~(VC4_SUBMIT_CL_USE_CLEAR_COLOR | @@ -1126,7 +1133,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, } if (args->pad2 != 0) { - DRM_DEBUG("->pad2 must be set to zero\n"); + DRM_DEBUG("Invalid pad: 0x%08x\n", args->pad2); return -EINVAL; } @@ -1164,6 +1171,29 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, } } + if (args->in_sync) { + ret = drm_syncobj_find_fence(file_priv, args->in_sync, + &in_fence); + if (ret) + goto fail; + + /* When the fence (or fence array) is exclusively from our + * context we can skip the wait since jobs are executed in + * order of their submission through this ioctl and this can + * only have fences from a prior job. + */ + if (!dma_fence_match_context(in_fence, + vc4->dma_fence_context)) { + ret = dma_fence_wait(in_fence, true); + if (ret) { + dma_fence_put(in_fence); + goto fail; + } + } + + dma_fence_put(in_fence); + } + if (exec->args->bin_cl_size != 0) { ret = vc4_get_bcl(dev, exec); if (ret) @@ -1181,12 +1211,33 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, if (ret) goto fail; + if (args->out_sync) { + out_sync = drm_syncobj_find(file_priv, args->out_sync); + if (!out_sync) { + ret = -EINVAL; + goto fail; + } + + /* We replace the fence in out_sync in vc4_queue_submit since + * the render job could execute immediately after that call. + * If it finishes before our ioctl processing resumes the + * render job fence could already have been freed. + */ + } + /* Clear this out of the struct we'll be putting in the queue, * since it's part of our stack. */ exec->args = NULL; - ret = vc4_queue_submit(dev, exec, &acquire_ctx); + ret = vc4_queue_submit(dev, exec, &acquire_ctx, out_sync); + + /* The syncobj isn't part of the exec data and we need to free our + * reference even if job submission failed. + */ + if (out_sync) + drm_syncobj_put(out_sync); + if (ret) goto fail; diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c index bfc2fa73d2ae..e47e29426078 100644 --- a/drivers/gpu/drm/vc4/vc4_v3d.c +++ b/drivers/gpu/drm/vc4/vc4_v3d.c @@ -218,8 +218,7 @@ try_again: * overall CMA pool before they make scenes complicated enough to run * out of bin space. */ -int -vc4_allocate_bin_bo(struct drm_device *drm) +static int vc4_allocate_bin_bo(struct drm_device *drm) { struct vc4_dev *vc4 = to_vc4_dev(drm); struct vc4_v3d *v3d = vc4->v3d; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 70e1a8820a7c..97f37c3c16f2 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -159,14 +159,14 @@ static const struct drm_ioctl_desc vmw_ioctls[] = { DRM_RENDER_ALLOW), VMW_IOCTL_DEF(VMW_CURSOR_BYPASS, vmw_kms_cursor_bypass_ioctl, - DRM_MASTER | DRM_CONTROL_ALLOW), + DRM_MASTER), VMW_IOCTL_DEF(VMW_CONTROL_STREAM, vmw_overlay_ioctl, - DRM_MASTER | DRM_CONTROL_ALLOW), + DRM_MASTER), VMW_IOCTL_DEF(VMW_CLAIM_STREAM, vmw_stream_claim_ioctl, - DRM_MASTER | DRM_CONTROL_ALLOW), + DRM_MASTER), VMW_IOCTL_DEF(VMW_UNREF_STREAM, vmw_stream_unref_ioctl, - DRM_MASTER | DRM_CONTROL_ALLOW), + DRM_MASTER), VMW_IOCTL_DEF(VMW_CREATE_CONTEXT, vmw_context_define_ioctl, DRM_AUTH | DRM_RENDER_ALLOW), diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c index 2582ffd36bb5..ba0cdb743c3e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c @@ -441,11 +441,11 @@ static int vmwgfx_set_config_internal(struct drm_mode_set *set) struct drm_crtc *crtc = set->crtc; struct drm_framebuffer *fb; struct drm_crtc *tmp; - struct drm_modeset_acquire_ctx *ctx; struct drm_device *dev = set->crtc->dev; + struct drm_modeset_acquire_ctx ctx; int ret; - ctx = dev->mode_config.acquire_ctx; + drm_modeset_acquire_init(&ctx, 0); restart: /* @@ -458,7 +458,7 @@ restart: fb = set->fb; - ret = crtc->funcs->set_config(set, ctx); + ret = crtc->funcs->set_config(set, &ctx); if (ret == 0) { crtc->primary->crtc = crtc; crtc->primary->fb = fb; @@ -473,20 +473,13 @@ restart: } if (ret == -EDEADLK) { - dev->mode_config.acquire_ctx = NULL; - -retry_locking: - drm_modeset_backoff(ctx); - - ret = drm_modeset_lock_all_ctx(dev, ctx); - if (ret) - goto retry_locking; - - dev->mode_config.acquire_ctx = ctx; - + drm_modeset_backoff(&ctx); goto restart; } + drm_modeset_drop_locks(&ctx); + drm_modeset_acquire_fini(&ctx); + return ret; } @@ -624,7 +617,6 @@ static int vmw_fb_set_par(struct fb_info *info) } mutex_lock(&par->bo_mutex); - drm_modeset_lock_all(vmw_priv->dev); ret = vmw_fb_kms_framebuffer(info); if (ret) goto out_unlock; @@ -657,7 +649,6 @@ out_unlock: drm_mode_destroy(vmw_priv->dev, old_mode); par->set_mode = mode; - drm_modeset_unlock_all(vmw_priv->dev); mutex_unlock(&par->bo_mutex); return ret; @@ -713,18 +704,14 @@ int vmw_fb_init(struct vmw_private *vmw_priv) par->max_width = fb_width; par->max_height = fb_height; - drm_modeset_lock_all(vmw_priv->dev); ret = vmw_kms_fbdev_init_data(vmw_priv, 0, par->max_width, par->max_height, &par->con, &par->crtc, &init_mode); - if (ret) { - drm_modeset_unlock_all(vmw_priv->dev); + if (ret) goto err_kms; - } info->var.xres = init_mode->hdisplay; info->var.yres = init_mode->vdisplay; - drm_modeset_unlock_all(vmw_priv->dev); /* * Create buffers and alloc memory @@ -832,7 +819,9 @@ int vmw_fb_close(struct vmw_private *vmw_priv) cancel_delayed_work_sync(&par->local_work); unregister_framebuffer(info); + mutex_lock(&par->bo_mutex); (void) vmw_fb_kms_detach(par, true, true); + mutex_unlock(&par->bo_mutex); vfree(par->vmalloc); framebuffer_release(info); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 6728c6247b4b..01f2dc9e6f52 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -2595,6 +2595,7 @@ void vmw_kms_helper_resource_finish(struct vmw_validation_ctx *ctx, vmw_kms_helper_buffer_finish(res->dev_priv, NULL, ctx->buf, out_fence, NULL); + vmw_dmabuf_unreference(&ctx->buf); vmw_resource_unreserve(res, false, NULL, 0); mutex_unlock(&res->dev_priv->cmdbuf_mutex); } @@ -2680,7 +2681,9 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv, struct vmw_display_unit *du; struct drm_display_mode *mode; int i = 0; + int ret = 0; + mutex_lock(&dev_priv->dev->mode_config.mutex); list_for_each_entry(con, &dev_priv->dev->mode_config.connector_list, head) { if (i == unit) @@ -2691,7 +2694,8 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv, if (i != unit) { DRM_ERROR("Could not find initial display unit.\n"); - return -EINVAL; + ret = -EINVAL; + goto out_unlock; } if (list_empty(&con->modes)) @@ -2699,7 +2703,8 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv, if (list_empty(&con->modes)) { DRM_ERROR("Could not find initial display mode.\n"); - return -EINVAL; + ret = -EINVAL; + goto out_unlock; } du = vmw_connector_to_du(con); @@ -2720,7 +2725,10 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv, head); } - return 0; + out_unlock: + mutex_unlock(&dev_priv->dev->mode_config.mutex); + + return ret; } /** diff --git a/drivers/gpu/drm/xen/xen_drm_front.c b/drivers/gpu/drm/xen/xen_drm_front.c index 1b0ea9ac330e..3345ac71b391 100644 --- a/drivers/gpu/drm/xen/xen_drm_front.c +++ b/drivers/gpu/drm/xen/xen_drm_front.c @@ -188,8 +188,8 @@ int xen_drm_front_dbuf_create(struct xen_drm_front_info *front_info, buf_cfg.be_alloc = front_info->cfg.be_alloc; shbuf = xen_drm_front_shbuf_alloc(&buf_cfg); - if (!shbuf) - return -ENOMEM; + if (IS_ERR(shbuf)) + return PTR_ERR(shbuf); ret = dbuf_add_to_list(front_info, shbuf, dbuf_cookie); if (ret < 0) { @@ -543,8 +543,8 @@ static int xen_drm_drv_init(struct xen_drm_front_info *front_info) front_info->drm_info = drm_info; drm_dev = drm_dev_alloc(&xen_drm_driver, dev); - if (!drm_dev) { - ret = -ENOMEM; + if (IS_ERR(drm_dev)) { + ret = PTR_ERR(drm_dev); goto fail; } @@ -778,7 +778,7 @@ static int xen_drv_remove(struct xenbus_device *dev) */ while ((xenbus_read_unsigned(front_info->xb_dev->otherend, "state", XenbusStateUnknown) != XenbusStateInitWait) && - to--) + --to) msleep(10); if (!to) { diff --git a/drivers/gpu/drm/xen/xen_drm_front_shbuf.c b/drivers/gpu/drm/xen/xen_drm_front_shbuf.c index d5705251a0d6..8099cb343ae3 100644 --- a/drivers/gpu/drm/xen/xen_drm_front_shbuf.c +++ b/drivers/gpu/drm/xen/xen_drm_front_shbuf.c @@ -383,7 +383,7 @@ xen_drm_front_shbuf_alloc(struct xen_drm_front_shbuf_cfg *cfg) buf = kzalloc(sizeof(*buf), GFP_KERNEL); if (!buf) - return NULL; + return ERR_PTR(-ENOMEM); if (cfg->be_alloc) buf->ops = &backend_ops; diff --git a/drivers/video/hdmi.c b/drivers/video/hdmi.c index 111a0ab6280a..38716eb50408 100644 --- a/drivers/video/hdmi.c +++ b/drivers/video/hdmi.c @@ -93,6 +93,9 @@ ssize_t hdmi_avi_infoframe_pack(struct hdmi_avi_infoframe *frame, void *buffer, if (size < length) return -ENOSPC; + if (frame->picture_aspect > HDMI_PICTURE_ASPECT_16_9) + return -EINVAL; + memset(buffer, 0, size); ptr[0] = frame->type; diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h index 3a0eac2885b7..858ba19a3e29 100644 --- a/include/drm/drm_device.h +++ b/include/drm/drm_device.h @@ -38,7 +38,6 @@ struct drm_device { struct device *dev; /**< Device structure of bus-device */ struct drm_driver *driver; /**< DRM driver managing the device */ void *dev_private; /**< DRM driver private data */ - struct drm_minor *control; /**< Control node */ struct drm_minor *primary; /**< Primary node */ struct drm_minor *render; /**< Render node */ bool registered; diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index 91c9bcd4196f..c01564991a9f 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -64,6 +64,11 @@ /* AUX CH addresses */ /* DPCD */ #define DP_DPCD_REV 0x000 +# define DP_DPCD_REV_10 0x10 +# define DP_DPCD_REV_11 0x11 +# define DP_DPCD_REV_12 0x12 +# define DP_DPCD_REV_13 0x13 +# define DP_DPCD_REV_14 0x14 #define DP_MAX_LINK_RATE 0x001 @@ -119,6 +124,7 @@ # define DP_DPCD_DISPLAY_CONTROL_CAPABLE (1 << 3) /* edp v1.2 or higher */ #define DP_TRAINING_AUX_RD_INTERVAL 0x00e /* XXX 1.2? */ +# define DP_TRAINING_AUX_RD_MASK 0x7F /* XXX 1.2? */ #define DP_ADAPTER_CAP 0x00f /* 1.2 */ # define DP_FORCE_LOAD_SENSE_CAP (1 << 0) @@ -977,18 +983,18 @@ int drm_dp_bw_code_to_link_rate(u8 link_bw); #define DP_SDP_VSC_EXT_CEA 0x21 /* DP 1.4 */ /* 0x80+ CEA-861 infoframe types */ -struct edp_sdp_header { +struct dp_sdp_header { u8 HB0; /* Secondary Data Packet ID */ u8 HB1; /* Secondary Data Packet Type */ - u8 HB2; /* 7:5 reserved, 4:0 revision number */ - u8 HB3; /* 7:5 reserved, 4:0 number of valid data bytes */ + u8 HB2; /* Secondary Data Packet Specific header, Byte 0 */ + u8 HB3; /* Secondary Data packet Specific header, Byte 1 */ } __packed; #define EDP_SDP_HEADER_REVISION_MASK 0x1F #define EDP_SDP_HEADER_VALID_PAYLOAD_BYTES 0x1F struct edp_vsc_psr { - struct edp_sdp_header sdp_header; + struct dp_sdp_header sdp_header; u8 DB0; /* Stereo Interface */ u8 DB1; /* 0 - PSR State; 1 - Update RFB; 2 - CRC Valid */ u8 DB2; /* CRC value bits 7:0 of the R or Cr component */ diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h index 5176c3797680..027ac16da3d1 100644 --- a/include/drm/drm_file.h +++ b/include/drm/drm_file.h @@ -47,6 +47,9 @@ struct device; * header include loops we need it here for now. */ +/* Note that the order of this enum is ABI (it determines + * /dev/dri/renderD* numbers). + */ enum drm_minor_type { DRM_MINOR_PRIMARY, DRM_MINOR_CONTROL, @@ -182,6 +185,14 @@ struct drm_file { unsigned atomic:1; /** + * @aspect_ratio_allowed: + * + * True, if client can handle picture aspect ratios, and has requested + * to pass this information along with the mode. + */ + unsigned aspect_ratio_allowed:1; + + /** * @is_master: * * This client is the creator of @master. Protected by struct @@ -348,18 +359,6 @@ static inline bool drm_is_render_client(const struct drm_file *file_priv) return file_priv->minor->type == DRM_MINOR_RENDER; } -/** - * drm_is_control_client - is this an open file of the control node - * @file_priv: DRM file - * - * Control nodes are deprecated and in the process of getting removed from the - * DRM userspace API. Do not ever use! - */ -static inline bool drm_is_control_client(const struct drm_file *file_priv) -{ - return file_priv->minor->type == DRM_MINOR_CONTROL; -} - int drm_open(struct inode *inode, struct file *filp); ssize_t drm_read(struct file *filp, char __user *buffer, size_t count, loff_t *offset); diff --git a/include/drm/drm_ioctl.h b/include/drm/drm_ioctl.h index add42809642a..fafb6f592c4b 100644 --- a/include/drm/drm_ioctl.h +++ b/include/drm/drm_ioctl.h @@ -109,13 +109,6 @@ enum drm_ioctl_flags { */ DRM_ROOT_ONLY = BIT(2), /** - * @DRM_CONTROL_ALLOW: - * - * Deprecated, do not use. Control nodes are in the process of getting - * removed. - */ - DRM_CONTROL_ALLOW = BIT(3), - /** * @DRM_UNLOCKED: * * Whether &drm_ioctl_desc.func should be called with the DRM BKL held diff --git a/include/drm/drm_modes.h b/include/drm/drm_modes.h index 0d310beae6af..b159fe07fcf9 100644 --- a/include/drm/drm_modes.h +++ b/include/drm/drm_modes.h @@ -147,6 +147,12 @@ enum drm_mode_status { #define DRM_MODE_FLAG_3D_MAX DRM_MODE_FLAG_3D_SIDE_BY_SIDE_HALF +#define DRM_MODE_MATCH_TIMINGS (1 << 0) +#define DRM_MODE_MATCH_CLOCK (1 << 1) +#define DRM_MODE_MATCH_FLAGS (1 << 2) +#define DRM_MODE_MATCH_3D_FLAGS (1 << 3) +#define DRM_MODE_MATCH_ASPECT_RATIO (1 << 4) + /** * struct drm_display_mode - DRM kernel-internal display mode structure * @hdisplay: horizontal display size @@ -405,6 +411,19 @@ struct drm_display_mode { * Field for setting the HDMI picture aspect ratio of a mode. */ enum hdmi_picture_aspect picture_aspect_ratio; + + /** + * @export_head: + * + * struct list_head for modes to be exposed to the userspace. + * This is to maintain a list of exposed modes while preparing + * user-mode's list in drm_mode_getconnector ioctl. The purpose of this + * list_head only lies in the ioctl function, and is not expected to be + * used outside the function. + * Once used, the stale pointers are not reset, but left as it is, to + * avoid overhead of protecting it by mode_config.mutex. + */ + struct list_head export_head; }; /** @@ -490,6 +509,9 @@ void drm_mode_copy(struct drm_display_mode *dst, const struct drm_display_mode *src); struct drm_display_mode *drm_mode_duplicate(struct drm_device *dev, const struct drm_display_mode *mode); +bool drm_mode_match(const struct drm_display_mode *mode1, + const struct drm_display_mode *mode2, + unsigned int match_flags); bool drm_mode_equal(const struct drm_display_mode *mode1, const struct drm_display_mode *mode2); bool drm_mode_equal_no_clocks(const struct drm_display_mode *mode1, diff --git a/include/drm/drm_property.h b/include/drm/drm_property.h index ab8167baade5..1d5c0b2a8956 100644 --- a/include/drm/drm_property.h +++ b/include/drm/drm_property.h @@ -260,7 +260,7 @@ struct drm_property *drm_property_create_object(struct drm_device *dev, uint32_t type); struct drm_property *drm_property_create_bool(struct drm_device *dev, u32 flags, const char *name); -int drm_property_add_enum(struct drm_property *property, int index, +int drm_property_add_enum(struct drm_property *property, uint64_t value, const char *name); void drm_property_destroy(struct drm_device *dev, struct drm_property *property); diff --git a/include/drm/drm_rect.h b/include/drm/drm_rect.h index 44bc122b9ee0..6c54544a4be7 100644 --- a/include/drm/drm_rect.h +++ b/include/drm/drm_rect.h @@ -175,8 +175,7 @@ static inline bool drm_rect_equals(const struct drm_rect *r1, bool drm_rect_intersect(struct drm_rect *r, const struct drm_rect *clip); bool drm_rect_clip_scaled(struct drm_rect *src, struct drm_rect *dst, - const struct drm_rect *clip, - int hscale, int vscale); + const struct drm_rect *clip); int drm_rect_calc_hscale(const struct drm_rect *src, const struct drm_rect *dst, int min_hscale, int max_hscale); diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 4c008170fe65..eb9b05aa5aea 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -94,11 +94,11 @@ typedef void (*dma_fence_func_t)(struct dma_fence *fence, struct dma_fence_cb *cb); /** - * struct dma_fence_cb - callback for dma_fence_add_callback - * @node: used by dma_fence_add_callback to append this struct to fence::cb_list + * struct dma_fence_cb - callback for dma_fence_add_callback() + * @node: used by dma_fence_add_callback() to append this struct to fence::cb_list * @func: dma_fence_func_t to call * - * This struct will be initialized by dma_fence_add_callback, additional + * This struct will be initialized by dma_fence_add_callback(), additional * data can be passed along by embedding dma_fence_cb in another struct. */ struct dma_fence_cb { @@ -108,75 +108,143 @@ struct dma_fence_cb { /** * struct dma_fence_ops - operations implemented for fence - * @get_driver_name: returns the driver name. - * @get_timeline_name: return the name of the context this fence belongs to. - * @enable_signaling: enable software signaling of fence. - * @signaled: [optional] peek whether the fence is signaled, can be null. - * @wait: custom wait implementation, or dma_fence_default_wait. - * @release: [optional] called on destruction of fence, can be null - * @fill_driver_data: [optional] callback to fill in free-form debug info - * Returns amount of bytes filled, or -errno. - * @fence_value_str: [optional] fills in the value of the fence as a string - * @timeline_value_str: [optional] fills in the current value of the timeline - * as a string * - * Notes on enable_signaling: - * For fence implementations that have the capability for hw->hw - * signaling, they can implement this op to enable the necessary - * irqs, or insert commands into cmdstream, etc. This is called - * in the first wait() or add_callback() path to let the fence - * implementation know that there is another driver waiting on - * the signal (ie. hw->sw case). - * - * This function can be called from atomic context, but not - * from irq context, so normal spinlocks can be used. - * - * A return value of false indicates the fence already passed, - * or some failure occurred that made it impossible to enable - * signaling. True indicates successful enabling. - * - * fence->error may be set in enable_signaling, but only when false is - * returned. - * - * Calling dma_fence_signal before enable_signaling is called allows - * for a tiny race window in which enable_signaling is called during, - * before, or after dma_fence_signal. To fight this, it is recommended - * that before enable_signaling returns true an extra reference is - * taken on the fence, to be released when the fence is signaled. - * This will mean dma_fence_signal will still be called twice, but - * the second time will be a noop since it was already signaled. - * - * Notes on signaled: - * May set fence->error if returning true. - * - * Notes on wait: - * Must not be NULL, set to dma_fence_default_wait for default implementation. - * the dma_fence_default_wait implementation should work for any fence, as long - * as enable_signaling works correctly. - * - * Must return -ERESTARTSYS if the wait is intr = true and the wait was - * interrupted, and remaining jiffies if fence has signaled, or 0 if wait - * timed out. Can also return other error values on custom implementations, - * which should be treated as if the fence is signaled. For example a hardware - * lockup could be reported like that. - * - * Notes on release: - * Can be NULL, this function allows additional commands to run on - * destruction of the fence. Can be called from irq context. - * If pointer is set to NULL, kfree will get called instead. */ - struct dma_fence_ops { + /** + * @get_driver_name: + * + * Returns the driver name. This is a callback to allow drivers to + * compute the name at runtime, without having it to store permanently + * for each fence, or build a cache of some sort. + * + * This callback is mandatory. + */ const char * (*get_driver_name)(struct dma_fence *fence); + + /** + * @get_timeline_name: + * + * Return the name of the context this fence belongs to. This is a + * callback to allow drivers to compute the name at runtime, without + * having it to store permanently for each fence, or build a cache of + * some sort. + * + * This callback is mandatory. + */ const char * (*get_timeline_name)(struct dma_fence *fence); + + /** + * @enable_signaling: + * + * Enable software signaling of fence. + * + * For fence implementations that have the capability for hw->hw + * signaling, they can implement this op to enable the necessary + * interrupts, or insert commands into cmdstream, etc, to avoid these + * costly operations for the common case where only hw->hw + * synchronization is required. This is called in the first + * dma_fence_wait() or dma_fence_add_callback() path to let the fence + * implementation know that there is another driver waiting on the + * signal (ie. hw->sw case). + * + * This function can be called from atomic context, but not + * from irq context, so normal spinlocks can be used. + * + * A return value of false indicates the fence already passed, + * or some failure occurred that made it impossible to enable + * signaling. True indicates successful enabling. + * + * &dma_fence.error may be set in enable_signaling, but only when false + * is returned. + * + * Since many implementations can call dma_fence_signal() even when before + * @enable_signaling has been called there's a race window, where the + * dma_fence_signal() might result in the final fence reference being + * released and its memory freed. To avoid this, implementations of this + * callback should grab their own reference using dma_fence_get(), to be + * released when the fence is signalled (through e.g. the interrupt + * handler). + * + * This callback is mandatory. + */ bool (*enable_signaling)(struct dma_fence *fence); + + /** + * @signaled: + * + * Peek whether the fence is signaled, as a fastpath optimization for + * e.g. dma_fence_wait() or dma_fence_add_callback(). Note that this + * callback does not need to make any guarantees beyond that a fence + * once indicates as signalled must always return true from this + * callback. This callback may return false even if the fence has + * completed already, in this case information hasn't propogated throug + * the system yet. See also dma_fence_is_signaled(). + * + * May set &dma_fence.error if returning true. + * + * This callback is optional. + */ bool (*signaled)(struct dma_fence *fence); + + /** + * @wait: + * + * Custom wait implementation, or dma_fence_default_wait. + * + * Must not be NULL, set to dma_fence_default_wait for default implementation. + * the dma_fence_default_wait implementation should work for any fence, as long + * as enable_signaling works correctly. + * + * Must return -ERESTARTSYS if the wait is intr = true and the wait was + * interrupted, and remaining jiffies if fence has signaled, or 0 if wait + * timed out. Can also return other error values on custom implementations, + * which should be treated as if the fence is signaled. For example a hardware + * lockup could be reported like that. + * + * This callback is mandatory. + */ signed long (*wait)(struct dma_fence *fence, bool intr, signed long timeout); + + /** + * @release: + * + * Called on destruction of fence to release additional resources. + * Can be called from irq context. This callback is optional. If it is + * NULL, then dma_fence_free() is instead called as the default + * implementation. + */ void (*release)(struct dma_fence *fence); + /** + * @fill_driver_data: + * + * Callback to fill in free-form debug info. + * + * Returns amount of bytes filled, or negative error on failure. + * + * This callback is optional. + */ int (*fill_driver_data)(struct dma_fence *fence, void *data, int size); + + /** + * @fence_value_str: + * + * Callback to fill in free-form debug info specific to this fence, like + * the sequence number. + * + * This callback is optional. + */ void (*fence_value_str)(struct dma_fence *fence, char *str, int size); + + /** + * @timeline_value_str: + * + * Fills in the current value of the timeline as a string, like the + * sequence number. This should match what @fill_driver_data prints for + * the most recently signalled fence (assuming no delayed signalling). + */ void (*timeline_value_str)(struct dma_fence *fence, char *str, int size); }; @@ -189,7 +257,7 @@ void dma_fence_free(struct dma_fence *fence); /** * dma_fence_put - decreases refcount of the fence - * @fence: [in] fence to reduce refcount of + * @fence: fence to reduce refcount of */ static inline void dma_fence_put(struct dma_fence *fence) { @@ -199,7 +267,7 @@ static inline void dma_fence_put(struct dma_fence *fence) /** * dma_fence_get - increases refcount of the fence - * @fence: [in] fence to increase refcount of + * @fence: fence to increase refcount of * * Returns the same fence, with refcount increased by 1. */ @@ -213,7 +281,7 @@ static inline struct dma_fence *dma_fence_get(struct dma_fence *fence) /** * dma_fence_get_rcu - get a fence from a reservation_object_list with * rcu read lock - * @fence: [in] fence to increase refcount of + * @fence: fence to increase refcount of * * Function returns NULL if no refcount could be obtained, or the fence. */ @@ -227,7 +295,7 @@ static inline struct dma_fence *dma_fence_get_rcu(struct dma_fence *fence) /** * dma_fence_get_rcu_safe - acquire a reference to an RCU tracked fence - * @fencep: [in] pointer to fence to increase refcount of + * @fencep: pointer to fence to increase refcount of * * Function returns NULL if no refcount could be obtained, or the fence. * This function handles acquiring a reference to a fence that may be @@ -289,14 +357,16 @@ void dma_fence_enable_sw_signaling(struct dma_fence *fence); /** * dma_fence_is_signaled_locked - Return an indication if the fence * is signaled yet. - * @fence: [in] the fence to check + * @fence: the fence to check * * Returns true if the fence was already signaled, false if not. Since this * function doesn't enable signaling, it is not guaranteed to ever return - * true if dma_fence_add_callback, dma_fence_wait or - * dma_fence_enable_sw_signaling haven't been called before. + * true if dma_fence_add_callback(), dma_fence_wait() or + * dma_fence_enable_sw_signaling() haven't been called before. * - * This function requires fence->lock to be held. + * This function requires &dma_fence.lock to be held. + * + * See also dma_fence_is_signaled(). */ static inline bool dma_fence_is_signaled_locked(struct dma_fence *fence) @@ -314,17 +384,19 @@ dma_fence_is_signaled_locked(struct dma_fence *fence) /** * dma_fence_is_signaled - Return an indication if the fence is signaled yet. - * @fence: [in] the fence to check + * @fence: the fence to check * * Returns true if the fence was already signaled, false if not. Since this * function doesn't enable signaling, it is not guaranteed to ever return - * true if dma_fence_add_callback, dma_fence_wait or - * dma_fence_enable_sw_signaling haven't been called before. + * true if dma_fence_add_callback(), dma_fence_wait() or + * dma_fence_enable_sw_signaling() haven't been called before. * * It's recommended for seqno fences to call dma_fence_signal when the * operation is complete, it makes it possible to prevent issues from * wraparound between time of issue and time of use by checking the return * value of this function before calling hardware-specific wait instructions. + * + * See also dma_fence_is_signaled_locked(). */ static inline bool dma_fence_is_signaled(struct dma_fence *fence) @@ -342,8 +414,8 @@ dma_fence_is_signaled(struct dma_fence *fence) /** * __dma_fence_is_later - return if f1 is chronologically later than f2 - * @f1: [in] the first fence's seqno - * @f2: [in] the second fence's seqno from the same context + * @f1: the first fence's seqno + * @f2: the second fence's seqno from the same context * * Returns true if f1 is chronologically later than f2. Both fences must be * from the same context, since a seqno is not common across contexts. @@ -355,8 +427,8 @@ static inline bool __dma_fence_is_later(u32 f1, u32 f2) /** * dma_fence_is_later - return if f1 is chronologically later than f2 - * @f1: [in] the first fence from the same context - * @f2: [in] the second fence from the same context + * @f1: the first fence from the same context + * @f2: the second fence from the same context * * Returns true if f1 is chronologically later than f2. Both fences must be * from the same context, since a seqno is not re-used across contexts. @@ -372,8 +444,8 @@ static inline bool dma_fence_is_later(struct dma_fence *f1, /** * dma_fence_later - return the chronologically later fence - * @f1: [in] the first fence from the same context - * @f2: [in] the second fence from the same context + * @f1: the first fence from the same context + * @f2: the second fence from the same context * * Returns NULL if both fences are signaled, otherwise the fence that would be * signaled last. Both fences must be from the same context, since a seqno is @@ -398,7 +470,7 @@ static inline struct dma_fence *dma_fence_later(struct dma_fence *f1, /** * dma_fence_get_status_locked - returns the status upon completion - * @fence: [in] the dma_fence to query + * @fence: the dma_fence to query * * Drivers can supply an optional error status condition before they signal * the fence (to indicate whether the fence was completed due to an error @@ -422,8 +494,8 @@ int dma_fence_get_status(struct dma_fence *fence); /** * dma_fence_set_error - flag an error condition on the fence - * @fence: [in] the dma_fence - * @error: [in] the error to store + * @fence: the dma_fence + * @error: the error to store * * Drivers can supply an optional error status condition before they signal * the fence, to indicate that the fence was completed due to an error @@ -449,8 +521,8 @@ signed long dma_fence_wait_any_timeout(struct dma_fence **fences, /** * dma_fence_wait - sleep until the fence gets signaled - * @fence: [in] the fence to wait on - * @intr: [in] if true, do an interruptible wait + * @fence: the fence to wait on + * @intr: if true, do an interruptible wait * * This function will return -ERESTARTSYS if interrupted by a signal, * or 0 if the fence was signaled. Other error values may be @@ -459,6 +531,8 @@ signed long dma_fence_wait_any_timeout(struct dma_fence **fences, * Performs a synchronous wait on this fence. It is assumed the caller * directly or indirectly holds a reference to the fence, otherwise the * fence might be freed before return, resulting in undefined behavior. + * + * See also dma_fence_wait_timeout() and dma_fence_wait_any_timeout(). */ static inline signed long dma_fence_wait(struct dma_fence *fence, bool intr) { diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 6fdff5945c8a..9c660e1688ab 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -680,6 +680,13 @@ struct drm_get_cap { */ #define DRM_CLIENT_CAP_ATOMIC 3 +/** + * DRM_CLIENT_CAP_ASPECT_RATIO + * + * If set to 1, the DRM core will provide aspect ratio information in modes. + */ +#define DRM_CLIENT_CAP_ASPECT_RATIO 4 + /** DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */ struct drm_set_client_cap { __u64 capability; diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index 50bcf4214ff9..4b3a1bb58e68 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -93,6 +93,8 @@ extern "C" { #define DRM_MODE_PICTURE_ASPECT_NONE 0 #define DRM_MODE_PICTURE_ASPECT_4_3 1 #define DRM_MODE_PICTURE_ASPECT_16_9 2 +#define DRM_MODE_PICTURE_ASPECT_64_27 3 +#define DRM_MODE_PICTURE_ASPECT_256_135 4 /* Aspect ratio flag bitmask (4 bits 22:19) */ #define DRM_MODE_FLAG_PIC_AR_MASK (0x0F<<19) @@ -102,6 +104,10 @@ extern "C" { (DRM_MODE_PICTURE_ASPECT_4_3<<19) #define DRM_MODE_FLAG_PIC_AR_16_9 \ (DRM_MODE_PICTURE_ASPECT_16_9<<19) +#define DRM_MODE_FLAG_PIC_AR_64_27 \ + (DRM_MODE_PICTURE_ASPECT_64_27<<19) +#define DRM_MODE_FLAG_PIC_AR_256_135 \ + (DRM_MODE_PICTURE_ASPECT_256_135<<19) #define DRM_MODE_FLAG_ALL (DRM_MODE_FLAG_PHSYNC | \ DRM_MODE_FLAG_NHSYNC | \ diff --git a/include/uapi/drm/exynos_drm.h b/include/uapi/drm/exynos_drm.h index 4a54305120e0..3e59b8382dd8 100644 --- a/include/uapi/drm/exynos_drm.h +++ b/include/uapi/drm/exynos_drm.h @@ -135,6 +135,219 @@ struct drm_exynos_g2d_exec { __u64 async; }; +/* Exynos DRM IPP v2 API */ + +/** + * Enumerate available IPP hardware modules. + * + * @count_ipps: size of ipp_id array / number of ipp modules (set by driver) + * @reserved: padding + * @ipp_id_ptr: pointer to ipp_id array or NULL + */ +struct drm_exynos_ioctl_ipp_get_res { + __u32 count_ipps; + __u32 reserved; + __u64 ipp_id_ptr; +}; + +enum drm_exynos_ipp_format_type { + DRM_EXYNOS_IPP_FORMAT_SOURCE = 0x01, + DRM_EXYNOS_IPP_FORMAT_DESTINATION = 0x02, +}; + +struct drm_exynos_ipp_format { + __u32 fourcc; + __u32 type; + __u64 modifier; +}; + +enum drm_exynos_ipp_capability { + DRM_EXYNOS_IPP_CAP_CROP = 0x01, + DRM_EXYNOS_IPP_CAP_ROTATE = 0x02, + DRM_EXYNOS_IPP_CAP_SCALE = 0x04, + DRM_EXYNOS_IPP_CAP_CONVERT = 0x08, +}; + +/** + * Get IPP hardware capabilities and supported image formats. + * + * @ipp_id: id of IPP module to query + * @capabilities: bitmask of drm_exynos_ipp_capability (set by driver) + * @reserved: padding + * @formats_count: size of formats array (in entries) / number of filled + * formats (set by driver) + * @formats_ptr: pointer to formats array or NULL + */ +struct drm_exynos_ioctl_ipp_get_caps { + __u32 ipp_id; + __u32 capabilities; + __u32 reserved; + __u32 formats_count; + __u64 formats_ptr; +}; + +enum drm_exynos_ipp_limit_type { + /* size (horizontal/vertial) limits, in pixels (min, max, alignment) */ + DRM_EXYNOS_IPP_LIMIT_TYPE_SIZE = 0x0001, + /* scale ratio (horizonta/vertial), 16.16 fixed point (min, max) */ + DRM_EXYNOS_IPP_LIMIT_TYPE_SCALE = 0x0002, + + /* image buffer area */ + DRM_EXYNOS_IPP_LIMIT_SIZE_BUFFER = 0x0001 << 16, + /* src/dst rectangle area */ + DRM_EXYNOS_IPP_LIMIT_SIZE_AREA = 0x0002 << 16, + /* src/dst rectangle area when rotation enabled */ + DRM_EXYNOS_IPP_LIMIT_SIZE_ROTATED = 0x0003 << 16, + + DRM_EXYNOS_IPP_LIMIT_TYPE_MASK = 0x000f, + DRM_EXYNOS_IPP_LIMIT_SIZE_MASK = 0x000f << 16, +}; + +struct drm_exynos_ipp_limit_val { + __u32 min; + __u32 max; + __u32 align; + __u32 reserved; +}; + +/** + * IPP module limitation. + * + * @type: limit type (see drm_exynos_ipp_limit_type enum) + * @reserved: padding + * @h: horizontal limits + * @v: vertical limits + */ +struct drm_exynos_ipp_limit { + __u32 type; + __u32 reserved; + struct drm_exynos_ipp_limit_val h; + struct drm_exynos_ipp_limit_val v; +}; + +/** + * Get IPP limits for given image format. + * + * @ipp_id: id of IPP module to query + * @fourcc: image format code (see DRM_FORMAT_* in drm_fourcc.h) + * @modifier: image format modifier (see DRM_FORMAT_MOD_* in drm_fourcc.h) + * @type: source/destination identifier (drm_exynos_ipp_format_flag enum) + * @limits_count: size of limits array (in entries) / number of filled entries + * (set by driver) + * @limits_ptr: pointer to limits array or NULL + */ +struct drm_exynos_ioctl_ipp_get_limits { + __u32 ipp_id; + __u32 fourcc; + __u64 modifier; + __u32 type; + __u32 limits_count; + __u64 limits_ptr; +}; + +enum drm_exynos_ipp_task_id { + /* buffer described by struct drm_exynos_ipp_task_buffer */ + DRM_EXYNOS_IPP_TASK_BUFFER = 0x0001, + /* rectangle described by struct drm_exynos_ipp_task_rect */ + DRM_EXYNOS_IPP_TASK_RECTANGLE = 0x0002, + /* transformation described by struct drm_exynos_ipp_task_transform */ + DRM_EXYNOS_IPP_TASK_TRANSFORM = 0x0003, + /* alpha configuration described by struct drm_exynos_ipp_task_alpha */ + DRM_EXYNOS_IPP_TASK_ALPHA = 0x0004, + + /* source image data (for buffer and rectangle chunks) */ + DRM_EXYNOS_IPP_TASK_TYPE_SOURCE = 0x0001 << 16, + /* destination image data (for buffer and rectangle chunks) */ + DRM_EXYNOS_IPP_TASK_TYPE_DESTINATION = 0x0002 << 16, +}; + +/** + * Memory buffer with image data. + * + * @id: must be DRM_EXYNOS_IPP_TASK_BUFFER + * other parameters are same as for AddFB2 generic DRM ioctl + */ +struct drm_exynos_ipp_task_buffer { + __u32 id; + __u32 fourcc; + __u32 width, height; + __u32 gem_id[4]; + __u32 offset[4]; + __u32 pitch[4]; + __u64 modifier; +}; + +/** + * Rectangle for processing. + * + * @id: must be DRM_EXYNOS_IPP_TASK_RECTANGLE + * @reserved: padding + * @x,@y: left corner in pixels + * @w,@h: width/height in pixels + */ +struct drm_exynos_ipp_task_rect { + __u32 id; + __u32 reserved; + __u32 x; + __u32 y; + __u32 w; + __u32 h; +}; + +/** + * Image tranformation description. + * + * @id: must be DRM_EXYNOS_IPP_TASK_TRANSFORM + * @rotation: DRM_MODE_ROTATE_* and DRM_MODE_REFLECT_* values + */ +struct drm_exynos_ipp_task_transform { + __u32 id; + __u32 rotation; +}; + +/** + * Image global alpha configuration for formats without alpha values. + * + * @id: must be DRM_EXYNOS_IPP_TASK_ALPHA + * @value: global alpha value (0-255) + */ +struct drm_exynos_ipp_task_alpha { + __u32 id; + __u32 value; +}; + +enum drm_exynos_ipp_flag { + /* generate DRM event after processing */ + DRM_EXYNOS_IPP_FLAG_EVENT = 0x01, + /* dry run, only check task parameters */ + DRM_EXYNOS_IPP_FLAG_TEST_ONLY = 0x02, + /* non-blocking processing */ + DRM_EXYNOS_IPP_FLAG_NONBLOCK = 0x04, +}; + +#define DRM_EXYNOS_IPP_FLAGS (DRM_EXYNOS_IPP_FLAG_EVENT |\ + DRM_EXYNOS_IPP_FLAG_TEST_ONLY | DRM_EXYNOS_IPP_FLAG_NONBLOCK) + +/** + * Perform image processing described by array of drm_exynos_ipp_task_* + * structures (parameters array). + * + * @ipp_id: id of IPP module to run the task + * @flags: bitmask of drm_exynos_ipp_flag values + * @reserved: padding + * @params_size: size of parameters array (in bytes) + * @params_ptr: pointer to parameters array or NULL + * @user_data: (optional) data for drm event + */ +struct drm_exynos_ioctl_ipp_commit { + __u32 ipp_id; + __u32 flags; + __u32 reserved; + __u32 params_size; + __u64 params_ptr; + __u64 user_data; +}; + #define DRM_EXYNOS_GEM_CREATE 0x00 #define DRM_EXYNOS_GEM_MAP 0x01 /* Reserved 0x03 ~ 0x05 for exynos specific gem ioctl */ @@ -147,6 +360,11 @@ struct drm_exynos_g2d_exec { #define DRM_EXYNOS_G2D_EXEC 0x22 /* Reserved 0x30 ~ 0x33 for obsolete Exynos IPP ioctls */ +/* IPP - Image Post Processing */ +#define DRM_EXYNOS_IPP_GET_RESOURCES 0x40 +#define DRM_EXYNOS_IPP_GET_CAPS 0x41 +#define DRM_EXYNOS_IPP_GET_LIMITS 0x42 +#define DRM_EXYNOS_IPP_COMMIT 0x43 #define DRM_IOCTL_EXYNOS_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + \ DRM_EXYNOS_GEM_CREATE, struct drm_exynos_gem_create) @@ -165,8 +383,20 @@ struct drm_exynos_g2d_exec { #define DRM_IOCTL_EXYNOS_G2D_EXEC DRM_IOWR(DRM_COMMAND_BASE + \ DRM_EXYNOS_G2D_EXEC, struct drm_exynos_g2d_exec) +#define DRM_IOCTL_EXYNOS_IPP_GET_RESOURCES DRM_IOWR(DRM_COMMAND_BASE + \ + DRM_EXYNOS_IPP_GET_RESOURCES, \ + struct drm_exynos_ioctl_ipp_get_res) +#define DRM_IOCTL_EXYNOS_IPP_GET_CAPS DRM_IOWR(DRM_COMMAND_BASE + \ + DRM_EXYNOS_IPP_GET_CAPS, struct drm_exynos_ioctl_ipp_get_caps) +#define DRM_IOCTL_EXYNOS_IPP_GET_LIMITS DRM_IOWR(DRM_COMMAND_BASE + \ + DRM_EXYNOS_IPP_GET_LIMITS, \ + struct drm_exynos_ioctl_ipp_get_limits) +#define DRM_IOCTL_EXYNOS_IPP_COMMIT DRM_IOWR(DRM_COMMAND_BASE + \ + DRM_EXYNOS_IPP_COMMIT, struct drm_exynos_ioctl_ipp_commit) + /* EXYNOS specific events */ #define DRM_EXYNOS_G2D_EVENT 0x80000000 +#define DRM_EXYNOS_IPP_EVENT 0x80000002 struct drm_exynos_g2d_event { struct drm_event base; @@ -177,6 +407,16 @@ struct drm_exynos_g2d_event { __u32 reserved; }; +struct drm_exynos_ipp_event { + struct drm_event base; + __u64 user_data; + __u32 tv_sec; + __u32 tv_usec; + __u32 ipp_id; + __u32 sequence; + __u64 reserved; +}; + #if defined(__cplusplus) } #endif diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h new file mode 100644 index 000000000000..7b6627783608 --- /dev/null +++ b/include/uapi/drm/v3d_drm.h @@ -0,0 +1,194 @@ +/* + * Copyright © 2014-2018 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef _V3D_DRM_H_ +#define _V3D_DRM_H_ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +#define DRM_V3D_SUBMIT_CL 0x00 +#define DRM_V3D_WAIT_BO 0x01 +#define DRM_V3D_CREATE_BO 0x02 +#define DRM_V3D_MMAP_BO 0x03 +#define DRM_V3D_GET_PARAM 0x04 +#define DRM_V3D_GET_BO_OFFSET 0x05 + +#define DRM_IOCTL_V3D_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl) +#define DRM_IOCTL_V3D_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo) +#define DRM_IOCTL_V3D_CREATE_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_CREATE_BO, struct drm_v3d_create_bo) +#define DRM_IOCTL_V3D_MMAP_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_MMAP_BO, struct drm_v3d_mmap_bo) +#define DRM_IOCTL_V3D_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_PARAM, struct drm_v3d_get_param) +#define DRM_IOCTL_V3D_GET_BO_OFFSET DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_BO_OFFSET, struct drm_v3d_get_bo_offset) + +/** + * struct drm_v3d_submit_cl - ioctl argument for submitting commands to the 3D + * engine. + * + * This asks the kernel to have the GPU execute an optional binner + * command list, and a render command list. + */ +struct drm_v3d_submit_cl { + /* Pointer to the binner command list. + * + * This is the first set of commands executed, which runs the + * coordinate shader to determine where primitives land on the screen, + * then writes out the state updates and draw calls necessary per tile + * to the tile allocation BO. + */ + __u32 bcl_start; + + /** End address of the BCL (first byte after the BCL) */ + __u32 bcl_end; + + /* Offset of the render command list. + * + * This is the second set of commands executed, which will either + * execute the tiles that have been set up by the BCL, or a fixed set + * of tiles (in the case of RCL-only blits). + */ + __u32 rcl_start; + + /** End address of the RCL (first byte after the RCL) */ + __u32 rcl_end; + + /** An optional sync object to wait on before starting the BCL. */ + __u32 in_sync_bcl; + /** An optional sync object to wait on before starting the RCL. */ + __u32 in_sync_rcl; + /** An optional sync object to place the completion fence in. */ + __u32 out_sync; + + /* Offset of the tile alloc memory + * + * This is optional on V3D 3.3 (where the CL can set the value) but + * required on V3D 4.1. + */ + __u32 qma; + + /** Size of the tile alloc memory. */ + __u32 qms; + + /** Offset of the tile state data array. */ + __u32 qts; + + /* Pointer to a u32 array of the BOs that are referenced by the job. + */ + __u64 bo_handles; + + /* Number of BO handles passed in (size is that times 4). */ + __u32 bo_handle_count; + + /* Pad, must be zero-filled. */ + __u32 pad; +}; + +/** + * struct drm_v3d_wait_bo - ioctl argument for waiting for + * completion of the last DRM_V3D_SUBMIT_CL on a BO. + * + * This is useful for cases where multiple processes might be + * rendering to a BO and you want to wait for all rendering to be + * completed. + */ +struct drm_v3d_wait_bo { + __u32 handle; + __u32 pad; + __u64 timeout_ns; +}; + +/** + * struct drm_v3d_create_bo - ioctl argument for creating V3D BOs. + * + * There are currently no values for the flags argument, but it may be + * used in a future extension. + */ +struct drm_v3d_create_bo { + __u32 size; + __u32 flags; + /** Returned GEM handle for the BO. */ + __u32 handle; + /** + * Returned offset for the BO in the V3D address space. This offset + * is private to the DRM fd and is valid for the lifetime of the GEM + * handle. + * + * This offset value will always be nonzero, since various HW + * units treat 0 specially. + */ + __u32 offset; +}; + +/** + * struct drm_v3d_mmap_bo - ioctl argument for mapping V3D BOs. + * + * This doesn't actually perform an mmap. Instead, it returns the + * offset you need to use in an mmap on the DRM device node. This + * means that tools like valgrind end up knowing about the mapped + * memory. + * + * There are currently no values for the flags argument, but it may be + * used in a future extension. + */ +struct drm_v3d_mmap_bo { + /** Handle for the object being mapped. */ + __u32 handle; + __u32 flags; + /** offset into the drm node to use for subsequent mmap call. */ + __u64 offset; +}; + +enum drm_v3d_param { + DRM_V3D_PARAM_V3D_UIFCFG, + DRM_V3D_PARAM_V3D_HUB_IDENT1, + DRM_V3D_PARAM_V3D_HUB_IDENT2, + DRM_V3D_PARAM_V3D_HUB_IDENT3, + DRM_V3D_PARAM_V3D_CORE0_IDENT0, + DRM_V3D_PARAM_V3D_CORE0_IDENT1, + DRM_V3D_PARAM_V3D_CORE0_IDENT2, +}; + +struct drm_v3d_get_param { + __u32 param; + __u32 pad; + __u64 value; +}; + +/** + * Returns the offset for the BO in the V3D address space for this DRM fd. + * This is the same value returned by drm_v3d_create_bo, if that was called + * from this DRM fd. + */ +struct drm_v3d_get_bo_offset { + __u32 handle; + __u32 offset; +}; + +#if defined(__cplusplus) +} +#endif + +#endif /* _V3D_DRM_H_ */ diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h index b95a0e11cb07..2cac6277a1d7 100644 --- a/include/uapi/drm/vc4_drm.h +++ b/include/uapi/drm/vc4_drm.h @@ -183,10 +183,17 @@ struct drm_vc4_submit_cl { /* ID of the perfmon to attach to this job. 0 means no perfmon. */ __u32 perfmonid; - /* Unused field to align this struct on 64 bits. Must be set to 0. - * If one ever needs to add an u32 field to this struct, this field - * can be used. + /* Syncobj handle to wait on. If set, processing of this render job + * will not start until the syncobj is signaled. 0 means ignore. */ + __u32 in_sync; + + /* Syncobj handle to export fence to. If set, the fence in the syncobj + * will be replaced with a fence that signals upon completion of this + * render job. 0 means ignore. + */ + __u32 out_sync; + __u32 pad2; }; diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h index 4b04ead26cd9..f43c3c6171ff 100644 --- a/include/uapi/linux/virtio_gpu.h +++ b/include/uapi/linux/virtio_gpu.h @@ -260,6 +260,7 @@ struct virtio_gpu_cmd_submit { }; #define VIRTIO_GPU_CAPSET_VIRGL 1 +#define VIRTIO_GPU_CAPSET_VIRGL2 2 /* VIRTIO_GPU_CMD_GET_CAPSET_INFO */ struct virtio_gpu_get_capset_info { |