diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 477 |
1 files changed, 324 insertions, 153 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index cc69005f5b46..4db92e0a60da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -23,8 +23,9 @@ */ #include <drm/amdgpu_drm.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_drv.h> -#include <drm/drm_fbdev_generic.h> +#include <drm/drm_fbdev_ttm.h> #include <drm/drm_gem.h> #include <drm/drm_managed.h> #include <drm/drm_pciids.h> @@ -50,6 +51,8 @@ #include "amdgpu_reset.h" #include "amdgpu_sched.h" #include "amdgpu_xgmi.h" +#include "amdgpu_userq.h" +#include "amdgpu_userq_fence.h" #include "../amdxcp/amdgpu_xcp_drv.h" /* @@ -116,9 +119,16 @@ * - 3.55.0 - Add AMDGPU_INFO_GPUVM_FAULT query * - 3.56.0 - Update IB start address and size alignment for decode and encode * - 3.57.0 - Compute tunneling on GFX10+ + * - 3.58.0 - Add GFX12 DCC support + * - 3.59.0 - Cleared VRAM + * - 3.60.0 - Add AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE (Vulkan requirement) + * - 3.61.0 - Contains fix for RV/PCO compute queues + * - 3.62.0 - Add AMDGPU_IDS_FLAGS_MODE_PF, AMDGPU_IDS_FLAGS_MODE_VF & AMDGPU_IDS_FLAGS_MODE_PT + * - 3.63.0 - GFX12 display DCC supports 256B max compressed block size + * - 3.64.0 - Userq IP support query */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 57 +#define KMS_DRIVER_MINOR 64 #define KMS_DRIVER_PATCHLEVEL 0 /* @@ -129,6 +139,11 @@ enum AMDGPU_DEBUG_MASK { AMDGPU_DEBUG_LARGEBAR = BIT(1), AMDGPU_DEBUG_DISABLE_GPU_SOFT_RECOVERY = BIT(2), AMDGPU_DEBUG_USE_VRAM_FW_BUF = BIT(3), + AMDGPU_DEBUG_ENABLE_RAS_ACA = BIT(4), + AMDGPU_DEBUG_ENABLE_EXP_RESETS = BIT(5), + AMDGPU_DEBUG_DISABLE_GPU_RING_RESET = BIT(6), + AMDGPU_DEBUG_SMU_POOL = BIT(7), + AMDGPU_DEBUG_VM_USERPTR = BIT(8), }; unsigned int amdgpu_vram_limit = UINT_MAX; @@ -165,7 +180,18 @@ uint amdgpu_pg_mask = 0xffffffff; uint amdgpu_sdma_phase_quantum = 32; char *amdgpu_disable_cu; char *amdgpu_virtual_display; -bool enforce_isolation; +int amdgpu_enforce_isolation = -1; +int amdgpu_modeset = -1; + +/* Specifies the default granularity for SVM, used in buffer + * migration and restoration of backing memory when handling + * recoverable page faults. + * + * The value is given as log(numPages(buffer)); for a 2 MiB + * buffer it computes to be 9 + */ +uint amdgpu_svm_default_granularity = 9; + /* * OverDrive(bit 14) disabled by default * GFX DCS(bit 19) disabled by default @@ -195,10 +221,13 @@ int amdgpu_async_gfx_ring = 1; int amdgpu_mcbp = -1; int amdgpu_discovery = -1; int amdgpu_mes; +int amdgpu_mes_log_enable = 0; int amdgpu_mes_kiq; +int amdgpu_uni_mes = 1; int amdgpu_noretry = -1; int amdgpu_force_asic_type = -1; int amdgpu_tmz = -1; /* auto */ +uint amdgpu_freesync_vid_mode; int amdgpu_reset_method = -1; /* auto */ int amdgpu_num_kcq = -1; int amdgpu_smartshift_bias; @@ -211,8 +240,10 @@ int amdgpu_seamless = -1; /* auto */ uint amdgpu_debug_mask; int amdgpu_agp = -1; /* auto */ int amdgpu_wbrf = -1; - -static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work); +int amdgpu_damage_clips = -1; /* auto */ +int amdgpu_umsch_mm_fwlog; +int amdgpu_rebar = -1; /* auto */ +int amdgpu_user_queue = -1; DECLARE_DYNDBG_CLASSMAP(drm_debug_classes, DD_CLASS_TYPE_DISJOINT_BITS, 0, "DRM_UT_CORE", @@ -228,9 +259,6 @@ DECLARE_DYNDBG_CLASSMAP(drm_debug_classes, DD_CLASS_TYPE_DISJOINT_BITS, 0, struct amdgpu_mgpu_info mgpu_info = { .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex), - .delayed_reset_work = __DELAYED_WORK_INITIALIZER( - mgpu_info.delayed_reset_work, - amdgpu_drv_delayed_reset_work_handler, 0), }; int amdgpu_ras_enable = -1; uint amdgpu_ras_mask = 0xffffffff; @@ -265,7 +293,8 @@ module_param_named(gartsize, amdgpu_gart_size, uint, 0600); /** * DOC: gttsize (int) * Restrict the size of GTT domain (for userspace use) in MiB for testing. - * The default is -1 (Use 1/2 RAM, minimum value is 3GB). + * The default is -1 (Use value specified by TTM). + * This parameter is deprecated and will be removed in the future. */ MODULE_PARM_DESC(gttsize, "Size of the GTT userspace domain in megabytes (-1 = auto)"); module_param_named(gttsize, amdgpu_gtt_size, int, 0600); @@ -313,6 +342,13 @@ MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)"); module_param_named(msi, amdgpu_msi, int, 0444); /** + * DOC: svm_default_granularity (uint) + * Used in buffer migration and handling of recoverable page faults + */ +MODULE_PARM_DESC(svm_default_granularity, "SVM's default granularity in log(2^Pages), default 9 = 2^9 = 2 MiB"); +module_param_named(svm_default_granularity, amdgpu_svm_default_granularity, uint, 0644); + +/** * DOC: lockup_timeout (string) * Set GPU scheduler timeout value in ms. * @@ -366,7 +402,7 @@ module_param_named(aspm, amdgpu_aspm, int, 0444); * Setting the value to 0 disables this functionality. * Setting the value to -2 is auto enabled with power down when displays are attached. */ -MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = auto, -2 = autowith displays)"); +MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = auto, -2 = auto with displays)"); module_param_named(runpm, amdgpu_runtime_pm, int, 0444); /** @@ -377,7 +413,7 @@ module_param_named(runpm, amdgpu_runtime_pm, int, 0444); * the kernel log for the list of IPs on the asic. The default is 0xffffffff (enable all blocks on a device). */ MODULE_PARM_DESC(ip_block_mask, "IP Block Mask (all blocks enabled (default))"); -module_param_named(ip_block_mask, amdgpu_ip_block_mask, uint, 0444); +module_param_named_unsafe(ip_block_mask, amdgpu_ip_block_mask, uint, 0444); /** * DOC: bapm (int) @@ -435,7 +471,7 @@ module_param_named(vm_update_mode, amdgpu_vm_update_mode, int, 0444); * Enable experimental hw support (1 = enable). The default is 0 (disabled). */ MODULE_PARM_DESC(exp_hw_support, "experimental hw support (1 = enable, 0 = disable (default))"); -module_param_named(exp_hw_support, amdgpu_exp_hw_support, int, 0444); +module_param_named_unsafe(exp_hw_support, amdgpu_exp_hw_support, int, 0444); /** * DOC: dc (int) @@ -546,14 +582,14 @@ module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444); * Set to enable GPU recovery mechanism (1 = enable, 0 = disable). The default is -1 (auto, disabled except SRIOV). */ MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto)"); -module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444); +module_param_named_unsafe(gpu_recovery, amdgpu_gpu_recovery, int, 0444); /** * DOC: emu_mode (int) * Set value 1 to enable emulation mode. This is only needed when running on an emulator. The default is 0 (disabled). */ MODULE_PARM_DESC(emu_mode, "Emulation mode, (1 = enable, 0 = disable)"); -module_param_named(emu_mode, amdgpu_emu_mode, int, 0444); +module_param_named_unsafe(emu_mode, amdgpu_emu_mode, int, 0444); /** * DOC: ras_enable (int) @@ -593,7 +629,7 @@ module_param_named(timeout_period, amdgpu_watchdog_timer.period, uint, 0644); #ifdef CONFIG_DRM_AMDGPU_SI #if IS_ENABLED(CONFIG_DRM_RADEON) || IS_ENABLED(CONFIG_DRM_RADEON_MODULE) -int amdgpu_si_support = 0; +int amdgpu_si_support; MODULE_PARM_DESC(si_support, "SI support (1 = enabled, 0 = disabled (default))"); #else int amdgpu_si_support = 1; @@ -612,7 +648,7 @@ module_param_named(si_support, amdgpu_si_support, int, 0444); #ifdef CONFIG_DRM_AMDGPU_CIK #if IS_ENABLED(CONFIG_DRM_RADEON) || IS_ENABLED(CONFIG_DRM_RADEON_MODULE) -int amdgpu_cik_support = 0; +int amdgpu_cik_support; MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled, 0 = disabled (default))"); #else int amdgpu_cik_support = 1; @@ -666,6 +702,15 @@ MODULE_PARM_DESC(mes, module_param_named(mes, amdgpu_mes, int, 0444); /** + * DOC: mes_log_enable (int) + * Enable Micro Engine Scheduler log. This is used to enable/disable MES internal log. + * (0 = disabled (default), 1 = enabled) + */ +MODULE_PARM_DESC(mes_log_enable, + "Enable Micro Engine Scheduler log (0 = disabled (default), 1 = enabled)"); +module_param_named(mes_log_enable, amdgpu_mes_log_enable, int, 0444); + +/** * DOC: mes_kiq (int) * Enable Micro Engine Scheduler KIQ. This is a new engine pipe for kiq. * (0 = disabled (default), 1 = enabled) @@ -675,6 +720,15 @@ MODULE_PARM_DESC(mes_kiq, module_param_named(mes_kiq, amdgpu_mes_kiq, int, 0444); /** + * DOC: uni_mes (int) + * Enable Unified Micro Engine Scheduler. This is a new engine pipe for unified scheduler. + * (0 = disabled (default), 1 = enabled) + */ +MODULE_PARM_DESC(uni_mes, + "Enable Unified Micro Engine Scheduler (0 = disabled, 1 = enabled(default)"); +module_param_named(uni_mes, amdgpu_uni_mes, int, 0444); + +/** * DOC: noretry (int) * Disable XNACK retry in the SQ by default on GFXv9 hardware. On ASICs that * do not support per-process XNACK this also disables retry page faults. @@ -690,7 +744,7 @@ module_param_named(noretry, amdgpu_noretry, int, 0644); */ MODULE_PARM_DESC(force_asic_type, "A non negative value used to specify the asic type for all supported GPUs"); -module_param_named(force_asic_type, amdgpu_force_asic_type, int, 0444); +module_param_named_unsafe(force_asic_type, amdgpu_force_asic_type, int, 0444); /** * DOC: use_xgmi_p2p (int) @@ -709,7 +763,7 @@ module_param_named(use_xgmi_p2p, amdgpu_use_xgmi_p2p, int, 0444); * assigns queues to HQDs. */ int sched_policy = KFD_SCHED_POLICY_HWS; -module_param(sched_policy, int, 0444); +module_param_unsafe(sched_policy, int, 0444); MODULE_PARM_DESC(sched_policy, "Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)"); @@ -759,7 +813,7 @@ MODULE_PARM_DESC(send_sigterm, * Setting 1 enables halt on hang. */ int halt_if_hws_hang; -module_param(halt_if_hws_hang, int, 0644); +module_param_unsafe(halt_if_hws_hang, int, 0644); MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)"); /** @@ -768,7 +822,7 @@ MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (defau * check says. Default value: false (rely on MEC2 firmware version check). */ bool hws_gws_support; -module_param(hws_gws_support, bool, 0444); +module_param_unsafe(hws_gws_support, bool, 0444); MODULE_PARM_DESC(hws_gws_support, "Assume MEC2 FW supports GWS barriers (false = rely on FW version check (Default), true = force supported)"); /** @@ -801,7 +855,7 @@ MODULE_PARM_DESC(no_system_mem_limit, "disable system memory limit (false = defa */ int amdgpu_no_queue_eviction_on_vm_fault; MODULE_PARM_DESC(no_queue_eviction_on_vm_fault, "No queue eviction on VM fault (0 = queue eviction, 1 = no queue eviction)"); -module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm_fault, int, 0444); +module_param_named_unsafe(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm_fault, int, 0444); #endif /** @@ -809,7 +863,7 @@ module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm */ int amdgpu_mtype_local; MODULE_PARM_DESC(mtype_local, "MTYPE for local memory (0 = MTYPE_RW (default), 1 = MTYPE_NC, 2 = MTYPE_CC)"); -module_param_named(mtype_local, amdgpu_mtype_local, int, 0444); +module_param_named_unsafe(mtype_local, amdgpu_mtype_local, int, 0444); /** * DOC: pcie_p2p (bool) @@ -848,18 +902,31 @@ module_param_named(visualconfirm, amdgpu_dc_visual_confirm, uint, 0444); * the ABM algorithm, with 1 being the least reduction and 4 being the most * reduction. * - * Defaults to 0, or disabled. Userspace can still override this level later - * after boot. + * Defaults to -1, or auto. Userspace can only override this level after + * boot if it's set to auto. */ -uint amdgpu_dm_abm_level; -MODULE_PARM_DESC(abmlevel, "ABM level (0 = off (default), 1-4 = backlight reduction level) "); -module_param_named(abmlevel, amdgpu_dm_abm_level, uint, 0444); +int amdgpu_dm_abm_level = -1; +MODULE_PARM_DESC(abmlevel, + "ABM level (0 = off, 1-4 = backlight reduction level, -1 auto (default))"); +module_param_named(abmlevel, amdgpu_dm_abm_level, int, 0444); int amdgpu_backlight = -1; MODULE_PARM_DESC(backlight, "Backlight control (0 = pwm, 1 = aux, -1 auto (default))"); module_param_named(backlight, amdgpu_backlight, bint, 0444); /** + * DOC: damageclips (int) + * Enable or disable damage clips support. If damage clips support is disabled, + * we will force full frame updates, irrespective of what user space sends to + * us. + * + * Defaults to -1 (where it is enabled unless a PSR-SU display is detected). + */ +MODULE_PARM_DESC(damageclips, + "Damage clips support (0 = disable, 1 = enable, -1 auto (default))"); +module_param_named(damageclips, amdgpu_damage_clips, int, 0444); + +/** * DOC: tmz (int) * Trusted Memory Zone (TMZ) is a method to protect data being written * to or read from memory. @@ -870,11 +937,37 @@ MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto (default), 0 = off, 1 = on) module_param_named(tmz, amdgpu_tmz, int, 0444); /** + * DOC: freesync_video (uint) + * Enable the optimization to adjust front porch timing to achieve seamless + * mode change experience when setting a freesync supported mode for which full + * modeset is not needed. + * + * The Display Core will add a set of modes derived from the base FreeSync + * video mode into the corresponding connector's mode list based on commonly + * used refresh rates and VRR range of the connected display, when users enable + * this feature. From the userspace perspective, they can see a seamless mode + * change experience when the change between different refresh rates under the + * same resolution. Additionally, userspace applications such as Video playback + * can read this modeset list and change the refresh rate based on the video + * frame rate. Finally, the userspace can also derive an appropriate mode for a + * particular refresh rate based on the FreeSync Mode and add it to the + * connector's mode list. + * + * Note: This is an experimental feature. + * + * The default value: 0 (off). + */ +MODULE_PARM_DESC( + freesync_video, + "Enable freesync modesetting optimization feature (0 = off (default), 1 = on)"); +module_param_named(freesync_video, amdgpu_freesync_vid_mode, uint, 0444); + +/** * DOC: reset_method (int) * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco) */ MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco/bamaco)"); -module_param_named(reset_method, amdgpu_reset_method, int, 0444); +module_param_named_unsafe(reset_method, amdgpu_reset_method, int, 0644); /** * DOC: bad_page_threshold (int) Bad page threshold is specifies the @@ -882,7 +975,7 @@ module_param_named(reset_method, amdgpu_reset_method, int, 0444); * result in the GPU entering bad status when the number of total * faulty pages by ECC exceeds the threshold value. */ -MODULE_PARM_DESC(bad_page_threshold, "Bad page threshold(-1 = ignore threshold (default value), 0 = disable bad page retirement, -2 = driver sets threshold)"); +MODULE_PARM_DESC(bad_page_threshold, "Bad page threshold(-1 = ignore threshold (default value), 0 = disable bad page retirement, -2 = threshold determined by a formula, 0 < threshold < max records, user-defined threshold)"); module_param_named(bad_page_threshold, amdgpu_bad_page_threshold, int, 0444); MODULE_PARM_DESC(num_kcq, "number of kernel compute queue user want to setup (8 if set to greater than 8 or less than 0, only affect gfx 8+)"); @@ -915,6 +1008,13 @@ MODULE_PARM_DESC(umsch_mm, module_param_named(umsch_mm, amdgpu_umsch_mm, int, 0444); /** + * DOC: umsch_mm_fwlog (int) + * Enable umschfw log output for debugging, the default is disabled. + */ +MODULE_PARM_DESC(umsch_mm_fwlog, "Enable umschfw log(0 = disable (default value), 1 = enable)"); +module_param_named(umsch_mm_fwlog, amdgpu_umsch_mm_fwlog, int, 0444); + +/** * DOC: smu_pptable_id (int) * Used to override pptable id. id = 0 use VBIOS pptable. * id > 0 use the soft pptable with specicfied id. @@ -939,11 +1039,20 @@ module_param_named(user_partt_mode, amdgpu_user_partt_mode, uint, 0444); /** - * DOC: enforce_isolation (bool) - * enforce process isolation between graphics and compute via using the same reserved vmid. + * DOC: enforce_isolation (int) + * enforce process isolation between graphics and compute. + * (-1 = auto, 0 = disable, 1 = enable, 2 = enable legacy mode, 3 = enable without cleaner shader) + */ +module_param_named(enforce_isolation, amdgpu_enforce_isolation, int, 0444); +MODULE_PARM_DESC(enforce_isolation, +"enforce process isolation between graphics and compute. (-1 = auto, 0 = disable, 1 = enable, 2 = enable legacy mode, 3 = enable without cleaner shader)"); + +/** + * DOC: modeset (int) + * Override nomodeset (1 = override, -1 = auto). The default is -1 (auto). */ -module_param(enforce_isolation, bool, 0444); -MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between graphics and compute . enforce_isolation = on"); +MODULE_PARM_DESC(modeset, "Override nomodeset (1 = enable, -1 = auto)"); +module_param_named(modeset, amdgpu_modeset, int, 0444); /** * DOC: seamless (int) @@ -961,9 +1070,14 @@ module_param_named(seamless, amdgpu_seamless, int, 0444); * limits the VRAM size reported to ROCm applications to the visible * size, usually 256MB. * - 0x4: Disable GPU soft recovery, always do a full reset + * - 0x8: Use VRAM for firmware loading + * - 0x10: Enable ACA based RAS logging + * - 0x20: Enable experimental resets + * - 0x40: Disable ring resets + * - 0x80: Use VRAM for SMU pool */ MODULE_PARM_DESC(debug_mask, "debug options for amdgpu, disabled by default"); -module_param_named(debug_mask, amdgpu_debug_mask, uint, 0444); +module_param_named_unsafe(debug_mask, amdgpu_debug_mask, uint, 0444); /** * DOC: agp (int) @@ -990,6 +1104,28 @@ MODULE_PARM_DESC(wbrf, "Enable Wifi RFI interference mitigation (0 = disabled, 1 = enabled, -1 = auto(default)"); module_param_named(wbrf, amdgpu_wbrf, int, 0444); +/** + * DOC: rebar (int) + * Allow BAR resizing. Disable this to prevent the driver from attempting + * to resize the BAR if the GPU supports it and there is available MMIO space. + * Note that this just prevents the driver from resizing the BAR. The BIOS + * may have already resized the BAR at boot time. + */ +MODULE_PARM_DESC(rebar, "Resizable BAR (-1 = auto (default), 0 = disable, 1 = enable)"); +module_param_named(rebar, amdgpu_rebar, int, 0444); + +/** + * DOC: user_queue (int) + * Enable user queues on systems that support user queues. Possible values: + * + * - -1 = auto (ASIC specific default) + * - 0 = user queues disabled + * - 1 = user queues enabled and kernel queues enabled (if supported) + * - 2 = user queues enabled and kernel queues disabled + */ +MODULE_PARM_DESC(user_queue, "Enable user queues (-1 = auto (default), 0 = disable, 1 = enable, 2 = enable UQs and disable KQs)"); +module_param_named(user_queue, amdgpu_user_queue, int, 0444); + /* These devices are not supported by amdgpu. * They are supported by the mach64, r128, radeon drivers */ @@ -1703,7 +1839,6 @@ static const u16 amdgpu_unsupported_pciidlist[] = { }; static const struct pci_device_id pciidlist[] = { -#ifdef CONFIG_DRM_AMDGPU_SI {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, {0x1002, 0x6784, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, {0x1002, 0x6788, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, @@ -1776,8 +1911,6 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x6665, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY}, {0x1002, 0x6667, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY}, {0x1002, 0x666F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY}, -#endif -#ifdef CONFIG_DRM_AMDGPU_CIK /* Kaveri */ {0x1002, 0x1304, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU}, {0x1002, 0x1305, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU}, @@ -1860,7 +1993,6 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x985D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU}, {0x1002, 0x985E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU}, {0x1002, 0x985F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU}, -#endif /* topaz */ {0x1002, 0x6900, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ}, {0x1002, 0x6901, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ}, @@ -2123,6 +2255,29 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev) pr_info("debug: place fw in vram for frontdoor loading\n"); adev->debug_use_vram_fw_buf = true; } + + if (amdgpu_debug_mask & AMDGPU_DEBUG_ENABLE_RAS_ACA) { + pr_info("debug: enable RAS ACA\n"); + adev->debug_enable_ras_aca = true; + } + + if (amdgpu_debug_mask & AMDGPU_DEBUG_ENABLE_EXP_RESETS) { + pr_info("debug: enable experimental reset features\n"); + adev->debug_exp_resets = true; + } + + if (amdgpu_debug_mask & AMDGPU_DEBUG_DISABLE_GPU_RING_RESET) { + pr_info("debug: ring reset disabled\n"); + adev->debug_disable_gpu_ring_reset = true; + } + if (amdgpu_debug_mask & AMDGPU_DEBUG_SMU_POOL) { + pr_info("debug: use vram for smu pool\n"); + adev->pm.smu_debug_mask |= SMU_DEBUG_POOL_USE_VRAM; + } + if (amdgpu_debug_mask & AMDGPU_DEBUG_VM_USERPTR) { + pr_info("debug: VM mode debug for userptr is enabled\n"); + adev->debug_vm_userptr = true; + } } static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags) @@ -2150,6 +2305,12 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, int ret, retry = 0, i; bool supports_atomic = false; + if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA || + (pdev->class >> 8) == PCI_CLASS_DISPLAY_OTHER) { + if (drm_firmware_drivers_only() && amdgpu_modeset == -1) + return -EINVAL; + } + /* skip devices which are owned by radeon */ for (i = 0; i < ARRAY_SIZE(amdgpu_unsupported_pciidlist); i++) { if (amdgpu_unsupported_pciidlist[i] == pdev->device) @@ -2182,14 +2343,14 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, return -ENOTSUPP; } + switch (flags & AMD_ASIC_MASK) { + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_VERDE: + case CHIP_OLAND: + case CHIP_HAINAN: #ifdef CONFIG_DRM_AMDGPU_SI - if (!amdgpu_si_support) { - switch (flags & AMD_ASIC_MASK) { - case CHIP_TAHITI: - case CHIP_PITCAIRN: - case CHIP_VERDE: - case CHIP_OLAND: - case CHIP_HAINAN: + if (!amdgpu_si_support) { dev_info(&pdev->dev, "SI support provided by radeon.\n"); dev_info(&pdev->dev, @@ -2197,16 +2358,18 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, ); return -ENODEV; } - } + break; +#else + dev_info(&pdev->dev, "amdgpu is built without SI support.\n"); + return -ENODEV; #endif + case CHIP_KAVERI: + case CHIP_BONAIRE: + case CHIP_HAWAII: + case CHIP_KABINI: + case CHIP_MULLINS: #ifdef CONFIG_DRM_AMDGPU_CIK - if (!amdgpu_cik_support) { - switch (flags & AMD_ASIC_MASK) { - case CHIP_KAVERI: - case CHIP_BONAIRE: - case CHIP_HAWAII: - case CHIP_KABINI: - case CHIP_MULLINS: + if (!amdgpu_cik_support) { dev_info(&pdev->dev, "CIK support provided by radeon.\n"); dev_info(&pdev->dev, @@ -2214,8 +2377,14 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, ); return -ENODEV; } - } + break; +#else + dev_info(&pdev->dev, "amdgpu is built without CIK support.\n"); + return -ENODEV; #endif + default: + break; + } adev = devm_drm_dev_alloc(&pdev->dev, &amdgpu_kms_driver, typeof(*adev), ddev); if (IS_ERR(adev)) @@ -2255,17 +2424,25 @@ retry_init: if (ret) goto err_pci; + ret = amdgpu_amdkfd_drm_client_create(adev); + if (ret) + goto err_pci; + /* * 1. don't init fbdev on hw without DCE * 2. don't init fbdev if there are no connectors */ if (adev->mode_info.mode_config_initialized && !list_empty(&adev_to_drm(adev)->mode_config.connector_list)) { + const struct drm_format_info *format; + /* select 8 bpp console on low vram cards */ if (adev->gmc.real_vram_size <= (32*1024*1024)) - drm_fbdev_generic_setup(adev_to_drm(adev), 8); + format = drm_format_info(DRM_FORMAT_C8); else - drm_fbdev_generic_setup(adev_to_drm(adev), 32); + format = NULL; + + drm_client_setup(adev_to_drm(adev), format); } ret = amdgpu_debugfs_init(adev); @@ -2330,6 +2507,7 @@ amdgpu_pci_remove(struct pci_dev *pdev) struct amdgpu_device *adev = drm_to_adev(dev); amdgpu_xcp_dev_unplug(adev); + amdgpu_gmc_prepare_nps_mode_change(adev); drm_dev_unplug(dev); if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) { @@ -2368,78 +2546,6 @@ amdgpu_pci_shutdown(struct pci_dev *pdev) adev->mp1_state = PP_MP1_STATE_NONE; } -/** - * amdgpu_drv_delayed_reset_work_handler - work handler for reset - * - * @work: work_struct. - */ -static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work) -{ - struct list_head device_list; - struct amdgpu_device *adev; - int i, r; - struct amdgpu_reset_context reset_context; - - memset(&reset_context, 0, sizeof(reset_context)); - - mutex_lock(&mgpu_info.mutex); - if (mgpu_info.pending_reset == true) { - mutex_unlock(&mgpu_info.mutex); - return; - } - mgpu_info.pending_reset = true; - mutex_unlock(&mgpu_info.mutex); - - /* Use a common context, just need to make sure full reset is done */ - reset_context.method = AMD_RESET_METHOD_NONE; - set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); - - for (i = 0; i < mgpu_info.num_dgpu; i++) { - adev = mgpu_info.gpu_ins[i].adev; - reset_context.reset_req_dev = adev; - r = amdgpu_device_pre_asic_reset(adev, &reset_context); - if (r) { - dev_err(adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ", - r, adev_to_drm(adev)->unique); - } - if (!queue_work(system_unbound_wq, &adev->xgmi_reset_work)) - r = -EALREADY; - } - for (i = 0; i < mgpu_info.num_dgpu; i++) { - adev = mgpu_info.gpu_ins[i].adev; - flush_work(&adev->xgmi_reset_work); - adev->gmc.xgmi.pending_reset = false; - } - - /* reset function will rebuild the xgmi hive info , clear it now */ - for (i = 0; i < mgpu_info.num_dgpu; i++) - amdgpu_xgmi_remove_device(mgpu_info.gpu_ins[i].adev); - - INIT_LIST_HEAD(&device_list); - - for (i = 0; i < mgpu_info.num_dgpu; i++) - list_add_tail(&mgpu_info.gpu_ins[i].adev->reset_list, &device_list); - - /* unregister the GPU first, reset function will add them back */ - list_for_each_entry(adev, &device_list, reset_list) - amdgpu_unregister_gpu_instance(adev); - - /* Use a common context, just need to make sure full reset is done */ - set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags); - r = amdgpu_do_asic_reset(&device_list, &reset_context); - - if (r) { - DRM_ERROR("reinit gpus failure"); - return; - } - for (i = 0; i < mgpu_info.num_dgpu; i++) { - adev = mgpu_info.gpu_ins[i].adev; - if (!adev->kfd.init_complete) - amdgpu_amdkfd_device_init(adev); - amdgpu_ttm_set_buffer_funcs_status(adev, true); - } -} - static int amdgpu_pmops_prepare(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); @@ -2476,8 +2582,20 @@ static int amdgpu_pmops_suspend(struct device *dev) adev->in_s0ix = true; else if (amdgpu_acpi_is_s3_active(adev)) adev->in_s3 = true; - if (!adev->in_s0ix && !adev->in_s3) + if (!adev->in_s0ix && !adev->in_s3) { + /* don't allow going deep first time followed by s2idle the next time */ + if (adev->last_suspend_state != PM_SUSPEND_ON && + adev->last_suspend_state != pm_suspend_target_state) { + drm_err_once(drm_dev, "Unsupported suspend state %d\n", + pm_suspend_target_state); + return -EINVAL; + } return 0; + } + + /* cache the state last used for suspend */ + adev->last_suspend_state = pm_suspend_target_state; + return amdgpu_device_suspend(drm_dev, true); } @@ -2519,9 +2637,7 @@ static int amdgpu_pmops_freeze(struct device *dev) struct amdgpu_device *adev = drm_to_adev(drm_dev); int r; - adev->in_s4 = true; r = amdgpu_device_suspend(drm_dev, true); - adev->in_s4 = false; if (r) return r; @@ -2618,6 +2734,29 @@ static int amdgpu_runtime_idle_check_display(struct device *dev) return 0; } +static int amdgpu_runtime_idle_check_userq(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct drm_device *drm_dev = pci_get_drvdata(pdev); + struct amdgpu_device *adev = drm_to_adev(drm_dev); + struct amdgpu_usermode_queue *queue; + struct amdgpu_userq_mgr *uqm, *tmp; + int queue_id; + int ret = 0; + + mutex_lock(&adev->userq_mutex); + list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { + idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { + ret = -EBUSY; + goto done; + } + } +done: + mutex_unlock(&adev->userq_mutex); + + return ret; +} + static int amdgpu_pmops_runtime_suspend(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); @@ -2633,6 +2772,9 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) ret = amdgpu_runtime_idle_check_display(dev); if (ret) return ret; + ret = amdgpu_runtime_idle_check_userq(dev); + if (ret) + return ret; /* wait for all rings to drain before suspending */ for (i = 0; i < AMDGPU_MAX_RINGS; i++) { @@ -2646,7 +2788,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) } adev->in_runpm = true; - if (amdgpu_device_supports_px(drm_dev)) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX) drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; /* @@ -2656,7 +2798,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) * platforms. * TODO: this may be also needed for PX capable platform. */ - if (amdgpu_device_supports_boco(drm_dev)) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) adev->mp1_state = PP_MP1_STATE_UNLOAD; ret = amdgpu_device_prepare(drm_dev); @@ -2665,15 +2807,15 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) ret = amdgpu_device_suspend(drm_dev, false); if (ret) { adev->in_runpm = false; - if (amdgpu_device_supports_boco(drm_dev)) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) adev->mp1_state = PP_MP1_STATE_NONE; return ret; } - if (amdgpu_device_supports_boco(drm_dev)) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) adev->mp1_state = PP_MP1_STATE_NONE; - if (amdgpu_device_supports_px(drm_dev)) { + if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX) { /* Only need to handle PCI state in the driver for ATPX * PCI core handles it for _PR3. */ @@ -2682,9 +2824,10 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) pci_ignore_hotplug(pdev); pci_set_power_state(pdev, PCI_D3cold); drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF; - } else if (amdgpu_device_supports_boco(drm_dev)) { + } else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) { /* nothing to do */ - } else if (amdgpu_device_supports_baco(drm_dev)) { + } else if ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) || + (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)) { amdgpu_device_baco_enter(drm_dev); } @@ -2707,7 +2850,7 @@ static int amdgpu_pmops_runtime_resume(struct device *dev) if (!pci_device_is_present(adev->pdev)) adev->no_hw_access = true; - if (amdgpu_device_supports_px(drm_dev)) { + if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX) { drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; /* Only need to handle PCI state in the driver for ATPX @@ -2719,22 +2862,23 @@ static int amdgpu_pmops_runtime_resume(struct device *dev) if (ret) return ret; pci_set_master(pdev); - } else if (amdgpu_device_supports_boco(drm_dev)) { + } else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) { /* Only need to handle PCI state in the driver for ATPX * PCI core handles it for _PR3. */ pci_set_master(pdev); - } else if (amdgpu_device_supports_baco(drm_dev)) { + } else if ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) || + (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)) { amdgpu_device_baco_exit(drm_dev); } ret = amdgpu_device_resume(drm_dev, false); if (ret) { - if (amdgpu_device_supports_px(drm_dev)) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX) pci_disable_device(pdev); return ret; } - if (amdgpu_device_supports_px(drm_dev)) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX) drm_dev->switch_power_state = DRM_SWITCH_POWER_ON; adev->in_runpm = false; return 0; @@ -2744,8 +2888,7 @@ static int amdgpu_pmops_runtime_idle(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); - /* we don't want the main rpm_idle to call suspend - we want to autosuspend */ - int ret = 1; + int ret; if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) { pm_runtime_forbid(dev); @@ -2753,12 +2896,30 @@ static int amdgpu_pmops_runtime_idle(struct device *dev) } ret = amdgpu_runtime_idle_check_display(dev); + if (ret) + goto done; + ret = amdgpu_runtime_idle_check_userq(dev); +done: pm_runtime_mark_last_busy(dev); pm_runtime_autosuspend(dev); return ret; } +static int amdgpu_drm_release(struct inode *inode, struct file *filp) +{ + struct drm_file *file_priv = filp->private_data; + struct amdgpu_fpriv *fpriv = file_priv->driver_priv; + + if (fpriv) { + fpriv->evf_mgr.fd_closing = true; + amdgpu_eviction_fence_destroy(&fpriv->evf_mgr); + amdgpu_userq_mgr_fini(&fpriv->userq_mgr); + } + + return drm_release(inode, filp); +} + long amdgpu_drm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { @@ -2810,7 +2971,7 @@ static const struct file_operations amdgpu_driver_kms_fops = { .owner = THIS_MODULE, .open = drm_open, .flush = amdgpu_flush, - .release = drm_release, + .release = amdgpu_drm_release, .unlocked_ioctl = amdgpu_drm_ioctl, .mmap = drm_gem_mmap, .poll = drm_poll, @@ -2821,6 +2982,7 @@ static const struct file_operations amdgpu_driver_kms_fops = { #ifdef CONFIG_PROC_FS .show_fdinfo = drm_show_fdinfo, #endif + .fop_flags = FOP_UNSIGNED_OFFSET, }; int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv) @@ -2856,6 +3018,9 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SIGNAL, amdgpu_userq_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), }; static const struct drm_driver amdgpu_kms_driver = { @@ -2866,11 +3031,11 @@ static const struct drm_driver amdgpu_kms_driver = { DRIVER_SYNCOBJ_TIMELINE, .open = amdgpu_driver_open_kms, .postclose = amdgpu_driver_postclose_kms, - .lastclose = amdgpu_driver_lastclose_kms, .ioctls = amdgpu_ioctls_kms, .num_ioctls = ARRAY_SIZE(amdgpu_ioctls_kms), .dumb_create = amdgpu_mode_dumb_create, .dumb_map_offset = amdgpu_mode_dumb_mmap, + DRM_FBDEV_TTM_DRIVER_OPS, .fops = &amdgpu_driver_kms_fops, .release = &amdgpu_driver_release_kms, #ifdef CONFIG_PROC_FS @@ -2881,7 +3046,6 @@ static const struct drm_driver amdgpu_kms_driver = { .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = DRIVER_DATE, .major = KMS_DRIVER_MAJOR, .minor = KMS_DRIVER_MINOR, .patchlevel = KMS_DRIVER_PATCHLEVEL, @@ -2893,11 +3057,11 @@ const struct drm_driver amdgpu_partition_driver = { DRIVER_SYNCOBJ_TIMELINE, .open = amdgpu_driver_open_kms, .postclose = amdgpu_driver_postclose_kms, - .lastclose = amdgpu_driver_lastclose_kms, .ioctls = amdgpu_ioctls_kms, .num_ioctls = ARRAY_SIZE(amdgpu_ioctls_kms), .dumb_create = amdgpu_mode_dumb_create, .dumb_map_offset = amdgpu_mode_dumb_mmap, + DRM_FBDEV_TTM_DRIVER_OPS, .fops = &amdgpu_driver_kms_fops, .release = &amdgpu_driver_release_kms, @@ -2905,7 +3069,6 @@ const struct drm_driver amdgpu_partition_driver = { .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = DRIVER_DATE, .major = KMS_DRIVER_MAJOR, .minor = KMS_DRIVER_MINOR, .patchlevel = KMS_DRIVER_PATCHLEVEL, @@ -2940,9 +3103,6 @@ static int __init amdgpu_init(void) { int r; - if (drm_firmware_drivers_only()) - return -EINVAL; - r = amdgpu_sync_init(); if (r) goto error_sync; @@ -2951,6 +3111,10 @@ static int __init amdgpu_init(void) if (r) goto error_fence; + r = amdgpu_userq_fence_slab_init(); + if (r) + goto error_fence; + DRM_INFO("amdgpu kernel modesetting enabled.\n"); amdgpu_register_atpx_handler(); amdgpu_acpi_detect(); @@ -2958,6 +3122,12 @@ static int __init amdgpu_init(void) /* Ignore KFD init failures. Normal when CONFIG_HSA_AMD is not set. */ amdgpu_amdkfd_init(); + if (amdgpu_pp_feature_mask & PP_OVERDRIVE_MASK) { + add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); + pr_crit("Overdrive is enabled, please disable it before " + "reporting any bugs unrelated to overdrive.\n"); + } + /* let modprobe override vga console setting */ return pci_register_driver(&amdgpu_kms_pci_driver); @@ -2976,6 +3146,7 @@ static void __exit amdgpu_exit(void) amdgpu_acpi_release(); amdgpu_sync_fini(); amdgpu_fence_slab_fini(); + amdgpu_userq_fence_slab_fini(); mmu_notifier_synchronize(); amdgpu_xcp_drv_release(); } |