diff options
50 files changed, 477 insertions, 175 deletions
diff --git a/Documentation/i2c/summary.rst b/Documentation/i2c/summary.rst index 136c4e333be7..786c618ba3be 100644 --- a/Documentation/i2c/summary.rst +++ b/Documentation/i2c/summary.rst @@ -11,9 +11,11 @@ systems. Some systems use variants that don't meet branding requirements, and so are not advertised as being I2C but come under different names, e.g. TWI (Two Wire Interface), IIC. -The official I2C specification is the `"I2C-bus specification and user -manual" (UM10204) <https://www.nxp.com/docs/en/user-guide/UM10204.pdf>`_ -published by NXP Semiconductors. +The latest official I2C specification is the `"I2C-bus specification and user +manual" (UM10204) <https://www.nxp.com/webapp/Download?colCode=UM10204>`_ +published by NXP Semiconductors. However, you need to log-in to the site to +access the PDF. An older version of the specification (revision 6) is archived +`here <https://web.archive.org/web/20210813122132/https://www.nxp.com/docs/en/user-guide/UM10204.pdf>`_. SMBus (System Management Bus) is based on the I2C protocol, and is mostly a subset of I2C protocols and signaling. Many I2C devices will work on an diff --git a/MAINTAINERS b/MAINTAINERS index fb18ce7168aa..dd36acc87ce6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3769,7 +3769,8 @@ S: Supported F: drivers/net/wireless/broadcom/brcm80211/ BROADCOM BRCMSTB GPIO DRIVER -M: Gregory Fong <gregory.0xf0@gmail.com> +M: Doug Berger <opendmb@gmail.com> +M: Florian Fainelli <f.fainelli@gmail.com> L: bcm-kernel-feedback-list@broadcom.com S: Supported F: Documentation/devicetree/bindings/gpio/brcm,brcmstb-gpio.txt @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 16 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = -rc8 NAME = Gobble Gobble # *DOCUMENTATION* diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index c736cf2ac76b..e2c5b296120d 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -68,7 +68,7 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = { "(__parainstructions|__alt_instructions)(_end)?|" "(__iommu_table|__apicdrivers|__smp_locks)(_end)?|" "__(start|end)_pci_.*|" -#if CONFIG_FW_LOADER_BUILTIN +#if CONFIG_FW_LOADER "__(start|end)_builtin_fw|" #endif "__(start|stop)___ksymtab(_gpl)?|" diff --git a/drivers/gpio/gpio-aspeed-sgpio.c b/drivers/gpio/gpio-aspeed-sgpio.c index 3d6ef37a7702..b3a9b8488f11 100644 --- a/drivers/gpio/gpio-aspeed-sgpio.c +++ b/drivers/gpio/gpio-aspeed-sgpio.c @@ -395,7 +395,7 @@ static void aspeed_sgpio_irq_handler(struct irq_desc *desc) reg = ioread32(bank_reg(data, bank, reg_irq_status)); for_each_set_bit(p, ®, 32) - generic_handle_domain_irq(gc->irq.domain, i * 32 + p * 2); + generic_handle_domain_irq(gc->irq.domain, (i * 32 + p) * 2); } chained_irq_exit(ic, desc); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1e651b959141..694c3726e0f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3166,6 +3166,12 @@ static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev) bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) { switch (asic_type) { +#ifdef CONFIG_DRM_AMDGPU_SI + case CHIP_HAINAN: +#endif + case CHIP_TOPAZ: + /* chips with no display hardware */ + return false; #if defined(CONFIG_DRM_AMD_DC) case CHIP_TAHITI: case CHIP_PITCAIRN: @@ -4461,7 +4467,7 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev) int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, struct amdgpu_reset_context *reset_context) { - int i, j, r = 0; + int i, r = 0; struct amdgpu_job *job = NULL; bool need_full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); @@ -4483,15 +4489,8 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, /*clear job fence from fence drv to avoid force_completion *leave NULL and vm flush fence in fence drv */ - for (j = 0; j <= ring->fence_drv.num_fences_mask; j++) { - struct dma_fence *old, **ptr; + amdgpu_fence_driver_clear_job_fences(ring); - ptr = &ring->fence_drv.fences[j]; - old = rcu_dereference_protected(*ptr, 1); - if (old && test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &old->flags)) { - RCU_INIT_POINTER(*ptr, NULL); - } - } /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ amdgpu_fence_driver_force_completion(ring); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index ea00090b3fb3..bcc9343353b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -526,10 +526,15 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev) } } +union gc_info { + struct gc_info_v1_0 v1; + struct gc_info_v2_0 v2; +}; + int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) { struct binary_header *bhdr; - struct gc_info_v1_0 *gc_info; + union gc_info *gc_info; if (!adev->mman.discovery_bin) { DRM_ERROR("ip discovery uninitialized\n"); @@ -537,28 +542,55 @@ int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) } bhdr = (struct binary_header *)adev->mman.discovery_bin; - gc_info = (struct gc_info_v1_0 *)(adev->mman.discovery_bin + + gc_info = (union gc_info *)(adev->mman.discovery_bin + le16_to_cpu(bhdr->table_list[GC].offset)); - - adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->gc_num_se); - adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->gc_num_wgp0_per_sa) + - le32_to_cpu(gc_info->gc_num_wgp1_per_sa)); - adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->gc_num_sa_per_se); - adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->gc_num_rb_per_se); - adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->gc_num_gl2c); - adev->gfx.config.max_gprs = le32_to_cpu(gc_info->gc_num_gprs); - adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->gc_num_max_gs_thds); - adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->gc_gs_table_depth); - adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->gc_gsprim_buff_depth); - adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->gc_double_offchip_lds_buffer); - adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->gc_wave_size); - adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->gc_max_waves_per_simd); - adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->gc_max_scratch_slots_per_cu); - adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->gc_lds_size); - adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->gc_num_sc_per_se) / - le32_to_cpu(gc_info->gc_num_sa_per_se); - adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->gc_num_packer_per_sc); - + switch (gc_info->v1.header.version_major) { + case 1: + adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v1.gc_num_se); + adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->v1.gc_num_wgp0_per_sa) + + le32_to_cpu(gc_info->v1.gc_num_wgp1_per_sa)); + adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v1.gc_num_sa_per_se); + adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v1.gc_num_rb_per_se); + adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v1.gc_num_gl2c); + adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v1.gc_num_gprs); + adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v1.gc_num_max_gs_thds); + adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v1.gc_gs_table_depth); + adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v1.gc_gsprim_buff_depth); + adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v1.gc_double_offchip_lds_buffer); + adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v1.gc_wave_size); + adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v1.gc_max_waves_per_simd); + adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v1.gc_max_scratch_slots_per_cu); + adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v1.gc_lds_size); + adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v1.gc_num_sc_per_se) / + le32_to_cpu(gc_info->v1.gc_num_sa_per_se); + adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v1.gc_num_packer_per_sc); + break; + case 2: + adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v2.gc_num_se); + adev->gfx.config.max_cu_per_sh = le32_to_cpu(gc_info->v2.gc_num_cu_per_sh); + adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v2.gc_num_sh_per_se); + adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v2.gc_num_rb_per_se); + adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v2.gc_num_tccs); + adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v2.gc_num_gprs); + adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v2.gc_num_max_gs_thds); + adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v2.gc_gs_table_depth); + adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v2.gc_gsprim_buff_depth); + adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v2.gc_double_offchip_lds_buffer); + adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v2.gc_wave_size); + adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v2.gc_max_waves_per_simd); + adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v2.gc_max_scratch_slots_per_cu); + adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v2.gc_lds_size); + adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v2.gc_num_sc_per_se) / + le32_to_cpu(gc_info->v2.gc_num_sh_per_se); + adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v2.gc_num_packer_per_sc); + break; + default: + dev_err(adev->dev, + "Unhandled GC info table %d.%d\n", + gc_info->v1.header.version_major, + gc_info->v1.header.version_minor); + return -EINVAL; + } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index ae6ab93c868b..7444484a12bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -384,7 +384,7 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach) struct amdgpu_vm_bo_base *bo_base; int r; - if (bo->tbo.resource->mem_type == TTM_PL_SYSTEM) + if (!bo->tbo.resource || bo->tbo.resource->mem_type == TTM_PL_SYSTEM) return; r = ttm_bo_validate(&bo->tbo, &placement, &ctx); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index ad95de6399af..86ca80da9eea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -328,10 +328,11 @@ module_param_named(aspm, amdgpu_aspm, int, 0444); /** * DOC: runpm (int) - * Override for runtime power management control for dGPUs in PX/HG laptops. The amdgpu driver can dynamically power down - * the dGPU on PX/HG laptops when it is idle. The default is -1 (auto enable). Setting the value to 0 disables this functionality. + * Override for runtime power management control for dGPUs. The amdgpu driver can dynamically power down + * the dGPUs when they are idle if supported. The default is -1 (auto enable). + * Setting the value to 0 disables this functionality. */ -MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = PX only default)"); +MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = auto)"); module_param_named(runpm, amdgpu_runtime_pm, int, 0444); /** @@ -2153,7 +2154,10 @@ static int amdgpu_pmops_suspend(struct device *dev) adev->in_s3 = true; r = amdgpu_device_suspend(drm_dev, true); adev->in_s3 = false; - + if (r) + return r; + if (!adev->in_s0ix) + r = amdgpu_asic_reset(adev); return r; } @@ -2234,12 +2238,27 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) if (amdgpu_device_supports_px(drm_dev)) drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; + /* + * By setting mp1_state as PP_MP1_STATE_UNLOAD, MP1 will do some + * proper cleanups and put itself into a state ready for PNP. That + * can address some random resuming failure observed on BOCO capable + * platforms. + * TODO: this may be also needed for PX capable platform. + */ + if (amdgpu_device_supports_boco(drm_dev)) + adev->mp1_state = PP_MP1_STATE_UNLOAD; + ret = amdgpu_device_suspend(drm_dev, false); if (ret) { adev->in_runpm = false; + if (amdgpu_device_supports_boco(drm_dev)) + adev->mp1_state = PP_MP1_STATE_NONE; return ret; } + if (amdgpu_device_supports_boco(drm_dev)) + adev->mp1_state = PP_MP1_STATE_NONE; + if (amdgpu_device_supports_px(drm_dev)) { /* Only need to handle PCI state in the driver for ATPX * PCI core handles it for _PR3. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 3b7e86ea7167..9afd11ca2709 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -77,11 +77,13 @@ void amdgpu_fence_slab_fini(void) * Cast helper */ static const struct dma_fence_ops amdgpu_fence_ops; +static const struct dma_fence_ops amdgpu_job_fence_ops; static inline struct amdgpu_fence *to_amdgpu_fence(struct dma_fence *f) { struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base); - if (__f->base.ops == &amdgpu_fence_ops) + if (__f->base.ops == &amdgpu_fence_ops || + __f->base.ops == &amdgpu_job_fence_ops) return __f; return NULL; @@ -158,19 +160,18 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd } seq = ++ring->fence_drv.sync_seq; - if (job != NULL && job->job_run_counter) { + if (job && job->job_run_counter) { /* reinit seq for resubmitted jobs */ fence->seqno = seq; } else { - dma_fence_init(fence, &amdgpu_fence_ops, - &ring->fence_drv.lock, - adev->fence_context + ring->idx, - seq); - } - - if (job != NULL) { - /* mark this fence has a parent job */ - set_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &fence->flags); + if (job) + dma_fence_init(fence, &amdgpu_job_fence_ops, + &ring->fence_drv.lock, + adev->fence_context + ring->idx, seq); + else + dma_fence_init(fence, &amdgpu_fence_ops, + &ring->fence_drv.lock, + adev->fence_context + ring->idx, seq); } amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, @@ -621,6 +622,25 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev) } /** + * amdgpu_fence_driver_clear_job_fences - clear job embedded fences of ring + * + * @ring: fence of the ring to be cleared + * + */ +void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring) +{ + int i; + struct dma_fence *old, **ptr; + + for (i = 0; i <= ring->fence_drv.num_fences_mask; i++) { + ptr = &ring->fence_drv.fences[i]; + old = rcu_dereference_protected(*ptr, 1); + if (old && old->ops == &amdgpu_job_fence_ops) + RCU_INIT_POINTER(*ptr, NULL); + } +} + +/** * amdgpu_fence_driver_force_completion - force signal latest fence of ring * * @ring: fence of the ring to signal @@ -643,16 +663,14 @@ static const char *amdgpu_fence_get_driver_name(struct dma_fence *fence) static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f) { - struct amdgpu_ring *ring; + return (const char *)to_amdgpu_fence(f)->ring->name; +} - if (test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &f->flags)) { - struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); +static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f) +{ + struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); - ring = to_amdgpu_ring(job->base.sched); - } else { - ring = to_amdgpu_fence(f)->ring; - } - return (const char *)ring->name; + return (const char *)to_amdgpu_ring(job->base.sched)->name; } /** @@ -665,18 +683,25 @@ static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f) */ static bool amdgpu_fence_enable_signaling(struct dma_fence *f) { - struct amdgpu_ring *ring; + if (!timer_pending(&to_amdgpu_fence(f)->ring->fence_drv.fallback_timer)) + amdgpu_fence_schedule_fallback(to_amdgpu_fence(f)->ring); - if (test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &f->flags)) { - struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); + return true; +} - ring = to_amdgpu_ring(job->base.sched); - } else { - ring = to_amdgpu_fence(f)->ring; - } +/** + * amdgpu_job_fence_enable_signaling - enable signalling on job fence + * @f: fence + * + * This is the simliar function with amdgpu_fence_enable_signaling above, it + * only handles the job embedded fence. + */ +static bool amdgpu_job_fence_enable_signaling(struct dma_fence *f) +{ + struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); - if (!timer_pending(&ring->fence_drv.fallback_timer)) - amdgpu_fence_schedule_fallback(ring); + if (!timer_pending(&to_amdgpu_ring(job->base.sched)->fence_drv.fallback_timer)) + amdgpu_fence_schedule_fallback(to_amdgpu_ring(job->base.sched)); return true; } @@ -692,19 +717,23 @@ static void amdgpu_fence_free(struct rcu_head *rcu) { struct dma_fence *f = container_of(rcu, struct dma_fence, rcu); - if (test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &f->flags)) { - /* free job if fence has a parent job */ - struct amdgpu_job *job; - - job = container_of(f, struct amdgpu_job, hw_fence); - kfree(job); - } else { /* free fence_slab if it's separated fence*/ - struct amdgpu_fence *fence; + kmem_cache_free(amdgpu_fence_slab, to_amdgpu_fence(f)); +} - fence = to_amdgpu_fence(f); - kmem_cache_free(amdgpu_fence_slab, fence); - } +/** + * amdgpu_job_fence_free - free up the job with embedded fence + * + * @rcu: RCU callback head + * + * Free up the job with embedded fence after the RCU grace period. + */ +static void amdgpu_job_fence_free(struct rcu_head *rcu) +{ + struct dma_fence *f = container_of(rcu, struct dma_fence, rcu); + + /* free job if fence has a parent job */ + kfree(container_of(f, struct amdgpu_job, hw_fence)); } /** @@ -720,6 +749,19 @@ static void amdgpu_fence_release(struct dma_fence *f) call_rcu(&f->rcu, amdgpu_fence_free); } +/** + * amdgpu_job_fence_release - callback that job embedded fence can be freed + * + * @f: fence + * + * This is the simliar function with amdgpu_fence_release above, it + * only handles the job embedded fence. + */ +static void amdgpu_job_fence_release(struct dma_fence *f) +{ + call_rcu(&f->rcu, amdgpu_job_fence_free); +} + static const struct dma_fence_ops amdgpu_fence_ops = { .get_driver_name = amdgpu_fence_get_driver_name, .get_timeline_name = amdgpu_fence_get_timeline_name, @@ -727,6 +769,12 @@ static const struct dma_fence_ops amdgpu_fence_ops = { .release = amdgpu_fence_release, }; +static const struct dma_fence_ops amdgpu_job_fence_ops = { + .get_driver_name = amdgpu_fence_get_driver_name, + .get_timeline_name = amdgpu_job_fence_get_timeline_name, + .enable_signaling = amdgpu_job_fence_enable_signaling, + .release = amdgpu_job_fence_release, +}; /* * Fence debugfs diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 4d380e79752c..fae7d185ad0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -53,9 +53,6 @@ enum amdgpu_ring_priority_level { #define AMDGPU_FENCE_FLAG_INT (1 << 1) #define AMDGPU_FENCE_FLAG_TC_WB_ONLY (1 << 2) -/* fence flag bit to indicate the face is embedded in job*/ -#define AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT (DMA_FENCE_FLAG_USER_BITS + 1) - #define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched) #define AMDGPU_IB_POOL_SIZE (1024 * 1024) @@ -114,6 +111,7 @@ struct amdgpu_fence_driver { struct dma_fence **fences; }; +void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring); void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring); int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index d54d720b3cf6..3799226defc0 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -246,6 +246,13 @@ static int vcn_v1_0_suspend(void *handle) { int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool idle_work_unexecuted; + + idle_work_unexecuted = cancel_delayed_work_sync(&adev->vcn.idle_work); + if (idle_work_unexecuted) { + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_uvd(adev, false); + } r = vcn_v1_0_hw_fini(adev); if (r) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c index f4c9a458ace8..9df38e2ee4f4 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c @@ -158,6 +158,7 @@ static void dcn31_update_clocks(struct clk_mgr *clk_mgr_base, union display_idle_optimization_u idle_info = { 0 }; idle_info.idle_info.df_request_disabled = 1; idle_info.idle_info.phy_ref_clk_off = 1; + idle_info.idle_info.s0i2_rdy = 1; dcn31_smu_set_display_idle_optimization(clk_mgr, idle_info.data); /* update power state */ clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index c8457babfdea..c0bdc23702c8 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -3945,12 +3945,9 @@ static void update_psp_stream_config(struct pipe_ctx *pipe_ctx, bool dpms_off) config.dig_be = pipe_ctx->stream->link->link_enc_hw_inst; #if defined(CONFIG_DRM_AMD_DC_DCN) config.stream_enc_idx = pipe_ctx->stream_res.stream_enc->id - ENGINE_ID_DIGA; - + if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_PHY || pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) { - link_enc = pipe_ctx->stream->link->link_enc; - config.dio_output_type = pipe_ctx->stream->link->ep_type; - config.dio_output_idx = link_enc->transmitter - TRANSMITTER_UNIPHY_A; if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_PHY) link_enc = pipe_ctx->stream->link->link_enc; else if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c index 34001a30d449..10e613ec7d24 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c @@ -78,6 +78,7 @@ static const struct hw_sequencer_funcs dcn10_funcs = { .get_clock = dcn10_get_clock, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, + .power_down = dce110_power_down, .set_backlight_level = dce110_set_backlight_level, .set_abm_immediate_disable = dce110_set_abm_immediate_disable, .set_pipe = dce110_set_pipe, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 3883f918b3bb..83f5d9aaffcb 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -1069,7 +1069,7 @@ static const struct dc_debug_options debug_defaults_drv = { .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = true, - .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c index 0fa381088d1d..faec0297ec0a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c @@ -603,7 +603,7 @@ static const struct dc_debug_options debug_defaults_drv = { .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = true, - .pipe_split_policy = MPC_SPLIT_AVOID, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index d452a0d1777e..79313d1ab5d9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -874,7 +874,7 @@ static const struct dc_debug_options debug_defaults_drv = { .clock_trace = true, .disable_pplib_clock_request = true, .min_disp_clk_khz = 100000, - .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index 79a66e0c4303..98852b586295 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -840,7 +840,7 @@ static const struct dc_debug_options debug_defaults_drv = { .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = true, - .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c index fbaa03f26d8b..e472b729d869 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c @@ -686,7 +686,7 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_clock_gate = true, .disable_pplib_clock_request = true, .disable_pplib_wm_range = true, - .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c index fcf96cf08c76..16e7059393fa 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c @@ -211,7 +211,7 @@ static const struct dc_debug_options debug_defaults_drv = { .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = true, - .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c index 4a9b64023675..87cec14b7870 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c @@ -193,7 +193,7 @@ static const struct dc_debug_options debug_defaults_drv = { .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = true, - .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c index 4f6e639e9353..17e2f2bb29ec 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c @@ -101,6 +101,7 @@ static const struct hw_sequencer_funcs dcn31_funcs = { .z10_restore = dcn31_z10_restore, .z10_save_init = dcn31_z10_save_init, .set_disp_pattern_generator = dcn30_set_disp_pattern_generator, + .optimize_pwr_state = dcn21_optimize_pwr_state, .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, .update_visual_confirm_color = dcn20_update_visual_confirm_color, }; diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 18896294ae12..27afbe6ec0fe 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -355,6 +355,14 @@ static const struct dce110_clk_src_regs clk_src_regs[] = { clk_src_regs(3, D), clk_src_regs(4, E) }; +/*pll_id being rempped in dmub, in driver it is logical instance*/ +static const struct dce110_clk_src_regs clk_src_regs_b0[] = { + clk_src_regs(0, A), + clk_src_regs(1, B), + clk_src_regs(2, F), + clk_src_regs(3, G), + clk_src_regs(4, E) +}; static const struct dce110_clk_src_shift cs_shift = { CS_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) @@ -994,7 +1002,7 @@ static const struct dc_debug_options debug_defaults_drv = { .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = false, - .pipe_split_policy = MPC_SPLIT_AVOID, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, @@ -2276,14 +2284,27 @@ static bool dcn31_resource_construct( dcn30_clock_source_create(ctx, ctx->dc_bios, CLOCK_SOURCE_COMBO_PHY_PLL1, &clk_src_regs[1], false); - pool->base.clock_sources[DCN31_CLK_SRC_PLL2] = + /*move phypllx_pixclk_resync to dmub next*/ + if (dc->ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0) { + pool->base.clock_sources[DCN31_CLK_SRC_PLL2] = + dcn30_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL2, + &clk_src_regs_b0[2], false); + pool->base.clock_sources[DCN31_CLK_SRC_PLL3] = + dcn30_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL3, + &clk_src_regs_b0[3], false); + } else { + pool->base.clock_sources[DCN31_CLK_SRC_PLL2] = dcn30_clock_source_create(ctx, ctx->dc_bios, CLOCK_SOURCE_COMBO_PHY_PLL2, &clk_src_regs[2], false); - pool->base.clock_sources[DCN31_CLK_SRC_PLL3] = + pool->base.clock_sources[DCN31_CLK_SRC_PLL3] = dcn30_clock_source_create(ctx, ctx->dc_bios, CLOCK_SOURCE_COMBO_PHY_PLL3, &clk_src_regs[3], false); + } + pool->base.clock_sources[DCN31_CLK_SRC_PLL4] = dcn30_clock_source_create(ctx, ctx->dc_bios, CLOCK_SOURCE_COMBO_PHY_PLL4, diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h index 416fe7a721d8..a513363b3326 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h @@ -49,4 +49,35 @@ struct resource_pool *dcn31_create_resource_pool( const struct dc_init_data *init_data, struct dc *dc); +/*temp: B0 specific before switch to dcn313 headers*/ +#ifndef regPHYPLLF_PIXCLK_RESYNC_CNTL +#define regPHYPLLF_PIXCLK_RESYNC_CNTL 0x007e +#define regPHYPLLF_PIXCLK_RESYNC_CNTL_BASE_IDX 1 +#define regPHYPLLG_PIXCLK_RESYNC_CNTL 0x005f +#define regPHYPLLG_PIXCLK_RESYNC_CNTL_BASE_IDX 1 + +//PHYPLLF_PIXCLK_RESYNC_CNTL +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_RESYNC_ENABLE__SHIFT 0x0 +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_DEEP_COLOR_DTO_ENABLE_STATUS__SHIFT 0x1 +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_DCCG_DEEP_COLOR_CNTL__SHIFT 0x4 +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_ENABLE__SHIFT 0x8 +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_DOUBLE_RATE_ENABLE__SHIFT 0x9 +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_RESYNC_ENABLE_MASK 0x00000001L +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_DEEP_COLOR_DTO_ENABLE_STATUS_MASK 0x00000002L +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_DCCG_DEEP_COLOR_CNTL_MASK 0x00000030L +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_ENABLE_MASK 0x00000100L +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_DOUBLE_RATE_ENABLE_MASK 0x00000200L + +//PHYPLLG_PIXCLK_RESYNC_CNTL +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_RESYNC_ENABLE__SHIFT 0x0 +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_DEEP_COLOR_DTO_ENABLE_STATUS__SHIFT 0x1 +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_DCCG_DEEP_COLOR_CNTL__SHIFT 0x4 +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_ENABLE__SHIFT 0x8 +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_DOUBLE_RATE_ENABLE__SHIFT 0x9 +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_RESYNC_ENABLE_MASK 0x00000001L +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_DEEP_COLOR_DTO_ENABLE_STATUS_MASK 0x00000002L +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_DCCG_DEEP_COLOR_CNTL_MASK 0x00000030L +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_ENABLE_MASK 0x00000100L +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_DOUBLE_RATE_ENABLE_MASK 0x00000200L +#endif #endif /* _DCN31_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/include/discovery.h b/drivers/gpu/drm/amd/include/discovery.h index 7ec4331e67f2..a486769b66c6 100644 --- a/drivers/gpu/drm/amd/include/discovery.h +++ b/drivers/gpu/drm/amd/include/discovery.h @@ -143,6 +143,55 @@ struct gc_info_v1_0 { uint32_t gc_num_gl2a; }; +struct gc_info_v1_1 { + struct gpu_info_header header; + + uint32_t gc_num_se; + uint32_t gc_num_wgp0_per_sa; + uint32_t gc_num_wgp1_per_sa; + uint32_t gc_num_rb_per_se; + uint32_t gc_num_gl2c; + uint32_t gc_num_gprs; + uint32_t gc_num_max_gs_thds; + uint32_t gc_gs_table_depth; + uint32_t gc_gsprim_buff_depth; + uint32_t gc_parameter_cache_depth; + uint32_t gc_double_offchip_lds_buffer; + uint32_t gc_wave_size; + uint32_t gc_max_waves_per_simd; + uint32_t gc_max_scratch_slots_per_cu; + uint32_t gc_lds_size; + uint32_t gc_num_sc_per_se; + uint32_t gc_num_sa_per_se; + uint32_t gc_num_packer_per_sc; + uint32_t gc_num_gl2a; + uint32_t gc_num_tcp_per_sa; + uint32_t gc_num_sdp_interface; + uint32_t gc_num_tcps; +}; + +struct gc_info_v2_0 { + struct gpu_info_header header; + + uint32_t gc_num_se; + uint32_t gc_num_cu_per_sh; + uint32_t gc_num_sh_per_se; + uint32_t gc_num_rb_per_se; + uint32_t gc_num_tccs; + uint32_t gc_num_gprs; + uint32_t gc_num_max_gs_thds; + uint32_t gc_gs_table_depth; + uint32_t gc_gsprim_buff_depth; + uint32_t gc_parameter_cache_depth; + uint32_t gc_double_offchip_lds_buffer; + uint32_t gc_wave_size; + uint32_t gc_max_waves_per_simd; + uint32_t gc_max_scratch_slots_per_cu; + uint32_t gc_lds_size; + uint32_t gc_num_sc_per_se; + uint32_t gc_num_packer_per_sc; +}; + typedef struct harvest_info_header { uint32_t signature; /* Table Signature */ uint32_t version; /* Table Version */ diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 8a3244585d80..8a817932acdf 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1568,9 +1568,7 @@ static int smu_suspend(void *handle) smu->watermarks_bitmap &= ~(WATERMARKS_LOADED); - /* skip CGPG when in S0ix */ - if (smu->is_apu && !adev->in_s0ix) - smu_set_gfx_cgpg(&adev->smu, false); + smu_set_gfx_cgpg(&adev->smu, false); return 0; } @@ -1601,8 +1599,7 @@ static int smu_resume(void *handle) return ret; } - if (smu->is_apu) - smu_set_gfx_cgpg(&adev->smu, true); + smu_set_gfx_cgpg(&adev->smu, true); smu->disable_uclk_switch = 0; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c index 43028f2cd28b..9c91e79c955f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c @@ -120,7 +120,8 @@ int smu_v12_0_powergate_sdma(struct smu_context *smu, bool gate) int smu_v12_0_set_gfx_cgpg(struct smu_context *smu, bool enable) { - if (!(smu->adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) + /* Until now the SMU12 only implemented for Renoir series so here neen't do APU check. */ + if (!(smu->adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) || smu->adev->in_s0ix) return 0; return smu_cmn_send_smc_msg_with_param(smu, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index 59a7d276541d..7d50827cf0a8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -1621,7 +1621,7 @@ static int aldebaran_allow_xgmi_power_down(struct smu_context *smu, bool en) { return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GmiPwrDnControl, - en ? 1 : 0, + en ? 0 : 1, NULL); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index fb33d0322960..c37c9f0d8167 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -564,6 +564,7 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, container_of_user(base, typeof(*ext), base); const struct set_proto_ctx_engines *set = data; struct drm_i915_private *i915 = set->i915; + struct i915_engine_class_instance prev_engine; u64 flags; int err = 0, n, i, j; u16 slot, width, num_siblings; @@ -629,7 +630,6 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, /* Create contexts / engines */ for (i = 0; i < width; ++i) { intel_engine_mask_t current_mask = 0; - struct i915_engine_class_instance prev_engine; for (j = 0; j < num_siblings; ++j) { struct i915_engine_class_instance ci; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 9b24d9b5ade1..cb0bf6ffd0e3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -3017,7 +3017,7 @@ eb_composite_fence_create(struct i915_execbuffer *eb, int out_fence_fd) fence_array = dma_fence_array_create(eb->num_batches, fences, eb->context->parallel.fence_context, - eb->context->parallel.seqno, + eb->context->parallel.seqno++, false); if (!fence_array) { kfree(fences); diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 05d0b3eb3690..0ae416aa76dc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -353,15 +353,22 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e if (ret) return ret; - } - fobj = dma_resv_shared_list(resv); - fence = dma_resv_excl_fence(resv); + fobj = NULL; + } else { + fobj = dma_resv_shared_list(resv); + } - if (fence) { + /* Waiting for the exclusive fence first causes performance regressions + * under some circumstances. So manually wait for the shared ones first. + */ + for (i = 0; i < (fobj ? fobj->shared_count : 0) && !ret; ++i) { struct nouveau_channel *prev = NULL; bool must_wait = true; + fence = rcu_dereference_protected(fobj->shared[i], + dma_resv_held(resv)); + f = nouveau_local_fence(fence, chan->drm); if (f) { rcu_read_lock(); @@ -373,20 +380,13 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e if (must_wait) ret = dma_fence_wait(fence, intr); - - return ret; } - if (!exclusive || !fobj) - return ret; - - for (i = 0; i < fobj->shared_count && !ret; ++i) { + fence = dma_resv_excl_fence(resv); + if (fence) { struct nouveau_channel *prev = NULL; bool must_wait = true; - fence = rcu_dereference_protected(fobj->shared[i], - dma_resv_held(resv)); - f = nouveau_local_fence(fence, chan->drm); if (f) { rcu_read_lock(); @@ -398,6 +398,8 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e if (must_wait) ret = dma_fence_wait(fence, intr); + + return ret; } return ret; diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index bce0e8bb7852..cf5d049342ea 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -535,6 +535,9 @@ static long compat_i2cdev_ioctl(struct file *file, unsigned int cmd, unsigned lo sizeof(rdwr_arg))) return -EFAULT; + if (!rdwr_arg.msgs || rdwr_arg.nmsgs == 0) + return -EINVAL; + if (rdwr_arg.nmsgs > I2C_RDWR_IOCTL_MAX_MSGS) return -EINVAL; diff --git a/drivers/input/joystick/spaceball.c b/drivers/input/joystick/spaceball.c index 429411c6c0a8..a85a4f33aea8 100644 --- a/drivers/input/joystick/spaceball.c +++ b/drivers/input/joystick/spaceball.c @@ -19,6 +19,7 @@ #include <linux/module.h> #include <linux/input.h> #include <linux/serio.h> +#include <asm/unaligned.h> #define DRIVER_DESC "SpaceTec SpaceBall 2003/3003/4000 FLX driver" @@ -75,9 +76,15 @@ static void spaceball_process_packet(struct spaceball* spaceball) case 'D': /* Ball data */ if (spaceball->idx != 15) return; - for (i = 0; i < 6; i++) + /* + * Skip first three bytes; read six axes worth of data. + * Axis values are signed 16-bit big-endian. + */ + data += 3; + for (i = 0; i < ARRAY_SIZE(spaceball_axes); i++) { input_report_abs(dev, spaceball_axes[i], - (__s16)((data[2 * i + 3] << 8) | data[2 * i + 2])); + (__s16)get_unaligned_be16(&data[i * 2])); + } break; case 'K': /* Button data */ diff --git a/drivers/input/mouse/appletouch.c b/drivers/input/mouse/appletouch.c index bfa26651c0be..627048bc6a12 100644 --- a/drivers/input/mouse/appletouch.c +++ b/drivers/input/mouse/appletouch.c @@ -916,6 +916,8 @@ static int atp_probe(struct usb_interface *iface, set_bit(BTN_TOOL_TRIPLETAP, input_dev->keybit); set_bit(BTN_LEFT, input_dev->keybit); + INIT_WORK(&dev->work, atp_reinit); + error = input_register_device(dev->input); if (error) goto err_free_buffer; @@ -923,8 +925,6 @@ static int atp_probe(struct usb_interface *iface, /* save our data pointer in this interface device */ usb_set_intfdata(iface, dev); - INIT_WORK(&dev->work, atp_reinit); - return 0; err_free_buffer: diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 284b939fb1ea..059dae8909ee 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -3100,6 +3100,8 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn) { struct iscsi_conn *conn = cls_conn->dd_data; struct iscsi_session *session = conn->session; + char *tmp_persistent_address = conn->persistent_address; + char *tmp_local_ipaddr = conn->local_ipaddr; del_timer_sync(&conn->transport_timer); @@ -3121,8 +3123,6 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn) spin_lock_bh(&session->frwd_lock); free_pages((unsigned long) conn->data, get_order(ISCSI_DEF_MAX_RECV_SEG_LEN)); - kfree(conn->persistent_address); - kfree(conn->local_ipaddr); /* regular RX path uses back_lock */ spin_lock_bh(&session->back_lock); kfifo_in(&session->cmdpool.queue, (void*)&conn->login_task, @@ -3134,6 +3134,8 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn) mutex_unlock(&session->eh_mutex); iscsi_destroy_conn(cls_conn); + kfree(tmp_persistent_address); + kfree(tmp_local_ipaddr); } EXPORT_SYMBOL_GPL(iscsi_conn_teardown); diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index bd6d459afce5..08b2e85dcd7d 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -2954,8 +2954,8 @@ lpfc_debugfs_nvmeio_trc_write(struct file *file, const char __user *buf, char mybuf[64]; char *pbuf; - if (nbytes > 64) - nbytes = 64; + if (nbytes > 63) + nbytes = 63; memset(mybuf, 0, sizeof(mybuf)); diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c index c2ba65224633..1f037b8ab904 100644 --- a/drivers/scsi/vmw_pvscsi.c +++ b/drivers/scsi/vmw_pvscsi.c @@ -586,9 +586,12 @@ static void pvscsi_complete_request(struct pvscsi_adapter *adapter, * Commands like INQUIRY may transfer less data than * requested by the initiator via bufflen. Set residual * count to make upper layer aware of the actual amount - * of data returned. + * of data returned. There are cases when controller + * returns zero dataLen with non zero data - do not set + * residual count in that case. */ - scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen); + if (e->dataLen && (e->dataLen < scsi_bufflen(cmd))) + scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen); cmd->result = (DID_OK << 16); break; diff --git a/fs/namespace.c b/fs/namespace.c index 659a8f39c61a..b696543adab8 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -4263,12 +4263,11 @@ SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path, return err; err = user_path_at(dfd, path, kattr.lookup_flags, &target); - if (err) - return err; - - err = do_mount_setattr(&target, &kattr); + if (!err) { + err = do_mount_setattr(&target, &kattr); + path_put(&target); + } finish_mount_kattr(&kattr); - path_put(&target); return err; } diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 58e744b78c2c..936dc0b6c226 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -277,6 +277,7 @@ enum vmscan_throttle_state { VMSCAN_THROTTLE_WRITEBACK, VMSCAN_THROTTLE_ISOLATED, VMSCAN_THROTTLE_NOPROGRESS, + VMSCAN_THROTTLE_CONGESTED, NR_VMSCAN_THROTTLE, }; diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index f25a6149d3ba..ca2e9009a651 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -30,12 +30,14 @@ #define _VMSCAN_THROTTLE_WRITEBACK (1 << VMSCAN_THROTTLE_WRITEBACK) #define _VMSCAN_THROTTLE_ISOLATED (1 << VMSCAN_THROTTLE_ISOLATED) #define _VMSCAN_THROTTLE_NOPROGRESS (1 << VMSCAN_THROTTLE_NOPROGRESS) +#define _VMSCAN_THROTTLE_CONGESTED (1 << VMSCAN_THROTTLE_CONGESTED) #define show_throttle_flags(flags) \ (flags) ? __print_flags(flags, "|", \ {_VMSCAN_THROTTLE_WRITEBACK, "VMSCAN_THROTTLE_WRITEBACK"}, \ {_VMSCAN_THROTTLE_ISOLATED, "VMSCAN_THROTTLE_ISOLATED"}, \ - {_VMSCAN_THROTTLE_NOPROGRESS, "VMSCAN_THROTTLE_NOPROGRESS"} \ + {_VMSCAN_THROTTLE_NOPROGRESS, "VMSCAN_THROTTLE_NOPROGRESS"}, \ + {_VMSCAN_THROTTLE_CONGESTED, "VMSCAN_THROTTLE_CONGESTED"} \ ) : "VMSCAN_THROTTLE_NONE" diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c index 4fbd729edc9e..ad65436756af 100644 --- a/mm/damon/dbgfs.c +++ b/mm/damon/dbgfs.c @@ -353,6 +353,7 @@ static ssize_t dbgfs_target_ids_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct damon_ctx *ctx = file->private_data; + struct damon_target *t, *next_t; bool id_is_pid = true; char *kbuf, *nrs; unsigned long *targets; @@ -397,8 +398,12 @@ static ssize_t dbgfs_target_ids_write(struct file *file, goto unlock_out; } - /* remove targets with previously-set primitive */ - damon_set_targets(ctx, NULL, 0); + /* remove previously set targets */ + damon_for_each_target_safe(t, next_t, ctx) { + if (targetid_is_pid(ctx)) + put_pid((struct pid *)t->id); + damon_destroy_target(t); + } /* Configure the context for the address space type */ if (id_is_pid) diff --git a/mm/vmscan.c b/mm/vmscan.c index fb9584641ac7..700434db5735 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1021,6 +1021,39 @@ static void handle_write_error(struct address_space *mapping, unlock_page(page); } +static bool skip_throttle_noprogress(pg_data_t *pgdat) +{ + int reclaimable = 0, write_pending = 0; + int i; + + /* + * If kswapd is disabled, reschedule if necessary but do not + * throttle as the system is likely near OOM. + */ + if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES) + return true; + + /* + * If there are a lot of dirty/writeback pages then do not + * throttle as throttling will occur when the pages cycle + * towards the end of the LRU if still under writeback. + */ + for (i = 0; i < MAX_NR_ZONES; i++) { + struct zone *zone = pgdat->node_zones + i; + + if (!populated_zone(zone)) + continue; + + reclaimable += zone_reclaimable_pages(zone); + write_pending += zone_page_state_snapshot(zone, + NR_ZONE_WRITE_PENDING); + } + if (2 * write_pending <= reclaimable) + return true; + + return false; +} + void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason) { wait_queue_head_t *wqh = &pgdat->reclaim_wait[reason]; @@ -1056,8 +1089,16 @@ void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason) } break; + case VMSCAN_THROTTLE_CONGESTED: + fallthrough; case VMSCAN_THROTTLE_NOPROGRESS: - timeout = HZ/2; + if (skip_throttle_noprogress(pgdat)) { + cond_resched(); + return; + } + + timeout = 1; + break; case VMSCAN_THROTTLE_ISOLATED: timeout = HZ/50; @@ -3321,7 +3362,7 @@ again: if (!current_is_kswapd() && current_may_throttle() && !sc->hibernation_mode && test_bit(LRUVEC_CONGESTED, &target_lruvec->flags)) - reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK); + reclaim_throttle(pgdat, VMSCAN_THROTTLE_CONGESTED); if (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed, sc)) @@ -3386,16 +3427,16 @@ static void consider_reclaim_throttle(pg_data_t *pgdat, struct scan_control *sc) } /* - * Do not throttle kswapd on NOPROGRESS as it will throttle on - * VMSCAN_THROTTLE_WRITEBACK if there are too many pages under - * writeback and marked for immediate reclaim at the tail of - * the LRU. + * Do not throttle kswapd or cgroup reclaim on NOPROGRESS as it will + * throttle on VMSCAN_THROTTLE_WRITEBACK if there are too many pages + * under writeback and marked for immediate reclaim at the tail of the + * LRU. */ - if (current_is_kswapd()) + if (current_is_kswapd() || cgroup_reclaim(sc)) return; /* Throttle if making no progress at high prioities. */ - if (sc->priority < DEF_PRIORITY - 2) + if (sc->priority == 1 && !sc->nr_reclaimed) reclaim_throttle(pgdat, VMSCAN_THROTTLE_NOPROGRESS); } @@ -3415,6 +3456,7 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc) unsigned long nr_soft_scanned; gfp_t orig_mask; pg_data_t *last_pgdat = NULL; + pg_data_t *first_pgdat = NULL; /* * If the number of buffer_heads in the machine exceeds the maximum @@ -3478,14 +3520,19 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc) /* need some check for avoid more shrink_zone() */ } + if (!first_pgdat) + first_pgdat = zone->zone_pgdat; + /* See comment about same check for global reclaim above */ if (zone->zone_pgdat == last_pgdat) continue; last_pgdat = zone->zone_pgdat; shrink_node(zone->zone_pgdat, sc); - consider_reclaim_throttle(zone->zone_pgdat, sc); } + if (first_pgdat) + consider_reclaim_throttle(first_pgdat, sc); + /* * Restore to original mask to avoid the impact on the caller if we * promoted it to __GFP_HIGHMEM. diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 9434367af166..c82b033e8942 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2473,7 +2473,7 @@ static int process_switch_event(struct perf_tool *tool, if (perf_event__process_switch(tool, event, sample, machine) < 0) return -1; - if (scripting_ops && scripting_ops->process_switch) + if (scripting_ops && scripting_ops->process_switch && !filter_cpu(sample)) scripting_ops->process_switch(event, sample, machine); if (!script->show_switch_events) diff --git a/tools/perf/scripts/python/intel-pt-events.py b/tools/perf/scripts/python/intel-pt-events.py index 1d3a189a9a54..66452a8ec358 100644 --- a/tools/perf/scripts/python/intel-pt-events.py +++ b/tools/perf/scripts/python/intel-pt-events.py @@ -32,8 +32,7 @@ try: except: broken_pipe_exception = IOError -glb_switch_str = None -glb_switch_printed = True +glb_switch_str = {} glb_insn = False glb_disassembler = None glb_src = False @@ -70,6 +69,7 @@ def trace_begin(): ap = argparse.ArgumentParser(usage = "", add_help = False) ap.add_argument("--insn-trace", action='store_true') ap.add_argument("--src-trace", action='store_true') + ap.add_argument("--all-switch-events", action='store_true') global glb_args global glb_insn global glb_src @@ -256,10 +256,6 @@ def print_srccode(comm, param_dict, sample, symbol, dso, with_insn): print(start_str, src_str) def do_process_event(param_dict): - global glb_switch_printed - if not glb_switch_printed: - print(glb_switch_str) - glb_switch_printed = True event_attr = param_dict["attr"] sample = param_dict["sample"] raw_buf = param_dict["raw_buf"] @@ -274,6 +270,11 @@ def do_process_event(param_dict): dso = get_optional(param_dict, "dso") symbol = get_optional(param_dict, "symbol") + cpu = sample["cpu"] + if cpu in glb_switch_str: + print(glb_switch_str[cpu]) + del glb_switch_str[cpu] + if name[0:12] == "instructions": if glb_src: print_srccode(comm, param_dict, sample, symbol, dso, True) @@ -336,8 +337,6 @@ def auxtrace_error(typ, code, cpu, pid, tid, ip, ts, msg, cpumode, *x): sys.exit(1) def context_switch(ts, cpu, pid, tid, np_pid, np_tid, machine_pid, out, out_preempt, *x): - global glb_switch_printed - global glb_switch_str if out: out_str = "Switch out " else: @@ -350,6 +349,10 @@ def context_switch(ts, cpu, pid, tid, np_pid, np_tid, machine_pid, out, out_pree machine_str = "" else: machine_str = "machine PID %d" % machine_pid - glb_switch_str = "%16s %5d/%-5d [%03u] %9u.%09u %5d/%-5d %s %s" % \ + switch_str = "%16s %5d/%-5d [%03u] %9u.%09u %5d/%-5d %s %s" % \ (out_str, pid, tid, cpu, ts / 1000000000, ts %1000000000, np_pid, np_tid, machine_str, preempt_str) - glb_switch_printed = False + if glb_args.all_switch_events: + print(switch_str); + else: + global glb_switch_str + glb_switch_str[cpu] = switch_str diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c index e9bfe856a5de..b1be59b4e2a4 100644 --- a/tools/perf/ui/tui/setup.c +++ b/tools/perf/ui/tui/setup.c @@ -170,9 +170,11 @@ void ui__exit(bool wait_for_ok) "Press any key...", 0); SLtt_set_cursor_visibility(1); - SLsmg_refresh(); - SLsmg_reset_smg(); + if (!pthread_mutex_trylock(&ui__lock)) { + SLsmg_refresh(); + SLsmg_reset_smg(); + pthread_mutex_unlock(&ui__lock); + } SLang_reset_tty(); - perf_error__unregister(&perf_tui_eops); } diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index 254601060b39..666b59baeb70 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -66,7 +66,12 @@ static bool key_equal(const void *key1, const void *key2, struct hashmap *ids__new(void) { - return hashmap__new(key_hash, key_equal, NULL); + struct hashmap *hash; + + hash = hashmap__new(key_hash, key_equal, NULL); + if (IS_ERR(hash)) + return NULL; + return hash; } void ids__free(struct hashmap *ids) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 10c3187e4c5a..e8613cbda331 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -3625,6 +3625,7 @@ static int intel_pt_parse_vm_tm_corr_arg(struct intel_pt *pt, char **args) *args = p; return 0; } + p += 1; while (1) { vmcs = strtoull(p, &p, 0); if (errno) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 6ae58406f4fc..8dfbba15aeb8 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1659,6 +1659,21 @@ bool is_pmu_core(const char *name) return !strcmp(name, "cpu") || is_arm_pmu_core(name); } +static bool pmu_alias_is_duplicate(struct sevent *alias_a, + struct sevent *alias_b) +{ + /* Different names -> never duplicates */ + if (strcmp(alias_a->name, alias_b->name)) + return false; + + /* Don't remove duplicates for hybrid PMUs */ + if (perf_pmu__is_hybrid(alias_a->pmu) && + perf_pmu__is_hybrid(alias_b->pmu)) + return false; + + return true; +} + void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, bool long_desc, bool details_flag, bool deprecated, const char *pmu_name) @@ -1744,12 +1759,8 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, qsort(aliases, len, sizeof(struct sevent), cmp_sevent); for (j = 0; j < len; j++) { /* Skip duplicates */ - if (j > 0 && !strcmp(aliases[j].name, aliases[j - 1].name)) { - if (!aliases[j].pmu || !aliases[j - 1].pmu || - !strcmp(aliases[j].pmu, aliases[j - 1].pmu)) { - continue; - } - } + if (j > 0 && pmu_alias_is_duplicate(&aliases[j], &aliases[j - 1])) + continue; if (name_only) { printf("%s ", aliases[j].name); diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 8a09057d2f22..9354a5e0321c 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -87,7 +87,7 @@ static bool test_uffdio_minor = false; static bool map_shared; static int shm_fd; -static int huge_fd; +static int huge_fd = -1; /* only used for hugetlb_shared test */ static char *huge_fd_off0; static unsigned long long *count_verify; static int uffd = -1; @@ -223,6 +223,9 @@ static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset) static void hugetlb_release_pages(char *rel_area) { + if (huge_fd == -1) + return; + if (fallocate(huge_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, rel_area == huge_fd_off0 ? 0 : nr_pages * page_size, nr_pages * page_size)) @@ -235,16 +238,17 @@ static void hugetlb_allocate_area(void **alloc_area) char **alloc_area_alias; *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, - (map_shared ? MAP_SHARED : MAP_PRIVATE) | - MAP_HUGETLB, - huge_fd, *alloc_area == area_src ? 0 : - nr_pages * page_size); + map_shared ? MAP_SHARED : + MAP_PRIVATE | MAP_HUGETLB | + (*alloc_area == area_src ? 0 : MAP_NORESERVE), + huge_fd, + *alloc_area == area_src ? 0 : nr_pages * page_size); if (*alloc_area == MAP_FAILED) err("mmap of hugetlbfs file failed"); if (map_shared) { area_alias = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_HUGETLB, + MAP_SHARED, huge_fd, *alloc_area == area_src ? 0 : nr_pages * page_size); if (area_alias == MAP_FAILED) |