diff options
author | Rodrigo Vivi <rodrigo.vivi@intel.com> | 2022-11-14 14:32:34 -0500 |
---|---|---|
committer | Rodrigo Vivi <rodrigo.vivi@intel.com> | 2022-11-14 14:32:34 -0500 |
commit | 002c6ca75289a4ac4f6738213dd2d258704886e4 (patch) | |
tree | d99ec659974b2121cef3e06f54a97d719d0a94dd /drivers/gpu/drm/i915/gem | |
parent | 801543b2593b04eef974a73d3ea03e7efbd5ffae (diff) | |
parent | c02f20d38fb90eba606277874581db124ace42c4 (diff) |
Merge drm/drm-next into drm-intel-next
Catch up on 6.1-rc cycle in order to solve the intel_backlight
conflict on linux-next.
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Diffstat (limited to 'drivers/gpu/drm/i915/gem')
26 files changed, 830 insertions, 319 deletions
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index dabdfe09f5e5..01402f3c58f6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1269,6 +1269,10 @@ static void i915_gem_context_release_work(struct work_struct *work) trace_i915_context_free(ctx); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); + spin_lock(&ctx->i915->gem.contexts.lock); + list_del(&ctx->link); + spin_unlock(&ctx->i915->gem.contexts.lock); + if (ctx->syncobj) drm_syncobj_put(ctx->syncobj); @@ -1383,14 +1387,8 @@ kill_engines(struct i915_gem_engines *engines, bool exit, bool persistent) */ for_each_gem_engine(ce, engines, it) { struct intel_engine_cs *engine; - bool skip = false; - - if (exit) - skip = intel_context_set_exiting(ce); - else if (!persistent) - skip = intel_context_exit_nonpersistent(ce, NULL); - if (skip) + if ((exit || !persistent) && intel_context_revoke(ce)) continue; /* Already marked. */ /* @@ -1454,7 +1452,7 @@ static void engines_idle_release(struct i915_gem_context *ctx, int err; /* serialises with execbuf */ - set_bit(CONTEXT_CLOSED_BIT, &ce->flags); + intel_context_close(ce); if (!intel_context_pin_if_active(ce)) continue; @@ -1521,10 +1519,6 @@ static void context_close(struct i915_gem_context *ctx) ctx->file_priv = ERR_PTR(-EBADF); - spin_lock(&ctx->i915->gem.contexts.lock); - list_del(&ctx->link); - spin_unlock(&ctx->i915->gem.contexts.lock); - client = ctx->client; if (client) { spin_lock(&client->ctx_lock); @@ -2304,7 +2298,6 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, } args->ctx_id = id; - drm_dbg(&i915->drm, "HW context %d created\n", args->ctx_id); return 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index f5062d0c6333..ec6f7ae47783 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -25,43 +25,44 @@ static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf) return to_intel_bo(buf->priv); } -static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachment, +static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attach, enum dma_data_direction dir) { - struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf); - struct sg_table *st; + struct drm_i915_gem_object *obj = dma_buf_to_obj(attach->dmabuf); + struct sg_table *sgt; struct scatterlist *src, *dst; int ret, i; - /* Copy sg so that we make an independent mapping */ - st = kmalloc(sizeof(struct sg_table), GFP_KERNEL); - if (st == NULL) { + /* + * Make a copy of the object's sgt, so that we can make an independent + * mapping + */ + sgt = kmalloc(sizeof(*sgt), GFP_KERNEL); + if (!sgt) { ret = -ENOMEM; goto err; } - ret = sg_alloc_table(st, obj->mm.pages->nents, GFP_KERNEL); + ret = sg_alloc_table(sgt, obj->mm.pages->orig_nents, GFP_KERNEL); if (ret) goto err_free; - src = obj->mm.pages->sgl; - dst = st->sgl; - for (i = 0; i < obj->mm.pages->nents; i++) { + dst = sgt->sgl; + for_each_sg(obj->mm.pages->sgl, src, obj->mm.pages->orig_nents, i) { sg_set_page(dst, sg_page(src), src->length, 0); dst = sg_next(dst); - src = sg_next(src); } - ret = dma_map_sgtable(attachment->dev, st, dir, DMA_ATTR_SKIP_CPU_SYNC); + ret = dma_map_sgtable(attach->dev, sgt, dir, DMA_ATTR_SKIP_CPU_SYNC); if (ret) goto err_free_sg; - return st; + return sgt; err_free_sg: - sg_free_table(st); + sg_free_table(sgt); err_free: - kfree(st); + kfree(sgt); err: return ERR_PTR(ret); } @@ -72,7 +73,7 @@ static int i915_gem_dmabuf_vmap(struct dma_buf *dma_buf, struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); void *vaddr; - vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); if (IS_ERR(vaddr)) return PTR_ERR(vaddr); @@ -236,15 +237,15 @@ struct dma_buf *i915_gem_prime_export(struct drm_gem_object *gem_obj, int flags) static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct sg_table *pages; + struct sg_table *sgt; unsigned int sg_page_sizes; assert_object_held(obj); - pages = dma_buf_map_attachment(obj->base.import_attach, - DMA_BIDIRECTIONAL); - if (IS_ERR(pages)) - return PTR_ERR(pages); + sgt = dma_buf_map_attachment(obj->base.import_attach, + DMA_BIDIRECTIONAL); + if (IS_ERR(sgt)) + return PTR_ERR(sgt); /* * DG1 is special here since it still snoops transactions even with @@ -261,16 +262,16 @@ static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) (!HAS_LLC(i915) && !IS_DG1(i915))) wbinvd_on_all_cpus(); - sg_page_sizes = i915_sg_dma_sizes(pages->sgl); - __i915_gem_object_set_pages(obj, pages, sg_page_sizes); + sg_page_sizes = i915_sg_dma_sizes(sgt->sgl); + __i915_gem_object_set_pages(obj, sgt, sg_page_sizes); return 0; } static void i915_gem_object_put_pages_dmabuf(struct drm_i915_gem_object *obj, - struct sg_table *pages) + struct sg_table *sgt) { - dma_buf_unmap_attachment(obj->base.import_attach, pages, + dma_buf_unmap_attachment(obj->base.import_attach, sgt, DMA_BIDIRECTIONAL); } @@ -313,7 +314,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, get_dma_buf(dma_buf); obj = i915_gem_object_alloc(); - if (obj == NULL) { + if (!obj) { ret = -ENOMEM; goto fail_detach; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 1674b0c5802b..d44a152ce680 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -397,7 +397,7 @@ struct i915_vma * i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, struct i915_gem_ww_ctx *ww, u32 alignment, - const struct i915_ggtt_view *view, + const struct i915_gtt_view *view, unsigned int flags) { struct drm_i915_private *i915 = to_i915(obj->base.dev); @@ -434,7 +434,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, */ vma = ERR_PTR(-ENOSPC); if ((flags & PIN_MAPPABLE) == 0 && - (!view || view->type == I915_GGTT_VIEW_NORMAL)) + (!view || view->type == I915_GTT_VIEW_NORMAL)) vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0, alignment, flags | PIN_MAPPABLE | PIN_NONBLOCK); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index c3e41295b6b5..ffa30d2dd47f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -2425,7 +2425,7 @@ gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv, /* Check whether the file_priv has already selected one ring. */ if ((int)file_priv->bsd_engine < 0) file_priv->bsd_engine = - get_random_int() % num_vcs_engines(dev_priv); + prandom_u32_max(num_vcs_engines(dev_priv)); return file_priv->bsd_engine; } @@ -2955,11 +2955,6 @@ await_fence_array(struct i915_execbuffer *eb, int err; for (n = 0; n < eb->num_fences; n++) { - struct drm_syncobj *syncobj; - unsigned int flags; - - syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2); - if (!eb->fences[n].dma_fence) continue; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index c698f95af15f..629acb403a2c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -6,7 +6,6 @@ #include <linux/scatterlist.h> #include <linux/slab.h> -#include <linux/swiotlb.h> #include "i915_drv.h" #include "i915_gem.h" @@ -38,22 +37,12 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) struct scatterlist *sg; unsigned int sg_page_sizes; unsigned int npages; - int max_order; + int max_order = MAX_ORDER; + unsigned int max_segment; gfp_t gfp; - max_order = MAX_ORDER; -#ifdef CONFIG_SWIOTLB - if (is_swiotlb_active(obj->base.dev->dev)) { - unsigned int max_segment; - - max_segment = swiotlb_max_segment(); - if (max_segment) { - max_segment = max_t(unsigned int, max_segment, - PAGE_SIZE) >> PAGE_SHIFT; - max_order = min(max_order, ilog2(max_segment)); - } - } -#endif + max_segment = i915_sg_segment_size(i915->drm.dev) >> PAGE_SHIFT; + max_order = min(max_order, get_order(max_segment)); gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE; if (IS_I965GM(i915) || IS_I965G(i915)) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h index 1b88ea13435c..5a7a14e85c3f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h @@ -12,8 +12,6 @@ struct drm_i915_private; struct drm_i915_gem_object; struct intel_memory_region; -extern const struct drm_i915_gem_object_ops i915_gem_lmem_obj_ops; - void __iomem * i915_gem_object_lmem_io_map(struct drm_i915_gem_object *obj, unsigned long n, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 0c5c43852e24..e63329bc8065 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -194,17 +194,17 @@ int i915_gem_mmap_gtt_version(void) return 4; } -static inline struct i915_ggtt_view +static inline struct i915_gtt_view compute_partial_view(const struct drm_i915_gem_object *obj, pgoff_t page_offset, unsigned int chunk) { - struct i915_ggtt_view view; + struct i915_gtt_view view; if (i915_gem_object_is_tiled(obj)) chunk = roundup(chunk, tile_row_pages(obj) ?: 1); - view.type = I915_GGTT_VIEW_PARTIAL; + view.type = I915_GTT_VIEW_PARTIAL; view.partial.offset = rounddown(page_offset, chunk); view.partial.size = min_t(unsigned int, chunk, @@ -212,7 +212,7 @@ compute_partial_view(const struct drm_i915_gem_object *obj, /* If the partial covers the entire object, just create a normal VMA. */ if (chunk >= obj->base.size >> PAGE_SHIFT) - view.type = I915_GGTT_VIEW_NORMAL; + view.type = I915_GTT_VIEW_NORMAL; return view; } @@ -341,12 +341,12 @@ retry: PIN_NOEVICT); if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK)) { /* Use a partial view if it is bigger than available space */ - struct i915_ggtt_view view = + struct i915_gtt_view view = compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES); unsigned int flags; flags = PIN_MAPPABLE | PIN_NOSEARCH; - if (view.type == I915_GGTT_VIEW_NORMAL) + if (view.type == I915_GTT_VIEW_NORMAL) flags |= PIN_NONBLOCK; /* avoid warnings for pinned */ /* @@ -357,7 +357,7 @@ retry: vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 0, flags); if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK)) { flags = PIN_MAPPABLE; - view.type = I915_GGTT_VIEW_PARTIAL; + view.type = I915_GTT_VIEW_PARTIAL; vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 0, flags); } @@ -394,7 +394,7 @@ retry: /* Finally, remap it using the new GTT offset */ ret = remap_io_mapping(area, - area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT), + area->vm_start + (vma->gtt_view.partial.offset << PAGE_SHIFT), (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT, min_t(u64, vma->size, area->vm_end - area->vm_start), &ggtt->iomap); @@ -413,7 +413,7 @@ retry: vma->mmo = mmo; if (CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND) - intel_wakeref_auto(&to_gt(i915)->ggtt->userfault_wakeref, + intel_wakeref_auto(&i915->runtime_pm.userfault_wakeref, msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)); if (write) { @@ -550,6 +550,22 @@ out: intel_runtime_pm_put(&i915->runtime_pm, wakeref); } +void i915_gem_object_runtime_pm_release_mmap_offset(struct drm_i915_gem_object *obj) +{ + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + struct ttm_device *bdev = bo->bdev; + + drm_vma_node_unmap(&bo->base.vma_node, bdev->dev_mapping); + + /* + * We have exclusive access here via runtime suspend. All other callers + * must first grab the rpm wakeref. + */ + GEM_BUG_ON(!obj->userfault_count); + list_del(&obj->userfault_link); + obj->userfault_count = 0; +} + void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj) { struct i915_mmap_offset *mmo, *mn; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.h b/drivers/gpu/drm/i915/gem/i915_gem_mman.h index efee9e0d2508..1fa91b3033b3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.h @@ -27,6 +27,7 @@ int i915_gem_dumb_mmap_offset(struct drm_file *file_priv, void __i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj); void i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj); +void i915_gem_object_runtime_pm_release_mmap_offset(struct drm_i915_gem_object *obj); void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj); #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 389e9f157ca5..733696057761 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -238,7 +238,7 @@ static void __i915_gem_object_free_mmaps(struct drm_i915_gem_object *obj) { /* Skip serialisation and waking the device if known to be not used. */ - if (obj->userfault_count) + if (obj->userfault_count && !IS_DGFX(to_i915(obj->base.dev))) i915_gem_object_release_mmap_gtt(obj); if (!RB_EMPTY_ROOT(&obj->mmo.offsets)) { @@ -290,7 +290,21 @@ void __i915_gem_object_pages_fini(struct drm_i915_gem_object *obj) __i915_gem_object_free_mmaps(obj); atomic_set(&obj->mm.pages_pin_count, 0); + + /* + * dma_buf_unmap_attachment() requires reservation to be + * locked. The imported GEM shouldn't share reservation lock + * and ttm_bo_cleanup_memtype_use() shouldn't be invoked for + * dma-buf, so it's safe to take the lock. + */ + if (obj->base.import_attach) + i915_gem_object_lock(obj, NULL); + __i915_gem_object_put_pages(obj); + + if (obj->base.import_attach) + i915_gem_object_unlock(obj); + GEM_BUG_ON(i915_gem_object_has_pages(obj)); } @@ -444,6 +458,16 @@ i915_gem_object_read_from_page_iomap(struct drm_i915_gem_object *obj, u64 offset io_mapping_unmap(src_map); } +static bool object_has_mappable_iomem(struct drm_i915_gem_object *obj) +{ + GEM_BUG_ON(!i915_gem_object_has_iomem(obj)); + + if (IS_DGFX(to_i915(obj->base.dev))) + return i915_ttm_resource_mappable(i915_gem_to_ttm(obj)->resource); + + return true; +} + /** * i915_gem_object_read_from_page - read data from the page of a GEM object * @obj: GEM object to read from @@ -466,7 +490,7 @@ int i915_gem_object_read_from_page(struct drm_i915_gem_object *obj, u64 offset, if (i915_gem_object_has_struct_page(obj)) i915_gem_object_read_from_page_kmap(obj, offset, dst, size); - else if (i915_gem_object_has_iomem(obj)) + else if (i915_gem_object_has_iomem(obj) && object_has_mappable_iomem(obj)) i915_gem_object_read_from_page_iomap(obj, offset, dst, size); else return -ENODEV; @@ -653,6 +677,41 @@ int i915_gem_object_migrate(struct drm_i915_gem_object *obj, struct i915_gem_ww_ctx *ww, enum intel_region_id id) { + return __i915_gem_object_migrate(obj, ww, id, obj->flags); +} + +/** + * __i915_gem_object_migrate - Migrate an object to the desired region id, with + * control of the extra flags + * @obj: The object to migrate. + * @ww: An optional struct i915_gem_ww_ctx. If NULL, the backend may + * not be successful in evicting other objects to make room for this object. + * @id: The region id to migrate to. + * @flags: The object flags. Normally just obj->flags. + * + * Attempt to migrate the object to the desired memory region. The + * object backend must support migration and the object may not be + * pinned, (explicitly pinned pages or pinned vmas). The object must + * be locked. + * On successful completion, the object will have pages pointing to + * memory in the new region, but an async migration task may not have + * completed yet, and to accomplish that, i915_gem_object_wait_migration() + * must be called. + * + * Note: the @ww parameter is not used yet, but included to make sure + * callers put some effort into obtaining a valid ww ctx if one is + * available. + * + * Return: 0 on success. Negative error code on failure. In particular may + * return -ENXIO on lack of region space, -EDEADLK for deadlock avoidance + * if @ww is set, -EINTR or -ERESTARTSYS if signal pending, and + * -EBUSY if the object is pinned. + */ +int __i915_gem_object_migrate(struct drm_i915_gem_object *obj, + struct i915_gem_ww_ctx *ww, + enum intel_region_id id, + unsigned int flags) +{ struct drm_i915_private *i915 = to_i915(obj->base.dev); struct intel_memory_region *mr; @@ -672,7 +731,7 @@ int i915_gem_object_migrate(struct drm_i915_gem_object *obj, return 0; } - return obj->ops->migrate(obj, mr); + return obj->ops->migrate(obj, mr, flags); } /** @@ -723,6 +782,9 @@ bool i915_gem_object_needs_ccs_pages(struct drm_i915_gem_object *obj) bool lmem_placement = false; int i; + if (!HAS_FLAT_CCS(to_i915(obj->base.dev))) + return false; + for (i = 0; i < obj->mm.n_placements; i++) { /* Compression is not allowed for the objects with smem placement */ if (obj->mm.placements[i]->type == INTEL_MEMORY_SYSTEM) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 6f0a3ce35567..6b9ecff42bb5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -482,6 +482,10 @@ void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj, void *__must_check i915_gem_object_pin_map_unlocked(struct drm_i915_gem_object *obj, enum i915_map_type type); +enum i915_map_type i915_coherent_map_type(struct drm_i915_private *i915, + struct drm_i915_gem_object *obj, + bool always_coherent); + void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj, unsigned long offset, unsigned long size); @@ -543,7 +547,7 @@ struct i915_vma * __must_check i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, struct i915_gem_ww_ctx *ww, u32 alignment, - const struct i915_ggtt_view *view, + const struct i915_gtt_view *view, unsigned int flags); void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj); @@ -608,6 +612,10 @@ bool i915_gem_object_migratable(struct drm_i915_gem_object *obj); int i915_gem_object_migrate(struct drm_i915_gem_object *obj, struct i915_gem_ww_ctx *ww, enum intel_region_id id); +int __i915_gem_object_migrate(struct drm_i915_gem_object *obj, + struct i915_gem_ww_ctx *ww, + enum intel_region_id id, + unsigned int flags); bool i915_gem_object_can_migrate(struct drm_i915_gem_object *obj, enum intel_region_id id); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 9f6b14ec189a..d0d6772e6f36 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -107,7 +107,8 @@ struct drm_i915_gem_object_ops { * pinning or for as long as the object lock is held. */ int (*migrate)(struct drm_i915_gem_object *obj, - struct intel_memory_region *mr); + struct intel_memory_region *mr, + unsigned int flags); void (*release)(struct drm_i915_gem_object *obj); @@ -298,7 +299,8 @@ struct drm_i915_gem_object { }; /** - * Whether the object is currently in the GGTT mmap. + * Whether the object is currently in the GGTT or any other supported + * fake offset mmap backed by lmem. */ unsigned int userfault_count; struct list_head userfault_link; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 4df50b049cea..16f845663ff2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -466,6 +466,18 @@ void *i915_gem_object_pin_map_unlocked(struct drm_i915_gem_object *obj, return ret; } +enum i915_map_type i915_coherent_map_type(struct drm_i915_private *i915, + struct drm_i915_gem_object *obj, + bool always_coherent) +{ + if (i915_gem_object_is_lmem(obj)) + return I915_MAP_WC; + if (HAS_LLC(i915) || always_coherent) + return I915_MAP_WB; + else + return I915_MAP_WC; +} + void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj, unsigned long offset, unsigned long size) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 00359ec9d58b..0d812f4d787d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -22,9 +22,12 @@ void i915_gem_suspend(struct drm_i915_private *i915) { + struct intel_gt *gt; + unsigned int i; + GEM_TRACE("%s\n", dev_name(i915->drm.dev)); - intel_wakeref_auto(&to_gt(i915)->ggtt->userfault_wakeref, 0); + intel_wakeref_auto(&i915->runtime_pm.userfault_wakeref, 0); flush_workqueue(i915->wq); /* @@ -36,7 +39,8 @@ void i915_gem_suspend(struct drm_i915_private *i915) * state. Fortunately, the kernel_context is disposable and we do * not rely on its state. */ - intel_gt_suspend_prepare(to_gt(i915)); + for_each_gt(gt, i915, i) + intel_gt_suspend_prepare(gt); i915_gem_drain_freed_objects(i915); } @@ -131,7 +135,9 @@ void i915_gem_suspend_late(struct drm_i915_private *i915) &i915->mm.purge_list, NULL }, **phase; + struct intel_gt *gt; unsigned long flags; + unsigned int i; bool flush = false; /* @@ -154,7 +160,8 @@ void i915_gem_suspend_late(struct drm_i915_private *i915) * machine in an unusable condition. */ - intel_gt_suspend_late(to_gt(i915)); + for_each_gt(gt, i915, i) + intel_gt_suspend_late(gt); spin_lock_irqsave(&i915->mm.obj_lock, flags); for (phase = phases; *phase; phase++) { @@ -212,7 +219,8 @@ int i915_gem_freeze_late(struct drm_i915_private *i915) void i915_gem_resume(struct drm_i915_private *i915) { - int ret; + struct intel_gt *gt; + int ret, i, j; GEM_TRACE("%s\n", dev_name(i915->drm.dev)); @@ -224,8 +232,25 @@ void i915_gem_resume(struct drm_i915_private *i915) * guarantee that the context image is complete. So let's just reset * it and start again. */ - intel_gt_resume(to_gt(i915)); + for_each_gt(gt, i915, i) + if (intel_gt_resume(gt)) + goto err_wedged; ret = lmem_restore(i915, I915_TTM_BACKUP_ALLOW_GPU); GEM_WARN_ON(ret); + + return; + +err_wedged: + for_each_gt(gt, i915, j) { + if (!intel_gt_is_wedged(gt)) { + dev_err(i915->drm.dev, + "Failed to re-initialize GPU[%u], declaring it wedged!\n", + j); + intel_gt_set_wedged(gt); + } + + if (j == i) + break; + } } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index f42ca1179f37..2f7804492cd5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -194,7 +194,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj) struct intel_memory_region *mem = obj->mm.region; struct address_space *mapping = obj->base.filp->f_mapping; const unsigned long page_count = obj->base.size / PAGE_SIZE; - unsigned int max_segment = i915_sg_segment_size(); + unsigned int max_segment = i915_sg_segment_size(i915->drm.dev); struct sg_table *st; struct sgt_iter sgt_iter; struct page *page; @@ -369,14 +369,14 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, __start_cpu_write(obj); /* - * On non-LLC platforms, force the flush-on-acquire if this is ever + * On non-LLC igfx platforms, force the flush-on-acquire if this is ever * swapped-in. Our async flush path is not trust worthy enough yet(and * happens in the wrong order), and with some tricks it's conceivable * for userspace to change the cache-level to I915_CACHE_NONE after the * pages are swapped-in, and since execbuf binds the object before doing * the async flush, we have a race window. */ - if (!HAS_LLC(i915)) + if (!HAS_LLC(i915) && !IS_DGFX(i915)) obj->cache_dirty = true; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index 4f4c9461a23b..0c70711818ed 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -77,22 +77,26 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *i915, mutex_unlock(&i915->mm.stolen_lock); } -static int i915_adjust_stolen(struct drm_i915_private *i915, - struct resource *dsm) +static bool valid_stolen_size(struct drm_i915_private *i915, struct resource *dsm) +{ + return (dsm->start != 0 || HAS_LMEMBAR_SMEM_STOLEN(i915)) && dsm->end > dsm->start; +} + +static int adjust_stolen(struct drm_i915_private *i915, + struct resource *dsm) { struct i915_ggtt *ggtt = to_gt(i915)->ggtt; struct intel_uncore *uncore = ggtt->vm.gt->uncore; - struct resource *r; - if (dsm->start == 0 || dsm->end <= dsm->start) + if (!valid_stolen_size(i915, dsm)) return -EINVAL; /* + * Make sure we don't clobber the GTT if it's within stolen memory + * * TODO: We have yet too encounter the case where the GTT wasn't at the * end of stolen. With that assumption we could simplify this. */ - - /* Make sure we don't clobber the GTT if it's within stolen memory */ if (GRAPHICS_VER(i915) <= 4 && !IS_G33(i915) && !IS_PINEVIEW(i915) && !IS_G4X(i915)) { struct resource stolen[2] = {*dsm, *dsm}; @@ -131,12 +135,25 @@ static int i915_adjust_stolen(struct drm_i915_private *i915, } } + if (!valid_stolen_size(i915, dsm)) + return -EINVAL; + + return 0; +} + +static int request_smem_stolen(struct drm_i915_private *i915, + struct resource *dsm) +{ + struct resource *r; + /* - * With stolen lmem, we don't need to check if the address range - * overlaps with the non-stolen system memory range, since lmem is local - * to the gpu. + * With stolen lmem, we don't need to request system memory for the + * address range since it's local to the gpu. + * + * Starting MTL, in IGFX devices the stolen memory is exposed via + * LMEMBAR and shall be considered similar to stolen lmem. */ - if (HAS_LMEM(i915)) + if (HAS_LMEM(i915) || HAS_LMEMBAR_SMEM_STOLEN(i915)) return 0; /* @@ -371,8 +388,6 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915, drm_dbg(&i915->drm, "GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val); - *base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK; - switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) { case GEN8_STOLEN_RESERVED_1M: *size = 1024 * 1024; @@ -390,101 +405,70 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915, *size = 8 * 1024 * 1024; MISSING_CASE(reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK); } + + if (HAS_LMEMBAR_SMEM_STOLEN(i915)) + /* the base is initialized to stolen top so subtract size to get base */ + *base -= *size; + else + *base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK; } -static int i915_gem_init_stolen(struct intel_memory_region *mem) +/* + * Initialize i915->dsm_reserved to contain the reserved space within the Data + * Stolen Memory. This is a range on the top of DSM that is reserved, not to + * be used by driver, so must be excluded from the region passed to the + * allocator later. In the spec this is also called as WOPCM. + * + * Our expectation is that the reserved space is at the top of the stolen + * region, as it has been the case for every platform, and *never* at the + * bottom, so the calculation here can be simplified. + */ +static int init_reserved_stolen(struct drm_i915_private *i915) { - struct drm_i915_private *i915 = mem->i915; struct intel_uncore *uncore = &i915->uncore; resource_size_t reserved_base, stolen_top; - resource_size_t reserved_total, reserved_size; - - mutex_init(&i915->mm.stolen_lock); - - if (intel_vgpu_active(i915)) { - drm_notice(&i915->drm, - "%s, disabling use of stolen memory\n", - "iGVT-g active"); - return 0; - } - - if (i915_vtd_active(i915) && GRAPHICS_VER(i915) < 8) { - drm_notice(&i915->drm, - "%s, disabling use of stolen memory\n", - "DMAR active"); - return 0; - } - - if (resource_size(&mem->region) == 0) - return 0; - - i915->dsm = mem->region; - - if (i915_adjust_stolen(i915, &i915->dsm)) - return 0; - - GEM_BUG_ON(i915->dsm.start == 0); - GEM_BUG_ON(i915->dsm.end <= i915->dsm.start); + resource_size_t reserved_size; + int ret = 0; stolen_top = i915->dsm.end + 1; reserved_base = stolen_top; reserved_size = 0; - switch (GRAPHICS_VER(i915)) { - case 2: - case 3: - break; - case 4: - if (!IS_G4X(i915)) - break; - fallthrough; - case 5: - g4x_get_stolen_reserved(i915, uncore, + if (GRAPHICS_VER(i915) >= 11) { + icl_get_stolen_reserved(i915, uncore, &reserved_base, &reserved_size); - break; - case 6: - gen6_get_stolen_reserved(i915, uncore, - &reserved_base, &reserved_size); - break; - case 7: - if (IS_VALLEYVIEW(i915)) - vlv_get_stolen_reserved(i915, uncore, - &reserved_base, &reserved_size); - else - gen7_get_stolen_reserved(i915, uncore, - &reserved_base, &reserved_size); - break; - case 8: - case 9: + } else if (GRAPHICS_VER(i915) >= 8) { if (IS_LP(i915)) chv_get_stolen_reserved(i915, uncore, &reserved_base, &reserved_size); else bdw_get_stolen_reserved(i915, uncore, &reserved_base, &reserved_size); - break; - default: - MISSING_CASE(GRAPHICS_VER(i915)); - fallthrough; - case 11: - case 12: - icl_get_stolen_reserved(i915, uncore, - &reserved_base, - &reserved_size); - break; + } else if (GRAPHICS_VER(i915) >= 7) { + if (IS_VALLEYVIEW(i915)) + vlv_get_stolen_reserved(i915, uncore, + &reserved_base, &reserved_size); + else + gen7_get_stolen_reserved(i915, uncore, + &reserved_base, &reserved_size); + } else if (GRAPHICS_VER(i915) >= 6) { + gen6_get_stolen_reserved(i915, uncore, + &reserved_base, &reserved_size); + } else if (GRAPHICS_VER(i915) >= 5 || IS_G4X(i915)) { + g4x_get_stolen_reserved(i915, uncore, + &reserved_base, &reserved_size); } - /* - * Our expectation is that the reserved space is at the top of the - * stolen region and *never* at the bottom. If we see !reserved_base, - * it likely means we failed to read the registers correctly. - */ + /* No reserved stolen */ + if (reserved_base == stolen_top) + goto bail_out; + if (!reserved_base) { drm_err(&i915->drm, "inconsistent reservation %pa + %pa; ignoring\n", &reserved_base, &reserved_size); - reserved_base = stolen_top; - reserved_size = 0; + ret = -EINVAL; + goto bail_out; } i915->dsm_reserved = @@ -494,19 +478,55 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem) drm_err(&i915->drm, "Stolen reserved area %pR outside stolen memory %pR\n", &i915->dsm_reserved, &i915->dsm); - return 0; + ret = -EINVAL; + goto bail_out; } + return 0; + +bail_out: + i915->dsm_reserved = + (struct resource)DEFINE_RES_MEM(reserved_base, 0); + + return ret; +} + +static int i915_gem_init_stolen(struct intel_memory_region *mem) +{ + struct drm_i915_private *i915 = mem->i915; + + mutex_init(&i915->mm.stolen_lock); + + if (intel_vgpu_active(i915)) { + drm_notice(&i915->drm, + "%s, disabling use of stolen memory\n", + "iGVT-g active"); + return -ENOSPC; + } + + if (i915_vtd_active(i915) && GRAPHICS_VER(i915) < 8) { + drm_notice(&i915->drm, + "%s, disabling use of stolen memory\n", + "DMAR active"); + return -ENOSPC; + } + + if (adjust_stolen(i915, &mem->region)) + return -ENOSPC; + + if (request_smem_stolen(i915, &mem->region)) + return -ENOSPC; + + i915->dsm = mem->region; + + if (init_reserved_stolen(i915)) + return -ENOSPC; + /* Exclude the reserved region from driver use */ - mem->region.end = reserved_base - 1; + mem->region.end = i915->dsm_reserved.start - 1; mem->io_size = min(mem->io_size, resource_size(&mem->region)); - /* It is possible for the reserved area to end before the end of stolen - * memory, so just consider the start. */ - reserved_total = stolen_top - reserved_base; - - i915->stolen_usable_size = - resource_size(&i915->dsm) - reserved_total; + i915->stolen_usable_size = resource_size(&mem->region); drm_dbg(&i915->drm, "Memory reserved for graphics device: %lluK, usable: %lluK\n", @@ -514,7 +534,7 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem) (u64)i915->stolen_usable_size >> 10); if (i915->stolen_usable_size == 0) - return 0; + return -ENOSPC; /* Basic memrange allocator for stolen space. */ drm_mm_init(&i915->mm.stolen, 0, i915->stolen_usable_size); @@ -752,11 +772,17 @@ i915_gem_object_create_stolen(struct drm_i915_private *i915, static int init_stolen_smem(struct intel_memory_region *mem) { + int err; + /* * Initialise stolen early so that we may reserve preallocated * objects for the BIOS to KMS transition. */ - return i915_gem_init_stolen(mem); + err = i915_gem_init_stolen(mem); + if (err) + drm_dbg(&mem->i915->drm, "Skip stolen region: failed to setup\n"); + + return 0; } static int release_stolen_smem(struct intel_memory_region *mem) @@ -773,26 +799,25 @@ static const struct intel_memory_region_ops i915_region_stolen_smem_ops = { static int init_stolen_lmem(struct intel_memory_region *mem) { + struct drm_i915_private *i915 = mem->i915; int err; if (GEM_WARN_ON(resource_size(&mem->region) == 0)) - return -ENODEV; + return 0; - /* - * TODO: For stolen lmem we mostly just care about populating the dsm - * related bits and setting up the drm_mm allocator for the range. - * Perhaps split up i915_gem_init_stolen() for this. - */ err = i915_gem_init_stolen(mem); - if (err) - return err; + if (err) { + drm_dbg(&mem->i915->drm, "Skip stolen region: failed to setup\n"); + return 0; + } - if (mem->io_size && !io_mapping_init_wc(&mem->iomap, - mem->io_start, - mem->io_size)) { - err = -EIO; + if (mem->io_size && + !io_mapping_init_wc(&mem->iomap, mem->io_start, mem->io_size)) goto err_cleanup; - } + + drm_dbg(&i915->drm, "Stolen Local memory IO start: %pa\n", + &mem->io_start); + drm_dbg(&i915->drm, "Stolen Local DSM base: %pa\n", &mem->region.start); return 0; @@ -815,6 +840,29 @@ static const struct intel_memory_region_ops i915_region_stolen_lmem_ops = { .init_object = _i915_gem_object_stolen_init, }; +static int mtl_get_gms_size(struct intel_uncore *uncore) +{ + u16 ggc, gms; + + ggc = intel_uncore_read16(uncore, GGC); + + /* check GGMS, should be fixed 0x3 (8MB) */ + if ((ggc & GGMS_MASK) != GGMS_MASK) + return -EIO; + + /* return valid GMS value, -EIO if invalid */ + gms = REG_FIELD_GET(GMS_MASK, ggc); + switch (gms) { + case 0x0 ... 0x04: + return gms * 32; + case 0xf0 ... 0xfe: + return (gms - 0xf0 + 1) * 4; + default: + MISSING_CASE(gms); + return -EIO; + } +} + struct intel_memory_region * i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type, u16 instance) @@ -825,6 +873,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type, struct intel_memory_region *mem; resource_size_t io_start, io_size; resource_size_t min_page_size; + int ret; if (WARN_ON_ONCE(instance)) return ERR_PTR(-ENODEV); @@ -832,12 +881,8 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type, if (!i915_pci_resource_valid(pdev, GEN12_LMEM_BAR)) return ERR_PTR(-ENXIO); - /* Use DSM base address instead for stolen memory */ - dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE); - if (IS_DG1(uncore->i915)) { + if (HAS_LMEMBAR_SMEM_STOLEN(i915) || IS_DG1(i915)) { lmem_size = pci_resource_len(pdev, GEN12_LMEM_BAR); - if (WARN_ON(lmem_size < dsm_base)) - return ERR_PTR(-ENODEV); } else { resource_size_t lmem_range; @@ -846,13 +891,39 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type, lmem_size *= SZ_1G; } - dsm_size = lmem_size - dsm_base; - if (pci_resource_len(pdev, GEN12_LMEM_BAR) < lmem_size) { + if (HAS_LMEMBAR_SMEM_STOLEN(i915)) { + /* + * MTL dsm size is in GGC register. + * Also MTL uses offset to DSMBASE in ptes, so i915 + * uses dsm_base = 0 to setup stolen region. + */ + ret = mtl_get_gms_size(uncore); + if (ret < 0) { + drm_err(&i915->drm, "invalid MTL GGC register setting\n"); + return ERR_PTR(ret); + } + + dsm_base = 0; + dsm_size = (resource_size_t)(ret * SZ_1M); + + GEM_BUG_ON(pci_resource_len(pdev, GEN12_LMEM_BAR) != SZ_256M); + GEM_BUG_ON((dsm_size + SZ_8M) > lmem_size); + } else { + /* Use DSM base address instead for stolen memory */ + dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE) & GEN12_BDSM_MASK; + if (WARN_ON(lmem_size < dsm_base)) + return ERR_PTR(-ENODEV); + dsm_size = lmem_size - dsm_base; + } + + io_size = dsm_size; + if (HAS_LMEMBAR_SMEM_STOLEN(i915)) { + io_start = pci_resource_start(pdev, GEN12_LMEM_BAR) + SZ_8M; + } else if (pci_resource_len(pdev, GEN12_LMEM_BAR) < lmem_size) { io_start = 0; io_size = 0; } else { io_start = pci_resource_start(pdev, GEN12_LMEM_BAR) + dsm_base; - io_size = dsm_size; } min_page_size = HAS_64K_PAGES(i915) ? I915_GTT_PAGE_SIZE_64K : @@ -866,16 +937,6 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type, if (IS_ERR(mem)) return mem; - /* - * TODO: consider creating common helper to just print all the - * interesting stuff from intel_memory_region, which we can use for all - * our probed regions. - */ - - drm_dbg(&i915->drm, "Stolen Local memory IO start: %pa\n", - &mem->io_start); - drm_dbg(&i915->drm, "Stolen Local DSM base: %pa\n", &dsm_base); - intel_memory_region_set_name(mem, "stolen-local"); mem->private = true; @@ -900,6 +961,7 @@ i915_gem_stolen_smem_setup(struct drm_i915_private *i915, u16 type, intel_memory_region_set_name(mem, "stolen-system"); mem->private = true; + return mem; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index bc9c432edffe..25129af70f70 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -189,7 +189,7 @@ static int i915_ttm_tt_shmem_populate(struct ttm_device *bdev, struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev); struct intel_memory_region *mr = i915->mm.regions[INTEL_MEMORY_SYSTEM]; struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); - const unsigned int max_segment = i915_sg_segment_size(); + const unsigned int max_segment = i915_sg_segment_size(i915->drm.dev); const size_t size = (size_t)ttm->num_pages << PAGE_SHIFT; struct file *filp = i915_tt->filp; struct sgt_iter sgt_iter; @@ -279,7 +279,7 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo, struct i915_ttm_tt *i915_tt; int ret; - if (!obj) + if (i915_ttm_is_ghost_object(bo)) return NULL; i915_tt = kzalloc(sizeof(*i915_tt), GFP_KERNEL); @@ -297,7 +297,7 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo, i915_tt->is_shmem = true; } - if (HAS_FLAT_CCS(i915) && i915_gem_object_needs_ccs_pages(obj)) + if (i915_gem_object_needs_ccs_pages(obj)) ccs_pages = DIV_ROUND_UP(DIV_ROUND_UP(bo->base.size, NUM_BYTES_PER_CCS_BYTE), PAGE_SIZE); @@ -362,7 +362,7 @@ static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo, { struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); - if (!obj) + if (i915_ttm_is_ghost_object(bo)) return false; /* @@ -510,7 +510,7 @@ static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo) { struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); - if (likely(obj)) { + if (bo->resource && !i915_ttm_is_ghost_object(bo)) { __i915_gem_object_pages_fini(obj); i915_ttm_free_cached_io_rsgt(obj); } @@ -529,7 +529,7 @@ static struct i915_refct_sgt *i915_ttm_tt_get_st(struct ttm_tt *ttm) ret = sg_alloc_table_from_pages_segment(st, ttm->pages, ttm->num_pages, 0, (unsigned long)ttm->num_pages << PAGE_SHIFT, - i915_sg_segment_size(), GFP_KERNEL); + i915_sg_segment_size(i915_tt->dev), GFP_KERNEL); if (ret) { st->sgl = NULL; return ERR_PTR(ret); @@ -615,7 +615,7 @@ static void i915_ttm_swap_notify(struct ttm_buffer_object *bo) struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); int ret; - if (!obj) + if (i915_ttm_is_ghost_object(bo)) return; ret = i915_ttm_move_notify(bo); @@ -640,7 +640,7 @@ bool i915_ttm_resource_mappable(struct ttm_resource *res) if (!i915_ttm_cpu_maps_iomem(res)) return true; - return bman_res->used_visible_size == bman_res->base.num_pages; + return bman_res->used_visible_size == PFN_UP(bman_res->base.size); } static int i915_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *mem) @@ -648,7 +648,7 @@ static int i915_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource struct drm_i915_gem_object *obj = i915_ttm_to_gem(mem->bo); bool unknown_state; - if (!obj) + if (i915_ttm_is_ghost_object(mem->bo)) return -EINVAL; if (!kref_get_unless_zero(&obj->base.refcount)) @@ -681,7 +681,7 @@ static unsigned long i915_ttm_io_mem_pfn(struct ttm_buffer_object *bo, unsigned long base; unsigned int ofs; - GEM_BUG_ON(!obj); + GEM_BUG_ON(i915_ttm_is_ghost_object(bo)); GEM_WARN_ON(bo->ttm); base = obj->mm.region->iomap.base - obj->mm.region->region.start; @@ -690,6 +690,50 @@ static unsigned long i915_ttm_io_mem_pfn(struct ttm_buffer_object *bo, return ((base + sg_dma_address(sg)) >> PAGE_SHIFT) + ofs; } +static int i915_ttm_access_memory(struct ttm_buffer_object *bo, + unsigned long offset, void *buf, + int len, int write) +{ + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + resource_size_t iomap = obj->mm.region->iomap.base - + obj->mm.region->region.start; + unsigned long page = offset >> PAGE_SHIFT; + unsigned long bytes_left = len; + + /* + * TODO: For now just let it fail if the resource is non-mappable, + * otherwise we need to perform the memcpy from the gpu here, without + * interfering with the object (like moving the entire thing). + */ + if (!i915_ttm_resource_mappable(bo->resource)) + return -EIO; + + offset -= page << PAGE_SHIFT; + do { + unsigned long bytes = min(bytes_left, PAGE_SIZE - offset); + void __iomem *ptr; + dma_addr_t daddr; + + daddr = i915_gem_object_get_dma_address(obj, page); + ptr = ioremap_wc(iomap + daddr + offset, bytes); + if (!ptr) + return -EIO; + + if (write) + memcpy_toio(ptr, buf, bytes); + else + memcpy_fromio(buf, ptr, bytes); + iounmap(ptr); + + page++; + buf += bytes; + bytes_left -= bytes; + offset = 0; + } while (bytes_left); + + return len; +} + /* * All callbacks need to take care not to downcast a struct ttm_buffer_object * without checking its subclass, since it might be a TTM ghost object. @@ -706,6 +750,7 @@ static struct ttm_device_funcs i915_ttm_bo_driver = { .delete_mem_notify = i915_ttm_delete_mem_notify, .io_mem_reserve = i915_ttm_io_mem_reserve, .io_mem_pfn = i915_ttm_io_mem_pfn, + .access_memory = i915_ttm_access_memory, }; /** @@ -839,9 +884,10 @@ static int __i915_ttm_migrate(struct drm_i915_gem_object *obj, } static int i915_ttm_migrate(struct drm_i915_gem_object *obj, - struct intel_memory_region *mr) + struct intel_memory_region *mr, + unsigned int flags) { - return __i915_ttm_migrate(obj, mr, obj->flags); + return __i915_ttm_migrate(obj, mr, flags); } static void i915_ttm_put_pages(struct drm_i915_gem_object *obj, @@ -980,12 +1026,12 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf) struct vm_area_struct *area = vmf->vma; struct ttm_buffer_object *bo = area->vm_private_data; struct drm_device *dev = bo->base.dev; - struct drm_i915_gem_object *obj; + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + intel_wakeref_t wakeref = 0; vm_fault_t ret; int idx; - obj = i915_ttm_to_gem(bo); - if (!obj) + if (i915_ttm_is_ghost_object(bo)) return VM_FAULT_SIGBUS; /* Sanity check that we allow writing into this object */ @@ -1002,6 +1048,9 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf) return VM_FAULT_SIGBUS; } + if (i915_ttm_cpu_maps_iomem(bo->resource)) + wakeref = intel_runtime_pm_get(&to_i915(obj->base.dev)->runtime_pm); + if (!i915_ttm_resource_mappable(bo->resource)) { int err = -ENODEV; int i; @@ -1021,9 +1070,11 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf) } if (err) { - drm_dbg(dev, "Unable to make resource CPU accessible\n"); + drm_dbg(dev, "Unable to make resource CPU accessible(err = %pe)\n", + ERR_PTR(err)); dma_resv_unlock(bo->base.resv); - return VM_FAULT_SIGBUS; + ret = VM_FAULT_SIGBUS; + goto out_rpm; } } @@ -1034,12 +1085,33 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf) } else { ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot); } + if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) - return ret; + goto out_rpm; + + /* + * ttm_bo_vm_reserve() already has dma_resv_lock. + * userfault_count is protected by dma_resv lock and rpm wakeref. + */ + if (ret == VM_FAULT_NOPAGE && wakeref && !obj->userfault_count) { + obj->userfault_count = 1; + spin_lock(&to_i915(obj->base.dev)->runtime_pm.lmem_userfault_lock); + list_add(&obj->userfault_link, &to_i915(obj->base.dev)->runtime_pm.lmem_userfault_list); + spin_unlock(&to_i915(obj->base.dev)->runtime_pm.lmem_userfault_lock); + } + + if (wakeref & CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND) + intel_wakeref_auto(&to_i915(obj->base.dev)->runtime_pm.userfault_wakeref, + msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)); i915_ttm_adjust_lru(obj); dma_resv_unlock(bo->base.resv); + +out_rpm: + if (wakeref) + intel_runtime_pm_put(&to_i915(obj->base.dev)->runtime_pm, wakeref); + return ret; } @@ -1061,7 +1133,7 @@ static void ttm_vm_open(struct vm_area_struct *vma) struct drm_i915_gem_object *obj = i915_ttm_to_gem(vma->vm_private_data); - GEM_BUG_ON(!obj); + GEM_BUG_ON(i915_ttm_is_ghost_object(vma->vm_private_data)); i915_gem_object_get(obj); } @@ -1070,7 +1142,7 @@ static void ttm_vm_close(struct vm_area_struct *vma) struct drm_i915_gem_object *obj = i915_ttm_to_gem(vma->vm_private_data); - GEM_BUG_ON(!obj); + GEM_BUG_ON(i915_ttm_is_ghost_object(vma->vm_private_data)); i915_gem_object_put(obj); } @@ -1091,7 +1163,27 @@ static u64 i915_ttm_mmap_offset(struct drm_i915_gem_object *obj) static void i915_ttm_unmap_virtual(struct drm_i915_gem_object *obj) { + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + intel_wakeref_t wakeref = 0; + + assert_object_held_shared(obj); + + if (i915_ttm_cpu_maps_iomem(bo->resource)) { + wakeref = intel_runtime_pm_get(&to_i915(obj->base.dev)->runtime_pm); + + /* userfault_count is protected by obj lock and rpm wakeref. */ + if (obj->userfault_count) { + spin_lock(&to_i915(obj->base.dev)->runtime_pm.lmem_userfault_lock); + list_del(&obj->userfault_link); + spin_unlock(&to_i915(obj->base.dev)->runtime_pm.lmem_userfault_lock); + obj->userfault_count = 0; + } + } + ttm_bo_unmap_virtual(i915_gem_to_ttm(obj)); + + if (wakeref) + intel_runtime_pm_put(&to_i915(obj->base.dev)->runtime_pm, wakeref); } static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h index e4842b4296fc..2a94a99ef76b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h @@ -28,18 +28,26 @@ i915_gem_to_ttm(struct drm_i915_gem_object *obj) void i915_ttm_bo_destroy(struct ttm_buffer_object *bo); /** + * i915_ttm_is_ghost_object - Check if the ttm bo is a ghost object. + * @bo: Pointer to the ttm buffer object + * + * Return: True if the ttm bo is not a i915 object but a ghost ttm object, + * False otherwise. + */ +static inline bool i915_ttm_is_ghost_object(struct ttm_buffer_object *bo) +{ + return bo->destroy != i915_ttm_bo_destroy; +} + +/** * i915_ttm_to_gem - Convert a struct ttm_buffer_object to an embedding * struct drm_i915_gem_object. * - * Return: Pointer to the embedding struct ttm_buffer_object, or NULL - * if the object was not an i915 ttm object. + * Return: Pointer to the embedding struct ttm_buffer_object. */ static inline struct drm_i915_gem_object * i915_ttm_to_gem(struct ttm_buffer_object *bo) { - if (bo->destroy != i915_ttm_bo_destroy) - return NULL; - return container_of(bo, struct drm_i915_gem_object, __do_not_access); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 9a7e50534b84..f59f812dc6d2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -560,7 +560,7 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, bool clear; int ret; - if (GEM_WARN_ON(!obj)) { + if (GEM_WARN_ON(i915_ttm_is_ghost_object(bo))) { ttm_bo_move_null(bo, dst_mem); return 0; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c index 9aad84059d56..07e49f22f2de 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c @@ -79,7 +79,12 @@ static int i915_ttm_backup(struct i915_gem_apply_to_region *apply, goto out_no_populate; err = i915_gem_obj_copy_ttm(backup, obj, pm_apply->allow_gpu, false); - GEM_WARN_ON(err); + if (err) { + drm_err(&i915->drm, + "Unable to copy from device to system memory, err:%pe\n", + ERR_PTR(err)); + goto out_no_populate; + } ttm_bo_wait_ctx(backup_bo, &ctx); obj->ttm.backup = backup; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 8423df021b71..1b1a22716722 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -129,7 +129,7 @@ static void i915_gem_object_userptr_drop_ref(struct drm_i915_gem_object *obj) static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) { const unsigned long num_pages = obj->base.size >> PAGE_SHIFT; - unsigned int max_segment = i915_sg_segment_size(); + unsigned int max_segment = i915_sg_segment_size(obj->base.dev->dev); struct sg_table *st; unsigned int sg_page_sizes; struct page **pvec; @@ -292,7 +292,7 @@ int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj) if (!i915_gem_object_is_readonly(obj)) gup_flags |= FOLL_WRITE; - pinned = ret = 0; + pinned = 0; while (pinned < num_pages) { ret = pin_user_pages_fast(obj->userptr.ptr + pinned * PAGE_SIZE, num_pages - pinned, gup_flags, @@ -302,7 +302,6 @@ int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj) pinned += ret; } - ret = 0; ret = i915_gem_object_lock_interruptible(obj, NULL); if (ret) @@ -426,12 +425,11 @@ static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = { static int probe_range(struct mm_struct *mm, unsigned long addr, unsigned long len) { - const unsigned long end = addr + len; + VMA_ITERATOR(vmi, mm, addr); struct vm_area_struct *vma; - int ret = -EFAULT; mmap_read_lock(mm); - for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) { + for_each_vma_range(vmi, vma, addr + len) { /* Check for holes, note that we also update the addr below */ if (vma->vm_start > addr) break; @@ -439,16 +437,13 @@ probe_range(struct mm_struct *mm, unsigned long addr, unsigned long len) if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) break; - if (vma->vm_end >= end) { - ret = 0; - break; - } - addr = vma->vm_end; } mmap_read_unlock(mm); - return ret; + if (vma) + return -EFAULT; + return 0; } /* diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index c570cf780079..0cb99e75b0bc 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -1161,7 +1161,8 @@ static int igt_write_huge(struct drm_i915_private *i915, GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); size = obj->base.size; - if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) + if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K && + !HAS_64K_PAGES(i915)) size = round_up(size, I915_GTT_PAGE_SIZE_2M); n = 0; @@ -1214,6 +1215,10 @@ static int igt_write_huge(struct drm_i915_private *i915, * size and ensure the vma offset is at the start of the pt * boundary, however to improve coverage we opt for testing both * aligned and unaligned offsets. + * + * With PS64 this is no longer the case, but to ensure we + * sometimes get the compact layout for smaller objects, apply + * the round_up anyway. */ if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) offset_low = round_down(offset_low, @@ -1411,6 +1416,7 @@ static int igt_ppgtt_sanity_check(void *arg) { SZ_2M + SZ_4K, SZ_64K | SZ_4K }, { SZ_2M + SZ_4K, SZ_2M | SZ_4K }, { SZ_2M + SZ_64K, SZ_2M | SZ_64K }, + { SZ_2M + SZ_64K, SZ_64K }, }; int i, j; int err; @@ -1540,6 +1546,154 @@ out_put: return err; } +static int igt_ppgtt_mixed(void *arg) +{ + struct drm_i915_private *i915 = arg; + const unsigned long flags = PIN_OFFSET_FIXED | PIN_USER; + struct drm_i915_gem_object *obj, *on; + struct i915_gem_engines *engines; + struct i915_gem_engines_iter it; + struct i915_address_space *vm; + struct i915_gem_context *ctx; + struct intel_context *ce; + struct file *file; + I915_RND_STATE(prng); + LIST_HEAD(objects); + struct intel_memory_region *mr; + struct i915_vma *vma; + unsigned int count; + u32 i, addr; + int *order; + int n, err; + + /* + * Sanity check mixing 4K and 64K pages within the same page-table via + * the new PS64 TLB hint. + */ + + if (!HAS_64K_PAGES(i915)) { + pr_info("device lacks PS64, skipping\n"); + return 0; + } + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + ctx = hugepage_ctx(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out; + } + vm = i915_gem_context_get_eb_vm(ctx); + + i = 0; + addr = 0; + do { + u32 sz; + + sz = i915_prandom_u32_max_state(SZ_4M, &prng); + sz = max_t(u32, sz, SZ_4K); + + mr = i915->mm.regions[INTEL_REGION_LMEM_0]; + if (i & 1) + mr = i915->mm.regions[INTEL_REGION_SMEM]; + + obj = i915_gem_object_create_region(mr, sz, 0, 0); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_vm; + } + + list_add_tail(&obj->st_link, &objects); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_put; + } + + addr = round_up(addr, mr->min_page_size); + err = i915_vma_pin(vma, 0, 0, addr | flags); + if (err) + goto err_put; + + if (mr->type == INTEL_MEMORY_LOCAL && + (vma->resource->page_sizes_gtt & I915_GTT_PAGE_SIZE_4K)) { + err = -EINVAL; + goto err_put; + } + + addr += obj->base.size; + i++; + } while (addr <= SZ_16M); + + n = 0; + count = 0; + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + count++; + if (!intel_engine_can_store_dword(ce->engine)) + continue; + + n++; + } + i915_gem_context_unlock_engines(ctx); + if (!n) + goto err_put; + + order = i915_random_order(count * count, &prng); + if (!order) { + err = -ENOMEM; + goto err_put; + } + + i = 0; + addr = 0; + engines = i915_gem_context_lock_engines(ctx); + list_for_each_entry(obj, &objects, st_link) { + u32 rnd = i915_prandom_u32_max_state(UINT_MAX, &prng); + + addr = round_up(addr, obj->mm.region->min_page_size); + + ce = engines->engines[order[i] % engines->num_engines]; + i = (i + 1) % (count * count); + if (!ce || !intel_engine_can_store_dword(ce->engine)) + continue; + + err = __igt_write_huge(ce, obj, obj->base.size, addr, 0, rnd); + if (err) + break; + + err = __igt_write_huge(ce, obj, obj->base.size, addr, + offset_in_page(rnd) / sizeof(u32), rnd + 1); + if (err) + break; + + err = __igt_write_huge(ce, obj, obj->base.size, addr, + (PAGE_SIZE / sizeof(u32)) - 1, + rnd + 2); + if (err) + break; + + addr += obj->base.size; + + cond_resched(); + } + + i915_gem_context_unlock_engines(ctx); + kfree(order); +err_put: + list_for_each_entry_safe(obj, on, &objects, st_link) { + list_del(&obj->st_link); + i915_gem_object_put(obj); + } +out_vm: + i915_vm_put(vm); +out: + fput(file); + return err; +} + static int igt_tmpfs_fallback(void *arg) { struct drm_i915_private *i915 = arg; @@ -1803,6 +1957,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_ppgtt_smoke_huge), SUBTEST(igt_ppgtt_sanity_check), SUBTEST(igt_ppgtt_compact), + SUBTEST(igt_ppgtt_mixed), }; if (!HAS_PPGTT(i915)) { diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index 13b088cc787e..a666d7e610f5 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -434,5 +434,5 @@ int i915_gem_coherency_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_gem_coherency), }; - return i915_subtests(tests, i915); + return i915_live_subtests(tests, i915); } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index c6ad67b90e8a..d8864444432b 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -179,97 +179,108 @@ out_file: } struct parallel_switch { - struct task_struct *tsk; + struct kthread_worker *worker; + struct kthread_work work; struct intel_context *ce[2]; + int result; }; -static int __live_parallel_switch1(void *data) +static void __live_parallel_switch1(struct kthread_work *work) { - struct parallel_switch *arg = data; + struct parallel_switch *arg = + container_of(work, typeof(*arg), work); IGT_TIMEOUT(end_time); unsigned long count; count = 0; + arg->result = 0; do { struct i915_request *rq = NULL; - int err, n; + int n; - err = 0; - for (n = 0; !err && n < ARRAY_SIZE(arg->ce); n++) { + for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) { struct i915_request *prev = rq; rq = i915_request_create(arg->ce[n]); if (IS_ERR(rq)) { i915_request_put(prev); - return PTR_ERR(rq); + arg->result = PTR_ERR(rq); + break; } i915_request_get(rq); if (prev) { - err = i915_request_await_dma_fence(rq, &prev->fence); + arg->result = + i915_request_await_dma_fence(rq, + &prev->fence); i915_request_put(prev); } i915_request_add(rq); } + + if (IS_ERR_OR_NULL(rq)) + break; + if (i915_request_wait(rq, 0, HZ) < 0) - err = -ETIME; + arg->result = -ETIME; + i915_request_put(rq); - if (err) - return err; count++; - } while (!__igt_timeout(end_time, NULL)); + } while (!arg->result && !__igt_timeout(end_time, NULL)); - pr_info("%s: %lu switches (sync)\n", arg->ce[0]->engine->name, count); - return 0; + pr_info("%s: %lu switches (sync) <%d>\n", + arg->ce[0]->engine->name, count, arg->result); } -static int __live_parallel_switchN(void *data) +static void __live_parallel_switchN(struct kthread_work *work) { - struct parallel_switch *arg = data; + struct parallel_switch *arg = + container_of(work, typeof(*arg), work); struct i915_request *rq = NULL; IGT_TIMEOUT(end_time); unsigned long count; int n; count = 0; + arg->result = 0; do { - for (n = 0; n < ARRAY_SIZE(arg->ce); n++) { + for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) { struct i915_request *prev = rq; - int err = 0; rq = i915_request_create(arg->ce[n]); if (IS_ERR(rq)) { i915_request_put(prev); - return PTR_ERR(rq); + arg->result = PTR_ERR(rq); + break; } i915_request_get(rq); if (prev) { - err = i915_request_await_dma_fence(rq, &prev->fence); + arg->result = + i915_request_await_dma_fence(rq, + &prev->fence); i915_request_put(prev); } i915_request_add(rq); - if (err) { - i915_request_put(rq); - return err; - } } count++; - } while (!__igt_timeout(end_time, NULL)); - i915_request_put(rq); + } while (!arg->result && !__igt_timeout(end_time, NULL)); - pr_info("%s: %lu switches (many)\n", arg->ce[0]->engine->name, count); - return 0; + if (!IS_ERR_OR_NULL(rq)) + i915_request_put(rq); + + pr_info("%s: %lu switches (many) <%d>\n", + arg->ce[0]->engine->name, count, arg->result); } static int live_parallel_switch(void *arg) { struct drm_i915_private *i915 = arg; - static int (* const func[])(void *arg) = { + static void (* const func[])(struct kthread_work *) = { __live_parallel_switch1, __live_parallel_switchN, NULL, @@ -277,7 +288,7 @@ static int live_parallel_switch(void *arg) struct parallel_switch *data = NULL; struct i915_gem_engines *engines; struct i915_gem_engines_iter it; - int (* const *fn)(void *arg); + void (* const *fn)(struct kthread_work *); struct i915_gem_context *ctx; struct intel_context *ce; struct file *file; @@ -348,9 +359,22 @@ static int live_parallel_switch(void *arg) } } + for (n = 0; n < count; n++) { + struct kthread_worker *worker; + + if (!data[n].ce[0]) + continue; + + worker = kthread_create_worker(0, "igt/parallel:%s", + data[n].ce[0]->engine->name); + if (IS_ERR(worker)) + goto out; + + data[n].worker = worker; + } + for (fn = func; !err && *fn; fn++) { struct igt_live_test t; - int n; err = igt_live_test_begin(&t, i915, __func__, ""); if (err) @@ -360,30 +384,17 @@ static int live_parallel_switch(void *arg) if (!data[n].ce[0]) continue; - data[n].tsk = kthread_run(*fn, &data[n], - "igt/parallel:%s", - data[n].ce[0]->engine->name); - if (IS_ERR(data[n].tsk)) { - err = PTR_ERR(data[n].tsk); - break; - } - get_task_struct(data[n].tsk); + data[n].result = 0; + kthread_init_work(&data[n].work, *fn); + kthread_queue_work(data[n].worker, &data[n].work); } - yield(); /* start all threads before we kthread_stop() */ - for (n = 0; n < count; n++) { - int status; - - if (IS_ERR_OR_NULL(data[n].tsk)) - continue; - - status = kthread_stop(data[n].tsk); - if (status && !err) - err = status; - - put_task_struct(data[n].tsk); - data[n].tsk = NULL; + if (data[n].ce[0]) { + kthread_flush_work(&data[n].work); + if (data[n].result && !err) + err = data[n].result; + } } if (igt_live_test_end(&t)) @@ -399,6 +410,9 @@ out: intel_context_unpin(data[n].ce[m]); intel_context_put(data[n].ce[m]); } + + if (data[n].worker) + kthread_destroy_worker(data[n].worker); } kfree(data); out_file: diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c index 62c61af77a42..e57f9390076c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c @@ -6,8 +6,12 @@ #include "i915_drv.h" #include "i915_selftest.h" +#include "gem/i915_gem_context.h" +#include "mock_context.h" #include "mock_dmabuf.h" +#include "igt_gem_utils.h" +#include "selftests/mock_drm.h" #include "selftests/mock_gem_device.h" static int igt_dmabuf_export(void *arg) @@ -140,6 +144,75 @@ out_ret: return err; } +static int verify_access(struct drm_i915_private *i915, + struct drm_i915_gem_object *native_obj, + struct drm_i915_gem_object *import_obj) +{ + struct i915_gem_engines_iter it; + struct i915_gem_context *ctx; + struct intel_context *ce; + struct i915_vma *vma; + struct file *file; + u32 *vaddr; + int err = 0, i; + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + ctx = live_context(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_file; + } + + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + if (intel_engine_can_store_dword(ce->engine)) + break; + } + i915_gem_context_unlock_engines(ctx); + if (!ce) + goto out_file; + + vma = i915_vma_instance(import_obj, ce->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_file; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto out_file; + + err = igt_gpu_fill_dw(ce, vma, 0, + vma->size >> PAGE_SHIFT, 0xdeadbeaf); + i915_vma_unpin(vma); + if (err) + goto out_file; + + err = i915_gem_object_wait(import_obj, 0, MAX_SCHEDULE_TIMEOUT); + if (err) + goto out_file; + + vaddr = i915_gem_object_pin_map_unlocked(native_obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto out_file; + } + + for (i = 0; i < native_obj->base.size / sizeof(u32); i += PAGE_SIZE / sizeof(u32)) { + if (vaddr[i] != 0xdeadbeaf) { + pr_err("Data mismatch [%d]=%u\n", i, vaddr[i]); + err = -EINVAL; + goto out_file; + } + } + +out_file: + fput(file); + return err; +} + static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915, struct intel_memory_region **regions, unsigned int num_regions) @@ -154,7 +227,7 @@ static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915, force_different_devices = true; - obj = __i915_gem_object_create_user(i915, PAGE_SIZE, + obj = __i915_gem_object_create_user(i915, SZ_8M, regions, num_regions); if (IS_ERR(obj)) { pr_err("__i915_gem_object_create_user failed with err=%ld\n", @@ -206,6 +279,10 @@ static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915, i915_gem_object_unlock(import_obj); + err = verify_access(i915, obj, import_obj); + if (err) + goto out_import; + /* Now try a fake an importer */ import_attach = dma_buf_attach(dmabuf, obj->base.dev->dev); if (IS_ERR(import_attach)) { @@ -213,7 +290,7 @@ static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915, goto out_import; } - st = dma_buf_map_attachment(import_attach, DMA_BIDIRECTIONAL); + st = dma_buf_map_attachment_unlocked(import_attach, DMA_BIDIRECTIONAL); if (IS_ERR(st)) { err = PTR_ERR(st); goto out_detach; @@ -226,7 +303,7 @@ static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915, timeout = -ETIME; } err = timeout > 0 ? 0 : timeout; - dma_buf_unmap_attachment(import_attach, st, DMA_BIDIRECTIONAL); + dma_buf_unmap_attachment_unlocked(import_attach, st, DMA_BIDIRECTIONAL); out_detach: dma_buf_detach(dmabuf, import_attach); out_import: @@ -296,7 +373,7 @@ static int igt_dmabuf_import(void *arg) goto out_obj; } - err = dma_buf_vmap(dmabuf, &map); + err = dma_buf_vmap_unlocked(dmabuf, &map); dma_map = err ? NULL : map.vaddr; if (!dma_map) { pr_err("dma_buf_vmap failed\n"); @@ -337,7 +414,7 @@ static int igt_dmabuf_import(void *arg) err = 0; out_dma_map: - dma_buf_vunmap(dmabuf, &map); + dma_buf_vunmap_unlocked(dmabuf, &map); out_obj: i915_gem_object_put(obj); out_dmabuf: @@ -358,7 +435,7 @@ static int igt_dmabuf_import_ownership(void *arg) if (IS_ERR(dmabuf)) return PTR_ERR(dmabuf); - err = dma_buf_vmap(dmabuf, &map); + err = dma_buf_vmap_unlocked(dmabuf, &map); ptr = err ? NULL : map.vaddr; if (!ptr) { pr_err("dma_buf_vmap failed\n"); @@ -367,7 +444,7 @@ static int igt_dmabuf_import_ownership(void *arg) } memset(ptr, 0xc5, PAGE_SIZE); - dma_buf_vunmap(dmabuf, &map); + dma_buf_vunmap_unlocked(dmabuf, &map); obj = to_intel_bo(i915_gem_prime_import(&i915->drm, dmabuf)); if (IS_ERR(obj)) { @@ -418,7 +495,7 @@ static int igt_dmabuf_export_vmap(void *arg) } i915_gem_object_put(obj); - err = dma_buf_vmap(dmabuf, &map); + err = dma_buf_vmap_unlocked(dmabuf, &map); ptr = err ? NULL : map.vaddr; if (!ptr) { pr_err("dma_buf_vmap failed\n"); @@ -435,7 +512,7 @@ static int igt_dmabuf_export_vmap(void *arg) memset(ptr, 0xc5, dmabuf->size); err = 0; - dma_buf_vunmap(dmabuf, &map); + dma_buf_vunmap_unlocked(dmabuf, &map); out: dma_buf_put(dmabuf); return err; @@ -476,5 +553,5 @@ int i915_gem_dmabuf_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_dmabuf_import_same_driver_lmem_smem), }; - return i915_subtests(tests, i915); + return i915_live_subtests(tests, i915); } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index a68cc91ed084..3c486efe0bac 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -8,6 +8,7 @@ #include <linux/prime_numbers.h> #include "gem/i915_gem_internal.h" +#include "gem/i915_gem_lmem.h" #include "gem/i915_gem_region.h" #include "gem/i915_gem_ttm.h" #include "gem/i915_gem_ttm_move.h" @@ -94,7 +95,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, { const unsigned long npages = obj->base.size / PAGE_SIZE; struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_ggtt_view view; + struct i915_gtt_view view; struct i915_vma *vma; unsigned long page; u32 __iomem *io; @@ -211,7 +212,7 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj, } for_each_prime_number_from(page, 1, npages) { - struct i915_ggtt_view view = + struct i915_gtt_view view = compute_partial_view(obj, page, MIN_CHUNK_PAGES); u32 __iomem *io; struct page *p; @@ -1845,5 +1846,5 @@ int i915_gem_mman_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_mmap_gpu), }; - return i915_subtests(tests, i915); + return i915_live_subtests(tests, i915); } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c index fe0a890775e2..bdf5bb40ccf1 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c @@ -95,5 +95,5 @@ int i915_gem_object_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_gem_huge), }; - return i915_subtests(tests, i915); + return i915_live_subtests(tests, i915); } |