Diffstat (limited to 'drivers/gpu/drm/i915')
188 files changed, 3502 insertions, 3044 deletions
diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile
index 35bbe2b80596..39328567c200 100644
--- a/drivers/gpu/drm/i915/Kconfig.profile
+++ b/drivers/gpu/drm/i915/Kconfig.profile
@@ -1,3 +1,17 @@
+config DRM_I915_REQUEST_TIMEOUT
+	int "Default timeout for requests (ms)"
+	default 20000 # milliseconds
+	help
+	  Configures the default timeout after which any user submission will
+	  be forcefully terminated.
+
+	  Beware of setting this value lower than, or close to, three times
+	  the heartbeat interval rounded to whole seconds, since that would
+	  allow misbehaving applications to cause total rendering failure in
+	  unrelated clients.
+
+	  May be 0 to disable the timeout.
+
 config DRM_I915_FENCE_TIMEOUT
 	int "Timeout for unsignaled foreign fences (ms, jiffy granularity)"
 	default 10000 # milliseconds
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 9faf2d1980e5..d0d936d9137b 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -140,7 +140,6 @@ gem-y += \
 	gem/i915_gem_dmabuf.o \
 	gem/i915_gem_domain.o \
 	gem/i915_gem_execbuffer.o \
-	gem/i915_gem_fence.o \
 	gem/i915_gem_internal.o \
 	gem/i915_gem_object.o \
 	gem/i915_gem_object_blt.o \
diff --git a/drivers/gpu/drm/i915/TODO.txt b/drivers/gpu/drm/i915/TODO.txt
new file mode 100644
index 000000000000..81a82c9c203f
--- /dev/null
+++ b/drivers/gpu/drm/i915/TODO.txt
@@ -0,0 +1,45 @@
+gem/gt TODO items
+-----------------
+
+- For the discrete memory manager, merge enough of dg1 to be able to
+  refactor it to TTM. Then land the pci ids (just in case that turns up
+  an uapi problem). TTM has improved a lot over the past two years;
+  there's no reason anymore not to use it.
+
+- Come up with a plan for what to do with drm/scheduler and how to get
+  there.
+
+- Roll out dma_fence critical section annotations.
+
+- A lot of complexity has been added over the past few years to make
+  relocations faster. That doesn't make sense given that hw and gpu apis
+  moved away from this model years ago:
+  1. Land a modern pre-bound uapi like VM_BIND.
+  2. Any complexity added in this area over the past few years that
+     can't be justified by userspace using VM_BIND should be removed.
+     Looking at amdgpu, dma_resv on the bo and vm plus some lru locks is
+     all that's needed. No complex rcu, refcounts, caching, ... on
+     everything.
+  This is the matching task on the vm side compared to ttm/dma_resv on
+  the backing storage side.
+
+- i915_sw_fence seems to be the main structure for the i915-gem
+  dma_fence model. How-to-dma_fence is core and drivers really
+  shouldn't build their own world here, treating everything else as a
+  fixed platform. i915_sw_fence concepts should be moved to dma_fence,
+  drm/scheduler or the atomic commit helpers, or dropped if the
+  dri-devel consensus is that they are not a good idea. Once that's
+  done, remove i915_sw_fence itself if there's nothing left.
+
+Smaller things:
+- i915_utils.h needs to be moved to the right places.
+
+- dma_fence_work should be in drivers/dma-buf.
+
+- i915_mm.c should be moved to the right places. Some of the helpers
+  also look a bit fishy:
+
+    https://lore.kernel.org/linux-mm/20210301083320.943079-1-hch@lst.de/
+
+- The tasklet helpers in i915_gem.h also look a bit misplaced and should
+  probably be moved to the tasklet headers.
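The DRM_I915_REQUEST_TIMEOUT help text above boils down to a simple relation
between the two intervals. A minimal sketch of that guard, assuming a
hypothetical helper that is not part of this series (only DIV_ROUND_UP() from
linux/kernel.h is real):

	/*
	 * Sketch only, not from this patch: the request timeout is safe
	 * when it is either disabled (0) or comfortably above three times
	 * the heartbeat interval rounded up to whole seconds.
	 */
	static bool request_timeout_is_sane(unsigned int timeout_ms,
					    unsigned int heartbeat_ms)
	{
		unsigned int heartbeat_s = DIV_ROUND_UP(heartbeat_ms, 1000);

		return timeout_ms == 0 || timeout_ms > 3 * heartbeat_s * 1000;
	}

With the defaults (20000 ms timeout, a heartbeat of a few seconds) the
relation holds with plenty of margin.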
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index d3af51e63380..4f8f994a639f 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -1018,6 +1018,7 @@ static bool intel_plane_uses_fence(const struct intel_plane_state *plane_state) struct i915_vma * intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, + bool phys_cursor, const struct i915_ggtt_view *view, bool uses_fence, unsigned long *out_flags) @@ -1026,14 +1027,19 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_object *obj = intel_fb_obj(fb); intel_wakeref_t wakeref; + struct i915_gem_ww_ctx ww; struct i915_vma *vma; unsigned int pinctl; u32 alignment; + int ret; if (drm_WARN_ON(dev, !i915_gem_object_is_framebuffer(obj))) return ERR_PTR(-EINVAL); - alignment = intel_surf_alignment(fb, 0); + if (phys_cursor) + alignment = intel_cursor_alignment(dev_priv); + else + alignment = intel_surf_alignment(fb, 0); if (drm_WARN_ON(dev, alignment && !is_power_of_2(alignment))) return ERR_PTR(-EINVAL); @@ -1068,14 +1074,26 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, if (HAS_GMCH(dev_priv)) pinctl |= PIN_MAPPABLE; - vma = i915_gem_object_pin_to_display_plane(obj, - alignment, view, pinctl); - if (IS_ERR(vma)) + i915_gem_ww_ctx_init(&ww, true); +retry: + ret = i915_gem_object_lock(obj, &ww); + if (!ret && phys_cursor) + ret = i915_gem_object_attach_phys(obj, alignment); + if (!ret) + ret = i915_gem_object_pin_pages(obj); + if (ret) goto err; - if (uses_fence && i915_vma_is_map_and_fenceable(vma)) { - int ret; + if (!ret) { + vma = i915_gem_object_pin_to_display_plane(obj, &ww, alignment, + view, pinctl); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto err_unpin; + } + } + if (uses_fence && i915_vma_is_map_and_fenceable(vma)) { /* * Install a fence for tiled scan-out. 
Pre-i965 always needs a * fence, whereas 965+ only requires a fence if using @@ -1096,16 +1114,28 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, ret = i915_vma_pin_fence(vma); if (ret != 0 && DISPLAY_VER(dev_priv) < 4) { i915_vma_unpin(vma); - vma = ERR_PTR(ret); - goto err; + goto err_unpin; } + ret = 0; - if (ret == 0 && vma->fence) + if (vma->fence) *out_flags |= PLANE_HAS_FENCE; } i915_vma_get(vma); + +err_unpin: + i915_gem_object_unpin_pages(obj); err: + if (ret == -EDEADLK) { + ret = i915_gem_ww_ctx_backoff(&ww); + if (!ret) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + if (ret) + vma = ERR_PTR(ret); + atomic_dec(&dev_priv->gpu_error.pending_fb_pin); intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); return vma; @@ -7191,7 +7221,6 @@ static void intel_dump_plane_state(const struct intel_plane_state *plane_state) struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); struct drm_i915_private *i915 = to_i915(plane->base.dev); const struct drm_framebuffer *fb = plane_state->hw.fb; - struct drm_format_name_buf format_name; if (!fb) { drm_dbg_kms(&i915->drm, @@ -7202,10 +7231,9 @@ static void intel_dump_plane_state(const struct intel_plane_state *plane_state) } drm_dbg_kms(&i915->drm, - "[PLANE:%d:%s] fb: [FB:%d] %ux%u format = %s modifier = 0x%llx, visible: %s\n", + "[PLANE:%d:%s] fb: [FB:%d] %ux%u format = %p4cc modifier = 0x%llx, visible: %s\n", plane->base.base.id, plane->base.name, - fb->base.id, fb->width, fb->height, - drm_get_format_name(fb->format->format, &format_name), + fb->base.id, fb->width, fb->height, &fb->format->format, fb->modifier, yesno(plane_state->uapi.visible)); drm_dbg_kms(&i915->drm, "\trotation: 0x%x, scaler: %d\n", plane_state->hw.rotation, plane_state->scaler_id); @@ -10509,19 +10537,11 @@ int intel_plane_pin_fb(struct intel_plane_state *plane_state) struct drm_i915_private *dev_priv = to_i915(plane->base.dev); struct drm_framebuffer *fb = plane_state->hw.fb; struct i915_vma *vma; + bool phys_cursor = + plane->id == PLANE_CURSOR && + INTEL_INFO(dev_priv)->display.cursor_needs_physical; - if (plane->id == PLANE_CURSOR && - INTEL_INFO(dev_priv)->display.cursor_needs_physical) { - struct drm_i915_gem_object *obj = intel_fb_obj(fb); - const int align = intel_cursor_alignment(dev_priv); - int err; - - err = i915_gem_object_attach_phys(obj, align); - if (err) - return err; - } - - vma = intel_pin_and_fence_fb_obj(fb, + vma = intel_pin_and_fence_fb_obj(fb, phys_cursor, &plane_state->view.gtt, intel_plane_uses_fence(plane_state), &plane_state->flags); @@ -10558,9 +10578,7 @@ int intel_prepare_plane_fb(struct drm_plane *_plane, struct drm_plane_state *_new_plane_state) { - struct i915_sched_attr attr = { - .priority = I915_USER_PRIORITY(I915_PRIORITY_DISPLAY), - }; + struct i915_sched_attr attr = { .priority = I915_PRIORITY_DISPLAY }; struct intel_plane *plane = to_intel_plane(_plane); struct intel_plane_state *new_plane_state = to_intel_plane_state(_new_plane_state); @@ -10613,13 +10631,8 @@ intel_prepare_plane_fb(struct drm_plane *_plane, if (!obj) return 0; - ret = i915_gem_object_pin_pages(obj); - if (ret) - return ret; ret = intel_plane_pin_fb(new_plane_state); - - i915_gem_object_unpin_pages(obj); if (ret) return ret; @@ -11081,7 +11094,7 @@ static int intel_user_framebuffer_create_handle(struct drm_framebuffer *fb, struct drm_i915_gem_object *obj = intel_fb_obj(fb); struct drm_i915_private *i915 = to_i915(obj->base.dev); - if (obj->userptr.mm) { + if (i915_gem_object_is_userptr(obj)) { drm_dbg(&i915->drm, "attempting to use a 
userptr for a framebuffer, denied\n"); return -EINVAL; @@ -11154,13 +11167,9 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, if (!drm_any_plane_has_format(&dev_priv->drm, mode_cmd->pixel_format, mode_cmd->modifier[0])) { - struct drm_format_name_buf format_name; - drm_dbg_kms(&dev_priv->drm, - "unsupported pixel format %s / modifier 0x%llx\n", - drm_get_format_name(mode_cmd->pixel_format, - &format_name), - mode_cmd->modifier[0]); + "unsupported pixel format %p4cc / modifier 0x%llx\n", + &mode_cmd->pixel_format, mode_cmd->modifier[0]); goto err; } diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index 45660f2c587f..105294ec2dcc 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -571,7 +571,7 @@ void intel_release_load_detect_pipe(struct drm_connector *connector, struct intel_load_detect_pipe *old, struct drm_modeset_acquire_ctx *ctx); struct i915_vma * -intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, +intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, bool phys_cursor, const struct i915_ggtt_view *view, bool uses_fence, unsigned long *out_flags); diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c index 490b6550a810..183c414d554a 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c @@ -804,27 +804,25 @@ static void intel_plane_uapi_info(struct seq_file *m, struct intel_plane *plane) const struct intel_plane_state *plane_state = to_intel_plane_state(plane->base.state); const struct drm_framebuffer *fb = plane_state->uapi.fb; - struct drm_format_name_buf format_name; struct drm_rect src, dst; char rot_str[48]; src = drm_plane_state_src(&plane_state->uapi); dst = drm_plane_state_dest(&plane_state->uapi); - if (fb) - drm_get_format_name(fb->format->format, &format_name); - plane_rotation(rot_str, sizeof(rot_str), plane_state->uapi.rotation); - seq_printf(m, "\t\tuapi: [FB:%d] %s,0x%llx,%dx%d, visible=%s, src=" DRM_RECT_FP_FMT ", dst=" DRM_RECT_FMT ", rotation=%s\n", - fb ? fb->base.id : 0, fb ? format_name.str : "n/a", - fb ? fb->modifier : 0, - fb ? fb->width : 0, fb ? 
fb->height : 0, - plane_visibility(plane_state), - DRM_RECT_FP_ARG(&src), - DRM_RECT_ARG(&dst), - rot_str); + seq_puts(m, "\t\tuapi: [FB:"); + if (fb) + seq_printf(m, "%d] %p4cc,0x%llx,%dx%d", fb->base.id, + &fb->format->format, fb->modifier, fb->width, + fb->height); + else + seq_puts(m, "0] n/a,0x0,0x0,"); + seq_printf(m, ", visible=%s, src=" DRM_RECT_FP_FMT ", dst=" DRM_RECT_FMT + ", rotation=%s\n", plane_visibility(plane_state), + DRM_RECT_FP_ARG(&src), DRM_RECT_ARG(&dst), rot_str); if (plane_state->planar_linked_plane) seq_printf(m, "\t\tplanar: Linked to [PLANE:%d:%s] as a %s\n", @@ -837,19 +835,17 @@ static void intel_plane_hw_info(struct seq_file *m, struct intel_plane *plane) const struct intel_plane_state *plane_state = to_intel_plane_state(plane->base.state); const struct drm_framebuffer *fb = plane_state->hw.fb; - struct drm_format_name_buf format_name; char rot_str[48]; if (!fb) return; - drm_get_format_name(fb->format->format, &format_name); - plane_rotation(rot_str, sizeof(rot_str), plane_state->hw.rotation); - seq_printf(m, "\t\thw: [FB:%d] %s,0x%llx,%dx%d, visible=%s, src=" DRM_RECT_FP_FMT ", dst=" DRM_RECT_FMT ", rotation=%s\n", - fb->base.id, format_name.str, + seq_printf(m, "\t\thw: [FB:%d] %p4cc,0x%llx,%dx%d, visible=%s, src=" + DRM_RECT_FP_FMT ", dst=" DRM_RECT_FMT ", rotation=%s\n", + fb->base.id, &fb->format->format, fb->modifier, fb->width, fb->height, yesno(plane_state->uapi.visible), DRM_RECT_FP_ARG(&plane_state->uapi.src), diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index 566fa72427b3..857126822a88 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -293,7 +293,7 @@ void intel_dsb_prepare(struct intel_crtc_state *crtc_state) goto out; } - buf = i915_gem_object_pin_map(vma->obj, I915_MAP_WC); + buf = i915_gem_object_pin_map_unlocked(vma->obj, I915_MAP_WC); if (IS_ERR(buf)) { drm_err(&i915->drm, "Command buffer creation failed\n"); i915_vma_unpin_and_release(&vma, I915_VMA_RELEASE_MAP); diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index 07db8e83f98e..ccd00e65a5fe 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -211,7 +211,7 @@ static int intelfb_create(struct drm_fb_helper *helper, * This also validates that any existing fb inherited from the * BIOS is suitable for own access. 
*/ - vma = intel_pin_and_fence_fb_obj(&ifbdev->fb->base, + vma = intel_pin_and_fence_fb_obj(&ifbdev->fb->base, false, &view, false, &flags); if (IS_ERR(vma)) { ret = PTR_ERR(vma); diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index 9211f6c97b26..e477b6114a60 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -755,6 +755,32 @@ static u32 overlay_cmd_reg(struct drm_intel_overlay_put_image *params) return cmd; } +static struct i915_vma *intel_overlay_pin_fb(struct drm_i915_gem_object *new_bo) +{ + struct i915_gem_ww_ctx ww; + struct i915_vma *vma; + int ret; + + i915_gem_ww_ctx_init(&ww, true); +retry: + ret = i915_gem_object_lock(new_bo, &ww); + if (!ret) { + vma = i915_gem_object_pin_to_display_plane(new_bo, &ww, 0, + NULL, PIN_MAPPABLE); + ret = PTR_ERR_OR_ZERO(vma); + } + if (ret == -EDEADLK) { + ret = i915_gem_ww_ctx_backoff(&ww); + if (!ret) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + if (ret) + return ERR_PTR(ret); + + return vma; +} + static int intel_overlay_do_put_image(struct intel_overlay *overlay, struct drm_i915_gem_object *new_bo, struct drm_intel_overlay_put_image *params) @@ -776,12 +802,10 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, atomic_inc(&dev_priv->gpu_error.pending_fb_pin); - vma = i915_gem_object_pin_to_display_plane(new_bo, - 0, NULL, PIN_MAPPABLE); - if (IS_ERR(vma)) { - ret = PTR_ERR(vma); + vma = intel_overlay_pin_fb(new_bo); + if (IS_ERR(vma)) goto out_pin_section; - } + i915_gem_object_flush_frontbuffer(new_bo, ORIGIN_DIRTYFB); if (!overlay->active) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c index bc0223716906..daf9284ef1f5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c @@ -27,15 +27,8 @@ static void __do_clflush(struct drm_i915_gem_object *obj) static int clflush_work(struct dma_fence_work *base) { struct clflush *clflush = container_of(base, typeof(*clflush), base); - struct drm_i915_gem_object *obj = clflush->obj; - int err; - err = i915_gem_object_pin_pages(obj); - if (err) - return err; - - __do_clflush(obj); - i915_gem_object_unpin_pages(obj); + __do_clflush(clflush->obj); return 0; } @@ -44,6 +37,7 @@ static void clflush_release(struct dma_fence_work *base) { struct clflush *clflush = container_of(base, typeof(*clflush), base); + i915_gem_object_unpin_pages(clflush->obj); i915_gem_object_put(clflush->obj); } @@ -63,6 +57,11 @@ static struct clflush *clflush_work_create(struct drm_i915_gem_object *obj) if (!clflush) return NULL; + if (__i915_gem_object_get_pages(obj) < 0) { + kfree(clflush); + return NULL; + } + dma_fence_work_init(&clflush->base, &clflush_ops); clflush->obj = i915_gem_object_get(obj); /* obj <-> clflush cycle */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 4d2f40cf237b..fd8ee52e17a4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -232,6 +232,8 @@ static void intel_context_set_gem(struct intel_context *ce, if (ctx->sched.priority >= I915_PRIORITY_NORMAL && intel_engine_has_timeslices(ce->engine)) __set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags); + + intel_context_set_watchdog_us(ce, ctx->watchdog.timeout_us); } static void __free_engines(struct i915_gem_engines *e, unsigned int count) @@ -386,38 +388,6 @@ static bool __cancel_engine(struct 
intel_engine_cs *engine) return intel_engine_pulse(engine) == 0; } -static bool -__active_engine(struct i915_request *rq, struct intel_engine_cs **active) -{ - struct intel_engine_cs *engine, *locked; - bool ret = false; - - /* - * Serialise with __i915_request_submit() so that it sees - * is-banned?, or we know the request is already inflight. - * - * Note that rq->engine is unstable, and so we double - * check that we have acquired the lock on the final engine. - */ - locked = READ_ONCE(rq->engine); - spin_lock_irq(&locked->active.lock); - while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) { - spin_unlock(&locked->active.lock); - locked = engine; - spin_lock(&locked->active.lock); - } - - if (i915_request_is_active(rq)) { - if (!__i915_request_is_complete(rq)) - *active = locked; - ret = true; - } - - spin_unlock_irq(&locked->active.lock); - - return ret; -} - static struct intel_engine_cs *active_engine(struct intel_context *ce) { struct intel_engine_cs *engine = NULL; @@ -445,7 +415,7 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce) /* Check with the backend if the request is inflight */ found = true; if (likely(rcu_access_pointer(rq->timeline) == ce->timeline)) - found = __active_engine(rq, &engine); + found = i915_request_active_engine(rq, &engine); i915_request_put(rq); if (found) @@ -679,7 +649,7 @@ __create_context(struct drm_i915_private *i915) kref_init(&ctx->ref); ctx->i915 = i915; - ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL); + ctx->sched.priority = I915_PRIORITY_NORMAL; mutex_init(&ctx->mutex); INIT_LIST_HEAD(&ctx->link); @@ -822,6 +792,41 @@ static void __assign_timeline(struct i915_gem_context *ctx, context_apply_all(ctx, __apply_timeline, timeline); } +static int __apply_watchdog(struct intel_context *ce, void *timeout_us) +{ + return intel_context_set_watchdog_us(ce, (uintptr_t)timeout_us); +} + +static int +__set_watchdog(struct i915_gem_context *ctx, unsigned long timeout_us) +{ + int ret; + + ret = context_apply_all(ctx, __apply_watchdog, + (void *)(uintptr_t)timeout_us); + if (!ret) + ctx->watchdog.timeout_us = timeout_us; + + return ret; +} + +static void __set_default_fence_expiry(struct i915_gem_context *ctx) +{ + struct drm_i915_private *i915 = ctx->i915; + int ret; + + if (!IS_ACTIVE(CONFIG_DRM_I915_REQUEST_TIMEOUT) || + !i915->params.request_timeout_ms) + return; + + /* Default expiry for user fences. */ + ret = __set_watchdog(ctx, i915->params.request_timeout_ms * 1000); + if (ret) + drm_notice(&i915->drm, + "Failed to configure default fence expiry! 
(%d)", + ret); +} + static struct i915_gem_context * i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags) { @@ -866,6 +871,8 @@ i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags) intel_timeline_put(timeline); } + __set_default_fence_expiry(ctx); + trace_i915_context_create(ctx); return ctx; @@ -1959,7 +1966,7 @@ static int set_priority(struct i915_gem_context *ctx, !capable(CAP_SYS_NICE)) return -EPERM; - ctx->sched.priority = I915_USER_PRIORITY(priority); + ctx->sched.priority = priority; context_apply_all(ctx, __apply_priority, ctx); return 0; @@ -2463,7 +2470,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, case I915_CONTEXT_PARAM_PRIORITY: args->size = 0; - args->value = ctx->sched.priority >> I915_USER_PRIORITY_SHIFT; + args->value = ctx->sched.priority; break; case I915_CONTEXT_PARAM_SSEU: diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 1449f54924e0..340473aa70de 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -154,6 +154,10 @@ struct i915_gem_context { */ atomic_t active_count; + struct { + u64 timeout_us; + } watchdog; + /** * @hang_timestamp: The last time(s) this context caused a GPU hang */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index 04e9c04545ad..ccede73c6465 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -25,7 +25,7 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme struct scatterlist *src, *dst; int ret, i; - ret = i915_gem_object_pin_pages(obj); + ret = i915_gem_object_pin_pages_unlocked(obj); if (ret) goto err; @@ -82,7 +82,7 @@ static int i915_gem_dmabuf_vmap(struct dma_buf *dma_buf, struct dma_buf_map *map struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); void *vaddr; - vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); + vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); if (IS_ERR(vaddr)) return PTR_ERR(vaddr); @@ -123,42 +123,48 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire { struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); bool write = (direction == DMA_BIDIRECTIONAL || direction == DMA_TO_DEVICE); + struct i915_gem_ww_ctx ww; int err; - err = i915_gem_object_pin_pages(obj); - if (err) - return err; - - err = i915_gem_object_lock_interruptible(obj, NULL); - if (err) - goto out; - - err = i915_gem_object_set_to_cpu_domain(obj, write); - i915_gem_object_unlock(obj); - -out: - i915_gem_object_unpin_pages(obj); + i915_gem_ww_ctx_init(&ww, true); +retry: + err = i915_gem_object_lock(obj, &ww); + if (!err) + err = i915_gem_object_pin_pages(obj); + if (!err) { + err = i915_gem_object_set_to_cpu_domain(obj, write); + i915_gem_object_unpin_pages(obj); + } + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); return err; } static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction) { struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); + struct i915_gem_ww_ctx ww; int err; - err = i915_gem_object_pin_pages(obj); - if (err) - return err; - - err = i915_gem_object_lock_interruptible(obj, NULL); - if (err) - goto out; - - err = i915_gem_object_set_to_gtt_domain(obj, false); - i915_gem_object_unlock(obj); - -out: - 
i915_gem_object_unpin_pages(obj); + i915_gem_ww_ctx_init(&ww, true); +retry: + err = i915_gem_object_lock(obj, &ww); + if (!err) + err = i915_gem_object_pin_pages(obj); + if (!err) { + err = i915_gem_object_set_to_gtt_domain(obj, false); + i915_gem_object_unpin_pages(obj); + } + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); return err; } @@ -244,6 +250,9 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, } } + if (i915_gem_object_size_2big(dma_buf->size)) + return ERR_PTR(-E2BIG); + /* need to attach */ attach = dma_buf_attach(dma_buf, dev->dev); if (IS_ERR(attach)) @@ -258,7 +267,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, } drm_gem_private_object_init(dev, &obj->base, dma_buf->size); - i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops, &lock_class); + i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops, &lock_class, 0); obj->base.import_attach = attach; obj->base.resv = dma_buf->resv; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 36f54cedaaeb..073822100da7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -335,7 +335,14 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, * not allowed to be changed by userspace. */ if (i915_gem_object_is_proxy(obj)) { - ret = -ENXIO; + /* + * Silently allow cached for userptr; the vulkan driver + * sets all objects to cached + */ + if (!i915_gem_object_is_userptr(obj) || + args->caching != I915_CACHING_CACHED) + ret = -ENXIO; + goto out; } @@ -359,12 +366,12 @@ out: */ struct i915_vma * i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, + struct i915_gem_ww_ctx *ww, u32 alignment, const struct i915_ggtt_view *view, unsigned int flags) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_gem_ww_ctx ww; struct i915_vma *vma; int ret; @@ -372,11 +379,6 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj)) return ERR_PTR(-EINVAL); - i915_gem_ww_ctx_init(&ww, true); -retry: - ret = i915_gem_object_lock(obj, &ww); - if (ret) - goto err; /* * The display engine is not coherent with the LLC cache on gen6. As * a result, we make sure that the pinning that is about to occur is @@ -391,7 +393,7 @@ retry: HAS_WT(i915) ? 
I915_CACHE_WT : I915_CACHE_NONE); if (ret) - goto err; + return ERR_PTR(ret); /* * As the user may map the buffer once pinned in the display plane @@ -404,33 +406,20 @@ retry: vma = ERR_PTR(-ENOSPC); if ((flags & PIN_MAPPABLE) == 0 && (!view || view->type == I915_GGTT_VIEW_NORMAL)) - vma = i915_gem_object_ggtt_pin_ww(obj, &ww, view, 0, alignment, + vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0, alignment, flags | PIN_MAPPABLE | PIN_NONBLOCK); if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK)) - vma = i915_gem_object_ggtt_pin_ww(obj, &ww, view, 0, + vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0, alignment, flags); - if (IS_ERR(vma)) { - ret = PTR_ERR(vma); - goto err; - } + if (IS_ERR(vma)) + return vma; vma->display_alignment = max_t(u64, vma->display_alignment, alignment); i915_vma_mark_scanout(vma); i915_gem_object_flush_if_display_locked(obj); -err: - if (ret == -EDEADLK) { - ret = i915_gem_ww_ctx_backoff(&ww); - if (!ret) - goto retry; - } - i915_gem_ww_ctx_fini(&ww); - - if (ret) - return ERR_PTR(ret); - return vma; } @@ -526,6 +515,21 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, if (err) goto out; + if (i915_gem_object_is_userptr(obj)) { + /* + * Try to grab userptr pages, iris uses set_domain to check + * userptr validity + */ + err = i915_gem_object_userptr_validate(obj); + if (!err) + err = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_PRIORITY | + (write_domain ? I915_WAIT_ALL : 0), + MAX_SCHEDULE_TIMEOUT); + goto out; + } + /* * Proxy objects do not control access to the backing storage, ergo * they cannot be used as a means to manipulate the cache domain @@ -537,6 +541,10 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, goto out; } + err = i915_gem_object_lock_interruptible(obj, NULL); + if (err) + goto out; + /* * Flush and acquire obj->pages so that we are coherent through * direct access in memory with previous cached writes through @@ -548,7 +556,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, */ err = i915_gem_object_pin_pages(obj); if (err) - goto out; + goto out_unlock; /* * Already in the desired write domain? Nothing for us to do! @@ -563,10 +571,6 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, if (READ_ONCE(obj->write_domain) == read_domains) goto out_unpin; - err = i915_gem_object_lock_interruptible(obj, NULL); - if (err) - goto out_unpin; - if (read_domains & I915_GEM_DOMAIN_WC) err = i915_gem_object_set_to_wc_domain(obj, write_domain); else if (read_domains & I915_GEM_DOMAIN_GTT) @@ -574,13 +578,15 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, else err = i915_gem_object_set_to_cpu_domain(obj, write_domain); +out_unpin: + i915_gem_object_unpin_pages(obj); + +out_unlock: i915_gem_object_unlock(obj); - if (write_domain) + if (!err && write_domain) i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU); -out_unpin: - i915_gem_object_unpin_pages(obj); out: i915_gem_object_put(obj); return err; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index d70ca36f74f6..5964e67c7d36 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -28,6 +28,7 @@ #include "i915_sw_fence_work.h" #include "i915_trace.h" #include "i915_user_extensions.h" +#include "i915_memcpy.h" struct eb_vma { struct i915_vma *vma; @@ -49,16 +50,19 @@ enum { #define DBG_FORCE_RELOC 0 /* choose one of the above! 
*/ }; -#define __EXEC_OBJECT_HAS_PIN BIT(31) -#define __EXEC_OBJECT_HAS_FENCE BIT(30) -#define __EXEC_OBJECT_NEEDS_MAP BIT(29) -#define __EXEC_OBJECT_NEEDS_BIAS BIT(28) -#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 28) /* all of the above */ +/* __EXEC_OBJECT_NO_RESERVE is BIT(31), defined in i915_vma.h */ +#define __EXEC_OBJECT_HAS_PIN BIT(30) +#define __EXEC_OBJECT_HAS_FENCE BIT(29) +#define __EXEC_OBJECT_USERPTR_INIT BIT(28) +#define __EXEC_OBJECT_NEEDS_MAP BIT(27) +#define __EXEC_OBJECT_NEEDS_BIAS BIT(26) +#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 26) /* all of the above + */ #define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE) #define __EXEC_HAS_RELOC BIT(31) #define __EXEC_ENGINE_PINNED BIT(30) -#define __EXEC_INTERNAL_FLAGS (~0u << 30) +#define __EXEC_USERPTR_USED BIT(29) +#define __EXEC_INTERNAL_FLAGS (~0u << 29) #define UPDATE PIN_OFFSET_FIXED #define BATCH_OFFSET_BIAS (256*1024) @@ -419,13 +423,14 @@ static u64 eb_pin_flags(const struct drm_i915_gem_exec_object2 *entry, return pin_flags; } -static inline bool +static inline int eb_pin_vma(struct i915_execbuffer *eb, const struct drm_i915_gem_exec_object2 *entry, struct eb_vma *ev) { struct i915_vma *vma = ev->vma; u64 pin_flags; + int err; if (vma->node.size) pin_flags = vma->node.start; @@ -437,24 +442,29 @@ eb_pin_vma(struct i915_execbuffer *eb, pin_flags |= PIN_GLOBAL; /* Attempt to reuse the current location if available */ - /* TODO: Add -EDEADLK handling here */ - if (unlikely(i915_vma_pin_ww(vma, &eb->ww, 0, 0, pin_flags))) { + err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, pin_flags); + if (err == -EDEADLK) + return err; + + if (unlikely(err)) { if (entry->flags & EXEC_OBJECT_PINNED) - return false; + return err; /* Failing that pick any _free_ space if suitable */ - if (unlikely(i915_vma_pin_ww(vma, &eb->ww, + err = i915_vma_pin_ww(vma, &eb->ww, entry->pad_to_size, entry->alignment, eb_pin_flags(entry, ev->flags) | - PIN_USER | PIN_NOEVICT))) - return false; + PIN_USER | PIN_NOEVICT); + if (unlikely(err)) + return err; } if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) { - if (unlikely(i915_vma_pin_fence(vma))) { + err = i915_vma_pin_fence(vma); + if (unlikely(err)) { i915_vma_unpin(vma); - return false; + return err; } if (vma->fence) @@ -462,7 +472,10 @@ eb_pin_vma(struct i915_execbuffer *eb, } ev->flags |= __EXEC_OBJECT_HAS_PIN; - return !eb_vma_misplaced(entry, vma, ev->flags); + if (eb_vma_misplaced(entry, vma, ev->flags)) + return -EBADSLT; + + return 0; } static inline void @@ -483,6 +496,13 @@ eb_validate_vma(struct i915_execbuffer *eb, struct drm_i915_gem_exec_object2 *entry, struct i915_vma *vma) { + /* Relocations are disallowed for all platforms after TGL-LP. This + * also covers all platforms with local memory. + */ + if (entry->relocation_count && + INTEL_GEN(eb->i915) >= 12 && !IS_TIGERLAKE(eb->i915)) + return -EINVAL; + if (unlikely(entry->flags & eb->invalid_flags)) return -EINVAL; @@ -853,6 +873,26 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb) } eb_add_vma(eb, i, batch, vma); + + if (i915_gem_object_is_userptr(vma->obj)) { + err = i915_gem_object_userptr_submit_init(vma->obj); + if (err) { + if (i + 1 < eb->buffer_count) { + /* + * Execbuffer code expects last vma entry to be NULL, + * since we already initialized this entry, + * set the next value to NULL or we mess up + * cleanup handling. 
+ */ + eb->vma[i + 1].vma = NULL; + } + + return err; + } + + eb->vma[i].flags |= __EXEC_OBJECT_USERPTR_INIT; + eb->args->flags |= __EXEC_USERPTR_USED; + } } if (unlikely(eb->batch->flags & EXEC_OBJECT_WRITE)) { @@ -898,7 +938,11 @@ static int eb_validate_vmas(struct i915_execbuffer *eb) if (err) return err; - if (eb_pin_vma(eb, entry, ev)) { + err = eb_pin_vma(eb, entry, ev); + if (err == -EDEADLK) + return err; + + if (!err) { if (entry->offset != vma->node.start) { entry->offset = vma->node.start | UPDATE; eb->args->flags |= __EXEC_HAS_RELOC; @@ -914,6 +958,12 @@ static int eb_validate_vmas(struct i915_execbuffer *eb) } } + if (!(ev->flags & EXEC_OBJECT_WRITE)) { + err = dma_resv_reserve_shared(vma->resv, 1); + if (err) + return err; + } + GEM_BUG_ON(drm_mm_node_allocated(&vma->node) && eb_vma_misplaced(&eb->exec[i], vma, ev->flags)); } @@ -944,7 +994,7 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle) } } -static void eb_release_vmas(struct i915_execbuffer *eb, bool final) +static void eb_release_vmas(struct i915_execbuffer *eb, bool final, bool release_userptr) { const unsigned int count = eb->buffer_count; unsigned int i; @@ -958,6 +1008,11 @@ static void eb_release_vmas(struct i915_execbuffer *eb, bool final) eb_unreserve_vma(ev); + if (release_userptr && ev->flags & __EXEC_OBJECT_USERPTR_INIT) { + ev->flags &= ~__EXEC_OBJECT_USERPTR_INIT; + i915_gem_object_userptr_submit_fini(vma->obj); + } + if (final) i915_vma_put(vma); } @@ -1294,6 +1349,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, err = PTR_ERR(cmd); goto err_pool; } + intel_gt_buffer_pool_mark_used(pool); memset32(cmd, 0, pool->obj->base.size / sizeof(u32)); @@ -1895,6 +1951,31 @@ static int eb_prefault_relocations(const struct i915_execbuffer *eb) return 0; } +static int eb_reinit_userptr(struct i915_execbuffer *eb) +{ + const unsigned int count = eb->buffer_count; + unsigned int i; + int ret; + + if (likely(!(eb->args->flags & __EXEC_USERPTR_USED))) + return 0; + + for (i = 0; i < count; i++) { + struct eb_vma *ev = &eb->vma[i]; + + if (!i915_gem_object_is_userptr(ev->vma->obj)) + continue; + + ret = i915_gem_object_userptr_submit_init(ev->vma->obj); + if (ret) + return ret; + + ev->flags |= __EXEC_OBJECT_USERPTR_INIT; + } + + return 0; +} + static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb, struct i915_request *rq) { @@ -1909,7 +1990,7 @@ repeat: } /* We may process another execbuffer during the unlock... 
*/ - eb_release_vmas(eb, false); + eb_release_vmas(eb, false, true); i915_gem_ww_ctx_fini(&eb->ww); if (rq) { @@ -1951,7 +2032,7 @@ repeat: } if (!err) - flush_workqueue(eb->i915->mm.userptr_wq); + err = eb_reinit_userptr(eb); err_relock: i915_gem_ww_ctx_init(&eb->ww, true); @@ -2013,7 +2094,7 @@ repeat_validate: err: if (err == -EDEADLK) { - eb_release_vmas(eb, false); + eb_release_vmas(eb, false, false); err = i915_gem_ww_ctx_backoff(&eb->ww); if (!err) goto repeat_validate; @@ -2110,7 +2191,7 @@ retry: err: if (err == -EDEADLK) { - eb_release_vmas(eb, false); + eb_release_vmas(eb, false, false); err = i915_gem_ww_ctx_backoff(&eb->ww); if (!err) goto retry; @@ -2181,8 +2262,33 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) } if (err == 0) - err = i915_vma_move_to_active(vma, eb->request, flags); + err = i915_vma_move_to_active(vma, eb->request, + flags | __EXEC_OBJECT_NO_RESERVE); + } + +#ifdef CONFIG_MMU_NOTIFIER + if (!err && (eb->args->flags & __EXEC_USERPTR_USED)) { + spin_lock(&eb->i915->mm.notifier_lock); + + /* + * count is always at least 1, otherwise __EXEC_USERPTR_USED + * could not have been set + */ + for (i = 0; i < count; i++) { + struct eb_vma *ev = &eb->vma[i]; + struct drm_i915_gem_object *obj = ev->vma->obj; + + if (!i915_gem_object_is_userptr(obj)) + continue; + + err = i915_gem_object_userptr_submit_done(obj); + if (err) + break; + } + + spin_unlock(&eb->i915->mm.notifier_lock); } +#endif if (unlikely(err)) goto err_skip; @@ -2274,24 +2380,45 @@ struct eb_parse_work { struct i915_vma *trampoline; unsigned long batch_offset; unsigned long batch_length; + unsigned long *jump_whitelist; + const void *batch_map; + void *shadow_map; }; static int __eb_parse(struct dma_fence_work *work) { struct eb_parse_work *pw = container_of(work, typeof(*pw), base); + int ret; + bool cookie; - return intel_engine_cmd_parser(pw->engine, - pw->batch, - pw->batch_offset, - pw->batch_length, - pw->shadow, - pw->trampoline); + cookie = dma_fence_begin_signalling(); + ret = intel_engine_cmd_parser(pw->engine, + pw->batch, + pw->batch_offset, + pw->batch_length, + pw->shadow, + pw->jump_whitelist, + pw->shadow_map, + pw->batch_map); + dma_fence_end_signalling(cookie); + + return ret; } static void __eb_parse_release(struct dma_fence_work *work) { struct eb_parse_work *pw = container_of(work, typeof(*pw), base); + if (!IS_ERR_OR_NULL(pw->jump_whitelist)) + kfree(pw->jump_whitelist); + + if (pw->batch_map) + i915_gem_object_unpin_map(pw->batch->obj); + else + i915_gem_object_unpin_pages(pw->batch->obj); + + i915_gem_object_unpin_map(pw->shadow->obj); + if (pw->trampoline) i915_active_release(&pw->trampoline->active); i915_active_release(&pw->shadow->active); @@ -2341,6 +2468,8 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb, struct i915_vma *trampoline) { struct eb_parse_work *pw; + struct drm_i915_gem_object *batch = eb->batch->vma->obj; + bool needs_clflush; int err; GEM_BUG_ON(overflows_type(eb->batch_start_offset, pw->batch_offset)); @@ -2364,6 +2493,34 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb, goto err_shadow; } + pw->shadow_map = i915_gem_object_pin_map(shadow->obj, I915_MAP_WB); + if (IS_ERR(pw->shadow_map)) { + err = PTR_ERR(pw->shadow_map); + goto err_trampoline; + } + + needs_clflush = + !(batch->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ); + + pw->batch_map = ERR_PTR(-ENODEV); + if (needs_clflush && i915_has_memcpy_from_wc()) + pw->batch_map = i915_gem_object_pin_map(batch, I915_MAP_WC); + + if (IS_ERR(pw->batch_map)) { + err = 
i915_gem_object_pin_pages(batch); + if (err) + goto err_unmap_shadow; + pw->batch_map = NULL; + } + + pw->jump_whitelist = + intel_engine_cmd_parser_alloc_jump_whitelist(eb->batch_len, + trampoline); + if (IS_ERR(pw->jump_whitelist)) { + err = PTR_ERR(pw->jump_whitelist); + goto err_unmap_batch; + } + dma_fence_work_init(&pw->base, &eb_parse_ops); pw->engine = eb->engine; @@ -2382,6 +2539,10 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb, if (err) goto err_commit; + err = dma_resv_reserve_shared(shadow->resv, 1); + if (err) + goto err_commit; + /* Wait for all writes (and relocs) into the batch to complete */ err = i915_sw_fence_await_reservation(&pw->base.chain, pw->batch->resv, NULL, false, @@ -2403,6 +2564,16 @@ err_commit: dma_fence_work_commit_imm(&pw->base); return err; +err_unmap_batch: + if (pw->batch_map) + i915_gem_object_unpin_map(batch); + else + i915_gem_object_unpin_pages(batch); +err_unmap_shadow: + i915_gem_object_unpin_map(shadow->obj); +err_trampoline: + if (trampoline) + i915_active_release(&trampoline->active); err_shadow: i915_active_release(&shadow->active); err_batch: @@ -2474,6 +2645,7 @@ static int eb_parse(struct i915_execbuffer *eb) err = PTR_ERR(shadow); goto err; } + intel_gt_buffer_pool_mark_used(pool); i915_gem_object_set_readonly(shadow->obj); shadow->private = pool; @@ -3263,7 +3435,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, err = eb_lookup_vmas(&eb); if (err) { - eb_release_vmas(&eb, true); + eb_release_vmas(&eb, true, true); goto err_engine; } @@ -3335,6 +3507,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, trace_i915_request_queue(eb.request, eb.batch_flags); err = eb_submit(&eb, batch); + err_request: i915_request_get(eb.request); err = eb_request_add(&eb, err); @@ -3355,7 +3528,7 @@ err_request: i915_request_put(eb.request); err_vma: - eb_release_vmas(&eb, true); + eb_release_vmas(&eb, true, true); if (eb.trampoline) i915_vma_unpin(eb.trampoline); WARN_ON(err == -EDEADLK); @@ -3401,106 +3574,6 @@ static bool check_buffer_count(size_t count) return !(count < 1 || count > INT_MAX || count > SIZE_MAX / sz - 1); } -/* - * Legacy execbuffer just creates an exec2 list from the original exec object - * list array and passes it to the real function. 
- */ -int -i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct drm_i915_private *i915 = to_i915(dev); - struct drm_i915_gem_execbuffer *args = data; - struct drm_i915_gem_execbuffer2 exec2; - struct drm_i915_gem_exec_object *exec_list = NULL; - struct drm_i915_gem_exec_object2 *exec2_list = NULL; - const size_t count = args->buffer_count; - unsigned int i; - int err; - - if (!check_buffer_count(count)) { - drm_dbg(&i915->drm, "execbuf2 with %zd buffers\n", count); - return -EINVAL; - } - - exec2.buffers_ptr = args->buffers_ptr; - exec2.buffer_count = args->buffer_count; - exec2.batch_start_offset = args->batch_start_offset; - exec2.batch_len = args->batch_len; - exec2.DR1 = args->DR1; - exec2.DR4 = args->DR4; - exec2.num_cliprects = args->num_cliprects; - exec2.cliprects_ptr = args->cliprects_ptr; - exec2.flags = I915_EXEC_RENDER; - i915_execbuffer2_set_context_id(exec2, 0); - - err = i915_gem_check_execbuffer(&exec2); - if (err) - return err; - - /* Copy in the exec list from userland */ - exec_list = kvmalloc_array(count, sizeof(*exec_list), - __GFP_NOWARN | GFP_KERNEL); - - /* Allocate extra slots for use by the command parser */ - exec2_list = kvmalloc_array(count + 2, eb_element_size(), - __GFP_NOWARN | GFP_KERNEL); - if (exec_list == NULL || exec2_list == NULL) { - drm_dbg(&i915->drm, - "Failed to allocate exec list for %d buffers\n", - args->buffer_count); - kvfree(exec_list); - kvfree(exec2_list); - return -ENOMEM; - } - err = copy_from_user(exec_list, - u64_to_user_ptr(args->buffers_ptr), - sizeof(*exec_list) * count); - if (err) { - drm_dbg(&i915->drm, "copy %d exec entries failed %d\n", - args->buffer_count, err); - kvfree(exec_list); - kvfree(exec2_list); - return -EFAULT; - } - - for (i = 0; i < args->buffer_count; i++) { - exec2_list[i].handle = exec_list[i].handle; - exec2_list[i].relocation_count = exec_list[i].relocation_count; - exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr; - exec2_list[i].alignment = exec_list[i].alignment; - exec2_list[i].offset = exec_list[i].offset; - if (INTEL_GEN(to_i915(dev)) < 4) - exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE; - else - exec2_list[i].flags = 0; - } - - err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list); - if (exec2.flags & __EXEC_HAS_RELOC) { - struct drm_i915_gem_exec_object __user *user_exec_list = - u64_to_user_ptr(args->buffers_ptr); - - /* Copy the new buffer offsets back to the user's exec list. 
*/ - for (i = 0; i < args->buffer_count; i++) { - if (!(exec2_list[i].offset & UPDATE)) - continue; - - exec2_list[i].offset = - gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK); - exec2_list[i].offset &= PIN_OFFSET_MASK; - if (__copy_to_user(&user_exec_list[i].offset, - &exec2_list[i].offset, - sizeof(user_exec_list[i].offset))) - break; - } - } - - kvfree(exec_list); - kvfree(exec2_list); - return err; -} - int i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, struct drm_file *file) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_fence.c b/drivers/gpu/drm/i915/gem/i915_gem_fence.c deleted file mode 100644 index 8ab842c80f99..000000000000 --- a/drivers/gpu/drm/i915/gem/i915_gem_fence.c +++ /dev/null @@ -1,95 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2019 Intel Corporation - */ - -#include "i915_drv.h" -#include "i915_gem_object.h" - -struct stub_fence { - struct dma_fence dma; - struct i915_sw_fence chain; -}; - -static int __i915_sw_fence_call -stub_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) -{ - struct stub_fence *stub = container_of(fence, typeof(*stub), chain); - - switch (state) { - case FENCE_COMPLETE: - dma_fence_signal(&stub->dma); - break; - - case FENCE_FREE: - dma_fence_put(&stub->dma); - break; - } - - return NOTIFY_DONE; -} - -static const char *stub_driver_name(struct dma_fence *fence) -{ - return DRIVER_NAME; -} - -static const char *stub_timeline_name(struct dma_fence *fence) -{ - return "object"; -} - -static void stub_release(struct dma_fence *fence) -{ - struct stub_fence *stub = container_of(fence, typeof(*stub), dma); - - i915_sw_fence_fini(&stub->chain); - - BUILD_BUG_ON(offsetof(typeof(*stub), dma)); - dma_fence_free(&stub->dma); -} - -static const struct dma_fence_ops stub_fence_ops = { - .get_driver_name = stub_driver_name, - .get_timeline_name = stub_timeline_name, - .release = stub_release, -}; - -struct dma_fence * -i915_gem_object_lock_fence(struct drm_i915_gem_object *obj) -{ - struct stub_fence *stub; - - assert_object_held(obj); - - stub = kmalloc(sizeof(*stub), GFP_KERNEL); - if (!stub) - return NULL; - - i915_sw_fence_init(&stub->chain, stub_notify); - dma_fence_init(&stub->dma, &stub_fence_ops, &stub->chain.wait.lock, - 0, 0); - - if (i915_sw_fence_await_reservation(&stub->chain, - obj->base.resv, NULL, true, - i915_fence_timeout(to_i915(obj->base.dev)), - I915_FENCE_GFP) < 0) - goto err; - - dma_resv_add_excl_fence(obj->base.resv, &stub->dma); - - return &stub->dma; - -err: - stub_release(&stub->dma); - return NULL; -} - -void i915_gem_object_unlock_fence(struct drm_i915_gem_object *obj, - struct dma_fence *fence) -{ - struct stub_fence *stub = container_of(fence, typeof(*stub), dma); - - i915_sw_fence_commit(&stub->chain); -} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index ad22f42541bd..21cc40897ca8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -138,8 +138,7 @@ static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj, static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = { .name = "i915_gem_object_internal", - .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | - I915_GEM_OBJECT_IS_SHRINKABLE, + .flags = I915_GEM_OBJECT_IS_SHRINKABLE, .get_pages = i915_gem_object_get_pages_internal, .put_pages = i915_gem_object_put_pages_internal, }; @@ -178,7 +177,8 @@ i915_gem_object_create_internal(struct drm_i915_private *i915, return 
ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class); + i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class, + I915_BO_ALLOC_STRUCT_PAGE); /* * Mark the object as volatile, such that the pages are marked as diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h index 87d8b27f426d..7fd22f3efbef 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h @@ -14,8 +14,6 @@ int i915_gem_busy_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int i915_gem_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file); -int i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data, - struct drm_file *file); int i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c index 194f35342710..ce1c83c13d05 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -40,13 +40,13 @@ int __i915_gem_lmem_object_init(struct intel_memory_region *mem, struct drm_i915_private *i915 = mem->i915; drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &i915_gem_lmem_obj_ops, &lock_class); + i915_gem_object_init(obj, &i915_gem_lmem_obj_ops, &lock_class, flags); obj->read_domains = I915_GEM_DOMAIN_WC | I915_GEM_DOMAIN_GTT; i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); - i915_gem_object_init_memory_region(obj, mem, flags); + i915_gem_object_init_memory_region(obj, mem); return 0; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index ec28a6cde49b..2561a2f1e54f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -246,12 +246,15 @@ static vm_fault_t vm_fault_cpu(struct vm_fault *vmf) area->vm_flags & VM_WRITE)) return VM_FAULT_SIGBUS; + if (i915_gem_object_lock_interruptible(obj, NULL)) + return VM_FAULT_NOPAGE; + err = i915_gem_object_pin_pages(obj); if (err) goto out; iomap = -1; - if (!i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_STRUCT_PAGE)) { + if (!i915_gem_object_has_struct_page(obj)) { iomap = obj->mm.region->iomap.base; iomap -= obj->mm.region->region.start; } @@ -269,6 +272,7 @@ static vm_fault_t vm_fault_cpu(struct vm_fault *vmf) i915_gem_object_unpin_pages(obj); out: + i915_gem_object_unlock(obj); return i915_error_to_vmf_fault(err); } @@ -417,7 +421,9 @@ vm_access(struct vm_area_struct *area, unsigned long addr, { struct i915_mmap_offset *mmo = area->vm_private_data; struct drm_i915_gem_object *obj = mmo->obj; + struct i915_gem_ww_ctx ww; void *vaddr; + int err = 0; if (i915_gem_object_is_readonly(obj) && write) return -EACCES; @@ -426,10 +432,18 @@ vm_access(struct vm_area_struct *area, unsigned long addr, if (addr >= obj->base.size) return -EINVAL; + i915_gem_ww_ctx_init(&ww, true); +retry: + err = i915_gem_object_lock(obj, &ww); + if (err) + goto out; + /* As this is primarily for debugging, let's focus on simplicity */ vaddr = i915_gem_object_pin_map(obj, I915_MAP_FORCE_WC); - if (IS_ERR(vaddr)) - return PTR_ERR(vaddr); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto out; + } if (write) { memcpy(vaddr + addr, buf, len); @@ -439,6 +453,16 @@ vm_access(struct vm_area_struct *area, 
unsigned long addr, } i915_gem_object_unpin_map(obj); +out: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + + if (err) + return err; return len; } @@ -653,9 +677,8 @@ __assign_mmap_offset(struct drm_file *file, } if (mmap_type != I915_MMAP_TYPE_GTT && - !i915_gem_object_type_has(obj, - I915_GEM_OBJECT_HAS_STRUCT_PAGE | - I915_GEM_OBJECT_HAS_IOMEM)) { + !i915_gem_object_has_struct_page(obj) && + !i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM)) { err = -ENODEV; goto out; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 70f798405f7f..ea74cbca95be 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -60,10 +60,8 @@ void i915_gem_object_free(struct drm_i915_gem_object *obj) void i915_gem_object_init(struct drm_i915_gem_object *obj, const struct drm_i915_gem_object_ops *ops, - struct lock_class_key *key) + struct lock_class_key *key, unsigned flags) { - __mutex_init(&obj->mm.lock, ops->name ?: "obj->mm.lock", key); - spin_lock_init(&obj->vma.lock); INIT_LIST_HEAD(&obj->vma.list); @@ -78,16 +76,14 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, init_rcu_head(&obj->rcu); obj->ops = ops; + GEM_BUG_ON(flags & ~I915_BO_ALLOC_FLAGS); + obj->flags = flags; obj->mm.madv = I915_MADV_WILLNEED; INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN); mutex_init(&obj->mm.get_page.lock); INIT_RADIX_TREE(&obj->mm.get_dma_page.radix, GFP_KERNEL | __GFP_NOWARN); mutex_init(&obj->mm.get_dma_page.lock); - - if (IS_ENABLED(CONFIG_LOCKDEP) && i915_gem_object_is_shrinkable(obj)) - i915_gem_shrinker_taints_mutex(to_i915(obj->base.dev), - &obj->mm.lock); } /** diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index d0ae834d787a..2ebd79537aea 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -16,6 +16,32 @@ #include "i915_gem_gtt.h" #include "i915_vma_types.h" +/* + * XXX: There is a prevalence of the assumption that we fit the + * object's page count inside a 32bit _signed_ variable. Let's document + * this and catch if we ever need to fix it. In the meantime, if you do + * spot such a local variable, please consider fixing! 
+ * + * Aside from our own locals (for which we have no excuse!): + * - sg_table embeds unsigned int for num_pages + * - get_user_pages*() mixed ints with longs + */ +#define GEM_CHECK_SIZE_OVERFLOW(sz) \ + GEM_WARN_ON((sz) >> PAGE_SHIFT > INT_MAX) + +static inline bool i915_gem_object_size_2big(u64 size) +{ + struct drm_i915_gem_object *obj; + + if (GEM_CHECK_SIZE_OVERFLOW(size)) + return true; + + if (overflows_type(size, obj->base.size)) + return true; + + return false; +} + void i915_gem_init__objects(struct drm_i915_private *i915); struct drm_i915_gem_object *i915_gem_object_alloc(void); @@ -23,7 +49,8 @@ void i915_gem_object_free(struct drm_i915_gem_object *obj); void i915_gem_object_init(struct drm_i915_gem_object *obj, const struct drm_i915_gem_object_ops *ops, - struct lock_class_key *key); + struct lock_class_key *key, + unsigned alloc_flags); struct drm_i915_gem_object * i915_gem_object_create_shmem(struct drm_i915_private *i915, resource_size_t size); @@ -32,11 +59,21 @@ i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915, const void *data, resource_size_t size); extern const struct drm_i915_gem_object_ops i915_gem_shmem_ops; + void __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, struct sg_table *pages, bool needs_clflush); +int i915_gem_object_pwrite_phys(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_pwrite *args); +int i915_gem_object_pread_phys(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_pread *args); + int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align); +void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, + struct sg_table *pages); +void i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj, + struct sg_table *pages); void i915_gem_flush_free_objects(struct drm_i915_private *i915); @@ -107,6 +144,20 @@ i915_gem_object_put(struct drm_i915_gem_object *obj) #define assert_object_held(obj) dma_resv_assert_held((obj)->base.resv) +/* + * If more than one potential simultaneous locker, assert held. + */ +static inline void assert_object_held_shared(struct drm_i915_gem_object *obj) +{ + /* + * Note mm list lookup is protected by + * kref_get_unless_zero(). 
+ */ + if (IS_ENABLED(CONFIG_LOCKDEP) && + kref_read(&obj->base.refcount) > 0) + assert_object_held(obj); +} + static inline int __i915_gem_object_lock(struct drm_i915_gem_object *obj, struct i915_gem_ww_ctx *ww, bool intr) @@ -152,11 +203,6 @@ static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj) dma_resv_unlock(obj->base.resv); } -struct dma_fence * -i915_gem_object_lock_fence(struct drm_i915_gem_object *obj); -void i915_gem_object_unlock_fence(struct drm_i915_gem_object *obj, - struct dma_fence *fence); - static inline void i915_gem_object_set_readonly(struct drm_i915_gem_object *obj) { @@ -215,7 +261,7 @@ i915_gem_object_type_has(const struct drm_i915_gem_object *obj, static inline bool i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) { - return i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_STRUCT_PAGE); + return obj->flags & I915_BO_ALLOC_STRUCT_PAGE; } static inline bool @@ -243,12 +289,6 @@ i915_gem_object_never_mmap(const struct drm_i915_gem_object *obj) } static inline bool -i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj) -{ - return i915_gem_object_type_has(obj, I915_GEM_OBJECT_ASYNC_CANCEL); -} - -static inline bool i915_gem_object_is_framebuffer(const struct drm_i915_gem_object *obj) { return READ_ONCE(obj->frontbuffer); @@ -299,22 +339,22 @@ struct scatterlist * __i915_gem_object_get_sg(struct drm_i915_gem_object *obj, struct i915_gem_object_page_iter *iter, unsigned int n, - unsigned int *offset); + unsigned int *offset, bool allow_alloc); static inline struct scatterlist * i915_gem_object_get_sg(struct drm_i915_gem_object *obj, unsigned int n, - unsigned int *offset) + unsigned int *offset, bool allow_alloc) { - return __i915_gem_object_get_sg(obj, &obj->mm.get_page, n, offset); + return __i915_gem_object_get_sg(obj, &obj->mm.get_page, n, offset, allow_alloc); } static inline struct scatterlist * i915_gem_object_get_sg_dma(struct drm_i915_gem_object *obj, unsigned int n, - unsigned int *offset) + unsigned int *offset, bool allow_alloc) { - return __i915_gem_object_get_sg(obj, &obj->mm.get_dma_page, n, offset); + return __i915_gem_object_get_sg(obj, &obj->mm.get_dma_page, n, offset, allow_alloc); } struct page * @@ -341,27 +381,10 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj); int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj); -enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */ - I915_MM_NORMAL = 0, - /* - * Only used by struct_mutex, when called "recursively" from - * direct-reclaim-esque. Safe because there is only every one - * struct_mutex in the entire system. - */ - I915_MM_SHRINKER = 1, - /* - * Used for obj->mm.lock when allocating pages. Safe because the object - * isn't yet on any LRU, and therefore the shrinker can't deadlock on - * it. As soon as the object has pages, obj->mm.lock nests within - * fs_reclaim. 
- */ - I915_MM_GET_PAGES = 1, -}; - static inline int __must_check i915_gem_object_pin_pages(struct drm_i915_gem_object *obj) { - might_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES); + assert_object_held(obj); if (atomic_inc_not_zero(&obj->mm.pages_pin_count)) return 0; @@ -369,6 +392,8 @@ i915_gem_object_pin_pages(struct drm_i915_gem_object *obj) return __i915_gem_object_get_pages(obj); } +int i915_gem_object_pin_pages_unlocked(struct drm_i915_gem_object *obj); + static inline bool i915_gem_object_has_pages(struct drm_i915_gem_object *obj) { @@ -427,6 +452,9 @@ void i915_gem_object_writeback(struct drm_i915_gem_object *obj); void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj, enum i915_map_type type); +void *__must_check i915_gem_object_pin_map_unlocked(struct drm_i915_gem_object *obj, + enum i915_map_type type); + void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj, unsigned long offset, unsigned long size); @@ -495,6 +523,7 @@ int __must_check i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write); struct i915_vma * __must_check i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, + struct i915_gem_ww_ctx *ww, u32 alignment, const struct i915_ggtt_view *view, unsigned int flags); @@ -558,4 +587,25 @@ int i915_gem_object_read_from_page(struct drm_i915_gem_object *obj, u64 offset, bool i915_gem_object_is_shmem(const struct drm_i915_gem_object *obj); +#ifdef CONFIG_MMU_NOTIFIER +static inline bool +i915_gem_object_is_userptr(struct drm_i915_gem_object *obj) +{ + return obj->userptr.notifier.mm; +} + +int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj); +int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj); +void i915_gem_object_userptr_submit_fini(struct drm_i915_gem_object *obj); +int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj); +#else +static inline bool i915_gem_object_is_userptr(struct drm_i915_gem_object *obj) { return false; } + +static inline int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; } +static inline int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; } +static inline void i915_gem_object_userptr_submit_fini(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); } +static inline int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; } + +#endif + #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c index d6dac21fce0b..df8e8c18c6c9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c @@ -55,6 +55,9 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, if (unlikely(err)) goto out_put; + /* we pinned the pool, mark it as such */ + intel_gt_buffer_pool_mark_used(pool); + cmd = i915_gem_object_pin_map(pool->obj, pool->type); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); @@ -277,6 +280,9 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, if (unlikely(err)) goto out_put; + /* we pinned the pool, mark it as such */ + intel_gt_buffer_pool_mark_used(pool); + cmd = i915_gem_object_pin_map(pool->obj, pool->type); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 0438e00d4ca7..8e485cb3343c 100644 --- 
a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -7,6 +7,8 @@ #ifndef __I915_GEM_OBJECT_TYPES_H__ #define __I915_GEM_OBJECT_TYPES_H__ +#include <linux/mmu_notifier.h> + #include <drm/drm_gem.h> #include <uapi/drm/i915_drm.h> @@ -30,12 +32,10 @@ struct i915_lut_handle { struct drm_i915_gem_object_ops { unsigned int flags; -#define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0) #define I915_GEM_OBJECT_HAS_IOMEM BIT(1) #define I915_GEM_OBJECT_IS_SHRINKABLE BIT(2) #define I915_GEM_OBJECT_IS_PROXY BIT(3) #define I915_GEM_OBJECT_NO_MMAP BIT(4) -#define I915_GEM_OBJECT_ASYNC_CANCEL BIT(5) /* Interface between the GEM object and its backing storage. * get_pages() is called once prior to the use of the associated set @@ -171,9 +171,12 @@ struct drm_i915_gem_object { unsigned long flags; #define I915_BO_ALLOC_CONTIGUOUS BIT(0) #define I915_BO_ALLOC_VOLATILE BIT(1) -#define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | I915_BO_ALLOC_VOLATILE) -#define I915_BO_READONLY BIT(2) -#define I915_TILING_QUIRK_BIT 3 /* unknown swizzling; do not release! */ +#define I915_BO_ALLOC_STRUCT_PAGE BIT(2) +#define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \ + I915_BO_ALLOC_VOLATILE | \ + I915_BO_ALLOC_STRUCT_PAGE) +#define I915_BO_READONLY BIT(3) +#define I915_TILING_QUIRK_BIT 4 /* unknown swizzling; do not release! */ /* * Is the object to be mapped as read-only to the GPU @@ -213,7 +216,6 @@ struct drm_i915_gem_object { * Protects the pages and their use. Do not use directly, but * instead go through the pin/unpin interfaces. */ - struct mutex lock; atomic_t pages_pin_count; atomic_t shrink_pin; @@ -288,13 +290,16 @@ struct drm_i915_gem_object { unsigned long *bit_17; union { +#ifdef CONFIG_MMU_NOTIFIER struct i915_gem_userptr { uintptr_t ptr; + unsigned long notifier_seq; - struct i915_mm_struct *mm; - struct i915_mmu_object *mmu_object; - struct work_struct *work; + struct mmu_interval_notifier notifier; + struct page **pvec; + int page_ref; } userptr; +#endif struct drm_mm_node *stolen; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 43028f3539a6..aed8a37ccdc9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -19,7 +19,7 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, bool shrinkable; int i; - lockdep_assert_held(&obj->mm.lock); + assert_object_held_shared(obj); if (i915_gem_object_is_volatile(obj)) obj->mm.madv = I915_MADV_DONTNEED; @@ -70,6 +70,7 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, struct list_head *list; unsigned long flags; + assert_object_held(obj); spin_lock_irqsave(&i915->mm.obj_lock, flags); i915->mm.shrink_count++; @@ -91,6 +92,8 @@ int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj) struct drm_i915_private *i915 = to_i915(obj->base.dev); int err; + assert_object_held_shared(obj); + if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) { drm_dbg(&i915->drm, "Attempting to obtain a purgeable object\n"); @@ -114,23 +117,41 @@ int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj) { int err; - err = mutex_lock_interruptible_nested(&obj->mm.lock, I915_MM_GET_PAGES); - if (err) - return err; + assert_object_held(obj); + + assert_object_held_shared(obj); if (unlikely(!i915_gem_object_has_pages(obj))) { GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); err = ____i915_gem_object_get_pages(obj); if (err) - goto unlock; + return err; smp_mb__before_atomic(); } 
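	/*
	 * (Editorial annotation, not part of the patch:) the
	 * smp_mb__before_atomic() above pairs with the atomic_inc_not_zero()
	 * fast path in i915_gem_object_pin_pages(): it orders the store that
	 * publishes obj->mm.pages before the pin count becomes non-zero, so
	 * a concurrent pinner that wins the race never sees a
	 * half-initialised page set:
	 *
	 *	if (atomic_inc_not_zero(&obj->mm.pages_pin_count))
	 *		return 0;	(pages guaranteed visible here)
	 */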
atomic_inc(&obj->mm.pages_pin_count); -unlock: - mutex_unlock(&obj->mm.lock); + return 0; +} + +int i915_gem_object_pin_pages_unlocked(struct drm_i915_gem_object *obj) +{ + struct i915_gem_ww_ctx ww; + int err; + + i915_gem_ww_ctx_init(&ww, true); +retry: + err = i915_gem_object_lock(obj, &ww); + if (!err) + err = i915_gem_object_pin_pages(obj); + + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); return err; } @@ -145,7 +166,7 @@ void i915_gem_object_truncate(struct drm_i915_gem_object *obj) /* Try to discard unwanted pages */ void i915_gem_object_writeback(struct drm_i915_gem_object *obj) { - lockdep_assert_held(&obj->mm.lock); + assert_object_held_shared(obj); GEM_BUG_ON(i915_gem_object_has_pages(obj)); if (obj->ops->writeback) @@ -176,6 +197,8 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) { struct sg_table *pages; + assert_object_held_shared(obj); + pages = fetch_and_zero(&obj->mm.pages); if (IS_ERR_OR_NULL(pages)) return pages; @@ -199,17 +222,12 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj) { struct sg_table *pages; - int err; if (i915_gem_object_has_pinned_pages(obj)) return -EBUSY; /* May be called by shrinker from within get_pages() (on another bo) */ - mutex_lock(&obj->mm.lock); - if (unlikely(atomic_read(&obj->mm.pages_pin_count))) { - err = -EBUSY; - goto unlock; - } + assert_object_held_shared(obj); i915_gem_object_release_mmap_offset(obj); @@ -226,17 +244,10 @@ int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj) * get_pages backends we should be better able to handle the * cancellation of the async task in a more uniform manner. */ - if (!pages && !i915_gem_object_needs_async_cancel(obj)) - pages = ERR_PTR(-EINVAL); - - if (!IS_ERR(pages)) + if (!IS_ERR_OR_NULL(pages)) obj->ops->put_pages(obj, pages); - err = 0; -unlock: - mutex_unlock(&obj->mm.lock); - - return err; + return 0; } /* The 'mapping' part of i915_gem_object_pin_map() below */ @@ -333,18 +344,15 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, enum i915_map_type type) { enum i915_map_type has_type; - unsigned int flags; bool pinned; void *ptr; int err; - flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | I915_GEM_OBJECT_HAS_IOMEM; - if (!i915_gem_object_type_has(obj, flags)) + if (!i915_gem_object_has_struct_page(obj) && + !i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM)) return ERR_PTR(-ENXIO); - err = mutex_lock_interruptible_nested(&obj->mm.lock, I915_MM_GET_PAGES); - if (err) - return ERR_PTR(err); + assert_object_held(obj); pinned = !(type & I915_MAP_OVERRIDE); type &= ~I915_MAP_OVERRIDE; @@ -354,10 +362,8 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); err = ____i915_gem_object_get_pages(obj); - if (err) { - ptr = ERR_PTR(err); - goto out_unlock; - } + if (err) + return ERR_PTR(err); smp_mb__before_atomic(); } @@ -392,13 +398,23 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, obj->mm.mapping = page_pack_bits(ptr, type); } -out_unlock: - mutex_unlock(&obj->mm.lock); return ptr; err_unpin: atomic_dec(&obj->mm.pages_pin_count); - goto out_unlock; + return ptr; +} + +void *i915_gem_object_pin_map_unlocked(struct drm_i915_gem_object *obj, + enum i915_map_type type) +{ + void *ret; + + i915_gem_object_lock(obj, NULL); + ret = i915_gem_object_pin_map(obj, type); + i915_gem_object_unlock(obj); + + return ret; } void 
__i915_gem_object_flush_map(struct drm_i915_gem_object *obj, @@ -448,7 +464,8 @@ struct scatterlist * __i915_gem_object_get_sg(struct drm_i915_gem_object *obj, struct i915_gem_object_page_iter *iter, unsigned int n, - unsigned int *offset) + unsigned int *offset, + bool allow_alloc) { const bool dma = iter == &obj->mm.get_dma_page; struct scatterlist *sg; @@ -470,6 +487,9 @@ __i915_gem_object_get_sg(struct drm_i915_gem_object *obj, if (n < READ_ONCE(iter->sg_idx)) goto lookup; + if (!allow_alloc) + goto manual_lookup; + mutex_lock(&iter->lock); /* We prefer to reuse the last sg so that repeated lookup of this @@ -519,7 +539,16 @@ scan: if (unlikely(n < idx)) /* insertion completed by another thread */ goto lookup; - /* In case we failed to insert the entry into the radixtree, we need + goto manual_walk; + +manual_lookup: + idx = 0; + sg = obj->mm.pages->sgl; + count = __sg_page_count(sg); + +manual_walk: + /* + * In case we failed to insert the entry into the radixtree, we need * to look beyond the current sg. */ while (idx + count <= n) { @@ -566,7 +595,7 @@ i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n) GEM_BUG_ON(!i915_gem_object_has_struct_page(obj)); - sg = i915_gem_object_get_sg(obj, n, &offset); + sg = i915_gem_object_get_sg(obj, n, &offset, true); return nth_page(sg_page(sg), offset); } @@ -592,7 +621,7 @@ i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj, struct scatterlist *sg; unsigned int offset; - sg = i915_gem_object_get_sg_dma(obj, n, &offset); + sg = i915_gem_object_get_sg_dma(obj, n, &offset, true); if (len) *len = sg_dma_len(sg) - (offset << PAGE_SHIFT); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c index 01fe89afe8c0..81dc2bf59bc3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c @@ -76,6 +76,8 @@ static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt); + /* We're no longer struct page backed */ + obj->flags &= ~I915_BO_ALLOC_STRUCT_PAGE; __i915_gem_object_set_pages(obj, st, sg->length); return 0; @@ -89,7 +91,7 @@ err_pci: return -ENOMEM; } -static void +void i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj, struct sg_table *pages) { @@ -134,9 +136,8 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj, vaddr, dma); } -static int -phys_pwrite(struct drm_i915_gem_object *obj, - const struct drm_i915_gem_pwrite *args) +int i915_gem_object_pwrite_phys(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_pwrite *args) { void *vaddr = sg_page(obj->mm.pages->sgl) + args->offset; char __user *user_data = u64_to_user_ptr(args->data_ptr); @@ -165,9 +166,8 @@ phys_pwrite(struct drm_i915_gem_object *obj, return 0; } -static int -phys_pread(struct drm_i915_gem_object *obj, - const struct drm_i915_gem_pread *args) +int i915_gem_object_pread_phys(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_pread *args) { void *vaddr = sg_page(obj->mm.pages->sgl) + args->offset; char __user *user_data = u64_to_user_ptr(args->data_ptr); @@ -186,62 +186,14 @@ phys_pread(struct drm_i915_gem_object *obj, return 0; } -static void phys_release(struct drm_i915_gem_object *obj) -{ - fput(obj->base.filp); -} - -static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { - .name = "i915_gem_object_phys", - .get_pages = i915_gem_object_get_pages_phys, - .put_pages = i915_gem_object_put_pages_phys, - - .pread = phys_pread, - .pwrite = 
phys_pwrite, - - .release = phys_release, -}; - -int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) +static int i915_gem_object_shmem_to_phys(struct drm_i915_gem_object *obj) { struct sg_table *pages; int err; - if (align > obj->base.size) - return -EINVAL; - - if (obj->ops == &i915_gem_phys_ops) - return 0; - - if (!i915_gem_object_is_shmem(obj)) - return -EINVAL; - - err = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE); - if (err) - return err; - - mutex_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES); - - if (obj->mm.madv != I915_MADV_WILLNEED) { - err = -EFAULT; - goto err_unlock; - } - - if (i915_gem_object_has_tiling_quirk(obj)) { - err = -EFAULT; - goto err_unlock; - } - - if (obj->mm.mapping) { - err = -EBUSY; - goto err_unlock; - } - pages = __i915_gem_object_unset_pages(obj); - obj->ops = &i915_gem_phys_ops; - - err = ____i915_gem_object_get_pages(obj); + err = i915_gem_object_get_pages_phys(obj); if (err) goto err_xfer; @@ -249,25 +201,57 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) __i915_gem_object_pin_pages(obj); if (!IS_ERR_OR_NULL(pages)) - i915_gem_shmem_ops.put_pages(obj, pages); + i915_gem_object_put_pages_shmem(obj, pages); i915_gem_object_release_memory_region(obj); - - mutex_unlock(&obj->mm.lock); return 0; err_xfer: - obj->ops = &i915_gem_shmem_ops; if (!IS_ERR_OR_NULL(pages)) { unsigned int sg_page_sizes = i915_sg_page_sizes(pages->sgl); __i915_gem_object_set_pages(obj, pages, sg_page_sizes); } -err_unlock: - mutex_unlock(&obj->mm.lock); return err; } +int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) +{ + int err; + + assert_object_held(obj); + + if (align > obj->base.size) + return -EINVAL; + + if (!i915_gem_object_is_shmem(obj)) + return -EINVAL; + + if (!i915_gem_object_has_struct_page(obj)) + return 0; + + err = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE); + if (err) + return err; + + if (obj->mm.madv != I915_MADV_WILLNEED) + return -EFAULT; + + if (i915_gem_object_has_tiling_quirk(obj)) + return -EFAULT; + + if (obj->mm.mapping || i915_gem_object_has_pinned_pages(obj)) + return -EBUSY; + + if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) { + drm_dbg(obj->base.dev, + "Attempting to obtain a purgeable object\n"); + return -EFAULT; + } + + return i915_gem_object_shmem_to_phys(obj); +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/i915_gem_phys.c" #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 000e1cd8e920..8b9d7d14c4bd 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -116,7 +116,7 @@ int i915_gem_freeze_late(struct drm_i915_private *i915) */ with_intel_runtime_pm(&i915->runtime_pm, wakeref) - i915_gem_shrink(i915, -1UL, NULL, ~0); + i915_gem_shrink(NULL, i915, -1UL, NULL, ~0); i915_gem_drain_freed_objects(i915); wbinvd_on_all_cpus(); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c index 3e3dad22a683..6a84fb6dde24 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c @@ -106,13 +106,11 @@ err_free_sg: } void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj, - struct intel_memory_region *mem, - unsigned long flags) + struct intel_memory_region *mem) { INIT_LIST_HEAD(&obj->mm.blocks); obj->mm.region = intel_memory_region_get(mem); - obj->flags |= flags; if (obj->base.size <= mem->min_page_size) obj->flags |= 
I915_BO_ALLOC_CONTIGUOUS; @@ -161,17 +159,7 @@ i915_gem_object_create_region(struct intel_memory_region *mem, GEM_BUG_ON(!size); GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_MIN_ALIGNMENT)); - /* - * XXX: There is a prevalence of the assumption that we fit the - * object's page count inside a 32bit _signed_ variable. Let's document - * this and catch if we ever need to fix it. In the meantime, if you do - * spot such a local variable, please consider fixing! - */ - - if (size >> PAGE_SHIFT > INT_MAX) - return ERR_PTR(-E2BIG); - - if (overflows_type(size, obj->base.size)) + if (i915_gem_object_size_2big(size)) return ERR_PTR(-E2BIG); obj = i915_gem_object_alloc(); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.h b/drivers/gpu/drm/i915/gem/i915_gem_region.h index f2ff6f8bff74..ebddc86d78f7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.h @@ -17,8 +17,7 @@ void i915_gem_object_put_pages_buddy(struct drm_i915_gem_object *obj, struct sg_table *pages); void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj, - struct intel_memory_region *mem, - unsigned long flags); + struct intel_memory_region *mem); void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj); struct drm_i915_gem_object * diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 680b370a8ef3..a9bfa66c8da1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -99,7 +99,7 @@ rebuild_st: goto err_sg; } - i915_gem_shrink(i915, 2 * page_count, NULL, *s++); + i915_gem_shrink(NULL, i915, 2 * page_count, NULL, *s++); /* * We've tried hard to allocate the memory by reaping @@ -296,8 +296,7 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, __start_cpu_write(obj); } -static void -shmem_put_pages(struct drm_i915_gem_object *obj, struct sg_table *pages) +void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_table *pages) { struct sgt_iter sgt_iter; struct pagevec pvec; @@ -331,6 +330,15 @@ shmem_put_pages(struct drm_i915_gem_object *obj, struct sg_table *pages) kfree(pages); } +static void +shmem_put_pages(struct drm_i915_gem_object *obj, struct sg_table *pages) +{ + if (likely(i915_gem_object_has_struct_page(obj))) + i915_gem_object_put_pages_shmem(obj, pages); + else + i915_gem_object_put_pages_phys(obj, pages); +} + static int shmem_pwrite(struct drm_i915_gem_object *obj, const struct drm_i915_gem_pwrite *arg) @@ -343,6 +351,9 @@ shmem_pwrite(struct drm_i915_gem_object *obj, /* Caller already validated user args */ GEM_BUG_ON(!access_ok(user_data, arg->size)); + if (!i915_gem_object_has_struct_page(obj)) + return i915_gem_object_pwrite_phys(obj, arg); + /* * Before we instantiate/pin the backing store for our use, we * can prepopulate the shmemfs filp efficiently using a write into @@ -421,17 +432,27 @@ shmem_pwrite(struct drm_i915_gem_object *obj, return 0; } +static int +shmem_pread(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_pread *arg) +{ + if (!i915_gem_object_has_struct_page(obj)) + return i915_gem_object_pread_phys(obj, arg); + + return -ENODEV; +} + static void shmem_release(struct drm_i915_gem_object *obj) { - i915_gem_object_release_memory_region(obj); + if (obj->flags & I915_BO_ALLOC_STRUCT_PAGE) + i915_gem_object_release_memory_region(obj); fput(obj->base.filp); } const struct drm_i915_gem_object_ops i915_gem_shmem_ops = { .name = "i915_gem_object_shmem", - .flags = 
I915_GEM_OBJECT_HAS_STRUCT_PAGE | - I915_GEM_OBJECT_IS_SHRINKABLE, + .flags = I915_GEM_OBJECT_IS_SHRINKABLE, .get_pages = shmem_get_pages, .put_pages = shmem_put_pages, @@ -439,6 +460,7 @@ const struct drm_i915_gem_object_ops i915_gem_shmem_ops = { .writeback = shmem_writeback, .pwrite = shmem_pwrite, + .pread = shmem_pread, .release = shmem_release, }; @@ -491,7 +513,8 @@ static int shmem_object_init(struct intel_memory_region *mem, mapping_set_gfp_mask(mapping, mask); GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); - i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class); + i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class, + I915_BO_ALLOC_STRUCT_PAGE); obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; @@ -515,7 +538,7 @@ static int shmem_object_init(struct intel_memory_region *mem, i915_gem_object_set_cache_coherency(obj, cache_level); - i915_gem_object_init_memory_region(obj, mem, 0); + i915_gem_object_init_memory_region(obj, mem); return 0; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index c2dba1cd9532..3e248d3bd869 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -49,9 +49,9 @@ static bool unsafe_drop_pages(struct drm_i915_gem_object *obj, flags = I915_GEM_OBJECT_UNBIND_TEST; if (i915_gem_object_unbind(obj, flags) == 0) - __i915_gem_object_put_pages(obj); + return true; - return !i915_gem_object_has_pages(obj); + return false; } static void try_to_writeback(struct drm_i915_gem_object *obj, @@ -94,7 +94,8 @@ static void try_to_writeback(struct drm_i915_gem_object *obj, * The number of pages of backing storage actually released. */ unsigned long -i915_gem_shrink(struct drm_i915_private *i915, +i915_gem_shrink(struct i915_gem_ww_ctx *ww, + struct drm_i915_private *i915, unsigned long target, unsigned long *nr_scanned, unsigned int shrink) @@ -113,6 +114,7 @@ i915_gem_shrink(struct drm_i915_private *i915, intel_wakeref_t wakeref = 0; unsigned long count = 0; unsigned long scanned = 0; + int err; trace_i915_gem_shrink(i915, target, shrink); @@ -200,25 +202,40 @@ i915_gem_shrink(struct drm_i915_private *i915, spin_unlock_irqrestore(&i915->mm.obj_lock, flags); + err = 0; if (unsafe_drop_pages(obj, shrink)) { /* May arrive from get_pages on another bo */ - mutex_lock(&obj->mm.lock); - if (!i915_gem_object_has_pages(obj)) { + if (!ww) { + if (!i915_gem_object_trylock(obj)) + goto skip; + } else { + err = i915_gem_object_lock(obj, ww); + if (err) + goto skip; + } + + if (!__i915_gem_object_put_pages(obj)) { try_to_writeback(obj, shrink); count += obj->base.size >> PAGE_SHIFT; } - mutex_unlock(&obj->mm.lock); + if (!ww) + i915_gem_object_unlock(obj); } dma_resv_prune(obj->base.resv); scanned += obj->base.size >> PAGE_SHIFT; +skip: i915_gem_object_put(obj); spin_lock_irqsave(&i915->mm.obj_lock, flags); + if (err) + break; } list_splice_tail(&still_in_list, phase->list); spin_unlock_irqrestore(&i915->mm.obj_lock, flags); + if (err) + return err; } if (shrink & I915_SHRINK_BOUND) @@ -249,7 +266,7 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *i915) unsigned long freed = 0; with_intel_runtime_pm(&i915->runtime_pm, wakeref) { - freed = i915_gem_shrink(i915, -1UL, NULL, + freed = i915_gem_shrink(NULL, i915, -1UL, NULL, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND); } @@ -295,7 +312,7 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) sc->nr_scanned = 0; - freed = 
i915_gem_shrink(i915, + freed = i915_gem_shrink(NULL, i915, sc->nr_to_scan, &sc->nr_scanned, I915_SHRINK_BOUND | @@ -304,7 +321,7 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) intel_wakeref_t wakeref; with_intel_runtime_pm(&i915->runtime_pm, wakeref) { - freed += i915_gem_shrink(i915, + freed += i915_gem_shrink(NULL, i915, sc->nr_to_scan - sc->nr_scanned, &sc->nr_scanned, I915_SHRINK_ACTIVE | @@ -329,7 +346,7 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) freed_pages = 0; with_intel_runtime_pm(&i915->runtime_pm, wakeref) - freed_pages += i915_gem_shrink(i915, -1UL, NULL, + freed_pages += i915_gem_shrink(NULL, i915, -1UL, NULL, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_WRITEBACK); @@ -367,7 +384,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr intel_wakeref_t wakeref; with_intel_runtime_pm(&i915->runtime_pm, wakeref) - freed_pages += i915_gem_shrink(i915, -1UL, NULL, + freed_pages += i915_gem_shrink(NULL, i915, -1UL, NULL, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_VMAPS); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.h b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.h index b397d7785789..8512470f6fd6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.h @@ -9,10 +9,12 @@ #include <linux/bits.h> struct drm_i915_private; +struct i915_gem_ww_ctx; struct mutex; /* i915_gem_shrinker.c */ -unsigned long i915_gem_shrink(struct drm_i915_private *i915, +unsigned long i915_gem_shrink(struct i915_gem_ww_ctx *ww, + struct drm_i915_private *i915, unsigned long target, unsigned long *nr_scanned, unsigned flags); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index a1e197a6e999..b0597de206de 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -630,20 +630,22 @@ static int __i915_gem_object_create_stolen(struct intel_memory_region *mem, int err; drm_gem_private_object_init(&mem->i915->drm, &obj->base, stolen->size); - i915_gem_object_init(obj, &i915_gem_object_stolen_ops, &lock_class); + i915_gem_object_init(obj, &i915_gem_object_stolen_ops, &lock_class, 0); obj->stolen = stolen; obj->read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT; cache_level = HAS_LLC(mem->i915) ? 
I915_CACHE_LLC : I915_CACHE_NONE; i915_gem_object_set_cache_coherency(obj, cache_level); - err = i915_gem_object_pin_pages(obj); - if (err) - return err; + if (WARN_ON(!i915_gem_object_trylock(obj))) + return -EBUSY; - i915_gem_object_init_memory_region(obj, mem, 0); + err = i915_gem_object_pin_pages(obj); + if (!err) + i915_gem_object_init_memory_region(obj, mem); + i915_gem_object_unlock(obj); - return 0; + return err; } static int _i915_gem_object_stolen_init(struct intel_memory_region *mem, @@ -686,7 +688,7 @@ struct drm_i915_gem_object * i915_gem_object_create_stolen(struct drm_i915_private *i915, resource_size_t size) { - return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_STOLEN], + return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_STOLEN_SMEM], size, I915_BO_ALLOC_CONTIGUOUS); } @@ -726,7 +728,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *i915, resource_size_t stolen_offset, resource_size_t size) { - struct intel_memory_region *mem = i915->mm.regions[INTEL_REGION_STOLEN]; + struct intel_memory_region *mem = i915->mm.regions[INTEL_REGION_STOLEN_SMEM]; struct drm_i915_gem_object *obj; struct drm_mm_node *stolen; int ret; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c index d589d3d81085..9e8945013090 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c @@ -265,7 +265,6 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, * pages to prevent them being swapped out and causing corruption * due to the change in swizzling. */ - mutex_lock(&obj->mm.lock); if (i915_gem_object_has_pages(obj) && obj->mm.madv == I915_MADV_WILLNEED && i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { @@ -280,7 +279,6 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, i915_gem_object_set_tiling_quirk(obj); } } - mutex_unlock(&obj->mm.lock); spin_lock(&obj->vma.lock); for_each_ggtt_vma(vma, obj) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index f2eaed6aca3d..a657b99ec760 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -2,10 +2,39 @@ * SPDX-License-Identifier: MIT * * Copyright © 2012-2014 Intel Corporation + * + * Based on amdgpu_mn, which bears the following notice: + * + * Copyright 2014 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + */ +/* + * Authors: + * Christian König <christian.koenig@amd.com> */ #include <linux/mmu_context.h> -#include <linux/mmu_notifier.h> #include <linux/mempolicy.h> #include <linux/swap.h> #include <linux/sched/mm.h> @@ -15,408 +44,121 @@ #include "i915_gem_object.h" #include "i915_scatterlist.h" -struct i915_mm_struct { - struct mm_struct *mm; - struct drm_i915_private *i915; - struct i915_mmu_notifier *mn; - struct hlist_node node; - struct kref kref; - struct rcu_work work; -}; - -#if defined(CONFIG_MMU_NOTIFIER) -#include <linux/interval_tree.h> +#ifdef CONFIG_MMU_NOTIFIER -struct i915_mmu_notifier { - spinlock_t lock; - struct hlist_node node; - struct mmu_notifier mn; - struct rb_root_cached objects; - struct i915_mm_struct *mm; -}; - -struct i915_mmu_object { - struct i915_mmu_notifier *mn; - struct drm_i915_gem_object *obj; - struct interval_tree_node it; -}; - -static void add_object(struct i915_mmu_object *mo) +/** + * i915_gem_userptr_invalidate - callback to notify about mm change + * + * @mni: the range (mm) is about to update + * @range: details on the invalidation + * @cur_seq: Value to pass to mmu_interval_set_seq() + * + * Block for operations on BOs to finish and mark pages as accessed and + * potentially dirty. + */ +static bool i915_gem_userptr_invalidate(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) { - GEM_BUG_ON(!RB_EMPTY_NODE(&mo->it.rb)); - interval_tree_insert(&mo->it, &mo->mn->objects); -} + struct drm_i915_gem_object *obj = container_of(mni, struct drm_i915_gem_object, userptr.notifier); + struct drm_i915_private *i915 = to_i915(obj->base.dev); + long r; -static void del_object(struct i915_mmu_object *mo) -{ - if (RB_EMPTY_NODE(&mo->it.rb)) - return; + if (!mmu_notifier_range_blockable(range)) + return false; - interval_tree_remove(&mo->it, &mo->mn->objects); - RB_CLEAR_NODE(&mo->it.rb); -} + spin_lock(&i915->mm.notifier_lock); -static void -__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value) -{ - struct i915_mmu_object *mo = obj->userptr.mmu_object; + mmu_interval_set_seq(mni, cur_seq); + + spin_unlock(&i915->mm.notifier_lock); /* - * During mm_invalidate_range we need to cancel any userptr that - * overlaps the range being invalidated. Doing so requires the - * struct_mutex, and that risks recursion. In order to cause - * recursion, the user must alias the userptr address space with - * a GTT mmapping (possible with a MAP_FIXED) - then when we have - * to invalidate that mmaping, mm_invalidate_range is called with - * the userptr address *and* the struct_mutex held. To prevent that - * we set a flag under the i915_mmu_notifier spinlock to indicate - * whether this object is valid. + * We don't wait when the process is exiting. This is valid + * because the object will be cleaned up anyway. + * + * This is also temporarily required as a hack, because we + * cannot currently force non-consistent batch buffers to preempt + * and reschedule by waiting on it, hanging processes on exit. 
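+	 *
+	 * (Reviewer-added sketch of the intended flow, assuming upstream
+	 * dma-resv semantics:) every other process falls through to the
+	 * dma_resv_wait_timeout_rcu() call below, which waits on all fences,
+	 * reads and writes alike, since the GPU may still be accessing the
+	 * pages through an existing binding. The mmu_interval_set_seq()
+	 * above then forces the next submit_init() to spot the invalidation
+	 * and pin fresh pages.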
*/ - if (!mo) - return; + if (current->flags & PF_EXITING) + return true; - spin_lock(&mo->mn->lock); - if (value) - add_object(mo); - else - del_object(mo); - spin_unlock(&mo->mn->lock); -} - -static int -userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, - const struct mmu_notifier_range *range) -{ - struct i915_mmu_notifier *mn = - container_of(_mn, struct i915_mmu_notifier, mn); - struct interval_tree_node *it; - unsigned long end; - int ret = 0; - - if (RB_EMPTY_ROOT(&mn->objects.rb_root)) - return 0; - - /* interval ranges are inclusive, but invalidate range is exclusive */ - end = range->end - 1; - - spin_lock(&mn->lock); - it = interval_tree_iter_first(&mn->objects, range->start, end); - while (it) { - struct drm_i915_gem_object *obj; - - if (!mmu_notifier_range_blockable(range)) { - ret = -EAGAIN; - break; - } - - /* - * The mmu_object is released late when destroying the - * GEM object so it is entirely possible to gain a - * reference on an object in the process of being freed - * since our serialisation is via the spinlock and not - * the struct_mutex - and consequently use it after it - * is freed and then double free it. To prevent that - * use-after-free we only acquire a reference on the - * object if it is not in the process of being destroyed. - */ - obj = container_of(it, struct i915_mmu_object, it)->obj; - if (!kref_get_unless_zero(&obj->base.refcount)) { - it = interval_tree_iter_next(it, range->start, end); - continue; - } - spin_unlock(&mn->lock); - - ret = i915_gem_object_unbind(obj, - I915_GEM_OBJECT_UNBIND_ACTIVE | - I915_GEM_OBJECT_UNBIND_BARRIER); - if (ret == 0) - ret = __i915_gem_object_put_pages(obj); - i915_gem_object_put(obj); - if (ret) - return ret; - - spin_lock(&mn->lock); - - /* - * As we do not (yet) protect the mmu from concurrent insertion - * over this range, there is no guarantee that this search will - * terminate given a pathologic workload. 
- */ - it = interval_tree_iter_first(&mn->objects, range->start, end); - } - spin_unlock(&mn->lock); - - return ret; + /* we will unbind on next submission, still have userptr pins */ + r = dma_resv_wait_timeout_rcu(obj->base.resv, true, false, + MAX_SCHEDULE_TIMEOUT); + if (r <= 0) + drm_err(&i915->drm, "(%ld) failed to wait for idle\n", r); + return true; } -static const struct mmu_notifier_ops i915_gem_userptr_notifier = { - .invalidate_range_start = userptr_mn_invalidate_range_start, +static const struct mmu_interval_notifier_ops i915_gem_userptr_notifier_ops = { + .invalidate = i915_gem_userptr_invalidate, }; -static struct i915_mmu_notifier * -i915_mmu_notifier_create(struct i915_mm_struct *mm) -{ - struct i915_mmu_notifier *mn; - - mn = kmalloc(sizeof(*mn), GFP_KERNEL); - if (mn == NULL) - return ERR_PTR(-ENOMEM); - - spin_lock_init(&mn->lock); - mn->mn.ops = &i915_gem_userptr_notifier; - mn->objects = RB_ROOT_CACHED; - mn->mm = mm; - - return mn; -} - -static void -i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj) -{ - struct i915_mmu_object *mo; - - mo = fetch_and_zero(&obj->userptr.mmu_object); - if (!mo) - return; - - spin_lock(&mo->mn->lock); - del_object(mo); - spin_unlock(&mo->mn->lock); - kfree(mo); -} - -static struct i915_mmu_notifier * -i915_mmu_notifier_find(struct i915_mm_struct *mm) -{ - struct i915_mmu_notifier *mn, *old; - int err; - - mn = READ_ONCE(mm->mn); - if (likely(mn)) - return mn; - - mn = i915_mmu_notifier_create(mm); - if (IS_ERR(mn)) - return mn; - - err = mmu_notifier_register(&mn->mn, mm->mm); - if (err) { - kfree(mn); - return ERR_PTR(err); - } - - old = cmpxchg(&mm->mn, NULL, mn); - if (old) { - mmu_notifier_unregister(&mn->mn, mm->mm); - kfree(mn); - mn = old; - } - - return mn; -} - static int -i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj, - unsigned flags) +i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj) { - struct i915_mmu_notifier *mn; - struct i915_mmu_object *mo; - - if (flags & I915_USERPTR_UNSYNCHRONIZED) - return capable(CAP_SYS_ADMIN) ? 
0 : -EPERM; - - if (GEM_WARN_ON(!obj->userptr.mm)) - return -EINVAL; - - mn = i915_mmu_notifier_find(obj->userptr.mm); - if (IS_ERR(mn)) - return PTR_ERR(mn); - - mo = kzalloc(sizeof(*mo), GFP_KERNEL); - if (!mo) - return -ENOMEM; - - mo->mn = mn; - mo->obj = obj; - mo->it.start = obj->userptr.ptr; - mo->it.last = obj->userptr.ptr + obj->base.size - 1; - RB_CLEAR_NODE(&mo->it.rb); - - obj->userptr.mmu_object = mo; - return 0; -} - -static void -i915_mmu_notifier_free(struct i915_mmu_notifier *mn, - struct mm_struct *mm) -{ - if (mn == NULL) - return; - - mmu_notifier_unregister(&mn->mn, mm); - kfree(mn); -} - -#else - -static void -__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value) -{ -} - -static void -i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj) -{ -} - -static int -i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj, - unsigned flags) -{ - if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0) - return -ENODEV; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - return 0; -} - -static void -i915_mmu_notifier_free(struct i915_mmu_notifier *mn, - struct mm_struct *mm) -{ -} - -#endif - -static struct i915_mm_struct * -__i915_mm_struct_find(struct drm_i915_private *i915, struct mm_struct *real) -{ - struct i915_mm_struct *it, *mm = NULL; - - rcu_read_lock(); - hash_for_each_possible_rcu(i915->mm_structs, - it, node, - (unsigned long)real) - if (it->mm == real && kref_get_unless_zero(&it->kref)) { - mm = it; - break; - } - rcu_read_unlock(); - - return mm; + return mmu_interval_notifier_insert(&obj->userptr.notifier, current->mm, + obj->userptr.ptr, obj->base.size, + &i915_gem_userptr_notifier_ops); } -static int -i915_gem_userptr_init__mm_struct(struct drm_i915_gem_object *obj) +static void i915_gem_object_userptr_drop_ref(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_mm_struct *mm, *new; - int ret = 0; - - /* During release of the GEM object we hold the struct_mutex. This - * precludes us from calling mmput() at that time as that may be - * the last reference and so call exit_mmap(). exit_mmap() will - * attempt to reap the vma, and if we were holding a GTT mmap - * would then call drm_gem_vm_close() and attempt to reacquire - * the struct mutex. So in order to avoid that recursion, we have - * to defer releasing the mm reference until after we drop the - * struct_mutex, i.e. we need to schedule a worker to do the clean - * up. 
- */ - mm = __i915_mm_struct_find(i915, current->mm); - if (mm) - goto out; - - new = kmalloc(sizeof(*mm), GFP_KERNEL); - if (!new) - return -ENOMEM; + struct page **pvec = NULL; - kref_init(&new->kref); - new->i915 = to_i915(obj->base.dev); - new->mm = current->mm; - new->mn = NULL; - - spin_lock(&i915->mm_lock); - mm = __i915_mm_struct_find(i915, current->mm); - if (!mm) { - hash_add_rcu(i915->mm_structs, - &new->node, - (unsigned long)new->mm); - mmgrab(current->mm); - mm = new; + spin_lock(&i915->mm.notifier_lock); + if (!--obj->userptr.page_ref) { + pvec = obj->userptr.pvec; + obj->userptr.pvec = NULL; } - spin_unlock(&i915->mm_lock); - if (mm != new) - kfree(new); - -out: - obj->userptr.mm = mm; - return ret; -} - -static void -__i915_mm_struct_free__worker(struct work_struct *work) -{ - struct i915_mm_struct *mm = container_of(work, typeof(*mm), work.work); - - i915_mmu_notifier_free(mm->mn, mm->mm); - mmdrop(mm->mm); - kfree(mm); -} - -static void -__i915_mm_struct_free(struct kref *kref) -{ - struct i915_mm_struct *mm = container_of(kref, typeof(*mm), kref); - - spin_lock(&mm->i915->mm_lock); - hash_del_rcu(&mm->node); - spin_unlock(&mm->i915->mm_lock); + GEM_BUG_ON(obj->userptr.page_ref < 0); + spin_unlock(&i915->mm.notifier_lock); - INIT_RCU_WORK(&mm->work, __i915_mm_struct_free__worker); - queue_rcu_work(system_wq, &mm->work); -} - -static void -i915_gem_userptr_release__mm_struct(struct drm_i915_gem_object *obj) -{ - if (obj->userptr.mm == NULL) - return; + if (pvec) { + const unsigned long num_pages = obj->base.size >> PAGE_SHIFT; - kref_put(&obj->userptr.mm->kref, __i915_mm_struct_free); - obj->userptr.mm = NULL; + unpin_user_pages(pvec, num_pages); + kvfree(pvec); + } } -struct get_pages_work { - struct work_struct work; - struct drm_i915_gem_object *obj; - struct task_struct *task; -}; - -static struct sg_table * -__i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj, - struct page **pvec, unsigned long num_pages) +static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + const unsigned long num_pages = obj->base.size >> PAGE_SHIFT; unsigned int max_segment = i915_sg_segment_size(); struct sg_table *st; unsigned int sg_page_sizes; struct scatterlist *sg; + struct page **pvec; int ret; st = kmalloc(sizeof(*st), GFP_KERNEL); if (!st) - return ERR_PTR(-ENOMEM); + return -ENOMEM; + + spin_lock(&i915->mm.notifier_lock); + if (GEM_WARN_ON(!obj->userptr.page_ref)) { + spin_unlock(&i915->mm.notifier_lock); + ret = -EFAULT; + goto err_free; + } + + obj->userptr.page_ref++; + pvec = obj->userptr.pvec; + spin_unlock(&i915->mm.notifier_lock); alloc_table: sg = __sg_alloc_table_from_pages(st, pvec, num_pages, 0, num_pages << PAGE_SHIFT, max_segment, NULL, 0, GFP_KERNEL); if (IS_ERR(sg)) { - kfree(st); - return ERR_CAST(sg); + ret = PTR_ERR(sg); + goto err; } ret = i915_gem_gtt_prepare_pages(obj, st); @@ -428,203 +170,20 @@ alloc_table: goto alloc_table; } - kfree(st); - return ERR_PTR(ret); + goto err; } sg_page_sizes = i915_sg_page_sizes(st->sgl); __i915_gem_object_set_pages(obj, st, sg_page_sizes); - return st; -} - -static void -__i915_gem_userptr_get_pages_worker(struct work_struct *_work) -{ - struct get_pages_work *work = container_of(_work, typeof(*work), work); - struct drm_i915_gem_object *obj = work->obj; - const unsigned long npages = obj->base.size >> PAGE_SHIFT; - unsigned long pinned; - struct page **pvec; - int ret; - - ret = -ENOMEM; - pinned = 0; - - pvec = kvmalloc_array(npages, 
sizeof(struct page *), GFP_KERNEL); - if (pvec != NULL) { - struct mm_struct *mm = obj->userptr.mm->mm; - unsigned int flags = 0; - int locked = 0; - - if (!i915_gem_object_is_readonly(obj)) - flags |= FOLL_WRITE; - - ret = -EFAULT; - if (mmget_not_zero(mm)) { - while (pinned < npages) { - if (!locked) { - mmap_read_lock(mm); - locked = 1; - } - ret = pin_user_pages_remote - (mm, - obj->userptr.ptr + pinned * PAGE_SIZE, - npages - pinned, - flags, - pvec + pinned, NULL, &locked); - if (ret < 0) - break; - - pinned += ret; - } - if (locked) - mmap_read_unlock(mm); - mmput(mm); - } - } - - mutex_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES); - if (obj->userptr.work == &work->work) { - struct sg_table *pages = ERR_PTR(ret); - - if (pinned == npages) { - pages = __i915_gem_userptr_alloc_pages(obj, pvec, - npages); - if (!IS_ERR(pages)) { - pinned = 0; - pages = NULL; - } - } - - obj->userptr.work = ERR_CAST(pages); - if (IS_ERR(pages)) - __i915_gem_userptr_set_active(obj, false); - } - mutex_unlock(&obj->mm.lock); - - unpin_user_pages(pvec, pinned); - kvfree(pvec); - - i915_gem_object_put(obj); - put_task_struct(work->task); - kfree(work); -} - -static struct sg_table * -__i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj) -{ - struct get_pages_work *work; - - /* Spawn a worker so that we can acquire the - * user pages without holding our mutex. Access - * to the user pages requires mmap_lock, and we have - * a strict lock ordering of mmap_lock, struct_mutex - - * we already hold struct_mutex here and so cannot - * call gup without encountering a lock inversion. - * - * Userspace will keep on repeating the operation - * (thanks to EAGAIN) until either we hit the fast - * path or the worker completes. If the worker is - * cancelled or superseded, the task is still run - * but the results ignored. (This leads to - * complications that we may have a stray object - * refcount that we need to be wary of when - * checking for existing objects during creation.) - * If the worker encounters an error, it reports - * that error back to this function through - * obj->userptr.work = ERR_PTR. - */ - work = kmalloc(sizeof(*work), GFP_KERNEL); - if (work == NULL) - return ERR_PTR(-ENOMEM); - - obj->userptr.work = &work->work; - - work->obj = i915_gem_object_get(obj); - - work->task = current; - get_task_struct(work->task); - - INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker); - queue_work(to_i915(obj->base.dev)->mm.userptr_wq, &work->work); - - return ERR_PTR(-EAGAIN); -} - -static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) -{ - const unsigned long num_pages = obj->base.size >> PAGE_SHIFT; - struct mm_struct *mm = obj->userptr.mm->mm; - struct page **pvec; - struct sg_table *pages; - bool active; - int pinned; - unsigned int gup_flags = 0; - - /* If userspace should engineer that these pages are replaced in - * the vma between us binding this page into the GTT and completion - * of rendering... Their loss. If they change the mapping of their - * pages they need to create a new bo to point to the new vma. - * - * However, that still leaves open the possibility of the vma - * being copied upon fork. Which falls under the same userspace - * synchronisation issue as a regular bo, except that this time - * the process may not be expecting that a particular piece of - * memory is tied to the GPU. 
- * - * Fortunately, we can hook into the mmu_notifier in order to - * discard the page references prior to anything nasty happening - * to the vma (discard or cloning) which should prevent the more - * egregious cases from causing harm. - */ - - if (obj->userptr.work) { - /* active flag should still be held for the pending work */ - if (IS_ERR(obj->userptr.work)) - return PTR_ERR(obj->userptr.work); - else - return -EAGAIN; - } - - pvec = NULL; - pinned = 0; - - if (mm == current->mm) { - pvec = kvmalloc_array(num_pages, sizeof(struct page *), - GFP_KERNEL | - __GFP_NORETRY | - __GFP_NOWARN); - if (pvec) { - /* defer to worker if malloc fails */ - if (!i915_gem_object_is_readonly(obj)) - gup_flags |= FOLL_WRITE; - pinned = pin_user_pages_fast_only(obj->userptr.ptr, - num_pages, gup_flags, - pvec); - } - } - - active = false; - if (pinned < 0) { - pages = ERR_PTR(pinned); - pinned = 0; - } else if (pinned < num_pages) { - pages = __i915_gem_userptr_get_pages_schedule(obj); - active = pages == ERR_PTR(-EAGAIN); - } else { - pages = __i915_gem_userptr_alloc_pages(obj, pvec, num_pages); - active = !IS_ERR(pages); - } - if (active) - __i915_gem_userptr_set_active(obj, true); - - if (IS_ERR(pages)) - unpin_user_pages(pvec, pinned); - kvfree(pvec); + return 0; - return PTR_ERR_OR_ZERO(pages); +err: + i915_gem_object_userptr_drop_ref(obj); +err_free: + kfree(st); + return ret; } static void @@ -634,9 +193,6 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj, struct sgt_iter sgt_iter; struct page *page; - /* Cancel any inflight work and force them to restart their gup */ - obj->userptr.work = NULL; - __i915_gem_userptr_set_active(obj, false); if (!pages) return; @@ -676,42 +232,224 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj, } mark_page_accessed(page); - unpin_user_page(page); } obj->mm.dirty = false; sg_free_table(pages); kfree(pages); + + i915_gem_object_userptr_drop_ref(obj); +} + +static int i915_gem_object_userptr_unbind(struct drm_i915_gem_object *obj, bool get_pages) +{ + struct sg_table *pages; + int err; + + err = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE); + if (err) + return err; + + if (GEM_WARN_ON(i915_gem_object_has_pinned_pages(obj))) + return -EBUSY; + + assert_object_held(obj); + + pages = __i915_gem_object_unset_pages(obj); + if (!IS_ERR_OR_NULL(pages)) + i915_gem_userptr_put_pages(obj, pages); + + if (get_pages) + err = ____i915_gem_object_get_pages(obj); + + return err; +} + +int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + const unsigned long num_pages = obj->base.size >> PAGE_SHIFT; + struct page **pvec; + unsigned int gup_flags = 0; + unsigned long notifier_seq; + int pinned, ret; + + if (obj->userptr.notifier.mm != current->mm) + return -EFAULT; + + ret = i915_gem_object_lock_interruptible(obj, NULL); + if (ret) + return ret; + + /* optimistically try to preserve current pages while unlocked */ + if (i915_gem_object_has_pages(obj) && + !mmu_interval_check_retry(&obj->userptr.notifier, + obj->userptr.notifier_seq)) { + spin_lock(&i915->mm.notifier_lock); + if (obj->userptr.pvec && + !mmu_interval_read_retry(&obj->userptr.notifier, + obj->userptr.notifier_seq)) { + obj->userptr.page_ref++; + + /* We can keep using the current binding, this is the fastpath */ + ret = 1; + } + spin_unlock(&i915->mm.notifier_lock); + } + + if (!ret) { + /* Make sure userptr is unbound for next attempt, so we don't use stale pages. 
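+	 * (Reviewer-added note:) reaching this branch means the fastpath
+	 * above took no page reference, i.e. there were no pages or the
+	 * notifier sequence has moved on; unbinding here guarantees the
+	 * stale binding cannot be reused by the submission that follows.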
*/ + ret = i915_gem_object_userptr_unbind(obj, false); + } + i915_gem_object_unlock(obj); + if (ret < 0) + return ret; + + if (ret > 0) + return 0; + + notifier_seq = mmu_interval_read_begin(&obj->userptr.notifier); + + pvec = kvmalloc_array(num_pages, sizeof(struct page *), GFP_KERNEL); + if (!pvec) + return -ENOMEM; + + if (!i915_gem_object_is_readonly(obj)) + gup_flags |= FOLL_WRITE; + + pinned = ret = 0; + while (pinned < num_pages) { + ret = pin_user_pages_fast(obj->userptr.ptr + pinned * PAGE_SIZE, + num_pages - pinned, gup_flags, + &pvec[pinned]); + if (ret < 0) + goto out; + + pinned += ret; + } + ret = 0; + + spin_lock(&i915->mm.notifier_lock); + + if (mmu_interval_read_retry(&obj->userptr.notifier, + !obj->userptr.page_ref ? notifier_seq : + obj->userptr.notifier_seq)) { + ret = -EAGAIN; + goto out_unlock; + } + + if (!obj->userptr.page_ref++) { + obj->userptr.pvec = pvec; + obj->userptr.notifier_seq = notifier_seq; + + pvec = NULL; + } + +out_unlock: + spin_unlock(&i915->mm.notifier_lock); + +out: + if (pvec) { + unpin_user_pages(pvec, pinned); + kvfree(pvec); + } + + return ret; +} + +int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj) +{ + if (mmu_interval_read_retry(&obj->userptr.notifier, + obj->userptr.notifier_seq)) { + /* We collided with the mmu notifier, need to retry */ + + return -EAGAIN; + } + + return 0; +} + +void i915_gem_object_userptr_submit_fini(struct drm_i915_gem_object *obj) +{ + i915_gem_object_userptr_drop_ref(obj); +} + +int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj) +{ + int err; + + err = i915_gem_object_userptr_submit_init(obj); + if (err) + return err; + + err = i915_gem_object_lock_interruptible(obj, NULL); + if (!err) { + /* + * Since we only check validity, not use the pages, + * it doesn't matter if we collide with the mmu notifier, + * and -EAGAIN handling is not required. 
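+	 *
+	 * (Reviewer-added usage sketch; a hypothetical caller loosely
+	 * modelled on what a submission path is expected to do with these
+	 * hooks, not code taken from the patch:)
+	 *
+	 *	err = i915_gem_object_userptr_submit_init(obj);
+	 *	... lock, pin, build and submit the request ...
+	 *	err = i915_gem_object_userptr_submit_done(obj);
+	 *	if (err == -EAGAIN)
+	 *		... unwind and restart from submit_init ...
+	 *	i915_gem_object_userptr_submit_fini(obj);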
+ */ + err = i915_gem_object_pin_pages(obj); + if (!err) + i915_gem_object_unpin_pages(obj); + + i915_gem_object_unlock(obj); + } + + i915_gem_object_userptr_submit_fini(obj); + return err; } static void i915_gem_userptr_release(struct drm_i915_gem_object *obj) { - i915_gem_userptr_release__mmu_notifier(obj); - i915_gem_userptr_release__mm_struct(obj); + GEM_WARN_ON(obj->userptr.page_ref); + + mmu_interval_notifier_remove(&obj->userptr.notifier); + obj->userptr.notifier.mm = NULL; } static int i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj) { - if (obj->userptr.mmu_object) - return 0; + drm_dbg(obj->base.dev, "Exporting userptr no longer allowed\n"); + + return -EINVAL; +} + +static int +i915_gem_userptr_pwrite(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_pwrite *args) +{ + drm_dbg(obj->base.dev, "pwrite to userptr no longer allowed\n"); + + return -EINVAL; +} - return i915_gem_userptr_init__mmu_notifier(obj, 0); +static int +i915_gem_userptr_pread(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_pread *args) +{ + drm_dbg(obj->base.dev, "pread from userptr no longer allowed\n"); + + return -EINVAL; } static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = { .name = "i915_gem_object_userptr", - .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | - I915_GEM_OBJECT_IS_SHRINKABLE | + .flags = I915_GEM_OBJECT_IS_SHRINKABLE | I915_GEM_OBJECT_NO_MMAP | - I915_GEM_OBJECT_ASYNC_CANCEL, + I915_GEM_OBJECT_IS_PROXY, .get_pages = i915_gem_userptr_get_pages, .put_pages = i915_gem_userptr_put_pages, .dmabuf_export = i915_gem_userptr_dmabuf_export, + .pwrite = i915_gem_userptr_pwrite, + .pread = i915_gem_userptr_pread, .release = i915_gem_userptr_release, }; +#endif + /* * Creates a new mm object that wraps some normal memory from the process * context - user memory. @@ -752,12 +490,12 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - static struct lock_class_key lock_class; + static struct lock_class_key __maybe_unused lock_class; struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_userptr *args = data; - struct drm_i915_gem_object *obj; - int ret; - u32 handle; + struct drm_i915_gem_object __maybe_unused *obj; + int __maybe_unused ret; + u32 __maybe_unused handle; if (!HAS_LLC(dev_priv) && !HAS_SNOOP(dev_priv)) { /* We cannot support coherent userptr objects on hw without @@ -770,21 +508,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev, I915_USERPTR_UNSYNCHRONIZED)) return -EINVAL; - /* - * XXX: There is a prevalence of the assumption that we fit the - * object's page count inside a 32bit _signed_ variable. Let's document - * this and catch if we ever need to fix it. In the meantime, if you do - * spot such a local variable, please consider fixing! 
- * - * Aside from our own locals (for which we have no excuse!): - * - sg_table embeds unsigned int for num_pages - * - get_user_pages*() mixed ints with longs - */ - - if (args->user_size >> PAGE_SHIFT > INT_MAX) - return -E2BIG; - - if (overflows_type(args->user_size, obj->base.size)) + if (i915_gem_object_size_2big(args->user_size)) return -E2BIG; if (!args->user_size) @@ -796,6 +520,9 @@ i915_gem_userptr_ioctl(struct drm_device *dev, if (!access_ok((char __user *)(unsigned long)args->user_ptr, args->user_size)) return -EFAULT; + if (args->flags & I915_USERPTR_UNSYNCHRONIZED) + return -ENODEV; + if (args->flags & I915_USERPTR_READ_ONLY) { /* * On almost all of the older hw, we cannot tell the GPU that @@ -805,17 +532,20 @@ i915_gem_userptr_ioctl(struct drm_device *dev, return -ENODEV; } +#ifdef CONFIG_MMU_NOTIFIER obj = i915_gem_object_alloc(); if (obj == NULL) return -ENOMEM; drm_gem_private_object_init(dev, &obj->base, args->user_size); - i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class); + i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class, + I915_BO_ALLOC_STRUCT_PAGE); obj->read_domains = I915_GEM_DOMAIN_CPU; obj->write_domain = I915_GEM_DOMAIN_CPU; i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); obj->userptr.ptr = args->user_ptr; + obj->userptr.notifier_seq = ULONG_MAX; if (args->flags & I915_USERPTR_READ_ONLY) i915_gem_object_set_readonly(obj); @@ -823,9 +553,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev, * at binding. This means that we need to hook into the mmu_notifier * in order to detect if the mmu is destroyed. */ - ret = i915_gem_userptr_init__mm_struct(obj); - if (ret == 0) - ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags); + ret = i915_gem_userptr_init__mmu_notifier(obj); if (ret == 0) ret = drm_gem_handle_create(file, &obj->base, &handle); @@ -836,24 +564,20 @@ i915_gem_userptr_ioctl(struct drm_device *dev, args->handle = handle; return 0; +#else + return -ENODEV; +#endif } int i915_gem_init_userptr(struct drm_i915_private *dev_priv) { - spin_lock_init(&dev_priv->mm_lock); - hash_init(dev_priv->mm_structs); - - dev_priv->mm.userptr_wq = - alloc_workqueue("i915-userptr-acquire", - WQ_HIGHPRI | WQ_UNBOUND, - 0); - if (!dev_priv->mm.userptr_wq) - return -ENOMEM; +#ifdef CONFIG_MMU_NOTIFIER + spin_lock_init(&dev_priv->mm.notifier_lock); +#endif return 0; } void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv) { - destroy_workqueue(dev_priv->mm.userptr_wq); } diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c index 2fb501a78a85..0c8ecfdf5405 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c @@ -89,7 +89,6 @@ static void huge_put_pages(struct drm_i915_gem_object *obj, static const struct drm_i915_gem_object_ops huge_ops = { .name = "huge-gem", - .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE, .get_pages = huge_get_pages, .put_pages = huge_put_pages, }; @@ -115,7 +114,8 @@ huge_gem_object(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, dma_size); - i915_gem_object_init(obj, &huge_ops, &lock_class); + i915_gem_object_init(obj, &huge_ops, &lock_class, + I915_BO_ALLOC_STRUCT_PAGE); obj->read_domains = I915_GEM_DOMAIN_CPU; obj->write_domain = I915_GEM_DOMAIN_CPU; diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index aacf4856ccb4..dadd485bc52f 100644 --- 
a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -140,8 +140,7 @@ static void put_huge_pages(struct drm_i915_gem_object *obj, static const struct drm_i915_gem_object_ops huge_page_ops = { .name = "huge-gem", - .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | - I915_GEM_OBJECT_IS_SHRINKABLE, + .flags = I915_GEM_OBJECT_IS_SHRINKABLE, .get_pages = get_huge_pages, .put_pages = put_huge_pages, }; @@ -168,7 +167,8 @@ huge_pages_object(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &huge_page_ops, &lock_class); + i915_gem_object_init(obj, &huge_page_ops, &lock_class, + I915_BO_ALLOC_STRUCT_PAGE); i915_gem_object_set_volatile(obj); @@ -319,9 +319,9 @@ fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single) drm_gem_private_object_init(&i915->drm, &obj->base, size); if (single) - i915_gem_object_init(obj, &fake_ops_single, &lock_class); + i915_gem_object_init(obj, &fake_ops_single, &lock_class, 0); else - i915_gem_object_init(obj, &fake_ops, &lock_class); + i915_gem_object_init(obj, &fake_ops, &lock_class, 0); i915_gem_object_set_volatile(obj); @@ -589,7 +589,7 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg) goto out_put; } - err = i915_gem_object_pin_pages(obj); + err = i915_gem_object_pin_pages_unlocked(obj); if (err) goto out_put; @@ -653,15 +653,19 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg) break; } + i915_gem_object_lock(obj, NULL); i915_gem_object_unpin_pages(obj); __i915_gem_object_put_pages(obj); + i915_gem_object_unlock(obj); i915_gem_object_put(obj); } return 0; out_unpin: + i915_gem_object_lock(obj, NULL); i915_gem_object_unpin_pages(obj); + i915_gem_object_unlock(obj); out_put: i915_gem_object_put(obj); @@ -675,8 +679,10 @@ static void close_object_list(struct list_head *objects, list_for_each_entry_safe(obj, on, objects, st_link) { list_del(&obj->st_link); + i915_gem_object_lock(obj, NULL); i915_gem_object_unpin_pages(obj); __i915_gem_object_put_pages(obj); + i915_gem_object_unlock(obj); i915_gem_object_put(obj); } } @@ -713,7 +719,7 @@ static int igt_mock_ppgtt_huge_fill(void *arg) break; } - err = i915_gem_object_pin_pages(obj); + err = i915_gem_object_pin_pages_unlocked(obj); if (err) { i915_gem_object_put(obj); break; @@ -889,7 +895,7 @@ static int igt_mock_ppgtt_64K(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); - err = i915_gem_object_pin_pages(obj); + err = i915_gem_object_pin_pages_unlocked(obj); if (err) goto out_object_put; @@ -943,8 +949,10 @@ static int igt_mock_ppgtt_64K(void *arg) } i915_vma_unpin(vma); + i915_gem_object_lock(obj, NULL); i915_gem_object_unpin_pages(obj); __i915_gem_object_put_pages(obj); + i915_gem_object_unlock(obj); i915_gem_object_put(obj); } } @@ -954,7 +962,9 @@ static int igt_mock_ppgtt_64K(void *arg) out_vma_unpin: i915_vma_unpin(vma); out_object_unpin: + i915_gem_object_lock(obj, NULL); i915_gem_object_unpin_pages(obj); + i915_gem_object_unlock(obj); out_object_put: i915_gem_object_put(obj); @@ -1024,7 +1034,7 @@ static int __cpu_check_vmap(struct drm_i915_gem_object *obj, u32 dword, u32 val) if (err) return err; - ptr = i915_gem_object_pin_map(obj, I915_MAP_WC); + ptr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC); if (IS_ERR(ptr)) return PTR_ERR(ptr); @@ -1304,7 +1314,7 @@ try_again: return err; } - err = i915_gem_object_pin_pages(obj); + err = i915_gem_object_pin_pages_unlocked(obj); if (err) { if (err == -ENXIO || err == -E2BIG) { 
i915_gem_object_put(obj); @@ -1327,8 +1337,10 @@ try_again: __func__, size, i); } out_unpin: + i915_gem_object_lock(obj, NULL); i915_gem_object_unpin_pages(obj); __i915_gem_object_put_pages(obj); + i915_gem_object_unlock(obj); out_put: i915_gem_object_put(obj); @@ -1402,7 +1414,7 @@ static int igt_ppgtt_sanity_check(void *arg) return err; } - err = i915_gem_object_pin_pages(obj); + err = i915_gem_object_pin_pages_unlocked(obj); if (err) { i915_gem_object_put(obj); goto out; @@ -1416,8 +1428,10 @@ static int igt_ppgtt_sanity_check(void *arg) err = igt_write_huge(ctx, obj); + i915_gem_object_lock(obj, NULL); i915_gem_object_unpin_pages(obj); __i915_gem_object_put_pages(obj); + i915_gem_object_unlock(obj); i915_gem_object_put(obj); if (err) { @@ -1462,7 +1476,7 @@ static int igt_tmpfs_fallback(void *arg) goto out_restore; } - vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); + vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); if (IS_ERR(vaddr)) { err = PTR_ERR(vaddr); goto out_put; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index 6a674a7994df..d36873885cc1 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -45,7 +45,7 @@ static int __igt_client_fill(struct intel_engine_cs *engine) goto err_flush; } - vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); + vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); if (IS_ERR(vaddr)) { err = PTR_ERR(vaddr); goto err_put; @@ -157,7 +157,7 @@ static int prepare_blit(const struct tiled_blits *t, u32 src_pitch, dst_pitch; u32 cmd, *cs; - cs = i915_gem_object_pin_map(batch, I915_MAP_WC); + cs = i915_gem_object_pin_map_unlocked(batch, I915_MAP_WC); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -377,7 +377,7 @@ static int verify_buffer(const struct tiled_blits *t, y = i915_prandom_u32_max_state(t->height, prng); p = y * t->width + x; - vaddr = i915_gem_object_pin_map(buf->vma->obj, I915_MAP_WC); + vaddr = i915_gem_object_pin_map_unlocked(buf->vma->obj, I915_MAP_WC); if (IS_ERR(vaddr)) return PTR_ERR(vaddr); @@ -564,7 +564,7 @@ static int tiled_blits_prepare(struct tiled_blits *t, int err; int i; - map = i915_gem_object_pin_map(t->scratch.vma->obj, I915_MAP_WC); + map = i915_gem_object_pin_map_unlocked(t->scratch.vma->obj, I915_MAP_WC); if (IS_ERR(map)) return PTR_ERR(map); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index 1117d2a44518..e937b6629019 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -160,7 +160,7 @@ static int wc_set(struct context *ctx, unsigned long offset, u32 v) if (err) return err; - map = i915_gem_object_pin_map(ctx->obj, I915_MAP_WC); + map = i915_gem_object_pin_map_unlocked(ctx->obj, I915_MAP_WC); if (IS_ERR(map)) return PTR_ERR(map); @@ -183,7 +183,7 @@ static int wc_get(struct context *ctx, unsigned long offset, u32 *v) if (err) return err; - map = i915_gem_object_pin_map(ctx->obj, I915_MAP_WC); + map = i915_gem_object_pin_map_unlocked(ctx->obj, I915_MAP_WC); if (IS_ERR(map)) return PTR_ERR(map); @@ -200,17 +200,15 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v) u32 *cs; int err; + vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, 0); + if (IS_ERR(vma)) + return PTR_ERR(vma); + i915_gem_object_lock(ctx->obj, NULL); err = i915_gem_object_set_to_gtt_domain(ctx->obj, 
true); if (err) goto out_unlock; - vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, 0); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto out_unlock; - } - rq = intel_engine_create_kernel_request(ctx->engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index d3f87dc4eda3..5fef592390cb 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -1094,7 +1094,7 @@ __read_slice_count(struct intel_context *ce, if (ret < 0) return ret; - buf = i915_gem_object_pin_map(obj, I915_MAP_WB); + buf = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); if (IS_ERR(buf)) { ret = PTR_ERR(buf); return ret; @@ -1511,7 +1511,7 @@ static int write_to_scratch(struct i915_gem_context *ctx, if (IS_ERR(obj)) return PTR_ERR(obj); - cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); goto out; @@ -1622,7 +1622,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, if (err) goto out_vm; - cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); goto out; @@ -1658,7 +1658,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, if (err) goto out_vm; - cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); goto out; @@ -1715,7 +1715,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, if (err) goto out_vm; - cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); goto out_vm; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c index b6d43880b0c1..dd74bc09ec88 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c @@ -194,7 +194,7 @@ static int igt_dmabuf_import_ownership(void *arg) dma_buf_put(dmabuf); - err = i915_gem_object_pin_pages(obj); + err = i915_gem_object_pin_pages_unlocked(obj); if (err) { pr_err("i915_gem_object_pin_pages failed with err=%d\n", err); goto out_obj; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c index e1d50a5a1477..4df505e4c53a 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c @@ -116,7 +116,7 @@ static int igt_gpu_reloc(void *arg) if (IS_ERR(scratch)) return PTR_ERR(scratch); - map = i915_gem_object_pin_map(scratch, I915_MAP_WC); + map = i915_gem_object_pin_map_unlocked(scratch, I915_MAP_WC); if (IS_ERR(map)) { err = PTR_ERR(map); goto err_scratch; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index d429c7643ff2..5cf6df49c333 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -322,7 +322,7 @@ static int igt_partial_tiling(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); - err = i915_gem_object_pin_pages(obj); + err = i915_gem_object_pin_pages_unlocked(obj); if (err) { pr_err("Failed to allocate %u pages (%lu total), err=%d\n", nreal, obj->base.size / PAGE_SIZE, err); @@ 
-459,7 +459,7 @@ static int igt_smoke_tiling(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); - err = i915_gem_object_pin_pages(obj); + err = i915_gem_object_pin_pages_unlocked(obj); if (err) { pr_err("Failed to allocate %u pages (%lu total), err=%d\n", nreal, obj->base.size / PAGE_SIZE, err); @@ -798,7 +798,7 @@ static int wc_set(struct drm_i915_gem_object *obj) { void *vaddr; - vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); + vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC); if (IS_ERR(vaddr)) return PTR_ERR(vaddr); @@ -814,7 +814,7 @@ static int wc_check(struct drm_i915_gem_object *obj) void *vaddr; int err = 0; - vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); + vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC); if (IS_ERR(vaddr)) return PTR_ERR(vaddr); @@ -835,9 +835,8 @@ static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type) return false; if (type != I915_MMAP_TYPE_GTT && - !i915_gem_object_type_has(obj, - I915_GEM_OBJECT_HAS_STRUCT_PAGE | - I915_GEM_OBJECT_HAS_IOMEM)) + !i915_gem_object_has_struct_page(obj) && + !i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM)) return false; return true; @@ -977,10 +976,8 @@ static const char *repr_mmap_type(enum i915_mmap_type type) static bool can_access(const struct drm_i915_gem_object *obj) { - unsigned int flags = - I915_GEM_OBJECT_HAS_STRUCT_PAGE | I915_GEM_OBJECT_HAS_IOMEM; - - return i915_gem_object_type_has(obj, flags); + return i915_gem_object_has_struct_page(obj) || + i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM); } static int __igt_mmap_access(struct drm_i915_private *i915, @@ -1319,7 +1316,9 @@ static int __igt_mmap_revoke(struct drm_i915_private *i915, } if (type != I915_MMAP_TYPE_GTT) { + i915_gem_object_lock(obj, NULL); __i915_gem_object_put_pages(obj); + i915_gem_object_unlock(obj); if (i915_gem_object_has_pages(obj)) { pr_err("Failed to put-pages object!\n"); err = -EINVAL; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c index bf853c40ec65..740ee8086a27 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c @@ -47,7 +47,7 @@ static int igt_gem_huge(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); - err = i915_gem_object_pin_pages(obj); + err = i915_gem_object_pin_pages_unlocked(obj); if (err) { pr_err("Failed to allocate %u pages (%lu total), err=%d\n", nreal, obj->base.size / PAGE_SIZE, err); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c index 23b6e11bbc3e..8c335d1a8406 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c @@ -220,7 +220,7 @@ static int igt_fill_blt_thread(void *arg) return PTR_ERR(ctx); prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng); - ctx->sched.priority = I915_USER_PRIORITY(prio); + ctx->sched.priority = prio; } ce = i915_gem_context_get_engine(ctx, 0); @@ -262,7 +262,7 @@ static int igt_fill_blt_thread(void *arg) goto err_flush; } - vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); + vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); if (IS_ERR(vaddr)) { err = PTR_ERR(vaddr); goto err_put; @@ -338,7 +338,7 @@ static int igt_copy_blt_thread(void *arg) return PTR_ERR(ctx); prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng); - ctx->sched.priority = I915_USER_PRIORITY(prio); + ctx->sched.priority = prio; } ce = 
i915_gem_context_get_engine(ctx, 0); @@ -380,7 +380,7 @@ static int igt_copy_blt_thread(void *arg) goto err_flush; } - vaddr = i915_gem_object_pin_map(src, I915_MAP_WB); + vaddr = i915_gem_object_pin_map_unlocked(src, I915_MAP_WB); if (IS_ERR(vaddr)) { err = PTR_ERR(vaddr); goto err_put_src; @@ -400,7 +400,7 @@ static int igt_copy_blt_thread(void *arg) goto err_put_src; } - vaddr = i915_gem_object_pin_map(dst, I915_MAP_WB); + vaddr = i915_gem_object_pin_map_unlocked(dst, I915_MAP_WB); if (IS_ERR(vaddr)) { err = PTR_ERR(vaddr); goto err_put_dst; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c index 8cee68c6a6dc..3a6ce87f8b52 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c @@ -25,13 +25,21 @@ static int mock_phys_object(void *arg) goto out; } + if (!i915_gem_object_has_struct_page(obj)) { + err = -EINVAL; + pr_err("shmem has no struct page\n"); + goto out_obj; + } + + i915_gem_object_lock(obj, NULL); err = i915_gem_object_attach_phys(obj, PAGE_SIZE); + i915_gem_object_unlock(obj); if (err) { pr_err("i915_gem_object_attach_phys failed, err=%d\n", err); goto out_obj; } - if (obj->ops != &i915_gem_phys_ops) { + if (i915_gem_object_has_struct_page(obj)) { pr_err("i915_gem_object_attach_phys did not create a phys object\n"); err = -EINVAL; goto out_obj; diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c index d6783061bc72..0b092c62bb34 100644 --- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c @@ -55,7 +55,7 @@ igt_emit_store_dw(struct i915_vma *vma, if (IS_ERR(obj)) return ERR_CAST(obj); - cmd = i915_gem_object_pin_map(obj, I915_MAP_WC); + cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); goto err; diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt.c b/drivers/gpu/drm/i915/gt/debugfs_gt.c index 3a21cf63b3f0..591eb60785db 100644 --- a/drivers/gpu/drm/i915/gt/debugfs_gt.c +++ b/drivers/gpu/drm/i915/gt/debugfs_gt.c @@ -1,5 +1,4 @@ // SPDX-License-Identifier: MIT - /* * Copyright © 2019 Intel Corporation */ @@ -37,6 +36,7 @@ void intel_gt_debugfs_register_files(struct dentry *root, { while (count--) { umode_t mode = files->fops->write ? 
0644 : 0444; + if (!files->eval || files->eval(data)) debugfs_create_file(files->name, mode, root, data, diff --git a/drivers/gpu/drm/i915/gt/gen2_engine_cs.c b/drivers/gpu/drm/i915/gt/gen2_engine_cs.c index b491a64919c8..9646200d2792 100644 --- a/drivers/gpu/drm/i915/gt/gen2_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen2_engine_cs.c @@ -143,7 +143,7 @@ static u32 *__gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs, int flush, int post) { GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + GEM_BUG_ON(offset_in_page(rq->hwsp_seqno) != I915_GEM_HWS_SEQNO_ADDR); *cs++ = MI_FLUSH; diff --git a/drivers/gpu/drm/i915/gt/gen6_engine_cs.c b/drivers/gpu/drm/i915/gt/gen6_engine_cs.c index ce38d1bcaba3..b388ceeeb1c9 100644 --- a/drivers/gpu/drm/i915/gt/gen6_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen6_engine_cs.c @@ -161,7 +161,7 @@ u32 *gen6_emit_breadcrumb_rcs(struct i915_request *rq, u32 *cs) PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL); - *cs++ = i915_request_active_timeline(rq)->hwsp_offset | + *cs++ = i915_request_active_seqno(rq) | PIPE_CONTROL_GLOBAL_GTT; *cs++ = rq->fence.seqno; @@ -359,7 +359,7 @@ u32 *gen7_emit_breadcrumb_rcs(struct i915_request *rq, u32 *cs) PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL); - *cs++ = i915_request_active_timeline(rq)->hwsp_offset; + *cs++ = i915_request_active_seqno(rq); *cs++ = rq->fence.seqno; *cs++ = MI_USER_INTERRUPT; @@ -374,7 +374,7 @@ u32 *gen7_emit_breadcrumb_rcs(struct i915_request *rq, u32 *cs) u32 *gen6_emit_breadcrumb_xcs(struct i915_request *rq, u32 *cs) { GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + GEM_BUG_ON(offset_in_page(rq->hwsp_seqno) != I915_GEM_HWS_SEQNO_ADDR); *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; @@ -394,7 +394,7 @@ u32 *gen7_emit_breadcrumb_xcs(struct i915_request *rq, u32 *cs) int i; GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + GEM_BUG_ON(offset_in_page(rq->hwsp_seqno) != I915_GEM_HWS_SEQNO_ADDR); *cs++ = MI_FLUSH_DW | MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.h b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h index 3357228f3304..6a61a5c3a85a 100644 --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.h +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h @@ -59,9 +59,9 @@ static inline struct gen6_ppgtt *to_gen6_ppgtt(struct i915_ppgtt *base) for (iter = gen6_pde_index(start); \ length > 0 && iter < I915_PDES && \ (pt = i915_pt_entry(pd, iter), true); \ - ({ u32 temp = ALIGN(start+1, 1 << GEN6_PDE_SHIFT); \ + ({ u32 temp = ALIGN(start + 1, 1 << GEN6_PDE_SHIFT); \ temp = min(temp - start, length); \ - start += temp, length -= temp; }), ++iter) + start += temp; length -= temp; }), ++iter) #define gen6_for_all_pdes(pt, pd, iter) \ for (iter = 0; \ diff --git a/drivers/gpu/drm/i915/gt/gen6_renderstate.c b/drivers/gpu/drm/i915/gt/gen6_renderstate.c index 11c8e7b3dd7c..555e83f3a93a 100644 --- a/drivers/gpu/drm/i915/gt/gen6_renderstate.c +++ b/drivers/gpu/drm/i915/gt/gen6_renderstate.c @@ -1,25 +1,7 @@ +// 
SPDX-License-Identifier: MIT /* * Copyright © 2014 Intel Corporation * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * * Generated by: intel-gpu-tools-1.8-220-g01153e7 */ diff --git a/drivers/gpu/drm/i915/gt/gen7_renderstate.c b/drivers/gpu/drm/i915/gt/gen7_renderstate.c index 655180646152..c36e84d30d24 100644 --- a/drivers/gpu/drm/i915/gt/gen7_renderstate.c +++ b/drivers/gpu/drm/i915/gt/gen7_renderstate.c @@ -1,25 +1,7 @@ +// SPDX-License-Identifier: MIT /* * Copyright © 2014 Intel Corporation * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
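
Every relicensing hunk in this series reduces to the same substitution: the full MIT permission block is dropped in favour of a single SPDX tag, so a converted header ends up as, e.g.:

	// SPDX-License-Identifier: MIT
	/*
	 * Copyright © 2014 Intel Corporation
	 *
	 * Generated by: intel-gpu-tools-1.8-220-g01153e7
	 */
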
- * * Generated by: intel-gpu-tools-1.8-220-g01153e7 */ diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 0bb340cacb13..732c2ed1d933 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -338,15 +338,14 @@ static u32 preempt_address(struct intel_engine_cs *engine) static u32 hwsp_offset(const struct i915_request *rq) { - const struct intel_timeline_cacheline *cl; + const struct intel_timeline *tl; - /* Before the request is executed, the timeline/cachline is fixed */ + /* Before the request is executed, the timeline is fixed */ + tl = rcu_dereference_protected(rq->timeline, + !i915_request_signaled(rq)); - cl = rcu_dereference_protected(rq->hwsp_cacheline, 1); - if (cl) - return cl->ggtt_offset; - - return rcu_dereference_protected(rq->timeline, 1)->hwsp_offset; + /* See the comment in i915_request_active_seqno(). */ + return page_mask_bits(tl->hwsp_offset) + offset_in_page(rq->hwsp_seqno); } int gen8_emit_init_breadcrumb(struct i915_request *rq) diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 755522ced60d..176c19633412 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -5,6 +5,8 @@ #include <linux/log2.h> +#include "gem/i915_gem_lmem.h" + #include "gen8_ppgtt.h" #include "i915_scatterlist.h" #include "i915_trace.h" @@ -35,6 +37,9 @@ static u64 gen8_pte_encode(dma_addr_t addr, if (unlikely(flags & PTE_READ_ONLY)) pte &= ~_PAGE_RW; + if (flags & PTE_LM) + pte |= GEN12_PPGTT_PTE_LM; + switch (level) { case I915_CACHE_NONE: pte |= PPAT_UNCACHED; @@ -145,6 +150,7 @@ static unsigned int gen8_pt_count(u64 start, u64 end) static unsigned int gen8_pd_top_count(const struct i915_address_space *vm) { unsigned int shift = __gen8_pte_shift(vm->top); + return (vm->total + (1ull << shift) - 1) >> shift; } @@ -557,6 +563,7 @@ static void gen8_ppgtt_insert(struct i915_address_space *vm, static int gen8_init_scratch(struct i915_address_space *vm) { + u32 pte_flags; int ret; int i; @@ -580,9 +587,13 @@ static int gen8_init_scratch(struct i915_address_space *vm) if (ret) return ret; + pte_flags = vm->has_read_only; + if (i915_gem_object_is_lmem(vm->scratch[0])) + pte_flags |= PTE_LM; + vm->scratch[0]->encode = gen8_pte_encode(px_dma(vm->scratch[0]), - I915_CACHE_LLC, vm->has_read_only); + I915_CACHE_LLC, pte_flags); for (i = 1; i <= vm->top; i++) { struct drm_i915_gem_object *obj; diff --git a/drivers/gpu/drm/i915/gt/gen8_renderstate.c b/drivers/gpu/drm/i915/gt/gen8_renderstate.c index 95288a34c15d..ef9d7b0dd2da 100644 --- a/drivers/gpu/drm/i915/gt/gen8_renderstate.c +++ b/drivers/gpu/drm/i915/gt/gen8_renderstate.c @@ -1,25 +1,7 @@ +// SPDX-License-Identifier: MIT /* * Copyright © 2014 Intel Corporation * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
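
The reworked hwsp_offset() above recombines the page-aligned timeline offset with the in-page location of the seqno. A self-contained sketch of that arithmetic, with a toy page-size constant standing in for the driver's page_mask_bits()/offset_in_page() macros:

	#include <stdint.h>

	#define TOY_PAGE_SIZE 4096ull

	/* Keep the page bits of the base and the sub-page bits of the
	 * address: what page_mask_bits(tl->hwsp_offset) +
	 * offset_in_page(rq->hwsp_seqno) computes in hwsp_offset(). */
	static uint64_t toy_hwsp_offset(uint64_t tl_offset, uint64_t seqno_addr)
	{
		return (tl_offset & ~(TOY_PAGE_SIZE - 1)) +
		       (seqno_addr & (TOY_PAGE_SIZE - 1));
	}
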
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * * Generated by: intel-gpu-tools-1.8-220-g01153e7 */ diff --git a/drivers/gpu/drm/i915/gt/gen9_renderstate.c b/drivers/gpu/drm/i915/gt/gen9_renderstate.c index 7d3ac02f0177..428b724ded3e 100644 --- a/drivers/gpu/drm/i915/gt/gen9_renderstate.c +++ b/drivers/gpu/drm/i915/gt/gen9_renderstate.c @@ -1,25 +1,7 @@ +// SPDX-License-Identifier: MIT /* * Copyright © 2014 Intel Corporation * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * * Generated by: intel-gpu-tools-1.19-177-g68e2eab2 */ diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index 34a645d6babd..38cc42783dfb 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -1,25 +1,6 @@ +// SPDX-License-Identifier: MIT /* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * + * Copyright © 2015-2021 Intel Corporation */ #include <linux/kthread.h> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 349e7fa1488d..17cf2640b082 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index d24ab6fa0ee5..f83a73a2b39f 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/intel_context_param.h b/drivers/gpu/drm/i915/gt/intel_context_param.h index f053d8633fe2..3ecacc675f41 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_param.h +++ b/drivers/gpu/drm/i915/gt/intel_context_param.h @@ -6,9 +6,18 @@ #ifndef INTEL_CONTEXT_PARAM_H #define INTEL_CONTEXT_PARAM_H -struct intel_context; +#include <linux/types.h> + +#include "intel_context.h" int intel_context_set_ring_size(struct intel_context *ce, long sz); long intel_context_get_ring_size(struct intel_context *ce); +static inline int +intel_context_set_watchdog_us(struct intel_context *ce, u64 timeout_us) +{ + ce->watchdog.timeout_us = timeout_us; + return 0; +} + #endif /* INTEL_CONTEXT_PARAM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index e10d78601bbd..ed8c447a7346 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ @@ -97,6 +96,10 @@ struct intel_context { #define CONTEXT_FORCE_SINGLE_SUBMISSION 7 #define CONTEXT_NOPREEMPT 8 + struct { + u64 timeout_us; + } watchdog; + u32 *lrc_reg_state; union { struct { diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 376e82e17061..efe935f80c1a 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1,25 +1,6 @@ +// SPDX-License-Identifier: MIT /* * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
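
The new intel_context_set_watchdog_us() helper above only records the budget in ce->watchdog.timeout_us; presumably it is consumed later when requests are emitted on that context. A hypothetical caller arming a 20 ms watchdog, not part of the patch:

	/* Illustrative caller only. */
	static void toy_arm_watchdog(struct intel_context *ce)
	{
		intel_context_set_watchdog_us(ce, 20 * USEC_PER_MSEC);
	}
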
- * */ #include <drm/drm_print.h> @@ -619,6 +600,7 @@ static void cleanup_status_page(struct intel_engine_cs *engine) } static int pin_ggtt_status_page(struct intel_engine_cs *engine, + struct i915_gem_ww_ctx *ww, struct i915_vma *vma) { unsigned int flags; @@ -639,12 +621,13 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine, else flags = PIN_HIGH; - return i915_ggtt_pin(vma, NULL, 0, flags); + return i915_ggtt_pin(vma, ww, 0, flags); } static int init_status_page(struct intel_engine_cs *engine) { struct drm_i915_gem_object *obj; + struct i915_gem_ww_ctx ww; struct i915_vma *vma; void *vaddr; int ret; @@ -670,30 +653,39 @@ static int init_status_page(struct intel_engine_cs *engine) vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); if (IS_ERR(vma)) { ret = PTR_ERR(vma); - goto err; + goto err_put; } + i915_gem_ww_ctx_init(&ww, true); +retry: + ret = i915_gem_object_lock(obj, &ww); + if (!ret && !HWS_NEEDS_PHYSICAL(engine->i915)) + ret = pin_ggtt_status_page(engine, &ww, vma); + if (ret) + goto err; + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); if (IS_ERR(vaddr)) { ret = PTR_ERR(vaddr); - goto err; + goto err_unpin; } engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE); engine->status_page.vma = vma; - if (!HWS_NEEDS_PHYSICAL(engine->i915)) { - ret = pin_ggtt_status_page(engine, vma); - if (ret) - goto err_unpin; - } - - return 0; - err_unpin: - i915_gem_object_unpin_map(obj); + if (ret) + i915_vma_unpin(vma); err: - i915_gem_object_put(obj); + if (ret == -EDEADLK) { + ret = i915_gem_ww_ctx_backoff(&ww); + if (!ret) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); +err_put: + if (ret) + i915_gem_object_put(obj); return ret; } @@ -713,9 +705,12 @@ static int engine_setup_common(struct intel_engine_cs *engine) goto err_status; } + err = intel_engine_init_cmd_parser(engine); + if (err) + goto err_cmd_parser; + intel_engine_init_active(engine, ENGINE_PHYSICAL); intel_engine_init_execlists(engine); - intel_engine_init_cmd_parser(engine); intel_engine_init__pm(engine); intel_engine_init_retire(engine); @@ -732,6 +727,8 @@ static int engine_setup_common(struct intel_engine_cs *engine) return 0; +err_cmd_parser: + intel_breadcrumbs_free(engine->breadcrumbs); err_status: cleanup_status_page(engine); return err; @@ -758,6 +755,7 @@ static int measure_breadcrumb_dw(struct intel_context *ce) frame->rq.engine = engine; frame->rq.context = ce; rcu_assign_pointer(frame->rq.timeline, ce->timeline); + frame->rq.hwsp_seqno = ce->timeline->hwsp_seqno; frame->ring.vaddr = frame->cs; frame->ring.size = sizeof(frame->cs); @@ -1234,14 +1232,14 @@ void __intel_engine_flush_submission(struct intel_engine_cs *engine, bool sync) { struct tasklet_struct *t = &engine->execlists.tasklet; - if (!t->func) + if (!t->callback) return; local_bh_disable(); if (tasklet_trylock(t)) { /* Must wait for any GPU reset in progress. */ if (__tasklet_is_enabled(t)) - t->func(t->data); + t->callback(t); tasklet_unlock(t); } local_bh_enable(); @@ -1268,14 +1266,8 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) return true; /* Waiting to drain ELSP? */ - if (execlists_active(&engine->execlists)) { - synchronize_hardirq(to_pci_dev(engine->i915->drm.dev)->irq); - - intel_engine_flush_submission(engine); - - if (execlists_active(&engine->execlists)) - return false; - } + synchronize_hardirq(to_pci_dev(engine->i915->drm.dev)->irq); + intel_engine_flush_submission(engine); /* ELSP is empty, but there are ready requests? E.g. 
after reset */ if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index d7be2b9339f9..b99ac41695f3 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ @@ -32,7 +31,7 @@ static bool next_heartbeat(struct intel_engine_cs *engine) delay = msecs_to_jiffies_timeout(delay); if (delay >= HZ) delay = round_jiffies_up_relative(delay); - mod_delayed_work(system_highpri_wq, &engine->heartbeat.work, delay); + mod_delayed_work(system_highpri_wq, &engine->heartbeat.work, delay + 1); return true; } @@ -81,9 +80,7 @@ static void show_heartbeat(const struct i915_request *rq, static void heartbeat(struct work_struct *wrk) { - struct i915_sched_attr attr = { - .priority = I915_USER_PRIORITY(I915_PRIORITY_MIN), - }; + struct i915_sched_attr attr = { .priority = I915_PRIORITY_MIN }; struct intel_engine_cs *engine = container_of(wrk, typeof(*engine), heartbeat.work.work); struct intel_context *ce = engine->kernel_context; @@ -106,6 +103,13 @@ static void heartbeat(struct work_struct *wrk) goto out; if (engine->heartbeat.systole) { + long delay = READ_ONCE(engine->props.heartbeat_interval_ms); + + /* Safeguard against too-fast worker invocations */ + if (!time_after(jiffies, + rq->emitted_jiffies + msecs_to_jiffies(delay))) + goto out; + if (!i915_sw_fence_signaled(&rq->submit)) { /* * Not yet submitted, system is stalled. @@ -125,9 +129,9 @@ static void heartbeat(struct work_struct *wrk) * low latency and no jitter] the chance to naturally * complete before being preempted. 
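
The init_status_page() rework earlier in this patch follows the canonical i915_gem_ww_ctx retry loop; stripped to its skeleton (do_work_under_lock() is a placeholder for the pin/map steps):

	struct i915_gem_ww_ctx ww;
	int err;

	i915_gem_ww_ctx_init(&ww, true);	/* interruptible */
retry:
	err = i915_gem_object_lock(obj, &ww);
	if (!err)
		err = do_work_under_lock(obj);
	if (err == -EDEADLK) {
		/* drop all held locks, wait, then try again */
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
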
*/ - attr.priority = I915_PRIORITY_MASK; + attr.priority = 0; if (rq->sched.attr.priority >= attr.priority) - attr.priority |= I915_USER_PRIORITY(I915_PRIORITY_HEARTBEAT); + attr.priority = I915_PRIORITY_HEARTBEAT; if (rq->sched.attr.priority >= attr.priority) attr.priority = I915_PRIORITY_BARRIER; @@ -143,6 +147,8 @@ static void heartbeat(struct work_struct *wrk) "stopped heartbeat on %s", engine->name); } + + rq->emitted_jiffies = jiffies; goto out; } @@ -279,15 +285,14 @@ int intel_engine_pulse(struct intel_engine_cs *engine) mutex_unlock(&ce->timeline->mutex); } + intel_engine_flush_submission(engine); intel_engine_pm_put(engine); return err; } int intel_engine_flush_barriers(struct intel_engine_cs *engine) { - struct i915_sched_attr attr = { - .priority = I915_USER_PRIORITY(I915_PRIORITY_MIN), - }; + struct i915_sched_attr attr = { .priority = I915_PRIORITY_MIN }; struct intel_context *ce = engine->kernel_context; struct i915_request *rq; int err; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h index a7b8c0f9e005..a488ea3e84a3 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index e67d09259dd0..7c9af86fdb1e 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ @@ -27,12 +26,16 @@ static void dbg_poison_ce(struct intel_context *ce) int type = i915_coherent_map_type(ce->engine->i915); void *map; + if (!i915_gem_object_trylock(obj)) + return; + map = i915_gem_object_pin_map(obj, type); if (!IS_ERR(map)) { memset(map, CONTEXT_REDZONE, obj->base.size); i915_gem_object_flush_map(obj); i915_gem_object_unpin_map(obj); } + i915_gem_object_unlock(obj); } } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h index 418df0a13145..70ea46d6cfb0 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index d2346b425547..883bafc44902 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c index 34e6096f196e..1cbd84eb24e4 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.h b/drivers/gpu/drm/i915/gt/intel_engine_user.h index f845ea1cbfaa..3dc7e8ab9fbc 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_user.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * 
SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index ac1be7a632d3..de124870af44 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -274,22 +274,13 @@ static int effective_prio(const struct i915_request *rq) static int queue_prio(const struct intel_engine_execlists *execlists) { - struct i915_priolist *p; struct rb_node *rb; rb = rb_first_cached(&execlists->queue); if (!rb) return INT_MIN; - /* - * As the priolist[] are inverted, with the highest priority in [0], - * we have to flip the index value to become priority. - */ - p = to_priolist(rb); - if (!I915_USER_PRIORITY_SHIFT) - return p->priority; - - return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used); + return to_priolist(rb)->priority; } static int virtual_prio(const struct intel_engine_execlists *el) @@ -470,6 +461,11 @@ static void reset_active(struct i915_request *rq, ce->lrc.lrca = lrc_update_regs(ce, engine, head); } +static bool bad_request(const struct i915_request *rq) +{ + return rq->fence.error && i915_request_started(rq); +} + static struct intel_engine_cs * __execlists_schedule_in(struct i915_request *rq) { @@ -482,7 +478,7 @@ __execlists_schedule_in(struct i915_request *rq) !intel_engine_has_heartbeat(engine))) intel_context_set_banned(ce); - if (unlikely(intel_context_is_banned(ce))) + if (unlikely(intel_context_is_banned(ce) || bad_request(rq))) reset_active(rq, engine); if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) @@ -752,9 +748,8 @@ assert_pending_valid(const struct intel_engine_execlists *execlists, { struct intel_engine_cs *engine = container_of(execlists, typeof(*engine), execlists); - struct i915_request * const *port, *rq; + struct i915_request * const *port, *rq, *prev = NULL; struct intel_context *ce = NULL; - bool sentinel = false; u32 ccid = -1; trace_ports(execlists, msg, execlists->pending); @@ -804,15 +799,20 @@ assert_pending_valid(const struct intel_engine_execlists *execlists, * Sentinels are supposed to be the last request so they flush * the current execution off the HW. Check that they are the only * request in the pending submission. + * + * NB: Due to the async nature of preempt-to-busy and request + * cancellation we need to handle the case where request + * becomes a sentinel in parallel to CSB processing. 
*/ - if (sentinel) { + if (prev && i915_request_has_sentinel(prev) && + !READ_ONCE(prev->fence.error)) { GEM_TRACE_ERR("%s: context:%llx after sentinel in pending[%zd]\n", engine->name, ce->timeline->fence_context, port - execlists->pending); return false; } - sentinel = i915_request_has_sentinel(rq); + prev = rq; /* * We want virtual requests to only be in the first slot so @@ -948,7 +948,7 @@ static bool can_merge_rq(const struct i915_request *prev, if (__i915_request_is_complete(next)) return true; - if (unlikely((i915_request_flags(prev) ^ i915_request_flags(next)) & + if (unlikely((i915_request_flags(prev) | i915_request_flags(next)) & (BIT(I915_FENCE_FLAG_NOPREEMPT) | BIT(I915_FENCE_FLAG_SENTINEL)))) return false; @@ -1072,7 +1072,6 @@ static void defer_request(struct i915_request *rq, struct list_head * const pl) __i915_request_has_started(w) && !__i915_request_is_complete(rq)); - GEM_BUG_ON(i915_request_is_active(w)); if (!i915_request_is_ready(w)) continue; @@ -1080,6 +1079,7 @@ static void defer_request(struct i915_request *rq, struct list_head * const pl) continue; GEM_BUG_ON(rq_prio(w) > rq_prio(rq)); + GEM_BUG_ON(i915_request_is_active(w)); list_move_tail(&w->sched.link, &list); } @@ -1208,7 +1208,7 @@ static unsigned long active_preempt_timeout(struct intel_engine_cs *engine, return 0; /* Force a fast reset for terminated contexts (ignoring sysfs!) */ - if (unlikely(intel_context_is_banned(rq->context))) + if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq))) return 1; return READ_ONCE(engine->props.preempt_timeout_ms); @@ -1452,9 +1452,8 @@ unlock: while ((rb = rb_first_cached(&execlists->queue))) { struct i915_priolist *p = to_priolist(rb); struct i915_request *rq, *rn; - int i; - priolist_for_each_request_consume(rq, rn, p, i) { + priolist_for_each_request_consume(rq, rn, p) { bool merge = true; /* @@ -2344,9 +2343,10 @@ static bool preempt_timeout(const struct intel_engine_cs *const engine) * Check the unread Context Status Buffers and manage the submission of new * contexts to the ELSP accordingly. 
*/ -static void execlists_submission_tasklet(unsigned long data) +static void execlists_submission_tasklet(struct tasklet_struct *t) { - struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; + struct intel_engine_cs * const engine = + from_tasklet(engine, t, execlists.tasklet); struct i915_request *post[2 * EXECLIST_MAX_PORTS]; struct i915_request **inactive; @@ -2457,11 +2457,31 @@ static void execlists_submit_request(struct i915_request *request) spin_unlock_irqrestore(&engine->active.lock, flags); } +static int +__execlists_context_pre_pin(struct intel_context *ce, + struct intel_engine_cs *engine, + struct i915_gem_ww_ctx *ww, void **vaddr) +{ + int err; + + err = lrc_pre_pin(ce, engine, ww, vaddr); + if (err) + return err; + + if (!__test_and_set_bit(CONTEXT_INIT_BIT, &ce->flags)) { + lrc_init_state(ce, engine, *vaddr); + + __i915_gem_object_flush_map(ce->state->obj, 0, engine->context_size); + } + + return 0; +} + static int execlists_context_pre_pin(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **vaddr) { - return lrc_pre_pin(ce, ce->engine, ww, vaddr); + return __execlists_context_pre_pin(ce, ce->engine, ww, vaddr); } static int execlists_context_pin(struct intel_context *ce, void *vaddr) @@ -2729,31 +2749,11 @@ static void enable_execlists(struct intel_engine_cs *engine) enable_error_interrupt(engine); } -static bool unexpected_starting_state(struct intel_engine_cs *engine) -{ - bool unexpected = false; - - if (ENGINE_READ_FW(engine, RING_MI_MODE) & STOP_RING) { - drm_dbg(&engine->i915->drm, - "STOP_RING still set in RING_MI_MODE\n"); - unexpected = true; - } - - return unexpected; -} - static int execlists_resume(struct intel_engine_cs *engine) { intel_mocs_init_engine(engine); - intel_breadcrumbs_reset(engine->breadcrumbs); - if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) { - struct drm_printer p = drm_debug_printer(__func__); - - intel_engine_dump(engine, &p, NULL); - } - enable_execlists(engine); return 0; @@ -2924,9 +2924,10 @@ static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled) rcu_read_unlock(); } -static void nop_submission_tasklet(unsigned long data) +static void nop_submission_tasklet(struct tasklet_struct *t) { - struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; + struct intel_engine_cs * const engine = + from_tasklet(engine, t, execlists.tasklet); /* The driver is wedged; don't process any more events. */ WRITE_ONCE(engine->execlists.queue_priority_hint, INT_MIN); @@ -2962,17 +2963,18 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) /* Mark all executing requests as skipped. */ list_for_each_entry(rq, &engine->active.requests, sched.link) - i915_request_mark_eio(rq); + i915_request_put(i915_request_mark_eio(rq)); intel_engine_signal_breadcrumbs(engine); /* Flush the queued requests to the timeline list (for retiring). 
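
The tasklet hunks are the tree-wide tasklet_setup()/from_tasklet() conversion: the callback now receives the tasklet_struct itself and container_of()s back to its owner rather than casting an opaque unsigned long. Generic shape, with a toy structure:

	#include <linux/interrupt.h>

	struct toy_engine {
		struct tasklet_struct tasklet;
	};

	static void toy_submission_tasklet(struct tasklet_struct *t)
	{
		struct toy_engine *engine = from_tasklet(engine, t, tasklet);

		/* ... process engine ... */
		(void)engine;
	}

	static void toy_engine_init(struct toy_engine *engine)
	{
		/* replaces tasklet_init(&t, cb, (unsigned long)engine) */
		tasklet_setup(&engine->tasklet, toy_submission_tasklet);
	}
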
*/ while ((rb = rb_first_cached(&execlists->queue))) { struct i915_priolist *p = to_priolist(rb); - int i; - priolist_for_each_request_consume(rq, rn, p, i) { - i915_request_mark_eio(rq); - __i915_request_submit(rq); + priolist_for_each_request_consume(rq, rn, p) { + if (i915_request_mark_eio(rq)) { + __i915_request_submit(rq); + i915_request_put(rq); + } } rb_erase_cached(&p->node, &execlists->queue); @@ -2981,7 +2983,7 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) /* On-hold requests will be flushed to timeline upon their release */ list_for_each_entry(rq, &engine->active.hold, sched.link) - i915_request_mark_eio(rq); + i915_request_put(i915_request_mark_eio(rq)); /* Cancel all attached virtual engines */ while ((rb = rb_first_cached(&execlists->virtual))) { @@ -2994,10 +2996,11 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) spin_lock(&ve->base.active.lock); rq = fetch_and_zero(&ve->request); if (rq) { - i915_request_mark_eio(rq); - - rq->engine = engine; - __i915_request_submit(rq); + if (i915_request_mark_eio(rq)) { + rq->engine = engine; + __i915_request_submit(rq); + i915_request_put(rq); + } i915_request_put(rq); ve->base.execlists.queue_priority_hint = INT_MIN; @@ -3011,7 +3014,7 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) execlists->queue = RB_ROOT_CACHED; GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet)); - execlists->tasklet.func = nop_submission_tasklet; + execlists->tasklet.callback = nop_submission_tasklet; spin_unlock_irqrestore(&engine->active.lock, flags); rcu_read_unlock(); @@ -3072,7 +3075,7 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine) { engine->submit_request = execlists_submit_request; engine->schedule = i915_schedule; - engine->execlists.tasklet.func = execlists_submission_tasklet; + engine->execlists.tasklet.callback = execlists_submission_tasklet; engine->reset.prepare = execlists_reset_prepare; engine->reset.rewind = execlists_reset_rewind; @@ -3119,7 +3122,7 @@ static void execlists_release(struct intel_engine_cs *engine) static void logical_ring_default_vfuncs(struct intel_engine_cs *engine) { - /* Default vfuncs which can be overriden by each engine. */ + /* Default vfuncs which can be overridden by each engine. 
*/ engine->resume = execlists_resume; @@ -3195,8 +3198,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) struct intel_uncore *uncore = engine->uncore; u32 base = engine->mmio_base; - tasklet_init(&engine->execlists.tasklet, - execlists_submission_tasklet, (unsigned long)engine); + tasklet_setup(&engine->execlists.tasklet, execlists_submission_tasklet); timer_setup(&engine->execlists.timer, execlists_timeslice, 0); timer_setup(&engine->execlists.preempt, execlists_preempt, 0); @@ -3244,7 +3246,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) static struct list_head *virtual_queue(struct virtual_engine *ve) { - return &ve->base.execlists.default_priolist.requests[0]; + return &ve->base.execlists.default_priolist.requests; } static void rcu_virtual_context_destroy(struct work_struct *wrk) @@ -3360,13 +3362,13 @@ static int virtual_context_alloc(struct intel_context *ce) } static int virtual_context_pre_pin(struct intel_context *ce, - struct i915_gem_ww_ctx *ww, - void **vaddr) + struct i915_gem_ww_ctx *ww, + void **vaddr) { struct virtual_engine *ve = container_of(ce, typeof(*ve), context); - /* Note: we must use a real engine class for setting up reg state */ - return lrc_pre_pin(ce, ve->siblings[0], ww, vaddr); + /* Note: we must use a real engine class for setting up reg state */ + return __execlists_context_pre_pin(ce, ve->siblings[0], ww, vaddr); } static int virtual_context_pin(struct intel_context *ce, void *vaddr) @@ -3438,9 +3440,10 @@ static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve) return mask; } -static void virtual_submission_tasklet(unsigned long data) +static void virtual_submission_tasklet(struct tasklet_struct *t) { - struct virtual_engine * const ve = (struct virtual_engine *)data; + struct virtual_engine * const ve = + from_tasklet(ve, t, base.execlists.tasklet); const int prio = READ_ONCE(ve->base.execlists.queue_priority_hint); intel_engine_mask_t mask; unsigned int n; @@ -3650,9 +3653,7 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings, INIT_LIST_HEAD(virtual_queue(ve)); ve->base.execlists.queue_priority_hint = INT_MIN; - tasklet_init(&ve->base.execlists.tasklet, - virtual_submission_tasklet, - (unsigned long)ve); + tasklet_setup(&ve->base.execlists.tasklet, virtual_submission_tasklet); intel_context_init(&ve->context, &ve->base); @@ -3680,7 +3681,7 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings, * layering if we handle cloning of the requests and * submitting a copy into each backend. 
*/ - if (sibling->execlists.tasklet.func != + if (sibling->execlists.tasklet.callback != execlists_submission_tasklet) { err = -ENODEV; goto err_put; @@ -3840,9 +3841,8 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, count = 0; for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { struct i915_priolist *p = rb_entry(rb, typeof(*p), node); - int i; - priolist_for_each_request(rq, p, i) { + priolist_for_each_request(rq, p) { if (count++ < max - 1) show_request(m, rq, "\t\t", 0); else diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.h b/drivers/gpu/drm/i915/gt/intel_execlists_submission.h index a8fd7adefd82..fd61dae820e9 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.h +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.h @@ -6,6 +6,7 @@ #ifndef __INTEL_EXECLISTS_SUBMISSION_H__ #define __INTEL_EXECLISTS_SUBMISSION_H__ +#include <linux/llist.h> #include <linux/types.h> struct drm_printer; @@ -13,6 +14,7 @@ struct drm_printer; struct i915_request; struct intel_context; struct intel_engine_cs; +struct intel_gt; enum { INTEL_CONTEXT_SCHEDULE_IN = 0, diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 8a4fa7aef71d..670c1271e7d5 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -10,6 +10,8 @@ #include <drm/i915_drm.h> +#include "gem/i915_gem_lmem.h" + #include "intel_gt.h" #include "i915_drv.h" #include "i915_scatterlist.h" @@ -92,7 +94,7 @@ int i915_ggtt_init_hw(struct drm_i915_private *i915) } /* - * Certain Gen5 chipsets require require idling the GPU before + * Certain Gen5 chipsets require idling the GPU before * unmapping anything from the GTT when VT-d is enabled. */ static bool needs_idle_maps(struct drm_i915_private *i915) @@ -189,7 +191,12 @@ static u64 gen8_ggtt_pte_encode(dma_addr_t addr, enum i915_cache_level level, u32 flags) { - return addr | _PAGE_PRESENT; + gen8_pte_t pte = addr | _PAGE_PRESENT; + + if (flags & PTE_LM) + pte |= GEN12_GGTT_PTE_LM; + + return pte; } static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) @@ -201,13 +208,13 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm, dma_addr_t addr, u64 offset, enum i915_cache_level level, - u32 unused) + u32 flags) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); gen8_pte_t __iomem *pte = (gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE; - gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, 0)); + gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, flags)); ggtt->invalidate(ggtt); } @@ -217,7 +224,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, enum i915_cache_level level, u32 flags) { - const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, 0); + const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, flags); struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); gen8_pte_t __iomem *gte; gen8_pte_t __iomem *end; @@ -459,6 +466,8 @@ static void ggtt_bind_vma(struct i915_address_space *vm, pte_flags = 0; if (i915_gem_object_is_readonly(obj)) pte_flags |= PTE_READ_ONLY; + if (i915_gem_object_is_lmem(obj)) + pte_flags |= PTE_LM; vm->insert_entries(vm, vma, cache_level, pte_flags); vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; @@ -647,7 +656,9 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt) if (err) goto err_ppgtt; + i915_gem_object_lock(ppgtt->vm.scratch[0], NULL); err = i915_vm_pin_pt_stash(&ppgtt->vm, &stash); + i915_gem_object_unlock(ppgtt->vm.scratch[0]); if (err) goto err_stash; @@ -734,6 +745,7 @@ 
static void ggtt_cleanup_hw(struct i915_ggtt *ggtt) mutex_unlock(&ggtt->vm.mutex); i915_address_space_fini(&ggtt->vm); + dma_resv_fini(&ggtt->vm.resv); arch_phys_wc_del(ggtt->mtrr); @@ -794,6 +806,7 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) struct drm_i915_private *i915 = ggtt->vm.i915; struct pci_dev *pdev = to_pci_dev(i915->drm.dev); phys_addr_t phys_addr; + u32 pte_flags; int ret; /* For Modern GENs the PTEs and register space are split in the BAR */ @@ -823,9 +836,13 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) return ret; } + pte_flags = 0; + if (i915_gem_object_is_lmem(ggtt->vm.scratch[0])) + pte_flags |= PTE_LM; + ggtt->vm.scratch[0]->encode = ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]), - I915_CACHE_NONE, 0); + I915_CACHE_NONE, pte_flags); return 0; } @@ -1115,6 +1132,7 @@ static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt) ggtt->vm.gt = gt; ggtt->vm.i915 = i915; ggtt->vm.dma = i915->drm.dev; + dma_resv_init(&ggtt->vm.resv); if (INTEL_GEN(i915) <= 5) ret = i915_gmch_probe(ggtt); @@ -1122,8 +1140,10 @@ static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt) ret = gen6_gmch_probe(ggtt); else ret = gen8_gmch_probe(ggtt); - if (ret) + if (ret) { + dma_resv_fini(&ggtt->vm.resv); return ret; + } if ((ggtt->vm.total - 1) >> 32) { drm_err(&i915->drm, @@ -1458,7 +1478,7 @@ intel_partial_pages(const struct i915_ggtt_view *view, if (ret) goto err_sg_alloc; - iter = i915_gem_object_get_sg_dma(obj, view->partial.offset, &offset); + iter = i915_gem_object_get_sg_dma(obj, view->partial.offset, &offset, true); GEM_BUG_ON(!iter); sg = st->sgl; diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c index 67de2b189598..e72b7a0dc316 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c @@ -1,24 +1,6 @@ +// SPDX-License-Identifier: MIT /* * Copyright © 2008-2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
*/ #include "i915_drv.h" @@ -609,6 +591,7 @@ static void detect_bit_6_swizzle(struct i915_ggtt *ggtt) } } else { u32 dimm_c0, dimm_c1; + dimm_c0 = intel_uncore_read(uncore, MAD_DIMM_C0); dimm_c1 = intel_uncore_read(uncore, MAD_DIMM_C1); dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK; @@ -798,10 +781,12 @@ i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj, i = 0; for_each_sgt_page(page, sgt_iter, pages) { char new_bit_17 = page_to_phys(page) >> 17; + if ((new_bit_17 & 0x1) != (test_bit(i, obj->bit_17) != 0)) { swizzle_page(page); set_page_dirty(page); } + i++; } } diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h index 9eef679e1311..25340be5ecf0 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h @@ -1,25 +1,6 @@ +/* SPDX-License-Identifier: MIT */ /* * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * */ #ifndef __INTEL_GGTT_FENCING_H__ diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index 534e435f20bc..14e2ffb6c0e5 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT*/ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2003-2018 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index d8e1ab412634..8d77dcbad059 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -4,6 +4,8 @@ */ #include "debugfs_gt.h" + +#include "gem/i915_gem_lmem.h" #include "i915_drv.h" #include "intel_context.h" #include "intel_gt.h" @@ -29,6 +31,9 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) INIT_LIST_HEAD(&gt->closed_vma); spin_lock_init(&gt->closed_lock); + init_llist_head(&gt->watchdog.list); + INIT_WORK(&gt->watchdog.work, intel_gt_watchdog_work); + intel_gt_init_buffer_pool(gt); intel_gt_init_reset(gt); intel_gt_init_requests(gt); @@ -39,6 +44,42 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) intel_uc_init_early(&gt->uc); } +int intel_gt_probe_lmem(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_memory_region *mem; + int id; + int err; + + mem = intel_gt_setup_lmem(gt); + if (mem == ERR_PTR(-ENODEV)) + mem = intel_gt_setup_fake_lmem(gt); + if (IS_ERR(mem)) { + err = PTR_ERR(mem); + if (err == -ENODEV) + return 0; + + drm_err(&i915->drm, + "Failed to setup region(%d) type=%d\n", + err, INTEL_MEMORY_LOCAL); + return err; + } + + id = INTEL_REGION_LMEM; + + mem->id = id; + mem->type = INTEL_MEMORY_LOCAL; + mem->instance = 0; + + intel_memory_region_set_name(mem, "local%u", mem->instance); + + GEM_BUG_ON(!HAS_REGION(i915, id)); + GEM_BUG_ON(i915->mm.regions[id]); + i915->mm.regions[id] = mem; + + return 0; +} + void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt) { gt->ggtt = ggtt; @@ -344,11 +385,13 @@ static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size) struct i915_vma *vma; int ret; - obj = i915_gem_object_create_stolen(i915, size); + obj = i915_gem_object_create_lmem(i915, size, I915_BO_ALLOC_VOLATILE); + if (IS_ERR(obj)) + obj = i915_gem_object_create_stolen(i915, size); if (IS_ERR(obj)) obj = i915_gem_object_create_internal(i915, size); if (IS_ERR(obj)) { - DRM_ERROR("Failed to allocate scratch page\n"); + drm_err(&i915->drm, "Failed to allocate scratch page\n"); return PTR_ERR(obj); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 9157c7411f60..7ec395cace69 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -36,6 +36,7 @@ static inline struct intel_gt *huc_to_gt(struct intel_huc *huc) void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915); void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt); +int intel_gt_probe_lmem(struct intel_gt *gt); int intel_gt_init_mmio(struct intel_gt *gt); int __must_check intel_gt_init_hw(struct intel_gt *gt); int intel_gt_init(struct intel_gt *gt); @@ -77,4 +78,6 @@ static inline bool intel_gt_is_wedged(const struct intel_gt *gt) void intel_gt_info_print(const struct intel_gt_info *info, struct drm_printer *p); +void intel_gt_watchdog_work(struct work_struct *work); + #endif /* __INTEL_GT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c 
b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c index 06d84cf09570..c59468107598 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c @@ -98,28 +98,6 @@ static void pool_free_work(struct work_struct *wrk) round_jiffies_up_relative(HZ)); } -static int pool_active(struct i915_active *ref) -{ - struct intel_gt_buffer_pool_node *node = - container_of(ref, typeof(*node), active); - struct dma_resv *resv = node->obj->base.resv; - int err; - - if (dma_resv_trylock(resv)) { - dma_resv_add_excl_fence(resv, NULL); - dma_resv_unlock(resv); - } - - err = i915_gem_object_pin_pages(node->obj); - if (err) - return err; - - /* Hide this pinned object from the shrinker until retired */ - i915_gem_object_make_unshrinkable(node->obj); - - return 0; -} - __i915_active_call static void pool_retire(struct i915_active *ref) { @@ -129,10 +107,13 @@ static void pool_retire(struct i915_active *ref) struct list_head *list = bucket_for_size(pool, node->obj->base.size); unsigned long flags; - i915_gem_object_unpin_pages(node->obj); + if (node->pinned) { + i915_gem_object_unpin_pages(node->obj); - /* Return this object to the shrinker pool */ - i915_gem_object_make_purgeable(node->obj); + /* Return this object to the shrinker pool */ + i915_gem_object_make_purgeable(node->obj); + node->pinned = false; + } GEM_BUG_ON(node->age); spin_lock_irqsave(&pool->lock, flags); @@ -144,6 +125,19 @@ static void pool_retire(struct i915_active *ref) round_jiffies_up_relative(HZ)); } +void intel_gt_buffer_pool_mark_used(struct intel_gt_buffer_pool_node *node) +{ + assert_object_held(node->obj); + + if (node->pinned) + return; + + __i915_gem_object_pin_pages(node->obj); + /* Hide this pinned object from the shrinker until retired */ + i915_gem_object_make_unshrinkable(node->obj); + node->pinned = true; +} + static struct intel_gt_buffer_pool_node * node_create(struct intel_gt_buffer_pool *pool, size_t sz, enum i915_map_type type) @@ -159,7 +153,8 @@ node_create(struct intel_gt_buffer_pool *pool, size_t sz, node->age = 0; node->pool = pool; - i915_active_init(&node->active, pool_active, pool_retire); + node->pinned = false; + i915_active_init(&node->active, NULL, pool_retire); obj = i915_gem_object_create_internal(gt->i915, sz); if (IS_ERR(obj)) { diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h index 6068f8f1762e..487b8a5520f1 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h @@ -18,10 +18,15 @@ struct intel_gt_buffer_pool_node * intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size, enum i915_map_type type); +void intel_gt_buffer_pool_mark_used(struct intel_gt_buffer_pool_node *node); + static inline int intel_gt_buffer_pool_mark_active(struct intel_gt_buffer_pool_node *node, struct i915_request *rq) { + /* did we call mark_used? 
*/ + GEM_WARN_ON(!node->pinned); + return i915_active_add_request(&node->active, rq); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h index d8d82c890da8..df1d75d08cd2 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2014-2018 Intel Corporation */ @@ -31,6 +30,7 @@ struct intel_gt_buffer_pool_node { }; unsigned long age; enum i915_map_type type; + u32 pinned; }; #endif /* INTEL_GT_BUFFER_POOL_TYPES_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c index a4242ca8dcd7..582fcaee11aa 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c @@ -165,7 +165,6 @@ void intel_gt_init_clock_frequency(struct intel_gt *gt) gt->clock_period_ns, div_u64(mul_u32_u32(gt->clock_period_ns, S32_MAX), USEC_PER_SEC)); - } #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index 9830342aa6f4..9fc6c912a4e5 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.h b/drivers/gpu/drm/i915/gt/intel_gt_irq.h index 886c5cf408a2..f667e976fb2b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index c94e8ac884eb..aef3084e8b16 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index 63846a856e7e..d0588d8aaa44 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_irq.c index babe866126d7..811a11ed181c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_irq.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_irq.h b/drivers/gpu/drm/i915/gt/intel_gt_pm_irq.h index b29816a04809..ff766966d6fc 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_irq.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_irq.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index dc06c78c9eeb..647eca9d867a 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: 
MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ @@ -9,6 +8,7 @@ #include "i915_drv.h" /* for_each_engine() */ #include "i915_request.h" #include "intel_engine_heartbeat.h" +#include "intel_execlists_submission.h" #include "intel_gt.h" #include "intel_gt_pm.h" #include "intel_gt_requests.h" @@ -243,4 +243,31 @@ void intel_gt_fini_requests(struct intel_gt *gt) { /* Wait until the work is marked as finished before unloading! */ cancel_delayed_work_sync(&gt->requests.retire_work); + + flush_work(&gt->watchdog.work); +} + +void intel_gt_watchdog_work(struct work_struct *work) +{ + struct intel_gt *gt = + container_of(work, typeof(*gt), watchdog.work); + struct i915_request *rq, *rn; + struct llist_node *first; + + first = llist_del_all(&gt->watchdog.list); + if (!first) + return; + + llist_for_each_entry_safe(rq, rn, first, watchdog.link) { + if (!i915_request_completed(rq)) { + struct dma_fence *f = &rq->fence; + + pr_notice("Fence expiration time out i915-%s:%s:%llx!\n", + f->ops->get_driver_name(f), + f->ops->get_timeline_name(f), + f->seqno); + i915_request_cancel(rq, -EINTR); + } + i915_request_put(rq); + } } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h index dbac53baf1cb..fcc30a6e4fe9 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index a83d3e18254d..0caf6ca0a784 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -8,10 +8,12 @@ #include <linux/ktime.h> #include <linux/list.h> +#include <linux/llist.h> #include <linux/mutex.h> #include <linux/notifier.h> #include <linux/spinlock.h> #include <linux/types.h> +#include <linux/workqueue.h> #include "uc/intel_uc.h" @@ -39,10 +41,6 @@ struct intel_gt { struct intel_gt_timelines { spinlock_t lock; /* protects active_list */ struct list_head active_list; - - /* Pack multiple timelines' seqnos into the same page */ - spinlock_t hwsp_lock; - struct list_head hwsp_free_list; } timelines; struct intel_gt_requests { @@ -56,6 +54,11 @@ struct intel_gt { struct delayed_work retire_work; } requests; + struct { + struct llist_head list; + struct work_struct work; + } watchdog; + struct intel_wakeref wakeref; atomic_t user_wakeref; diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 04aa6601e984..941f8af016d6 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -13,16 +13,36 @@ struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz) { + struct drm_i915_gem_object *obj; + if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1))) i915_gem_shrink_all(vm->i915); - return i915_gem_object_create_internal(vm->i915, sz); + obj = i915_gem_object_create_internal(vm->i915, sz); + /* ensure all dma objects have the same reservation class */ + if (!IS_ERR(obj)) + obj->base.resv = &vm->resv; + return obj; } int pin_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj) { int err; + i915_gem_object_lock(obj, NULL); + err = i915_gem_object_pin_pages(obj); + i915_gem_object_unlock(obj); + if (err) + return err; + + i915_gem_object_make_unshrinkable(obj); + return 0; +} + +int pin_pt_dma_locked(struct i915_address_space *vm, struct 
drm_i915_gem_object *obj) +{ + int err; + err = i915_gem_object_pin_pages(obj); if (err) return err; @@ -56,6 +76,20 @@ void __i915_vm_close(struct i915_address_space *vm) mutex_unlock(&vm->mutex); } +/* lock the vm into the current ww, if we lock one, we lock all */ +int i915_vm_lock_objects(struct i915_address_space *vm, + struct i915_gem_ww_ctx *ww) +{ + if (vm->scratch[0]->base.resv == &vm->resv) { + return i915_gem_object_lock(vm->scratch[0], ww); + } else { + struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); + + /* We borrowed the scratch page from ggtt, take the top level object */ + return i915_gem_object_lock(ppgtt->pd->pt.base, ww); + } +} + void i915_address_space_fini(struct i915_address_space *vm) { drm_mm_takedown(&vm->mm); @@ -69,6 +103,7 @@ static void __i915_vm_release(struct work_struct *work) vm->cleanup(vm); i915_address_space_fini(vm); + dma_resv_fini(&vm->resv); kfree(vm); } @@ -98,6 +133,7 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass) mutex_init(&vm->mutex); lockdep_set_subclass(&vm->mutex, subclass); i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex); + dma_resv_init(&vm->resv); GEM_BUG_ON(!vm->total); drm_mm_init(&vm->mm, 0, vm->total); @@ -427,7 +463,6 @@ __vm_create_scratch_for_read(struct i915_address_space *vm, unsigned long size) { struct drm_i915_gem_object *obj; struct i915_vma *vma; - int err; obj = i915_gem_object_create_internal(vm->i915, PAGE_ALIGN(size)); if (IS_ERR(obj)) @@ -441,6 +476,19 @@ __vm_create_scratch_for_read(struct i915_address_space *vm, unsigned long size) return vma; } + return vma; +} + +struct i915_vma * +__vm_create_scratch_for_read_pinned(struct i915_address_space *vm, unsigned long size) +{ + struct i915_vma *vma; + int err; + + vma = __vm_create_scratch_for_read(vm, size); + if (IS_ERR(vma)) + return vma; + err = i915_vma_pin(vma, 0, 0, i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER); if (err) { diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index 29c10fde8ce3..e67e34e17913 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -85,6 +85,10 @@ typedef u64 gen8_pte_t; #define BYT_PTE_SNOOPED_BY_CPU_CACHES REG_BIT(2) #define BYT_PTE_WRITEABLE REG_BIT(1) +#define GEN12_PPGTT_PTE_LM BIT_ULL(11) + +#define GEN12_GGTT_PTE_LM BIT_ULL(1) + /* * Cacheability Control is a 4-bit value. The low three bits are stored in bits * 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE. 
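A note on the locking scheme in the hunks above: alloc_pt_dma() now points every page-table object's base.resv at the single shared vm->resv, so taking any one of those object locks inside a ww acquire context effectively locks the whole address space, and i915_vm_lock_objects() just picks the canonical object to lock. Below is a minimal sketch of the acquire/backoff loop these hunks rely on; the helpers are the ones added by this patch, while example_vm_op() itself is a hypothetical caller, not code from the patch:

static int example_vm_op(struct i915_address_space *vm,
			 struct i915_vm_pt_stash *stash)
{
	struct i915_gem_ww_ctx ww;
	int err;

	i915_gem_ww_ctx_init(&ww, true);	/* true = interruptible waits */
retry:
	/* one lock covers every page-table object sharing vm->resv */
	err = i915_vm_lock_objects(vm, &ww);
	if (!err)
		err = i915_vm_pin_pt_stash(vm, stash);	/* needs the lock held */
	if (err == -EDEADLK) {
		/* wounded by another acquire context: unlock, wait, retry */
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	return err;
}

The same init/lock/backoff/fini shape recurs further down in lrc_init_wa_ctx() and intel_ring_submission_setup().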
@@ -238,6 +242,7 @@ struct i915_address_space { atomic_t open; struct mutex mutex; /* protects vma and our lists */ + struct dma_resv resv; /* reservation lock for all pd objects, and buffer pool */ #define VM_CLASS_GGTT 0 #define VM_CLASS_PPGTT 1 @@ -264,6 +269,7 @@ struct i915_address_space { enum i915_cache_level level, u32 flags); /* Create a valid PTE */ #define PTE_READ_ONLY BIT(0) +#define PTE_LM BIT(1) void (*allocate_va_range)(struct i915_address_space *vm, struct i915_vm_pt_stash *stash, @@ -346,6 +352,9 @@ struct i915_ppgtt { #define i915_is_ggtt(vm) ((vm)->is_ggtt) +int __must_check +i915_vm_lock_objects(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww); + static inline bool i915_vm_is_4lvl(const struct i915_address_space *vm) { @@ -522,6 +531,7 @@ struct i915_page_directory *alloc_pd(struct i915_address_space *vm); struct i915_page_directory *__alloc_pd(int npde); int pin_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj); +int pin_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object *obj); void free_px(struct i915_address_space *vm, struct i915_page_table *pt, int lvl); @@ -576,6 +586,9 @@ void i915_vm_free_pt_stash(struct i915_address_space *vm, struct i915_vma * __vm_create_scratch_for_read(struct i915_address_space *vm, unsigned long size); +struct i915_vma * +__vm_create_scratch_for_read_pinned(struct i915_address_space *vm, unsigned long size); + static inline struct sgt_dma { struct scatterlist *sg; dma_addr_t dma, max; diff --git a/drivers/gpu/drm/i915/gt/intel_llc.c b/drivers/gpu/drm/i915/gt/intel_llc.c index e3f637b3650e..075d741644ae 100644 --- a/drivers/gpu/drm/i915/gt/intel_llc.c +++ b/drivers/gpu/drm/i915/gt/intel_llc.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/intel_llc.h b/drivers/gpu/drm/i915/gt/intel_llc.h index ef09a890d2b7..0e2e3871c919 100644 --- a/drivers/gpu/drm/i915/gt/intel_llc.h +++ b/drivers/gpu/drm/i915/gt/intel_llc.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/intel_llc_types.h b/drivers/gpu/drm/i915/gt/intel_llc_types.h index ecad4687b930..ca5bdf166a23 100644 --- a/drivers/gpu/drm/i915/gt/intel_llc_types.h +++ b/drivers/gpu/drm/i915/gt/intel_llc_types.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 94f485b591af..e86897cde984 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -3,6 +3,8 @@ * Copyright © 2014 Intel Corporation */ +#include "gem/i915_gem_lmem.h" + #include "gen8_engine_cs.h" #include "i915_drv.h" #include "i915_perf.h" @@ -808,7 +810,9 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine) context_size += PAGE_SIZE; } - obj = i915_gem_object_create_shmem(engine->i915, context_size); + obj = i915_gem_object_create_lmem(engine->i915, context_size, 0); + if (IS_ERR(obj)) + obj = i915_gem_object_create_shmem(engine->i915, context_size); if (IS_ERR(obj)) return ERR_CAST(obj); @@ -1417,7 +1421,7 @@ gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) #define CTX_WA_BB_SIZE (PAGE_SIZE) -static int lrc_setup_wa_ctx(struct intel_engine_cs *engine) +static int lrc_create_wa_ctx(struct 
intel_engine_cs *engine) { struct drm_i915_gem_object *obj; struct i915_vma *vma; @@ -1433,10 +1437,6 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine) goto err; } - err = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH); - if (err) - goto err; - engine->wa_ctx.vma = vma; return 0; @@ -1448,9 +1448,6 @@ err: void lrc_fini_wa_ctx(struct intel_engine_cs *engine) { i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0); - - /* Called on error unwind, clear all flags to prevent further use */ - memset(&engine->wa_ctx, 0, sizeof(engine->wa_ctx)); } typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch); @@ -1462,6 +1459,7 @@ void lrc_init_wa_ctx(struct intel_engine_cs *engine) &wa_ctx->indirect_ctx, &wa_ctx->per_ctx }; wa_bb_func_t wa_bb_fn[ARRAY_SIZE(wa_bb)]; + struct i915_gem_ww_ctx ww; void *batch, *batch_ptr; unsigned int i; int err; @@ -1490,7 +1488,7 @@ void lrc_init_wa_ctx(struct intel_engine_cs *engine) return; } - err = lrc_setup_wa_ctx(engine); + err = lrc_create_wa_ctx(engine); if (err) { /* * We continue even if we fail to initialize WA batch @@ -1503,7 +1501,22 @@ void lrc_init_wa_ctx(struct intel_engine_cs *engine) return; } + if (!engine->wa_ctx.vma) + return; + + i915_gem_ww_ctx_init(&ww, true); +retry: + err = i915_gem_object_lock(wa_ctx->vma->obj, &ww); + if (!err) + err = i915_ggtt_pin(wa_ctx->vma, &ww, 0, PIN_HIGH); + if (err) + goto err; + batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto err_unpin; + } /* * Emit the two workaround batch buffers, recording the offset from the @@ -1528,8 +1541,26 @@ void lrc_init_wa_ctx(struct intel_engine_cs *engine) __i915_gem_object_release_map(wa_ctx->vma->obj); /* Verify that we can handle failure to setup the wa_ctx */ - if (err || i915_inject_probe_error(engine->i915, -ENODEV)) - lrc_fini_wa_ctx(engine); + if (!err) + err = i915_inject_probe_error(engine->i915, -ENODEV); + +err_unpin: + if (err) + i915_vma_unpin(wa_ctx->vma); +err: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + + if (err) { + i915_vma_put(engine->wa_ctx.vma); + + /* Clear all flags to prevent further use */ + memset(wa_ctx, 0, sizeof(*wa_ctx)); + } } static void st_update_runtime_underflow(struct intel_context *ce, s32 dt) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h index 65fe76738335..41e5350a7a05 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2014-2018 Intel Corporation */ @@ -40,7 +39,7 @@ #define ASSIGN_CTX_PML4(ppgtt, reg_state) do { \ u32 *reg_state__ = (reg_state); \ - const u64 addr__ = px_dma(ppgtt->pd); \ + const u64 addr__ = px_dma((ppgtt)->pd); \ (reg_state__)[CTX_PDP0_UDW] = upper_32_bits(addr__); \ (reg_state__)[CTX_PDP0_LDW] = lower_32_bits(addr__); \ } while (0) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 8acb84960cd0..b14138fd505c 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -1,23 +1,6 @@ +// SPDX-License-Identifier: MIT /* - * Copyright (c) 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without 
limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright © 2015 Intel Corporation */ #include "i915_drv.h" diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h index 83371f3e6ba1..d83274f5163b 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.h +++ b/drivers/gpu/drm/i915/gt/intel_mocs.h @@ -1,24 +1,6 @@ +/* SPDX-License-Identifier: MIT */ /* - * Copyright (c) 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
+ * Copyright © 2015 Intel Corporation */ #ifndef INTEL_MOCS_H diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c index 3f940ae27028..014ae8ac4480 100644 --- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c @@ -5,6 +5,8 @@ #include <linux/slab.h> +#include "gem/i915_gem_lmem.h" + #include "i915_trace.h" #include "intel_gtt.h" #include "gen6_ppgtt.h" @@ -192,6 +194,8 @@ void ppgtt_bind_vma(struct i915_address_space *vm, pte_flags = 0; if (i915_gem_object_is_readonly(vma->obj)) pte_flags |= PTE_READ_ONLY; + if (i915_gem_object_is_lmem(vma->obj)) + pte_flags |= PTE_LM; vm->insert_entries(vm, vma, cache_level, pte_flags); wmb(); @@ -262,7 +266,7 @@ int i915_vm_pin_pt_stash(struct i915_address_space *vm, for (n = 0; n < ARRAY_SIZE(stash->pt); n++) { for (pt = stash->pt[n]; pt; pt = pt->stash) { - err = pin_pt_dma(vm, pt->base); + err = pin_pt_dma_locked(vm, pt->base); if (err) return err; } @@ -304,6 +308,7 @@ void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt) ppgtt->vm.dma = i915->drm.dev; ppgtt->vm.total = BIT_ULL(INTEL_INFO(i915)->ppgtt_size); + dma_resv_init(&ppgtt->vm.resv); i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT); ppgtt->vm.vma_ops.bind_vma = ppgtt_bind_vma; diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 9843e1d4327f..3b7e62debe7e 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ @@ -176,7 +175,6 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6) /* 3a: Enable RC6 */ set(uncore, GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ - rc6->ctl_enable = GEN6_RC_CTL_HW_ENABLE | GEN6_RC_CTL_RC6_ENABLE | diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.h b/drivers/gpu/drm/i915/gt/intel_rc6.h index 9f0f23fca8af..e119ec4a0bcc 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.h +++ b/drivers/gpu/drm/i915/gt/intel_rc6.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/intel_rc6_types.h b/drivers/gpu/drm/i915/gt/intel_rc6_types.h index bfbb623f7a4f..e747492b2f46 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6_types.h +++ b/drivers/gpu/drm/i915/gt/intel_rc6_types.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index e326d3c0bc10..be6f2c8f5184 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -90,8 +90,6 @@ region_lmem_init(struct intel_memory_region *mem) if (ret) io_mapping_fini(&mem->iomap); - intel_memory_region_set_name(mem, "local"); - return ret; } @@ -102,20 +100,26 @@ static const struct intel_memory_region_ops intel_region_lmem_ops = { }; struct intel_memory_region * -intel_setup_fake_lmem(struct drm_i915_private *i915) +intel_gt_setup_fake_lmem(struct intel_gt *gt) { + struct drm_i915_private *i915 = gt->i915; struct pci_dev *pdev = to_pci_dev(i915->drm.dev); struct intel_memory_region *mem; resource_size_t mappable_end; resource_size_t io_start; resource_size_t start; + if (!HAS_LMEM(i915)) + return ERR_PTR(-ENODEV); + + if (!i915->params.fake_lmem_start) + return ERR_PTR(-ENODEV); + 
GEM_BUG_ON(i915_ggtt_has_aperture(&i915->ggtt)); - GEM_BUG_ON(!i915->params.fake_lmem_start); /* Your mappable aperture belongs to me now! */ mappable_end = pci_resource_len(pdev, 2); - io_start = pci_resource_start(pdev, 2), + io_start = pci_resource_start(pdev, 2); start = i915->params.fake_lmem_start; mem = intel_memory_region_create(i915, @@ -136,3 +140,86 @@ intel_setup_fake_lmem(struct drm_i915_private *i915) return mem; } + +static bool get_legacy_lowmem_region(struct intel_uncore *uncore, + u64 *start, u32 *size) +{ + if (!IS_DG1_REVID(uncore->i915, DG1_REVID_A0, DG1_REVID_B0)) + return false; + + *start = 0; + *size = SZ_1M; + + drm_dbg(&uncore->i915->drm, "LMEM: reserved legacy low-memory [0x%llx-0x%llx]\n", + *start, *start + *size); + + return true; +} + +static int reserve_lowmem_region(struct intel_uncore *uncore, + struct intel_memory_region *mem) +{ + u64 reserve_start; + u32 reserve_size; + int ret; + + if (!get_legacy_lowmem_region(uncore, &reserve_start, &reserve_size)) + return 0; + + ret = intel_memory_region_reserve(mem, reserve_start, reserve_size); + if (ret) + drm_err(&uncore->i915->drm, "LMEM: reserving low memory region failed\n"); + + return ret; +} + +static struct intel_memory_region *setup_lmem(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + struct pci_dev *pdev = i915->drm.pdev; + struct intel_memory_region *mem; + resource_size_t io_start; + resource_size_t lmem_size; + int err; + + if (!IS_DGFX(i915)) + return ERR_PTR(-ENODEV); + + /* Stolen starts from GSMBASE on DG1 */ + lmem_size = intel_uncore_read64(uncore, GEN12_GSMBASE); + + io_start = pci_resource_start(pdev, 2); + if (GEM_WARN_ON(lmem_size > pci_resource_len(pdev, 2))) + return ERR_PTR(-ENODEV); + + mem = intel_memory_region_create(i915, + 0, + lmem_size, + I915_GTT_PAGE_SIZE_4K, + io_start, + &intel_region_lmem_ops); + if (IS_ERR(mem)) + return mem; + + err = reserve_lowmem_region(uncore, mem); + if (err) + goto err_region_put; + + drm_dbg(&i915->drm, "Local memory: %pR\n", &mem->region); + drm_dbg(&i915->drm, "Local memory IO start: %pa\n", + &mem->io_start); + drm_info(&i915->drm, "Local memory available: %pa\n", + &lmem_size); + + return mem; + +err_region_put: + intel_memory_region_put(mem); + return ERR_PTR(err); +} + +struct intel_memory_region *intel_gt_setup_lmem(struct intel_gt *gt) +{ + return setup_lmem(gt); +} diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.h b/drivers/gpu/drm/i915/gt/intel_region_lmem.h index 8ea43e538dab..062d0542ae34 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.h +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.h @@ -6,9 +6,11 @@ #ifndef __INTEL_REGION_LMEM_H #define __INTEL_REGION_LMEM_H -struct drm_i915_private; +struct intel_gt; + +struct intel_memory_region *intel_gt_setup_lmem(struct intel_gt *gt); struct intel_memory_region * -intel_setup_fake_lmem(struct drm_i915_private *i915); +intel_gt_setup_fake_lmem(struct intel_gt *gt); #endif /* !__INTEL_REGION_LMEM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c index ca816ba22197..b03e197b1d99 100644 --- a/drivers/gpu/drm/i915/gt/intel_renderstate.c +++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c @@ -1,28 +1,6 @@ +// SPDX-License-Identifier: MIT /* * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software 
without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Mika Kuoppala <mika.kuoppala@intel.com> - * */ #include "i915_drv.h" @@ -65,7 +43,7 @@ render_state_get_rodata(const struct intel_engine_cs *engine) if ((i) >= PAGE_SIZE / sizeof(u32)) \ goto out; \ (batch)[(i)++] = (val); \ - } while(0) + } while (0) static int render_state_setup(struct intel_renderstate *so, struct drm_i915_private *i915) @@ -84,6 +62,7 @@ static int render_state_setup(struct intel_renderstate *so, if (i * 4 == rodata->reloc[reloc_index]) { u64 r = s + so->vma->node.start; + s = lower_32_bits(r); if (HAS_64BIT_RELOC(i915)) { if (i + 1 >= rodata->batch_items || @@ -197,7 +176,7 @@ retry: if (err) goto err_context; - err = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + err = i915_vma_pin_ww(so->vma, &so->ww, 0, 0, PIN_GLOBAL | PIN_HIGH); if (err) goto err_context; diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.h b/drivers/gpu/drm/i915/gt/intel_renderstate.h index 713aa1e86c80..48f009203917 100644 --- a/drivers/gpu/drm/i915/gt/intel_renderstate.h +++ b/drivers/gpu/drm/i915/gt/intel_renderstate.h @@ -1,24 +1,6 @@ +/* SPDX-License-Identifier: MIT */ /* * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
*/ #ifndef _INTEL_RENDERSTATE_H_ diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index afe0342dcd47..a377c4588aaa 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2008-2018 Intel Corporation */ @@ -787,18 +786,15 @@ static void reset_finish(struct intel_gt *gt, intel_engine_mask_t awake) static void nop_submit_request(struct i915_request *request) { - struct intel_engine_cs *engine = request->engine; - unsigned long flags; - RQ_TRACE(request, "-EIO\n"); - i915_request_set_error_once(request, -EIO); - spin_lock_irqsave(&engine->active.lock, flags); - __i915_request_submit(request); - i915_request_mark_complete(request); - spin_unlock_irqrestore(&engine->active.lock, flags); + request = i915_request_mark_eio(request); + if (request) { + i915_request_submit(request); + intel_engine_signal_breadcrumbs(request->engine); - intel_engine_signal_breadcrumbs(engine); + i915_request_put(request); + } } static void __intel_gt_set_wedged(struct intel_gt *gt) @@ -974,8 +970,6 @@ static int do_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask) { int err, i; - gt_revoke(gt); - err = __intel_gt_reset(gt, ALL_ENGINES); for (i = 0; err && i < RESET_MAX_RETRIES; i++) { msleep(10 * (i + 1)); @@ -1030,6 +1024,13 @@ void intel_gt_reset(struct intel_gt *gt, might_sleep(); GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &gt->reset.flags)); + + /* + * FIXME: Revoking cpu mmap ptes cannot be done from a dma_fence + * critical section like gpu reset. + */ + gt_revoke(gt); + mutex_lock(&gt->reset.mutex); /* Clear any previous failed attempts at recovery. Time to try again. */ diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h index 7dbf5cc8a333..adc734e67387 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.h +++ b/drivers/gpu/drm/i915/gt/intel_reset.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2008-2018 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/intel_reset_types.h b/drivers/gpu/drm/i915/gt/intel_reset_types.h index add6b86d9d03..9312b29f5a97 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset_types.h +++ b/drivers/gpu/drm/i915/gt/intel_reset_types.h @@ -32,7 +32,7 @@ struct intel_reset { * * #I915_WEDGED_ON_INIT - If we fail to initialize the GPU we can no * longer use the GPU - similar to #I915_WEDGED bit. The difference in - * in the way we're handling "forced" unwedged (e.g. through debugfs), + * the way we're handling "forced" unwedged (e.g. through debugfs), which is not allowed in case we failed to initialize. 
* * #I915_WEDGED_ON_FINI - Similar to #I915_WEDGED_ON_INIT, except we diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c index 78d1360caa0f..aee0a77c77e0 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.c +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -1,9 +1,9 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ +#include "gem/i915_gem_lmem.h" #include "gem/i915_gem_object.h" #include "i915_drv.h" @@ -109,8 +109,8 @@ static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size) struct drm_i915_gem_object *obj; struct i915_vma *vma; - obj = ERR_PTR(-ENODEV); - if (i915_ggtt_has_aperture(ggtt)) + obj = i915_gem_object_create_lmem(i915, size, I915_BO_ALLOC_VOLATILE); + if (IS_ERR(obj) && i915_ggtt_has_aperture(ggtt)) obj = i915_gem_object_create_stolen(i915, size); if (IS_ERR(obj)) obj = i915_gem_object_create_internal(i915, size); diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h index 1700579bdc93..dbf5f14a136f 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.h +++ b/drivers/gpu/drm/i915/gt/intel_ring.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ @@ -82,6 +81,7 @@ static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr) { /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */ u32 offset = addr - rq->ring->vaddr; + GEM_BUG_ON(offset > rq->ring->size); return intel_ring_wrap(rq->ring, offset); } diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 4984ff565424..9585546556ee 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -1,30 +1,6 @@ +// SPDX-License-Identifier: MIT /* - * Copyright © 2008-2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - * Authors: - * Eric Anholt <eric@anholt.net> - * Zou Nan hai <nanhai.zou@intel.com> - * Xiang Hai hao<haihao.xiang@intel.com> - * + * Copyright © 2008-2021 Intel Corporation */ #include "gen2_engine_cs.h" @@ -183,15 +159,36 @@ static void set_pp_dir(struct intel_engine_cs *engine) } } +static bool stop_ring(struct intel_engine_cs *engine) +{ + /* Empty the ring by skipping to the end */ + ENGINE_WRITE_FW(engine, RING_HEAD, ENGINE_READ_FW(engine, RING_TAIL)); + ENGINE_POSTING_READ(engine, RING_HEAD); + + /* The ring must be empty before it is disabled */ + ENGINE_WRITE_FW(engine, RING_CTL, 0); + ENGINE_POSTING_READ(engine, RING_CTL); + + /* Then reset the disabled ring */ + ENGINE_WRITE_FW(engine, RING_HEAD, 0); + ENGINE_WRITE_FW(engine, RING_TAIL, 0); + + return (ENGINE_READ_FW(engine, RING_HEAD) & HEAD_ADDR) == 0; +} + static int xcs_resume(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = engine->i915; struct intel_ring *ring = engine->legacy.ring; ENGINE_TRACE(engine, "ring:{HEAD:%04x, TAIL:%04x}\n", ring->head, ring->tail); - if (HWS_NEEDS_PHYSICAL(dev_priv)) + /* Double check the ring is empty & disabled before we resume */ + synchronize_hardirq(engine->i915->drm.irq); + if (!stop_ring(engine)) + goto err; + + if (HWS_NEEDS_PHYSICAL(engine->i915)) ring_setup_phys_status_page(engine); else ring_setup_status_page(engine); @@ -228,21 +225,10 @@ static int xcs_resume(struct intel_engine_cs *engine) if (__intel_wait_for_register_fw(engine->uncore, RING_CTL(engine->mmio_base), RING_VALID, RING_VALID, - 5000, 0, NULL)) { - drm_err(&dev_priv->drm, - "%s initialization failed; " - "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n", - engine->name, - ENGINE_READ(engine, RING_CTL), - ENGINE_READ(engine, RING_CTL) & RING_VALID, - ENGINE_READ(engine, RING_HEAD), ring->head, - ENGINE_READ(engine, RING_TAIL), ring->tail, - ENGINE_READ(engine, RING_START), - i915_ggtt_offset(ring->vma)); - return -EIO; - } + 5000, 0, NULL)) + goto err; - if (INTEL_GEN(dev_priv) > 2) + if (INTEL_GEN(engine->i915) > 2) ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); @@ -255,6 +241,19 @@ static int xcs_resume(struct intel_engine_cs *engine) /* Papering over lost _interrupts_ immediately following the restart */ intel_engine_signal_breadcrumbs(engine); return 0; + +err: + drm_err(&engine->i915->drm, + "%s initialization failed; " + "ctl %08x (valid? 
%d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n", + engine->name, + ENGINE_READ(engine, RING_CTL), + ENGINE_READ(engine, RING_CTL) & RING_VALID, + ENGINE_READ(engine, RING_HEAD), ring->head, + ENGINE_READ(engine, RING_TAIL), ring->tail, + ENGINE_READ(engine, RING_START), + i915_ggtt_offset(ring->vma)); + return -EIO; } static void sanitize_hwsp(struct intel_engine_cs *engine) @@ -290,23 +289,6 @@ static void xcs_sanitize(struct intel_engine_cs *engine) clflush_cache_range(engine->status_page.addr, PAGE_SIZE); } -static bool stop_ring(struct intel_engine_cs *engine) -{ - /* Empty the ring by skipping to the end */ - ENGINE_WRITE_FW(engine, RING_HEAD, ENGINE_READ_FW(engine, RING_TAIL)); - ENGINE_POSTING_READ(engine, RING_HEAD); - - /* The ring must be empty before it is disabled */ - ENGINE_WRITE_FW(engine, RING_CTL, 0); - ENGINE_POSTING_READ(engine, RING_CTL); - - /* Then reset the disabled ring */ - ENGINE_WRITE_FW(engine, RING_HEAD, 0); - ENGINE_WRITE_FW(engine, RING_TAIL, 0); - - return (ENGINE_READ_FW(engine, RING_HEAD) & HEAD_ADDR) == 0; -} - static void reset_prepare(struct intel_engine_cs *engine) { /* @@ -329,25 +311,23 @@ static void reset_prepare(struct intel_engine_cs *engine) if (!stop_ring(engine)) { /* G45 ring initialization often fails to reset head to zero */ - drm_dbg(&engine->i915->drm, - "%s head not reset to zero " - "ctl %08x head %08x tail %08x start %08x\n", - engine->name, - ENGINE_READ_FW(engine, RING_CTL), - ENGINE_READ_FW(engine, RING_HEAD), - ENGINE_READ_FW(engine, RING_TAIL), - ENGINE_READ_FW(engine, RING_START)); - } - - if (!stop_ring(engine)) { - drm_err(&engine->i915->drm, - "failed to set %s head to zero " - "ctl %08x head %08x tail %08x start %08x\n", - engine->name, - ENGINE_READ_FW(engine, RING_CTL), - ENGINE_READ_FW(engine, RING_HEAD), - ENGINE_READ_FW(engine, RING_TAIL), - ENGINE_READ_FW(engine, RING_START)); + ENGINE_TRACE(engine, + "HEAD not reset to zero, " + "{ CTL:%08x, HEAD:%08x, TAIL:%08x, START:%08x }\n", + ENGINE_READ_FW(engine, RING_CTL), + ENGINE_READ_FW(engine, RING_HEAD), + ENGINE_READ_FW(engine, RING_TAIL), + ENGINE_READ_FW(engine, RING_START)); + if (!stop_ring(engine)) { + drm_err(&engine->i915->drm, + "failed to set %s head to zero " + "ctl %08x head %08x tail %08x start %08x\n", + engine->name, + ENGINE_READ_FW(engine, RING_CTL), + ENGINE_READ_FW(engine, RING_HEAD), + ENGINE_READ_FW(engine, RING_TAIL), + ENGINE_READ_FW(engine, RING_START)); + } } } @@ -431,7 +411,7 @@ static void reset_cancel(struct intel_engine_cs *engine) /* Mark all submitted requests as skipped. 
*/ list_for_each_entry(request, &engine->active.requests, sched.link) - i915_request_mark_eio(request); + i915_request_put(i915_request_mark_eio(request)); intel_engine_signal_breadcrumbs(engine); /* Remaining _unready_ requests will be nop'ed when submitted */ @@ -466,6 +446,26 @@ static void ring_context_destroy(struct kref *ref) intel_context_free(ce); } +static int ring_context_init_default_state(struct intel_context *ce, + struct i915_gem_ww_ctx *ww) +{ + struct drm_i915_gem_object *obj = ce->state->obj; + void *vaddr; + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); + + shmem_read(ce->engine->default_state, 0, + vaddr, ce->engine->context_size); + + i915_gem_object_flush_map(obj); + __i915_gem_object_release_map(obj); + + __set_bit(CONTEXT_VALID_BIT, &ce->flags); + return 0; +} + static int ring_context_pre_pin(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **unused) @@ -473,6 +473,13 @@ static int ring_context_pre_pin(struct intel_context *ce, struct i915_address_space *vm; int err = 0; + if (ce->engine->default_state && + !test_bit(CONTEXT_VALID_BIT, &ce->flags)) { + err = ring_context_init_default_state(ce, ww); + if (err) + return err; + } + vm = vm_alias(ce->vm); if (vm) err = gen6_ppgtt_pin(i915_vm_to_ppgtt((vm)), ww); @@ -528,22 +535,6 @@ alloc_context_vma(struct intel_engine_cs *engine) if (IS_IVYBRIDGE(i915)) i915_gem_object_set_cache_coherency(obj, I915_CACHE_L3_LLC); - if (engine->default_state) { - void *vaddr; - - vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - err = PTR_ERR(vaddr); - goto err_obj; - } - - shmem_read(engine->default_state, 0, - vaddr, engine->context_size); - - i915_gem_object_flush_map(obj); - __i915_gem_object_release_map(obj); - } - vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); @@ -575,8 +566,6 @@ static int ring_context_alloc(struct intel_context *ce) return PTR_ERR(vma); ce->state = vma; - if (engine->default_state) - __set_bit(CONTEXT_VALID_BIT, &ce->flags); } return 0; @@ -761,13 +750,14 @@ static int mi_set_context(struct i915_request *rq, static int remap_l3_slice(struct i915_request *rq, int slice) { +#define L3LOG_DW (GEN7_L3LOG_SIZE / sizeof(u32)) u32 *cs, *remap_info = rq->engine->i915->l3_parity.remap_info[slice]; int i; if (!remap_info) return 0; - cs = intel_ring_begin(rq, GEN7_L3LOG_SIZE/4 * 2 + 2); + cs = intel_ring_begin(rq, L3LOG_DW * 2 + 2); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -776,8 +766,8 @@ static int remap_l3_slice(struct i915_request *rq, int slice) * here because no other code should access these registers other than * at initialization time. 
*/ - *cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4); - for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) { + *cs++ = MI_LOAD_REGISTER_IMM(L3LOG_DW); + for (i = 0; i < L3LOG_DW; i++) { *cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i)); *cs++ = remap_info[i]; } @@ -785,6 +775,7 @@ static int remap_l3_slice(struct i915_request *rq, int slice) intel_ring_advance(rq, cs); return 0; +#undef L3LOG_DW } static int remap_l3(struct i915_request *rq) @@ -1176,37 +1167,15 @@ static int gen7_ctx_switch_bb_setup(struct intel_engine_cs * const engine, return gen7_setup_clear_gpr_bb(engine, vma); } -static int gen7_ctx_switch_bb_init(struct intel_engine_cs *engine) +static int gen7_ctx_switch_bb_init(struct intel_engine_cs *engine, + struct i915_gem_ww_ctx *ww, + struct i915_vma *vma) { - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - int size; int err; - size = gen7_ctx_switch_bb_setup(engine, NULL /* probe size */); - if (size <= 0) - return size; - - size = ALIGN(size, PAGE_SIZE); - obj = i915_gem_object_create_internal(engine->i915, size); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - vma = i915_vma_instance(obj, engine->gt->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err_obj; - } - - vma->private = intel_context_create(engine); /* dummy residuals */ - if (IS_ERR(vma->private)) { - err = PTR_ERR(vma->private); - goto err_obj; - } - - err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH); + err = i915_vma_pin_ww(vma, ww, 0, 0, PIN_USER | PIN_HIGH); if (err) - goto err_private; + return err; err = i915_vma_sync(vma); if (err) @@ -1221,17 +1190,53 @@ static int gen7_ctx_switch_bb_init(struct intel_engine_cs *engine) err_unpin: i915_vma_unpin(vma); -err_private: - intel_context_put(vma->private); -err_obj: - i915_gem_object_put(obj); return err; } +static struct i915_vma *gen7_ctx_vma(struct intel_engine_cs *engine) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int size, err; + + if (!IS_GEN(engine->i915, 7) || engine->class != RENDER_CLASS) + return 0; + + err = gen7_ctx_switch_bb_setup(engine, NULL /* probe size */); + if (err < 0) + return ERR_PTR(err); + if (!err) + return NULL; + + size = ALIGN(err, PAGE_SIZE); + + obj = i915_gem_object_create_internal(engine->i915, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + vma = i915_vma_instance(obj, engine->gt->vm, NULL); + if (IS_ERR(vma)) { + i915_gem_object_put(obj); + return ERR_CAST(vma); + } + + vma->private = intel_context_create(engine); /* dummy residuals */ + if (IS_ERR(vma->private)) { + err = PTR_ERR(vma->private); + vma->private = NULL; + i915_gem_object_put(obj); + return ERR_PTR(err); + } + + return vma; +} + int intel_ring_submission_setup(struct intel_engine_cs *engine) { + struct i915_gem_ww_ctx ww; struct intel_timeline *timeline; struct intel_ring *ring; + struct i915_vma *gen7_wa_vma; int err; setup_common(engine); @@ -1262,43 +1267,72 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine) } GEM_BUG_ON(timeline->has_initial_breadcrumb); - err = intel_timeline_pin(timeline, NULL); - if (err) - goto err_timeline; - ring = intel_engine_create_ring(engine, SZ_16K); if (IS_ERR(ring)) { err = PTR_ERR(ring); - goto err_timeline_unpin; + goto err_timeline; } - err = intel_ring_pin(ring, NULL); - if (err) - goto err_ring; - GEM_BUG_ON(engine->legacy.ring); engine->legacy.ring = ring; engine->legacy.timeline = timeline; - GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma); + gen7_wa_vma = gen7_ctx_vma(engine); + if (IS_ERR(gen7_wa_vma)) { + err = PTR_ERR(gen7_wa_vma); + goto err_ring; + 
} - if (IS_GEN(engine->i915, 7) && engine->class == RENDER_CLASS) { - err = gen7_ctx_switch_bb_init(engine); + i915_gem_ww_ctx_init(&ww, false); + +retry: + err = i915_gem_object_lock(timeline->hwsp_ggtt->obj, &ww); + if (!err && gen7_wa_vma) + err = i915_gem_object_lock(gen7_wa_vma->obj, &ww); + if (!err && engine->legacy.ring->vma->obj) + err = i915_gem_object_lock(engine->legacy.ring->vma->obj, &ww); + if (!err) + err = intel_timeline_pin(timeline, &ww); + if (!err) { + err = intel_ring_pin(ring, &ww); if (err) - goto err_ring_unpin; + intel_timeline_unpin(timeline); + } + if (err) + goto out; + + GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma); + + if (gen7_wa_vma) { + err = gen7_ctx_switch_bb_init(engine, &ww, gen7_wa_vma); + if (err) { + intel_ring_unpin(ring); + intel_timeline_unpin(timeline); + } } +out: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + if (err) + goto err_gen7_put; + /* Finally, take ownership and responsibility for cleanup! */ engine->release = ring_release; return 0; -err_ring_unpin: - intel_ring_unpin(ring); +err_gen7_put: + if (gen7_wa_vma) { + intel_context_put(gen7_wa_vma->private); + i915_gem_object_put(gen7_wa_vma->obj); + } err_ring: intel_ring_put(ring); -err_timeline_unpin: - intel_timeline_unpin(timeline); err_timeline: intel_timeline_put(timeline); err: diff --git a/drivers/gpu/drm/i915/gt/intel_ring_types.h b/drivers/gpu/drm/i915/gt/intel_ring_types.h index 1a189ea00fd8..49ccb76dda3b 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_types.h +++ b/drivers/gpu/drm/i915/gt/intel_ring_types.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index ee5835c29c03..405d814e9040 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h index 8d3c9d663662..1d2cfc98b510 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.h +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/intel_rps_types.h b/drivers/gpu/drm/i915/gt/intel_rps_types.h index 029fe13cf303..3941d8551f52 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps_types.h +++ b/drivers/gpu/drm/i915/gt/intel_rps_types.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 8a72e0fe34ca..0d9f74aec8fe 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index 23ba6c2ebe70..4cd1a8a7298a 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git 
a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 037b0e3ccbed..f19cf6d2fa85 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2016-2018 Intel Corporation */ @@ -12,21 +11,9 @@ #include "intel_ring.h" #include "intel_timeline.h" -#define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit))) -#define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit)) +#define TIMELINE_SEQNO_BYTES 8 -#define CACHELINE_BITS 6 -#define CACHELINE_FREE CACHELINE_BITS - -struct intel_timeline_hwsp { - struct intel_gt *gt; - struct intel_gt_timelines *gt_timelines; - struct list_head free_link; - struct i915_vma *vma; - u64 free_bitmap; -}; - -static struct i915_vma *__hwsp_alloc(struct intel_gt *gt) +static struct i915_vma *hwsp_alloc(struct intel_gt *gt) { struct drm_i915_private *i915 = gt->i915; struct drm_i915_gem_object *obj; @@ -45,174 +32,42 @@ static struct i915_vma *__hwsp_alloc(struct intel_gt *gt) return vma; } -static struct i915_vma * -hwsp_alloc(struct intel_timeline *timeline, unsigned int *cacheline) -{ - struct intel_gt_timelines *gt = &timeline->gt->timelines; - struct intel_timeline_hwsp *hwsp; - - BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE); - - spin_lock_irq(>->hwsp_lock); - - /* hwsp_free_list only contains HWSP that have available cachelines */ - hwsp = list_first_entry_or_null(>->hwsp_free_list, - typeof(*hwsp), free_link); - if (!hwsp) { - struct i915_vma *vma; - - spin_unlock_irq(>->hwsp_lock); - - hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL); - if (!hwsp) - return ERR_PTR(-ENOMEM); - - vma = __hwsp_alloc(timeline->gt); - if (IS_ERR(vma)) { - kfree(hwsp); - return vma; - } - - GT_TRACE(timeline->gt, "new HWSP allocated\n"); - - vma->private = hwsp; - hwsp->gt = timeline->gt; - hwsp->vma = vma; - hwsp->free_bitmap = ~0ull; - hwsp->gt_timelines = gt; - - spin_lock_irq(>->hwsp_lock); - list_add(&hwsp->free_link, >->hwsp_free_list); - } - - GEM_BUG_ON(!hwsp->free_bitmap); - *cacheline = __ffs64(hwsp->free_bitmap); - hwsp->free_bitmap &= ~BIT_ULL(*cacheline); - if (!hwsp->free_bitmap) - list_del(&hwsp->free_link); - - spin_unlock_irq(>->hwsp_lock); - - GEM_BUG_ON(hwsp->vma->private != hwsp); - return hwsp->vma; -} - -static void __idle_hwsp_free(struct intel_timeline_hwsp *hwsp, int cacheline) -{ - struct intel_gt_timelines *gt = hwsp->gt_timelines; - unsigned long flags; - - spin_lock_irqsave(>->hwsp_lock, flags); - - /* As a cacheline becomes available, publish the HWSP on the freelist */ - if (!hwsp->free_bitmap) - list_add_tail(&hwsp->free_link, >->hwsp_free_list); - - GEM_BUG_ON(cacheline >= BITS_PER_TYPE(hwsp->free_bitmap)); - hwsp->free_bitmap |= BIT_ULL(cacheline); - - /* And if no one is left using it, give the page back to the system */ - if (hwsp->free_bitmap == ~0ull) { - i915_vma_put(hwsp->vma); - list_del(&hwsp->free_link); - kfree(hwsp); - } - - spin_unlock_irqrestore(>->hwsp_lock, flags); -} - -static void __rcu_cacheline_free(struct rcu_head *rcu) -{ - struct intel_timeline_cacheline *cl = - container_of(rcu, typeof(*cl), rcu); - - /* Must wait until after all *rq->hwsp are complete before removing */ - i915_gem_object_unpin_map(cl->hwsp->vma->obj); - __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS)); - - i915_active_fini(&cl->active); - kfree(cl); -} - -static void __idle_cacheline_free(struct intel_timeline_cacheline *cl) 
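(The machinery being deleted above handed out fixed-size HWSP cachelines from a single page by tracking availability in a per-page u64 bitmap: __ffs64() picks the lowest free slot, and BIT_ULL() clears it on allocation and sets it again on free. A self-contained userspace model of that slot scheme, with illustrative names rather than the driver's:)

#include <assert.h>
#include <stdint.h>

struct slot_pool {
	uint64_t free_bitmap;		/* bit set => slot free, as in hwsp->free_bitmap */
};

static int slot_alloc(struct slot_pool *p)
{
	int slot;

	if (!p->free_bitmap)
		return -1;				/* page exhausted: caller grabs a new pool */
	slot = __builtin_ctzll(p->free_bitmap);	/* lowest set bit, like __ffs64() */
	p->free_bitmap &= ~(1ull << slot);
	return slot;
}

static void slot_free(struct slot_pool *p, int slot)
{
	p->free_bitmap |= 1ull << slot;		/* slot becomes reusable */
}

int main(void)
{
	struct slot_pool pool = { .free_bitmap = ~0ull };	/* 64 free slots */
	int a = slot_alloc(&pool);
	int b = slot_alloc(&pool);

	assert(a == 0 && b == 1);
	slot_free(&pool, a);
	assert(slot_alloc(&pool) == a);		/* lowest free slot is reused first */
	return 0;
}

(One bitmap per page is what let the old code pack 64 timelines into a single HWSP page; the series replaces this sharing with one 8-byte slot window per timeline.)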
-{ - GEM_BUG_ON(!i915_active_is_idle(&cl->active)); - call_rcu(&cl->rcu, __rcu_cacheline_free); -} - __i915_active_call -static void __cacheline_retire(struct i915_active *active) +static void __timeline_retire(struct i915_active *active) { - struct intel_timeline_cacheline *cl = - container_of(active, typeof(*cl), active); + struct intel_timeline *tl = + container_of(active, typeof(*tl), active); - i915_vma_unpin(cl->hwsp->vma); - if (ptr_test_bit(cl->vaddr, CACHELINE_FREE)) - __idle_cacheline_free(cl); + i915_vma_unpin(tl->hwsp_ggtt); + intel_timeline_put(tl); } -static int __cacheline_active(struct i915_active *active) +static int __timeline_active(struct i915_active *active) { - struct intel_timeline_cacheline *cl = - container_of(active, typeof(*cl), active); + struct intel_timeline *tl = + container_of(active, typeof(*tl), active); - __i915_vma_pin(cl->hwsp->vma); + __i915_vma_pin(tl->hwsp_ggtt); + intel_timeline_get(tl); return 0; } -static struct intel_timeline_cacheline * -cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline) +I915_SELFTEST_EXPORT int +intel_timeline_pin_map(struct intel_timeline *timeline) { - struct intel_timeline_cacheline *cl; + struct drm_i915_gem_object *obj = timeline->hwsp_ggtt->obj; + u32 ofs = offset_in_page(timeline->hwsp_offset); void *vaddr; - GEM_BUG_ON(cacheline >= BIT(CACHELINE_BITS)); - - cl = kmalloc(sizeof(*cl), GFP_KERNEL); - if (!cl) - return ERR_PTR(-ENOMEM); - - vaddr = i915_gem_object_pin_map(hwsp->vma->obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - kfree(cl); - return ERR_CAST(vaddr); - } - - cl->hwsp = hwsp; - cl->vaddr = page_pack_bits(vaddr, cacheline); - - i915_active_init(&cl->active, __cacheline_active, __cacheline_retire); - - return cl; -} - -static void cacheline_acquire(struct intel_timeline_cacheline *cl, - u32 ggtt_offset) -{ - if (!cl) - return; - - cl->ggtt_offset = ggtt_offset; - i915_active_acquire(&cl->active); -} - -static void cacheline_release(struct intel_timeline_cacheline *cl) -{ - if (cl) - i915_active_release(&cl->active); -} - -static void cacheline_free(struct intel_timeline_cacheline *cl) -{ - if (!i915_active_acquire_if_busy(&cl->active)) { - __idle_cacheline_free(cl); - return; - } + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); - GEM_BUG_ON(ptr_test_bit(cl->vaddr, CACHELINE_FREE)); - cl->vaddr = ptr_set_bit(cl->vaddr, CACHELINE_FREE); + timeline->hwsp_map = vaddr; + timeline->hwsp_seqno = memset(vaddr + ofs, 0, TIMELINE_SEQNO_BYTES); + clflush(vaddr + ofs); - i915_active_release(&cl->active); + return 0; } static int intel_timeline_init(struct intel_timeline *timeline, @@ -220,45 +75,25 @@ static int intel_timeline_init(struct intel_timeline *timeline, struct i915_vma *hwsp, unsigned int offset) { - void *vaddr; - kref_init(&timeline->kref); atomic_set(&timeline->pin_count, 0); timeline->gt = gt; - timeline->has_initial_breadcrumb = !hwsp; - timeline->hwsp_cacheline = NULL; - - if (!hwsp) { - struct intel_timeline_cacheline *cl; - unsigned int cacheline; - - hwsp = hwsp_alloc(timeline, &cacheline); + if (hwsp) { + timeline->hwsp_offset = offset; + timeline->hwsp_ggtt = i915_vma_get(hwsp); + } else { + timeline->has_initial_breadcrumb = true; + hwsp = hwsp_alloc(gt); if (IS_ERR(hwsp)) return PTR_ERR(hwsp); - - cl = cacheline_alloc(hwsp->private, cacheline); - if (IS_ERR(cl)) { - __idle_hwsp_free(hwsp->private, cacheline); - return PTR_ERR(cl); - } - - timeline->hwsp_cacheline = cl; - timeline->hwsp_offset = cacheline * CACHELINE_BYTES; - - 
vaddr = page_mask_bits(cl->vaddr); - } else { - timeline->hwsp_offset = offset; - vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB); - if (IS_ERR(vaddr)) - return PTR_ERR(vaddr); + timeline->hwsp_ggtt = hwsp; } - timeline->hwsp_seqno = - memset(vaddr + timeline->hwsp_offset, 0, CACHELINE_BYTES); + timeline->hwsp_map = NULL; + timeline->hwsp_seqno = (void *)(long)timeline->hwsp_offset; - timeline->hwsp_ggtt = i915_vma_get(hwsp); GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size); timeline->fence_context = dma_fence_context_alloc(1); @@ -269,6 +104,7 @@ static int intel_timeline_init(struct intel_timeline *timeline, INIT_LIST_HEAD(&timeline->requests); i915_syncmap_init(&timeline->sync); + i915_active_init(&timeline->active, __timeline_active, __timeline_retire); return 0; } @@ -279,23 +115,19 @@ void intel_gt_init_timelines(struct intel_gt *gt) spin_lock_init(&timelines->lock); INIT_LIST_HEAD(&timelines->active_list); - - spin_lock_init(&timelines->hwsp_lock); - INIT_LIST_HEAD(&timelines->hwsp_free_list); } -static void intel_timeline_fini(struct intel_timeline *timeline) +static void intel_timeline_fini(struct rcu_head *rcu) { - GEM_BUG_ON(atomic_read(&timeline->pin_count)); - GEM_BUG_ON(!list_empty(&timeline->requests)); - GEM_BUG_ON(timeline->retire); + struct intel_timeline *timeline = + container_of(rcu, struct intel_timeline, rcu); - if (timeline->hwsp_cacheline) - cacheline_free(timeline->hwsp_cacheline); - else + if (timeline->hwsp_map) i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj); i915_vma_put(timeline->hwsp_ggtt); + i915_active_fini(&timeline->active); + kfree(timeline); } struct intel_timeline * @@ -351,6 +183,12 @@ int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww) if (atomic_add_unless(&tl->pin_count, 1, 0)) return 0; + if (!tl->hwsp_map) { + err = intel_timeline_pin_map(tl); + if (err) + return err; + } + err = i915_ggtt_pin(tl->hwsp_ggtt, ww, 0, PIN_HIGH); if (err) return err; @@ -361,9 +199,9 @@ int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww) GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n", tl->fence_context, tl->hwsp_offset); - cacheline_acquire(tl->hwsp_cacheline, tl->hwsp_offset); + i915_active_acquire(&tl->active); if (atomic_fetch_inc(&tl->pin_count)) { - cacheline_release(tl->hwsp_cacheline); + i915_active_release(&tl->active); __i915_vma_unpin(tl->hwsp_ggtt); } @@ -372,9 +210,13 @@ int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww) void intel_timeline_reset_seqno(const struct intel_timeline *tl) { + u32 *hwsp_seqno = (u32 *)tl->hwsp_seqno; /* Must be pinned to be writable, and no requests in flight. 
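The 8-byte seqno slot is reset as a unit below: the bytes beyond the seqno dword are zeroed first, the new value is then published with WRITE_ONCE(), and finally the line is clflushed so the reset is visible even to readers that bypass the CPU cache.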
*/ GEM_BUG_ON(!atomic_read(&tl->pin_count)); - WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno); + + memset(hwsp_seqno + 1, 0, TIMELINE_SEQNO_BYTES - sizeof(*hwsp_seqno)); + WRITE_ONCE(*hwsp_seqno, tl->seqno); + clflush(hwsp_seqno); } void intel_timeline_enter(struct intel_timeline *tl) @@ -450,106 +292,23 @@ static u32 timeline_advance(struct intel_timeline *tl) return tl->seqno += 1 + tl->has_initial_breadcrumb; } -static void timeline_rollback(struct intel_timeline *tl) -{ - tl->seqno -= 1 + tl->has_initial_breadcrumb; -} - static noinline int __intel_timeline_get_seqno(struct intel_timeline *tl, - struct i915_request *rq, u32 *seqno) { - struct intel_timeline_cacheline *cl; - unsigned int cacheline; - struct i915_vma *vma; - void *vaddr; - int err; - - might_lock(&tl->gt->ggtt->vm.mutex); - GT_TRACE(tl->gt, "timeline:%llx wrapped\n", tl->fence_context); - - /* - * If there is an outstanding GPU reference to this cacheline, - * such as it being sampled by a HW semaphore on another timeline, - * we cannot wraparound our seqno value (the HW semaphore does - * a strict greater-than-or-equals compare, not i915_seqno_passed). - * So if the cacheline is still busy, we must detach ourselves - * from it and leave it inflight alongside its users. - * - * However, if nobody is watching and we can guarantee that nobody - * will, we could simply reuse the same cacheline. - * - * if (i915_active_request_is_signaled(&tl->last_request) && - * i915_active_is_signaled(&tl->hwsp_cacheline->active)) - * return 0; - * - * That seems unlikely for a busy timeline that needed to wrap in - * the first place, so just replace the cacheline. - */ - - vma = hwsp_alloc(tl, &cacheline); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err_rollback; - } - - err = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH); - if (err) { - __idle_hwsp_free(vma->private, cacheline); - goto err_rollback; - } - - cl = cacheline_alloc(vma->private, cacheline); - if (IS_ERR(cl)) { - err = PTR_ERR(cl); - __idle_hwsp_free(vma->private, cacheline); - goto err_unpin; - } - GEM_BUG_ON(cl->hwsp->vma != vma); - - /* - * Attach the old cacheline to the current request, so that we only - * free it after the current request is retired, which ensures that - * all writes into the cacheline from previous requests are complete. - */ - err = i915_active_ref(&tl->hwsp_cacheline->active, - tl->fence_context, - &rq->fence); - if (err) - goto err_cacheline; - - cacheline_release(tl->hwsp_cacheline); /* ownership now xfered to rq */ - cacheline_free(tl->hwsp_cacheline); - - i915_vma_unpin(tl->hwsp_ggtt); /* binding kept alive by old cacheline */ - i915_vma_put(tl->hwsp_ggtt); - - tl->hwsp_ggtt = i915_vma_get(vma); + u32 next_ofs = offset_in_page(tl->hwsp_offset + TIMELINE_SEQNO_BYTES); - vaddr = page_mask_bits(cl->vaddr); - tl->hwsp_offset = cacheline * CACHELINE_BYTES; - tl->hwsp_seqno = - memset(vaddr + tl->hwsp_offset, 0, CACHELINE_BYTES); + /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. 
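Adding BIT(5) to an offset whose bit 5 is set clears that bit and carries into bit 6, so the check below simply advances the slot past the offending 32-byte span.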
*/ + if (TIMELINE_SEQNO_BYTES <= BIT(5) && (next_ofs & BIT(5))) + next_ofs = offset_in_page(next_ofs + BIT(5)); - tl->hwsp_offset += i915_ggtt_offset(vma); - GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n", - tl->fence_context, tl->hwsp_offset); - - cacheline_acquire(cl, tl->hwsp_offset); - tl->hwsp_cacheline = cl; + tl->hwsp_offset = i915_ggtt_offset(tl->hwsp_ggtt) + next_ofs; + tl->hwsp_seqno = tl->hwsp_map + next_ofs; + intel_timeline_reset_seqno(tl); *seqno = timeline_advance(tl); GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno)); return 0; - -err_cacheline: - cacheline_free(cl); -err_unpin: - i915_vma_unpin(vma); -err_rollback: - timeline_rollback(tl); - return err; } int intel_timeline_get_seqno(struct intel_timeline *tl, @@ -559,51 +318,52 @@ int intel_timeline_get_seqno(struct intel_timeline *tl, *seqno = timeline_advance(tl); /* Replace the HWSP on wraparound for HW semaphores */ - if (unlikely(!*seqno && tl->hwsp_cacheline)) - return __intel_timeline_get_seqno(tl, rq, seqno); + if (unlikely(!*seqno && tl->has_initial_breadcrumb)) + return __intel_timeline_get_seqno(tl, seqno); return 0; } -static int cacheline_ref(struct intel_timeline_cacheline *cl, - struct i915_request *rq) -{ - return i915_active_add_request(&cl->active, rq); -} - int intel_timeline_read_hwsp(struct i915_request *from, struct i915_request *to, u32 *hwsp) { - struct intel_timeline_cacheline *cl; + struct intel_timeline *tl; int err; - GEM_BUG_ON(!rcu_access_pointer(from->hwsp_cacheline)); - rcu_read_lock(); - cl = rcu_dereference(from->hwsp_cacheline); - if (i915_request_signaled(from)) /* confirm cacheline is valid */ - goto unlock; - if (unlikely(!i915_active_acquire_if_busy(&cl->active))) - goto unlock; /* seqno wrapped and completed! */ - if (unlikely(__i915_request_is_complete(from))) - goto release; + tl = rcu_dereference(from->timeline); + if (i915_request_signaled(from) || + !i915_active_acquire_if_busy(&tl->active)) + tl = NULL; + + if (tl) { + /* hwsp_offset may wraparound, so use from->hwsp_seqno */ + *hwsp = i915_ggtt_offset(tl->hwsp_ggtt) + + offset_in_page(from->hwsp_seqno); + } + + /* ensure we wait on the right request, if not, we completed */ + if (tl && __i915_request_is_complete(from)) { + i915_active_release(&tl->active); + tl = NULL; + } rcu_read_unlock(); - err = cacheline_ref(cl, to); - if (err) + if (!tl) + return 1; + + /* Can't do semaphore waits on kernel context */ + if (!tl->has_initial_breadcrumb) { + err = -EINVAL; goto out; + } + + err = i915_active_add_request(&tl->active, to); - *hwsp = cl->ggtt_offset; out: - i915_active_release(&cl->active); + i915_active_release(&tl->active); return err; - -release: - i915_active_release(&cl->active); -unlock: - rcu_read_unlock(); - return 1; } void intel_timeline_unpin(struct intel_timeline *tl) @@ -612,8 +372,7 @@ void intel_timeline_unpin(struct intel_timeline *tl) if (!atomic_dec_and_test(&tl->pin_count)) return; - cacheline_release(tl->hwsp_cacheline); - + i915_active_release(&tl->active); __i915_vma_unpin(tl->hwsp_ggtt); } @@ -622,8 +381,11 @@ void __intel_timeline_free(struct kref *kref) struct intel_timeline *timeline = container_of(kref, typeof(*timeline), kref); - intel_timeline_fini(timeline); - kfree_rcu(timeline, rcu); + GEM_BUG_ON(atomic_read(&timeline->pin_count)); + GEM_BUG_ON(!list_empty(&timeline->requests)); + GEM_BUG_ON(timeline->retire); + + call_rcu(&timeline->rcu, intel_timeline_fini); } void intel_gt_fini_timelines(struct intel_gt *gt) @@ -631,7 +393,6 @@ void intel_gt_fini_timelines(struct intel_gt *gt) 
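(Aside on the GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno)) guard in the wraparound path above: i915 orders 32-bit seqnos by signed distance, so the ordering survives the u32 wrap that this path exists to handle. A minimal standalone model of that comparison:)

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* "a has passed b" by signed distance modulo 2^32, as i915_seqno_passed() does. */
static bool seqno_passed(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) >= 0;
}

int main(void)
{
	assert(seqno_passed(2, 1));
	assert(!seqno_passed(1, 2));
	assert(seqno_passed(1, 0xffffffffu));	/* still ordered across the wrap */
	return 0;
}

(A HW semaphore, by contrast, does a raw greater-than-or-equal compare, which is why the old code could never let a watched seqno wrap in place and had to swap in a fresh cacheline instead.)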
struct intel_gt_timelines *timelines = >->timelines; GEM_BUG_ON(!list_empty(&timelines->active_list)); - GEM_BUG_ON(!list_empty(&timelines->hwsp_free_list)); } void intel_gt_show_timelines(struct intel_gt *gt, diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h index dcdee692a80e..57308c4d664a 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline.h @@ -1,25 +1,6 @@ +/* SPDX-License-Identifier: MIT */ /* * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * */ #ifndef I915_TIMELINE_H @@ -117,4 +98,6 @@ intel_timeline_is_last(const struct intel_timeline *tl, return list_is_last_rcu(&rq->link, &tl->requests); } +I915_SELFTEST_DECLARE(int intel_timeline_pin_map(struct intel_timeline *tl)); + #endif diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h index e360f50706bf..74e67dbf89c5 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2016 Intel Corporation */ @@ -18,7 +17,6 @@ struct i915_vma; struct i915_syncmap; struct intel_gt; -struct intel_timeline_hwsp; struct intel_timeline { u64 fence_context; @@ -45,12 +43,11 @@ struct intel_timeline { atomic_t pin_count; atomic_t active_count; + void *hwsp_map; const u32 *hwsp_seqno; struct i915_vma *hwsp_ggtt; u32 hwsp_offset; - struct intel_timeline_cacheline *hwsp_cacheline; - bool has_initial_breadcrumb; /** @@ -67,6 +64,8 @@ struct intel_timeline { */ struct i915_active_fence last_request; + struct i915_active active; + /** A chain of completed timelines ready for early retirement. 
*/ struct intel_timeline *retire; @@ -90,15 +89,4 @@ struct intel_timeline { struct rcu_head rcu; }; -struct intel_timeline_cacheline { - struct i915_active active; - - struct intel_timeline_hwsp *hwsp; - void *vaddr; - - u32 ggtt_offset; - - struct rcu_head rcu; -}; - #endif /* __I915_TIMELINE_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 821142c02ae5..2c6f7217469f 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2014-2018 Intel Corporation */ @@ -242,7 +241,7 @@ static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine, PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); /* Use Force Non-Coherent whenever executing a 3D context. This is a - * workaround for for a possible hang in the unlikely event a TLB + * workaround for a possible hang in the unlikely event a TLB * invalidation occurs during a PSD flush. */ /* WaForceEnableNonCoherent:bdw,chv */ @@ -719,7 +718,7 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, else if (IS_GEN(i915, 6)) gen6_ctx_workarounds_init(engine, wal); else if (INTEL_GEN(i915) < 8) - return; + ; else MISSING_CASE(INTEL_GEN(i915)); @@ -1073,11 +1072,10 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) /* Wa_1607087056:icl,ehl,jsl */ if (IS_ICELAKE(i915) || - IS_JSL_EHL_REVID(i915, EHL_REVID_A0, EHL_REVID_A0)) { + IS_JSL_EHL_REVID(i915, EHL_REVID_A0, EHL_REVID_A0)) wa_write_or(wal, SLICE_UNIT_LEVEL_CLKGATE, L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); - } } static void @@ -1172,7 +1170,7 @@ gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) else if (IS_GEN(i915, 4)) gen4_gt_workarounds_init(i915, wal); else if (INTEL_GEN(i915) <= 8) - return; + ; else MISSING_CASE(INTEL_GEN(i915)); } @@ -1547,7 +1545,7 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine) else if (IS_SKYLAKE(i915)) skl_whitelist_build(engine); else if (INTEL_GEN(i915) <= 8) - return; + ; else MISSING_CASE(INTEL_GEN(i915)); @@ -2174,10 +2172,15 @@ retry: if (err) goto err_pm; + err = i915_vma_pin_ww(vma, &ww, 0, 0, + i915_vma_is_ggtt(vma) ? 
PIN_GLOBAL : PIN_USER); + if (err) + goto err_unpin; + rq = i915_request_create(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); - goto err_unpin; + goto err_vma; } err = i915_request_await_object(rq, vma->obj, true); @@ -2218,6 +2221,8 @@ retry: err_rq: i915_request_put(rq); +err_vma: + i915_vma_unpin(vma); err_unpin: intel_context_unpin(ce); err_pm: @@ -2228,7 +2233,6 @@ err_pm: } i915_gem_ww_ctx_fini(&ww); intel_engine_pm_put(ce->engine); - i915_vma_unpin(vma); i915_vma_put(vma); return err; } diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.h b/drivers/gpu/drm/i915/gt/intel_workarounds.h index 8c9c769c2204..15abb68b6c00 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.h +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2014-2018 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h index d166a7145720..c214111ea367 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h +++ b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2014-2018 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 4b4f03b70df7..e1ba03b93ffa 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -1,25 +1,6 @@ +// SPDX-License-Identifier: MIT /* * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * */ #include "gem/i915_gem_context.h" @@ -32,9 +13,20 @@ #include "mock_engine.h" #include "selftests/mock_request.h" -static void mock_timeline_pin(struct intel_timeline *tl) +static int mock_timeline_pin(struct intel_timeline *tl) { + int err; + + if (WARN_ON(!i915_gem_object_trylock(tl->hwsp_ggtt->obj))) + return -EBUSY; + + err = intel_timeline_pin_map(tl); + i915_gem_object_unlock(tl->hwsp_ggtt->obj); + if (err) + return err; + atomic_inc(&tl->pin_count); + return 0; } static void mock_timeline_unpin(struct intel_timeline *tl) @@ -152,6 +144,8 @@ static void mock_context_destroy(struct kref *ref) static int mock_context_alloc(struct intel_context *ce) { + int err; + ce->ring = mock_ring(ce->engine); if (!ce->ring) return -ENOMEM; @@ -162,7 +156,12 @@ static int mock_context_alloc(struct intel_context *ce) return PTR_ERR(ce->timeline); } - mock_timeline_pin(ce->timeline); + err = mock_timeline_pin(ce->timeline); + if (err) { + intel_timeline_put(ce->timeline); + ce->timeline = NULL; + return err; + } return 0; } @@ -258,13 +257,15 @@ static void mock_reset_cancel(struct intel_engine_cs *engine) /* Mark all submitted requests as skipped. */ list_for_each_entry(rq, &engine->active.requests, sched.link) - i915_request_mark_eio(rq); + i915_request_put(i915_request_mark_eio(rq)); intel_engine_signal_breadcrumbs(engine); /* Cancel and submit all pending requests. */ list_for_each_entry(rq, &mock->hw_queue, mock.link) { - i915_request_mark_eio(rq); - __i915_request_submit(rq); + if (i915_request_mark_eio(rq)) { + __i915_request_submit(rq); + i915_request_put(rq); + } } INIT_LIST_HEAD(&mock->hw_queue); diff --git a/drivers/gpu/drm/i915/gt/mock_engine.h b/drivers/gpu/drm/i915/gt/mock_engine.h index 3f9b698c49d2..cc5ab6e1f37e 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.h +++ b/drivers/gpu/drm/i915/gt/mock_engine.h @@ -1,25 +1,6 @@ +/* SPDX-License-Identifier: MIT */ /* * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * */ #ifndef __MOCK_ENGINE_H__ diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c index db738d400168..b9bdd1d23243 100644 --- a/drivers/gpu/drm/i915/gt/selftest_context.c +++ b/drivers/gpu/drm/i915/gt/selftest_context.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * SPDX-License-Identifier: GPL-2.0 - * * Copyright © 2019 Intel Corporation */ @@ -88,8 +87,8 @@ static int __live_context_size(struct intel_engine_cs *engine) if (err) goto err; - vaddr = i915_gem_object_pin_map(ce->state->obj, - i915_coherent_map_type(engine->i915)); + vaddr = i915_gem_object_pin_map_unlocked(ce->state->obj, + i915_coherent_map_type(engine->i915)); if (IS_ERR(vaddr)) { err = PTR_ERR(vaddr); intel_context_unpin(ce); diff --git a/drivers/gpu/drm/i915/gt/selftest_engine.c b/drivers/gpu/drm/i915/gt/selftest_engine.c index f65b118e261d..262764f6d90a 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * SPDX-License-Identifier: GPL-2.0 - * * Copyright © 2018 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/selftest_engine.h b/drivers/gpu/drm/i915/gt/selftest_engine.h index ab32d09ec5a1..c6feb3bd2ccc 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine.h +++ b/drivers/gpu/drm/i915/gt/selftest_engine.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* - * SPDX-License-Identifier: GPL-2.0 - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c index 439c8984f5fa..b32814a1f20b 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * SPDX-License-Identifier: GPL-2.0 - * * Copyright © 2018 Intel Corporation */ @@ -42,6 +41,9 @@ static int perf_end(struct intel_gt *gt) static int write_timestamp(struct i915_request *rq, int slot) { + struct intel_timeline *tl = + rcu_dereference_protected(rq->timeline, + !i915_request_signaled(rq)); u32 cmd; u32 *cs; @@ -54,7 +56,7 @@ static int write_timestamp(struct i915_request *rq, int slot) cmd++; *cs++ = cmd; *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base)); - *cs++ = i915_request_timeline(rq)->hwsp_offset + slot * sizeof(u32); + *cs++ = tl->hwsp_offset + slot * sizeof(u32); *cs++ = 0; intel_ring_advance(rq, cs); @@ -73,7 +75,7 @@ static struct i915_vma *create_empty_batch(struct intel_context *ce) if (IS_ERR(obj)) return ERR_CAST(obj); - cs = i915_gem_object_pin_map(obj, I915_MAP_WB); + cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); if (IS_ERR(cs)) { err = PTR_ERR(cs); goto err_put; @@ -209,7 +211,7 @@ static struct i915_vma *create_nop_batch(struct intel_context *ce) if (IS_ERR(obj)) return ERR_CAST(obj); - cs = i915_gem_object_pin_map(obj, I915_MAP_WB); + cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); if (IS_ERR(cs)) { err = PTR_ERR(cs); goto err_put; diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c index 223ab88f7e57..b2c369317bf1 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2018 Intel Corporation */ @@ -12,6 +11,12 @@ #include "i915_selftest.h" #include "selftest_engine_heartbeat.h" +static void 
reset_heartbeat(struct intel_engine_cs *engine) +{ + intel_engine_set_heartbeat(engine, + engine->defaults.heartbeat_interval_ms); +} + static int timeline_sync(struct intel_timeline *tl) { struct dma_fence *fence; @@ -270,7 +275,7 @@ static int __live_heartbeat_fast(struct intel_engine_cs *engine) err = -EINVAL; } - intel_engine_set_heartbeat(engine, CONFIG_DRM_I915_HEARTBEAT_INTERVAL); + reset_heartbeat(engine); err_pm: intel_engine_pm_put(engine); intel_context_put(ce); @@ -285,7 +290,7 @@ static int live_heartbeat_fast(void *arg) int err = 0; /* Check that the heartbeat ticks at the desired rate. */ - if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL) + if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL)) return 0; for_each_engine(engine, gt, id) { @@ -333,7 +338,7 @@ static int __live_heartbeat_off(struct intel_engine_cs *engine) } err_beat: - intel_engine_set_heartbeat(engine, CONFIG_DRM_I915_HEARTBEAT_INTERVAL); + reset_heartbeat(engine); err_pm: intel_engine_pm_put(engine); return err; @@ -347,7 +352,7 @@ static int live_heartbeat_off(void *arg) int err = 0; /* Check that we can turn off heartbeat and not interrupt VIP */ - if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL) + if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL)) return 0; for_each_engine(engine, gt, id) { diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c index c3d965279fc3..2c898622bdfb 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * SPDX-License-Identifier: GPL-2.0 - * * Copyright © 2018 Intel Corporation */ @@ -111,13 +110,15 @@ static int __measure_timestamps(struct intel_context *ce, cpu_relax(); /* Run the request for a 100us, sampling timestamps before/after */ - preempt_disable(); - *dt = local_clock(); + local_irq_disable(); write_semaphore(&sema[2], 0); + while (READ_ONCE(sema[1]) == 0) /* wait for the gpu to catch up */ + cpu_relax(); + *dt = local_clock(); udelay(100); *dt = local_clock() - *dt; write_semaphore(&sema[2], 1); - preempt_enable(); + local_irq_enable(); if (i915_request_wait(rq, 0, HZ / 2) < 0) { i915_request_put(rq); diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index 264b5ebdb021..1081cd36a2bd 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2018 Intel Corporation */ @@ -321,7 +320,7 @@ static int live_unlite_switch(void *arg) static int live_unlite_preempt(void *arg) { - return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX)); + return live_unlite_restore(arg, I915_PRIORITY_MAX); } static int live_unlite_ring(void *arg) @@ -609,7 +608,7 @@ static int live_hold_reset(void *arg) } tasklet_disable(&engine->execlists.tasklet); - engine->execlists.tasklet.func(engine->execlists.tasklet.data); + engine->execlists.tasklet.callback(&engine->execlists.tasklet); GEM_BUG_ON(execlists_active(&engine->execlists) != rq); i915_request_get(rq); @@ -989,7 +988,7 @@ static int live_timeslice_preempt(void *arg) goto err_obj; } - vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); + vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC); if (IS_ERR(vaddr)) { err = PTR_ERR(vaddr); goto err_obj; @@ -1081,7 +1080,6 @@ create_rewinder(struct intel_context *ce, intel_ring_advance(rq, cs); - rq->sched.attr.priority = 
I915_PRIORITY_MASK; err = 0; err: i915_request_get(rq); @@ -1297,7 +1295,7 @@ static int live_timeslice_queue(void *arg) goto err_obj; } - vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); + vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC); if (IS_ERR(vaddr)) { err = PTR_ERR(vaddr); goto err_obj; @@ -1312,9 +1310,7 @@ static int live_timeslice_queue(void *arg) goto err_pin; for_each_engine(engine, gt, id) { - struct i915_sched_attr attr = { - .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), - }; + struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX }; struct i915_request *rq, *nop; if (!intel_engine_has_preemption(engine)) @@ -1529,14 +1525,12 @@ static int live_busywait_preempt(void *arg) ctx_hi = kernel_context(gt->i915); if (!ctx_hi) return -ENOMEM; - ctx_hi->sched.priority = - I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); + ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY; ctx_lo = kernel_context(gt->i915); if (!ctx_lo) goto err_ctx_hi; - ctx_lo->sched.priority = - I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); + ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY; obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); if (IS_ERR(obj)) { @@ -1544,7 +1538,7 @@ static int live_busywait_preempt(void *arg) goto err_ctx_lo; } - map = i915_gem_object_pin_map(obj, I915_MAP_WC); + map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC); if (IS_ERR(map)) { err = PTR_ERR(map); goto err_obj; @@ -1733,14 +1727,12 @@ static int live_preempt(void *arg) ctx_hi = kernel_context(gt->i915); if (!ctx_hi) goto err_spin_lo; - ctx_hi->sched.priority = - I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); + ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY; ctx_lo = kernel_context(gt->i915); if (!ctx_lo) goto err_ctx_hi; - ctx_lo->sched.priority = - I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); + ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY; for_each_engine(engine, gt, id) { struct igt_live_test t; @@ -1833,7 +1825,7 @@ static int live_late_preempt(void *arg) goto err_ctx_hi; /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. 
*/ - ctx_lo->sched.priority = I915_USER_PRIORITY(1); + ctx_lo->sched.priority = 1; for_each_engine(engine, gt, id) { struct igt_live_test t; @@ -1874,7 +1866,7 @@ static int live_late_preempt(void *arg) goto err_wedged; } - attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); + attr.priority = I915_PRIORITY_MAX; engine->schedule(rq, &attr); if (!igt_wait_for_spinner(&spin_hi, rq)) { @@ -1955,7 +1947,7 @@ static int live_nopreempt(void *arg) return -ENOMEM; if (preempt_client_init(gt, &b)) goto err_client_a; - b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); + b.ctx->sched.priority = I915_PRIORITY_MAX; for_each_engine(engine, gt, id) { struct i915_request *rq_a, *rq_b; @@ -2420,11 +2412,9 @@ err_wedged: static int live_suppress_self_preempt(void *arg) { + struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX }; struct intel_gt *gt = arg; struct intel_engine_cs *engine; - struct i915_sched_attr attr = { - .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX) - }; struct preempt_client a, b; enum intel_engine_id id; int err = -ENOMEM; @@ -2555,9 +2545,7 @@ static int live_chain_preempt(void *arg) goto err_client_hi; for_each_engine(engine, gt, id) { - struct i915_sched_attr attr = { - .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), - }; + struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX }; struct igt_live_test t; struct i915_request *rq; int ring_size, count, i; @@ -2714,7 +2702,7 @@ static int create_gang(struct intel_engine_cs *engine, if (err) goto err_obj; - cs = i915_gem_object_pin_map(obj, I915_MAP_WC); + cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC); if (IS_ERR(cs)) { err = PTR_ERR(cs); goto err_obj; @@ -2876,7 +2864,7 @@ static int __live_preempt_ring(struct intel_engine_cs *engine, err = wait_for_submit(engine, rq, HZ / 2); i915_request_put(rq); if (err) { - pr_err("%s: preemption request was not submited\n", + pr_err("%s: preemption request was not submitted\n", engine->name); err = -ETIME; } @@ -2976,9 +2964,7 @@ static int live_preempt_gang(void *arg) return -EIO; do { - struct i915_sched_attr attr = { - .priority = I915_USER_PRIORITY(prio++), - }; + struct i915_sched_attr attr = { .priority = prio++ }; err = create_gang(engine, &rq); if (err) @@ -2997,7 +2983,7 @@ static int live_preempt_gang(void *arg) * it will terminate the next lowest spinner until there * are no more spinners and the gang is complete. 
*/ - cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC); + cs = i915_gem_object_pin_map_unlocked(rq->batch->obj, I915_MAP_WC); if (!IS_ERR(cs)) { *cs = 0; i915_gem_object_unpin_map(rq->batch->obj); @@ -3014,7 +3000,7 @@ static int live_preempt_gang(void *arg) drm_info_printer(engine->i915->drm.dev); pr_err("Failed to flush chain of %d requests, at %d\n", - prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT); + prio, rq_prio(rq)); intel_engine_dump(engine, &p, "%s\n", engine->name); @@ -3062,7 +3048,7 @@ create_gpr_user(struct intel_engine_cs *engine, return ERR_PTR(err); } - cs = i915_gem_object_pin_map(obj, I915_MAP_WC); + cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC); if (IS_ERR(cs)) { i915_vma_put(vma); return ERR_CAST(cs); @@ -3269,7 +3255,7 @@ static int live_preempt_user(void *arg) if (IS_ERR(global)) return PTR_ERR(global); - result = i915_gem_object_pin_map(global->obj, I915_MAP_WC); + result = i915_gem_object_pin_map_unlocked(global->obj, I915_MAP_WC); if (IS_ERR(result)) { i915_vma_unpin_and_release(&global, 0); return PTR_ERR(result); @@ -3384,14 +3370,12 @@ static int live_preempt_timeout(void *arg) ctx_hi = kernel_context(gt->i915); if (!ctx_hi) goto err_spin_lo; - ctx_hi->sched.priority = - I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); + ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY; ctx_lo = kernel_context(gt->i915); if (!ctx_lo) goto err_ctx_hi; - ctx_lo->sched.priority = - I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); + ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY; for_each_engine(engine, gt, id) { unsigned long saved_timeout; @@ -3658,7 +3642,7 @@ static int live_preempt_smoke(void *arg) goto err_free; } - cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB); + cs = i915_gem_object_pin_map_unlocked(smoke.batch, I915_MAP_WB); if (IS_ERR(cs)) { err = PTR_ERR(cs); goto err_batch; @@ -4197,8 +4181,9 @@ static int preserved_virtual_engine(struct intel_gt *gt, int err = 0; u32 *cs; - scratch = __vm_create_scratch_for_read(&siblings[0]->gt->ggtt->vm, - PAGE_SIZE); + scratch = + __vm_create_scratch_for_read_pinned(&siblings[0]->gt->ggtt->vm, + PAGE_SIZE); if (IS_ERR(scratch)) return PTR_ERR(scratch); @@ -4262,7 +4247,7 @@ static int preserved_virtual_engine(struct intel_gt *gt, goto out_end; } - cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); + cs = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB); if (IS_ERR(cs)) { err = PTR_ERR(cs); goto out_end; @@ -4610,7 +4595,7 @@ static int reset_virtual_engine(struct intel_gt *gt, } tasklet_disable(&engine->execlists.tasklet); - engine->execlists.tasklet.func(engine->execlists.tasklet.data); + engine->execlists.tasklet.callback(&engine->execlists.tasklet); GEM_BUG_ON(execlists_active(&engine->execlists) != rq); /* Fake a preemption event; failed of course */ diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c index 5d911f724ebe..c0845bf72dd3 100644 --- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c @@ -1,7 +1,5 @@ - +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 463bb6a700c8..746985971c3a 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -1,25 +1,6 @@ +// SPDX-License-Identifier: MIT /* * Copyright © 2016 Intel Corporation - * - * Permission is hereby 
granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * */ #include <linux/kthread.h> @@ -80,15 +61,15 @@ static int hang_init(struct hang *h, struct intel_gt *gt) } i915_gem_object_set_cache_coherency(h->hws, I915_CACHE_LLC); - vaddr = i915_gem_object_pin_map(h->hws, I915_MAP_WB); + vaddr = i915_gem_object_pin_map_unlocked(h->hws, I915_MAP_WB); if (IS_ERR(vaddr)) { err = PTR_ERR(vaddr); goto err_obj; } h->seqno = memset(vaddr, 0xff, PAGE_SIZE); - vaddr = i915_gem_object_pin_map(h->obj, - i915_coherent_map_type(gt->i915)); + vaddr = i915_gem_object_pin_map_unlocked(h->obj, + i915_coherent_map_type(gt->i915)); if (IS_ERR(vaddr)) { err = PTR_ERR(vaddr); goto err_unpin_hws; @@ -149,7 +130,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) return ERR_CAST(obj); } - vaddr = i915_gem_object_pin_map(obj, i915_coherent_map_type(gt->i915)); + vaddr = i915_gem_object_pin_map_unlocked(obj, i915_coherent_map_type(gt->i915)); if (IS_ERR(vaddr)) { i915_gem_object_put(obj); i915_vm_put(vm); diff --git a/drivers/gpu/drm/i915/gt/selftest_llc.c b/drivers/gpu/drm/i915/gt/selftest_llc.c index a912159693fd..94006f117bbd 100644 --- a/drivers/gpu/drm/i915/gt/selftest_llc.c +++ b/drivers/gpu/drm/i915/gt/selftest_llc.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/selftest_llc.h b/drivers/gpu/drm/i915/gt/selftest_llc.h index 873f896e72f2..88ee9480022a 100644 --- a/drivers/gpu/drm/i915/gt/selftest_llc.h +++ b/drivers/gpu/drm/i915/gt/selftest_llc.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 920979a89413..85e7df6a5123 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -27,7 +27,7 @@ static struct i915_vma *create_scratch(struct intel_gt *gt) { - return __vm_create_scratch_for_read(>->ggtt->vm, PAGE_SIZE); + return __vm_create_scratch_for_read_pinned(>->ggtt->vm, PAGE_SIZE); } static bool is_active(struct i915_request *rq) @@ -627,7 +627,7 @@ static int __live_lrc_gpr(struct intel_engine_cs *engine, goto err_rq; } - cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); + cs = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB); if (IS_ERR(cs)) { err = PTR_ERR(cs); goto err_rq; @@ -733,7 +733,6 @@ 
create_timestamp(struct intel_context *ce, void *slot, int idx) intel_ring_advance(rq, cs); - rq->sched.attr.priority = I915_PRIORITY_MASK; err = 0; err: i915_request_get(rq); @@ -921,7 +920,7 @@ store_context(struct intel_context *ce, struct i915_vma *scratch) if (IS_ERR(batch)) return batch; - cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); + cs = i915_gem_object_pin_map_unlocked(batch->obj, I915_MAP_WC); if (IS_ERR(cs)) { i915_vma_put(batch); return ERR_CAST(cs); @@ -1085,7 +1084,7 @@ static struct i915_vma *load_context(struct intel_context *ce, u32 poison) if (IS_ERR(batch)) return batch; - cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); + cs = i915_gem_object_pin_map_unlocked(batch->obj, I915_MAP_WC); if (IS_ERR(cs)) { i915_vma_put(batch); return ERR_CAST(cs); @@ -1199,29 +1198,29 @@ static int compare_isolation(struct intel_engine_cs *engine, u32 *defaults; int err = 0; - A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC); + A[0] = i915_gem_object_pin_map_unlocked(ref[0]->obj, I915_MAP_WC); if (IS_ERR(A[0])) return PTR_ERR(A[0]); - A[1] = i915_gem_object_pin_map(ref[1]->obj, I915_MAP_WC); + A[1] = i915_gem_object_pin_map_unlocked(ref[1]->obj, I915_MAP_WC); if (IS_ERR(A[1])) { err = PTR_ERR(A[1]); goto err_A0; } - B[0] = i915_gem_object_pin_map(result[0]->obj, I915_MAP_WC); + B[0] = i915_gem_object_pin_map_unlocked(result[0]->obj, I915_MAP_WC); if (IS_ERR(B[0])) { err = PTR_ERR(B[0]); goto err_A1; } - B[1] = i915_gem_object_pin_map(result[1]->obj, I915_MAP_WC); + B[1] = i915_gem_object_pin_map_unlocked(result[1]->obj, I915_MAP_WC); if (IS_ERR(B[1])) { err = PTR_ERR(B[1]); goto err_B0; } - lrc = i915_gem_object_pin_map(ce->state->obj, + lrc = i915_gem_object_pin_map_unlocked(ce->state->obj, i915_coherent_map_type(engine->i915)); if (IS_ERR(lrc)) { err = PTR_ERR(lrc); diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c index cf373c72359e..e55a887d11e2 100644 --- a/drivers/gpu/drm/i915/gt/selftest_mocs.c +++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ @@ -13,8 +12,9 @@ #include "selftests/igt_spinner.h" struct live_mocs { - struct drm_i915_mocs_table mocs; - struct drm_i915_mocs_table l3cc; + struct drm_i915_mocs_table table; + struct drm_i915_mocs_table *mocs; + struct drm_i915_mocs_table *l3cc; struct i915_vma *scratch; void *vaddr; }; @@ -59,27 +59,27 @@ static int request_add_spin(struct i915_request *rq, struct igt_spinner *spin) static int live_mocs_init(struct live_mocs *arg, struct intel_gt *gt) { - struct drm_i915_mocs_table table; unsigned int flags; int err; memset(arg, 0, sizeof(*arg)); - flags = get_mocs_settings(gt->i915, &table); + flags = get_mocs_settings(gt->i915, &arg->table); if (!flags) return -EINVAL; if (flags & HAS_RENDER_L3CC) - arg->l3cc = table; + arg->l3cc = &arg->table; if (flags & (HAS_GLOBAL_MOCS | HAS_ENGINE_MOCS)) - arg->mocs = table; + arg->mocs = &arg->table; - arg->scratch = __vm_create_scratch_for_read(>->ggtt->vm, PAGE_SIZE); + arg->scratch = + __vm_create_scratch_for_read_pinned(>->ggtt->vm, PAGE_SIZE); if (IS_ERR(arg->scratch)) return PTR_ERR(arg->scratch); - arg->vaddr = i915_gem_object_pin_map(arg->scratch->obj, I915_MAP_WB); + arg->vaddr = i915_gem_object_pin_map_unlocked(arg->scratch->obj, I915_MAP_WB); if (IS_ERR(arg->vaddr)) { err = PTR_ERR(arg->vaddr); goto err_scratch; @@ -131,6 +131,9 @@ static int read_mocs_table(struct i915_request *rq, { u32 addr; + 
if (!table) + return 0; + if (HAS_GLOBAL_MOCS_REGISTERS(rq->engine->i915)) addr = global_mocs_offset(); else @@ -145,6 +148,9 @@ static int read_l3cc_table(struct i915_request *rq, { u32 addr = i915_mmio_reg_offset(GEN9_LNCFCMOCS(0)); + if (!table) + return 0; + return read_regs(rq, addr, (table->n_entries + 1) / 2, offset); } @@ -155,6 +161,9 @@ static int check_mocs_table(struct intel_engine_cs *engine, unsigned int i; u32 expect; + if (!table) + return 0; + for_each_mocs(expect, table, i) { if (**vaddr != expect) { pr_err("%s: Invalid MOCS[%d] entry, found %08x, expected %08x\n", @@ -186,6 +195,9 @@ static int check_l3cc_table(struct intel_engine_cs *engine, unsigned int i; u32 expect; + if (!table) + return 0; + for_each_l3cc(expect, table, i) { if (!mcr_range(engine->i915, reg) && **vaddr != expect) { pr_err("%s: Invalid L3CC[%d] entry, found %08x, expected %08x\n", @@ -223,9 +235,9 @@ static int check_mocs_engine(struct live_mocs *arg, /* Read the mocs tables back using SRM */ offset = i915_ggtt_offset(vma); if (!err) - err = read_mocs_table(rq, &arg->mocs, &offset); + err = read_mocs_table(rq, arg->mocs, &offset); if (!err && ce->engine->class == RENDER_CLASS) - err = read_l3cc_table(rq, &arg->l3cc, &offset); + err = read_l3cc_table(rq, arg->l3cc, &offset); offset -= i915_ggtt_offset(vma); GEM_BUG_ON(offset > PAGE_SIZE); @@ -236,9 +248,9 @@ static int check_mocs_engine(struct live_mocs *arg, /* Compare the results against the expected tables */ vaddr = arg->vaddr; if (!err) - err = check_mocs_table(ce->engine, &arg->mocs, &vaddr); + err = check_mocs_table(ce->engine, arg->mocs, &vaddr); if (!err && ce->engine->class == RENDER_CLASS) - err = check_l3cc_table(ce->engine, &arg->l3cc, &vaddr); + err = check_l3cc_table(ce->engine, arg->l3cc, &vaddr); if (err) return err; diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c index 61abc0556601..f097e420ac45 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rc6.c +++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.h b/drivers/gpu/drm/i915/gt/selftest_rc6.h index 762fd442d7b2..daf0927909bc 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rc6.h +++ b/drivers/gpu/drm/i915/gt/selftest_rc6.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2019 Intel Corporation */ diff --git a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c index 3350e7c995bc..99609271c3a7 100644 --- a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c @@ -35,7 +35,7 @@ static struct i915_vma *create_wally(struct intel_engine_cs *engine) return ERR_PTR(err); } - cs = i915_gem_object_pin_map(obj, I915_MAP_WC); + cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC); if (IS_ERR(cs)) { i915_gem_object_put(obj); return ERR_CAST(cs); @@ -212,7 +212,7 @@ static int __live_ctx_switch_wa(struct intel_engine_cs *engine) if (IS_ERR(bb)) return PTR_ERR(bb); - result = i915_gem_object_pin_map(bb->obj, I915_MAP_WC); + result = i915_gem_object_pin_map_unlocked(bb->obj, I915_MAP_WC); if (IS_ERR(result)) { intel_context_put(bb->private); i915_vma_unpin_and_release(&bb, 0); diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index 6f3a3687ef0f..9adbd9d147be 100644 --- 
a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2017-2018 Intel Corporation */ @@ -35,10 +34,31 @@ static unsigned long hwsp_cacheline(struct intel_timeline *tl) { unsigned long address = (unsigned long)page_address(hwsp_page(tl)); - return (address + tl->hwsp_offset) / CACHELINE_BYTES; + return (address + offset_in_page(tl->hwsp_offset)) / TIMELINE_SEQNO_BYTES; } -#define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES) +static int selftest_tl_pin(struct intel_timeline *tl) +{ + struct i915_gem_ww_ctx ww; + int err; + + i915_gem_ww_ctx_init(&ww, false); +retry: + err = i915_gem_object_lock(tl->hwsp_ggtt->obj, &ww); + if (!err) + err = intel_timeline_pin(tl, &ww); + + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + return err; +} + +/* Only half of seqno's are usable, see __intel_timeline_get_seqno() */ +#define CACHELINES_PER_PAGE (PAGE_SIZE / TIMELINE_SEQNO_BYTES / 2) struct mock_hwsp_freelist { struct intel_gt *gt; @@ -59,6 +79,7 @@ static void __mock_hwsp_record(struct mock_hwsp_freelist *state, tl = xchg(&state->history[idx], tl); if (tl) { radix_tree_delete(&state->cachelines, hwsp_cacheline(tl)); + intel_timeline_unpin(tl); intel_timeline_put(tl); } } @@ -78,6 +99,12 @@ static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state, if (IS_ERR(tl)) return PTR_ERR(tl); + err = selftest_tl_pin(tl); + if (err) { + intel_timeline_put(tl); + return err; + } + cacheline = hwsp_cacheline(tl); err = radix_tree_insert(&state->cachelines, cacheline, tl); if (err) { @@ -85,6 +112,7 @@ static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state, pr_err("HWSP cacheline %lu already used; duplicate allocation!\n", cacheline); } + intel_timeline_unpin(tl); intel_timeline_put(tl); return err; } @@ -452,17 +480,24 @@ static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value) } static struct i915_request * -tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value) +checked_tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value) { struct i915_request *rq; int err; - err = intel_timeline_pin(tl, NULL); + err = selftest_tl_pin(tl); if (err) { rq = ERR_PTR(err); goto out; } + if (READ_ONCE(*tl->hwsp_seqno) != tl->seqno) { + pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n", + *tl->hwsp_seqno, tl->seqno); + intel_timeline_unpin(tl); + return ERR_PTR(-EINVAL); + } + rq = intel_engine_create_kernel_request(engine); if (IS_ERR(rq)) goto out_unpin; @@ -484,25 +519,6 @@ out: return rq; } -static struct intel_timeline * -checked_intel_timeline_create(struct intel_gt *gt) -{ - struct intel_timeline *tl; - - tl = intel_timeline_create(gt); - if (IS_ERR(tl)) - return tl; - - if (READ_ONCE(*tl->hwsp_seqno) != tl->seqno) { - pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n", - *tl->hwsp_seqno, tl->seqno); - intel_timeline_put(tl); - return ERR_PTR(-EINVAL); - } - - return tl; -} - static int live_hwsp_engine(void *arg) { #define NUM_TIMELINES 4096 @@ -535,13 +551,13 @@ static int live_hwsp_engine(void *arg) struct intel_timeline *tl; struct i915_request *rq; - tl = checked_intel_timeline_create(gt); + tl = intel_timeline_create(gt); if (IS_ERR(tl)) { err = PTR_ERR(tl); break; } - rq = tl_write(tl, engine, count); + rq = checked_tl_write(tl, engine, count); if (IS_ERR(rq)) { 
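(selftest_tl_pin() above is the canonical ww-mutex shape used throughout this series: take locks under a ww context, and on -EDEADLK drop everything, wait for the contended lock, then retry from the top. The same drop-and-retry discipline, modelled with plain pthreads rather than the i915 API:)

#include <errno.h>
#include <pthread.h>

static pthread_mutex_t lock_a = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t lock_b = PTHREAD_MUTEX_INITIALIZER;

/* Take both locks without deadlocking against a thread that locks b then a:
 * on contention, back off (drop what we hold), block until the contended
 * lock is free so the other side can make progress, then start over. */
static void lock_both(void)
{
retry:
	pthread_mutex_lock(&lock_a);
	if (pthread_mutex_trylock(&lock_b) == EBUSY) {
		pthread_mutex_unlock(&lock_a);	/* the "backoff" step */
		pthread_mutex_lock(&lock_b);	/* wait out the current holder */
		pthread_mutex_unlock(&lock_b);
		goto retry;			/* reacquire everything from scratch */
	}
	/* both locks held; caller releases lock_b then lock_a */
}

int main(void)
{
	lock_both();
	pthread_mutex_unlock(&lock_b);
	pthread_mutex_unlock(&lock_a);
	return 0;
}

(Real ww mutexes refine this with an age ticket per context so only the younger acquirer ever backs off, but the control flow is exactly the retry/backoff/fini pattern repeated in the hunks above.)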
intel_timeline_put(tl); err = PTR_ERR(rq); @@ -608,14 +624,14 @@ static int live_hwsp_alternate(void *arg) if (!intel_engine_can_store_dword(engine)) continue; - tl = checked_intel_timeline_create(gt); + tl = intel_timeline_create(gt); if (IS_ERR(tl)) { err = PTR_ERR(tl); goto out; } intel_engine_pm_get(engine); - rq = tl_write(tl, engine, count); + rq = checked_tl_write(tl, engine, count); intel_engine_pm_put(engine); if (IS_ERR(rq)) { intel_timeline_put(tl); @@ -666,10 +682,10 @@ static int live_hwsp_wrap(void *arg) if (IS_ERR(tl)) return PTR_ERR(tl); - if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline) + if (!tl->has_initial_breadcrumb) goto out_free; - err = intel_timeline_pin(tl, NULL); + err = selftest_tl_pin(tl); if (err) goto out_free; @@ -816,13 +832,13 @@ static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt) if (IS_ERR(obj)) return PTR_ERR(obj); - w->map = i915_gem_object_pin_map(obj, I915_MAP_WB); + w->map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); if (IS_ERR(w->map)) { i915_gem_object_put(obj); return PTR_ERR(w->map); } - vma = i915_gem_object_ggtt_pin_ww(obj, NULL, NULL, 0, 0, 0); + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); if (IS_ERR(vma)) { i915_gem_object_put(obj); return PTR_ERR(vma); @@ -833,12 +849,26 @@ static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt) return 0; } +static void switch_tl_lock(struct i915_request *from, struct i915_request *to) +{ + /* some light mutex juggling required; think co-routines */ + + if (from) { + lockdep_unpin_lock(&from->context->timeline->mutex, from->cookie); + mutex_unlock(&from->context->timeline->mutex); + } + + if (to) { + mutex_lock(&to->context->timeline->mutex); + to->cookie = lockdep_pin_lock(&to->context->timeline->mutex); + } +} + static int create_watcher(struct hwsp_watcher *w, struct intel_engine_cs *engine, int ringsz) { struct intel_context *ce; - struct intel_timeline *tl; ce = intel_context_create(engine); if (IS_ERR(ce)) @@ -851,11 +881,8 @@ static int create_watcher(struct hwsp_watcher *w, return PTR_ERR(w->rq); w->addr = i915_ggtt_offset(w->vma); - tl = w->rq->context->timeline; - /* some light mutex juggling required; think co-routines */ - lockdep_unpin_lock(&tl->mutex, w->rq->cookie); - mutex_unlock(&tl->mutex); + switch_tl_lock(w->rq, NULL); return 0; } @@ -864,15 +891,13 @@ static int check_watcher(struct hwsp_watcher *w, const char *name, bool (*op)(u32 hwsp, u32 seqno)) { struct i915_request *rq = fetch_and_zero(&w->rq); - struct intel_timeline *tl = rq->context->timeline; u32 offset, end; int err; GEM_BUG_ON(w->addr - i915_ggtt_offset(w->vma) > w->vma->size); i915_request_get(rq); - mutex_lock(&tl->mutex); - rq->cookie = lockdep_pin_lock(&tl->mutex); + switch_tl_lock(NULL, rq); i915_request_add(rq); if (i915_request_wait(rq, 0, HZ) < 0) { @@ -901,10 +926,7 @@ out: static void cleanup_watcher(struct hwsp_watcher *w) { if (w->rq) { - struct intel_timeline *tl = w->rq->context->timeline; - - mutex_lock(&tl->mutex); - w->rq->cookie = lockdep_pin_lock(&tl->mutex); + switch_tl_lock(NULL, w->rq); i915_request_add(w->rq); } @@ -942,7 +964,7 @@ static struct i915_request *wrap_timeline(struct i915_request *rq) } i915_request_put(rq); - rq = intel_context_create_request(ce); + rq = i915_request_create(ce); if (IS_ERR(rq)) return rq; @@ -977,7 +999,7 @@ static int live_hwsp_read(void *arg) if (IS_ERR(tl)) return PTR_ERR(tl); - if (!tl->hwsp_cacheline) + if (!tl->has_initial_breadcrumb) goto out_free; for (i = 0; i < ARRAY_SIZE(watcher); i++) { @@ -999,7 +1021,7 
@@ static int live_hwsp_read(void *arg) do { struct i915_sw_fence *submit; struct i915_request *rq; - u32 hwsp; + u32 hwsp, dummy; submit = heap_fence_create(GFP_KERNEL); if (!submit) { @@ -1017,14 +1039,26 @@ static int live_hwsp_read(void *arg) goto out; } - /* Skip to the end, saving 30 minutes of nops */ - tl->seqno = -10u + 2 * (count & 3); - WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno); ce->timeline = intel_timeline_get(tl); - rq = intel_context_create_request(ce); + /* Ensure timeline is mapped, done during first pin */ + err = intel_context_pin(ce); + if (err) { + intel_context_put(ce); + goto out; + } + + /* + * Start at a new wrap, and set seqno right before another wrap, + * saving 30 minutes of nops + */ + tl->seqno = -12u + 2 * (count & 3); + __intel_timeline_get_seqno(tl, &dummy); + + rq = i915_request_create(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); + intel_context_unpin(ce); intel_context_put(ce); goto out; } @@ -1034,32 +1068,35 @@ static int live_hwsp_read(void *arg) GFP_KERNEL); if (err < 0) { i915_request_add(rq); + intel_context_unpin(ce); intel_context_put(ce); goto out; } - mutex_lock(&watcher[0].rq->context->timeline->mutex); + switch_tl_lock(rq, watcher[0].rq); err = intel_timeline_read_hwsp(rq, watcher[0].rq, &hwsp); if (err == 0) err = emit_read_hwsp(watcher[0].rq, /* before */ rq->fence.seqno, hwsp, &watcher[0].addr); - mutex_unlock(&watcher[0].rq->context->timeline->mutex); + switch_tl_lock(watcher[0].rq, rq); if (err) { i915_request_add(rq); + intel_context_unpin(ce); intel_context_put(ce); goto out; } - mutex_lock(&watcher[1].rq->context->timeline->mutex); + switch_tl_lock(rq, watcher[1].rq); err = intel_timeline_read_hwsp(rq, watcher[1].rq, &hwsp); if (err == 0) err = emit_read_hwsp(watcher[1].rq, /* after */ rq->fence.seqno, hwsp, &watcher[1].addr); - mutex_unlock(&watcher[1].rq->context->timeline->mutex); + switch_tl_lock(watcher[1].rq, rq); if (err) { i915_request_add(rq); + intel_context_unpin(ce); intel_context_put(ce); goto out; } @@ -1068,6 +1105,7 @@ static int live_hwsp_read(void *arg) i915_request_add(rq); rq = wrap_timeline(rq); + intel_context_unpin(ce); intel_context_put(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); @@ -1107,8 +1145,8 @@ static int live_hwsp_read(void *arg) 3 * watcher[1].rq->ring->size) break; - } while (!__igt_timeout(end_time, NULL)); - WRITE_ONCE(*(u32 *)tl->hwsp_seqno, 0xdeadbeef); + } while (!__igt_timeout(end_time, NULL) && + count < (PAGE_SIZE / TIMELINE_SEQNO_BYTES - 1) / 2); pr_info("%s: simulated %lu wraps\n", engine->name, count); err = check_watcher(&watcher[1], "after", cmp_gte); @@ -1153,9 +1191,7 @@ static int live_hwsp_rollover_kernel(void *arg) } GEM_BUG_ON(i915_active_fence_isset(&tl->last_request)); - tl->seqno = 0; - timeline_rollback(tl); - timeline_rollback(tl); + tl->seqno = -2u; WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno); for (i = 0; i < ARRAY_SIZE(rq); i++) { @@ -1235,11 +1271,14 @@ static int live_hwsp_rollover_user(void *arg) goto out; tl = ce->timeline; - if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline) + if (!tl->has_initial_breadcrumb) + goto out; + + err = intel_context_pin(ce); + if (err) goto out; - timeline_rollback(tl); - timeline_rollback(tl); + tl->seqno = -4u; WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno); for (i = 0; i < ARRAY_SIZE(rq); i++) { @@ -1248,7 +1287,7 @@ static int live_hwsp_rollover_user(void *arg) this = intel_context_create_request(ce); if (IS_ERR(this)) { err = PTR_ERR(this); - goto out; + goto out_unpin; } pr_debug("%s: create fence.seqnp:%d\n", @@ -1267,17 +1306,18 
@@ static int live_hwsp_rollover_user(void *arg) if (i915_request_wait(rq[2], 0, HZ / 5) < 0) { pr_err("Wait for timeline wrap timed out!\n"); err = -EIO; - goto out; + goto out_unpin; } for (i = 0; i < ARRAY_SIZE(rq); i++) { if (!i915_request_completed(rq[i])) { pr_err("Pre-wrap request not completed!\n"); err = -EINVAL; - goto out; + goto out_unpin; } } - +out_unpin: + intel_context_unpin(ce); out: for (i = 0; i < ARRAY_SIZE(rq); i++) i915_request_put(rq[i]); @@ -1319,13 +1359,13 @@ static int live_hwsp_recycle(void *arg) struct intel_timeline *tl; struct i915_request *rq; - tl = checked_intel_timeline_create(gt); + tl = intel_timeline_create(gt); if (IS_ERR(tl)) { err = PTR_ERR(tl); break; } - rq = tl_write(tl, engine, count); + rq = checked_tl_write(tl, engine, count); if (IS_ERR(rq)) { intel_timeline_put(tl); err = PTR_ERR(rq); diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index 2070b91cb607..19850489a3fc 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2018 Intel Corporation */ @@ -112,7 +111,7 @@ read_nonprivs(struct intel_context *ce) i915_gem_object_set_cache_coherency(result, I915_CACHE_LLC); - cs = i915_gem_object_pin_map(result, I915_MAP_WB); + cs = i915_gem_object_pin_map_unlocked(result, I915_MAP_WB); if (IS_ERR(cs)) { err = PTR_ERR(cs); goto err_obj; @@ -218,7 +217,7 @@ static int check_whitelist(struct intel_context *ce) i915_gem_object_lock(results, NULL); intel_wedge_on_timeout(&wedge, engine->gt, HZ / 5) /* safety net! */ err = i915_gem_object_set_to_cpu_domain(results, false); - i915_gem_object_unlock(results); + if (intel_gt_is_wedged(engine->gt)) err = -EIO; if (err) @@ -246,6 +245,7 @@ static int check_whitelist(struct intel_context *ce) i915_gem_object_unpin_map(results); out_put: + i915_gem_object_unlock(results); i915_gem_object_put(results); return err; } @@ -490,7 +490,7 @@ static int check_dirty_whitelist(struct intel_context *ce) u32 *cs, *results; sz = (2 * ARRAY_SIZE(values) + 1) * sizeof(u32); - scratch = __vm_create_scratch_for_read(ce->vm, sz); + scratch = __vm_create_scratch_for_read_pinned(ce->vm, sz); if (IS_ERR(scratch)) return PTR_ERR(scratch); @@ -502,6 +502,7 @@ static int check_dirty_whitelist(struct intel_context *ce) for (i = 0; i < engine->whitelist.count; i++) { u32 reg = i915_mmio_reg_offset(engine->whitelist.list[i].reg); + struct i915_gem_ww_ctx ww; u64 addr = scratch->node.start; struct i915_request *rq; u32 srm, lrm, rsvd; @@ -517,6 +518,29 @@ static int check_dirty_whitelist(struct intel_context *ce) ro_reg = ro_register(reg); + i915_gem_ww_ctx_init(&ww, false); +retry: + cs = NULL; + err = i915_gem_object_lock(scratch->obj, &ww); + if (!err) + err = i915_gem_object_lock(batch->obj, &ww); + if (!err) + err = intel_context_pin_ww(ce, &ww); + if (err) + goto out; + + cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto out_ctx; + } + + results = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); + if (IS_ERR(results)) { + err = PTR_ERR(results); + goto out_unmap_batch; + } + /* Clear non priv flags */ reg &= RING_FORCE_TO_NONPRIV_ADDRESS_MASK; @@ -528,12 +552,6 @@ static int check_dirty_whitelist(struct intel_context *ce) pr_debug("%s: Writing garbage to %x\n", engine->name, reg); - cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); - if (IS_ERR(cs)) { - err = 
PTR_ERR(cs); - goto out_batch; - } - /* SRM original */ *cs++ = srm; *cs++ = reg; @@ -580,11 +598,12 @@ static int check_dirty_whitelist(struct intel_context *ce) i915_gem_object_flush_map(batch->obj); i915_gem_object_unpin_map(batch->obj); intel_gt_chipset_flush(engine->gt); + cs = NULL; - rq = intel_context_create_request(ce); + rq = i915_request_create(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); - goto out_batch; + goto out_unmap_scratch; } if (engine->emit_init_breadcrumb) { /* Be nice if we hang */ @@ -593,20 +612,16 @@ static int check_dirty_whitelist(struct intel_context *ce) goto err_request; } - i915_vma_lock(batch); err = i915_request_await_object(rq, batch->obj, false); if (err == 0) err = i915_vma_move_to_active(batch, rq, 0); - i915_vma_unlock(batch); if (err) goto err_request; - i915_vma_lock(scratch); err = i915_request_await_object(rq, scratch->obj, true); if (err == 0) err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE); - i915_vma_unlock(scratch); if (err) goto err_request; @@ -622,13 +637,7 @@ err_request: pr_err("%s: Futzing %x timedout; cancelling test\n", engine->name, reg); intel_gt_set_wedged(engine->gt); - goto out_batch; - } - - results = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); - if (IS_ERR(results)) { - err = PTR_ERR(results); - goto out_batch; + goto out_unmap_scratch; } GEM_BUG_ON(values[ARRAY_SIZE(values) - 1] != 0xffffffff); @@ -639,7 +648,7 @@ err_request: pr_err("%s: Unable to write to whitelisted register %x\n", engine->name, reg); err = -EINVAL; - goto out_unpin; + goto out_unmap_scratch; } } else { rsvd = 0; @@ -705,15 +714,27 @@ err_request: err = -EINVAL; } -out_unpin: +out_unmap_scratch: i915_gem_object_unpin_map(scratch->obj); +out_unmap_batch: + if (cs) + i915_gem_object_unpin_map(batch->obj); +out_ctx: + intel_context_unpin(ce); +out: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); if (err) break; } if (igt_flush_test(engine->i915)) err = -EIO; -out_batch: + i915_vma_unpin_and_release(&batch, 0); out_scratch: i915_vma_unpin_and_release(&scratch, 0); @@ -847,7 +868,7 @@ static int scrub_whitelisted_registers(struct intel_context *ce) if (IS_ERR(batch)) return PTR_ERR(batch); - cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); + cs = i915_gem_object_pin_map_unlocked(batch->obj, I915_MAP_WC); if (IS_ERR(cs)) { err = PTR_ERR(cs); goto err_batch; @@ -982,11 +1003,11 @@ check_whitelisted_registers(struct intel_engine_cs *engine, u32 *a, *b; int i, err; - a = i915_gem_object_pin_map(A->obj, I915_MAP_WB); + a = i915_gem_object_pin_map_unlocked(A->obj, I915_MAP_WB); if (IS_ERR(a)) return PTR_ERR(a); - b = i915_gem_object_pin_map(B->obj, I915_MAP_WB); + b = i915_gem_object_pin_map_unlocked(B->obj, I915_MAP_WB); if (IS_ERR(b)) { err = PTR_ERR(b); goto err_a; @@ -1030,14 +1051,14 @@ static int live_isolated_whitelist(void *arg) for (i = 0; i < ARRAY_SIZE(client); i++) { client[i].scratch[0] = - __vm_create_scratch_for_read(gt->vm, 4096); + __vm_create_scratch_for_read_pinned(gt->vm, 4096); if (IS_ERR(client[i].scratch[0])) { err = PTR_ERR(client[i].scratch[0]); goto err; } client[i].scratch[1] = - __vm_create_scratch_for_read(gt->vm, 4096); + __vm_create_scratch_for_read_pinned(gt->vm, 4096); if (IS_ERR(client[i].scratch[1])) { err = PTR_ERR(client[i].scratch[1]); i915_vma_unpin_and_release(&client[i].scratch[0], 0); @@ -1220,7 +1241,11 @@ live_engine_reset_workarounds(void *arg) goto err; } - intel_engine_reset(engine, "live_workarounds:idle"); + ret = 
intel_engine_reset(engine, "live_workarounds:idle"); + if (ret) { + pr_err("%s: Reset failed while idle\n", engine->name); + goto err; + } ok = verify_wa_lists(gt, &lists, "after idle reset"); if (!ok) { @@ -1241,12 +1266,18 @@ live_engine_reset_workarounds(void *arg) ret = request_add_spin(rq, &spin); if (ret) { - pr_err("Spinner failed to start\n"); + pr_err("%s: Spinner failed to start\n", engine->name); igt_spinner_fini(&spin); goto err; } - intel_engine_reset(engine, "live_workarounds:active"); + ret = intel_engine_reset(engine, "live_workarounds:active"); + if (ret) { + pr_err("%s: Reset failed on an active spinner\n", + engine->name); + igt_spinner_fini(&spin); + goto err; + } igt_spinner_end(&spin); igt_spinner_fini(&spin); diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c b/drivers/gpu/drm/i915/gt/shmem_utils.c index a4d8fc9e2374..f8f02aab842b 100644 --- a/drivers/gpu/drm/i915/gt/shmem_utils.c +++ b/drivers/gpu/drm/i915/gt/shmem_utils.c @@ -39,7 +39,7 @@ struct file *shmem_create_from_object(struct drm_i915_gem_object *obj) return file; } - ptr = i915_gem_object_pin_map(obj, I915_MAP_WB); + ptr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); if (IS_ERR(ptr)) return ERR_CAST(ptr); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 4545e90e3bf1..78305b2ec89d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -682,7 +682,7 @@ int intel_guc_allocate_and_map_vma(struct intel_guc *guc, u32 size, if (IS_ERR(vma)) return PTR_ERR(vma); - vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); + vaddr = i915_gem_object_pin_map_unlocked(vma->obj, I915_MAP_WB); if (IS_ERR(vaddr)) { i915_vma_unpin_and_release(&vma, 0); return PTR_ERR(vaddr); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index c92f2c056db4..c36d5eb5bbb9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -335,7 +335,7 @@ static int guc_log_map(struct intel_guc_log *log) * buffer pages, so that we can directly get the data * (up-to-date) from memory. 
*/ - vaddr = i915_gem_object_pin_map(log->vma->obj, I915_MAP_WC); + vaddr = i915_gem_object_pin_map_unlocked(log->vma->obj, I915_MAP_WC); if (IS_ERR(vaddr)) return PTR_ERR(vaddr); @@ -744,7 +744,7 @@ int intel_guc_log_dump(struct intel_guc_log *log, struct drm_printer *p, if (!obj) return 0; - map = i915_gem_object_pin_map(obj, I915_MAP_WC); + map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC); if (IS_ERR(map)) { DRM_DEBUG("Failed to pin object\n"); drm_puts(p, "(log data inaccessible)\n"); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 23dc0aeaa0ab..92688a9b6717 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -206,9 +206,8 @@ static void __guc_dequeue(struct intel_engine_cs *engine) while ((rb = rb_first_cached(&execlists->queue))) { struct i915_priolist *p = to_priolist(rb); struct i915_request *rq, *rn; - int i; - priolist_for_each_request_consume(rq, rn, p, i) { + priolist_for_each_request_consume(rq, rn, p) { if (last && rq->context != last->context) { if (port == last_port) goto done; @@ -238,9 +237,10 @@ done: execlists->active = execlists->inflight; } -static void guc_submission_tasklet(unsigned long data) +static void guc_submission_tasklet(struct tasklet_struct *t) { - struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; + struct intel_engine_cs * const engine = + from_tasklet(engine, t, execlists.tasklet); struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_request **port, *rq; unsigned long flags; @@ -361,9 +361,8 @@ static void guc_reset_cancel(struct intel_engine_cs *engine) /* Flush the queued requests to the timeline list (for retiring).
*/ while ((rb = rb_first_cached(&execlists->queue))) { struct i915_priolist *p = to_priolist(rb); - int i; - priolist_for_each_request_consume(rq, rn, p, i) { + priolist_for_each_request_consume(rq, rn, p) { list_del_init(&rq->sched.link); __i915_request_submit(rq); dma_fence_set_error(&rq->fence, -EIO); @@ -610,7 +609,7 @@ static void guc_set_default_submission(struct intel_engine_cs *engine) { engine->submit_request = guc_submit_request; engine->schedule = i915_schedule; - engine->execlists.tasklet.func = guc_submission_tasklet; + engine->execlists.tasklet.callback = guc_submission_tasklet; engine->reset.prepare = guc_reset_prepare; engine->reset.rewind = guc_reset_rewind; @@ -702,8 +701,7 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine) */ GEM_BUG_ON(INTEL_GEN(i915) < 11); - tasklet_init(&engine->execlists.tasklet, - guc_submission_tasklet, (unsigned long)engine); + tasklet_setup(&engine->execlists.tasklet, guc_submission_tasklet); guc_default_vfuncs(engine); guc_default_irqs(engine); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index 65eeb44b397d..2126dd81ac38 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -82,7 +82,7 @@ static int intel_huc_rsa_data_create(struct intel_huc *huc) if (IS_ERR(vma)) return PTR_ERR(vma); - vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); + vaddr = i915_gem_object_pin_map_unlocked(vma->obj, I915_MAP_WB); if (IS_ERR(vaddr)) { i915_vma_unpin_and_release(&vma, 0); return PTR_ERR(vaddr); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 984fa79e0fa7..df647c9a8d56 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -539,7 +539,7 @@ int intel_uc_fw_init(struct intel_uc_fw *uc_fw) if (!intel_uc_fw_is_available(uc_fw)) return -ENOEXEC; - err = i915_gem_object_pin_pages(uc_fw->obj); + err = i915_gem_object_pin_pages_unlocked(uc_fw->obj); if (err) { DRM_DEBUG_DRIVER("%s fw pin-pages err=%d\n", intel_uc_fw_type_repr(uc_fw->type), err); diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c index c3eb3838fe88..d4f883f35b95 100644 --- a/drivers/gpu/drm/i915/gvt/dmabuf.c +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -218,7 +218,7 @@ static struct drm_i915_gem_object *vgpu_create_gem(struct drm_device *dev, drm_gem_private_object_init(dev, &obj->base, roundup(info->size, PAGE_SIZE)); - i915_gem_object_init(obj, &intel_vgpu_gem_ops, &lock_class); + i915_gem_object_init(obj, &intel_vgpu_gem_ops, &lock_class, 0); i915_gem_object_set_readonly(obj); obj->read_domains = I915_GEM_DOMAIN_GTT; diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 3bc616cc1ad2..cf9a3d384971 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -293,18 +293,13 @@ static struct active_node *__active_lookup(struct i915_active *ref, u64 idx) static struct i915_active_fence * active_instance(struct i915_active *ref, u64 idx) { - struct active_node *node, *prealloc; + struct active_node *node; struct rb_node **p, *parent; node = __active_lookup(ref, idx); if (likely(node)) return &node->base; - /* Preallocate a replacement, just in case */ - prealloc = kmem_cache_alloc(global.slab_cache, GFP_KERNEL); - if (!prealloc) - return NULL; - spin_lock_irq(&ref->tree_lock); GEM_BUG_ON(i915_active_is_idle(ref)); @@ -314,10 +309,8 @@ active_instance(struct i915_active *ref, u64 idx) parent = 
*p; node = rb_entry(parent, struct active_node, node); - if (node->timeline == idx) { - kmem_cache_free(global.slab_cache, prealloc); + if (node->timeline == idx) goto out; - } if (node->timeline < idx) p = &parent->rb_right; @@ -325,7 +318,14 @@ active_instance(struct i915_active *ref, u64 idx) p = &parent->rb_left; } - node = prealloc; + /* + * XXX: We should preallocate this before i915_active_ref() is ever + * called, but we cannot call into fs_reclaim() anyway, so use GFP_ATOMIC. + */ + node = kmem_cache_alloc(global.slab_cache, GFP_ATOMIC); + if (!node) + goto out; + __i915_active_fence_init(&node->base, NULL, node_retire); node->ref = ref; node->timeline = idx; diff --git a/drivers/gpu/drm/i915/i915_buddy.c b/drivers/gpu/drm/i915/i915_buddy.c index 20babbdb297d..3a2f6eecb2fc 100644 --- a/drivers/gpu/drm/i915/i915_buddy.c +++ b/drivers/gpu/drm/i915/i915_buddy.c @@ -48,6 +48,8 @@ static struct i915_buddy_block *i915_block_alloc(struct i915_buddy_block *parent { struct i915_buddy_block *block; + GEM_BUG_ON(order > I915_BUDDY_MAX_ORDER); + block = kmem_cache_zalloc(global.slab_blocks, GFP_KERNEL); if (!block) return NULL; @@ -56,6 +58,7 @@ static struct i915_buddy_block *i915_block_alloc(struct i915_buddy_block *parent block->header |= order; block->parent = parent; + GEM_BUG_ON(block->header & I915_BUDDY_HEADER_UNUSED); return block; } diff --git a/drivers/gpu/drm/i915/i915_buddy.h b/drivers/gpu/drm/i915/i915_buddy.h index ed41f3507cdc..9ce5200f4001 100644 --- a/drivers/gpu/drm/i915/i915_buddy.h +++ b/drivers/gpu/drm/i915/i915_buddy.h @@ -15,7 +15,9 @@ struct i915_buddy_block { #define I915_BUDDY_ALLOCATED (1 << 10) #define I915_BUDDY_FREE (2 << 10) #define I915_BUDDY_SPLIT (3 << 10) -#define I915_BUDDY_HEADER_ORDER GENMASK_ULL(9, 0) +/* Free to be used, if needed in the future */ +#define I915_BUDDY_HEADER_UNUSED GENMASK_ULL(9, 6) +#define I915_BUDDY_HEADER_ORDER GENMASK_ULL(5, 0) u64 header; struct i915_buddy_block *left; @@ -34,7 +36,8 @@ struct i915_buddy_block { struct list_head tmp_link; }; -#define I915_BUDDY_MAX_ORDER I915_BUDDY_HEADER_ORDER +/* Order-zero must be at least PAGE_SIZE */ +#define I915_BUDDY_MAX_ORDER (63 - PAGE_SHIFT) /* * Binary Buddy System. diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index ced9a96d7c34..e6f1e93abbbb 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -940,7 +940,7 @@ static void fini_hash_table(struct intel_engine_cs *engine) * struct intel_engine_cs based on whether the platform requires software * command parsing. 
*/ -void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) +int intel_engine_init_cmd_parser(struct intel_engine_cs *engine) { const struct drm_i915_cmd_table *cmd_tables; int cmd_table_count; @@ -948,7 +948,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) if (!IS_GEN(engine->i915, 7) && !(IS_GEN(engine->i915, 9) && engine->class == COPY_ENGINE_CLASS)) - return; + return 0; switch (engine->class) { case RENDER_CLASS: @@ -1013,19 +1013,19 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) break; default: MISSING_CASE(engine->class); - return; + goto out; } if (!validate_cmds_sorted(engine, cmd_tables, cmd_table_count)) { drm_err(&engine->i915->drm, "%s: command descriptions are not sorted\n", engine->name); - return; + goto out; } if (!validate_regs_sorted(engine)) { drm_err(&engine->i915->drm, "%s: registers are not sorted\n", engine->name); - return; + goto out; } ret = init_hash_table(engine, cmd_tables, cmd_table_count); @@ -1033,10 +1033,17 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) drm_err(&engine->i915->drm, "%s: initialised failed!\n", engine->name); fini_hash_table(engine); - return; + goto out; } engine->flags |= I915_ENGINE_USING_CMD_PARSER; + +out: + if (intel_engine_requires_cmd_parser(engine) && + !intel_engine_using_cmd_parser(engine)) + return -EINVAL; + + return 0; } /** @@ -1137,38 +1144,20 @@ find_reg(const struct intel_engine_cs *engine, u32 addr) /* Returns a vmap'd pointer to dst_obj, which the caller must unmap */ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, struct drm_i915_gem_object *src_obj, - unsigned long offset, unsigned long length) + unsigned long offset, unsigned long length, + void *dst, const void *src) { - bool needs_clflush; - void *dst, *src; - int ret; - - dst = i915_gem_object_pin_map(dst_obj, I915_MAP_WB); - if (IS_ERR(dst)) - return dst; - - ret = i915_gem_object_pin_pages(src_obj); - if (ret) { - i915_gem_object_unpin_map(dst_obj); - return ERR_PTR(ret); - } - - needs_clflush = + bool needs_clflush = !(src_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ); - src = ERR_PTR(-ENODEV); - if (needs_clflush && i915_has_memcpy_from_wc()) { - src = i915_gem_object_pin_map(src_obj, I915_MAP_WC); - if (!IS_ERR(src)) { - i915_unaligned_memcpy_from_wc(dst, - src + offset, - length); - i915_gem_object_unpin_map(src_obj); - } - } - if (IS_ERR(src)) { - unsigned long x, n, remain; + if (src) { + GEM_BUG_ON(!needs_clflush); + i915_unaligned_memcpy_from_wc(dst, src + offset, length); + } else { + struct scatterlist *sg; void *ptr; + unsigned int x, sg_ofs; + unsigned long remain; /* * We can avoid clflushing partial cachelines before the write @@ -1185,23 +1174,31 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, ptr = dst; x = offset_in_page(offset); - for (n = offset >> PAGE_SHIFT; remain; n++) { - int len = min(remain, PAGE_SIZE - x); - - src = kmap_atomic(i915_gem_object_get_page(src_obj, n)); - if (needs_clflush) - drm_clflush_virt_range(src + x, len); - memcpy(ptr, src + x, len); - kunmap_atomic(src); - - ptr += len; - remain -= len; - x = 0; + sg = i915_gem_object_get_sg(src_obj, offset >> PAGE_SHIFT, &sg_ofs, false); + + while (remain) { + unsigned long sg_max = sg->length >> PAGE_SHIFT; + + for (; remain && sg_ofs < sg_max; sg_ofs++) { + unsigned long len = min(remain, PAGE_SIZE - x); + void *map; + + map = kmap_atomic(nth_page(sg_page(sg), sg_ofs)); + if (needs_clflush) + drm_clflush_virt_range(map + x, len); + memcpy(ptr, map + x, len); + 
kunmap_atomic(map); + + ptr += len; + remain -= len; + x = 0; + } + + sg_ofs = 0; + sg = sg_next(sg); } } - i915_gem_object_unpin_pages(src_obj); - memset32(dst + length, 0, (dst_obj->base.size - length) / sizeof(u32)); /* dst_obj is returned with vmap pinned */ @@ -1363,9 +1360,6 @@ static int check_bbstart(u32 *cmd, u32 offset, u32 length, if (target_cmd_index == offset) return 0; - if (IS_ERR(jump_whitelist)) - return PTR_ERR(jump_whitelist); - if (!test_bit(target_cmd_index, jump_whitelist)) { DRM_DEBUG("CMD: BB_START to 0x%llx not a previously executed cmd\n", jump_target); @@ -1375,10 +1369,14 @@ static int check_bbstart(u32 *cmd, u32 offset, u32 length, return 0; } -static unsigned long *alloc_whitelist(u32 batch_length) +unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length, + bool trampoline) { unsigned long *jmp; + if (trampoline) + return NULL; + /* * We expect batch_length to be less than 256KiB for known users, * i.e. we need at most an 8KiB bitmap allocation which should be @@ -1416,14 +1414,16 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, unsigned long batch_offset, unsigned long batch_length, struct i915_vma *shadow, - bool trampoline) + unsigned long *jump_whitelist, + void *shadow_map, + const void *batch_map) { u32 *cmd, *batch_end, offset = 0; struct drm_i915_cmd_descriptor default_desc = noop_desc; const struct drm_i915_cmd_descriptor *desc = &default_desc; - unsigned long *jump_whitelist; u64 batch_addr, shadow_addr; int ret = 0; + bool trampoline = !jump_whitelist; GEM_BUG_ON(!IS_ALIGNED(batch_offset, sizeof(*cmd))); GEM_BUG_ON(!IS_ALIGNED(batch_length, sizeof(*cmd))); @@ -1431,16 +1431,8 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, batch->size)); GEM_BUG_ON(!batch_length); - cmd = copy_batch(shadow->obj, batch->obj, batch_offset, batch_length); - if (IS_ERR(cmd)) { - DRM_DEBUG("CMD: Failed to copy batch\n"); - return PTR_ERR(cmd); - } - - jump_whitelist = NULL; - if (!trampoline) - /* Defer failure until attempted use */ - jump_whitelist = alloc_whitelist(batch_length); + cmd = copy_batch(shadow->obj, batch->obj, batch_offset, batch_length, + shadow_map, batch_map); shadow_addr = gen8_canonical_addr(shadow->node.start); batch_addr = gen8_canonical_addr(batch->node.start + batch_offset); @@ -1541,9 +1533,6 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, i915_gem_object_flush_map(shadow->obj); - if (!IS_ERR_OR_NULL(jump_whitelist)) - kfree(jump_whitelist); - i915_gem_object_unpin_map(shadow->obj); return ret; } diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 4cf975b7504f..b654b7498bcd 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -908,10 +908,10 @@ i915_drop_caches_set(void *data, u64 val) fs_reclaim_acquire(GFP_KERNEL); if (val & DROP_BOUND) - i915_gem_shrink(i915, LONG_MAX, NULL, I915_SHRINK_BOUND); + i915_gem_shrink(NULL, i915, LONG_MAX, NULL, I915_SHRINK_BOUND); if (val & DROP_UNBOUND) - i915_gem_shrink(i915, LONG_MAX, NULL, I915_SHRINK_UNBOUND); + i915_gem_shrink(NULL, i915, LONG_MAX, NULL, I915_SHRINK_UNBOUND); if (val & DROP_SHRINK_ALL) i915_gem_shrink_all(i915); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index abb0152b56f4..2483447ff8dc 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -567,6 +567,10 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) intel_gt_init_hw_early(&dev_priv->gt, &dev_priv->ggtt); + 
ret = intel_gt_probe_lmem(&dev_priv->gt); + if (ret) + goto err_mem_regions; + ret = i915_ggtt_enable_hw(dev_priv); if (ret) { drm_err(&dev_priv->drm, "failed to enable GGTT\n"); @@ -804,7 +808,7 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (INTEL_GEN(i915) >= 9 && i915_selftest.live < 0 && i915->params.fake_lmem_start) { mkwrite_device_info(i915)->memory_regions = - REGION_SMEM | REGION_LMEM | REGION_STOLEN; + REGION_SMEM | REGION_LMEM | REGION_STOLEN_SMEM; GEM_BUG_ON(!HAS_LMEM(i915)); } } @@ -1700,7 +1704,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_VBLANK_SWAP, drm_noop, DRM_AUTH), DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_INIT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer_ioctl, DRM_AUTH), + DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, drm_invalid_op, DRM_AUTH), DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2_WR, i915_gem_execbuffer2_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c97c3521df76..69e43bf91a15 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -555,12 +555,13 @@ struct i915_gem_mm { struct notifier_block vmap_notifier; struct shrinker shrinker; +#ifdef CONFIG_MMU_NOTIFIER /** - * Workqueue to fault in userptr pages, flushed by the execbuf - * when required but otherwise left to userspace to try again - * on EAGAIN. + * notifier_lock for mmu notifiers, memory may not be allocated + * while holding this lock. 
*/ - struct workqueue_struct *userptr_wq; + spinlock_t notifier_lock; +#endif /* shrinker accounting, also useful for userland debugging */ u64 shrink_memory; @@ -933,8 +934,6 @@ struct drm_i915_private { struct i915_ggtt ggtt; /* VM representing the global address space */ struct i915_gem_mm mm; - DECLARE_HASHTABLE(mm_structs, 7); - spinlock_t mm_lock; /* Kernel Modesetting */ @@ -1309,7 +1308,7 @@ intel_subplatform(const struct intel_runtime_info *info, enum intel_platform p) { const unsigned int pi = __platform_mask_index(info, p); - return info->platform_mask[pi] & ((1 << INTEL_SUBPLATFORM_BITS) - 1); + return info->platform_mask[pi] & INTEL_SUBPLATFORM_MASK; } static __always_inline bool @@ -1880,14 +1879,19 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type); /* i915_cmd_parser.c */ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv); -void intel_engine_init_cmd_parser(struct intel_engine_cs *engine); +int intel_engine_init_cmd_parser(struct intel_engine_cs *engine); void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine); +unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length, + bool trampoline); + int intel_engine_cmd_parser(struct intel_engine_cs *engine, struct i915_vma *batch, unsigned long batch_offset, unsigned long batch_length, struct i915_vma *shadow, - bool trampoline); + unsigned long *jump_whitelist, + void *shadow_map, + const void *batch_map); #define I915_CMD_PARSER_TRAMPOLINE_SIZE 8 /* intel_device_info.c */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index aa4490934469..b23f58e94cfb 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -204,7 +204,6 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj, { unsigned int needs_clflush; unsigned int idx, offset; - struct dma_fence *fence; char __user *user_data; u64 remain; int ret; @@ -213,19 +212,17 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj, if (ret) return ret; + ret = i915_gem_object_pin_pages(obj); + if (ret) + goto err_unlock; + ret = i915_gem_object_prepare_read(obj, &needs_clflush); - if (ret) { - i915_gem_object_unlock(obj); - return ret; - } + if (ret) + goto err_unpin; - fence = i915_gem_object_lock_fence(obj); i915_gem_object_finish_access(obj); i915_gem_object_unlock(obj); - if (!fence) - return -ENOMEM; - remain = args->size; user_data = u64_to_user_ptr(args->data_ptr); offset = offset_in_page(args->offset); @@ -243,7 +240,13 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj, offset = 0; } - i915_gem_object_unlock_fence(obj, fence); + i915_gem_object_unpin_pages(obj); + return ret; + +err_unpin: + i915_gem_object_unpin_pages(obj); +err_unlock: + i915_gem_object_unlock(obj); return ret; } @@ -271,52 +274,102 @@ gtt_user_read(struct io_mapping *mapping, return unwritten; } -static int -i915_gem_gtt_pread(struct drm_i915_gem_object *obj, - const struct drm_i915_gem_pread *args) +static struct i915_vma *i915_gem_gtt_prepare(struct drm_i915_gem_object *obj, + struct drm_mm_node *node, + bool write) { struct drm_i915_private *i915 = to_i915(obj->base.dev); struct i915_ggtt *ggtt = &i915->ggtt; - intel_wakeref_t wakeref; - struct drm_mm_node node; - struct dma_fence *fence; - void __user *user_data; struct i915_vma *vma; - u64 remain, offset; + struct i915_gem_ww_ctx ww; int ret; - wakeref = intel_runtime_pm_get(&i915->runtime_pm); + i915_gem_ww_ctx_init(&ww, true); +retry: vma = ERR_PTR(-ENODEV); + ret = i915_gem_object_lock(obj, &ww); + if (ret) + 
goto err_ww; + + ret = i915_gem_object_set_to_gtt_domain(obj, write); + if (ret) + goto err_ww; + if (!i915_gem_object_is_tiled(obj)) - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, - PIN_MAPPABLE | - PIN_NONBLOCK /* NOWARN */ | - PIN_NOEVICT); - if (!IS_ERR(vma)) { - node.start = i915_ggtt_offset(vma); - node.flags = 0; + vma = i915_gem_object_ggtt_pin_ww(obj, &ww, NULL, 0, 0, + PIN_MAPPABLE | + PIN_NONBLOCK /* NOWARN */ | + PIN_NOEVICT); + if (vma == ERR_PTR(-EDEADLK)) { + ret = -EDEADLK; + goto err_ww; + } else if (!IS_ERR(vma)) { + node->start = i915_ggtt_offset(vma); + node->flags = 0; } else { - ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); + ret = insert_mappable_node(ggtt, node, PAGE_SIZE); if (ret) - goto out_rpm; - GEM_BUG_ON(!drm_mm_node_allocated(&node)); + goto err_ww; + GEM_BUG_ON(!drm_mm_node_allocated(node)); + vma = NULL; } - ret = i915_gem_object_lock_interruptible(obj, NULL); - if (ret) - goto out_unpin; - - ret = i915_gem_object_set_to_gtt_domain(obj, false); + ret = i915_gem_object_pin_pages(obj); if (ret) { - i915_gem_object_unlock(obj); - goto out_unpin; + if (drm_mm_node_allocated(node)) { + ggtt->vm.clear_range(&ggtt->vm, node->start, node->size); + remove_mappable_node(ggtt, node); + } else { + i915_vma_unpin(vma); + } } - fence = i915_gem_object_lock_fence(obj); - i915_gem_object_unlock(obj); - if (!fence) { - ret = -ENOMEM; - goto out_unpin; +err_ww: + if (ret == -EDEADLK) { + ret = i915_gem_ww_ctx_backoff(&ww); + if (!ret) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + + return ret ? ERR_PTR(ret) : vma; +} + +static void i915_gem_gtt_cleanup(struct drm_i915_gem_object *obj, + struct drm_mm_node *node, + struct i915_vma *vma) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_ggtt *ggtt = &i915->ggtt; + + i915_gem_object_unpin_pages(obj); + if (drm_mm_node_allocated(node)) { + ggtt->vm.clear_range(&ggtt->vm, node->start, node->size); + remove_mappable_node(ggtt, node); + } else { + i915_vma_unpin(vma); + } +} + +static int +i915_gem_gtt_pread(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_pread *args) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_ggtt *ggtt = &i915->ggtt; + intel_wakeref_t wakeref; + struct drm_mm_node node; + void __user *user_data; + struct i915_vma *vma; + u64 remain, offset; + int ret = 0; + + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + + vma = i915_gem_gtt_prepare(obj, &node, false); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto out_rpm; } user_data = u64_to_user_ptr(args->data_ptr); @@ -353,14 +406,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, offset += page_length; } - i915_gem_object_unlock_fence(obj, fence); -out_unpin: - if (drm_mm_node_allocated(&node)) { - ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); - remove_mappable_node(ggtt, &node); - } else { - i915_vma_unpin(vma); - } + i915_gem_gtt_cleanup(obj, &node, vma); out_rpm: intel_runtime_pm_put(&i915->runtime_pm, wakeref); return ret; @@ -378,10 +424,17 @@ int i915_gem_pread_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { + struct drm_i915_private *i915 = to_i915(dev); struct drm_i915_gem_pread *args = data; struct drm_i915_gem_object *obj; int ret; + /* PREAD is disallowed for all platforms after TGL-LP. This also + * covers all platforms with local memory. 
+ */ + if (INTEL_GEN(i915) >= 12 && !IS_TIGERLAKE(i915)) + return -EOPNOTSUPP; + if (args->size == 0) return 0; @@ -400,6 +453,11 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, } trace_i915_gem_object_pread(obj, args->offset, args->size); + ret = -ENODEV; + if (obj->ops->pread) + ret = obj->ops->pread(obj, args); + if (ret != -ENODEV) + goto out; ret = -ENODEV; if (obj->ops->pread) @@ -413,15 +471,10 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, if (ret) goto out; - ret = i915_gem_object_pin_pages(obj); - if (ret) - goto out; - ret = i915_gem_shmem_pread(obj, args); if (ret == -EFAULT || ret == -ENODEV) ret = i915_gem_gtt_pread(obj, args); - i915_gem_object_unpin_pages(obj); out: i915_gem_object_put(obj); return ret; @@ -469,11 +522,10 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, struct intel_runtime_pm *rpm = &i915->runtime_pm; intel_wakeref_t wakeref; struct drm_mm_node node; - struct dma_fence *fence; struct i915_vma *vma; u64 remain, offset; void __user *user_data; - int ret; + int ret = 0; if (i915_gem_object_has_struct_page(obj)) { /* @@ -491,37 +543,10 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, wakeref = intel_runtime_pm_get(rpm); } - vma = ERR_PTR(-ENODEV); - if (!i915_gem_object_is_tiled(obj)) - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, - PIN_MAPPABLE | - PIN_NONBLOCK /* NOWARN */ | - PIN_NOEVICT); - if (!IS_ERR(vma)) { - node.start = i915_ggtt_offset(vma); - node.flags = 0; - } else { - ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); - if (ret) - goto out_rpm; - GEM_BUG_ON(!drm_mm_node_allocated(&node)); - } - - ret = i915_gem_object_lock_interruptible(obj, NULL); - if (ret) - goto out_unpin; - - ret = i915_gem_object_set_to_gtt_domain(obj, true); - if (ret) { - i915_gem_object_unlock(obj); - goto out_unpin; - } - - fence = i915_gem_object_lock_fence(obj); - i915_gem_object_unlock(obj); - if (!fence) { - ret = -ENOMEM; - goto out_unpin; + vma = i915_gem_gtt_prepare(obj, &node, true); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto out_rpm; } i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU); @@ -570,14 +595,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, intel_gt_flush_ggtt_writes(ggtt->vm.gt); i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU); - i915_gem_object_unlock_fence(obj, fence); -out_unpin: - if (drm_mm_node_allocated(&node)) { - ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); - remove_mappable_node(ggtt, &node); - } else { - i915_vma_unpin(vma); - } + i915_gem_gtt_cleanup(obj, &node, vma); out_rpm: intel_runtime_pm_put(rpm, wakeref); return ret; @@ -617,7 +635,6 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj, unsigned int partial_cacheline_write; unsigned int needs_clflush; unsigned int offset, idx; - struct dma_fence *fence; void __user *user_data; u64 remain; int ret; @@ -626,19 +643,17 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj, if (ret) return ret; + ret = i915_gem_object_pin_pages(obj); + if (ret) + goto err_unlock; + ret = i915_gem_object_prepare_write(obj, &needs_clflush); - if (ret) { - i915_gem_object_unlock(obj); - return ret; - } + if (ret) + goto err_unpin; - fence = i915_gem_object_lock_fence(obj); i915_gem_object_finish_access(obj); i915_gem_object_unlock(obj); - if (!fence) - return -ENOMEM; - /* If we don't overwrite a cacheline completely we need to be * careful to have up-to-date data by first clflushing. Don't * overcomplicate things and flush the entire patch. 
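The hunks above all follow the ww-mutex transaction shape that this series rolls out across the driver: object locks are taken under an i915_gem_ww_ctx, and -EDEADLK from lock contention is handled by backing off and replaying the whole sequence instead of relying on a global lock order. A minimal sketch of the idiom, built only from helpers visible in the surrounding hunks (illustrative, not part of any patch in this series):

static int example_ww_transaction(struct drm_i915_gem_object *obj)
{
	struct i915_gem_ww_ctx ww;
	int err;

	/* the boolean selects interruptible waits */
	i915_gem_ww_ctx_init(&ww, true);
retry:
	err = i915_gem_object_lock(obj, &ww);
	if (!err)
		err = i915_gem_object_pin_pages(obj);
	if (!err) {
		/* ... operate on the object's backing store ... */
		i915_gem_object_unpin_pages(obj);
	}
	if (err == -EDEADLK) {
		/* drop all held locks, wait on the contended one, replay */
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	/* fini also unlocks anything still held in the context */
	i915_gem_ww_ctx_fini(&ww);
	return err;
}

i915_gem_gtt_prepare() above is this skeleton with the GGTT vma pin and the mappable-node fallback placed inside the locked section.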
@@ -666,8 +681,14 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj, } i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU); - i915_gem_object_unlock_fence(obj, fence); + i915_gem_object_unpin_pages(obj); + return ret; + +err_unpin: + i915_gem_object_unpin_pages(obj); +err_unlock: + i915_gem_object_unlock(obj); return ret; } @@ -683,10 +704,17 @@ int i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { + struct drm_i915_private *i915 = to_i915(dev); struct drm_i915_gem_pwrite *args = data; struct drm_i915_gem_object *obj; int ret; + /* PWRITE is disallowed for all platforms after TGL-LP. This also + * covers all platforms with local memory. + */ + if (INTEL_GEN(i915) >= 12 && !IS_TIGERLAKE(i915)) + return -EOPNOTSUPP; + if (args->size == 0) return 0; @@ -724,10 +752,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, if (ret) goto err; - ret = i915_gem_object_pin_pages(obj); - if (ret) - goto err; - ret = -EFAULT; /* We can only do the GTT pwrite on untiled buffers, as otherwise * it would end up going through the fenced access, and we'll get @@ -748,7 +772,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, ret = i915_gem_shmem_pwrite(obj, args); } - i915_gem_object_unpin_pages(obj); err: i915_gem_object_put(obj); return ret; @@ -909,7 +932,11 @@ new_vma: return ERR_PTR(ret); } - ret = i915_vma_pin_ww(vma, ww, size, alignment, flags | PIN_GLOBAL); + if (ww) + ret = i915_vma_pin_ww(vma, ww, size, alignment, flags | PIN_GLOBAL); + else + ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); + if (ret) return ERR_PTR(ret); @@ -949,7 +976,7 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, if (!obj) return -ENOENT; - err = mutex_lock_interruptible(&obj->mm.lock); + err = i915_gem_object_lock_interruptible(obj, NULL); if (err) goto out; @@ -995,8 +1022,8 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, i915_gem_object_truncate(obj); args->retained = obj->mm.madv != __I915_MADV_PURGED; - mutex_unlock(&obj->mm.lock); + i915_gem_object_unlock(obj); out: i915_gem_object_put(obj); return err; @@ -1050,10 +1077,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv) err_unlock: i915_gem_drain_workqueue(dev_priv); - if (ret != -EIO) { + if (ret != -EIO) intel_uc_cleanup_firmwares(&dev_priv->gt.uc); - i915_gem_cleanup_userptr(dev_priv); - } if (ret == -EIO) { /* @@ -1110,7 +1135,6 @@ void i915_gem_driver_release(struct drm_i915_private *dev_priv) intel_wa_list_free(&dev_priv->gt_wa_list); intel_uc_cleanup_firmwares(&dev_priv->gt.uc); - i915_gem_cleanup_userptr(dev_priv); i915_gem_drain_freed_objects(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 486c9953e5b6..36489be4896b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -44,7 +44,7 @@ int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj, * the DMA remapper, i915_gem_shrink will return 0. 
*/ GEM_BUG_ON(obj->mm.pages == pages); - } while (i915_gem_shrink(to_i915(obj->base.dev), + } while (i915_gem_shrink(NULL, to_i915(obj->base.dev), obj->base.size >> PAGE_SHIFT, NULL, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND)); diff --git a/drivers/gpu/drm/i915/i915_memcpy.c b/drivers/gpu/drm/i915/i915_memcpy.c index 7b3b83bd5ab8..1b021a4902de 100644 --- a/drivers/gpu/drm/i915/i915_memcpy.c +++ b/drivers/gpu/drm/i915/i915_memcpy.c @@ -135,7 +135,7 @@ bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len) * accepts that its arguments may not be aligned, but are valid for the * potential 16-byte read past the end. */ -void i915_unaligned_memcpy_from_wc(void *dst, void *src, unsigned long len) +void i915_unaligned_memcpy_from_wc(void *dst, const void *src, unsigned long len) { unsigned long addr; diff --git a/drivers/gpu/drm/i915/i915_memcpy.h b/drivers/gpu/drm/i915/i915_memcpy.h index e36d30edd987..3df063a3293b 100644 --- a/drivers/gpu/drm/i915/i915_memcpy.h +++ b/drivers/gpu/drm/i915/i915_memcpy.h @@ -13,7 +13,7 @@ struct drm_i915_private; void i915_memcpy_init_early(struct drm_i915_private *i915); bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len); -void i915_unaligned_memcpy_from_wc(void *dst, void *src, unsigned long len); +void i915_unaligned_memcpy_from_wc(void *dst, const void *src, unsigned long len); /* The movntdqa instructions used for memcpy-from-wc require 16-byte alignment, * as well as SSE4.1 support. i915_memcpy_from_wc() will report if it cannot diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 6939634e56ed..0320878d96b0 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -197,6 +197,11 @@ i915_param_named_unsafe(fake_lmem_start, ulong, 0400, "Fake LMEM start offset (default: 0)"); #endif +#if CONFIG_DRM_I915_REQUEST_TIMEOUT +i915_param_named_unsafe(request_timeout_ms, uint, 0600, + "Default request/fence/batch buffer expiration timeout."); +#endif + static __always_inline void _print_param(struct drm_printer *p, const char *name, const char *type, diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 48f47e44e848..34ebb0662547 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -72,6 +72,7 @@ struct drm_printer; param(int, enable_dpcd_backlight, -1, 0600) \ param(char *, force_probe, CONFIG_DRM_I915_FORCE_PROBE, 0400) \ param(unsigned long, fake_lmem_start, 0, 0400) \ + param(unsigned int, request_timeout_ms, CONFIG_DRM_I915_REQUEST_TIMEOUT, 0600) \ /* leave bools at the end to not create holes */ \ param(bool, enable_hangcheck, true, 0600) \ param(bool, load_detect_test, false, 0600) \ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index be82248c2dc2..480553746794 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -154,7 +154,7 @@ .page_sizes = I915_GTT_PAGE_SIZE_4K #define GEN_DEFAULT_REGIONS \ - .memory_regions = REGION_SMEM | REGION_STOLEN + .memory_regions = REGION_SMEM | REGION_STOLEN_SMEM #define I830_FEATURES \ GEN(2), \ diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 41ad5a66657e..85ad62dbabfa 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1573,7 +1573,7 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream) stream->oa_buffer.vma = vma; stream->oa_buffer.vaddr = - i915_gem_object_pin_map(bo, I915_MAP_WB); + 
i915_gem_object_pin_map_unlocked(bo, I915_MAP_WB); if (IS_ERR(stream->oa_buffer.vaddr)) { ret = PTR_ERR(stream->oa_buffer.vaddr); goto err_unpin; @@ -1627,6 +1627,7 @@ static int alloc_noa_wait(struct i915_perf_stream *stream) const u32 base = stream->engine->mmio_base; #define CS_GPR(x) GEN8_RING_CS_GPR(base, x) u32 *batch, *ts0, *cs, *jump; + struct i915_gem_ww_ctx ww; int ret, i; enum { START_TS, @@ -1644,15 +1645,21 @@ static int alloc_noa_wait(struct i915_perf_stream *stream) return PTR_ERR(bo); } + i915_gem_ww_ctx_init(&ww, true); +retry: + ret = i915_gem_object_lock(bo, &ww); + if (ret) + goto out_ww; + /* * We pin in GGTT because we jump into this buffer now because * multiple OA config BOs will have a jump to this address and it * needs to be fixed during the lifetime of the i915/perf stream. */ - vma = i915_gem_object_ggtt_pin(bo, NULL, 0, 0, PIN_HIGH); + vma = i915_gem_object_ggtt_pin_ww(bo, &ww, NULL, 0, 0, PIN_HIGH); if (IS_ERR(vma)) { ret = PTR_ERR(vma); - goto err_unref; + goto out_ww; } batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB); @@ -1786,12 +1793,19 @@ static int alloc_noa_wait(struct i915_perf_stream *stream) __i915_gem_object_release_map(bo); stream->noa_wait = vma; - return 0; + goto out_ww; err_unpin: i915_vma_unpin_and_release(&vma, 0); -err_unref: - i915_gem_object_put(bo); +out_ww: + if (ret == -EDEADLK) { + ret = i915_gem_ww_ctx_backoff(&ww); + if (!ret) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + if (ret) + i915_gem_object_put(bo); return ret; } @@ -1834,6 +1848,7 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream, { struct drm_i915_gem_object *obj; struct i915_oa_config_bo *oa_bo; + struct i915_gem_ww_ctx ww; size_t config_length = 0; u32 *cs; int err; @@ -1854,10 +1869,16 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream, goto err_free; } + i915_gem_ww_ctx_init(&ww, true); +retry: + err = i915_gem_object_lock(obj, &ww); + if (err) + goto out_ww; + cs = i915_gem_object_pin_map(obj, I915_MAP_WB); if (IS_ERR(cs)) { err = PTR_ERR(cs); - goto err_oa_bo; + goto out_ww; } cs = write_cs_mi_lri(cs, @@ -1885,19 +1906,28 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream, NULL); if (IS_ERR(oa_bo->vma)) { err = PTR_ERR(oa_bo->vma); - goto err_oa_bo; + goto out_ww; } oa_bo->oa_config = i915_oa_config_get(oa_config); llist_add(&oa_bo->node, &stream->oa_config_bos); - return oa_bo; +out_ww: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); -err_oa_bo: - i915_gem_object_put(obj); + if (err) + i915_gem_object_put(obj); err_free: - kfree(oa_bo); - return ERR_PTR(err); + if (err) { + kfree(oa_bo); + return ERR_PTR(err); + } + return oa_bo; } static struct i915_vma * diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h index 8aa7866ec6b6..bc2fa84f98a8 100644 --- a/drivers/gpu/drm/i915/i915_priolist_types.h +++ b/drivers/gpu/drm/i915/i915_priolist_types.h @@ -24,14 +24,8 @@ enum { I915_PRIORITY_DISPLAY, }; -#define I915_USER_PRIORITY_SHIFT 0 -#define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT) - -#define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT) -#define I915_PRIORITY_MASK (I915_PRIORITY_COUNT - 1) - /* Smallest priority value that cannot be bumped. 
*/ -#define I915_PRIORITY_INVALID (INT_MIN | (u8)I915_PRIORITY_MASK) +#define I915_PRIORITY_INVALID (INT_MIN) /* * Requests containing performance queries must not be preempted by @@ -45,9 +39,8 @@ enum { #define I915_PRIORITY_BARRIER (I915_PRIORITY_UNPREEMPTABLE - 1) struct i915_priolist { - struct list_head requests[I915_PRIORITY_COUNT]; + struct list_head requests; struct rb_node node; - unsigned long used; int priority; }; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 765ba64442ab..cbf7a60afe54 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -12187,6 +12187,8 @@ enum skl_power_gate { #define GEN12_GLOBAL_MOCS(i) _MMIO(0x4000 + (i) * 4) /* Global MOCS regs */ +#define GEN12_GSMBASE _MMIO(0x108100) + /* gamt regs */ #define GEN8_L3_LRA_1_GPGPU _MMIO(0x4dd4) #define GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW 0x67F1427F /* max/min for LRA1/2 */ diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 22e39d938f17..9165971c3c47 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -33,7 +33,10 @@ #include "gem/i915_gem_context.h" #include "gt/intel_breadcrumbs.h" #include "gt/intel_context.h" +#include "gt/intel_engine.h" +#include "gt/intel_engine_heartbeat.h" #include "gt/intel_gpu_commands.h" +#include "gt/intel_reset.h" #include "gt/intel_ring.h" #include "gt/intel_rps.h" @@ -244,6 +247,50 @@ static void __i915_request_fill(struct i915_request *rq, u8 val) memset(vaddr + head, val, rq->postfix - head); } +/** + * i915_request_active_engine + * @rq: request to inspect + * @active: pointer in which to return the active engine + * + * Fills the currently active engine to the @active pointer if the request + * is active and still not completed. + * + * Returns true if request was active or false otherwise. + */ +bool +i915_request_active_engine(struct i915_request *rq, + struct intel_engine_cs **active) +{ + struct intel_engine_cs *engine, *locked; + bool ret = false; + + /* + * Serialise with __i915_request_submit() so that it sees + * is-banned?, or we know the request is already inflight. + * + * Note that rq->engine is unstable, and so we double + * check that we have acquired the lock on the final engine. 
+ */ + locked = READ_ONCE(rq->engine); + spin_lock_irq(&locked->active.lock); + while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) { + spin_unlock(&locked->active.lock); + locked = engine; + spin_lock(&locked->active.lock); + } + + if (i915_request_is_active(rq)) { + if (!__i915_request_is_complete(rq)) + *active = locked; + ret = true; + } + + spin_unlock_irq(&locked->active.lock); + + return ret; +} + + static void remove_from_engine(struct i915_request *rq) { struct intel_engine_cs *engine, *locked; @@ -274,6 +321,53 @@ static void remove_from_engine(struct i915_request *rq) __notify_execute_cb_imm(rq); } +static void __rq_init_watchdog(struct i915_request *rq) +{ + rq->watchdog.timer.function = NULL; +} + +static enum hrtimer_restart __rq_watchdog_expired(struct hrtimer *hrtimer) +{ + struct i915_request *rq = + container_of(hrtimer, struct i915_request, watchdog.timer); + struct intel_gt *gt = rq->engine->gt; + + if (!i915_request_completed(rq)) { + if (llist_add(&rq->watchdog.link, &gt->watchdog.list)) + schedule_work(&gt->watchdog.work); + } else { + i915_request_put(rq); + } + + return HRTIMER_NORESTART; +} + +static void __rq_arm_watchdog(struct i915_request *rq) +{ + struct i915_request_watchdog *wdg = &rq->watchdog; + struct intel_context *ce = rq->context; + + if (!ce->watchdog.timeout_us) + return; + + hrtimer_init(&wdg->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + wdg->timer.function = __rq_watchdog_expired; + hrtimer_start_range_ns(&wdg->timer, + ns_to_ktime(ce->watchdog.timeout_us * + NSEC_PER_USEC), + NSEC_PER_MSEC, + HRTIMER_MODE_REL); + i915_request_get(rq); +} + +static void __rq_cancel_watchdog(struct i915_request *rq) +{ + struct i915_request_watchdog *wdg = &rq->watchdog; + + if (wdg->timer.function && hrtimer_try_to_cancel(&wdg->timer) > 0) + i915_request_put(rq); +} + bool i915_request_retire(struct i915_request *rq) { if (!__i915_request_is_complete(rq)) @@ -285,6 +379,8 @@ bool i915_request_retire(struct i915_request *rq) trace_i915_request_retire(rq); i915_request_mark_complete(rq); + __rq_cancel_watchdog(rq); + /* * We know the GPU must have read the request to have * sent us the seqno + interrupt, so use the position @@ -498,31 +594,38 @@ void __i915_request_skip(struct i915_request *rq) rq->infix = rq->postfix; } -void i915_request_set_error_once(struct i915_request *rq, int error) +bool i915_request_set_error_once(struct i915_request *rq, int error) { int old; GEM_BUG_ON(!IS_ERR_VALUE((long)error)); if (i915_request_signaled(rq)) - return; + return false; old = READ_ONCE(rq->fence.error); do { if (fatal_error(old)) - return; + return false; } while (!try_cmpxchg(&rq->fence.error, &old, error)); + + return true; } -void i915_request_mark_eio(struct i915_request *rq) +struct i915_request *i915_request_mark_eio(struct i915_request *rq) { if (__i915_request_is_complete(rq)) - return; + return NULL; GEM_BUG_ON(i915_request_signaled(rq)); + /* As soon as the request is completed, it may be retired */ + rq = i915_request_get(rq); + i915_request_set_error_once(rq, -EIO); i915_request_mark_complete(rq); + + return rq; } bool __i915_request_submit(struct i915_request *request) @@ -678,6 +781,28 @@ void i915_request_unsubmit(struct i915_request *request) spin_unlock_irqrestore(&engine->active.lock, flags); } +static void __cancel_request(struct i915_request *rq) +{ + struct intel_engine_cs *engine = NULL; + + i915_request_active_engine(rq, &engine); + + if (engine && intel_engine_pulse(engine)) + intel_gt_handle_error(engine->gt, engine->mask, 0, + "request 
cancellation by %s", + current->comm); +} + +void i915_request_cancel(struct i915_request *rq, int error) +{ + if (!i915_request_set_error_once(rq, error)) + return; + + set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags); + + __cancel_request(rq); +} + static int __i915_sw_fence_call submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { @@ -690,6 +815,8 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) if (unlikely(fence->error)) i915_request_set_error_once(request, fence->error); + else + __rq_arm_watchdog(request); /* * We need to serialize use of the submit_request() callback @@ -863,7 +990,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) rq->fence.seqno = seqno; RCU_INIT_POINTER(rq->timeline, tl); - RCU_INIT_POINTER(rq->hwsp_cacheline, tl->hwsp_cacheline); rq->hwsp_seqno = tl->hwsp_seqno; GEM_BUG_ON(__i915_request_is_complete(rq)); @@ -877,6 +1003,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) /* No zalloc, everything must be cleared after use */ rq->batch = NULL; + __rq_init_watchdog(rq); GEM_BUG_ON(rq->capture_list); GEM_BUG_ON(!llist_empty(&rq->execute_cb)); @@ -1108,9 +1235,6 @@ emit_semaphore_wait(struct i915_request *to, if (i915_request_has_initial_breadcrumb(to)) goto await_fence; - if (!rcu_access_pointer(from->hwsp_cacheline)) - goto await_fence; - /* * If this or its dependents are waiting on an external fence * that may fail catastrophically, then we want to avoid using diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 1bfe214a47e9..270f6cd37650 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -26,7 +26,9 @@ #define I915_REQUEST_H #include <linux/dma-fence.h> +#include <linux/hrtimer.h> #include <linux/irq_work.h> +#include <linux/llist.h> #include <linux/lockdep.h> #include "gem/i915_gem_context_types.h" @@ -237,16 +239,6 @@ struct i915_request { */ const u32 *hwsp_seqno; - /* - * If we need to access the timeline's seqno for this request in - * another request, we need to keep a read reference to this associated - * cacheline, so that we do not free and recycle it before the foreign - * observers have completed. Hence, we keep a pointer to the cacheline - * inside the timeline's HWSP vma, but it is only valid while this - * request has not completed and guarded by the timeline mutex. - */ - struct intel_timeline_cacheline __rcu *hwsp_cacheline; - /** Position in the ring of the start of the request */ u32 head; @@ -287,6 +279,12 @@ struct i915_request { /** timeline->request entry for this request */ struct list_head link; + /** Watchdog support fields. 
*/ + struct i915_request_watchdog { + struct llist_node link; + struct hrtimer timer; + } watchdog; + I915_SELFTEST_DECLARE(struct { struct list_head link; unsigned long delay; @@ -310,8 +308,8 @@ struct i915_request * __must_check i915_request_create(struct intel_context *ce); void __i915_request_skip(struct i915_request *rq); -void i915_request_set_error_once(struct i915_request *rq, int error); -void i915_request_mark_eio(struct i915_request *rq); +bool i915_request_set_error_once(struct i915_request *rq, int error); +struct i915_request *i915_request_mark_eio(struct i915_request *rq); struct i915_request *__i915_request_commit(struct i915_request *request); void __i915_request_queue(struct i915_request *rq, @@ -366,6 +364,8 @@ void i915_request_submit(struct i915_request *request); void __i915_request_unsubmit(struct i915_request *request); void i915_request_unsubmit(struct i915_request *request); +void i915_request_cancel(struct i915_request *rq, int error); + long i915_request_wait(struct i915_request *rq, unsigned int flags, long timeout) @@ -616,4 +616,29 @@ i915_request_active_timeline(const struct i915_request *rq) lockdep_is_held(&rq->engine->active.lock)); } +static inline u32 +i915_request_active_seqno(const struct i915_request *rq) +{ + u32 hwsp_phys_base = + page_mask_bits(i915_request_active_timeline(rq)->hwsp_offset); + u32 hwsp_relative_offset = offset_in_page(rq->hwsp_seqno); + + /* + * Because of wraparound, we cannot simply take tl->hwsp_offset, + * but instead use the fact that the offset of rq->hwsp_seqno within + * its page is the same as the offset within hwsp_offset. Take the + * top bits from tl->hwsp_offset and combine them with the relative + * offset in rq->hwsp_seqno. + * + * As rq->hwsp_seqno is rewritten when signaled, this only works + * when the request isn't signaled yet, but at that point you + * no longer need the offset.
+ */ + + return hwsp_phys_base + hwsp_relative_offset; +} + +bool +i915_request_active_engine(struct i915_request *rq, + struct intel_engine_cs **active); + #endif /* I915_REQUEST_H */ diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 7144239f08df..efa638c3acc7 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -43,7 +43,7 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb) static void assert_priolists(struct intel_engine_execlists * const execlists) { struct rb_node *rb; - long last_prio, i; + long last_prio; if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) return; @@ -57,14 +57,6 @@ static void assert_priolists(struct intel_engine_execlists * const execlists) GEM_BUG_ON(p->priority > last_prio); last_prio = p->priority; - - GEM_BUG_ON(!p->used); - for (i = 0; i < ARRAY_SIZE(p->requests); i++) { - if (list_empty(&p->requests[i])) - continue; - - GEM_BUG_ON(!(p->used & BIT(i))); - } } } @@ -75,14 +67,10 @@ i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio) struct i915_priolist *p; struct rb_node **parent, *rb; bool first = true; - int idx, i; lockdep_assert_held(&engine->active.lock); assert_priolists(execlists); - /* buckets sorted from highest [in slot 0] to lowest priority */ - idx = I915_PRIORITY_COUNT - (prio & I915_PRIORITY_MASK) - 1; - prio >>= I915_USER_PRIORITY_SHIFT; if (unlikely(execlists->no_priolist)) prio = I915_PRIORITY_NORMAL; @@ -99,7 +87,7 @@ find_priolist: parent = &rb->rb_right; first = false; } else { - goto out; + return &p->requests; } } @@ -125,15 +113,12 @@ find_priolist: } p->priority = prio; - for (i = 0; i < ARRAY_SIZE(p->requests); i++) - INIT_LIST_HEAD(&p->requests[i]); + INIT_LIST_HEAD(&p->requests); + rb_link_node(&p->node, rb, parent); rb_insert_color_cached(&p->node, &execlists->queue, first); - p->used = 0; -out: - p->used |= BIT(idx); - return &p->requests[idx]; + return &p->requests; } void __i915_priolist_free(struct i915_priolist *p) @@ -363,30 +348,6 @@ void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr) spin_unlock_irq(&schedule_lock); } -static void __bump_priority(struct i915_sched_node *node, unsigned int bump) -{ - struct i915_sched_attr attr = node->attr; - - if (attr.priority & bump) - return; - - attr.priority |= bump; - __i915_schedule(node, &attr); -} - -void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump) -{ - unsigned long flags; - - GEM_BUG_ON(bump & ~I915_PRIORITY_MASK); - if (READ_ONCE(rq->sched.attr.priority) & bump) - return; - - spin_lock_irqsave(&schedule_lock, flags); - __bump_priority(&rq->sched, bump); - spin_unlock_irqrestore(&schedule_lock, flags); -} - void i915_sched_node_init(struct i915_sched_node *node) { INIT_LIST_HEAD(&node->signalers_list); @@ -553,8 +514,7 @@ int __init i915_global_scheduler_init(void) if (!global.slab_dependencies) return -ENOMEM; - global.slab_priorities = KMEM_CACHE(i915_priolist, - SLAB_HWCACHE_ALIGN); + global.slab_priorities = KMEM_CACHE(i915_priolist, 0); if (!global.slab_priorities) goto err_priorities; diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index 4501e5ac2637..858a0938f47a 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -15,17 +15,11 @@ struct drm_printer; -#define priolist_for_each_request(it, plist, idx) \ - for (idx = 0; idx < ARRAY_SIZE((plist)->requests); idx++) \ - list_for_each_entry(it, 
&(plist)->requests[idx], sched.link) +#define priolist_for_each_request(it, plist) \ + list_for_each_entry(it, &(plist)->requests, sched.link) -#define priolist_for_each_request_consume(it, n, plist, idx) \ - for (; \ - (plist)->used ? (idx = __ffs((plist)->used)), 1 : 0; \ - (plist)->used &= ~BIT(idx)) \ - list_for_each_entry_safe(it, n, \ - &(plist)->requests[idx], \ - sched.link) +#define priolist_for_each_request_consume(it, n, plist) \ + list_for_each_entry_safe(it, n, &(plist)->requests, sched.link) void i915_sched_node_init(struct i915_sched_node *node); void i915_sched_node_reinit(struct i915_sched_node *node); @@ -44,8 +38,6 @@ void i915_sched_node_fini(struct i915_sched_node *node); void i915_schedule(struct i915_request *request, const struct i915_sched_attr *attr); -void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump); - struct list_head * i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio); diff --git a/drivers/gpu/drm/i915/i915_selftest.h b/drivers/gpu/drm/i915/i915_selftest.h index d53d207ab6eb..f54de0499be7 100644 --- a/drivers/gpu/drm/i915/i915_selftest.h +++ b/drivers/gpu/drm/i915/i915_selftest.h @@ -107,6 +107,7 @@ int __i915_subtests(const char *caller, #define I915_SELFTEST_DECLARE(x) x #define I915_SELFTEST_ONLY(x) unlikely(x) +#define I915_SELFTEST_EXPORT #else /* !IS_ENABLED(CONFIG_DRM_I915_SELFTEST) */ @@ -116,6 +117,7 @@ static inline int i915_perf_selftests(struct pci_dev *pdev) { return 0; } #define I915_SELFTEST_DECLARE(x) #define I915_SELFTEST_ONLY(x) 0 +#define I915_SELFTEST_EXPORT static #endif diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index caa9b041616b..07490db51cdc 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -230,7 +230,7 @@ err_vma: } static struct i915_vma * -vma_lookup(struct drm_i915_gem_object *obj, +i915_vma_lookup(struct drm_i915_gem_object *obj, struct i915_address_space *vm, const struct i915_ggtt_view *view) { @@ -278,7 +278,7 @@ i915_vma_instance(struct drm_i915_gem_object *obj, GEM_BUG_ON(!atomic_read(&vm->open)); spin_lock(&obj->vma.lock); - vma = vma_lookup(obj, vm, view); + vma = i915_vma_lookup(obj, vm, view); spin_unlock(&obj->vma.lock); /* vma_create() will resolve the race if another creates the vma */ @@ -863,8 +863,8 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, int err; #ifdef CONFIG_PROVE_LOCKING - if (debug_locks && lockdep_is_held(&vma->vm->i915->drm.struct_mutex)) - WARN_ON(!ww); + if (debug_locks && !WARN_ON(!ww) && vma->resv) + assert_vma_held(vma); #endif BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND); @@ -884,6 +884,11 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm); if (flags & vma->vm->bind_async_flags) { + /* lock VM */ + err = i915_vm_lock_objects(vma->vm, ww); + if (err) + goto err_rpm; + work = i915_vma_work(); if (!work) { err = -ENOMEM; @@ -1020,8 +1025,15 @@ int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, GEM_BUG_ON(!i915_vma_is_ggtt(vma)); +#ifdef CONFIG_LOCKDEP + WARN_ON(!ww && vma->resv && dma_resv_held(vma->resv)); +#endif + do { - err = i915_vma_pin_ww(vma, ww, 0, align, flags | PIN_GLOBAL); + if (ww) + err = i915_vma_pin_ww(vma, ww, 0, align, flags | PIN_GLOBAL); + else + err = i915_vma_pin(vma, 0, align, flags | PIN_GLOBAL); if (err != -ENOSPC) { if (!err) { err = i915_vma_wait_for_bind(vma); @@ -1238,9 +1250,11 @@ int i915_vma_move_to_active(struct i915_vma 
*vma, obj->write_domain = I915_GEM_DOMAIN_RENDER; obj->read_domains = 0; } else { - err = dma_resv_reserve_shared(vma->resv, 1); - if (unlikely(err)) - return err; + if (!(flags & __EXEC_OBJECT_NO_RESERVE)) { + err = dma_resv_reserve_shared(vma->resv, 1); + if (unlikely(err)) + return err; + } dma_resv_add_shared_fence(vma->resv, &rq->fence); obj->write_domain = 0; diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index a64adc8c883b..8df784a026d2 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -52,6 +52,9 @@ static inline bool i915_vma_is_active(const struct i915_vma *vma) return !i915_active_is_idle(&vma->active); } +/* do not reserve memory to prevent deadlocks */ +#define __EXEC_OBJECT_NO_RESERVE BIT(31) + int __must_check __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq); int __must_check i915_vma_move_to_active(struct i915_vma *vma, @@ -243,7 +246,22 @@ i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, static inline int __must_check i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) { - return i915_vma_pin_ww(vma, NULL, size, alignment, flags); + struct i915_gem_ww_ctx ww; + int err; + + i915_gem_ww_ctx_init(&ww, true); +retry: + err = i915_gem_object_lock(vma->obj, &ww); + if (!err) + err = i915_vma_pin_ww(vma, &ww, size, alignment, flags); + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + + return err; } int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 1035a841610b..de02207f6ec6 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -223,7 +223,7 @@ void intel_device_info_subplatform_init(struct drm_i915_private *i915) } } - GEM_BUG_ON(mask & ~INTEL_SUBPLATFORM_BITS); + GEM_BUG_ON(mask & ~INTEL_SUBPLATFORM_MASK); RUNTIME_INFO(i915)->platform_mask[pi] |= mask; } diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 9d31d9f6c6b5..2f442d418a15 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -95,7 +95,8 @@ enum intel_platform { * it is fine for the same bit to be used on multiple parent platforms. */ -#define INTEL_SUBPLATFORM_BITS (3) +#define INTEL_SUBPLATFORM_BITS (2) +#define INTEL_SUBPLATFORM_MASK (BIT(INTEL_SUBPLATFORM_BITS) - 1) /* HSW/BDW/SKL/KBL/CFL */ #define INTEL_SUBPLATFORM_ULT (0) diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index 1bfcdd89b241..bf837b6bb185 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -6,14 +6,22 @@ #include "intel_memory_region.h" #include "i915_drv.h" -/* XXX: Hysterical raisins. BIT(inst) needs to just be (inst) at some point. 
*/ -#define REGION_MAP(type, inst) \ - BIT((type) + INTEL_MEMORY_TYPE_SHIFT) | BIT(inst) - -static const u32 intel_region_map[] = { - [INTEL_REGION_SMEM] = REGION_MAP(INTEL_MEMORY_SYSTEM, 0), - [INTEL_REGION_LMEM] = REGION_MAP(INTEL_MEMORY_LOCAL, 0), - [INTEL_REGION_STOLEN] = REGION_MAP(INTEL_MEMORY_STOLEN, 0), +static const struct { + u16 class; + u16 instance; +} intel_region_map[] = { + [INTEL_REGION_SMEM] = { + .class = INTEL_MEMORY_SYSTEM, + .instance = 0, + }, + [INTEL_REGION_LMEM] = { + .class = INTEL_MEMORY_LOCAL, + .instance = 0, + }, + [INTEL_REGION_STOLEN_SMEM] = { + .class = INTEL_MEMORY_STOLEN_SYSTEM, + .instance = 0, + }, }; struct intel_memory_region * @@ -156,9 +164,22 @@ int intel_memory_region_init_buddy(struct intel_memory_region *mem) void intel_memory_region_release_buddy(struct intel_memory_region *mem) { + i915_buddy_free_list(&mem->mm, &mem->reserved); i915_buddy_fini(&mem->mm); } +int intel_memory_region_reserve(struct intel_memory_region *mem, + u64 offset, u64 size) +{ + int ret; + + mutex_lock(&mem->mm_lock); + ret = i915_buddy_alloc_range(&mem->mm, &mem->reserved, offset, size); + mutex_unlock(&mem->mm_lock); + + return ret; +} + struct intel_memory_region * intel_memory_region_create(struct drm_i915_private *i915, resource_size_t start, @@ -185,6 +206,7 @@ intel_memory_region_create(struct drm_i915_private *i915, mutex_init(&mem->objects.lock); INIT_LIST_HEAD(&mem->objects.list); INIT_LIST_HEAD(&mem->objects.purgeable); + INIT_LIST_HEAD(&mem->reserved); mutex_init(&mem->mm_lock); @@ -245,22 +267,22 @@ int intel_memory_regions_hw_probe(struct drm_i915_private *i915) for (i = 0; i < ARRAY_SIZE(i915->mm.regions); i++) { struct intel_memory_region *mem = ERR_PTR(-ENODEV); - u32 type; + u16 type, instance; if (!HAS_REGION(i915, BIT(i))) continue; - type = MEMORY_TYPE_FROM_REGION(intel_region_map[i]); + type = intel_region_map[i].class; + instance = intel_region_map[i].instance; switch (type) { case INTEL_MEMORY_SYSTEM: mem = i915_gem_shmem_setup(i915); break; - case INTEL_MEMORY_STOLEN: + case INTEL_MEMORY_STOLEN_SYSTEM: mem = i915_gem_stolen_setup(i915); break; - case INTEL_MEMORY_LOCAL: - mem = intel_setup_fake_lmem(i915); - break; + default: + continue; } if (IS_ERR(mem)) { @@ -271,9 +293,9 @@ int intel_memory_regions_hw_probe(struct drm_i915_private *i915) goto out_cleanup; } - mem->id = intel_region_map[i]; + mem->id = i; mem->type = type; - mem->instance = MEMORY_INSTANCE_FROM_REGION(intel_region_map[i]); + mem->instance = instance; i915->mm.regions[i] = mem; } diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index 6ffc0673f005..edd49067c8ca 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -25,24 +25,19 @@ struct sg_table; enum intel_memory_type { INTEL_MEMORY_SYSTEM = 0, INTEL_MEMORY_LOCAL, - INTEL_MEMORY_STOLEN, + INTEL_MEMORY_STOLEN_SYSTEM, }; enum intel_region_id { INTEL_REGION_SMEM = 0, INTEL_REGION_LMEM, - INTEL_REGION_STOLEN, + INTEL_REGION_STOLEN_SMEM, INTEL_REGION_UNKNOWN, /* Should be last */ }; #define REGION_SMEM BIT(INTEL_REGION_SMEM) #define REGION_LMEM BIT(INTEL_REGION_LMEM) -#define REGION_STOLEN BIT(INTEL_REGION_STOLEN) - -#define INTEL_MEMORY_TYPE_SHIFT 16 - -#define MEMORY_TYPE_FROM_REGION(r) (ilog2((r) >> INTEL_MEMORY_TYPE_SHIFT)) -#define MEMORY_INSTANCE_FROM_REGION(r) (ilog2((r) & 0xffff)) +#define REGION_STOLEN_SMEM BIT(INTEL_REGION_STOLEN_SMEM) #define I915_ALLOC_MIN_PAGE_SIZE BIT(0) #define I915_ALLOC_CONTIGUOUS BIT(1) @@ 
-84,11 +79,13 @@ struct intel_memory_region { resource_size_t total; resource_size_t avail; - unsigned int type; - unsigned int instance; - unsigned int id; + u16 type; + u16 instance; + enum intel_region_id id; char name[8]; + struct list_head reserved; + dma_addr_t remap_addr; struct { @@ -113,6 +110,9 @@ void __intel_memory_region_put_pages_buddy(struct intel_memory_region *mem, struct list_head *blocks); void __intel_memory_region_put_block_buddy(struct i915_buddy_block *block); +int intel_memory_region_reserve(struct intel_memory_region *mem, + u64 offset, u64 size); + struct intel_memory_region * intel_memory_region_create(struct drm_i915_private *i915, resource_size_t start, diff --git a/drivers/gpu/drm/i915/selftests/i915_buddy.c b/drivers/gpu/drm/i915/selftests/i915_buddy.c index 632b912b0bc9..f0f5c4df8dbc 100644 --- a/drivers/gpu/drm/i915/selftests/i915_buddy.c +++ b/drivers/gpu/drm/i915/selftests/i915_buddy.c @@ -727,6 +727,53 @@ err_fini: return err; } +static int igt_buddy_alloc_limit(void *arg) +{ + struct i915_buddy_block *block; + struct i915_buddy_mm mm; + const u64 size = U64_MAX; + int err; + + err = i915_buddy_init(&mm, size, PAGE_SIZE); + if (err) + return err; + + if (mm.max_order != I915_BUDDY_MAX_ORDER) { + pr_err("mm.max_order(%d) != %d\n", + mm.max_order, I915_BUDDY_MAX_ORDER); + err = -EINVAL; + goto out_fini; + } + + block = i915_buddy_alloc(&mm, mm.max_order); + if (IS_ERR(block)) { + err = PTR_ERR(block); + goto out_fini; + } + + if (i915_buddy_block_order(block) != mm.max_order) { + pr_err("block order(%d) != %d\n", + i915_buddy_block_order(block), mm.max_order); + err = -EINVAL; + goto out_free; + } + + if (i915_buddy_block_size(&mm, block) != + BIT_ULL(mm.max_order) * PAGE_SIZE) { + pr_err("block size(%llu) != %llu\n", + i915_buddy_block_size(&mm, block), + BIT_ULL(mm.max_order) * PAGE_SIZE); + err = -EINVAL; + goto out_free; + } + +out_free: + i915_buddy_free(&mm, block); +out_fini: + i915_buddy_fini(&mm); + return err; +} + int i915_buddy_mock_selftests(void) { static const struct i915_subtest tests[] = { @@ -735,6 +782,7 @@ int i915_buddy_mock_selftests(void) SUBTEST(igt_buddy_alloc_pathological), SUBTEST(igt_buddy_alloc_smoke), SUBTEST(igt_buddy_alloc_range), + SUBTEST(igt_buddy_alloc_limit), }; return i915_subtests(tests, NULL); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index c1adea8765a9..2e4f06eaacc1 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -121,7 +121,7 @@ fake_dma_object(struct drm_i915_private *i915, u64 size) goto err; drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &fake_ops, &lock_class); + i915_gem_object_init(obj, &fake_ops, &lock_class, 0); i915_gem_object_set_volatile(obj); @@ -130,7 +130,7 @@ fake_dma_object(struct drm_i915_private *i915, u64 size) obj->cache_level = I915_CACHE_NONE; /* Preallocate the "backing storage" */ - if (i915_gem_object_pin_pages(obj)) + if (i915_gem_object_pin_pages_unlocked(obj)) goto err_obj; i915_gem_object_unpin_pages(obj); @@ -146,6 +146,7 @@ static int igt_ppgtt_alloc(void *arg) { struct drm_i915_private *dev_priv = arg; struct i915_ppgtt *ppgtt; + struct i915_gem_ww_ctx ww; u64 size, last, limit; int err = 0; @@ -171,6 +172,12 @@ static int igt_ppgtt_alloc(void *arg) limit = totalram_pages() << PAGE_SHIFT; limit = min(ppgtt->vm.total, limit); + i915_gem_ww_ctx_init(&ww, false); +retry: + err = i915_vm_lock_objects(&ppgtt->vm, &ww); + 
if (err) + goto err_ppgtt_cleanup; + /* Check we can allocate the entire range */ for (size = 4096; size <= limit; size <<= 2) { struct i915_vm_pt_stash stash = {}; @@ -215,6 +222,13 @@ static int igt_ppgtt_alloc(void *arg) } err_ppgtt_cleanup: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + i915_vm_put(&ppgtt->vm); return err; } @@ -276,7 +290,7 @@ static int lowlevel_hole(struct i915_address_space *vm, GEM_BUG_ON(obj->base.size != BIT_ULL(size)); - if (i915_gem_object_pin_pages(obj)) { + if (i915_gem_object_pin_pages_unlocked(obj)) { i915_gem_object_put(obj); kfree(order); break; @@ -297,20 +311,36 @@ static int lowlevel_hole(struct i915_address_space *vm, if (vm->allocate_va_range) { struct i915_vm_pt_stash stash = {}; + struct i915_gem_ww_ctx ww; + int err; + + i915_gem_ww_ctx_init(&ww, false); +retry: + err = i915_vm_lock_objects(vm, &ww); + if (err) + goto alloc_vm_end; + err = -ENOMEM; if (i915_vm_alloc_pt_stash(vm, &stash, BIT_ULL(size))) - break; - - if (i915_vm_pin_pt_stash(vm, &stash)) { - i915_vm_free_pt_stash(vm, &stash); - break; - } + goto alloc_vm_end; - vm->allocate_va_range(vm, &stash, - addr, BIT_ULL(size)); + err = i915_vm_pin_pt_stash(vm, &stash); + if (!err) + vm->allocate_va_range(vm, &stash, + addr, BIT_ULL(size)); i915_vm_free_pt_stash(vm, &stash); +alloc_vm_end: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + + if (err) + break; } mock_vma->pages = obj->mm.pages; @@ -1166,7 +1196,7 @@ static int igt_ggtt_page(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); - err = i915_gem_object_pin_pages(obj); + err = i915_gem_object_pin_pages_unlocked(obj); if (err) goto out_free; @@ -1333,7 +1363,7 @@ static int igt_gtt_reserve(void *arg) goto out; } - err = i915_gem_object_pin_pages(obj); + err = i915_gem_object_pin_pages_unlocked(obj); if (err) { i915_gem_object_put(obj); goto out; @@ -1385,7 +1415,7 @@ static int igt_gtt_reserve(void *arg) goto out; } - err = i915_gem_object_pin_pages(obj); + err = i915_gem_object_pin_pages_unlocked(obj); if (err) { i915_gem_object_put(obj); goto out; @@ -1549,7 +1579,7 @@ static int igt_gtt_insert(void *arg) goto out; } - err = i915_gem_object_pin_pages(obj); + err = i915_gem_object_pin_pages_unlocked(obj); if (err) { i915_gem_object_put(obj); goto out; @@ -1658,7 +1688,7 @@ static int igt_gtt_insert(void *arg) goto out; } - err = i915_gem_object_pin_pages(obj); + err = i915_gem_object_pin_pages_unlocked(obj); if (err) { i915_gem_object_put(obj); goto out; @@ -1829,7 +1859,7 @@ static int igt_cs_tlb(void *arg) goto out_vm; } - batch = i915_gem_object_pin_map(bbe, I915_MAP_WC); + batch = i915_gem_object_pin_map_unlocked(bbe, I915_MAP_WC); if (IS_ERR(batch)) { err = PTR_ERR(batch); goto out_put_bbe; @@ -1845,7 +1875,7 @@ static int igt_cs_tlb(void *arg) } /* Track the execution of each request by writing into different slot */ - batch = i915_gem_object_pin_map(act, I915_MAP_WC); + batch = i915_gem_object_pin_map_unlocked(act, I915_MAP_WC); if (IS_ERR(batch)) { err = PTR_ERR(batch); goto out_put_act; @@ -1892,7 +1922,7 @@ static int igt_cs_tlb(void *arg) goto out_put_out; GEM_BUG_ON(vma->node.start != vm->total - PAGE_SIZE); - result = i915_gem_object_pin_map(out, I915_MAP_WB); + result = i915_gem_object_pin_map_unlocked(out, I915_MAP_WB); if (IS_ERR(result)) { err = PTR_ERR(result); goto out_put_out; @@ -1908,6 +1938,7 @@ static int igt_cs_tlb(void *arg) while (!__igt_timeout(end_time, 
NULL)) { struct i915_vm_pt_stash stash = {}; struct i915_request *rq; + struct i915_gem_ww_ctx ww; u64 offset; offset = igt_random_offset(&prng, @@ -1926,19 +1957,30 @@ static int igt_cs_tlb(void *arg) if (err) goto end; + i915_gem_ww_ctx_init(&ww, false); +retry: + err = i915_vm_lock_objects(vm, &ww); + if (err) + goto end_ww; + err = i915_vm_alloc_pt_stash(vm, &stash, chunk_size); if (err) - goto end; + goto end_ww; err = i915_vm_pin_pt_stash(vm, &stash); - if (err) { - i915_vm_free_pt_stash(vm, &stash); - goto end; - } - - vm->allocate_va_range(vm, &stash, offset, chunk_size); + if (!err) + vm->allocate_va_range(vm, &stash, offset, chunk_size); i915_vm_free_pt_stash(vm, &stash); +end_ww: + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + if (err) + goto end; /* Prime the TLB with the dummy pages */ for (i = 0; i < count; i++) { diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index d2a678a2497e..ee8e753d98ce 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -609,6 +609,206 @@ static int live_nop_request(void *arg) return err; } +static int __cancel_inactive(struct intel_engine_cs *engine) +{ + struct intel_context *ce; + struct igt_spinner spin; + struct i915_request *rq; + int err = 0; + + if (igt_spinner_init(&spin, engine->gt)) + return -ENOMEM; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto out_spin; + } + + rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_ce; + } + + pr_debug("%s: Cancelling inactive request\n", engine->name); + i915_request_cancel(rq, -EINTR); + i915_request_get(rq); + i915_request_add(rq); + + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + struct drm_printer p = drm_info_printer(engine->i915->drm.dev); + + pr_err("%s: Failed to cancel inactive request\n", engine->name); + intel_engine_dump(engine, &p, "%s\n", engine->name); + err = -ETIME; + goto out_rq; + } + + if (rq->fence.error != -EINTR) { + pr_err("%s: fence not cancelled (%u)\n", + engine->name, rq->fence.error); + err = -EINVAL; + } + +out_rq: + i915_request_put(rq); +out_ce: + intel_context_put(ce); +out_spin: + igt_spinner_fini(&spin); + if (err) + pr_err("%s: %s error %d\n", __func__, engine->name, err); + return err; +} + +static int __cancel_active(struct intel_engine_cs *engine) +{ + struct intel_context *ce; + struct igt_spinner spin; + struct i915_request *rq; + int err = 0; + + if (igt_spinner_init(&spin, engine->gt)) + return -ENOMEM; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto out_spin; + } + + rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_ce; + } + + pr_debug("%s: Cancelling active request\n", engine->name); + i915_request_get(rq); + i915_request_add(rq); + if (!igt_wait_for_spinner(&spin, rq)) { + struct drm_printer p = drm_info_printer(engine->i915->drm.dev); + + pr_err("Failed to start spinner on %s\n", engine->name); + intel_engine_dump(engine, &p, "%s\n", engine->name); + err = -ETIME; + goto out_rq; + } + i915_request_cancel(rq, -EINTR); + + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + struct drm_printer p = drm_info_printer(engine->i915->drm.dev); + + pr_err("%s: Failed to cancel active request\n", engine->name); + intel_engine_dump(engine, &p, "%s\n", engine->name); + err = -ETIME; + goto 
out_rq; + } + + if (rq->fence.error != -EINTR) { + pr_err("%s: fence not cancelled (%u)\n", + engine->name, rq->fence.error); + err = -EINVAL; + } + +out_rq: + i915_request_put(rq); +out_ce: + intel_context_put(ce); +out_spin: + igt_spinner_fini(&spin); + if (err) + pr_err("%s: %s error %d\n", __func__, engine->name, err); + return err; +} + +static int __cancel_completed(struct intel_engine_cs *engine) +{ + struct intel_context *ce; + struct igt_spinner spin; + struct i915_request *rq; + int err = 0; + + if (igt_spinner_init(&spin, engine->gt)) + return -ENOMEM; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto out_spin; + } + + rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_ce; + } + igt_spinner_end(&spin); + i915_request_get(rq); + i915_request_add(rq); + + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + err = -ETIME; + goto out_rq; + } + + pr_debug("%s: Cancelling completed request\n", engine->name); + i915_request_cancel(rq, -EINTR); + if (rq->fence.error) { + pr_err("%s: fence not cancelled (%u)\n", + engine->name, rq->fence.error); + err = -EINVAL; + } + +out_rq: + i915_request_put(rq); +out_ce: + intel_context_put(ce); +out_spin: + igt_spinner_fini(&spin); + if (err) + pr_err("%s: %s error %d\n", __func__, engine->name, err); + return err; +} + +static int live_cancel_request(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + + /* + * Check cancellation of requests. We expect to be able to immediately + * cancel active requests, even if they are currently on the GPU. + */ + + for_each_uabi_engine(engine, i915) { + struct igt_live_test t; + int err, err2; + + if (!intel_engine_has_preemption(engine)) + continue; + + err = igt_live_test_begin(&t, i915, __func__, engine->name); + if (err) + return err; + + err = __cancel_inactive(engine); + if (err == 0) + err = __cancel_active(engine); + if (err == 0) + err = __cancel_completed(engine); + + err2 = igt_live_test_end(&t); + if (err) + return err; + if (err2) + return err2; + } + + return 0; +} + static struct i915_vma *empty_batch(struct drm_i915_private *i915) { struct drm_i915_gem_object *obj; @@ -620,7 +820,7 @@ static struct i915_vma *empty_batch(struct drm_i915_private *i915) if (IS_ERR(obj)) return ERR_CAST(obj); - cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); goto err; @@ -782,7 +982,7 @@ static struct i915_vma *recursive_batch(struct drm_i915_private *i915) if (err) goto err; - cmd = i915_gem_object_pin_map(obj, I915_MAP_WC); + cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); goto err; @@ -817,7 +1017,7 @@ static int recursive_batch_resolve(struct i915_vma *batch) { u32 *cmd; - cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); + cmd = i915_gem_object_pin_map_unlocked(batch->obj, I915_MAP_WC); if (IS_ERR(cmd)) return PTR_ERR(cmd); @@ -1070,8 +1270,8 @@ out_request: if (!request[idx]) break; - cmd = i915_gem_object_pin_map(request[idx]->batch->obj, - I915_MAP_WC); + cmd = i915_gem_object_pin_map_unlocked(request[idx]->batch->obj, + I915_MAP_WC); if (!IS_ERR(cmd)) { *cmd = MI_BATCH_BUFFER_END; @@ -1486,6 +1686,7 @@ int i915_request_live_selftests(struct drm_i915_private *i915) SUBTEST(live_sequential_engines), SUBTEST(live_parallel_engines), SUBTEST(live_empty_request), + SUBTEST(live_cancel_request), SUBTEST(live_breadcrumbs_smoketest), }; 
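Aside: all three cancellation selftests above funnel through i915_request_cancel(), which relies on i915_request_active_engine() locking an engine pointer that may be re-pointed underneath it. Below is a minimal userspace sketch of that lock-chasing pattern, assuming pthreads; the struct layout and names (engine, request, lock_active_engine) are illustrative and not part of the i915 code.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct engine {
	pthread_mutex_t lock;
	const char *name;
};

struct request {
	_Atomic(struct engine *) engine;	/* unstable: may be re-pointed */
	bool active;
};

/*
 * Lock whichever engine the request finally settles on: take the lock,
 * then re-check that rq->engine still names the lock we hold.  This
 * borrows the i915 invariant that the pointer only changes while the
 * engine lock is held, so a stable read under the lock means we won.
 */
static struct engine *lock_active_engine(struct request *rq)
{
	struct engine *locked, *engine;

	locked = atomic_load(&rq->engine);
	pthread_mutex_lock(&locked->lock);
	while ((engine = atomic_load(&rq->engine)) != locked) {
		/* The request migrated; chase it to its new engine. */
		pthread_mutex_unlock(&locked->lock);
		locked = engine;
		pthread_mutex_lock(&locked->lock);
	}
	return locked;	/* caller unlocks */
}

int main(void)
{
	struct engine rcs0 = { PTHREAD_MUTEX_INITIALIZER, "rcs0" };
	struct request rq = { .engine = &rcs0, .active = true };
	struct engine *held = lock_active_engine(&rq);

	printf("request stable on %s\n", held->name);
	pthread_mutex_unlock(&held->lock);
	return 0;
}

The loop settles only because of the invariant noted in the comment: a pointer rewritten exclusively under the engine lock cannot be stale once it matches the lock we already hold.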
diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c index 83f6e5f31fb3..cfbbe415b57c 100644 --- a/drivers/gpu/drm/i915/selftests/igt_spinner.c +++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c @@ -12,8 +12,6 @@ int igt_spinner_init(struct igt_spinner *spin, struct intel_gt *gt) { - unsigned int mode; - void *vaddr; int err; memset(spin, 0, sizeof(*spin)); @@ -24,6 +22,7 @@ int igt_spinner_init(struct igt_spinner *spin, struct intel_gt *gt) err = PTR_ERR(spin->hws); goto err; } + i915_gem_object_set_cache_coherency(spin->hws, I915_CACHE_LLC); spin->obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); if (IS_ERR(spin->obj)) { @@ -31,34 +30,83 @@ int igt_spinner_init(struct igt_spinner *spin, struct intel_gt *gt) goto err_hws; } - i915_gem_object_set_cache_coherency(spin->hws, I915_CACHE_LLC); - vaddr = i915_gem_object_pin_map(spin->hws, I915_MAP_WB); - if (IS_ERR(vaddr)) { - err = PTR_ERR(vaddr); - goto err_obj; - } - spin->seqno = memset(vaddr, 0xff, PAGE_SIZE); - - mode = i915_coherent_map_type(gt->i915); - vaddr = i915_gem_object_pin_map(spin->obj, mode); - if (IS_ERR(vaddr)) { - err = PTR_ERR(vaddr); - goto err_unpin_hws; - } - spin->batch = vaddr; - return 0; -err_unpin_hws: - i915_gem_object_unpin_map(spin->hws); -err_obj: - i915_gem_object_put(spin->obj); err_hws: i915_gem_object_put(spin->hws); err: return err; } +static void *igt_spinner_pin_obj(struct intel_context *ce, + struct i915_gem_ww_ctx *ww, + struct drm_i915_gem_object *obj, + unsigned int mode, struct i915_vma **vma) +{ + void *vaddr; + int ret; + + *vma = i915_vma_instance(obj, ce->vm, NULL); + if (IS_ERR(*vma)) + return ERR_CAST(*vma); + + ret = i915_gem_object_lock(obj, ww); + if (ret) + return ERR_PTR(ret); + + vaddr = i915_gem_object_pin_map(obj, mode); + + if (!ww) + i915_gem_object_unlock(obj); + + if (IS_ERR(vaddr)) + return vaddr; + + if (ww) + ret = i915_vma_pin_ww(*vma, ww, 0, 0, PIN_USER); + else + ret = i915_vma_pin(*vma, 0, 0, PIN_USER); + + if (ret) { + i915_gem_object_unpin_map(obj); + return ERR_PTR(ret); + } + + return vaddr; +} + +int igt_spinner_pin(struct igt_spinner *spin, + struct intel_context *ce, + struct i915_gem_ww_ctx *ww) +{ + void *vaddr; + + if (spin->ce && WARN_ON(spin->ce != ce)) + return -ENODEV; + spin->ce = ce; + + if (!spin->seqno) { + vaddr = igt_spinner_pin_obj(ce, ww, spin->hws, I915_MAP_WB, &spin->hws_vma); + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); + + spin->seqno = memset(vaddr, 0xff, PAGE_SIZE); + } + + if (!spin->batch) { + unsigned int mode = + i915_coherent_map_type(spin->gt->i915); + + vaddr = igt_spinner_pin_obj(ce, ww, spin->obj, mode, &spin->batch_vma); + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); + + spin->batch = vaddr; + } + + return 0; +} + static unsigned int seqno_offset(u64 fence) { return offset_in_page(sizeof(u32) * fence); @@ -103,27 +151,18 @@ igt_spinner_create_request(struct igt_spinner *spin, if (!intel_engine_can_store_dword(ce->engine)) return ERR_PTR(-ENODEV); - vma = i915_vma_instance(spin->obj, ce->vm, NULL); - if (IS_ERR(vma)) - return ERR_CAST(vma); - - hws = i915_vma_instance(spin->hws, ce->vm, NULL); - if (IS_ERR(hws)) - return ERR_CAST(hws); + if (!spin->batch) { + err = igt_spinner_pin(spin, ce, NULL); + if (err) + return ERR_PTR(err); + } - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - return ERR_PTR(err); - - err = i915_vma_pin(hws, 0, 0, PIN_USER); - if (err) - goto unpin_vma; + hws = spin->hws_vma; + vma = spin->batch_vma; rq = intel_context_create_request(ce); - if 
(IS_ERR(rq)) { - err = PTR_ERR(rq); - goto unpin_hws; - } + if (IS_ERR(rq)) + return ERR_CAST(rq); err = move_to_active(vma, rq, 0); if (err) @@ -186,10 +225,6 @@ cancel_rq: i915_request_set_error_once(rq, err); i915_request_add(rq); } -unpin_hws: - i915_vma_unpin(hws); -unpin_vma: - i915_vma_unpin(vma); return err ? ERR_PTR(err) : rq; } @@ -203,6 +238,9 @@ hws_seqno(const struct igt_spinner *spin, const struct i915_request *rq) void igt_spinner_end(struct igt_spinner *spin) { + if (!spin->batch) + return; + *spin->batch = MI_BATCH_BUFFER_END; intel_gt_chipset_flush(spin->gt); } @@ -211,10 +249,16 @@ void igt_spinner_fini(struct igt_spinner *spin) { igt_spinner_end(spin); - i915_gem_object_unpin_map(spin->obj); + if (spin->batch) { + i915_vma_unpin(spin->batch_vma); + i915_gem_object_unpin_map(spin->obj); + } i915_gem_object_put(spin->obj); - i915_gem_object_unpin_map(spin->hws); + if (spin->seqno) { + i915_vma_unpin(spin->hws_vma); + i915_gem_object_unpin_map(spin->hws); + } i915_gem_object_put(spin->hws); } diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.h b/drivers/gpu/drm/i915/selftests/igt_spinner.h index ec62c9ef320b..fbe5b1625b05 100644 --- a/drivers/gpu/drm/i915/selftests/igt_spinner.h +++ b/drivers/gpu/drm/i915/selftests/igt_spinner.h @@ -20,11 +20,16 @@ struct igt_spinner { struct intel_gt *gt; struct drm_i915_gem_object *hws; struct drm_i915_gem_object *obj; + struct intel_context *ce; + struct i915_vma *hws_vma, *batch_vma; u32 *batch; void *seqno; }; int igt_spinner_init(struct igt_spinner *spin, struct intel_gt *gt); +int igt_spinner_pin(struct igt_spinner *spin, + struct intel_context *ce, + struct i915_gem_ww_ctx *ww); void igt_spinner_fini(struct igt_spinner *spin); struct i915_request * diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index ce7adfa3bca0..a5fc0bf3feb9 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -31,10 +31,12 @@ static void close_objects(struct intel_memory_region *mem, struct drm_i915_gem_object *obj, *on; list_for_each_entry_safe(obj, on, objects, st_link) { + i915_gem_object_lock(obj, NULL); if (i915_gem_object_has_pinned_pages(obj)) i915_gem_object_unpin_pages(obj); /* No polluting the memory region between tests */ __i915_gem_object_put_pages(obj); + i915_gem_object_unlock(obj); list_del(&obj->st_link); i915_gem_object_put(obj); } @@ -69,7 +71,7 @@ static int igt_mock_fill(void *arg) break; } - err = i915_gem_object_pin_pages(obj); + err = i915_gem_object_pin_pages_unlocked(obj); if (err) { i915_gem_object_put(obj); break; @@ -109,7 +111,7 @@ igt_object_create(struct intel_memory_region *mem, if (IS_ERR(obj)) return obj; - err = i915_gem_object_pin_pages(obj); + err = i915_gem_object_pin_pages_unlocked(obj); if (err) goto put; @@ -123,8 +125,10 @@ put: static void igt_object_release(struct drm_i915_gem_object *obj) { + i915_gem_object_lock(obj, NULL); i915_gem_object_unpin_pages(obj); __i915_gem_object_put_pages(obj); + i915_gem_object_unlock(obj); list_del(&obj->st_link); i915_gem_object_put(obj); } @@ -144,6 +148,82 @@ static bool is_contiguous(struct drm_i915_gem_object *obj) return true; } +static int igt_mock_reserve(void *arg) +{ + struct intel_memory_region *mem = arg; + resource_size_t avail = resource_size(&mem->region); + struct drm_i915_gem_object *obj; + const u32 chunk_size = SZ_32M; + u32 i, offset, count, *order; + u64 allocated, cur_avail; + I915_RND_STATE(prng); + 
LIST_HEAD(objects); + int err = 0; + + if (!list_empty(&mem->reserved)) { + pr_err("%s region reserved list is not empty\n", __func__); + return -EINVAL; + } + + count = avail / chunk_size; + order = i915_random_order(count, &prng); + if (!order) + return 0; + + /* Reserve a bunch of ranges within the region */ + for (i = 0; i < count; ++i) { + u64 start = order[i] * chunk_size; + u64 size = i915_prandom_u32_max_state(chunk_size, &prng); + + /* Allow for some really big holes */ + if (!size) + continue; + + size = round_up(size, PAGE_SIZE); + offset = igt_random_offset(&prng, 0, chunk_size, size, + PAGE_SIZE); + + err = intel_memory_region_reserve(mem, start + offset, size); + if (err) { + pr_err("%s failed to reserve range\n", __func__); + goto out_close; + } + + /* XXX: maybe sanity check the block range here? */ + avail -= size; + } + + /* Try to see if we can allocate from the remaining space */ + allocated = 0; + cur_avail = avail; + do { + u32 size = i915_prandom_u32_max_state(cur_avail, &prng); + + size = max_t(u32, round_up(size, PAGE_SIZE), PAGE_SIZE); + obj = igt_object_create(mem, &objects, size, 0); + if (IS_ERR(obj)) { + if (PTR_ERR(obj) == -ENXIO) + break; + + err = PTR_ERR(obj); + goto out_close; + } + cur_avail -= size; + allocated += size; + } while (1); + + if (allocated != avail) { + pr_err("%s mismatch between allocation and free space\n", __func__); + err = -EINVAL; + } + +out_close: + kfree(order); + close_objects(mem, &objects); + i915_buddy_free_list(&mem->mm, &mem->reserved); + return err; +} + static int igt_mock_contiguous(void *arg) { struct intel_memory_region *mem = arg; @@ -433,7 +513,7 @@ static int igt_cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) if (err) return err; - ptr = i915_gem_object_pin_map(obj, I915_MAP_WC); + ptr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC); if (IS_ERR(ptr)) return PTR_ERR(ptr); @@ -538,7 +618,7 @@ static int igt_lmem_create(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); - err = i915_gem_object_pin_pages(obj); + err = i915_gem_object_pin_pages_unlocked(obj); if (err) goto out_put; @@ -577,7 +657,7 @@ static int igt_lmem_write_gpu(void *arg) goto out_file; } - err = i915_gem_object_pin_pages(obj); + err = i915_gem_object_pin_pages_unlocked(obj); if (err) goto out_put; @@ -649,7 +729,7 @@ static int igt_lmem_write_cpu(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); - vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); + vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC); if (IS_ERR(vaddr)) { err = PTR_ERR(vaddr); goto out_put; @@ -753,7 +833,7 @@ create_region_for_mapping(struct intel_memory_region *mr, u64 size, u32 type, return obj; } - addr = i915_gem_object_pin_map(obj, type); + addr = i915_gem_object_pin_map_unlocked(obj, type); if (IS_ERR(addr)) { i915_gem_object_put(obj); if (PTR_ERR(addr) == -ENXIO) @@ -930,6 +1010,7 @@ static int perf_memcpy(void *arg) int intel_memory_region_mock_selftests(void) { static const struct i915_subtest tests[] = { + SUBTEST(igt_mock_reserve), SUBTEST(igt_mock_fill), SUBTEST(igt_mock_contiguous), SUBTEST(igt_mock_splintered_region), diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c index 3c6021415274..5d2d010a1e22 100644 --- a/drivers/gpu/drm/i915/selftests/mock_region.c +++ b/drivers/gpu/drm/i915/selftests/mock_region.c @@ -27,13 +27,13 @@ static int mock_object_init(struct intel_memory_region *mem, return -E2BIG; drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj,
&mock_region_obj_ops, &lock_class); + i915_gem_object_init(obj, &mock_region_obj_ops, &lock_class, flags); obj->read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT; i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); - i915_gem_object_init_memory_region(obj, mem, flags); + i915_gem_object_init_memory_region(obj, mem); return 0; }
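Aside: the retry/-EDEADLK/backoff shape that recurs throughout this series (i915_vma_pin(), igt_ppgtt_alloc(), lowlevel_hole(), igt_cs_tlb()) is the ww-mutex acquire dance: on contention, drop every lock and start over. Below is a minimal userspace analogue of just that control flow, assuming pthreads; real ww-mutexes resolve the conflict by acquire-context age rather than trylock, and every name here (obj_lock, vm_lock, pin_with_backoff) is invented for illustration.

#include <errno.h>
#include <pthread.h>
#include <sched.h>
#include <stdio.h>

static pthread_mutex_t obj_lock = PTHREAD_MUTEX_INITIALIZER;	/* stand-in for the object lock */
static pthread_mutex_t vm_lock = PTHREAD_MUTEX_INITIALIZER;	/* stand-in for the VM lock */

/* Take both locks, or take none and report a would-be deadlock. */
static int lock_obj_and_vm(void)
{
	pthread_mutex_lock(&obj_lock);
	if (pthread_mutex_trylock(&vm_lock)) {
		/* Someone holds vm_lock; drop everything rather than wait. */
		pthread_mutex_unlock(&obj_lock);
		return -EDEADLK;
	}
	return 0;
}

static int pin_with_backoff(void)
{
	int err;

retry:
	err = lock_obj_and_vm();
	if (err == -EDEADLK) {
		/* Backoff: nothing is held, so going around again is safe. */
		sched_yield();
		goto retry;
	}
	if (err)
		return err;

	/* ... the actual pin/bind work happens under both locks ... */

	pthread_mutex_unlock(&vm_lock);
	pthread_mutex_unlock(&obj_lock);
	return 0;
}

int main(void)
{
	int err = pin_with_backoff();

	printf("pin_with_backoff: %d\n", err);
	return err ? 1 : 0;
}

The value of the shape is that the backoff path runs with nothing held, so retrying can never deadlock regardless of how many lockers disagree about ordering.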