summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915')
-rw-r--r--drivers/gpu/drm/i915/gvt/cmd_parser.c8
-rw-r--r--drivers/gpu/drm/i915/gvt/mmio_context.c2
-rw-r--r--drivers/gpu/drm/i915/gvt/scheduler.c71
-rw-r--r--drivers/gpu/drm/i915/gvt/scheduler.h5
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c16
-rw-r--r--drivers/gpu/drm/i915/i915_sysfs.c10
-rw-r--r--drivers/gpu/drm/i915/intel_ddi.c7
-rw-r--r--drivers/gpu/drm/i915/intel_dp.c10
-rw-r--r--drivers/gpu/drm/i915/intel_hangcheck.c4
9 files changed, 109 insertions, 24 deletions
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index c8454ac43fae..db6b94dda5df 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -471,6 +471,7 @@ struct parser_exec_state {
* used when ret from 2nd level batch buffer
*/
int saved_buf_addr_type;
+ bool is_ctx_wa;
struct cmd_info *info;
@@ -1715,6 +1716,11 @@ static int perform_bb_shadow(struct parser_exec_state *s)
bb->accessing = true;
bb->bb_start_cmd_va = s->ip_va;
+ if ((s->buf_type == BATCH_BUFFER_INSTRUCTION) && (!s->is_ctx_wa))
+ bb->bb_offset = s->ip_va - s->rb_va;
+ else
+ bb->bb_offset = 0;
+
/*
* ip_va saves the virtual address of the shadow batch buffer, while
* ip_gma saves the graphics address of the original batch buffer.
@@ -2571,6 +2577,7 @@ static int scan_workload(struct intel_vgpu_workload *workload)
s.ring_tail = gma_tail;
s.rb_va = workload->shadow_ring_buffer_va;
s.workload = workload;
+ s.is_ctx_wa = false;
if ((bypass_scan_mask & (1 << workload->ring_id)) ||
gma_head == gma_tail)
@@ -2624,6 +2631,7 @@ static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
s.ring_tail = gma_tail;
s.rb_va = wa_ctx->indirect_ctx.shadow_va;
s.workload = workload;
+ s.is_ctx_wa = true;
if (!intel_gvt_ggtt_validate_range(s.vgpu, s.ring_start, s.ring_size)) {
ret = -EINVAL;
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index 256f1bb522b7..152df3d0291e 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -394,9 +394,11 @@ void intel_gvt_switch_mmio(struct intel_vgpu *pre,
* performace for batch mmio read/write, so we need
* handle forcewake mannually.
*/
+ intel_runtime_pm_get(dev_priv);
intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
switch_mmio(pre, next, ring_id);
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+ intel_runtime_pm_put(dev_priv);
}
/**
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index b55b3580ca1d..d74d6f05c62c 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -52,6 +52,54 @@ static void set_context_pdp_root_pointer(
pdp_pair[i].val = pdp[7 - i];
}
+/*
+ * when populating shadow ctx from guest, we should not overrride oa related
+ * registers, so that they will not be overlapped by guest oa configs. Thus
+ * made it possible to capture oa data from host for both host and guests.
+ */
+static void sr_oa_regs(struct intel_vgpu_workload *workload,
+ u32 *reg_state, bool save)
+{
+ struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
+ u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset;
+ u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset;
+ int i = 0;
+ u32 flex_mmio[] = {
+ i915_mmio_reg_offset(EU_PERF_CNTL0),
+ i915_mmio_reg_offset(EU_PERF_CNTL1),
+ i915_mmio_reg_offset(EU_PERF_CNTL2),
+ i915_mmio_reg_offset(EU_PERF_CNTL3),
+ i915_mmio_reg_offset(EU_PERF_CNTL4),
+ i915_mmio_reg_offset(EU_PERF_CNTL5),
+ i915_mmio_reg_offset(EU_PERF_CNTL6),
+ };
+
+ if (!workload || !reg_state || workload->ring_id != RCS)
+ return;
+
+ if (save) {
+ workload->oactxctrl = reg_state[ctx_oactxctrl + 1];
+
+ for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) {
+ u32 state_offset = ctx_flexeu0 + i * 2;
+
+ workload->flex_mmio[i] = reg_state[state_offset + 1];
+ }
+ } else {
+ reg_state[ctx_oactxctrl] =
+ i915_mmio_reg_offset(GEN8_OACTXCONTROL);
+ reg_state[ctx_oactxctrl + 1] = workload->oactxctrl;
+
+ for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) {
+ u32 state_offset = ctx_flexeu0 + i * 2;
+ u32 mmio = flex_mmio[i];
+
+ reg_state[state_offset] = mmio;
+ reg_state[state_offset + 1] = workload->flex_mmio[i];
+ }
+ }
+}
+
static int populate_shadow_context(struct intel_vgpu_workload *workload)
{
struct intel_vgpu *vgpu = workload->vgpu;
@@ -98,6 +146,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
shadow_ring_context = kmap(page);
+ sr_oa_regs(workload, (u32 *)shadow_ring_context, true);
#define COPY_REG(name) \
intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa \
+ RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4)
@@ -122,6 +171,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
sizeof(*shadow_ring_context),
I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context));
+ sr_oa_regs(workload, (u32 *)shadow_ring_context, false);
kunmap(page);
return 0;
}
@@ -376,6 +426,17 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
goto err;
}
+ /* For privilge batch buffer and not wa_ctx, the bb_start_cmd_va
+ * is only updated into ring_scan_buffer, not real ring address
+ * allocated in later copy_workload_to_ring_buffer. pls be noted
+ * shadow_ring_buffer_va is now pointed to real ring buffer va
+ * in copy_workload_to_ring_buffer.
+ */
+
+ if (bb->bb_offset)
+ bb->bb_start_cmd_va = workload->shadow_ring_buffer_va
+ + bb->bb_offset;
+
/* relocate shadow batch buffer */
bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma);
if (gmadr_bytes == 8)
@@ -1044,10 +1105,12 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES);
- s->workloads = kmem_cache_create("gvt-g_vgpu_workload",
- sizeof(struct intel_vgpu_workload), 0,
- SLAB_HWCACHE_ALIGN,
- NULL);
+ s->workloads = kmem_cache_create_usercopy("gvt-g_vgpu_workload",
+ sizeof(struct intel_vgpu_workload), 0,
+ SLAB_HWCACHE_ALIGN,
+ offsetof(struct intel_vgpu_workload, rb_tail),
+ sizeof_field(struct intel_vgpu_workload, rb_tail),
+ NULL);
if (!s->workloads) {
ret = -ENOMEM;
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h
index ff175a98b19e..a79a4f60637e 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.h
+++ b/drivers/gpu/drm/i915/gvt/scheduler.h
@@ -110,6 +110,10 @@ struct intel_vgpu_workload {
/* shadow batch buffer */
struct list_head shadow_bb;
struct intel_shadow_wa_ctx wa_ctx;
+
+ /* oa registers */
+ u32 oactxctrl;
+ u32 flex_mmio[7];
};
struct intel_vgpu_shadow_bb {
@@ -120,6 +124,7 @@ struct intel_vgpu_shadow_bb {
u32 *bb_start_cmd_va;
unsigned int clflush;
bool accessing;
+ unsigned long bb_offset;
};
#define workload_q_head(vgpu, ring_id) \
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 66ee9d888d16..6ff5d655c202 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -434,20 +434,28 @@ i915_gem_object_wait_reservation(struct reservation_object *resv,
dma_fence_put(shared[i]);
kfree(shared);
+ /*
+ * If both shared fences and an exclusive fence exist,
+ * then by construction the shared fences must be later
+ * than the exclusive fence. If we successfully wait for
+ * all the shared fences, we know that the exclusive fence
+ * must all be signaled. If all the shared fences are
+ * signaled, we can prune the array and recover the
+ * floating references on the fences/requests.
+ */
prune_fences = count && timeout >= 0;
} else {
excl = reservation_object_get_excl_rcu(resv);
}
- if (excl && timeout >= 0) {
+ if (excl && timeout >= 0)
timeout = i915_gem_object_wait_fence(excl, flags, timeout,
rps_client);
- prune_fences = timeout >= 0;
- }
dma_fence_put(excl);
- /* Oportunistically prune the fences iff we know they have *all* been
+ /*
+ * Opportunistically prune the fences iff we know they have *all* been
* signaled and that the reservation object has not been changed (i.e.
* no new fences have been added).
*/
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index b33d2158c234..e5e6f6bb2b05 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -304,8 +304,9 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev,
{
struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
struct intel_rps *rps = &dev_priv->gt_pm.rps;
- u32 val;
+ bool boost = false;
ssize_t ret;
+ u32 val;
ret = kstrtou32(buf, 0, &val);
if (ret)
@@ -317,8 +318,13 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev,
return -EINVAL;
mutex_lock(&dev_priv->pcu_lock);
- rps->boost_freq = val;
+ if (val != rps->boost_freq) {
+ rps->boost_freq = val;
+ boost = atomic_read(&rps->num_waiters);
+ }
mutex_unlock(&dev_priv->pcu_lock);
+ if (boost)
+ schedule_work(&rps->work);
return count;
}
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index f51645a08dca..6aff9d096e13 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -2175,8 +2175,7 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder,
intel_prepare_dp_ddi_buffers(encoder, crtc_state);
intel_ddi_init_dp_buf_reg(encoder);
- if (!is_mst)
- intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON);
+ intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON);
intel_dp_start_link_train(intel_dp);
if (port != PORT_A || INTEL_GEN(dev_priv) >= 9)
intel_dp_stop_link_train(intel_dp);
@@ -2274,14 +2273,12 @@ static void intel_ddi_post_disable_dp(struct intel_encoder *encoder,
struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base);
struct intel_dp *intel_dp = &dig_port->dp;
- bool is_mst = intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_DP_MST);
/*
* Power down sink before disabling the port, otherwise we end
* up getting interrupts from the sink on detecting link loss.
*/
- if (!is_mst)
- intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF);
+ intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF);
intel_disable_ddi_buf(encoder);
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 35c5299feab6..a29868cd30c7 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -620,19 +620,15 @@ static int
bxt_power_sequencer_idx(struct intel_dp *intel_dp)
{
struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp));
+ int backlight_controller = dev_priv->vbt.backlight.controller;
lockdep_assert_held(&dev_priv->pps_mutex);
/* We should never land here with regular DP ports */
WARN_ON(!intel_dp_is_edp(intel_dp));
- /*
- * TODO: BXT has 2 PPS instances. The correct port->PPS instance
- * mapping needs to be retrieved from VBT, for now just hard-code to
- * use instance #0 always.
- */
if (!intel_dp->pps_reset)
- return 0;
+ return backlight_controller;
intel_dp->pps_reset = false;
@@ -642,7 +638,7 @@ bxt_power_sequencer_idx(struct intel_dp *intel_dp)
*/
intel_dp_init_panel_power_sequencer_registers(intel_dp, false);
- return 0;
+ return backlight_controller;
}
typedef bool (*vlv_pipe_check)(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
index 348a4f7ffb67..53747318f4a7 100644
--- a/drivers/gpu/drm/i915/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/intel_hangcheck.c
@@ -246,7 +246,7 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
*/
tmp = I915_READ_CTL(engine);
if (tmp & RING_WAIT) {
- i915_handle_error(dev_priv, 0,
+ i915_handle_error(dev_priv, BIT(engine->id),
"Kicking stuck wait on %s",
engine->name);
I915_WRITE_CTL(engine, tmp);
@@ -258,7 +258,7 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
default:
return ENGINE_DEAD;
case 1:
- i915_handle_error(dev_priv, 0,
+ i915_handle_error(dev_priv, ALL_ENGINES,
"Kicking stuck semaphore on %s",
engine->name);
I915_WRITE_CTL(engine, tmp);