1 files changed, 340 insertions, 169 deletions
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
index 397ff4fe9df8..3ea447f6a45b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
@@ -67,7 +67,7 @@ gf100_gr_zbc_color_get(struct gf100_gr *gr, int format,
 	struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
 	int zbc = -ENOSPC, i;
 
-	for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) {
+	for (i = ltc->zbc_color_min; i <= ltc->zbc_color_max; i++) {
 		if (gr->zbc_color[i].format) {
 			if (gr->zbc_color[i].format != format)
 				continue;
@@ -114,7 +114,7 @@ gf100_gr_zbc_depth_get(struct gf100_gr *gr, int format,
 	struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
 	int zbc = -ENOSPC, i;
 
-	for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) {
+	for (i = ltc->zbc_depth_min; i <= ltc->zbc_depth_max; i++) {
 		if (gr->zbc_depth[i].format) {
 			if (gr->zbc_depth[i].format != format)
 				continue;
@@ -355,15 +355,14 @@ static void *
 gf100_gr_chan_dtor(struct nvkm_object *object)
 {
 	struct gf100_gr_chan *chan = gf100_gr_chan(object);
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(chan->data); i++) {
-		nvkm_vmm_put(chan->vmm, &chan->data[i].vma);
-		nvkm_memory_unref(&chan->data[i].mem);
-	}
 
 	nvkm_vmm_put(chan->vmm, &chan->mmio_vma);
 	nvkm_memory_unref(&chan->mmio);
+
+	nvkm_vmm_put(chan->vmm, &chan->attrib_cb);
+	nvkm_vmm_put(chan->vmm, &chan->unknown);
+	nvkm_vmm_put(chan->vmm, &chan->bundle_cb);
+	nvkm_vmm_put(chan->vmm, &chan->pagepool);
 	nvkm_vmm_unref(&chan->vmm);
 	return chan;
 }
@@ -375,17 +374,15 @@ gf100_gr_chan = {
 };
 
 static int
-gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
+gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_chan *fifoch,
 		  const struct nvkm_oclass *oclass,
 		  struct nvkm_object **pobject)
 {
 	struct gf100_gr *gr = gf100_gr(base);
-	struct gf100_gr_data *data = gr->mmio_data;
-	struct gf100_gr_mmio *mmio = gr->mmio_list;
 	struct gf100_gr_chan *chan;
 	struct gf100_vmm_map_v0 args = { .priv = 1 };
 	struct nvkm_device *device = gr->base.engine.subdev.device;
-	int ret, i;
+	int ret;
 
 	if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL)))
 		return -ENOMEM;
@@ -394,63 +391,92 @@ gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
 	chan->vmm = nvkm_vmm_ref(fifoch->vmm);
 	*pobject = &chan->object;
 
-	/* allocate memory for a "mmio list" buffer that's used by the HUB
-	 * fuc to modify some per-context register settings on first load
-	 * of the context.
-	 */
-	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x100,
-			      false, &chan->mmio);
+	/* Map pagepool. */
+	ret = nvkm_vmm_get(chan->vmm, 12, nvkm_memory_size(gr->pagepool), &chan->pagepool);
 	if (ret)
 		return ret;
 
-	ret = nvkm_vmm_get(fifoch->vmm, 12, 0x1000, &chan->mmio_vma);
+	ret = nvkm_memory_map(gr->pagepool, 0, chan->vmm, chan->pagepool, &args, sizeof(args));
 	if (ret)
 		return ret;
 
-	ret = nvkm_memory_map(chan->mmio, 0, fifoch->vmm,
-			      chan->mmio_vma, &args, sizeof(args));
+	/* Map bundle circular buffer. */
+	ret = nvkm_vmm_get(chan->vmm, 12, nvkm_memory_size(gr->bundle_cb), &chan->bundle_cb);
+	if (ret)
+		return ret;
+
+	ret = nvkm_memory_map(gr->bundle_cb, 0, chan->vmm, chan->bundle_cb, &args, sizeof(args));
+	if (ret)
+		return ret;
+
+	/* Map attribute circular buffer. */
+	ret = nvkm_vmm_get(chan->vmm, 12, nvkm_memory_size(gr->attrib_cb), &chan->attrib_cb);
 	if (ret)
 		return ret;
 
-	/* allocate buffers referenced by mmio list */
-	for (i = 0; data->size && i < ARRAY_SIZE(gr->mmio_data); i++) {
-		ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST,
-				      data->size, data->align, false,
-				      &chan->data[i].mem);
+	if (device->card_type < GP100) {
+		ret = nvkm_memory_map(gr->attrib_cb, 0, chan->vmm, chan->attrib_cb, NULL, 0);
 		if (ret)
 			return ret;
-
-		ret = nvkm_vmm_get(fifoch->vmm, 12,
-				   nvkm_memory_size(chan->data[i].mem),
-				   &chan->data[i].vma);
+	} else {
+		ret = nvkm_memory_map(gr->attrib_cb, 0, chan->vmm, chan->attrib_cb,
+				      &args, sizeof(args));
 		if (ret)
 			return ret;
+	}
 
-		args.priv = data->priv;
+	/* Map some context buffer of unknown purpose. */
+	if (gr->func->grctx->unknown_size) {
+		ret = nvkm_vmm_get(chan->vmm, 12, nvkm_memory_size(gr->unknown), &chan->unknown);
+		if (ret)
+			return ret;
 
-		ret = nvkm_memory_map(chan->data[i].mem, 0, chan->vmm,
-				      chan->data[i].vma, &args, sizeof(args));
+		ret = nvkm_memory_map(gr->unknown, 0, chan->vmm, chan->unknown,
+				      &args, sizeof(args));
 		if (ret)
 			return ret;
+	}
 
-		data++;
+	/* Generate golden context image. */
+	mutex_lock(&gr->fecs.mutex);
+	if (gr->data == NULL) {
+		ret = gf100_grctx_generate(gr, chan, fifoch->inst);
+		if (ret) {
+			nvkm_error(&base->engine.subdev, "failed to construct context\n");
+			mutex_unlock(&gr->fecs.mutex);
+			return ret;
+		}
 	}
+	mutex_unlock(&gr->fecs.mutex);
 
-	/* finally, fill in the mmio list and point the context at it */
-	nvkm_kmap(chan->mmio);
-	for (i = 0; mmio->addr && i < ARRAY_SIZE(gr->mmio_list); i++) {
-		u32 addr = mmio->addr;
-		u32 data = mmio->data;
+	/* allocate memory for a "mmio list" buffer that's used by the HUB
+	 * fuc to modify some per-context register settings on first load
+	 * of the context.
+	 */
+	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x100,
+			      false, &chan->mmio);
+	if (ret)
+		return ret;
 
-		if (mmio->buffer >= 0) {
-			u64 info = chan->data[mmio->buffer].vma->addr;
-			data |= info >> mmio->shift;
-		}
+	ret = nvkm_vmm_get(fifoch->vmm, 12, 0x1000, &chan->mmio_vma);
+	if (ret)
+		return ret;
 
-		nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, addr);
-		nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, data);
-		mmio++;
-	}
+	ret = nvkm_memory_map(chan->mmio, 0, fifoch->vmm,
+			      chan->mmio_vma, &args, sizeof(args));
+	if (ret)
+		return ret;
+
+	/* finally, fill in the mmio list and point the context at it */
+	nvkm_kmap(chan->mmio);
+	gr->func->grctx->pagepool(chan, chan->pagepool->addr);
+	gr->func->grctx->bundle(chan, chan->bundle_cb->addr, gr->func->grctx->bundle_size);
+	gr->func->grctx->attrib_cb(chan, chan->attrib_cb->addr, gr->func->grctx->attrib_cb_size(gr));
+	gr->func->grctx->attrib(chan);
+	if (gr->func->grctx->patch_ltc)
+		gr->func->grctx->patch_ltc(chan);
+	if (gr->func->grctx->unknown_size)
+		gr->func->grctx->unknown(chan, chan->unknown->addr, gr->func->grctx->unknown_size);
 	nvkm_done(chan->mmio);
 	return 0;
 }
@@ -727,7 +753,7 @@ gf100_gr_fecs_ctrl_ctxsw(struct gf100_gr *gr, u32 mthd)
 	struct nvkm_device *device = gr->base.engine.subdev.device;
 
 	nvkm_wr32(device, 0x409804, 0xffffffff);
-	nvkm_wr32(device, 0x409840, 0xffffffff);
+	nvkm_wr32(device, 0x409800, 0x00000000);
 	nvkm_wr32(device, 0x409500, 0xffffffff);
 	nvkm_wr32(device, 0x409504, mthd);
 	nvkm_msec(device, 2000,
@@ -771,12 +797,45 @@ gf100_gr_fecs_stop_ctxsw(struct nvkm_gr *base)
 	return ret;
 }
 
+/* Run gf100_gr_fecs_ctrl_ctxsw(gr, 0x04) under fecs.mutex; returns 0 when !gr->firmware. */
+static int
+gf100_gr_fecs_halt_pipeline(struct gf100_gr *gr)
+{
+	int status;
+
+	if (!gr->firmware)
+		return 0;
+	mutex_lock(&gr->fecs.mutex);
+	status = gf100_gr_fecs_ctrl_ctxsw(gr, 0x04);
+	mutex_unlock(&gr->fecs.mutex);
+	return status;
+}
+
+int
+gf100_gr_fecs_wfi_golden_save(struct gf100_gr *gr, u32 inst)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	/* Write inst to 0x409500 and 0x09 to 0x409504, then poll 0x409800: bit 1 -> -EIO, bit 0 -> 0. */
+	nvkm_mask(device, 0x409800, 0x00000003, 0x00000000);
+	nvkm_wr32(device, 0x409500, inst);
+	nvkm_wr32(device, 0x409504, 0x00000009);
+	nvkm_msec(device, 2000,
+		u32 stat = nvkm_rd32(device, 0x409800);
+		if (stat & 0x00000002)
+			return -EIO;
+		if (stat & 0x00000001)
+			return 0;
+	);
+	/* Reached only when the polling block above returned on neither bit. */
+	return -ETIMEDOUT;
+}
+
 int
 gf100_gr_fecs_bind_pointer(struct gf100_gr *gr, u32 inst)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
 
-	nvkm_wr32(device, 0x409840, 0x00000030);
+	nvkm_mask(device, 0x409800, 0x00000030, 0x00000000);
 	nvkm_wr32(device, 0x409500, inst);
 	nvkm_wr32(device, 0x409504, 0x00000003);
 	nvkm_msec(device, 2000,
@@ -867,7 +926,7 @@ gf100_gr_fecs_discover_pm_image_size(struct gf100_gr *gr, u32 *psize)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
 
-	nvkm_wr32(device, 0x409840, 0xffffffff);
+	nvkm_wr32(device, 0x409800, 0x00000000);
 	nvkm_wr32(device, 0x409500, 0x00000000);
 	nvkm_wr32(device, 0x409504, 0x00000025);
 	nvkm_msec(device, 2000,
@@ -883,7 +942,7 @@ gf100_gr_fecs_discover_zcull_image_size(struct gf100_gr *gr, u32 *psize)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
 
-	nvkm_wr32(device, 0x409840, 0xffffffff);
+	nvkm_wr32(device, 0x409800, 0x00000000);
 	nvkm_wr32(device, 0x409500, 0x00000000);
 	nvkm_wr32(device, 0x409504, 0x00000016);
 	nvkm_msec(device, 2000,
@@ -899,7 +958,7 @@ gf100_gr_fecs_discover_image_size(struct gf100_gr *gr, u32 *psize)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
 
-	nvkm_wr32(device, 0x409840, 0xffffffff);
+	nvkm_wr32(device, 0x409800, 0x00000000);
 	nvkm_wr32(device, 0x409500, 0x00000000);
 	nvkm_wr32(device, 0x409504, 0x00000010);
 	nvkm_msec(device, 2000,
@@ -915,7 +974,7 @@ gf100_gr_fecs_set_watchdog_timeout(struct gf100_gr *gr, u32 timeout)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
 
-	nvkm_wr32(device, 0x409840, 0xffffffff);
+	nvkm_wr32(device, 0x409800, 0x00000000);
 	nvkm_wr32(device, 0x409500, timeout);
 	nvkm_wr32(device, 0x409504, 0x00000021);
 }
@@ -955,7 +1014,7 @@ gf100_gr_zbc_init(struct gf100_gr *gr)
 	const u32 f32_1[] = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
 			      0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 };
 	struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
-	int index, c = ltc->zbc_min, d = ltc->zbc_min, s = ltc->zbc_min;
+	int index, c = ltc->zbc_color_min, d = ltc->zbc_depth_min, s = ltc->zbc_depth_min;
 
 	if (!gr->zbc_color[0].format) {
 		gf100_gr_zbc_color_get(gr, 1,  & zero[0],   &zero[4]); c++;
@@ -971,18 +1030,18 @@ gf100_gr_zbc_init(struct gf100_gr *gr)
 		}
 	}
 
-	for (index = c; index <= ltc->zbc_max; index++)
+	for (index = c; index <= ltc->zbc_color_max; index++)
 		gr->func->zbc->clear_color(gr, index);
-	for (index = d; index <= ltc->zbc_max; index++)
+	for (index = d; index <= ltc->zbc_depth_max; index++)
 		gr->func->zbc->clear_depth(gr, index);
 
 	if (gr->func->zbc->clear_stencil) {
-		for (index = s; index <= ltc->zbc_max; index++)
+		for (index = s; index <= ltc->zbc_depth_max; index++)
 			gr->func->zbc->clear_stencil(gr, index);
 	}
 }
 
-/**
+/*
  * Wait until GR goes idle. GR is considered idle if it is disabled by the
  * MC (0x200) register, or GR is not busy and a context switch is not in
  * progress.
@@ -1003,7 +1062,7 @@ gf100_gr_wait_idle(struct gf100_gr *gr)
 		nvkm_rd32(device, 0x400700);
 
 		gr_enabled = nvkm_rd32(device, 0x200) & 0x1000;
-		ctxsw_active = nvkm_rd32(device, 0x2640) & 0x8000;
+		ctxsw_active = nvkm_fifo_ctxsw_in_progress(&gr->base.engine);
 		gr_busy = nvkm_rd32(device, 0x40060c) & 0x1;
 
 		if (!gr_enabled || (!gr_busy && !ctxsw_active))
@@ -1039,7 +1098,7 @@ gf100_gr_icmd(struct gf100_gr *gr, const struct gf100_gr_pack *p)
 	struct nvkm_device *device = gr->base.engine.subdev.device;
 	const struct gf100_gr_pack *pack;
 	const struct gf100_gr_init *init;
-	u32 data = 0;
+	u64 data = 0;
 
 	nvkm_wr32(device, 0x400208, 0x80000000);
 
@@ -1049,6 +1108,8 @@ gf100_gr_icmd(struct gf100_gr *gr, const struct gf100_gr_pack *p)
 
 		if ((pack == p && init == p->init) || data != init->data) {
 			nvkm_wr32(device, 0x400204, init->data);
+			if (pack->type == 64)
+				nvkm_wr32(device, 0x40020c, upper_32_bits(init->data));
 			data = init->data;
 		}
 
@@ -1542,13 +1603,13 @@ gf100_gr_ctxctl_isr(struct gf100_gr *gr)
 	}
 }
 
-static void
-gf100_gr_intr(struct nvkm_gr *base)
+static irqreturn_t
+gf100_gr_intr(struct nvkm_inth *inth)
 {
-	struct gf100_gr *gr = gf100_gr(base);
+	struct gf100_gr *gr = container_of(inth, typeof(*gr), base.engine.subdev.inth);
 	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
-	struct nvkm_fifo_chan *chan;
+	struct nvkm_chan *chan;
 	unsigned long flags;
 	u64 inst = nvkm_rd32(device, 0x409b00) & 0x0fffffff;
 	u32 stat = nvkm_rd32(device, 0x400100);
@@ -1561,10 +1622,10 @@ gf100_gr_intr(struct nvkm_gr *base)
 	const char *name = "unknown";
 	int chid = -1;
 
-	chan = nvkm_fifo_chan_inst(device->fifo, (u64)inst << 12, &flags);
+	chan = nvkm_chan_get_inst(&gr->base.engine, (u64)inst << 12, &flags);
 	if (chan) {
-		name = chan->object.client->name;
-		chid = chan->chid;
+		name = chan->name;
+		chid = chan->id;
 	}
 
 	if (device->card_type < NV_E0 || subc < 4)
@@ -1631,7 +1692,8 @@ gf100_gr_intr(struct nvkm_gr *base)
 	}
 
 	nvkm_wr32(device, 0x400500, 0x00010001);
-	nvkm_fifo_chan_put(device->fifo, flags, &chan);
+	nvkm_chan_put(&chan, flags);
+	return IRQ_HANDLED;
 }
 
 static void
@@ -1721,7 +1783,7 @@ gf100_gr_init_ctxctl_ext(struct gf100_gr *gr)
 	nvkm_mc_unk260(device, 1);
 
 	/* start both of them running */
-	nvkm_wr32(device, 0x409840, 0xffffffff);
+	nvkm_wr32(device, 0x409800, 0x00000000);
 	nvkm_wr32(device, 0x41a10c, 0x00000000);
 	nvkm_wr32(device, 0x40910c, 0x00000000);
 
@@ -1763,15 +1825,6 @@ gf100_gr_init_ctxctl_ext(struct gf100_gr *gr)
 			return ret;
 	}
 
-	/* Generate golden context image. */
-	if (gr->data == NULL) {
-		int ret = gf100_grctx_generate(gr);
-		if (ret) {
-			nvkm_error(subdev, "failed to construct context\n");
-			return ret;
-		}
-	}
-
 	return 0;
 }
 
@@ -1823,14 +1876,6 @@ gf100_gr_init_ctxctl_int(struct gf100_gr *gr)
 	}
 
 	gr->size = nvkm_rd32(device, 0x409804);
-	if (gr->data == NULL) {
-		int ret = gf100_grctx_generate(gr);
-		if (ret) {
-			nvkm_error(subdev, "failed to construct context\n");
-			return ret;
-		}
-	}
-
 	return 0;
 }
 
@@ -1847,10 +1892,11 @@ gf100_gr_init_ctxctl(struct gf100_gr *gr)
 	return ret;
 }
 
-void
+int
 gf100_gr_oneinit_sm_id(struct gf100_gr *gr)
 {
 	int tpc, gpc;
+
 	for (tpc = 0; tpc < gr->tpc_max; tpc++) {
 		for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
 			if (tpc < gr->tpc_nr[gpc]) {
@@ -1860,6 +1906,8 @@ gf100_gr_oneinit_sm_id(struct gf100_gr *gr)
 			}
 		}
 	}
+
+	return 0;
 }
 
 void
@@ -1922,8 +1970,8 @@ gf100_gr_oneinit_tiles(struct gf100_gr *gr)
 
 	for (i = 0; i < gr->gpc_nr; i++) {
 		init_frac[i] = gr->tpc_nr[gpc_map[i]] * gr->gpc_nr * mul_factor;
-		 init_err[i] = i * gr->tpc_max * mul_factor - comm_denom/2;
-		  run_err[i] = init_frac[i] + init_err[i];
+		init_err[i] = i * gr->tpc_max * mul_factor - comm_denom/2;
+		run_err[i] = init_frac[i] + init_err[i];
 	}
 
 	for (i = 0; i < gr->tpc_total;) {
@@ -1944,7 +1992,17 @@ gf100_gr_oneinit(struct nvkm_gr *base)
 	struct gf100_gr *gr = gf100_gr(base);
 	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
-	int i, j;
+	struct nvkm_intr *intr = &device->mc->intr;
+	enum nvkm_intr_type intr_type = NVKM_INTR_SUBDEV;
+	int ret, i, j;
+
+	if (gr->func->oneinit_intr)
+		intr = gr->func->oneinit_intr(gr, &intr_type);
+
+	ret = nvkm_inth_add(intr, intr_type, NVKM_INTR_PRIO_NORMAL, &gr->base.engine.subdev,
+			    gf100_gr_intr, &gr->base.engine.subdev.inth);
+	if (ret)
+		return ret;
 
 	nvkm_pmu_pgob(device->pmu, false);
 
@@ -1954,12 +2012,14 @@ gf100_gr_oneinit(struct nvkm_gr *base)
 		gr->tpc_nr[i]  = nvkm_rd32(device, GPC_UNIT(i, 0x2608));
 		gr->tpc_max = max(gr->tpc_max, gr->tpc_nr[i]);
 		gr->tpc_total += gr->tpc_nr[i];
-		gr->ppc_nr[i]  = gr->func->ppc_nr;
-		for (j = 0; j < gr->ppc_nr[i]; j++) {
+		for (j = 0; j < gr->func->ppc_nr; j++) {
 			gr->ppc_tpc_mask[i][j] =
 				nvkm_rd32(device, GPC_UNIT(i, 0x0c30 + (j * 4)));
 			if (gr->ppc_tpc_mask[i][j] == 0)
 				continue;
+
+			gr->ppc_nr[i]++;
+
 			gr->ppc_mask[i] |= (1 << j);
 			gr->ppc_tpc_nr[i][j] = hweight8(gr->ppc_tpc_mask[i][j]);
 			if (gr->ppc_tpc_min == 0 ||
@@ -1968,12 +2028,37 @@ gf100_gr_oneinit(struct nvkm_gr *base)
 			if (gr->ppc_tpc_max < gr->ppc_tpc_nr[i][j])
 				gr->ppc_tpc_max = gr->ppc_tpc_nr[i][j];
 		}
+
+		gr->ppc_total += gr->ppc_nr[i];
+	}
+
+	/* Allocate global context buffers. */
+	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST_SR_LOST,
+			      gr->func->grctx->pagepool_size, 0x100, false, &gr->pagepool);
+	if (ret)
+		return ret;
+
+	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST_SR_LOST, gr->func->grctx->bundle_size,
+			      0x100, false, &gr->bundle_cb);
+	if (ret)
+		return ret;
+
+	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST_SR_LOST,
+			      gr->func->grctx->attrib_cb_size(gr), 0x1000, false, &gr->attrib_cb);
+	if (ret)
+		return ret;
+
+	if (gr->func->grctx->unknown_size) {
+		ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, gr->func->grctx->unknown_size,
+				      0x100, false, &gr->unknown);
+		if (ret)
+			return ret;
 	}
 
 	memset(gr->tile, 0xff, sizeof(gr->tile));
 	gr->func->oneinit_tiles(gr);
-	gr->func->oneinit_sm_id(gr);
-	return 0;
+
+	return gr->func->oneinit_sm_id(gr);
 }
 
 static int
@@ -1983,7 +2068,7 @@ gf100_gr_init_(struct nvkm_gr *base)
 	struct nvkm_subdev *subdev = &base->engine.subdev;
 	struct nvkm_device *device = subdev->device;
 	bool reset = device->chipset == 0x137 || device->chipset == 0x138;
-	u32 ret;
+	int ret;
 
 	/* On certain GP107/GP108 boards, we trigger a weird issue where
 	 * GR will stop responding to PRI accesses after we've asked the
@@ -2019,7 +2104,12 @@ gf100_gr_init_(struct nvkm_gr *base)
 	if (ret)
 		return ret;
 
-	return gr->func->init(gr);
+	ret = gr->func->init(gr);
+	if (ret)
+		return ret;
+
+	nvkm_inth_allow(&subdev->inth);
+	return 0;
 }
 
 static int
@@ -2027,6 +2117,9 @@ gf100_gr_fini(struct nvkm_gr *base, bool suspend)
 {
 	struct gf100_gr *gr = gf100_gr(base);
 	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
+
+	nvkm_inth_block(&subdev->inth);
+
 	nvkm_falcon_put(&gr->gpccs.falcon, subdev);
 	nvkm_falcon_put(&gr->fecs.falcon, subdev);
 	return 0;
@@ -2039,6 +2132,11 @@ gf100_gr_dtor(struct nvkm_gr *base)
 
 	kfree(gr->data);
 
+	nvkm_memory_unref(&gr->unknown);
+	nvkm_memory_unref(&gr->attrib_cb);
+	nvkm_memory_unref(&gr->bundle_cb);
+	nvkm_memory_unref(&gr->pagepool);
+
 	nvkm_falcon_dtor(&gr->gpccs.falcon);
 	nvkm_falcon_dtor(&gr->fecs.falcon);
 
@@ -2047,81 +2145,27 @@ gf100_gr_dtor(struct nvkm_gr *base)
 	nvkm_blob_dtor(&gr->gpccs.inst);
 	nvkm_blob_dtor(&gr->gpccs.data);
 
+	vfree(gr->bundle64);
+	vfree(gr->bundle_veid);
 	vfree(gr->bundle);
 	vfree(gr->method);
 	vfree(gr->sw_ctx);
 	vfree(gr->sw_nonctx);
+	vfree(gr->sw_nonctx1);
+	vfree(gr->sw_nonctx2);
+	vfree(gr->sw_nonctx3);
+	vfree(gr->sw_nonctx4);
 
 	return gr;
 }
 
-static const struct nvkm_gr_func
-gf100_gr_ = {
-	.dtor = gf100_gr_dtor,
-	.oneinit = gf100_gr_oneinit,
-	.init = gf100_gr_init_,
-	.fini = gf100_gr_fini,
-	.intr = gf100_gr_intr,
-	.units = gf100_gr_units,
-	.chan_new = gf100_gr_chan_new,
-	.object_get = gf100_gr_object_get,
-	.chsw_load = gf100_gr_chsw_load,
-	.ctxsw.pause = gf100_gr_fecs_stop_ctxsw,
-	.ctxsw.resume = gf100_gr_fecs_start_ctxsw,
-	.ctxsw.inst = gf100_gr_ctxsw_inst,
-};
-
 static const struct nvkm_falcon_func
 gf100_gr_flcn = {
-	.fbif = 0x600,
 	.load_imem = nvkm_falcon_v1_load_imem,
 	.load_dmem = nvkm_falcon_v1_load_dmem,
-	.read_dmem = nvkm_falcon_v1_read_dmem,
-	.bind_context = nvkm_falcon_v1_bind_context,
-	.wait_for_halt = nvkm_falcon_v1_wait_for_halt,
-	.clear_interrupt = nvkm_falcon_v1_clear_interrupt,
-	.set_start_addr = nvkm_falcon_v1_set_start_addr,
 	.start = nvkm_falcon_v1_start,
-	.enable = nvkm_falcon_v1_enable,
-	.disable = nvkm_falcon_v1_disable,
 };
 
-int
-gf100_gr_new_(const struct gf100_gr_fwif *fwif, struct nvkm_device *device,
-	      enum nvkm_subdev_type type, int inst, struct nvkm_gr **pgr)
-{
-	struct gf100_gr *gr;
-	int ret;
-
-	if (!(gr = kzalloc(sizeof(*gr), GFP_KERNEL)))
-		return -ENOMEM;
-	*pgr = &gr->base;
-
-	ret = nvkm_gr_ctor(&gf100_gr_, device, type, inst, true, &gr->base);
-	if (ret)
-		return ret;
-
-	fwif = nvkm_firmware_load(&gr->base.engine.subdev, fwif, "Gr", gr);
-	if (IS_ERR(fwif))
-		return PTR_ERR(fwif);
-
-	gr->func = fwif->func;
-
-	ret = nvkm_falcon_ctor(&gf100_gr_flcn, &gr->base.engine.subdev,
-			       "fecs", 0x409000, &gr->fecs.falcon);
-	if (ret)
-		return ret;
-
-	mutex_init(&gr->fecs.mutex);
-
-	ret = nvkm_falcon_ctor(&gf100_gr_flcn, &gr->base.engine.subdev,
-			       "gpccs", 0x41a000, &gr->gpccs.falcon);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
 void
 gf100_gr_init_num_tpc_per_gpc(struct gf100_gr *gr, bool pd, bool ds)
 {
@@ -2146,6 +2190,29 @@ gf100_gr_init_400054(struct gf100_gr *gr)
 }
 
 void
+gf100_gr_init_exception2(struct gf100_gr *gr)
+{
+	struct nvkm_device *dev = gr->base.engine.subdev.device;
+	/* Both 0x40011c and 0x400134 are written with all bits set. */
+	nvkm_wr32(dev, 0x40011c, 0xffffffff);
+	nvkm_wr32(dev, 0x400134, 0xffffffff);
+}
+
+void
+gf100_gr_init_rop_exceptions(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	int unit;
+	/* Program registers 0x144/0x070/0x204/0x208 of every ROP unit, in that order. */
+	for (unit = 0; unit < gr->rop_nr; unit++) {
+		nvkm_wr32(device, ROP_UNIT(unit, 0x144), 0x40000000);
+		nvkm_wr32(device, ROP_UNIT(unit, 0x070), 0x40000000);
+		nvkm_wr32(device, ROP_UNIT(unit, 0x204), 0xffffffff);
+		nvkm_wr32(device, ROP_UNIT(unit, 0x208), 0xffffffff);
+	}
+}
+
+void
 gf100_gr_init_shader_exceptions(struct gf100_gr *gr, int gpc, int tpc)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
@@ -2252,21 +2319,47 @@ gf100_gr_init_vsc_stream_master(struct gf100_gr *gr)
 	nvkm_mask(device, TPC_UNIT(0, 0, 0x05c), 0x00000001, 0x00000001);
 }
 
+/* Halt FECS, then fini, MC disable, optional gpccs.reset, MC enable and re-init the subdev. */
+static int
+gf100_gr_reset(struct nvkm_gr *base)
+{
+	struct gf100_gr *gr = gf100_gr(base);
+	struct nvkm_subdev *sd = &base->engine.subdev;
+	struct nvkm_device *dev = sd->device;
+
+	nvkm_mask(dev, 0x400500, 0x00000001, 0x00000000);
+	WARN_ON(gf100_gr_fecs_halt_pipeline(gr));
+
+	sd->func->fini(sd, false);
+	nvkm_mc_disable(dev, sd->type, sd->inst);
+	if (gr->func->gpccs.reset)
+		gr->func->gpccs.reset(gr);
+	nvkm_mc_enable(dev, sd->type, sd->inst);
+
+	return sd->func->init(sd);
+}
+
 int
 gf100_gr_init(struct gf100_gr *gr)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
-	int gpc, tpc, rop;
+	int gpc, tpc;
 
-	if (gr->func->init_419bd8)
-		gr->func->init_419bd8(gr);
+	nvkm_mask(device, 0x400500, 0x00010001, 0x00000000);
 
 	gr->func->init_gpc_mmu(gr);
 
-	if (gr->sw_nonctx)
+	if (gr->sw_nonctx1) {
+		gf100_gr_mmio(gr, gr->sw_nonctx1);
+		gf100_gr_mmio(gr, gr->sw_nonctx2);
+		gf100_gr_mmio(gr, gr->sw_nonctx3);
+		gf100_gr_mmio(gr, gr->sw_nonctx4);
+	} else
+	if (gr->sw_nonctx) {
 		gf100_gr_mmio(gr, gr->sw_nonctx);
-	else
+	} else {
 		gf100_gr_mmio(gr, gr->func->mmio);
+	}
 
 	gf100_gr_wait_idle(gr);
 
@@ -2298,6 +2391,10 @@ gf100_gr_init(struct gf100_gr *gr)
 	nvkm_wr32(device, 0x400124, 0x00000002);
 
 	gr->func->init_fecs_exceptions(gr);
+
+	if (gr->func->init_40a790)
+		gr->func->init_40a790(gr);
+
 	if (gr->func->init_ds_hww_esr_2)
 		gr->func->init_ds_hww_esr_2(gr);
 
@@ -2346,19 +2443,14 @@ gf100_gr_init(struct gf100_gr *gr)
 		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff);
 	}
 
-	for (rop = 0; rop < gr->rop_nr; rop++) {
-		nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0x40000000);
-		nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0x40000000);
-		nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff);
-		nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff);
-	}
+	gr->func->init_rop_exceptions(gr);
 
 	nvkm_wr32(device, 0x400108, 0xffffffff);
 	nvkm_wr32(device, 0x400138, 0xffffffff);
 	nvkm_wr32(device, 0x400118, 0xffffffff);
 	nvkm_wr32(device, 0x400130, 0xffffffff);
-	nvkm_wr32(device, 0x40011c, 0xffffffff);
-	nvkm_wr32(device, 0x400134, 0xffffffff);
+	if (gr->func->init_exception2)
+		gr->func->init_exception2(gr);
 
 	if (gr->func->init_400054)
 		gr->func->init_400054(gr);
@@ -2371,6 +2463,18 @@ gf100_gr_init(struct gf100_gr *gr)
 	return gf100_gr_init_ctxctl(gr);
 }
 
+void
+gf100_gr_fecs_reset(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	/* Sequence on 0x409614: write 0x70, nvkm_usec(10), mask in 0x700, nvkm_usec(10), read back. */
+	nvkm_wr32(device, 0x409614, 0x00000070);
+	nvkm_usec(device, 10, NVKM_DELAY);
+	nvkm_mask(device, 0x409614, 0x00000700, 0x00000700);
+	nvkm_usec(device, 10, NVKM_DELAY);
+	nvkm_rd32(device, 0x409614);
+}
+
 #include "fuc/hubgf100.fuc3.h"
 
 struct gf100_gr_ucode
@@ -2391,6 +2495,34 @@ gf100_gr_gpccs_ucode = {
 	.data.size = sizeof(gf100_grgpc_data),
 };
 
+/* Forward to the chipset's gr->func->nonstall hook; -EINVAL when no hook is set. */
+static int
+gf100_gr_nonstall(struct nvkm_gr *base)
+{
+	struct gf100_gr *gr = gf100_gr(base);
+
+	if (!gr->func->nonstall)
+		return -EINVAL;
+	return gr->func->nonstall(gr);
+}
+
+static const struct nvkm_gr_func
+gf100_gr_ = { /* engine-level hooks; passed to nvkm_gr_ctor() by gf100_gr_new_() */
+	.dtor = gf100_gr_dtor,
+	.oneinit = gf100_gr_oneinit,
+	.init = gf100_gr_init_,
+	.fini = gf100_gr_fini,
+	.reset = gf100_gr_reset,
+	.nonstall = gf100_gr_nonstall,
+	.units = gf100_gr_units,
+	.chan_new = gf100_gr_chan_new,
+	.object_get = gf100_gr_object_get,
+	.chsw_load = gf100_gr_chsw_load,
+	.ctxsw.inst = gf100_gr_ctxsw_inst,
+	.ctxsw.pause = gf100_gr_fecs_stop_ctxsw,
+	.ctxsw.resume = gf100_gr_fecs_start_ctxsw,
+};
+
 static const struct gf100_gr_func
 gf100_gr = {
 	.oneinit_tiles = gf100_gr_oneinit_tiles,
@@ -2406,10 +2538,13 @@ gf100_gr = {
 	.init_419eb4 = gf100_gr_init_419eb4,
 	.init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
 	.init_shader_exceptions = gf100_gr_init_shader_exceptions,
+	.init_rop_exceptions = gf100_gr_init_rop_exceptions,
+	.init_exception2 = gf100_gr_init_exception2,
 	.init_400054 = gf100_gr_init_400054,
 	.trap_mp = gf100_gr_trap_mp,
 	.mmio = gf100_gr_pack_mmio,
 	.fecs.ucode = &gf100_gr_fecs_ucode,
+	.fecs.reset = gf100_gr_fecs_reset,
 	.gpccs.ucode = &gf100_gr_gpccs_ucode,
 	.rops = gf100_gr_rops,
 	.grctx = &gf100_grctx,
@@ -2483,6 +2618,42 @@ gf100_gr_fwif[] = {
 };
 
 int
+gf100_gr_new_(const struct gf100_gr_fwif *fwif, struct nvkm_device *device,
+	      enum nvkm_subdev_type type, int inst, struct nvkm_gr **pgr)
+{
+	struct gf100_gr *gr;
+	int ret;
+
+	if (!(gr = kzalloc(sizeof(*gr), GFP_KERNEL)))
+		return -ENOMEM;
+	*pgr = &gr->base;
+
+	ret = nvkm_gr_ctor(&gf100_gr_, device, type, inst, true, &gr->base);
+	if (ret)
+		return ret;
+
+	fwif = nvkm_firmware_load(&gr->base.engine.subdev, fwif, "Gr", gr);
+	if (IS_ERR(fwif))
+		return PTR_ERR(fwif);
+
+	gr->func = fwif->func;
+
+	ret = nvkm_falcon_ctor(&gf100_gr_flcn, &gr->base.engine.subdev,
+			       "fecs", 0x409000, &gr->fecs.falcon);
+	if (ret)
+		return ret;
+
+	mutex_init(&gr->fecs.mutex);
+
+	ret = nvkm_falcon_ctor(&gf100_gr_flcn, &gr->base.engine.subdev,
+			       "gpccs", 0x41a000, &gr->gpccs.falcon);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+int
 gf100_gr_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, struct nvkm_gr **pgr)
 {
 	return gf100_gr_new_(gf100_gr_fwif, device, type, inst, pgr);