diff options
Diffstat (limited to 'drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c')
| -rw-r--r-- | drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c | 665 |
1 files changed, 424 insertions, 241 deletions
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c index bc77eea351a5..cb390e0134a2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c @@ -26,6 +26,7 @@ #include <subdev/fb.h> #include <subdev/mc.h> #include <subdev/timer.h> +#include <engine/fifo.h> /******************************************************************************* * PGRAPH context register lists @@ -850,12 +851,17 @@ gf100_grctx_init_gcc_0[] = { }; const struct gf100_gr_pack -gf100_grctx_pack_gpc[] = { +gf100_grctx_pack_gpc_0[] = { { gf100_grctx_init_gpc_unk_0 }, { gf100_grctx_init_prop_0 }, { gf100_grctx_init_gpc_unk_1 }, { gf100_grctx_init_setup_0 }, { gf100_grctx_init_zcull_0 }, + {} +}; + +const struct gf100_gr_pack +gf100_grctx_pack_gpc_1[] = { { gf100_grctx_init_crstr_0 }, { gf100_grctx_init_gpm_0 }, { gf100_grctx_init_gcc_0 }, @@ -985,187 +991,117 @@ gf100_grctx_pack_tpc[] = { * PGRAPH context implementation ******************************************************************************/ -int -gf100_grctx_mmio_data(struct gf100_grctx *info, u32 size, u32 align, u32 access) +void +gf100_grctx_patch_wr32(struct gf100_gr_chan *chan, u32 addr, u32 data) { - if (info->data) { - info->buffer[info->buffer_nr] = round_up(info->addr, align); - info->addr = info->buffer[info->buffer_nr] + size; - info->data->size = size; - info->data->align = align; - info->data->access = access; - info->data++; - return info->buffer_nr++; + if (unlikely(!chan->mmio)) { + nvkm_wr32(chan->gr->base.engine.subdev.device, addr, data); + return; } - return -1; + + nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, addr); + nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, data); } void -gf100_grctx_mmio_item(struct gf100_grctx *info, u32 addr, u32 data, - int shift, int buffer) +gf100_grctx_generate_r419cb8(struct gf100_gr *gr) { - struct nvkm_device *device = info->gr->base.engine.subdev.device; - if (info->data) { - if (shift >= 0) { - info->mmio->addr = addr; - info->mmio->data = data; - info->mmio->shift = shift; - info->mmio->buffer = buffer; - if (buffer >= 0) - data |= info->buffer[buffer] >> shift; - info->mmio++; - } else - return; - } else { - if (buffer >= 0) - return; - } - - nvkm_wr32(device, addr, data); + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_mask(device, 0x419cb8, 0x00007c00, 0x00000000); } void -gf100_grctx_generate_bundle(struct gf100_grctx *info) +gf100_grctx_generate_bundle(struct gf100_gr_chan *chan, u64 addr, u32 size) { - const struct gf100_grctx_func *grctx = info->gr->func->grctx; - const u32 access = NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS; - const int s = 8; - const int b = mmio_vram(info, grctx->bundle_size, (1 << s), access); - mmio_refn(info, 0x408004, 0x00000000, s, b); - mmio_wr32(info, 0x408008, 0x80000000 | (grctx->bundle_size >> s)); - mmio_refn(info, 0x418808, 0x00000000, s, b); - mmio_wr32(info, 0x41880c, 0x80000000 | (grctx->bundle_size >> s)); + gf100_grctx_patch_wr32(chan, 0x408004, addr >> 8); + gf100_grctx_patch_wr32(chan, 0x408008, 0x80000000 | (size >> 8)); + gf100_grctx_patch_wr32(chan, 0x418808, addr >> 8); + gf100_grctx_patch_wr32(chan, 0x41880c, 0x80000000 | (size >> 8)); } void -gf100_grctx_generate_pagepool(struct gf100_grctx *info) +gf100_grctx_generate_pagepool(struct gf100_gr_chan *chan, u64 addr) { - const struct gf100_grctx_func *grctx = info->gr->func->grctx; - const u32 access = NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS; - const int s = 8; - const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), access); - mmio_refn(info, 0x40800c, 0x00000000, s, b); - mmio_wr32(info, 0x408010, 0x80000000); - mmio_refn(info, 0x419004, 0x00000000, s, b); - mmio_wr32(info, 0x419008, 0x00000000); + gf100_grctx_patch_wr32(chan, 0x40800c, addr >> 8); + gf100_grctx_patch_wr32(chan, 0x408010, 0x80000000); + gf100_grctx_patch_wr32(chan, 0x419004, addr >> 8); + gf100_grctx_patch_wr32(chan, 0x419008, 0x00000000); } void -gf100_grctx_generate_attrib(struct gf100_grctx *info) +gf100_grctx_generate_attrib(struct gf100_gr_chan *chan) { - struct gf100_gr *gr = info->gr; + struct gf100_gr *gr = chan->gr; const struct gf100_grctx_func *grctx = gr->func->grctx; const u32 attrib = grctx->attrib_nr; - const u32 size = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max); - const u32 access = NV_MEM_ACCESS_RW; - const int s = 12; - const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), access); int gpc, tpc; u32 bo = 0; - mmio_refn(info, 0x418810, 0x80000000, s, b); - mmio_refn(info, 0x419848, 0x10000000, s, b); - mmio_wr32(info, 0x405830, (attrib << 16)); + gf100_grctx_patch_wr32(chan, 0x405830, (attrib << 16)); for (gpc = 0; gpc < gr->gpc_nr; gpc++) { for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) { const u32 o = TPC_UNIT(gpc, tpc, 0x0520); - mmio_skip(info, o, (attrib << 16) | ++bo); - mmio_wr32(info, o, (attrib << 16) | --bo); + + gf100_grctx_patch_wr32(chan, o, (attrib << 16) | bo); bo += grctx->attrib_nr_max; } } } void -gf100_grctx_generate_unkn(struct gf100_gr *gr) +gf100_grctx_generate_attrib_cb(struct gf100_gr_chan *chan, u64 addr, u32 size) { + gf100_grctx_patch_wr32(chan, 0x418810, 0x80000000 | addr >> 12); + gf100_grctx_patch_wr32(chan, 0x419848, 0x10000000 | addr >> 12); } -void -gf100_grctx_generate_tpcid(struct gf100_gr *gr) +u32 +gf100_grctx_generate_attrib_cb_size(struct gf100_gr *gr) { - struct nvkm_device *device = gr->base.engine.subdev.device; - int gpc, tpc, id; - - for (tpc = 0, id = 0; tpc < 4; tpc++) { - for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - if (tpc < gr->tpc_nr[gpc]) { - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x698), id); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x4e8), id); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), id); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), id); - id++; - } + const struct gf100_grctx_func *grctx = gr->func->grctx; - nvkm_wr32(device, GPC_UNIT(gpc, 0x0c08), gr->tpc_nr[gpc]); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0c8c), gr->tpc_nr[gpc]); - } - } + return 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max) * gr->tpc_total; } void -gf100_grctx_generate_r406028(struct gf100_gr *gr) +gf100_grctx_generate_unkn(struct gf100_gr *gr) { - struct nvkm_device *device = gr->base.engine.subdev.device; - u32 tmp[GPC_MAX / 8] = {}, i = 0; - for (i = 0; i < gr->gpc_nr; i++) - tmp[i / 8] |= gr->tpc_nr[i] << ((i % 8) * 4); - for (i = 0; i < 4; i++) { - nvkm_wr32(device, 0x406028 + (i * 4), tmp[i]); - nvkm_wr32(device, 0x405870 + (i * 4), tmp[i]); - } } void gf100_grctx_generate_r4060a8(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; - u8 tpcnr[GPC_MAX], data[TPC_MAX]; - int gpc, tpc, i; - - memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); - memset(data, 0x1f, sizeof(data)); - - gpc = -1; - for (tpc = 0; tpc < gr->tpc_total; tpc++) { - do { - gpc = (gpc + 1) % gr->gpc_nr; - } while (!tpcnr[gpc]); - tpcnr[gpc]--; - data[tpc] = gpc; + const u8 gpcmax = nvkm_rd32(device, 0x022430); + const u8 tpcmax = nvkm_rd32(device, 0x022434) * gpcmax; + int i, j, sm = 0; + u32 data; + + for (i = 0; i < DIV_ROUND_UP(tpcmax, 4); i++) { + for (data = 0, j = 0; j < 4; j++) { + if (sm < gr->sm_nr) + data |= gr->sm[sm++].gpc << (j * 8); + else + data |= 0x1f << (j * 8); + } + nvkm_wr32(device, 0x4060a8 + (i * 4), data); } - - for (i = 0; i < 4; i++) - nvkm_wr32(device, 0x4060a8 + (i * 4), ((u32 *)data)[i]); } void -gf100_grctx_generate_r418bb8(struct gf100_gr *gr) +gf100_grctx_generate_rop_mapping(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; u32 data[6] = {}, data2[2] = {}; - u8 tpcnr[GPC_MAX]; u8 shift, ntpcv; - int gpc, tpc, i; - - /* calculate first set of magics */ - memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); + int i; - gpc = -1; - for (tpc = 0; tpc < gr->tpc_total; tpc++) { - do { - gpc = (gpc + 1) % gr->gpc_nr; - } while (!tpcnr[gpc]); - tpcnr[gpc]--; - - data[tpc / 6] |= gpc << ((tpc % 6) * 5); - } + /* Pack tile map into register format. */ + for (i = 0; i < 32; i++) + data[i / 6] |= (gr->tile[i] & 0x07) << ((i % 6) * 5); - for (; tpc < 32; tpc++) - data[tpc / 6] |= 7 << ((tpc % 6) * 5); - - /* and the second... */ + /* Magic. */ shift = 0; ntpcv = gr->tpc_total; while (!(ntpcv & (1 << 4))) { @@ -1200,152 +1136,375 @@ gf100_grctx_generate_r418bb8(struct gf100_gr *gr) } void -gf100_grctx_generate_r406800(struct gf100_gr *gr) +gf100_grctx_generate_max_ways_evict(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; - u64 tpc_mask = 0, tpc_set = 0; - u8 tpcnr[GPC_MAX]; - int gpc, tpc; - int i, a, b; - - memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); - for (gpc = 0; gpc < gr->gpc_nr; gpc++) - tpc_mask |= ((1ULL << gr->tpc_nr[gpc]) - 1) << (gpc * 8); - - for (i = 0, gpc = -1, b = -1; i < 32; i++) { - a = (i * (gr->tpc_total - 1)) / 32; - if (a != b) { - b = a; - do { - gpc = (gpc + 1) % gr->gpc_nr; - } while (!tpcnr[gpc]); - tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--; - - tpc_set |= 1ULL << ((gpc * 8) + tpc); + u32 fbps = nvkm_rd32(device, 0x121c74); + if (fbps == 1) + nvkm_mask(device, 0x17e91c, 0x001f0000, 0x00090000); +} + +static const u32 +gf100_grctx_alpha_beta_map[17][32] = { + [1] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + }, + [2] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + }, + //XXX: 3 + [4] = { + 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, + }, + //XXX: 5 + //XXX: 6 + [7] = { + 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, + }, + [8] = { + 1, 1, 1, + 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, + 7, 7, 7, + }, + //XXX: 9 + //XXX: 10 + [11] = { + 1, 1, + 2, 2, 2, 2, + 3, 3, 3, + 4, 4, 4, 4, + 5, 5, 5, + 6, 6, 6, + 7, 7, 7, 7, + 8, 8, 8, + 9, 9, 9, 9, + 10, 10, + }, + //XXX: 12 + //XXX: 13 + [14] = { + 1, 1, + 2, 2, + 3, 3, 3, + 4, 4, 4, + 5, 5, + 6, 6, 6, + 7, 7, + 8, 8, 8, + 9, 9, + 10, 10, 10, + 11, 11, 11, + 12, 12, + 13, 13, + }, + [15] = { + 1, 1, + 2, 2, + 3, 3, + 4, 4, 4, + 5, 5, + 6, 6, 6, + 7, 7, + 8, 8, + 9, 9, 9, + 10, 10, + 11, 11, 11, + 12, 12, + 13, 13, + 14, 14, + }, + [16] = { + 1, 1, + 2, 2, + 3, 3, + 4, 4, + 5, 5, + 6, 6, 6, + 7, 7, + 8, 8, + 9, 9, + 10, 10, 10, + 11, 11, + 12, 12, + 13, 13, + 14, 14, + 15, 15, + }, +}; + +void +gf100_grctx_generate_alpha_beta_tables(struct gf100_gr *gr) +{ + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + struct nvkm_device *device = subdev->device; + int i, gpc; + + for (i = 0; i < 32; i++) { + u32 atarget = gf100_grctx_alpha_beta_map[gr->tpc_total][i]; + u32 abits[GPC_MAX] = {}, amask = 0, bmask = 0; + + if (!atarget) { + nvkm_warn(subdev, "missing alpha/beta mapping table\n"); + atarget = max_t(u32, gr->tpc_total * i / 32, 1); + } + + while (atarget) { + for (gpc = 0; atarget && gpc < gr->gpc_nr; gpc++) { + if (abits[gpc] < gr->tpc_nr[gpc]) { + abits[gpc]++; + atarget--; + } + } } - nvkm_wr32(device, 0x406800 + (i * 0x20), lower_32_bits(tpc_set)); - nvkm_wr32(device, 0x406c00 + (i * 0x20), lower_32_bits(tpc_set ^ tpc_mask)); - if (gr->gpc_nr > 4) { - nvkm_wr32(device, 0x406804 + (i * 0x20), upper_32_bits(tpc_set)); - nvkm_wr32(device, 0x406c04 + (i * 0x20), upper_32_bits(tpc_set ^ tpc_mask)); + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + u32 bbits = gr->tpc_nr[gpc] - abits[gpc]; + amask |= ((1 << abits[gpc]) - 1) << (gpc * 8); + bmask |= ((1 << bbits) - 1) << abits[gpc] << (gpc * 8); } + + nvkm_wr32(device, 0x406800 + (i * 0x20), amask); + nvkm_wr32(device, 0x406c00 + (i * 0x20), bmask); + } +} + +void +gf100_grctx_generate_tpc_nr(struct gf100_gr *gr, int gpc) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_wr32(device, GPC_UNIT(gpc, 0x0c08), gr->tpc_nr[gpc]); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0c8c), gr->tpc_nr[gpc]); +} + +void +gf100_grctx_generate_sm_id(struct gf100_gr *gr, int gpc, int tpc, int sm) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x698), sm); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x4e8), sm); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), sm); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), sm); +} + +void +gf100_grctx_generate_floorsweep(struct gf100_gr *gr) +{ + const struct gf100_grctx_func *func = gr->func->grctx; + int sm; + + for (sm = 0; sm < gr->sm_nr; sm++) { + func->sm_id(gr, gr->sm[sm].gpc, gr->sm[sm].tpc, sm); + if (func->tpc_nr) + func->tpc_nr(gr, gr->sm[sm].gpc); } + + gf100_gr_init_num_tpc_per_gpc(gr, false, true); + if (!func->skip_pd_num_tpc_per_gpc) + gf100_gr_init_num_tpc_per_gpc(gr, true, false); + + if (func->r4060a8) + func->r4060a8(gr); + + func->rop_mapping(gr); + + if (func->alpha_beta_tables) + func->alpha_beta_tables(gr); + if (func->max_ways_evict) + func->max_ways_evict(gr); + if (func->dist_skip_table) + func->dist_skip_table(gr); + if (func->r406500) + func->r406500(gr); + if (func->gpc_tpc_nr) + func->gpc_tpc_nr(gr); + if (func->r419f78) + func->r419f78(gr); + if (func->tpc_mask) + func->tpc_mask(gr); + if (func->smid_config) + func->smid_config(gr); } void -gf100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) +gf100_grctx_generate_main(struct gf100_gr_chan *chan) { + struct gf100_gr *gr = chan->gr; struct nvkm_device *device = gr->base.engine.subdev.device; const struct gf100_grctx_func *grctx = gr->func->grctx; u32 idle_timeout; nvkm_mc_unk260(device, 0); - gf100_gr_mmio(gr, grctx->hub); - gf100_gr_mmio(gr, grctx->gpc); - gf100_gr_mmio(gr, grctx->zcull); - gf100_gr_mmio(gr, grctx->tpc); - gf100_gr_mmio(gr, grctx->ppc); + if (!gr->sw_ctx) { + gf100_gr_mmio(gr, grctx->hub); + gf100_gr_mmio(gr, grctx->gpc_0); + gf100_gr_mmio(gr, grctx->zcull); + gf100_gr_mmio(gr, grctx->gpc_1); + gf100_gr_mmio(gr, grctx->tpc); + gf100_gr_mmio(gr, grctx->ppc); + } else { + gf100_gr_mmio(gr, gr->sw_ctx); + } + + if (gr->func->init_419bd8) + gr->func->init_419bd8(gr); + if (grctx->r419ea8) + grctx->r419ea8(gr); + + gf100_gr_wait_idle(gr); idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000); - grctx->bundle(info); - grctx->pagepool(info); - grctx->attrib(info); + grctx->pagepool(chan, chan->pagepool->addr); + grctx->bundle(chan, chan->bundle_cb->addr, grctx->bundle_size); + grctx->attrib_cb(chan, chan->attrib_cb->addr, grctx->attrib_cb_size(gr)); + grctx->attrib(chan); + if (grctx->patch_ltc) + grctx->patch_ltc(chan); + if (grctx->unknown_size) + grctx->unknown(chan, chan->unknown->addr, grctx->unknown_size); grctx->unkn(gr); - gf100_grctx_generate_tpcid(gr); - gf100_grctx_generate_r406028(gr); - gf100_grctx_generate_r4060a8(gr); - gf100_grctx_generate_r418bb8(gr); - gf100_grctx_generate_r406800(gr); + gf100_grctx_generate_floorsweep(gr); + + gf100_gr_wait_idle(gr); + + if (grctx->r400088) grctx->r400088(gr, false); + + if (gr->bundle) + gf100_gr_icmd(gr, gr->bundle); + else + gf100_gr_icmd(gr, grctx->icmd); + + if (gr->bundle_veid) + gf100_gr_icmd(gr, gr->bundle_veid); + else + gf100_gr_icmd(gr, grctx->sw_veid_bundle_init); + + if (gr->bundle64) + gf100_gr_icmd(gr, gr->bundle64); + else + if (grctx->sw_bundle64_init) + gf100_gr_icmd(gr, grctx->sw_bundle64_init); + + if (grctx->r400088) grctx->r400088(gr, true); - gf100_gr_icmd(gr, grctx->icmd); nvkm_wr32(device, 0x404154, idle_timeout); - gf100_gr_mthd(gr, grctx->mthd); + + if (gr->method) + gf100_gr_mthd(gr, gr->method); + else + gf100_gr_mthd(gr, grctx->mthd); nvkm_mc_unk260(device, 1); + + if (grctx->r419cb8) + grctx->r419cb8(gr); + if (grctx->r418800) + grctx->r418800(gr); + if (grctx->r419eb0) + grctx->r419eb0(gr); + if (grctx->r419e00) + grctx->r419e00(gr); + if (grctx->r418e94) + grctx->r418e94(gr); + if (grctx->r419a3c) + grctx->r419a3c(gr); + if (grctx->r408840) + grctx->r408840(gr); + if (grctx->r419c0c) + grctx->r419c0c(gr); + + gf100_gr_wait_idle(gr); } +#define CB_RESERVED 0x80000 + int -gf100_grctx_generate(struct gf100_gr *gr) +gf100_grctx_generate(struct gf100_gr *gr, struct gf100_gr_chan *chan, struct nvkm_gpuobj *inst) { const struct gf100_grctx_func *grctx = gr->func->grctx; struct nvkm_subdev *subdev = &gr->base.engine.subdev; struct nvkm_device *device = subdev->device; - struct nvkm_memory *chan; - struct gf100_grctx info; + struct nvkm_memory *data = NULL; + struct nvkm_vma *ctx = NULL; int ret, i; u64 addr; - /* allocate memory to for a "channel", which we'll use to generate - * the default context values - */ - ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x80000 + gr->size, - 0x1000, true, &chan); - if (ret) { - nvkm_error(subdev, "failed to allocate chan memory, %d\n", ret); - return ret; - } - - addr = nvkm_memory_addr(chan); + /* NV_PGRAPH_FE_PWR_MODE_FORCE_ON. */ + nvkm_wr32(device, 0x404170, 0x00000012); + nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, 0x404170) & 0x00000010)) + break; + ); - /* PGD pointer */ - nvkm_kmap(chan); - nvkm_wo32(chan, 0x0200, lower_32_bits(addr + 0x1000)); - nvkm_wo32(chan, 0x0204, upper_32_bits(addr + 0x1000)); - nvkm_wo32(chan, 0x0208, 0xffffffff); - nvkm_wo32(chan, 0x020c, 0x000000ff); + if (grctx->unkn88c) + grctx->unkn88c(gr, true); - /* PGT[0] pointer */ - nvkm_wo32(chan, 0x1000, 0x00000000); - nvkm_wo32(chan, 0x1004, 0x00000001 | (addr + 0x2000) >> 8); + /* Reset FECS. */ + gr->func->fecs.reset(gr); - /* identity-map the whole "channel" into its own vm */ - for (i = 0; i < nvkm_memory_size(chan) / 4096; i++) { - u64 addr = ((nvkm_memory_addr(chan) + (i * 4096)) >> 8) | 1; - nvkm_wo32(chan, 0x2000 + (i * 8), lower_32_bits(addr)); - nvkm_wo32(chan, 0x2004 + (i * 8), upper_32_bits(addr)); - } + if (grctx->unkn88c) + grctx->unkn88c(gr, false); - /* context pointer (virt) */ - nvkm_wo32(chan, 0x0210, 0x00080004); - nvkm_wo32(chan, 0x0214, 0x00000000); - nvkm_done(chan); - - nvkm_wr32(device, 0x100cb8, (addr + 0x1000) >> 8); - nvkm_wr32(device, 0x100cbc, 0x80000001); + /* NV_PGRAPH_FE_PWR_MODE_AUTO. */ + nvkm_wr32(device, 0x404170, 0x00000010); nvkm_msec(device, 2000, - if (nvkm_rd32(device, 0x100c80) & 0x00008000) + if (!(nvkm_rd32(device, 0x404170) & 0x00000010)) break; ); - /* setup default state for mmio list construction */ - info.gr = gr; - info.data = gr->mmio_data; - info.mmio = gr->mmio_list; - info.addr = 0x2000 + (i * 8); - info.buffer_nr = 0; + /* Init SCC RAM. */ + nvkm_wr32(device, 0x40802c, 0x00000001); - /* make channel current */ - if (gr->firmware) { - nvkm_wr32(device, 0x409840, 0x00000030); - nvkm_wr32(device, 0x409500, 0x80000000 | addr >> 12); - nvkm_wr32(device, 0x409504, 0x00000003); - nvkm_msec(device, 2000, - if (nvkm_rd32(device, 0x409800) & 0x00000010) - break; - ); + /* Allocate memory to store context, and dummy global context buffers. */ + ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, + CB_RESERVED + gr->size, 0, true, &data); + if (ret) + goto done; - nvkm_kmap(chan); - nvkm_wo32(chan, 0x8001c, 1); - nvkm_wo32(chan, 0x80020, 0); - nvkm_wo32(chan, 0x80028, 0); - nvkm_wo32(chan, 0x8002c, 0); - nvkm_done(chan); + ret = nvkm_vmm_get(chan->vmm, 0, nvkm_memory_size(data), &ctx); + if (ret) + goto done; + + ret = nvkm_memory_map(data, 0, chan->vmm, ctx, NULL, 0); + if (ret) + goto done; + + /* Setup context pointer. */ + nvkm_kmap(inst); + nvkm_wo32(inst, 0x0210, lower_32_bits(ctx->addr + CB_RESERVED) | 4); + nvkm_wo32(inst, 0x0214, upper_32_bits(ctx->addr + CB_RESERVED)); + nvkm_done(inst); + + /* Make channel current. */ + addr = inst->addr >> 12; + if (gr->firmware) { + ret = gf100_gr_fecs_bind_pointer(gr, 0x80000000 | addr); + if (ret) + goto done_inst; + + nvkm_kmap(data); + nvkm_wo32(data, 0x1c, 1); + nvkm_wo32(data, 0x20, 0); + nvkm_wo32(data, 0x28, 0); + nvkm_wo32(data, 0x2c, 0); + nvkm_done(data); } else { nvkm_wr32(device, 0x409840, 0x80000000); - nvkm_wr32(device, 0x409500, 0x80000000 | addr >> 12); + nvkm_wr32(device, 0x409500, 0x80000000 | addr); nvkm_wr32(device, 0x409504, 0x00000001); nvkm_msec(device, 2000, if (nvkm_rd32(device, 0x409800) & 0x80000000) @@ -1353,34 +1512,48 @@ gf100_grctx_generate(struct gf100_gr *gr) ); } - grctx->main(gr, &info); + grctx->main(chan); - /* trigger a context unload by unsetting the "next channel valid" bit - * and faking a context switch interrupt - */ - nvkm_mask(device, 0x409b04, 0x80000000, 0x00000000); - nvkm_wr32(device, 0x409000, 0x00000100); - if (nvkm_msec(device, 2000, - if (!(nvkm_rd32(device, 0x409b00) & 0x80000000)) - break; - ) < 0) { - ret = -EBUSY; - goto done; + if (!gr->firmware) { + /* Trigger a context unload by unsetting the "next channel valid" bit + * and faking a context switch interrupt. + */ + nvkm_mask(device, 0x409b04, 0x80000000, 0x00000000); + nvkm_wr32(device, 0x409000, 0x00000100); + if (nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, 0x409b00) & 0x80000000)) + break; + ) < 0) { + ret = -EBUSY; + goto done_inst; + } + } else { + ret = gf100_gr_fecs_wfi_golden_save(gr, 0x80000000 | addr); + if (ret) + goto done_inst; + + nvkm_mask(device, 0x409b00, 0x80000000, 0x00000000); } gr->data = kmalloc(gr->size, GFP_KERNEL); if (gr->data) { - nvkm_kmap(chan); + nvkm_kmap(data); for (i = 0; i < gr->size; i += 4) - gr->data[i / 4] = nvkm_ro32(chan, 0x80000 + i); - nvkm_done(chan); + gr->data[i / 4] = nvkm_ro32(data, CB_RESERVED + i); + nvkm_done(data); ret = 0; } else { ret = -ENOMEM; } +done_inst: + nvkm_kmap(inst); + nvkm_wo32(inst, 0x0210, 0); + nvkm_wo32(inst, 0x0214, 0); + nvkm_done(inst); done: - nvkm_memory_del(&chan); + nvkm_vmm_put(chan->vmm, &ctx); + nvkm_memory_unref(&data); return ret; } @@ -1389,7 +1562,8 @@ gf100_grctx = { .main = gf100_grctx_generate_main, .unkn = gf100_grctx_generate_unkn, .hub = gf100_grctx_pack_hub, - .gpc = gf100_grctx_pack_gpc, + .gpc_0 = gf100_grctx_pack_gpc_0, + .gpc_1 = gf100_grctx_pack_gpc_1, .zcull = gf100_grctx_pack_zcull, .tpc = gf100_grctx_pack_tpc, .icmd = gf100_grctx_pack_icmd, @@ -1398,7 +1572,16 @@ gf100_grctx = { .bundle_size = 0x1800, .pagepool = gf100_grctx_generate_pagepool, .pagepool_size = 0x8000, + .attrib_cb_size = gf100_grctx_generate_attrib_cb_size, + .attrib_cb = gf100_grctx_generate_attrib_cb, .attrib = gf100_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, + .sm_id = gf100_grctx_generate_sm_id, + .tpc_nr = gf100_grctx_generate_tpc_nr, + .r4060a8 = gf100_grctx_generate_r4060a8, + .rop_mapping = gf100_grctx_generate_rop_mapping, + .alpha_beta_tables = gf100_grctx_generate_alpha_beta_tables, + .max_ways_evict = gf100_grctx_generate_max_ways_evict, + .r419cb8 = gf100_grctx_generate_r419cb8, }; |
