/*
 * Copyright 2021 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include "runl.h"
#include "cgrp.h"
#include "chan.h"
#include "chid.h"
#include "priv.h"
#include "runq.h"

#include <core/gpuobj.h>
#include <subdev/timer.h>
#include <subdev/top.h>

static struct nvkm_cgrp *
nvkm_engn_cgrp_get(struct nvkm_engn *engn, unsigned long *pirqflags)
{
	struct nvkm_cgrp *cgrp = NULL;
	struct nvkm_chan *chan;
	bool cgid;
	int id;

	id = engn->func->cxid(engn, &cgid);
	if (id < 0)
		return NULL;

	if (!cgid) {
		chan = nvkm_runl_chan_get_chid(engn->runl, id, pirqflags);
		if (chan)
			cgrp = chan->cgrp;
	} else {
		cgrp = nvkm_runl_cgrp_get_cgid(engn->runl, id, pirqflags);
	}

	WARN_ON(!cgrp);
	return cgrp;
}

static void
nvkm_runl_rc(struct nvkm_runl *runl)
{
	struct nvkm_fifo *fifo = runl->fifo;
	struct nvkm_cgrp *cgrp, *gtmp;
	struct nvkm_chan *chan, *ctmp;
	struct nvkm_engn *engn;
	unsigned long flags;
	int rc, state, i;
	bool reset;

	/* Runlist is blocked before scheduling recovery - fetch count. */
	BUG_ON(!mutex_is_locked(&runl->mutex));
	rc = atomic_xchg(&runl->rc_pending, 0);
	if (!rc)
		return;

	/* Look for channel groups flagged for RC. */
	nvkm_runl_foreach_cgrp_safe(cgrp, gtmp, runl) {
		state = atomic_cmpxchg(&cgrp->rc, NVKM_CGRP_RC_PENDING, NVKM_CGRP_RC_RUNNING);
		if (state == NVKM_CGRP_RC_PENDING) {
			/* Disable all channels in them, and remove from runlist. */
			nvkm_cgrp_foreach_chan_safe(chan, ctmp, cgrp) {
				nvkm_chan_error(chan, false);
				nvkm_chan_remove_locked(chan);
			}
		}
	}

	/* On GPUs with runlist preempt, wait for PBDMA(s) servicing runlist to go idle. */
	if (runl->func->preempt) {
		for (i = 0; i < runl->runq_nr; i++) {
			struct nvkm_runq *runq = runl->runq[i];

			if (runq) {
				nvkm_msec(fifo->engine.subdev.device, 2000,
					if (runq->func->idle(runq))
						break;
				);
			}
		}
	}

	/* Look for engines that are still on flagged channel groups - reset them. */
	nvkm_runl_foreach_engn_cond(engn, runl, engn->func->cxid) {
		cgrp = nvkm_engn_cgrp_get(engn, &flags);
		if (!cgrp) {
			ENGN_DEBUG(engn, "cxid not valid");
			continue;
		}

		reset = atomic_read(&cgrp->rc) == NVKM_CGRP_RC_RUNNING;
		nvkm_cgrp_put(&cgrp, flags);
		if (!reset) {
			ENGN_DEBUG(engn, "cxid not in recovery");
			continue;
		}

		ENGN_DEBUG(engn, "resetting...");
		/*TODO: can we do something less of a potential catastrophe on failure? */
		WARN_ON(nvkm_engine_reset(engn->engine));
	}

	/* Submit runlist update, and clear any remaining exception state. */
	runl->func->update(runl);
	if (runl->func->fault_clear)
		runl->func->fault_clear(runl);

	/* Unblock runlist processing. */
	while (rc--)
		nvkm_runl_allow(runl);
	runl->func->wait(runl);
}
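
/* Block the runlist, request preemption if supported, and schedule the recovery worker. */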
static void
nvkm_runl_rc_runl(struct nvkm_runl *runl)
{
	RUNL_ERROR(runl, "rc scheduled");

	nvkm_runl_block(runl);
	if (runl->func->preempt)
		runl->func->preempt(runl);

	atomic_inc(&runl->rc_pending);
	schedule_work(&runl->work);
}

void
nvkm_runl_rc_cgrp(struct nvkm_cgrp *cgrp)
{
	if (atomic_cmpxchg(&cgrp->rc, NVKM_CGRP_RC_NONE, NVKM_CGRP_RC_PENDING) != NVKM_CGRP_RC_NONE)
		return;

	CGRP_ERROR(cgrp, "rc scheduled");
	nvkm_runl_rc_runl(cgrp->runl);
}

void
nvkm_runl_rc_engn(struct nvkm_runl *runl, struct nvkm_engn *engn)
{
	struct nvkm_cgrp *cgrp;
	unsigned long flags;

	/* Lookup channel group currently on engine. */
	cgrp = nvkm_engn_cgrp_get(engn, &flags);
	if (!cgrp) {
		ENGN_DEBUG(engn, "rc skipped, not on channel");
		return;
	}

	nvkm_runl_rc_cgrp(cgrp);
	nvkm_cgrp_put(&cgrp, flags);
}

static void
nvkm_runl_work(struct work_struct *work)
{
	struct nvkm_runl *runl = container_of(work, typeof(*runl), work);

	mutex_lock(&runl->mutex);
	nvkm_runl_rc(runl);
	mutex_unlock(&runl->mutex);
}

struct nvkm_chan *
nvkm_runl_chan_get_inst(struct nvkm_runl *runl, u64 inst, unsigned long *pirqflags)
{
	struct nvkm_chid *chid = runl->chid;
	struct nvkm_chan *chan;
	unsigned long flags;
	int id;

	spin_lock_irqsave(&chid->lock, flags);
	for_each_set_bit(id, chid->used, chid->nr) {
		chan = chid->data[id];
		if (likely(chan)) {
			if (chan->inst->addr == inst) {
				spin_lock(&chan->cgrp->lock);
				*pirqflags = flags;
				spin_unlock(&chid->lock);
				return chan;
			}
		}
	}
	spin_unlock_irqrestore(&chid->lock, flags);
	return NULL;
}

struct nvkm_chan *
nvkm_runl_chan_get_chid(struct nvkm_runl *runl, int id, unsigned long *pirqflags)
{
	struct nvkm_chid *chid = runl->chid;
	struct nvkm_chan *chan;
	unsigned long flags;

	spin_lock_irqsave(&chid->lock, flags);
	if (!WARN_ON(id >= chid->nr)) {
		chan = chid->data[id];
		if (likely(chan)) {
			spin_lock(&chan->cgrp->lock);
			*pirqflags = flags;
			spin_unlock(&chid->lock);
			return chan;
		}
	}
	spin_unlock_irqrestore(&chid->lock, flags);
	return NULL;
}

struct nvkm_cgrp *
nvkm_runl_cgrp_get_cgid(struct nvkm_runl *runl, int id, unsigned long *pirqflags)
{
	struct nvkm_chid *cgid = runl->cgid;
	struct nvkm_cgrp *cgrp;
	unsigned long flags;

	spin_lock_irqsave(&cgid->lock, flags);
	if (!WARN_ON(id >= cgid->nr)) {
		cgrp = cgid->data[id];
		if (likely(cgrp)) {
			spin_lock(&cgrp->lock);
			*pirqflags = flags;
			spin_unlock(&cgid->lock);
			return cgrp;
		}
	}
	spin_unlock_irqrestore(&cgid->lock, flags);
	return NULL;
}

int
nvkm_runl_preempt_wait(struct nvkm_runl *runl)
{
	return nvkm_msec(runl->fifo->engine.subdev.device, runl->fifo->timeout.chan_msec,
		if (!runl->func->preempt_pending(runl))
			break;

		nvkm_runl_rc(runl);
		usleep_range(1, 2);
	) < 0 ? -ETIMEDOUT : 0;
}
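
/* Returns true while a runlist update is still pending, running any scheduled recovery meanwhile. */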
bool
nvkm_runl_update_pending(struct nvkm_runl *runl)
{
	if (!runl->func->pending(runl))
		return false;

	nvkm_runl_rc(runl);
	return true;
}

void
nvkm_runl_update_locked(struct nvkm_runl *runl, bool wait)
{
	if (atomic_xchg(&runl->changed, 0) && runl->func->update) {
		runl->func->update(runl);
		if (wait)
			runl->func->wait(runl);
	}
}

void
nvkm_runl_allow(struct nvkm_runl *runl)
{
	struct nvkm_fifo *fifo = runl->fifo;
	unsigned long flags;

	spin_lock_irqsave(&fifo->lock, flags);
	if (!--runl->blocked) {
		RUNL_TRACE(runl, "running");
		runl->func->allow(runl, ~0);
	}
	spin_unlock_irqrestore(&fifo->lock, flags);
}

void
nvkm_runl_block(struct nvkm_runl *runl)
{
	struct nvkm_fifo *fifo = runl->fifo;
	unsigned long flags;

	spin_lock_irqsave(&fifo->lock, flags);
	if (!runl->blocked++) {
		RUNL_TRACE(runl, "stopped");
		runl->func->block(runl, ~0);
	}
	spin_unlock_irqrestore(&fifo->lock, flags);
}

void
nvkm_runl_fini(struct nvkm_runl *runl)
{
	if (runl->func->fini)
		runl->func->fini(runl);

	flush_work(&runl->work);
}

void
nvkm_runl_del(struct nvkm_runl *runl)
{
	struct nvkm_engn *engn, *engt;

	nvkm_memory_unref(&runl->mem);

	list_for_each_entry_safe(engn, engt, &runl->engns, head) {
		list_del(&engn->head);
		kfree(engn);
	}

	nvkm_chid_unref(&runl->chid);
	nvkm_chid_unref(&runl->cgid);

	list_del(&runl->head);
	mutex_destroy(&runl->mutex);
	kfree(runl);
}
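
/* Register an engine on the runlist and resolve its MMU fault ID. */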
struct nvkm_engn *
nvkm_runl_add(struct nvkm_runl *runl, int engi, const struct nvkm_engn_func *func,
	      enum nvkm_subdev_type type, int inst)
{
	struct nvkm_fifo *fifo = runl->fifo;
	struct nvkm_device *device = fifo->engine.subdev.device;
	struct nvkm_engine *engine;
	struct nvkm_engn *engn;

	engine = nvkm_device_engine(device, type, inst);
	if (!engine) {
		RUNL_DEBUG(runl, "engn %d.%d[%s] not found", engi, inst, nvkm_subdev_type[type]);
		return NULL;
	}

	if (!(engn = kzalloc(sizeof(*engn), GFP_KERNEL)))
		return NULL;

	engn->func = func;
	engn->runl = runl;
	engn->id = engi;
	engn->engine = engine;
	engn->fault = -1;
	list_add_tail(&engn->head, &runl->engns);

	/* Lookup MMU engine ID for fault handling. */
	if (device->top)
		engn->fault = nvkm_top_fault_id(device, engine->subdev.type, engine->subdev.inst);

	if (engn->fault < 0 && fifo->func->mmu_fault) {
		const struct nvkm_enum *map = fifo->func->mmu_fault->engine;

		while (map->name) {
			if (map->data2 == engine->subdev.type && map->inst == engine->subdev.inst) {
				engn->fault = map->value;
				break;
			}
			map++;
		}
	}

	return engn;
}

struct nvkm_runl *
nvkm_runl_get(struct nvkm_fifo *fifo, int runi, u32 addr)
{
	struct nvkm_runl *runl;

	nvkm_runl_foreach(runl, fifo) {
		if ((runi >= 0 && runl->id == runi) || (runi < 0 && runl->addr == addr))
			return runl;
	}

	return NULL;
}

struct nvkm_runl *
nvkm_runl_new(struct nvkm_fifo *fifo, int runi, u32 addr, int id_nr)
{
	struct nvkm_subdev *subdev = &fifo->engine.subdev;
	struct nvkm_runl *runl;
	int ret;

	if (!(runl = kzalloc(sizeof(*runl), GFP_KERNEL)))
		return ERR_PTR(-ENOMEM);

	runl->func = fifo->func->runl;
	runl->fifo = fifo;
	runl->id = runi;
	runl->addr = addr;
	INIT_LIST_HEAD(&runl->engns);
	INIT_LIST_HEAD(&runl->cgrps);
	atomic_set(&runl->changed, 0);
	mutex_init(&runl->mutex);
	INIT_WORK(&runl->work, nvkm_runl_work);
	atomic_set(&runl->rc_triggered, 0);
	atomic_set(&runl->rc_pending, 0);
	list_add_tail(&runl->head, &fifo->runls);

	if (!fifo->chid) {
		if ((ret = nvkm_chid_new(&nvkm_chan_event, subdev, id_nr, 0, id_nr, &runl->cgid)) ||
		    (ret = nvkm_chid_new(&nvkm_chan_event, subdev, id_nr, 0, id_nr, &runl->chid))) {
			RUNL_ERROR(runl, "cgid/chid: %d", ret);
			nvkm_runl_del(runl);
			return ERR_PTR(ret);
		}
	} else {
		runl->cgid = nvkm_chid_ref(fifo->cgid);
		runl->chid = nvkm_chid_ref(fifo->chid);
	}

	return runl;
}