diff options
Diffstat (limited to 'drivers/gpu/drm/nouveau')
12 files changed, 168 insertions, 31 deletions
diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index 7840b6428afb..118807e38422 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -2474,7 +2474,7 @@ nv50_disp_atomic_commit(struct drm_device *dev, err_cleanup: if (ret) - drm_atomic_helper_cleanup_planes(dev, state); + drm_atomic_helper_unprepare_planes(dev, state); done: pm_runtime_put_autosuspend(dev->dev); return ret; diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/event.h b/drivers/gpu/drm/nouveau/include/nvkm/core/event.h index 82b267c11147..460459af272d 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/event.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/event.h @@ -14,7 +14,7 @@ struct nvkm_event { int index_nr; spinlock_t refs_lock; - spinlock_t list_lock; + rwlock_t list_lock; int *refs; struct list_head ntfy; @@ -38,7 +38,7 @@ nvkm_event_init(const struct nvkm_event_func *func, struct nvkm_subdev *subdev, int types_nr, int index_nr, struct nvkm_event *event) { spin_lock_init(&event->refs_lock); - spin_lock_init(&event->list_lock); + rwlock_init(&event->list_lock); return __nvkm_event_init(func, subdev, types_nr, index_nr, event); } diff --git a/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/common/shared/msgq/inc/msgq/msgq_priv.h b/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/common/shared/msgq/inc/msgq/msgq_priv.h index 5a2f273d95c8..0e32e71e123f 100644 --- a/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/common/shared/msgq/inc/msgq/msgq_priv.h +++ b/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/common/shared/msgq/inc/msgq/msgq_priv.h @@ -26,6 +26,49 @@ * DEALINGS IN THE SOFTWARE. */ +/** + * msgqTxHeader -- TX queue data structure + * @version: the version of this structure, must be 0 + * @size: the size of the entire queue, including this header + * @msgSize: the padded size of queue element, 16 is minimum + * @msgCount: the number of elements in this queue + * @writePtr: head index of this queue + * @flags: 1 = swap the RX pointers + * @rxHdrOff: offset of readPtr in this structure + * @entryOff: offset of beginning of queue (msgqRxHeader), relative to + * beginning of this structure + * + * The command queue is a queue of RPCs that are sent from the driver to the + * GSP. The status queue is a queue of messages/responses from GSP-RM to the + * driver. Although the driver allocates memory for both queues, the command + * queue is owned by the driver and the status queue is owned by GSP-RM. In + * addition, the headers of the two queues must not share the same 4K page. + * + * Each queue is prefixed with this data structure. The idea is that a queue + * and its header are written to only by their owner. That is, only the + * driver writes to the command queue and command queue header, and only the + * GSP writes to the status (receive) queue and its header. + * + * This is enforced by the concept of "swapping" the RX pointers. This is + * why the 'flags' field must be set to 1. 'rxHdrOff' is how the GSP knows + * where the where the tail pointer of its status queue. + * + * When the driver writes a new RPC to the command queue, it updates writePtr. + * When it reads a new message from the status queue, it updates readPtr. In + * this way, the GSP knows when a new command is in the queue (it polls + * writePtr) and it knows how much free space is in the status queue (it + * checks readPtr). The driver never cares about how much free space is in + * the status queue. + * + * As usual, producers write to the head pointer, and consumers read from the + * tail pointer. When head == tail, the queue is empty. + * + * So to summarize: + * command.writePtr = head of command queue + * command.readPtr = tail of status queue + * status.writePtr = head of status queue + * status.readPtr = tail of command queue + */ typedef struct { NvU32 version; // queue version @@ -38,6 +81,14 @@ typedef struct NvU32 entryOff; // Offset of entries from start of backing store. } msgqTxHeader; +/** + * msgqRxHeader - RX queue data structure + * @readPtr: tail index of the other queue + * + * Although this is a separate struct, it could easily be merged into + * msgqTxHeader. msgqTxHeader.rxHdrOff is simply the offset of readPtr + * from the beginning of msgqTxHeader. + */ typedef struct { NvU32 readPtr; // message id of last message read diff --git a/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/nvidia/generated/g_os_nvoc.h b/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/nvidia/generated/g_os_nvoc.h index 754c6af42f30..10121218f4d3 100644 --- a/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/nvidia/generated/g_os_nvoc.h +++ b/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/nvidia/generated/g_os_nvoc.h @@ -38,7 +38,7 @@ typedef struct PACKED_REGISTRY_TABLE { NvU32 size; NvU32 numEntries; - PACKED_REGISTRY_ENTRY entries[0]; + PACKED_REGISTRY_ENTRY entries[] __counted_by(numEntries); } PACKED_REGISTRY_TABLE; #endif diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 0f3bd187ede6..280d1d9a559b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -318,8 +318,9 @@ nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int *align, u32 domain, (!vmm->page[i].host || vmm->page[i].shift > PAGE_SHIFT)) continue; - if (pi < 0) - pi = i; + /* pick the last one as it will be smallest. */ + pi = i; + /* Stop once the buffer is larger than the current page size. */ if (*size >= 1ULL << vmm->page[i].shift) break; diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index d8c92521226d..f28f9a857458 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -726,6 +726,11 @@ nouveau_display_create(struct drm_device *dev) if (nouveau_modeset != 2) { ret = nvif_disp_ctor(&drm->client.device, "kmsDisp", 0, &disp->disp); + /* no display hw */ + if (ret == -ENODEV) { + ret = 0; + goto disp_create_err; + } if (!ret && (disp->disp.outp_mask || drm->vbios.dcb.entries)) { nouveau_display_create_properties(dev); diff --git a/drivers/gpu/drm/nouveau/nvkm/core/event.c b/drivers/gpu/drm/nouveau/nvkm/core/event.c index a6c877135598..61fed7792e41 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/event.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/event.c @@ -81,17 +81,17 @@ nvkm_event_ntfy_state(struct nvkm_event_ntfy *ntfy) static void nvkm_event_ntfy_remove(struct nvkm_event_ntfy *ntfy) { - spin_lock_irq(&ntfy->event->list_lock); + write_lock_irq(&ntfy->event->list_lock); list_del_init(&ntfy->head); - spin_unlock_irq(&ntfy->event->list_lock); + write_unlock_irq(&ntfy->event->list_lock); } static void nvkm_event_ntfy_insert(struct nvkm_event_ntfy *ntfy) { - spin_lock_irq(&ntfy->event->list_lock); + write_lock_irq(&ntfy->event->list_lock); list_add_tail(&ntfy->head, &ntfy->event->ntfy); - spin_unlock_irq(&ntfy->event->list_lock); + write_unlock_irq(&ntfy->event->list_lock); } static void @@ -176,7 +176,7 @@ nvkm_event_ntfy(struct nvkm_event *event, int id, u32 bits) return; nvkm_trace(event->subdev, "event: ntfy %08x on %d\n", bits, id); - spin_lock_irqsave(&event->list_lock, flags); + read_lock_irqsave(&event->list_lock, flags); list_for_each_entry_safe(ntfy, ntmp, &event->ntfy, head) { if (ntfy->id == id && ntfy->bits & bits) { @@ -185,7 +185,7 @@ nvkm_event_ntfy(struct nvkm_event *event, int id, u32 bits) } } - spin_unlock_irqrestore(&event->list_lock, flags); + read_unlock_irqrestore(&event->list_lock, flags); } void diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c index e4279f1772a1..377d0e0cef84 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c @@ -385,7 +385,7 @@ nvkm_uoutp_mthd_inherit(struct nvkm_outp *outp, void *argv, u32 argc) /* Ensure an ior is hooked up to this outp already */ ior = outp->func->inherit(outp); - if (!ior) + if (!ior || !ior->arm.head) return -ENODEV; /* With iors, there will be a separate output path for each type of connector - and all of diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c index 3adbb05ff587..d088e636edc3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c @@ -539,7 +539,7 @@ r535_fifo_runl_ctor(struct nvkm_fifo *fifo) struct nvkm_runl *runl; struct nvkm_engn *engn; u32 cgids = 2048; - u32 chids = 2048 / CHID_PER_USERD; + u32 chids = 2048; int ret; NV2080_CTRL_FIFO_GET_DEVICE_INFO_TABLE_PARAMS *ctrl; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c index e31f9641114b..44fb86841c05 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c @@ -365,10 +365,8 @@ r535_gsp_rpc_send(struct nvkm_gsp *gsp, void *argv, bool wait, u32 repc) } ret = r535_gsp_cmdq_push(gsp, rpc); - if (ret) { - mutex_unlock(&gsp->cmdq.mutex); + if (ret) return ERR_PTR(ret); - } if (wait) { msg = r535_gsp_msg_recv(gsp, fn, repc); @@ -689,8 +687,8 @@ r535_gsp_rpc_get(struct nvkm_gsp *gsp, u32 fn, u32 argc) struct nvfw_gsp_rpc *rpc; rpc = r535_gsp_cmdq_get(gsp, ALIGN(sizeof(*rpc) + argc, sizeof(u64))); - if (!rpc) - return NULL; + if (IS_ERR(rpc)) + return ERR_CAST(rpc); rpc->header_version = 0x03000000; rpc->signature = ('C' << 24) | ('P' << 16) | ('R' << 8) | 'V'; @@ -1048,7 +1046,7 @@ r535_gsp_rpc_set_registry(struct nvkm_gsp *gsp) char *strings; int str_offset; int i; - size_t rpc_size = sizeof(*rpc) + sizeof(rpc->entries[0]) * NV_GSP_REG_NUM_ENTRIES; + size_t rpc_size = struct_size(rpc, entries, NV_GSP_REG_NUM_ENTRIES); /* add strings + null terminator */ for (i = 0; i < NV_GSP_REG_NUM_ENTRIES; i++) @@ -1159,7 +1157,7 @@ static void r535_gsp_acpi_mux_id(acpi_handle handle, u32 id, MUX_METHOD_DATA_ELEMENT *mode, MUX_METHOD_DATA_ELEMENT *part) { - acpi_handle iter = NULL, handle_mux; + acpi_handle iter = NULL, handle_mux = NULL; acpi_status status; unsigned long long value; @@ -1379,6 +1377,13 @@ r535_gsp_msg_post_event(void *priv, u32 fn, void *repv, u32 repc) return 0; } +/** + * r535_gsp_msg_run_cpu_sequencer() -- process I/O commands from the GSP + * + * The GSP sequencer is a list of I/O commands that the GSP can send to + * the driver to perform for various purposes. The most common usage is to + * perform a special mid-initialization reset. + */ static int r535_gsp_msg_run_cpu_sequencer(void *priv, u32 fn, void *repv, u32 repc) { @@ -1718,6 +1723,23 @@ r535_gsp_libos_id8(const char *name) return id; } +/** + * create_pte_array() - creates a PTE array of a physically contiguous buffer + * @ptes: pointer to the array + * @addr: base address of physically contiguous buffer (GSP_PAGE_SIZE aligned) + * @size: size of the buffer + * + * GSP-RM sometimes expects physically-contiguous buffers to have an array of + * "PTEs" for each page in that buffer. Although in theory that allows for + * the buffer to be physically discontiguous, GSP-RM does not currently + * support that. + * + * In this case, the PTEs are DMA addresses of each page of the buffer. Since + * the buffer is physically contiguous, calculating all the PTEs is simple + * math. + * + * See memdescGetPhysAddrsForGpu() + */ static void create_pte_array(u64 *ptes, dma_addr_t addr, size_t size) { unsigned int num_pages = DIV_ROUND_UP_ULL(size, GSP_PAGE_SIZE); @@ -1727,6 +1749,35 @@ static void create_pte_array(u64 *ptes, dma_addr_t addr, size_t size) ptes[i] = (u64)addr + (i << GSP_PAGE_SHIFT); } +/** + * r535_gsp_libos_init() -- create the libos arguments structure + * + * The logging buffers are byte queues that contain encoded printf-like + * messages from GSP-RM. They need to be decoded by a special application + * that can parse the buffers. + * + * The 'loginit' buffer contains logs from early GSP-RM init and + * exception dumps. The 'logrm' buffer contains the subsequent logs. Both are + * written to directly by GSP-RM and can be any multiple of GSP_PAGE_SIZE. + * + * The physical address map for the log buffer is stored in the buffer + * itself, starting with offset 1. Offset 0 contains the "put" pointer. + * + * The GSP only understands 4K pages (GSP_PAGE_SIZE), so even if the kernel is + * configured for a larger page size (e.g. 64K pages), we need to give + * the GSP an array of 4K pages. Fortunately, since the buffer is + * physically contiguous, it's simple math to calculate the addresses. + * + * The buffers must be a multiple of GSP_PAGE_SIZE. GSP-RM also currently + * ignores the @kind field for LOGINIT, LOGINTR, and LOGRM, but expects the + * buffers to be physically contiguous anyway. + * + * The memory allocated for the arguments must remain until the GSP sends the + * init_done RPC. + * + * See _kgspInitLibosLoggingStructures (allocates memory for buffers) + * See kgspSetupLibosInitArgs_IMPL (creates pLibosInitArgs[] array) + */ static int r535_gsp_libos_init(struct nvkm_gsp *gsp) { @@ -1837,6 +1888,35 @@ nvkm_gsp_radix3_dtor(struct nvkm_gsp *gsp, struct nvkm_gsp_radix3 *rx3) nvkm_gsp_mem_dtor(gsp, &rx3->mem[i]); } +/** + * nvkm_gsp_radix3_sg - build a radix3 table from a S/G list + * + * The GSP uses a three-level page table, called radix3, to map the firmware. + * Each 64-bit "pointer" in the table is either the bus address of an entry in + * the next table (for levels 0 and 1) or the bus address of the next page in + * the GSP firmware image itself. + * + * Level 0 contains a single entry in one page that points to the first page + * of level 1. + * + * Level 1, since it's also only one page in size, contains up to 512 entries, + * one for each page in Level 2. + * + * Level 2 can be up to 512 pages in size, and each of those entries points to + * the next page of the firmware image. Since there can be up to 512*512 + * pages, that limits the size of the firmware to 512*512*GSP_PAGE_SIZE = 1GB. + * + * Internally, the GSP has its window into system memory, but the base + * physical address of the aperture is not 0. In fact, it varies depending on + * the GPU architecture. Since the GPU is a PCI device, this window is + * accessed via DMA and is therefore bound by IOMMU translation. The end + * result is that GSP-RM must translate the bus addresses in the table to GSP + * physical addresses. All this should happen transparently. + * + * Returns 0 on success, or negative error code + * + * See kgspCreateRadix3_IMPL + */ static int nvkm_gsp_radix3_sg(struct nvkm_device *device, struct sg_table *sgt, u64 size, struct nvkm_gsp_radix3 *rx3) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c index 1b811d6972a1..201022ae9214 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c @@ -49,14 +49,14 @@ #include <subdev/mmu.h> struct gk20a_instobj { - struct nvkm_memory memory; + struct nvkm_instobj base; struct nvkm_mm_node *mn; struct gk20a_instmem *imem; /* CPU mapping */ u32 *vaddr; }; -#define gk20a_instobj(p) container_of((p), struct gk20a_instobj, memory) +#define gk20a_instobj(p) container_of((p), struct gk20a_instobj, base.memory) /* * Used for objects allocated using the DMA API @@ -148,7 +148,7 @@ gk20a_instobj_iommu_recycle_vaddr(struct gk20a_instobj_iommu *obj) list_del(&obj->vaddr_node); vunmap(obj->base.vaddr); obj->base.vaddr = NULL; - imem->vaddr_use -= nvkm_memory_size(&obj->base.memory); + imem->vaddr_use -= nvkm_memory_size(&obj->base.base.memory); nvkm_debug(&imem->base.subdev, "vaddr used: %x/%x\n", imem->vaddr_use, imem->vaddr_max); } @@ -283,7 +283,7 @@ gk20a_instobj_map(struct nvkm_memory *memory, u64 offset, struct nvkm_vmm *vmm, { struct gk20a_instobj *node = gk20a_instobj(memory); struct nvkm_vmm_map map = { - .memory = &node->memory, + .memory = &node->base.memory, .offset = offset, .mem = node->mn, }; @@ -391,8 +391,8 @@ gk20a_instobj_ctor_dma(struct gk20a_instmem *imem, u32 npages, u32 align, return -ENOMEM; *_node = &node->base; - nvkm_memory_ctor(&gk20a_instobj_func_dma, &node->base.memory); - node->base.memory.ptrs = &gk20a_instobj_ptrs; + nvkm_memory_ctor(&gk20a_instobj_func_dma, &node->base.base.memory); + node->base.base.memory.ptrs = &gk20a_instobj_ptrs; node->base.vaddr = dma_alloc_attrs(dev, npages << PAGE_SHIFT, &node->handle, GFP_KERNEL, @@ -438,8 +438,8 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align, *_node = &node->base; node->dma_addrs = (void *)(node->pages + npages); - nvkm_memory_ctor(&gk20a_instobj_func_iommu, &node->base.memory); - node->base.memory.ptrs = &gk20a_instobj_ptrs; + nvkm_memory_ctor(&gk20a_instobj_func_iommu, &node->base.base.memory); + node->base.base.memory.ptrs = &gk20a_instobj_ptrs; /* Allocate backing memory */ for (i = 0; i < npages; i++) { @@ -533,7 +533,7 @@ gk20a_instobj_new(struct nvkm_instmem *base, u32 size, u32 align, bool zero, else ret = gk20a_instobj_ctor_dma(imem, size >> PAGE_SHIFT, align, &node); - *pmemory = node ? &node->memory : NULL; + *pmemory = node ? &node->base.memory : NULL; if (ret) return ret; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c index e34bc6076401..8379e72d77ab 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c @@ -31,7 +31,7 @@ tu102_vmm_flush(struct nvkm_vmm *vmm, int depth) type |= 0x00000001; /* PAGE_ALL */ if (atomic_read(&vmm->engref[NVKM_SUBDEV_BAR])) - type |= 0x00000004; /* HUB_ONLY */ + type |= 0x00000006; /* HUB_ONLY | ALL PDB (hack) */ mutex_lock(&vmm->mmu->mutex); |