Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_chardev.c')
-rw-r--r--	drivers/gpu/drm/amd/amdkfd/kfd_chardev.c	357
1 file changed, 198 insertions, 159 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 6a27b000a246..22925df6a791 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -22,7 +22,6 @@
  */
 
 #include <linux/device.h>
-#include <linux/export.h>
 #include <linux/err.h>
 #include <linux/fs.h>
 #include <linux/file.h>
@@ -36,7 +35,6 @@
 #include <linux/mman.h>
 #include <linux/ptrace.h>
 #include <linux/dma-buf.h>
-#include <linux/fdtable.h>
 #include <linux/processor.h>
 #include "kfd_priv.h"
 #include "kfd_device_queue_manager.h"
@@ -63,8 +61,10 @@ static const struct file_operations kfd_fops = {
 };
 
 static int kfd_char_dev_major = -1;
-static struct class *kfd_class;
 struct device *kfd_device;
+static const struct class kfd_class = {
+	.name = kfd_dev_name,
+};
 
 static inline struct kfd_process_device *kfd_lock_pdd_by_id(struct kfd_process *p, __u32 gpu_id)
 {
@@ -94,14 +94,13 @@ int kfd_chardev_init(void)
 	if (err < 0)
 		goto err_register_chrdev;
 
-	kfd_class = class_create(kfd_dev_name);
-	err = PTR_ERR(kfd_class);
-	if (IS_ERR(kfd_class))
+	err = class_register(&kfd_class);
+	if (err)
 		goto err_class_create;
 
-	kfd_device = device_create(kfd_class, NULL,
-				   MKDEV(kfd_char_dev_major, 0),
-				   NULL, kfd_dev_name);
+	kfd_device = device_create(&kfd_class, NULL,
+				   MKDEV(kfd_char_dev_major, 0),
+				   NULL, kfd_dev_name);
 	err = PTR_ERR(kfd_device);
 	if (IS_ERR(kfd_device))
 		goto err_device_create;
@@ -109,7 +108,7 @@ int kfd_chardev_init(void)
 	return 0;
 
 err_device_create:
-	class_destroy(kfd_class);
+	class_unregister(&kfd_class);
err_class_create:
 	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
err_register_chrdev:
@@ -118,8 +117,8 @@ err_register_chrdev:
 
 void kfd_chardev_exit(void)
 {
-	device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
-	class_destroy(kfd_class);
+	device_destroy(&kfd_class, MKDEV(kfd_char_dev_major, 0));
+	class_unregister(&kfd_class);
 	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
 	kfd_device = NULL;
 }
@@ -155,8 +154,8 @@ static int kfd_open(struct inode *inode, struct file *filep)
 	/* filep now owns the reference returned by kfd_create_process */
 	filep->private_data = process;
 
-	dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
-		process->pasid, process->is_32bit_user_mode);
+	dev_dbg(kfd_device, "process pid %d opened kfd node, compat mode (32 bit) - %d\n",
+		process->lead_thread->pid, process->is_32bit_user_mode);
 
 	return 0;
 }
@@ -212,6 +211,11 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
 		return -EINVAL;
 	}
 
+	if (args->ring_size < KFD_MIN_QUEUE_RING_SIZE) {
+		args->ring_size = KFD_MIN_QUEUE_RING_SIZE;
+		pr_debug("Size lower. clamped to KFD_MIN_QUEUE_RING_SIZE");
+	}
+
 	if (!access_ok((const void __user *) args->read_pointer_address,
 			sizeof(uint32_t))) {
 		pr_err("Can't access read pointer\n");
@@ -246,14 +250,15 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
 	q_properties->priority = args->queue_priority;
 	q_properties->queue_address = args->ring_base_address;
 	q_properties->queue_size = args->ring_size;
-	q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
-	q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
+	q_properties->read_ptr = (void __user *)args->read_pointer_address;
+	q_properties->write_ptr = (void __user *)args->write_pointer_address;
 	q_properties->eop_ring_buffer_address = args->eop_buffer_address;
 	q_properties->eop_ring_buffer_size = args->eop_buffer_size;
 	q_properties->ctx_save_restore_area_address =
 			args->ctx_save_restore_address;
 	q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
 	q_properties->ctl_stack_size = args->ctl_stack_size;
+	q_properties->sdma_engine_id = args->sdma_engine_id;
 	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
 		args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
 		q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
@@ -261,6 +266,8 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
 		q_properties->type = KFD_QUEUE_TYPE_SDMA;
 	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI)
 		q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI;
+	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_BY_ENG_ID)
+		q_properties->type = KFD_QUEUE_TYPE_SDMA_BY_ENG_ID;
 	else
 		return -ENOTSUPP;
 
@@ -305,7 +312,6 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
 	struct kfd_process_device *pdd;
 	struct queue_properties q_properties;
 	uint32_t doorbell_offset_in_process = 0;
-	struct amdgpu_bo *wptr_bo = NULL;
 
 	memset(&q_properties, 0, sizeof(struct queue_properties));
 
@@ -333,54 +339,37 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
 		goto err_bind_process;
 	}
 
-	if (!pdd->doorbell_index &&
-	    kfd_alloc_process_doorbells(dev->kfd, &pdd->doorbell_index) < 0) {
-		err = -ENOMEM;
-		goto err_alloc_doorbells;
-	}
-
-	/* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work
-	 * on unmapped queues for usermode queue oversubscription (no aggregated doorbell)
-	 */
-	if (dev->kfd->shared_resources.enable_mes &&
-			((dev->adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK)
-			>> AMDGPU_MES_API_VERSION_SHIFT) >= 2) {
-		struct amdgpu_bo_va_mapping *wptr_mapping;
-		struct amdgpu_vm *wptr_vm;
-
-		wptr_vm = drm_priv_to_vm(pdd->drm_priv);
-		err = amdgpu_bo_reserve(wptr_vm->root.bo, false);
-		if (err)
-			goto err_wptr_map_gart;
-
-		wptr_mapping = amdgpu_vm_bo_lookup_mapping(
-				wptr_vm, args->write_pointer_address >> PAGE_SHIFT);
-		amdgpu_bo_unreserve(wptr_vm->root.bo);
-		if (!wptr_mapping) {
-			pr_err("Failed to lookup wptr bo\n");
-			err = -EINVAL;
-			goto err_wptr_map_gart;
-		}
+	if (q_properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) {
+		int max_sdma_eng_id = kfd_get_num_sdma_engines(dev) +
+				      kfd_get_num_xgmi_sdma_engines(dev) - 1;
 
-		wptr_bo = wptr_mapping->bo_va->base.bo;
-		if (wptr_bo->tbo.base.size > PAGE_SIZE) {
-			pr_err("Requested GART mapping for wptr bo larger than one page\n");
+		if (q_properties.sdma_engine_id > max_sdma_eng_id) {
 			err = -EINVAL;
-			goto err_wptr_map_gart;
+			pr_err("sdma_engine_id %i exceeds maximum id of %i\n",
+			       q_properties.sdma_engine_id, max_sdma_eng_id);
+			goto err_sdma_engine_id;
 		}
+	}
 
-		err = amdgpu_amdkfd_map_gtt_bo_to_gart(dev->adev, wptr_bo);
+	if (!pdd->qpd.proc_doorbells) {
+		err = kfd_alloc_process_doorbells(dev->kfd, pdd);
 		if (err) {
-			pr_err("Failed to map wptr bo to GART\n");
-			goto err_wptr_map_gart;
+			pr_debug("failed to allocate process doorbells\n");
+			goto err_bind_process;
 		}
 	}
 
-	pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n",
-			p->pasid,
+	err = kfd_queue_acquire_buffers(pdd, &q_properties);
+	if (err) {
+		pr_debug("failed to acquire user queue buffers\n");
+		goto err_acquire_queue_buf;
+	}
+
+	pr_debug("Creating queue for process pid %d on gpu 0x%x\n",
+			p->lead_thread->pid,
 			dev->id);
 
-	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, wptr_bo,
+	err = pqm_create_queue(&p->pqm, dev, &q_properties, &queue_id,
 			NULL, NULL, NULL, &doorbell_offset_in_process);
 	if (err != 0)
 		goto err_create_queue;
@@ -414,10 +403,10 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
 	return 0;
 
 err_create_queue:
-	if (wptr_bo)
-		amdgpu_amdkfd_free_gtt_mem(dev->adev, wptr_bo);
-err_wptr_map_gart:
-err_alloc_doorbells:
+	kfd_queue_unref_bo_vas(pdd, &q_properties);
+	kfd_queue_release_buffers(pdd, &q_properties);
+err_acquire_queue_buf:
+err_sdma_engine_id:
err_bind_process:
err_pdd:
 	mutex_unlock(&p->mutex);
@@ -430,9 +419,9 @@ static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
 	int retval;
 	struct kfd_ioctl_destroy_queue_args *args = data;
 
-	pr_debug("Destroying queue id %d for pasid 0x%x\n",
+	pr_debug("Destroying queue id %d for process pid %d\n",
 			args->queue_id,
-			p->pasid);
+			p->lead_thread->pid);
 
 	mutex_lock(&p->mutex);
 
@@ -476,6 +465,11 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
 		return -EINVAL;
 	}
 
+	if (args->ring_size < KFD_MIN_QUEUE_RING_SIZE) {
+		args->ring_size = KFD_MIN_QUEUE_RING_SIZE;
+		pr_debug("Size lower. clamped to KFD_MIN_QUEUE_RING_SIZE");
+	}
+
 	properties.queue_address = args->ring_base_address;
 	properties.queue_size = args->ring_size;
 	properties.queue_percent = args->queue_percentage & 0xFF;
@@ -483,8 +477,8 @@
 	properties.pm4_target_xcc = (args->queue_percentage >> 8) & 0xFF;
 	properties.priority = args->queue_priority;
 
-	pr_debug("Updating queue id %d for pasid 0x%x\n",
-			args->queue_id, p->pasid);
+	pr_debug("Updating queue id %d for process pid %d\n",
+		 args->queue_id, p->lead_thread->pid);
 
 	mutex_lock(&p->mutex);
 
@@ -527,15 +521,10 @@ static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
 		cu_mask_size = sizeof(uint32_t) * (max_num_cus/32);
 	}
 
-	minfo.cu_mask.ptr = kzalloc(cu_mask_size, GFP_KERNEL);
-	if (!minfo.cu_mask.ptr)
-		return -ENOMEM;
-
-	retval = copy_from_user(minfo.cu_mask.ptr, cu_mask_ptr, cu_mask_size);
-	if (retval) {
+	minfo.cu_mask.ptr = memdup_user(cu_mask_ptr, cu_mask_size);
+	if (IS_ERR(minfo.cu_mask.ptr)) {
 		pr_debug("Could not copy CU mask from userspace");
-		retval = -EFAULT;
-		goto out;
+		return PTR_ERR(minfo.cu_mask.ptr);
 	}
 
 	mutex_lock(&p->mutex);
@@ -544,7 +533,6 @@
 
 	mutex_unlock(&p->mutex);
 
-out:
 	kfree(minfo.cu_mask.ptr);
 	return retval;
 }
@@ -611,7 +599,8 @@ static int kfd_ioctl_set_memory_policy(struct file *filep,
 				default_policy,
 				alternate_policy,
 				(void __user *)args->alternate_aperture_base,
-				args->alternate_aperture_size))
+				args->alternate_aperture_size,
+				args->misc_process_flag))
 		err = -EINVAL;
 
 out:
@@ -710,7 +699,7 @@ static int kfd_ioctl_get_process_apertures(struct file *filp,
 	struct kfd_process_device_apertures *pAperture;
 	int i;
 
-	dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
+	dev_dbg(kfd_device, "get apertures for process pid %d", p->lead_thread->pid);
 
 	args->num_of_nodes = 0;
 
@@ -762,7 +751,8 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp,
 	int ret;
 	int i;
 
-	dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
+	dev_dbg(kfd_device, "get apertures for process pid %d",
+		p->lead_thread->pid);
 
 	if (args->num_of_nodes == 0) {
 		/* Return number of nodes, so that user space can alloacate
@@ -777,8 +767,8 @@
 	 * nodes, but not more than args->num_of_nodes as that is
 	 * the amount of memory allocated by user
 	 */
-	pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
-			args->num_of_nodes), GFP_KERNEL);
+	pa = kcalloc(args->num_of_nodes, sizeof(struct kfd_process_device_apertures),
+		     GFP_KERNEL);
 	if (!pa)
 		return -ENOMEM;
 
@@ -1020,14 +1010,11 @@ err_drm_file:
 
 bool kfd_dev_is_large_bar(struct kfd_node *dev)
 {
-	if (debug_largebar) {
+	if (dev->kfd->adev->debug_largebar) {
 		pr_debug("Simulate large-bar allocation on non large-bar machine\n");
 		return true;
 	}
 
-	if (dev->kfd->use_iommu_v2)
-		return false;
-
 	if (dev->local_mem_info.local_mem_size_private == 0 &&
 	    dev->local_mem_info.local_mem_size_public > 0)
 		return true;
@@ -1077,7 +1064,12 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
 	svm_range_list_lock_and_flush_work(&p->svms, current->mm);
 	mutex_lock(&p->svms.lock);
 	mmap_write_unlock(current->mm);
-	if (interval_tree_iter_first(&p->svms.objects,
+
+	/* Skip a special case that allocates VRAM without VA,
+	 * VA will be invalid of 0.
+	 */
+	if (!(!args->va_addr && (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)) &&
+	    interval_tree_iter_first(&p->svms.objects,
				     args->va_addr >> PAGE_SHIFT,
				     (args->va_addr + args->size - 1) >> PAGE_SHIFT)) {
 		pr_err("Address: 0x%llx already allocated by SVM\n",
@@ -1140,7 +1132,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
 			goto err_unlock;
 		}
 		offset = dev->adev->rmmio_remap.bus_addr;
-		if (!offset) {
+		if (!offset || (PAGE_SIZE > 4096)) {
 			err = -ENOMEM;
 			goto err_unlock;
 		}
@@ -1166,7 +1158,7 @@
 		if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM)
 			size >>= 1;
-		WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + PAGE_ALIGN(size));
+		atomic64_add(PAGE_ALIGN(size), &pdd->vram_usage);
 	}
 
 	mutex_unlock(&p->mutex);
@@ -1237,7 +1229,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
 	kfd_process_device_remove_obj_handle(
 		pdd, GET_IDR_HANDLE(args->handle));
 
-	WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size);
+	atomic64_sub(size, &pdd->vram_usage);
 
err_unlock:
err_pdd:
@@ -1418,8 +1410,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
 		err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
 			peer_pdd->dev->adev, (struct kgd_mem *)mem, peer_pdd->drm_priv);
 		if (err) {
-			pr_err("Failed to unmap from gpu %d/%d\n",
-			       i, args->n_devices);
+			pr_debug("Failed to unmap from gpu %d/%d\n", i, args->n_devices);
 			goto unmap_memory_from_gpu_failed;
 		}
 		args->n_success = i+1;
@@ -1434,17 +1425,23 @@
 			goto sync_memory_failed;
 		}
 	}
-	mutex_unlock(&p->mutex);
-
-	if (flush_tlb) {
-		/* Flush TLBs after waiting for the page table updates to complete */
-		for (i = 0; i < args->n_devices; i++) {
-			peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
-			if (WARN_ON_ONCE(!peer_pdd))
-				continue;
+	/* Flush TLBs after waiting for the page table updates to complete */
+	for (i = 0; i < args->n_devices; i++) {
+		peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
+		if (WARN_ON_ONCE(!peer_pdd))
+			continue;
+		if (flush_tlb)
 			kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
-		}
+
+		/* Remove dma mapping after tlb flush to avoid IO_PAGE_FAULT */
+		err = amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv);
+		if (err)
+			goto sync_memory_failed;
 	}
+
+	mutex_unlock(&p->mutex);
+
 	kfree(devices_arr);
 
 	return 0;
@@ -1487,7 +1484,8 @@ static int kfd_ioctl_alloc_queue_gws(struct file *filep,
 		goto out_unlock;
 	}
 
-	if (!kfd_dbg_has_gws_support(dev) && p->debug_trap_enabled) {
+	if (p->debug_trap_enabled && (!kfd_dbg_has_gws_support(dev) ||
+				      kfd_dbg_has_cwsr_workaround(dev))) {
 		retval = -EBUSY;
 		goto out_unlock;
 	}
@@ -1517,7 +1515,7 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep,
 
 	/* Find a KFD GPU device that supports the get_dmabuf_info query */
 	for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++)
-		if (dev)
+		if (dev && !kfd_devcgroup_check_permission(dev))
 			break;
 	if (!dev)
 		return -EINVAL;
@@ -1539,7 +1537,7 @@
 	if (xcp_id >= 0)
 		args->gpu_id = dmabuf_adev->kfd.dev->nodes[xcp_id]->id;
 	else
-		args->gpu_id = dmabuf_adev->kfd.dev->nodes[0]->id;
+		args->gpu_id = dev->id;
 	args->flags = flags;
 
 	/* Copy metadata buffer to user mode */
@@ -1561,16 +1559,11 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
 {
 	struct kfd_ioctl_import_dmabuf_args *args = data;
 	struct kfd_process_device *pdd;
-	struct dma_buf *dmabuf;
 	int idr_handle;
 	uint64_t size;
 	void *mem;
 	int r;
 
-	dmabuf = dma_buf_get(args->dmabuf_fd);
-	if (IS_ERR(dmabuf))
-		return PTR_ERR(dmabuf);
-
 	mutex_lock(&p->mutex);
 	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
 	if (!pdd) {
@@ -1584,10 +1577,10 @@
 		goto err_unlock;
 	}
 
-	r = amdgpu_amdkfd_gpuvm_import_dmabuf(pdd->dev->adev, dmabuf,
-					      args->va_addr, pdd->drm_priv,
-					      (struct kgd_mem **)&mem, &size,
-					      NULL);
+	r = amdgpu_amdkfd_gpuvm_import_dmabuf_fd(pdd->dev->adev, args->dmabuf_fd,
+						 args->va_addr, pdd->drm_priv,
+						 (struct kgd_mem **)&mem, &size,
+						 NULL);
 	if (r)
 		goto err_unlock;
 
@@ -1598,7 +1591,6 @@
 	}
 
 	mutex_unlock(&p->mutex);
-	dma_buf_put(dmabuf);
 
 	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
 
@@ -1609,7 +1601,6 @@ err_free:
					       pdd->drm_priv, NULL);
err_unlock:
 	mutex_unlock(&p->mutex);
-	dma_buf_put(dmabuf);
 	return r;
 }
 
@@ -1845,33 +1836,34 @@ static uint32_t get_process_num_bos(struct kfd_process *p)
 		idr_for_each_entry(&pdd->alloc_idr, mem, id) {
 			struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
 
-			if ((uint64_t)kgd_mem->va > pdd->gpuvm_base)
+			if (!kgd_mem->va || kgd_mem->va > pdd->gpuvm_base)
 				num_of_bos++;
 		}
 	}
 	return num_of_bos;
 }
 
-static int criu_get_prime_handle(struct drm_gem_object *gobj, int flags,
-				 u32 *shared_fd)
+static int criu_get_prime_handle(struct kgd_mem *mem,
+				 int flags, u32 *shared_fd,
+				 struct file **file)
 {
 	struct dma_buf *dmabuf;
 	int ret;
 
-	dmabuf = amdgpu_gem_prime_export(gobj, flags);
-	if (IS_ERR(dmabuf)) {
-		ret = PTR_ERR(dmabuf);
+	ret = amdgpu_amdkfd_gpuvm_export_dmabuf(mem, &dmabuf);
+	if (ret) {
 		pr_err("dmabuf export failed for the BO\n");
 		return ret;
 	}
 
-	ret = dma_buf_fd(dmabuf, flags);
+	ret = get_unused_fd_flags(flags);
 	if (ret < 0) {
 		pr_err("dmabuf create fd failed, ret:%d\n", ret);
 		goto out_free_dmabuf;
 	}
 
 	*shared_fd = ret;
+	*file = dmabuf->file;
 	return 0;
 
out_free_dmabuf:
@@ -1879,6 +1871,25 @@ out_free_dmabuf:
 	return ret;
 }
 
+static void commit_files(struct file **files,
+			 struct kfd_criu_bo_bucket *bo_buckets,
+			 unsigned int count,
+			 int err)
+{
+	while (count--) {
+		struct file *file = files[count];
+
+		if (!file)
+			continue;
+		if (err) {
+			fput(file);
+			put_unused_fd(bo_buckets[count].dmabuf_fd);
+		} else {
+			fd_install(bo_buckets[count].dmabuf_fd, file);
+		}
+	}
+}
+
 static int criu_checkpoint_bos(struct kfd_process *p,
			       uint32_t num_bos,
			       uint8_t __user *user_bos,
@@ -1887,6 +1898,7 @@ static int criu_checkpoint_bos(struct kfd_process *p,
 {
 	struct kfd_criu_bo_bucket *bo_buckets;
 	struct kfd_criu_bo_priv_data *bo_privs;
+	struct file **files = NULL;
 	int ret = 0, pdd_index, bo_index = 0, id;
 	void *mem;
 
@@ -1900,6 +1912,12 @@ static int criu_checkpoint_bos(struct kfd_process *p,
 		goto exit;
 	}
 
+	files = kvzalloc(num_bos * sizeof(struct file *), GFP_KERNEL);
+	if (!files) {
+		ret = -ENOMEM;
+		goto exit;
+	}
+
 	for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {
 		struct kfd_process_device *pdd = p->pdds[pdd_index];
 		struct amdgpu_bo *dumper_bo;
@@ -1910,15 +1928,14 @@
 			struct kfd_criu_bo_priv_data *bo_priv;
 			int i, dev_idx = 0;
 
-			if (!mem) {
-				ret = -ENOMEM;
-				goto exit;
-			}
-
 			kgd_mem = (struct kgd_mem *)mem;
 			dumper_bo = kgd_mem->bo;
 
-			if ((uint64_t)kgd_mem->va <= pdd->gpuvm_base)
+			/* Skip checkpointing BOs that are used for Trap handler
+			 * code and state. Currently, these BOs have a VA that
+			 * is less GPUVM Base
+			 */
+			if (kgd_mem->va && kgd_mem->va <= pdd->gpuvm_base)
 				continue;
 
 			bo_bucket = &bo_buckets[bo_index];
@@ -1940,10 +1957,10 @@
 			}
 			if (bo_bucket->alloc_flags
			    & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
-				ret = criu_get_prime_handle(&dumper_bo->tbo.base,
+				ret = criu_get_prime_handle(kgd_mem,
						bo_bucket->alloc_flags &
						KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? DRM_RDWR : 0,
-						&bo_bucket->dmabuf_fd);
+						&bo_bucket->dmabuf_fd, &files[bo_index]);
 				if (ret)
 					goto exit;
 			} else {
@@ -1961,7 +1978,7 @@
 			bo_bucket->offset = amdgpu_bo_mmap_offset(dumper_bo);
 
 			for (i = 0; i < p->n_pdds; i++) {
-				if (amdgpu_amdkfd_bo_mapped_to_dev(p->pdds[i]->dev->adev, kgd_mem))
+				if (amdgpu_amdkfd_bo_mapped_to_dev(p->pdds[i]->drm_priv, kgd_mem))
 					bo_priv->mapped_gpuids[dev_idx++] =
							p->pdds[i]->user_gpu_id;
 			}
@@ -1994,12 +2011,8 @@
 	*priv_offset += num_bos * sizeof(*bo_privs);
 
exit:
-	while (ret && bo_index--) {
-		if (bo_buckets[bo_index].alloc_flags
-		    & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))
-			close_fd(bo_buckets[bo_index].dmabuf_fd);
-	}
-
+	commit_files(files, bo_buckets, bo_index, ret);
+	kvfree(files);
 	kvfree(bo_buckets);
 	kvfree(bo_privs);
 	return ret;
@@ -2024,9 +2037,7 @@ static int criu_get_process_object_info(struct kfd_process *p,
 
 	num_events = kfd_get_num_events(p);
 
-	ret = svm_range_get_info(p, &num_svm_ranges, &svm_priv_data_size);
-	if (ret)
-		return ret;
+	svm_range_get_info(p, &num_svm_ranges, &svm_priv_data_size);
 
 	*num_objects = num_queues + num_events + num_svm_ranges;
 
@@ -2262,10 +2273,10 @@ static int criu_restore_devices(struct kfd_process *p,
 			goto exit;
 		}
 
-		if (!pdd->doorbell_index &&
-		    kfd_alloc_process_doorbells(pdd->dev->kfd, &pdd->doorbell_index) < 0) {
-			ret = -ENOMEM;
-			goto exit;
+		if (!pdd->qpd.proc_doorbells) {
+			ret = kfd_alloc_process_doorbells(dev->kfd, pdd);
+			if (ret)
+				goto exit;
 		}
 	}
 
@@ -2305,7 +2316,7 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
 			return -EINVAL;
 		}
 		offset = pdd->dev->adev->rmmio_remap.bus_addr;
-		if (!offset) {
+		if (!offset || (PAGE_SIZE > 4096)) {
 			pr_err("amdgpu_amdkfd_get_mmio_remap_phys_addr failed\n");
 			return -ENOMEM;
 		}
@@ -2344,14 +2355,15 @@
 	} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
 		bo_bucket->restored_offset = offset;
 		/* Update the VRAM usage count */
-		WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + bo_bucket->size);
+		atomic64_add(bo_bucket->size, &pdd->vram_usage);
 	}
 	return 0;
 }
 
 static int criu_restore_bo(struct kfd_process *p,
			   struct kfd_criu_bo_bucket *bo_bucket,
-			   struct kfd_criu_bo_priv_data *bo_priv)
+			   struct kfd_criu_bo_priv_data *bo_priv,
+			   struct file **file)
 {
 	struct kfd_process_device *pdd;
 	struct kgd_mem *kgd_mem;
@@ -2402,8 +2414,8 @@ static int criu_restore_bo(struct kfd_process *p,
 	/* create the dmabuf object and export the bo */
 	if (bo_bucket->alloc_flags & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM |
				      KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
-		ret = criu_get_prime_handle(&kgd_mem->bo->tbo.base, DRM_RDWR,
-					    &bo_bucket->dmabuf_fd);
+		ret = criu_get_prime_handle(kgd_mem, DRM_RDWR,
+					    &bo_bucket->dmabuf_fd, file);
 		if (ret)
 			return ret;
 	} else {
@@ -2420,6 +2432,7 @@ static int criu_restore_bos(struct kfd_process *p,
 {
 	struct kfd_criu_bo_bucket *bo_buckets = NULL;
 	struct kfd_criu_bo_priv_data *bo_privs = NULL;
+	struct file **files = NULL;
 	int ret = 0;
 	uint32_t i = 0;
 
@@ -2433,6 +2446,12 @@
 	if (!bo_buckets)
 		return -ENOMEM;
 
+	files = kvzalloc(args->num_bos * sizeof(struct file *), GFP_KERNEL);
+	if (!files) {
+		ret = -ENOMEM;
+		goto exit;
+	}
+
 	ret = copy_from_user(bo_buckets, (void __user *)args->bos,
			     args->num_bos * sizeof(*bo_buckets));
 	if (ret) {
@@ -2458,7 +2477,7 @@
 
 	/* Create and map new BOs */
 	for (; i < args->num_bos; i++) {
-		ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i]);
+		ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i], &files[i]);
 		if (ret) {
 			pr_debug("Failed to restore BO[%d] ret%d\n", i, ret);
 			goto exit;
@@ -2473,11 +2492,8 @@
 		ret = -EFAULT;
 
exit:
-	while (ret && i--) {
-		if (bo_buckets[i].alloc_flags
-		    & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))
-			close_fd(bo_buckets[i].dmabuf_fd);
-	}
+	commit_files(files, bo_buckets, i, ret);
+	kvfree(files);
 	kvfree(bo_buckets);
 	kvfree(bo_privs);
 	return ret;
@@ -2549,8 +2565,8 @@ static int criu_restore(struct file *filep,
 	pr_debug("CRIU restore (num_devices:%u num_bos:%u num_objects:%u priv_data_size:%llu)\n",
		 args->num_devices, args->num_bos, args->num_objects, args->priv_data_size);
 
-	if (!args->bos || !args->devices || !args->priv_data || !args->priv_data_size ||
-	    !args->num_devices || !args->num_bos)
+	if ((args->num_bos > 0 && !args->bos) || !args->devices || !args->priv_data ||
+	    !args->priv_data_size || !args->num_devices)
 		return -EINVAL;
 
 	mutex_lock(&p->mutex);
@@ -2755,6 +2771,16 @@ static int runtime_enable(struct kfd_process *p, uint64_t r_debug,
 
 		if (pdd->qpd.queue_count)
 			return -EEXIST;
+
+		/*
+		 * Setup TTMPs by default.
+		 * Note that this call must remain here for MES ADD QUEUE to
+		 * skip_process_ctx_clear unconditionally as the first call to
+		 * SET_SHADER_DEBUGGER clears any stale process context data
+		 * saved in MES.
+		 */
+		if (pdd->dev->kfd->shared_resources.enable_mes)
+			kfd_dbg_set_mes_debug_mode(pdd, !kfd_dbg_has_cwsr_workaround(pdd->dev));
 	}
 
 	p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED;
@@ -2800,7 +2826,7 @@ retry:
 
 static int runtime_disable(struct kfd_process *p)
 {
-	int i = 0, ret;
+	int i = 0, ret = 0;
 	bool was_enabled = p->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED;
 
 	p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_DISABLED;
@@ -2837,6 +2863,7 @@ static int runtime_disable(struct kfd_process *p)
 	/* disable ttmp setup */
 	for (i = 0; i < p->n_pdds; i++) {
 		struct kfd_process_device *pdd = p->pdds[i];
+		int last_err = 0;
 
 		if (kfd_dbg_is_per_vmid_supported(pdd->dev)) {
 			pdd->spi_dbg_override =
@@ -2846,13 +2873,17 @@
					pdd->dev->vm_info.last_vmid_kfd);
 
 			if (!pdd->dev->kfd->shared_resources.enable_mes)
-				debug_refresh_runlist(pdd->dev->dqm);
+				last_err = debug_refresh_runlist(pdd->dev->dqm);
 			else
-				kfd_dbg_set_mes_debug_mode(pdd);
+				last_err = kfd_dbg_set_mes_debug_mode(pdd,
+						!kfd_dbg_has_cwsr_workaround(pdd->dev));
+
+			if (last_err)
+				ret = last_err;
 		}
 	}
 
-	return 0;
+	return ret;
 }
 
 static int kfd_ioctl_runtime_enable(struct file *filep, struct kfd_process *p, void *data)
@@ -2923,6 +2954,7 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, void *data)
 	if (IS_ERR_OR_NULL(target)) {
 		pr_debug("Cannot find process PID %i to debug\n", args->pid);
 		r = target ? PTR_ERR(target) : -ESRCH;
+		target = NULL;
 		goto out;
 	}
 
@@ -3223,8 +3255,10 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
 	int retcode = -EINVAL;
 	bool ptrace_attached = false;
 
-	if (nr >= AMDKFD_CORE_IOCTL_COUNT)
+	if (nr >= AMDKFD_CORE_IOCTL_COUNT) {
+		retcode = -ENOTTY;
 		goto err_i1;
+	}
 
 	if ((nr >= AMDKFD_COMMAND_START) &&
	    (nr < AMDKFD_COMMAND_END)) {
@@ -3237,8 +3271,10 @@
 		asize = amdkfd_size;
 
 		cmd = ioctl->cmd;
-	} else
+	} else {
+		retcode = -ENOTTY;
 		goto err_i1;
+	}
 
 	dev_dbg(kfd_device, "ioctl cmd 0x%x (#0x%x), arg 0x%lx\n", cmd, nr, arg);
 
@@ -3335,6 +3371,9 @@ static int kfd_mmio_mmap(struct kfd_node *dev, struct kfd_process *process,
 	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
 		return -EINVAL;
 
+	if (PAGE_SIZE > 4096)
+		return -EINVAL;
+
 	address = dev->adev->rmmio_remap.bus_addr;
 
 	vm_flags_set(vma, VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
@@ -3342,12 +3381,12 @@
 
 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 
-	pr_debug("pasid 0x%x mapping mmio page\n"
+	pr_debug("process pid %d mapping mmio page\n"
		 "     target user address == 0x%08llX\n"
		 "     physical address    == 0x%08llX\n"
		 "     vm_flags            == 0x%04lX\n"
		 "     size                == 0x%04lX\n",
-		 process->pasid, (unsigned long long) vma->vm_start,
+		 process->lead_thread->pid, (unsigned long long) vma->vm_start,
		 address, vma->vm_flags, PAGE_SIZE);
 
 	return io_remap_pfn_range(vma,
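
A few of the API shifts above are worth a closer look. The kfd_chardev_init()/kfd_chardev_exit() hunks drop the heap-allocated class handle from class_create() in favor of a statically defined const struct class registered with class_register(), which removes the IS_ERR()/PTR_ERR() handling entirely. A minimal sketch of the same pattern for a hypothetical driver (the "example" names are placeholders, not part of the kfd change):

#include <linux/device.h>
#include <linux/module.h>

/* Statically allocated, so registration either succeeds or fails
 * with a plain errno; there is no pointer to validate.
 */
static const struct class example_class = {
	.name = "example",
};

static int __init example_init(void)
{
	int err = class_register(&example_class);

	if (err)
		return err;
	/* device_create(&example_class, NULL, devt, NULL, "example0")
	 * would follow here, mirroring kfd_chardev_init() above.
	 */
	return 0;
}

static void __exit example_exit(void)
{
	class_unregister(&example_class);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");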
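The CRIU hunks replace dma_buf_fd() with a reserve-then-commit scheme: criu_get_prime_handle() now only reserves an fd number via get_unused_fd_flags() and stashes the dmabuf's struct file, and the new commit_files() helper either fd_install()s every reserved fd once the whole BO array has been processed, or rolls back with fput() and put_unused_fd() on error. That way a failed checkpoint or restore never leaves half-installed fds visible to userspace. A condensed sketch of the two phases (helper names here are illustrative, and the caller is assumed to already own a dma_buf reference):

#include <linux/dma-buf.h>
#include <linux/file.h>

/* Phase 1: reserve an fd slot; nothing is visible to userspace yet. */
static int reserve_dmabuf_fd(struct dma_buf *buf, int flags,
			     u32 *fd, struct file **file)
{
	int ret = get_unused_fd_flags(flags);

	if (ret < 0)
		return ret;
	*fd = ret;
	get_dma_buf(buf);	/* reference handed over to *file */
	*file = buf->file;
	return 0;
}

/* Phase 2: publish the fd on success, or release both halves on error. */
static void commit_dmabuf_fd(u32 fd, struct file *file, bool failed)
{
	if (failed) {
		fput(file);		/* also drops the dma_buf reference */
		put_unused_fd(fd);
	} else {
		fd_install(fd, file);	/* the fd now owns the reference */
	}
}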
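On a smaller scale, kfd_ioctl_set_cu_mask() collapses its kzalloc() + copy_from_user() pair into a single memdup_user() call. memdup_user() returns an ERR_PTR (never NULL) on failure, which is what lets the patch drop the separate -EFAULT assignment and the out: label. The general shape, with example_ioctl() as a hypothetical stand-in for the caller:

#include <linux/err.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/types.h>

static int example_ioctl(const void __user *uptr, size_t size)
{
	u32 *mask;

	/* Allocation and copy in one call; on failure this returns
	 * ERR_PTR(-ENOMEM) or ERR_PTR(-EFAULT), never NULL.
	 */
	mask = memdup_user(uptr, size);
	if (IS_ERR(mask))
		return PTR_ERR(mask);

	/* ... consume the mask ... */

	kfree(mask);
	return 0;
}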
