Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_events.c')
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_events.c  751
1 file changed, 520 insertions(+), 231 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 3eea4edee355..5a190dd6be4e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
- * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -30,7 +31,7 @@
#include <linux/memory.h>
#include "kfd_priv.h"
#include "kfd_events.h"
-#include "kfd_iommu.h"
+#include "kfd_device_queue_manager.h"
#include <linux/device.h>
/*
@@ -40,6 +41,7 @@ struct kfd_event_waiter {
wait_queue_entry_t wait;
struct kfd_event *event; /* Event to wait for */
bool activated; /* Becomes true when event is signaled */
+ bool event_age_enabled; /* set to true when last_event_age is non-zero */
};
/*
@@ -55,7 +57,6 @@ struct kfd_signal_page {
bool need_to_free_pages;
};
-
static uint64_t *page_slots(struct kfd_signal_page *page)
{
return page->kernel_address;
@@ -92,7 +93,8 @@ fail_alloc_signal_store:
}
static int allocate_event_notification_slot(struct kfd_process *p,
- struct kfd_event *ev)
+ struct kfd_event *ev,
+ const int *restore_id)
{
int id;
@@ -104,14 +106,19 @@ static int allocate_event_notification_slot(struct kfd_process *p,
p->signal_mapped_size = 256*8;
}
- /*
- * Compatibility with old user mode: Only use signal slots
- * user mode has mapped, may be less than
- * KFD_SIGNAL_EVENT_LIMIT. This also allows future increase
- * of the event limit without breaking user mode.
- */
- id = idr_alloc(&p->event_idr, ev, 0, p->signal_mapped_size / 8,
- GFP_KERNEL);
+ if (restore_id) {
+ id = idr_alloc(&p->event_idr, ev, *restore_id, *restore_id + 1,
+ GFP_KERNEL);
+ } else {
+ /*
+ * Compatibility with old user mode: Only use signal slots
+ * user mode has mapped, may be less than
+ * KFD_SIGNAL_EVENT_LIMIT. This also allows future increase
+ * of the event limit without breaking user mode.
+ */
+ id = idr_alloc(&p->event_idr, ev, 0, p->signal_mapped_size / 8,
+ GFP_KERNEL);
+ }
if (id < 0)
return id;
@@ -122,8 +129,8 @@ static int allocate_event_notification_slot(struct kfd_process *p,
}
/*
- * Assumes that p->event_mutex is held and of course that p is not going
- * away (current or locked).
+ * Assumes that p->event_mutex or rcu_read_lock() is held and of course that
+ * p is not going away.
*/
static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id)
{
@@ -178,9 +185,8 @@ static struct kfd_event *lookup_signaled_event_by_partial_id(
return ev;
}
-static int create_signal_event(struct file *devkfd,
- struct kfd_process *p,
- struct kfd_event *ev)
+static int create_signal_event(struct file *devkfd, struct kfd_process *p,
+ struct kfd_event *ev, const int *restore_id)
{
int ret;
@@ -193,7 +199,7 @@ static int create_signal_event(struct file *devkfd,
return -ENOSPC;
}
- ret = allocate_event_notification_slot(p, ev);
+ ret = allocate_event_notification_slot(p, ev, restore_id);
if (ret) {
pr_warn("Signal event wasn't created because out of kernel memory\n");
return ret;
@@ -209,16 +215,22 @@ static int create_signal_event(struct file *devkfd,
return 0;
}
-static int create_other_event(struct kfd_process *p, struct kfd_event *ev)
+static int create_other_event(struct kfd_process *p, struct kfd_event *ev, const int *restore_id)
{
- /* Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an
- * intentional integer overflow to -1 without a compiler
- * warning. idr_alloc treats a negative value as "maximum
- * signed integer".
- */
- int id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID,
- (uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1,
- GFP_KERNEL);
+ int id;
+
+ if (restore_id)
+ id = idr_alloc(&p->event_idr, ev, *restore_id, *restore_id + 1,
+ GFP_KERNEL);
+ else
+ /* Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an
+ * intentional integer overflow to -1 without a compiler
+ * warning. idr_alloc treats a negative value as "maximum
+ * signed integer".
+ */
+ id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID,
+ (uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1,
+ GFP_KERNEL);
if (id < 0)
return id;
@@ -227,12 +239,24 @@ static int create_other_event(struct kfd_process *p, struct kfd_event *ev)
return 0;
}
-void kfd_event_init_process(struct kfd_process *p)
+int kfd_event_init_process(struct kfd_process *p)
{
+ int id;
+
mutex_init(&p->event_mutex);
idr_init(&p->event_idr);
p->signal_page = NULL;
- p->signal_event_count = 0;
+ p->signal_event_count = 1;
+ /* Allocate event ID 0. It is used for a fast path to ignore bogus events
+ * that are sent by the CP without a context ID
+ */
+ id = idr_alloc(&p->event_idr, NULL, 0, 1, GFP_KERNEL);
+ if (id < 0) {
+ idr_destroy(&p->event_idr);
+ mutex_destroy(&p->event_mutex);
+ return id;
+ }
+ return 0;
}
static void destroy_event(struct kfd_process *p, struct kfd_event *ev)
@@ -240,16 +264,18 @@ static void destroy_event(struct kfd_process *p, struct kfd_event *ev)
struct kfd_event_waiter *waiter;
/* Wake up pending waiters. They will return failure */
+ spin_lock(&ev->lock);
list_for_each_entry(waiter, &ev->wq.head, wait.entry)
- waiter->event = NULL;
+ WRITE_ONCE(waiter->event, NULL);
wake_up_all(&ev->wq);
+ spin_unlock(&ev->lock);
if (ev->type == KFD_EVENT_TYPE_SIGNAL ||
ev->type == KFD_EVENT_TYPE_DEBUG)
p->signal_event_count--;
idr_remove(&p->event_idr, ev->event_id);
- kfree(ev);
+ kfree_rcu(ev, rcu);
}
static void destroy_events(struct kfd_process *p)
@@ -258,8 +284,10 @@ static void destroy_events(struct kfd_process *p)
uint32_t id;
idr_for_each_entry(&p->event_idr, ev, id)
- destroy_event(p, ev);
+ if (ev)
+ destroy_event(p, ev);
idr_destroy(&p->event_idr);
+ mutex_destroy(&p->event_mutex);
}
/*
@@ -295,8 +323,8 @@ static bool event_can_be_cpu_signaled(const struct kfd_event *ev)
return ev->type == KFD_EVENT_TYPE_SIGNAL;
}
-int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
- uint64_t size)
+static int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
+ uint64_t size, uint64_t user_handle)
{
struct kfd_signal_page *page;
@@ -315,10 +343,56 @@ int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
p->signal_page = page;
p->signal_mapped_size = size;
-
+ p->signal_handle = user_handle;
return 0;
}
+int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset)
+{
+ struct kfd_node *kfd;
+ struct kfd_process_device *pdd;
+ void *mem, *kern_addr;
+ uint64_t size;
+ int err = 0;
+
+ if (p->signal_page) {
+ pr_err("Event page is already set\n");
+ return -EINVAL;
+ }
+
+ pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(event_page_offset));
+ if (!pdd) {
+ pr_err("Getting device by id failed in %s\n", __func__);
+ return -EINVAL;
+ }
+ kfd = pdd->dev;
+
+ pdd = kfd_bind_process_to_device(kfd, p);
+ if (IS_ERR(pdd))
+ return PTR_ERR(pdd);
+
+ mem = kfd_process_device_translate_handle(pdd,
+ GET_IDR_HANDLE(event_page_offset));
+ if (!mem) {
+ pr_err("Can't find BO, offset is 0x%llx\n", event_page_offset);
+ return -EINVAL;
+ }
+
+ err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(mem, &kern_addr, &size);
+ if (err) {
+ pr_err("Failed to map event page to kernel\n");
+ return err;
+ }
+
+ err = kfd_event_page_set(p, kern_addr, size, event_page_offset);
+ if (err) {
+ pr_err("Failed to set event page\n");
+ amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem);
+ return err;
+ }
+ return err;
+}
+
int kfd_event_create(struct file *devkfd, struct kfd_process *p,
uint32_t event_type, bool auto_reset, uint32_t node_id,
uint32_t *event_id, uint32_t *event_trigger_data,
@@ -334,6 +408,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
ev->auto_reset = auto_reset;
ev->signaled = false;
+ spin_lock_init(&ev->lock);
init_waitqueue_head(&ev->wq);
*event_page_offset = 0;
@@ -343,20 +418,21 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
switch (event_type) {
case KFD_EVENT_TYPE_SIGNAL:
case KFD_EVENT_TYPE_DEBUG:
- ret = create_signal_event(devkfd, p, ev);
+ ret = create_signal_event(devkfd, p, ev, NULL);
if (!ret) {
*event_page_offset = KFD_MMAP_TYPE_EVENTS;
*event_slot_index = ev->event_id;
}
break;
default:
- ret = create_other_event(p, ev);
+ ret = create_other_event(p, ev, NULL);
break;
}
if (!ret) {
*event_id = ev->event_id;
*event_trigger_data = ev->event_id;
+ ev->event_age = 1;
} else {
kfree(ev);
}
@@ -366,6 +442,166 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
return ret;
}
+int kfd_criu_restore_event(struct file *devkfd,
+ struct kfd_process *p,
+ uint8_t __user *user_priv_ptr,
+ uint64_t *priv_data_offset,
+ uint64_t max_priv_data_size)
+{
+ struct kfd_criu_event_priv_data *ev_priv;
+ struct kfd_event *ev = NULL;
+ int ret = 0;
+
+ ev_priv = kmalloc(sizeof(*ev_priv), GFP_KERNEL);
+ if (!ev_priv)
+ return -ENOMEM;
+
+ ev = kzalloc(sizeof(*ev), GFP_KERNEL);
+ if (!ev) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ if (*priv_data_offset + sizeof(*ev_priv) > max_priv_data_size) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ ret = copy_from_user(ev_priv, user_priv_ptr + *priv_data_offset, sizeof(*ev_priv));
+ if (ret) {
+ ret = -EFAULT;
+ goto exit;
+ }
+ *priv_data_offset += sizeof(*ev_priv);
+
+ if (ev_priv->user_handle) {
+ ret = kfd_kmap_event_page(p, ev_priv->user_handle);
+ if (ret)
+ goto exit;
+ }
+
+ ev->type = ev_priv->type;
+ ev->auto_reset = ev_priv->auto_reset;
+ ev->signaled = ev_priv->signaled;
+
+ spin_lock_init(&ev->lock);
+ init_waitqueue_head(&ev->wq);
+
+ mutex_lock(&p->event_mutex);
+ switch (ev->type) {
+ case KFD_EVENT_TYPE_SIGNAL:
+ case KFD_EVENT_TYPE_DEBUG:
+ ret = create_signal_event(devkfd, p, ev, &ev_priv->event_id);
+ break;
+ case KFD_EVENT_TYPE_MEMORY:
+ memcpy(&ev->memory_exception_data,
+ &ev_priv->memory_exception_data,
+ sizeof(struct kfd_hsa_memory_exception_data));
+
+ ret = create_other_event(p, ev, &ev_priv->event_id);
+ break;
+ case KFD_EVENT_TYPE_HW_EXCEPTION:
+ memcpy(&ev->hw_exception_data,
+ &ev_priv->hw_exception_data,
+ sizeof(struct kfd_hsa_hw_exception_data));
+
+ ret = create_other_event(p, ev, &ev_priv->event_id);
+ break;
+ }
+ mutex_unlock(&p->event_mutex);
+
+exit:
+ if (ret)
+ kfree(ev);
+
+ kfree(ev_priv);
+
+ return ret;
+}
+
+int kfd_criu_checkpoint_events(struct kfd_process *p,
+ uint8_t __user *user_priv_data,
+ uint64_t *priv_data_offset)
+{
+ struct kfd_criu_event_priv_data *ev_privs;
+ int i = 0;
+ int ret = 0;
+ struct kfd_event *ev;
+ uint32_t ev_id;
+
+ uint32_t num_events = kfd_get_num_events(p);
+
+ if (!num_events)
+ return 0;
+
+ ev_privs = kvzalloc(num_events * sizeof(*ev_privs), GFP_KERNEL);
+ if (!ev_privs)
+ return -ENOMEM;
+
+
+ idr_for_each_entry(&p->event_idr, ev, ev_id) {
+ struct kfd_criu_event_priv_data *ev_priv;
+
+ /*
+ * Currently, all events have the same size of private_data, but the current
+ * ioctls and CRIU plugin support private_data of variable sizes
+ */
+ ev_priv = &ev_privs[i];
+
+ ev_priv->object_type = KFD_CRIU_OBJECT_TYPE_EVENT;
+
+ /* We store the user_handle with the first event */
+ if (i == 0 && p->signal_page)
+ ev_priv->user_handle = p->signal_handle;
+
+ ev_priv->event_id = ev->event_id;
+ ev_priv->auto_reset = ev->auto_reset;
+ ev_priv->type = ev->type;
+ ev_priv->signaled = ev->signaled;
+
+ if (ev_priv->type == KFD_EVENT_TYPE_MEMORY)
+ memcpy(&ev_priv->memory_exception_data,
+ &ev->memory_exception_data,
+ sizeof(struct kfd_hsa_memory_exception_data));
+ else if (ev_priv->type == KFD_EVENT_TYPE_HW_EXCEPTION)
+ memcpy(&ev_priv->hw_exception_data,
+ &ev->hw_exception_data,
+ sizeof(struct kfd_hsa_hw_exception_data));
+
+ pr_debug("Checkpointed event[%d] id = 0x%08x auto_reset = %x type = %x signaled = %x\n",
+ i,
+ ev_priv->event_id,
+ ev_priv->auto_reset,
+ ev_priv->type,
+ ev_priv->signaled);
+ i++;
+ }
+
+ ret = copy_to_user(user_priv_data + *priv_data_offset,
+ ev_privs, num_events * sizeof(*ev_privs));
+ if (ret) {
+ pr_err("Failed to copy events priv to user\n");
+ ret = -EFAULT;
+ }
+
+ *priv_data_offset += num_events * sizeof(*ev_privs);
+
+ kvfree(ev_privs);
+ return ret;
+}
+
+int kfd_get_num_events(struct kfd_process *p)
+{
+ struct kfd_event *ev;
+ uint32_t id;
+ u32 num_events = 0;
+
+ idr_for_each_entry(&p->event_idr, ev, id)
+ num_events++;
+
+ return num_events;
+}
+
/* Assumes that p is current. */
int kfd_event_destroy(struct kfd_process *p, uint32_t event_id)
{
@@ -391,13 +627,18 @@ static void set_event(struct kfd_event *ev)
/* Auto reset if the list is non-empty and we're waking
* someone. waitqueue_active is safe here because we're
- * protected by the p->event_mutex, which is also held when
+ * protected by the ev->lock, which is also held when
* updating the wait queues in kfd_wait_on_events.
*/
ev->signaled = !ev->auto_reset || !waitqueue_active(&ev->wq);
+ if (!(++ev->event_age)) {
+ /* Never wrap back to reserved/default event age 0/1 */
+ ev->event_age = 2;
+ WARN_ONCE(1, "event_age wrap back!");
+ }
list_for_each_entry(waiter, &ev->wq.head, wait.entry)
- waiter->activated = true;
+ WRITE_ONCE(waiter->activated, true);
wake_up_all(&ev->wq);
}
@@ -408,16 +649,23 @@ int kfd_set_event(struct kfd_process *p, uint32_t event_id)
int ret = 0;
struct kfd_event *ev;
- mutex_lock(&p->event_mutex);
+ rcu_read_lock();
ev = lookup_event_by_id(p, event_id);
+ if (!ev) {
+ ret = -EINVAL;
+ goto unlock_rcu;
+ }
+ spin_lock(&ev->lock);
- if (ev && event_can_be_cpu_signaled(ev))
+ if (event_can_be_cpu_signaled(ev))
set_event(ev);
else
ret = -EINVAL;
- mutex_unlock(&p->event_mutex);
+ spin_unlock(&ev->lock);
+unlock_rcu:
+ rcu_read_unlock();
return ret;
}
@@ -432,23 +680,30 @@ int kfd_reset_event(struct kfd_process *p, uint32_t event_id)
int ret = 0;
struct kfd_event *ev;
- mutex_lock(&p->event_mutex);
+ rcu_read_lock();
ev = lookup_event_by_id(p, event_id);
+ if (!ev) {
+ ret = -EINVAL;
+ goto unlock_rcu;
+ }
+ spin_lock(&ev->lock);
- if (ev && event_can_be_cpu_signaled(ev))
+ if (event_can_be_cpu_signaled(ev))
reset_event(ev);
else
ret = -EINVAL;
- mutex_unlock(&p->event_mutex);
+ spin_unlock(&ev->lock);
+unlock_rcu:
+ rcu_read_unlock();
return ret;
}
static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev)
{
- page_slots(p->signal_page)[ev->event_id] = UNSIGNALED_EVENT_SLOT;
+ WRITE_ONCE(page_slots(p->signal_page)[ev->event_id], UNSIGNALED_EVENT_SLOT);
}
static void set_event_from_interrupt(struct kfd_process *p,
@@ -456,7 +711,9 @@ static void set_event_from_interrupt(struct kfd_process *p,
{
if (ev && event_can_be_gpu_signaled(ev)) {
acknowledge_signal(p, ev);
+ spin_lock(&ev->lock);
set_event(ev);
+ spin_unlock(&ev->lock);
}
}
@@ -470,12 +727,12 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
* to process context, kfd_process could attempt to exit while we are
* running so the lookup function increments the process ref count.
*/
- struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, NULL);
if (!p)
return; /* Presumably process exited. */
- mutex_lock(&p->event_mutex);
+ rcu_read_lock();
if (valid_id_bits)
ev = lookup_signaled_event_by_partial_id(p, partial_id,
@@ -503,7 +760,7 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
if (id >= KFD_SIGNAL_EVENT_LIMIT)
break;
- if (slots[id] != UNSIGNALED_EVENT_SLOT)
+ if (READ_ONCE(slots[id]) != UNSIGNALED_EVENT_SLOT)
set_event_from_interrupt(p, ev);
}
} else {
@@ -511,15 +768,15 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
* iterate over the signal slots and lookup
* only signaled events from the IDR.
*/
- for (id = 0; id < KFD_SIGNAL_EVENT_LIMIT; id++)
- if (slots[id] != UNSIGNALED_EVENT_SLOT) {
+ for (id = 1; id < KFD_SIGNAL_EVENT_LIMIT; id++)
+ if (READ_ONCE(slots[id]) != UNSIGNALED_EVENT_SLOT) {
ev = lookup_event_by_id(p, id);
set_event_from_interrupt(p, ev);
}
}
}
- mutex_unlock(&p->event_mutex);
+ rcu_read_unlock();
kfd_unref_process(p);
}
@@ -528,43 +785,44 @@ static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events)
struct kfd_event_waiter *event_waiters;
uint32_t i;
- event_waiters = kmalloc_array(num_events,
- sizeof(struct kfd_event_waiter),
- GFP_KERNEL);
+ event_waiters = kcalloc(num_events, sizeof(struct kfd_event_waiter),
+ GFP_KERNEL);
+ if (!event_waiters)
+ return NULL;
- for (i = 0; (event_waiters) && (i < num_events) ; i++) {
+ for (i = 0; i < num_events; i++)
init_wait(&event_waiters[i].wait);
- event_waiters[i].activated = false;
- }
return event_waiters;
}
-static int init_event_waiter_get_status(struct kfd_process *p,
+static int init_event_waiter(struct kfd_process *p,
struct kfd_event_waiter *waiter,
- uint32_t event_id)
+ struct kfd_event_data *event_data)
{
- struct kfd_event *ev = lookup_event_by_id(p, event_id);
+ struct kfd_event *ev = lookup_event_by_id(p, event_data->event_id);
if (!ev)
return -EINVAL;
+ spin_lock(&ev->lock);
waiter->event = ev;
waiter->activated = ev->signaled;
ev->signaled = ev->signaled && !ev->auto_reset;
- return 0;
-}
-
-static void init_event_waiter_add_to_waitlist(struct kfd_event_waiter *waiter)
-{
- struct kfd_event *ev = waiter->event;
+ /* last_event_age = 0 is reserved for backward compatibility */
+ if (waiter->event->type == KFD_EVENT_TYPE_SIGNAL &&
+ event_data->signal_event_data.last_event_age) {
+ waiter->event_age_enabled = true;
+ if (ev->event_age != event_data->signal_event_data.last_event_age)
+ waiter->activated = true;
+ }
- /* Only add to the wait list if we actually need to
- * wait on this event.
- */
if (!waiter->activated)
add_wait_queue(&ev->wq, &waiter->wait);
+ spin_unlock(&ev->lock);
+
+ return 0;
}
/* test_event_condition - Test condition of events being waited for
@@ -584,10 +842,10 @@ static uint32_t test_event_condition(bool all, uint32_t num_events,
uint32_t activated_count = 0;
for (i = 0; i < num_events; i++) {
- if (!event_waiters[i].event)
+ if (!READ_ONCE(event_waiters[i].event))
return KFD_IOC_WAIT_RESULT_FAIL;
- if (event_waiters[i].activated) {
+ if (READ_ONCE(event_waiters[i].activated)) {
if (!all)
return KFD_IOC_WAIT_RESULT_COMPLETE;
@@ -607,30 +865,40 @@ static int copy_signaled_event_data(uint32_t num_events,
struct kfd_event_waiter *event_waiters,
struct kfd_event_data __user *data)
{
- struct kfd_hsa_memory_exception_data *src;
- struct kfd_hsa_memory_exception_data __user *dst;
+ void *src;
+ void __user *dst;
struct kfd_event_waiter *waiter;
struct kfd_event *event;
- uint32_t i;
+ uint32_t i, size = 0;
for (i = 0; i < num_events; i++) {
waiter = &event_waiters[i];
event = waiter->event;
- if (waiter->activated && event->type == KFD_EVENT_TYPE_MEMORY) {
- dst = &data[i].memory_exception_data;
- src = &event->memory_exception_data;
- if (copy_to_user(dst, src,
- sizeof(struct kfd_hsa_memory_exception_data)))
+ if (!event)
+ return -EINVAL; /* event was destroyed */
+ if (waiter->activated) {
+ if (event->type == KFD_EVENT_TYPE_MEMORY) {
+ dst = &data[i].memory_exception_data;
+ src = &event->memory_exception_data;
+ size = sizeof(struct kfd_hsa_memory_exception_data);
+ } else if (event->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
+ dst = &data[i].memory_exception_data;
+ src = &event->hw_exception_data;
+ size = sizeof(struct kfd_hsa_hw_exception_data);
+ } else if (event->type == KFD_EVENT_TYPE_SIGNAL &&
+ waiter->event_age_enabled) {
+ dst = &data[i].signal_event_data.last_event_age;
+ src = &event->event_age;
+ size = sizeof(u64);
+ }
+ if (size && copy_to_user(dst, src, size))
return -EFAULT;
}
}
return 0;
-
}
-
-
static long user_timeout_to_jiffies(uint32_t user_timeout_ms)
{
if (user_timeout_ms == KFD_EVENT_TIMEOUT_IMMEDIATE)
@@ -649,21 +917,28 @@ static long user_timeout_to_jiffies(uint32_t user_timeout_ms)
return msecs_to_jiffies(user_timeout_ms) + 1;
}
-static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters)
+static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters,
+ bool undo_auto_reset)
{
uint32_t i;
for (i = 0; i < num_events; i++)
- if (waiters[i].event)
+ if (waiters[i].event) {
+ spin_lock(&waiters[i].event->lock);
remove_wait_queue(&waiters[i].event->wq,
&waiters[i].wait);
+ if (undo_auto_reset && waiters[i].activated &&
+ waiters[i].event && waiters[i].event->auto_reset)
+ set_event(waiters[i].event);
+ spin_unlock(&waiters[i].event->lock);
+ }
kfree(waiters);
}
int kfd_wait_on_events(struct kfd_process *p,
uint32_t num_events, void __user *data,
- bool all, uint32_t user_timeout_ms,
+ bool all, uint32_t *user_timeout_ms,
uint32_t *wait_result)
{
struct kfd_event_data __user *events =
@@ -672,7 +947,7 @@ int kfd_wait_on_events(struct kfd_process *p,
int ret = 0;
struct kfd_event_waiter *event_waiters = NULL;
- long timeout = user_timeout_to_jiffies(user_timeout_ms);
+ long timeout = user_timeout_to_jiffies(*user_timeout_ms);
event_waiters = alloc_event_waiters(num_events);
if (!event_waiters) {
@@ -680,6 +955,9 @@ int kfd_wait_on_events(struct kfd_process *p,
goto out;
}
+ /* Use p->event_mutex here to protect against concurrent creation and
+ * destruction of events while we initialize event_waiters.
+ */
mutex_lock(&p->event_mutex);
for (i = 0; i < num_events; i++) {
@@ -691,8 +969,7 @@ int kfd_wait_on_events(struct kfd_process *p,
goto out_unlock;
}
- ret = init_event_waiter_get_status(p, &event_waiters[i],
- event_data.event_id);
+ ret = init_event_waiter(p, &event_waiters[i], &event_data);
if (ret)
goto out_unlock;
}
@@ -710,10 +987,6 @@ int kfd_wait_on_events(struct kfd_process *p,
goto out_unlock;
}
- /* Add to wait lists if we need to wait. */
- for (i = 0; i < num_events; i++)
- init_event_waiter_add_to_waitlist(&event_waiters[i]);
-
mutex_unlock(&p->event_mutex);
while (true) {
@@ -723,15 +996,11 @@ int kfd_wait_on_events(struct kfd_process *p,
}
if (signal_pending(current)) {
- /*
- * This is wrong when a nonzero, non-infinite timeout
- * is specified. We need to use
- * ERESTARTSYS_RESTARTBLOCK, but struct restart_block
- * contains a union with data for each user and it's
- * in generic kernel code that I don't want to
- * touch yet.
- */
ret = -ERESTARTSYS;
+ if (*user_timeout_ms != KFD_EVENT_TIMEOUT_IMMEDIATE &&
+ *user_timeout_ms != KFD_EVENT_TIMEOUT_INFINITE)
+ *user_timeout_ms = jiffies_to_msecs(
+ max(0l, timeout-1));
break;
}
@@ -758,16 +1027,21 @@ int kfd_wait_on_events(struct kfd_process *p,
}
__set_current_state(TASK_RUNNING);
+ mutex_lock(&p->event_mutex);
/* copy_signaled_event_data may sleep. So this has to happen
* after the task state is set back to RUNNING.
+ *
+ * The event may also have been destroyed after signaling. So
+ * copy_signaled_event_data also must confirm that the event
+ * still exists. Therefore this must be under the p->event_mutex
+ * which is also held when events are destroyed.
*/
if (!ret && *wait_result == KFD_IOC_WAIT_RESULT_COMPLETE)
ret = copy_signaled_event_data(num_events,
event_waiters, events);
- mutex_lock(&p->event_mutex);
out_unlock:
- free_waiters(num_events, event_waiters);
+ free_waiters(num_events, event_waiters, ret == -ERESTARTSYS);
mutex_unlock(&p->event_mutex);
out:
if (ret)
@@ -801,8 +1075,8 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
pfn = __pa(page->kernel_address);
pfn >>= PAGE_SHIFT;
- vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE
- | VM_DONTDUMP | VM_PFNMAP;
+ vm_flags_set(vma, VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE
+ | VM_DONTDUMP | VM_PFNMAP);
pr_debug("Mapping signal page\n");
pr_debug(" start user address == 0x%08lx\n", vma->vm_start);
@@ -824,8 +1098,7 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
}
/*
- * Assumes that p->event_mutex is held and of course
- * that p is not going away (current or locked).
+ * Assumes that p is not going away.
*/
static void lookup_events_by_type_and_signal(struct kfd_process *p,
int type, void *event_data)
@@ -837,6 +1110,8 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
ev_data = (struct kfd_hsa_memory_exception_data *) event_data;
+ rcu_read_lock();
+
id = KFD_FIRST_NONSIGNAL_EVENT_ID;
idr_for_each_entry_continue(&p->event_idr, ev, id)
if (ev->type == type) {
@@ -844,15 +1119,17 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
dev_dbg(kfd_device,
"Event found: id %X type %d",
ev->event_id, ev->type);
+ spin_lock(&ev->lock);
set_event(ev);
if (ev->type == KFD_EVENT_TYPE_MEMORY && ev_data)
ev->memory_exception_data = *ev_data;
+ spin_unlock(&ev->lock);
}
if (type == KFD_EVENT_TYPE_MEMORY) {
dev_warn(kfd_device,
- "Sending SIGSEGV to process %d (pasid 0x%x)",
- p->lead_thread->pid, p->pasid);
+ "Sending SIGSEGV to process pid %d",
+ p->lead_thread->pid);
send_sig(SIGSEGV, p->lead_thread, 0);
}
@@ -860,95 +1137,18 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
if (send_signal) {
if (send_sigterm) {
dev_warn(kfd_device,
- "Sending SIGTERM to process %d (pasid 0x%x)",
- p->lead_thread->pid, p->pasid);
+ "Sending SIGTERM to process pid %d",
+ p->lead_thread->pid);
send_sig(SIGTERM, p->lead_thread, 0);
} else {
dev_err(kfd_device,
- "Process %d (pasid 0x%x) got unhandled exception",
- p->lead_thread->pid, p->pasid);
+ "Process pid %d got unhandled exception",
+ p->lead_thread->pid);
}
}
-}
-
-#ifdef KFD_SUPPORT_IOMMU_V2
-void kfd_signal_iommu_event(struct kfd_dev *dev, u32 pasid,
- unsigned long address, bool is_write_requested,
- bool is_execute_requested)
-{
- struct kfd_hsa_memory_exception_data memory_exception_data;
- struct vm_area_struct *vma;
-
- /*
- * Because we are called from arbitrary context (workqueue) as opposed
- * to process context, kfd_process could attempt to exit while we are
- * running so the lookup function increments the process ref count.
- */
- struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
- struct mm_struct *mm;
-
- if (!p)
- return; /* Presumably process exited. */
-
- /* Take a safe reference to the mm_struct, which may otherwise
- * disappear even while the kfd_process is still referenced.
- */
- mm = get_task_mm(p->lead_thread);
- if (!mm) {
- kfd_unref_process(p);
- return; /* Process is exiting */
- }
-
- memset(&memory_exception_data, 0, sizeof(memory_exception_data));
-
- mmap_read_lock(mm);
- vma = find_vma(mm, address);
-
- memory_exception_data.gpu_id = dev->id;
- memory_exception_data.va = address;
- /* Set failure reason */
- memory_exception_data.failure.NotPresent = 1;
- memory_exception_data.failure.NoExecute = 0;
- memory_exception_data.failure.ReadOnly = 0;
- if (vma && address >= vma->vm_start) {
- memory_exception_data.failure.NotPresent = 0;
-
- if (is_write_requested && !(vma->vm_flags & VM_WRITE))
- memory_exception_data.failure.ReadOnly = 1;
- else
- memory_exception_data.failure.ReadOnly = 0;
-
- if (is_execute_requested && !(vma->vm_flags & VM_EXEC))
- memory_exception_data.failure.NoExecute = 1;
- else
- memory_exception_data.failure.NoExecute = 0;
- }
-
- mmap_read_unlock(mm);
- mmput(mm);
- pr_debug("notpresent %d, noexecute %d, readonly %d\n",
- memory_exception_data.failure.NotPresent,
- memory_exception_data.failure.NoExecute,
- memory_exception_data.failure.ReadOnly);
-
- /* Workaround on Raven to not kill the process when memory is freed
- * before IOMMU is able to finish processing all the excessive PPRs
- */
- if (dev->device_info->asic_family != CHIP_RAVEN &&
- dev->device_info->asic_family != CHIP_RENOIR) {
- mutex_lock(&p->event_mutex);
-
- /* Lookup events by type and signal them */
- lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_MEMORY,
- &memory_exception_data);
-
- mutex_unlock(&p->event_mutex);
- }
-
- kfd_unref_process(p);
+ rcu_read_unlock();
}
-#endif /* KFD_SUPPORT_IOMMU_V2 */
void kfd_signal_hw_exception_event(u32 pasid)
{
@@ -957,58 +1157,87 @@ void kfd_signal_hw_exception_event(u32 pasid)
* to process context, kfd_process could attempt to exit while we are
* running so the lookup function increments the process ref count.
*/
- struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, NULL);
if (!p)
return; /* Presumably process exited. */
- mutex_lock(&p->event_mutex);
-
- /* Lookup events by type and signal them */
lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_HW_EXCEPTION, NULL);
-
- mutex_unlock(&p->event_mutex);
kfd_unref_process(p);
}
-void kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 pasid,
- struct kfd_vm_fault_info *info)
+void kfd_signal_vm_fault_event_with_userptr(struct kfd_process *p, uint64_t gpu_va)
+{
+ struct kfd_process_device *pdd;
+ struct kfd_hsa_memory_exception_data exception_data;
+ int i;
+
+ memset(&exception_data, 0, sizeof(exception_data));
+ exception_data.va = gpu_va;
+ exception_data.failure.NotPresent = 1;
+
+ /* Send VM seg fault to all KFD process devices */
+ for (i = 0; i < p->n_pdds; i++) {
+ pdd = p->pdds[i];
+ exception_data.gpu_id = pdd->user_gpu_id;
+ kfd_evict_process_device(pdd);
+ kfd_signal_vm_fault_event(pdd, NULL, &exception_data);
+ }
+}
+
+void kfd_signal_vm_fault_event(struct kfd_process_device *pdd,
+ struct kfd_vm_fault_info *info,
+ struct kfd_hsa_memory_exception_data *data)
{
struct kfd_event *ev;
uint32_t id;
- struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+ struct kfd_process *p = pdd->process;
struct kfd_hsa_memory_exception_data memory_exception_data;
+ int user_gpu_id;
- if (!p)
- return; /* Presumably process exited. */
- memset(&memory_exception_data, 0, sizeof(memory_exception_data));
- memory_exception_data.gpu_id = dev->id;
- memory_exception_data.failure.imprecise = true;
- /* Set failure reason */
- if (info) {
- memory_exception_data.va = (info->page_addr) << PAGE_SHIFT;
- memory_exception_data.failure.NotPresent =
- info->prot_valid ? 1 : 0;
- memory_exception_data.failure.NoExecute =
- info->prot_exec ? 1 : 0;
- memory_exception_data.failure.ReadOnly =
- info->prot_write ? 1 : 0;
- memory_exception_data.failure.imprecise = 0;
+ user_gpu_id = kfd_process_get_user_gpu_id(p, pdd->dev->id);
+ if (unlikely(user_gpu_id == -EINVAL)) {
+ WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n",
+ pdd->dev->id);
+ return;
}
- mutex_lock(&p->event_mutex);
+
+ /* SoC15 chips and onwards will pass in data from now on. */
+ if (!data) {
+ memset(&memory_exception_data, 0, sizeof(memory_exception_data));
+ memory_exception_data.gpu_id = user_gpu_id;
+ memory_exception_data.failure.imprecise = true;
+
+ /* Set failure reason */
+ if (info) {
+ memory_exception_data.va = (info->page_addr) <<
+ PAGE_SHIFT;
+ memory_exception_data.failure.NotPresent =
+ info->prot_valid ? 1 : 0;
+ memory_exception_data.failure.NoExecute =
+ info->prot_exec ? 1 : 0;
+ memory_exception_data.failure.ReadOnly =
+ info->prot_write ? 1 : 0;
+ memory_exception_data.failure.imprecise = 0;
+ }
+ }
+
+ rcu_read_lock();
id = KFD_FIRST_NONSIGNAL_EVENT_ID;
idr_for_each_entry_continue(&p->event_idr, ev, id)
if (ev->type == KFD_EVENT_TYPE_MEMORY) {
- ev->memory_exception_data = memory_exception_data;
+ spin_lock(&ev->lock);
+ ev->memory_exception_data = data ? *data :
+ memory_exception_data;
set_event(ev);
+ spin_unlock(&ev->lock);
}
- mutex_unlock(&p->event_mutex);
- kfd_unref_process(p);
+ rcu_read_unlock();
}
-void kfd_signal_reset_event(struct kfd_dev *dev)
+void kfd_signal_reset_event(struct kfd_node *dev)
{
struct kfd_hsa_hw_exception_data hw_exception_data;
struct kfd_hsa_memory_exception_data memory_exception_data;
@@ -1022,69 +1251,129 @@ void kfd_signal_reset_event(struct kfd_dev *dev)
/* Whole gpu reset caused by GPU hang and memory is lost */
memset(&hw_exception_data, 0, sizeof(hw_exception_data));
- hw_exception_data.gpu_id = dev->id;
hw_exception_data.memory_lost = 1;
hw_exception_data.reset_cause = reset_cause;
memset(&memory_exception_data, 0, sizeof(memory_exception_data));
memory_exception_data.ErrorType = KFD_MEM_ERR_SRAM_ECC;
- memory_exception_data.gpu_id = dev->id;
memory_exception_data.failure.imprecise = true;
idx = srcu_read_lock(&kfd_processes_srcu);
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
- mutex_lock(&p->event_mutex);
+ int user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
+ struct kfd_process_device *pdd = kfd_get_process_device_data(dev, p);
+
+ if (unlikely(user_gpu_id == -EINVAL)) {
+ WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
+ continue;
+ }
+
+ if (unlikely(!pdd)) {
+ WARN_ONCE(1, "Could not get device data from process pid:%d\n",
+ p->lead_thread->pid);
+ continue;
+ }
+
+ if (dev->dqm->detect_hang_count && !pdd->has_reset_queue)
+ continue;
+
+ if (dev->dqm->detect_hang_count) {
+ struct amdgpu_task_info *ti;
+ struct amdgpu_fpriv *drv_priv;
+
+ if (unlikely(amdgpu_file_to_fpriv(pdd->drm_file, &drv_priv))) {
+ WARN_ONCE(1, "Could not get vm for device %x from pid:%d\n",
+ dev->id, p->lead_thread->pid);
+ continue;
+ }
+
+ ti = amdgpu_vm_get_task_info_vm(&drv_priv->vm);
+ if (ti) {
+ dev_err(dev->adev->dev,
+ "Queues reset on process %s tid %d thread %s pid %d\n",
+ ti->process_name, ti->tgid, ti->task.comm, ti->task.pid);
+ amdgpu_vm_put_task_info(ti);
+ }
+ }
+
+ rcu_read_lock();
+
id = KFD_FIRST_NONSIGNAL_EVENT_ID;
idr_for_each_entry_continue(&p->event_idr, ev, id) {
if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
+ spin_lock(&ev->lock);
ev->hw_exception_data = hw_exception_data;
+ ev->hw_exception_data.gpu_id = user_gpu_id;
set_event(ev);
+ spin_unlock(&ev->lock);
}
if (ev->type == KFD_EVENT_TYPE_MEMORY &&
reset_cause == KFD_HW_EXCEPTION_ECC) {
+ spin_lock(&ev->lock);
ev->memory_exception_data = memory_exception_data;
+ ev->memory_exception_data.gpu_id = user_gpu_id;
set_event(ev);
+ spin_unlock(&ev->lock);
}
}
- mutex_unlock(&p->event_mutex);
+
+ rcu_read_unlock();
}
srcu_read_unlock(&kfd_processes_srcu, idx);
}
-void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid)
+void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid)
{
- struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, NULL);
struct kfd_hsa_memory_exception_data memory_exception_data;
struct kfd_hsa_hw_exception_data hw_exception_data;
struct kfd_event *ev;
uint32_t id = KFD_FIRST_NONSIGNAL_EVENT_ID;
+ int user_gpu_id;
- if (!p)
+ if (!p) {
+ dev_warn(dev->adev->dev, "Not find process with pasid:%d\n", pasid);
return; /* Presumably process exited. */
+ }
+
+ user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
+ if (unlikely(user_gpu_id == -EINVAL)) {
+ WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
+ kfd_unref_process(p);
+ return;
+ }
memset(&hw_exception_data, 0, sizeof(hw_exception_data));
- hw_exception_data.gpu_id = dev->id;
+ hw_exception_data.gpu_id = user_gpu_id;
hw_exception_data.memory_lost = 1;
hw_exception_data.reset_cause = KFD_HW_EXCEPTION_ECC;
memset(&memory_exception_data, 0, sizeof(memory_exception_data));
memory_exception_data.ErrorType = KFD_MEM_ERR_POISON_CONSUMED;
- memory_exception_data.gpu_id = dev->id;
+ memory_exception_data.gpu_id = user_gpu_id;
memory_exception_data.failure.imprecise = true;
- mutex_lock(&p->event_mutex);
+ rcu_read_lock();
+
idr_for_each_entry_continue(&p->event_idr, ev, id) {
if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
+ spin_lock(&ev->lock);
ev->hw_exception_data = hw_exception_data;
set_event(ev);
+ spin_unlock(&ev->lock);
}
if (ev->type == KFD_EVENT_TYPE_MEMORY) {
+ spin_lock(&ev->lock);
ev->memory_exception_data = memory_exception_data;
set_event(ev);
+ spin_unlock(&ev->lock);
}
}
- mutex_unlock(&p->event_mutex);
+
+ dev_warn(dev->adev->dev, "Send SIGBUS to process %s(pasid:%d)\n",
+ p->lead_thread->comm, pasid);
+ rcu_read_unlock();
/* user application will handle SIGBUS signal */
send_sig(SIGBUS, p->lead_thread, 0);