summaryrefslogtreecommitdiff
path: root/arch/powerpc/platforms/powernv
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/platforms/powernv')
-rw-r--r--arch/powerpc/platforms/powernv/Makefile3
-rw-r--r--arch/powerpc/platforms/powernv/eeh-powernv.c42
-rw-r--r--arch/powerpc/platforms/powernv/npu-dma.c28
-rw-r--r--arch/powerpc/platforms/powernv/opal-async.c180
-rw-r--r--arch/powerpc/platforms/powernv/opal-hmi.c2
-rw-r--r--arch/powerpc/platforms/powernv/opal-imc.c22
-rw-r--r--arch/powerpc/platforms/powernv/opal-irqchip.c8
-rw-r--r--arch/powerpc/platforms/powernv/opal-memory-errors.c2
-rw-r--r--arch/powerpc/platforms/powernv/opal-sensor.c17
-rw-r--r--arch/powerpc/platforms/powernv/opal-wrappers.S5
-rw-r--r--arch/powerpc/platforms/powernv/opal.c2
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c29
-rw-r--r--arch/powerpc/platforms/powernv/pci.h4
-rw-r--r--arch/powerpc/platforms/powernv/setup.c26
-rw-r--r--arch/powerpc/platforms/powernv/smp.c59
-rw-r--r--arch/powerpc/platforms/powernv/vas-debug.c209
-rw-r--r--arch/powerpc/platforms/powernv/vas-window.c242
-rw-r--r--arch/powerpc/platforms/powernv/vas.c32
-rw-r--r--arch/powerpc/platforms/powernv/vas.h93
19 files changed, 816 insertions, 189 deletions
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 7a31c26500e6..3732118a0482 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -15,4 +15,5 @@ obj-$(CONFIG_TRACEPOINTS) += opal-tracepoints.o
obj-$(CONFIG_OPAL_PRD) += opal-prd.o
obj-$(CONFIG_PERF_EVENTS) += opal-imc.o
obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o
-obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o
+obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o
+obj-$(CONFIG_PPC_FTW) += nx-ftw.o
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 8864065eba22..4650fb294e7a 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -41,7 +41,6 @@
#include "powernv.h"
#include "pci.h"
-static bool pnv_eeh_nb_init = false;
static int eeh_event_irq = -EINVAL;
static int pnv_eeh_init(void)
@@ -197,31 +196,31 @@ PNV_EEH_DBGFS_ENTRY(inbB, 0xE10);
* been built. If the I/O cache staff has been built, EEH is
* ready to supply service.
*/
-static int pnv_eeh_post_init(void)
+int pnv_eeh_post_init(void)
{
struct pci_controller *hose;
struct pnv_phb *phb;
int ret = 0;
- /* Register OPAL event notifier */
- if (!pnv_eeh_nb_init) {
- eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR));
- if (eeh_event_irq < 0) {
- pr_err("%s: Can't register OPAL event interrupt (%d)\n",
- __func__, eeh_event_irq);
- return eeh_event_irq;
- }
+ /* Probe devices & build address cache */
+ eeh_probe_devices();
+ eeh_addr_cache_build();
- ret = request_irq(eeh_event_irq, pnv_eeh_event,
- IRQ_TYPE_LEVEL_HIGH, "opal-eeh", NULL);
- if (ret < 0) {
- irq_dispose_mapping(eeh_event_irq);
- pr_err("%s: Can't request OPAL event interrupt (%d)\n",
- __func__, eeh_event_irq);
- return ret;
- }
+ /* Register OPAL event notifier */
+ eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR));
+ if (eeh_event_irq < 0) {
+ pr_err("%s: Can't register OPAL event interrupt (%d)\n",
+ __func__, eeh_event_irq);
+ return eeh_event_irq;
+ }
- pnv_eeh_nb_init = true;
+ ret = request_irq(eeh_event_irq, pnv_eeh_event,
+ IRQ_TYPE_LEVEL_HIGH, "opal-eeh", NULL);
+ if (ret < 0) {
+ irq_dispose_mapping(eeh_event_irq);
+ pr_err("%s: Can't request OPAL event interrupt (%d)\n",
+ __func__, eeh_event_irq);
+ return ret;
}
if (!eeh_enabled())
@@ -367,6 +366,10 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA)
return NULL;
+ /* Skip if we haven't probed yet */
+ if (phb->ioda.pe_rmap[config_addr] == IODA_INVALID_PE)
+ return NULL;
+
/* Initialize eeh device */
edev->class_code = pdn->class_code;
edev->mode &= 0xFFFFFF00;
@@ -1731,7 +1734,6 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn)
static struct eeh_ops pnv_eeh_ops = {
.name = "powernv",
.init = pnv_eeh_init,
- .post_init = pnv_eeh_post_init,
.probe = pnv_eeh_probe,
.set_option = pnv_eeh_set_option,
.get_pe_addr = pnv_eeh_get_pe_addr,
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 2cb6cbea4b3b..f6cbc1a71472 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -395,6 +395,7 @@ struct npu_context {
struct pci_dev *npdev[NV_MAX_NPUS][NV_MAX_LINKS];
struct mmu_notifier mn;
struct kref kref;
+ bool nmmu_flush;
/* Callback to stop translation requests on a given GPU */
struct npu_context *(*release_cb)(struct npu_context *, void *);
@@ -545,11 +546,13 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
unsigned long pid = npu_context->mm->context.id;
- /*
- * Unfortunately the nest mmu does not support flushing specific
- * addresses so we have to flush the whole mm.
- */
- flush_tlb_mm(npu_context->mm);
+ if (npu_context->nmmu_flush)
+ /*
+ * Unfortunately the nest mmu does not support flushing specific
+ * addresses so we have to flush the whole mm once before
+ * shooting down the GPU translation.
+ */
+ flush_all_mm(npu_context->mm);
/*
* Loop over all the NPUs this process is active on and launch
@@ -722,6 +725,16 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
return ERR_PTR(-ENODEV);
npu_context->npdev[npu->index][nvlink_index] = npdev;
+ if (!nphb->npu.nmmu_flush) {
+ /*
+ * If we're not explicitly flushing ourselves we need to mark
+ * the thread for global flushes
+ */
+ npu_context->nmmu_flush = false;
+ mm_context_add_copro(mm);
+ } else
+ npu_context->nmmu_flush = true;
+
return npu_context;
}
EXPORT_SYMBOL(pnv_npu2_init_context);
@@ -731,6 +744,9 @@ static void pnv_npu2_release_context(struct kref *kref)
struct npu_context *npu_context =
container_of(kref, struct npu_context, kref);
+ if (!npu_context->nmmu_flush)
+ mm_context_remove_copro(npu_context->mm);
+
npu_context->mm->context.npu_context = NULL;
mmu_notifier_unregister(&npu_context->mn,
npu_context->mm);
@@ -819,6 +835,8 @@ int pnv_npu2_init(struct pnv_phb *phb)
static int npu_index;
uint64_t rc = 0;
+ phb->npu.nmmu_flush =
+ of_property_read_bool(phb->hose->dn, "ibm,nmmu-flush");
for_each_child_of_node(phb->hose->dn, dn) {
gpdev = pnv_pci_get_gpu_dev(get_pci_dev(dn));
if (gpdev) {
diff --git a/arch/powerpc/platforms/powernv/opal-async.c b/arch/powerpc/platforms/powernv/opal-async.c
index cf33769a7b72..18a355fa15e8 100644
--- a/arch/powerpc/platforms/powernv/opal-async.c
+++ b/arch/powerpc/platforms/powernv/opal-async.c
@@ -1,7 +1,7 @@
/*
* PowerNV OPAL asynchronous completion interfaces
*
- * Copyright 2013 IBM Corp.
+ * Copyright 2013-2017 IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -23,40 +23,50 @@
#include <asm/machdep.h>
#include <asm/opal.h>
-#define N_ASYNC_COMPLETIONS 64
+enum opal_async_token_state {
+ ASYNC_TOKEN_UNALLOCATED = 0,
+ ASYNC_TOKEN_ALLOCATED,
+ ASYNC_TOKEN_DISPATCHED,
+ ASYNC_TOKEN_ABANDONED,
+ ASYNC_TOKEN_COMPLETED
+};
+
+struct opal_async_token {
+ enum opal_async_token_state state;
+ struct opal_msg response;
+};
-static DECLARE_BITMAP(opal_async_complete_map, N_ASYNC_COMPLETIONS) = {~0UL};
-static DECLARE_BITMAP(opal_async_token_map, N_ASYNC_COMPLETIONS);
static DECLARE_WAIT_QUEUE_HEAD(opal_async_wait);
static DEFINE_SPINLOCK(opal_async_comp_lock);
static struct semaphore opal_async_sem;
-static struct opal_msg *opal_async_responses;
static unsigned int opal_max_async_tokens;
+static struct opal_async_token *opal_async_tokens;
-int __opal_async_get_token(void)
+static int __opal_async_get_token(void)
{
unsigned long flags;
- int token;
+ int i, token = -EBUSY;
spin_lock_irqsave(&opal_async_comp_lock, flags);
- token = find_first_bit(opal_async_complete_map, opal_max_async_tokens);
- if (token >= opal_max_async_tokens) {
- token = -EBUSY;
- goto out;
- }
- if (__test_and_set_bit(token, opal_async_token_map)) {
- token = -EBUSY;
- goto out;
+ for (i = 0; i < opal_max_async_tokens; i++) {
+ if (opal_async_tokens[i].state == ASYNC_TOKEN_UNALLOCATED) {
+ opal_async_tokens[i].state = ASYNC_TOKEN_ALLOCATED;
+ token = i;
+ break;
+ }
}
- __clear_bit(token, opal_async_complete_map);
-
-out:
spin_unlock_irqrestore(&opal_async_comp_lock, flags);
return token;
}
+/*
+ * Note: If the returned token is used in an opal call and opal returns
+ * OPAL_ASYNC_COMPLETION you MUST call one of opal_async_wait_response() or
+ * opal_async_wait_response_interruptible() at least once before calling another
+ * opal_async_* function
+ */
int opal_async_get_token_interruptible(void)
{
int token;
@@ -73,9 +83,10 @@ int opal_async_get_token_interruptible(void)
}
EXPORT_SYMBOL_GPL(opal_async_get_token_interruptible);
-int __opal_async_release_token(int token)
+static int __opal_async_release_token(int token)
{
unsigned long flags;
+ int rc;
if (token < 0 || token >= opal_max_async_tokens) {
pr_err("%s: Passed token is out of range, token %d\n",
@@ -84,11 +95,26 @@ int __opal_async_release_token(int token)
}
spin_lock_irqsave(&opal_async_comp_lock, flags);
- __set_bit(token, opal_async_complete_map);
- __clear_bit(token, opal_async_token_map);
+ switch (opal_async_tokens[token].state) {
+ case ASYNC_TOKEN_COMPLETED:
+ case ASYNC_TOKEN_ALLOCATED:
+ opal_async_tokens[token].state = ASYNC_TOKEN_UNALLOCATED;
+ rc = 0;
+ break;
+ /*
+ * DISPATCHED and ABANDONED tokens must wait for OPAL to respond.
+ * Mark a DISPATCHED token as ABANDONED so that the response handling
+ * code knows no one cares and that it can free it then.
+ */
+ case ASYNC_TOKEN_DISPATCHED:
+ opal_async_tokens[token].state = ASYNC_TOKEN_ABANDONED;
+ /* Fall through */
+ default:
+ rc = 1;
+ }
spin_unlock_irqrestore(&opal_async_comp_lock, flags);
- return 0;
+ return rc;
}
int opal_async_release_token(int token)
@@ -96,12 +122,10 @@ int opal_async_release_token(int token)
int ret;
ret = __opal_async_release_token(token);
- if (ret)
- return ret;
-
- up(&opal_async_sem);
+ if (!ret)
+ up(&opal_async_sem);
- return 0;
+ return ret;
}
EXPORT_SYMBOL_GPL(opal_async_release_token);
@@ -117,22 +141,83 @@ int opal_async_wait_response(uint64_t token, struct opal_msg *msg)
return -EINVAL;
}
- /* Wakeup the poller before we wait for events to speed things
+ /*
+ * There is no need to mark the token as dispatched, wait_event()
+ * will block until the token completes.
+ *
+ * Wakeup the poller before we wait for events to speed things
* up on platforms or simulators where the interrupts aren't
* functional.
*/
opal_wake_poller();
- wait_event(opal_async_wait, test_bit(token, opal_async_complete_map));
- memcpy(msg, &opal_async_responses[token], sizeof(*msg));
+ wait_event(opal_async_wait, opal_async_tokens[token].state
+ == ASYNC_TOKEN_COMPLETED);
+ memcpy(msg, &opal_async_tokens[token].response, sizeof(*msg));
return 0;
}
EXPORT_SYMBOL_GPL(opal_async_wait_response);
+int opal_async_wait_response_interruptible(uint64_t token, struct opal_msg *msg)
+{
+ unsigned long flags;
+ int ret;
+
+ if (token >= opal_max_async_tokens) {
+ pr_err("%s: Invalid token passed\n", __func__);
+ return -EINVAL;
+ }
+
+ if (!msg) {
+ pr_err("%s: Invalid message pointer passed\n", __func__);
+ return -EINVAL;
+ }
+
+ /*
+ * The first time this gets called we mark the token as DISPATCHED
+ * so that if wait_event_interruptible() returns not zero and the
+ * caller frees the token, we know not to actually free the token
+ * until the response comes.
+ *
+ * Only change if the token is ALLOCATED - it may have been
+ * completed even before the caller gets around to calling this
+ * the first time.
+ *
+ * There is also a dirty great comment at the token allocation
+ * function that if the opal call returns OPAL_ASYNC_COMPLETION to
+ * the caller then the caller *must* call this or the not
+ * interruptible version before doing anything else with the
+ * token.
+ */
+ if (opal_async_tokens[token].state == ASYNC_TOKEN_ALLOCATED) {
+ spin_lock_irqsave(&opal_async_comp_lock, flags);
+ if (opal_async_tokens[token].state == ASYNC_TOKEN_ALLOCATED)
+ opal_async_tokens[token].state = ASYNC_TOKEN_DISPATCHED;
+ spin_unlock_irqrestore(&opal_async_comp_lock, flags);
+ }
+
+ /*
+ * Wakeup the poller before we wait for events to speed things
+ * up on platforms or simulators where the interrupts aren't
+ * functional.
+ */
+ opal_wake_poller();
+ ret = wait_event_interruptible(opal_async_wait,
+ opal_async_tokens[token].state ==
+ ASYNC_TOKEN_COMPLETED);
+ if (!ret)
+ memcpy(msg, &opal_async_tokens[token].response, sizeof(*msg));
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(opal_async_wait_response_interruptible);
+
+/* Called from interrupt context */
static int opal_async_comp_event(struct notifier_block *nb,
unsigned long msg_type, void *msg)
{
struct opal_msg *comp_msg = msg;
+ enum opal_async_token_state state;
unsigned long flags;
uint64_t token;
@@ -140,11 +225,17 @@ static int opal_async_comp_event(struct notifier_block *nb,
return 0;
token = be64_to_cpu(comp_msg->params[0]);
- memcpy(&opal_async_responses[token], comp_msg, sizeof(*comp_msg));
spin_lock_irqsave(&opal_async_comp_lock, flags);
- __set_bit(token, opal_async_complete_map);
+ state = opal_async_tokens[token].state;
+ opal_async_tokens[token].state = ASYNC_TOKEN_COMPLETED;
spin_unlock_irqrestore(&opal_async_comp_lock, flags);
+ if (state == ASYNC_TOKEN_ABANDONED) {
+ /* Free the token, no one else will */
+ opal_async_release_token(token);
+ return 0;
+ }
+ memcpy(&opal_async_tokens[token].response, comp_msg, sizeof(*comp_msg));
wake_up(&opal_async_wait);
return 0;
@@ -178,32 +269,23 @@ int __init opal_async_comp_init(void)
}
opal_max_async_tokens = be32_to_cpup(async);
- if (opal_max_async_tokens > N_ASYNC_COMPLETIONS)
- opal_max_async_tokens = N_ASYNC_COMPLETIONS;
+ opal_async_tokens = kcalloc(opal_max_async_tokens,
+ sizeof(*opal_async_tokens), GFP_KERNEL);
+ if (!opal_async_tokens) {
+ err = -ENOMEM;
+ goto out_opal_node;
+ }
err = opal_message_notifier_register(OPAL_MSG_ASYNC_COMP,
&opal_async_comp_nb);
if (err) {
pr_err("%s: Can't register OPAL event notifier (%d)\n",
__func__, err);
+ kfree(opal_async_tokens);
goto out_opal_node;
}
- opal_async_responses = kzalloc(
- sizeof(*opal_async_responses) * opal_max_async_tokens,
- GFP_KERNEL);
- if (!opal_async_responses) {
- pr_err("%s: Out of memory, failed to do asynchronous "
- "completion init\n", __func__);
- err = -ENOMEM;
- goto out_opal_node;
- }
-
- /* Initialize to 1 less than the maximum tokens available, as we may
- * require to pop one during emergency through synchronous call to
- * __opal_async_get_token()
- */
- sema_init(&opal_async_sem, opal_max_async_tokens - 1);
+ sema_init(&opal_async_sem, opal_max_async_tokens);
out_opal_node:
of_node_put(opal_node);
diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c
index d78fed728cdf..c9e1a4ff295c 100644
--- a/arch/powerpc/platforms/powernv/opal-hmi.c
+++ b/arch/powerpc/platforms/powernv/opal-hmi.c
@@ -1,5 +1,5 @@
/*
- * OPAL hypervisor Maintenance interrupt handling support in PowreNV.
+ * OPAL hypervisor Maintenance interrupt handling support in PowerNV.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index 21f6531fae20..465ea105b771 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -153,6 +153,22 @@ static void disable_core_pmu_counters(void)
put_online_cpus();
}
+int get_max_nest_dev(void)
+{
+ struct device_node *node;
+ u32 pmu_units = 0, type;
+
+ for_each_compatible_node(node, NULL, IMC_DTB_UNIT_COMPAT) {
+ if (of_property_read_u32(node, "type", &type))
+ continue;
+
+ if (type == IMC_TYPE_CHIP)
+ pmu_units++;
+ }
+
+ return pmu_units;
+}
+
static int opal_imc_counters_probe(struct platform_device *pdev)
{
struct device_node *imc_dev = pdev->dev.of_node;
@@ -191,8 +207,10 @@ static int opal_imc_counters_probe(struct platform_device *pdev)
break;
}
- if (!imc_pmu_create(imc_dev, pmu_count, domain))
- pmu_count++;
+ if (!imc_pmu_create(imc_dev, pmu_count, domain)) {
+ if (domain == IMC_DOMAIN_NEST)
+ pmu_count++;
+ }
}
return 0;
diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c
index ecdcba9d1220..9d1b8c0aaf93 100644
--- a/arch/powerpc/platforms/powernv/opal-irqchip.c
+++ b/arch/powerpc/platforms/powernv/opal-irqchip.c
@@ -174,8 +174,14 @@ void opal_event_shutdown(void)
/* First free interrupts, which will also mask them */
for (i = 0; i < opal_irq_count; i++) {
- if (opal_irqs[i])
+ if (!opal_irqs[i])
+ continue;
+
+ if (in_interrupt())
+ disable_irq_nosync(opal_irqs[i]);
+ else
free_irq(opal_irqs[i], NULL);
+
opal_irqs[i] = 0;
}
}
diff --git a/arch/powerpc/platforms/powernv/opal-memory-errors.c b/arch/powerpc/platforms/powernv/opal-memory-errors.c
index 4495f428b500..d9916ea62305 100644
--- a/arch/powerpc/platforms/powernv/opal-memory-errors.c
+++ b/arch/powerpc/platforms/powernv/opal-memory-errors.c
@@ -1,5 +1,5 @@
/*
- * OPAL asynchronus Memory error handling support in PowreNV.
+ * OPAL asynchronus Memory error handling support in PowerNV.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
diff --git a/arch/powerpc/platforms/powernv/opal-sensor.c b/arch/powerpc/platforms/powernv/opal-sensor.c
index aa267f120033..0a7074bb91dc 100644
--- a/arch/powerpc/platforms/powernv/opal-sensor.c
+++ b/arch/powerpc/platforms/powernv/opal-sensor.c
@@ -19,13 +19,10 @@
*/
#include <linux/delay.h>
-#include <linux/mutex.h>
#include <linux/of_platform.h>
#include <asm/opal.h>
#include <asm/machdep.h>
-static DEFINE_MUTEX(opal_sensor_mutex);
-
/*
* This will return sensor information to driver based on the requested sensor
* handle. A handle is an opaque id for the powernv, read by the driver from the
@@ -38,13 +35,9 @@ int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data)
__be32 data;
token = opal_async_get_token_interruptible();
- if (token < 0) {
- pr_err("%s: Couldn't get the token, returning\n", __func__);
- ret = token;
- goto out;
- }
+ if (token < 0)
+ return token;
- mutex_lock(&opal_sensor_mutex);
ret = opal_sensor_read(sensor_hndl, token, &data);
switch (ret) {
case OPAL_ASYNC_COMPLETION:
@@ -52,7 +45,7 @@ int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data)
if (ret) {
pr_err("%s: Failed to wait for the async response, %d\n",
__func__, ret);
- goto out_token;
+ goto out;
}
ret = opal_error_code(opal_get_async_rc(msg));
@@ -73,10 +66,8 @@ int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data)
break;
}
-out_token:
- mutex_unlock(&opal_sensor_mutex);
- opal_async_release_token(token);
out:
+ opal_async_release_token(token);
return ret;
}
EXPORT_SYMBOL_GPL(opal_get_sensor_data);
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 8c1ede2d3f7e..6f4b00a2ac46 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -94,7 +94,7 @@ opal_return:
* bytes (always BE) since MSR:LE will end up fixed up as a side
* effect of the rfid.
*/
- FIXUP_ENDIAN
+ FIXUP_ENDIAN_HV
ld r2,PACATOC(r13);
lwz r4,8(r1);
ld r5,PPC_LR_STKOFF(r1);
@@ -120,7 +120,7 @@ opal_real_call:
hrfid
opal_return_realmode:
- FIXUP_ENDIAN
+ FIXUP_ENDIAN_HV
ld r2,PACATOC(r13);
lwz r11,8(r1);
ld r12,PPC_LR_STKOFF(r1)
@@ -307,6 +307,7 @@ OPAL_CALL(opal_xive_get_vp_info, OPAL_XIVE_GET_VP_INFO);
OPAL_CALL(opal_xive_set_vp_info, OPAL_XIVE_SET_VP_INFO);
OPAL_CALL(opal_xive_sync, OPAL_XIVE_SYNC);
OPAL_CALL(opal_xive_dump, OPAL_XIVE_DUMP);
+OPAL_CALL(opal_signal_system_reset, OPAL_SIGNAL_SYSTEM_RESET);
OPAL_CALL(opal_npu_init_context, OPAL_NPU_INIT_CONTEXT);
OPAL_CALL(opal_npu_destroy_context, OPAL_NPU_DESTROY_CONTEXT);
OPAL_CALL(opal_npu_map_lpar, OPAL_NPU_MAP_LPAR);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 65c79ecf5a4d..041ddbd1fc57 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -998,6 +998,7 @@ int opal_error_code(int rc)
case OPAL_PARAMETER: return -EINVAL;
case OPAL_ASYNC_COMPLETION: return -EINPROGRESS;
+ case OPAL_BUSY:
case OPAL_BUSY_EVENT: return -EBUSY;
case OPAL_NO_MEM: return -ENOMEM;
case OPAL_PERMISSION: return -EPERM;
@@ -1037,3 +1038,4 @@ EXPORT_SYMBOL_GPL(opal_write_oppanel_async);
/* Export this for KVM */
EXPORT_SYMBOL_GPL(opal_int_set_mfrr);
EXPORT_SYMBOL_GPL(opal_int_eoi);
+EXPORT_SYMBOL_GPL(opal_error_code);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 57f9e55f4352..749055553064 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1002,9 +1002,12 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
}
/*
- * After doing so, there would be a "hole" in the /proc/iomem when
- * offset is a positive value. It looks like the device return some
- * mmio back to the system, which actually no one could use it.
+ * Since M64 BAR shares segments among all possible 256 PEs,
+ * we have to shift the beginning of PF IOV BAR to make it start from
+ * the segment which belongs to the PE number assigned to the first VF.
+ * This creates a "hole" in the /proc/iomem which could be used for
+ * allocating other resources so we reserve this area below and
+ * release when IOV is released.
*/
for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
res = &dev->resource[i + PCI_IOV_RESOURCES];
@@ -1018,7 +1021,22 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (%sabling %d VFs shifted by %d)\n",
i, &res2, res, (offset > 0) ? "En" : "Dis",
num_vfs, offset);
+
+ if (offset < 0) {
+ devm_release_resource(&dev->dev, &pdn->holes[i]);
+ memset(&pdn->holes[i], 0, sizeof(pdn->holes[i]));
+ }
+
pci_update_resource(dev, i + PCI_IOV_RESOURCES);
+
+ if (offset > 0) {
+ pdn->holes[i].start = res2.start;
+ pdn->holes[i].end = res2.start + size * offset - 1;
+ pdn->holes[i].flags = IORESOURCE_BUS;
+ pdn->holes[i].name = "pnv_iov_reserved";
+ devm_request_resource(&dev->dev, res->parent,
+ &pdn->holes[i]);
+ }
}
return 0;
}
@@ -2779,7 +2797,7 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS))
return -EINVAL;
- if ((window_size > memory_hotplug_max()) || !is_power_of_2(window_size))
+ if (!is_power_of_2(window_size))
return -EINVAL;
/* Adjust direct table size from window_size and levels */
@@ -3293,8 +3311,7 @@ static void pnv_pci_ioda_fixup(void)
pnv_pci_ioda_create_dbgfs();
#ifdef CONFIG_EEH
- eeh_init();
- eeh_addr_cache_build();
+ pnv_eeh_post_init();
#endif
}
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index b47f9406d97e..b772d7473896 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -188,6 +188,9 @@ struct pnv_phb {
/* Bitmask for MMIO register usage */
unsigned long mmio_atsd_usage;
+
+ /* Do we need to explicitly flush the nest mmu? */
+ bool nmmu_flush;
} npu;
#ifdef CONFIG_CXL_BASE
@@ -235,6 +238,7 @@ extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev);
extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq);
extern bool pnv_pci_enable_device_hook(struct pci_dev *dev);
extern void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable);
+extern int pnv_eeh_post_init(void);
extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
const char *fmt, ...);
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index bbb73aa0eb8f..1edfbc1e40f4 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -36,6 +36,7 @@
#include <asm/opal.h>
#include <asm/kexec.h>
#include <asm/smp.h>
+#include <asm/tm.h>
#include "powernv.h"
@@ -290,6 +291,7 @@ static void __init pnv_setup_machdep_opal(void)
ppc_md.restart = pnv_restart;
pm_power_off = pnv_power_off;
ppc_md.halt = pnv_halt;
+ /* ppc_md.system_reset_exception gets filled in by pnv_smp_init() */
ppc_md.machine_check_exception = opal_machine_check;
ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery;
ppc_md.hmi_exception_early = opal_hmi_exception_early;
@@ -311,6 +313,28 @@ static int __init pnv_probe(void)
return 1;
}
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+void __init pnv_tm_init(void)
+{
+ if (!firmware_has_feature(FW_FEATURE_OPAL) ||
+ !pvr_version_is(PVR_POWER9) ||
+ early_cpu_has_feature(CPU_FTR_TM))
+ return;
+
+ if (opal_reinit_cpus(OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED) != OPAL_SUCCESS)
+ return;
+
+ pr_info("Enabling TM (Transactional Memory) with Suspend Disabled\n");
+ cur_cpu_spec->cpu_features |= CPU_FTR_TM;
+ /* Make sure "normal" HTM is off (it should be) */
+ cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_HTM;
+ /* Turn on no suspend mode, and HTM no SC */
+ cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_HTM_NO_SUSPEND | \
+ PPC_FEATURE2_HTM_NOSC;
+ tm_suspend_disabled = true;
+}
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
/*
* Returns the cpu frequency for 'cpu' in Hz. This is used by
* /proc/cpuinfo
@@ -319,7 +343,7 @@ static unsigned long pnv_get_proc_freq(unsigned int cpu)
{
unsigned long ret_freq;
- ret_freq = cpufreq_quick_get(cpu) * 1000ul;
+ ret_freq = cpufreq_get(cpu) * 1000ul;
/*
* If the backend cpufreq driver does not exist,
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index c17f81e433f7..ba030669eca1 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -49,6 +49,13 @@
static void pnv_smp_setup_cpu(int cpu)
{
+ /*
+ * P9 workaround for CI vector load (see traps.c),
+ * enable the corresponding HMI interrupt
+ */
+ if (pvr_version_is(PVR_POWER9))
+ mtspr(SPRN_HMEER, mfspr(SPRN_HMEER) | PPC_BIT(17));
+
if (xive_enabled())
xive_smp_setup_cpu();
else if (cpu != boot_cpuid)
@@ -290,6 +297,54 @@ static void __init pnv_smp_probe(void)
}
}
+static int pnv_system_reset_exception(struct pt_regs *regs)
+{
+ if (smp_handle_nmi_ipi(regs))
+ return 1;
+ return 0;
+}
+
+static int pnv_cause_nmi_ipi(int cpu)
+{
+ int64_t rc;
+
+ if (cpu >= 0) {
+ rc = opal_signal_system_reset(get_hard_smp_processor_id(cpu));
+ if (rc != OPAL_SUCCESS)
+ return 0;
+ return 1;
+
+ } else if (cpu == NMI_IPI_ALL_OTHERS) {
+ bool success = true;
+ int c;
+
+
+ /*
+ * We do not use broadcasts (yet), because it's not clear
+ * exactly what semantics Linux wants or the firmware should
+ * provide.
+ */
+ for_each_online_cpu(c) {
+ if (c == smp_processor_id())
+ continue;
+
+ rc = opal_signal_system_reset(
+ get_hard_smp_processor_id(c));
+ if (rc != OPAL_SUCCESS)
+ success = false;
+ }
+ if (success)
+ return 1;
+
+ /*
+ * Caller will fall back to doorbells, which may pick
+ * up the remainders.
+ */
+ }
+
+ return 0;
+}
+
static struct smp_ops_t pnv_smp_ops = {
.message_pass = NULL, /* Use smp_muxed_ipi_message_pass */
.cause_ipi = NULL, /* Filled at runtime by pnv_smp_probe() */
@@ -308,6 +363,10 @@ static struct smp_ops_t pnv_smp_ops = {
/* This is called very early during platform setup_arch */
void __init pnv_smp_init(void)
{
+ if (opal_check_token(OPAL_SIGNAL_SYSTEM_RESET)) {
+ ppc_md.system_reset_exception = pnv_system_reset_exception;
+ pnv_smp_ops.cause_nmi_ipi = pnv_cause_nmi_ipi;
+ }
smp_ops = &pnv_smp_ops;
#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/powerpc/platforms/powernv/vas-debug.c b/arch/powerpc/platforms/powernv/vas-debug.c
new file mode 100644
index 000000000000..ca22f1eae050
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas-debug.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright 2016-17 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) "vas: " fmt
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include "vas.h"
+
+static struct dentry *vas_debugfs;
+
+static char *cop_to_str(int cop)
+{
+ switch (cop) {
+ case VAS_COP_TYPE_FAULT: return "Fault";
+ case VAS_COP_TYPE_842: return "NX-842 Normal Priority";
+ case VAS_COP_TYPE_842_HIPRI: return "NX-842 High Priority";
+ case VAS_COP_TYPE_GZIP: return "NX-GZIP Normal Priority";
+ case VAS_COP_TYPE_GZIP_HIPRI: return "NX-GZIP High Priority";
+ case VAS_COP_TYPE_FTW: return "Fast Thread-wakeup";
+ default: return "Unknown";
+ }
+}
+
+static int info_dbg_show(struct seq_file *s, void *private)
+{
+ struct vas_window *window = s->private;
+
+ mutex_lock(&vas_mutex);
+
+ /* ensure window is not unmapped */
+ if (!window->hvwc_map)
+ goto unlock;
+
+ seq_printf(s, "Type: %s, %s\n", cop_to_str(window->cop),
+ window->tx_win ? "Send" : "Receive");
+ seq_printf(s, "Pid : %d\n", window->pid);
+
+unlock:
+ mutex_unlock(&vas_mutex);
+ return 0;
+}
+
+static int info_dbg_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, info_dbg_show, inode->i_private);
+}
+
+static const struct file_operations info_fops = {
+ .open = info_dbg_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static inline void print_reg(struct seq_file *s, struct vas_window *win,
+ char *name, u32 reg)
+{
+ seq_printf(s, "0x%016llx %s\n", read_hvwc_reg(win, name, reg), name);
+}
+
+static int hvwc_dbg_show(struct seq_file *s, void *private)
+{
+ struct vas_window *window = s->private;
+
+ mutex_lock(&vas_mutex);
+
+ /* ensure window is not unmapped */
+ if (!window->hvwc_map)
+ goto unlock;
+
+ print_reg(s, window, VREG(LPID));
+ print_reg(s, window, VREG(PID));
+ print_reg(s, window, VREG(XLATE_MSR));
+ print_reg(s, window, VREG(XLATE_LPCR));
+ print_reg(s, window, VREG(XLATE_CTL));
+ print_reg(s, window, VREG(AMR));
+ print_reg(s, window, VREG(SEIDR));
+ print_reg(s, window, VREG(FAULT_TX_WIN));
+ print_reg(s, window, VREG(OSU_INTR_SRC_RA));
+ print_reg(s, window, VREG(HV_INTR_SRC_RA));
+ print_reg(s, window, VREG(PSWID));
+ print_reg(s, window, VREG(LFIFO_BAR));
+ print_reg(s, window, VREG(LDATA_STAMP_CTL));
+ print_reg(s, window, VREG(LDMA_CACHE_CTL));
+ print_reg(s, window, VREG(LRFIFO_PUSH));
+ print_reg(s, window, VREG(CURR_MSG_COUNT));
+ print_reg(s, window, VREG(LNOTIFY_AFTER_COUNT));
+ print_reg(s, window, VREG(LRX_WCRED));
+ print_reg(s, window, VREG(LRX_WCRED_ADDER));
+ print_reg(s, window, VREG(TX_WCRED));
+ print_reg(s, window, VREG(TX_WCRED_ADDER));
+ print_reg(s, window, VREG(LFIFO_SIZE));
+ print_reg(s, window, VREG(WINCTL));
+ print_reg(s, window, VREG(WIN_STATUS));
+ print_reg(s, window, VREG(WIN_CTX_CACHING_CTL));
+ print_reg(s, window, VREG(TX_RSVD_BUF_COUNT));
+ print_reg(s, window, VREG(LRFIFO_WIN_PTR));
+ print_reg(s, window, VREG(LNOTIFY_CTL));
+ print_reg(s, window, VREG(LNOTIFY_PID));
+ print_reg(s, window, VREG(LNOTIFY_LPID));
+ print_reg(s, window, VREG(LNOTIFY_TID));
+ print_reg(s, window, VREG(LNOTIFY_SCOPE));
+ print_reg(s, window, VREG(NX_UTIL_ADDER));
+unlock:
+ mutex_unlock(&vas_mutex);
+ return 0;
+}
+
+static int hvwc_dbg_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, hvwc_dbg_show, inode->i_private);
+}
+
+static const struct file_operations hvwc_fops = {
+ .open = hvwc_dbg_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+void vas_window_free_dbgdir(struct vas_window *window)
+{
+ if (window->dbgdir) {
+ debugfs_remove_recursive(window->dbgdir);
+ kfree(window->dbgname);
+ window->dbgdir = NULL;
+ window->dbgname = NULL;
+ }
+}
+
+void vas_window_init_dbgdir(struct vas_window *window)
+{
+ struct dentry *f, *d;
+
+ if (!window->vinst->dbgdir)
+ return;
+
+ window->dbgname = kzalloc(16, GFP_KERNEL);
+ if (!window->dbgname)
+ return;
+
+ snprintf(window->dbgname, 16, "w%d", window->winid);
+
+ d = debugfs_create_dir(window->dbgname, window->vinst->dbgdir);
+ if (IS_ERR(d))
+ goto free_name;
+
+ window->dbgdir = d;
+
+ f = debugfs_create_file("info", 0444, d, window, &info_fops);
+ if (IS_ERR(f))
+ goto remove_dir;
+
+ f = debugfs_create_file("hvwc", 0444, d, window, &hvwc_fops);
+ if (IS_ERR(f))
+ goto remove_dir;
+
+ return;
+
+free_name:
+ kfree(window->dbgname);
+ window->dbgname = NULL;
+
+remove_dir:
+ debugfs_remove_recursive(window->dbgdir);
+ window->dbgdir = NULL;
+}
+
+void vas_instance_init_dbgdir(struct vas_instance *vinst)
+{
+ struct dentry *d;
+
+ if (!vas_debugfs)
+ return;
+
+ vinst->dbgname = kzalloc(16, GFP_KERNEL);
+ if (!vinst->dbgname)
+ return;
+
+ snprintf(vinst->dbgname, 16, "v%d", vinst->vas_id);
+
+ d = debugfs_create_dir(vinst->dbgname, vas_debugfs);
+ if (IS_ERR(d))
+ goto free_name;
+
+ vinst->dbgdir = d;
+ return;
+
+free_name:
+ kfree(vinst->dbgname);
+ vinst->dbgname = NULL;
+ vinst->dbgdir = NULL;
+}
+
+void vas_init_dbgdir(void)
+{
+ vas_debugfs = debugfs_create_dir("vas", NULL);
+ if (IS_ERR(vas_debugfs))
+ vas_debugfs = NULL;
+}
diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c
index 5aae845b8cd9..2b3eb01ab110 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -16,7 +16,8 @@
#include <linux/log2.h>
#include <linux/rcupdate.h>
#include <linux/cred.h>
-
+#include <asm/switch_to.h>
+#include <asm/ppc-opcode.h>
#include "vas.h"
#include "copy-paste.h"
@@ -40,6 +41,16 @@ static void compute_paste_address(struct vas_window *window, u64 *addr, int *len
pr_debug("Txwin #%d: Paste addr 0x%llx\n", winid, *addr);
}
+u64 vas_win_paste_addr(struct vas_window *win)
+{
+ u64 addr;
+
+ compute_paste_address(win, &addr, NULL);
+
+ return addr;
+}
+EXPORT_SYMBOL(vas_win_paste_addr);
+
static inline void get_hvwc_mmio_bar(struct vas_window *window,
u64 *start, int *len)
{
@@ -145,23 +156,37 @@ static void unmap_paste_region(struct vas_window *window)
}
/*
- * Unmap the MMIO regions for a window.
+ * Unmap the MMIO regions for a window. Hold the vas_mutex so we don't
+ * unmap when the window's debugfs dir is in use. This serializes close
+ * of a window even on another VAS instance but since its not a critical
+ * path, just minimize the time we hold the mutex for now. We can add
+ * a per-instance mutex later if necessary.
*/
static void unmap_winctx_mmio_bars(struct vas_window *window)
{
int len;
+ void *uwc_map;
+ void *hvwc_map;
u64 busaddr_start;
- if (window->hvwc_map) {
+ mutex_lock(&vas_mutex);
+
+ hvwc_map = window->hvwc_map;
+ window->hvwc_map = NULL;
+
+ uwc_map = window->uwc_map;
+ window->uwc_map = NULL;
+
+ mutex_unlock(&vas_mutex);
+
+ if (hvwc_map) {
get_hvwc_mmio_bar(window, &busaddr_start, &len);
- unmap_region(window->hvwc_map, busaddr_start, len);
- window->hvwc_map = NULL;
+ unmap_region(hvwc_map, busaddr_start, len);
}
- if (window->uwc_map) {
+ if (uwc_map) {
get_uwc_mmio_bar(window, &busaddr_start, &len);
- unmap_region(window->uwc_map, busaddr_start, len);
- window->uwc_map = NULL;
+ unmap_region(uwc_map, busaddr_start, len);
}
}
@@ -528,6 +553,9 @@ static void vas_window_free(struct vas_window *window)
struct vas_instance *vinst = window->vinst;
unmap_winctx_mmio_bars(window);
+
+ vas_window_free_dbgdir(window);
+
kfree(window);
vas_release_window_id(&vinst->ida, winid);
@@ -552,6 +580,8 @@ static struct vas_window *vas_window_alloc(struct vas_instance *vinst)
if (map_winctx_mmio_bars(window))
goto out_free;
+ vas_window_init_dbgdir(window);
+
return window;
out_free:
@@ -569,6 +599,32 @@ static void put_rx_win(struct vas_window *rxwin)
}
/*
+ * Find the user space receive window given the @pswid.
+ * - We must have a valid vasid and it must belong to this instance.
+ * (so both send and receive windows are on the same VAS instance)
+ * - The window must refer to an OPEN, FTW, RECEIVE window.
+ *
+ * NOTE: We access ->windows[] table and assume that vinst->mutex is held.
+ */
+static struct vas_window *get_user_rxwin(struct vas_instance *vinst, u32 pswid)
+{
+ int vasid, winid;
+ struct vas_window *rxwin;
+
+ decode_pswid(pswid, &vasid, &winid);
+
+ if (vinst->vas_id != vasid)
+ return ERR_PTR(-EINVAL);
+
+ rxwin = vinst->windows[winid];
+
+ if (!rxwin || rxwin->tx_win || rxwin->cop != VAS_COP_TYPE_FTW)
+ return ERR_PTR(-EINVAL);
+
+ return rxwin;
+}
+
+/*
* Get the VAS receive window associated with NX engine identified
* by @cop and if applicable, @pswid.
*
@@ -581,10 +637,10 @@ static struct vas_window *get_vinst_rxwin(struct vas_instance *vinst,
mutex_lock(&vinst->mutex);
- if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI)
- rxwin = vinst->rxwin[cop] ?: ERR_PTR(-EINVAL);
+ if (cop == VAS_COP_TYPE_FTW)
+ rxwin = get_user_rxwin(vinst, pswid);
else
- rxwin = ERR_PTR(-EINVAL);
+ rxwin = vinst->rxwin[cop] ?: ERR_PTR(-EINVAL);
if (!IS_ERR(rxwin))
atomic_inc(&rxwin->num_txwins);
@@ -674,15 +730,18 @@ static void init_winctx_for_rxwin(struct vas_window *rxwin,
winctx->rx_fifo = rxattr->rx_fifo;
winctx->rx_fifo_size = rxattr->rx_fifo_size;
- winctx->wcreds_max = rxattr->wcreds_max ?: VAS_WCREDS_DEFAULT;
+ winctx->wcreds_max = rxwin->wcreds_max;
winctx->pin_win = rxattr->pin_win;
winctx->nx_win = rxattr->nx_win;
winctx->fault_win = rxattr->fault_win;
+ winctx->user_win = rxattr->user_win;
+ winctx->rej_no_credit = rxattr->rej_no_credit;
winctx->rx_word_mode = rxattr->rx_win_ord_mode;
winctx->tx_word_mode = rxattr->tx_win_ord_mode;
winctx->rx_wcred_mode = rxattr->rx_wcred_mode;
winctx->tx_wcred_mode = rxattr->tx_wcred_mode;
+ winctx->notify_early = rxattr->notify_early;
if (winctx->nx_win) {
winctx->data_stamp = true;
@@ -723,7 +782,10 @@ static void init_winctx_for_rxwin(struct vas_window *rxwin,
static bool rx_win_args_valid(enum vas_cop_type cop,
struct vas_rx_win_attr *attr)
{
- dump_rx_win_attr(attr);
+ pr_debug("Rxattr: fault %d, notify %d, intr %d, early %d, fifo %d\n",
+ attr->fault_win, attr->notify_disable,
+ attr->intr_disable, attr->notify_early,
+ attr->rx_fifo_size);
if (cop >= VAS_COP_TYPE_MAX)
return false;
@@ -735,6 +797,9 @@ static bool rx_win_args_valid(enum vas_cop_type cop,
if (attr->rx_fifo_size > VAS_RX_FIFO_SIZE_MAX)
return false;
+ if (attr->wcreds_max > VAS_RX_WCREDS_MAX)
+ return false;
+
if (attr->nx_win) {
/* cannot be fault or user window if it is nx */
if (attr->fault_win || attr->user_win)
@@ -835,6 +900,7 @@ struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop,
rxwin->nx_win = rxattr->nx_win;
rxwin->user_win = rxattr->user_win;
rxwin->cop = cop;
+ rxwin->wcreds_max = rxattr->wcreds_max ?: VAS_WCREDS_DEFAULT;
if (rxattr->user_win)
rxwin->pid = task_pid_vnr(current);
@@ -884,21 +950,23 @@ static void init_winctx_for_txwin(struct vas_window *txwin,
*/
memset(winctx, 0, sizeof(struct vas_winctx));
- winctx->wcreds_max = txattr->wcreds_max ?: VAS_WCREDS_DEFAULT;
+ winctx->wcreds_max = txwin->wcreds_max;
winctx->user_win = txattr->user_win;
winctx->nx_win = txwin->rxwin->nx_win;
winctx->pin_win = txattr->pin_win;
+ winctx->rej_no_credit = txattr->rej_no_credit;
+ winctx->rsvd_txbuf_enable = txattr->rsvd_txbuf_enable;
winctx->rx_wcred_mode = txattr->rx_wcred_mode;
winctx->tx_wcred_mode = txattr->tx_wcred_mode;
winctx->rx_word_mode = txattr->rx_win_ord_mode;
winctx->tx_word_mode = txattr->tx_win_ord_mode;
+ winctx->rsvd_txbuf_count = txattr->rsvd_txbuf_count;
- if (winctx->nx_win) {
+ winctx->intr_disable = true;
+ if (winctx->nx_win)
winctx->data_stamp = true;
- winctx->intr_disable = true;
- }
winctx->lpid = txattr->lpid;
winctx->pidr = txattr->pidr;
@@ -921,6 +989,9 @@ static bool tx_win_args_valid(enum vas_cop_type cop,
if (cop > VAS_COP_TYPE_MAX)
return false;
+ if (attr->wcreds_max > VAS_TX_WCREDS_MAX)
+ return false;
+
if (attr->user_win &&
(cop != VAS_COP_TYPE_FTW || attr->rsvd_txbuf_count))
return false;
@@ -940,6 +1011,14 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
if (!tx_win_args_valid(cop, attr))
return ERR_PTR(-EINVAL);
+ /*
+ * If caller did not specify a vasid but specified the PSWID of a
+ * receive window (applicable only to FTW windows), use the vasid
+ * from that receive window.
+ */
+ if (vasid == -1 && attr->pswid)
+ decode_pswid(attr->pswid, &vasid, NULL);
+
vinst = find_vas_instance(vasid);
if (!vinst) {
pr_devel("vasid %d not found!\n", vasid);
@@ -958,11 +1037,13 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
goto put_rxwin;
}
+ txwin->cop = cop;
txwin->tx_win = 1;
txwin->rxwin = rxwin;
txwin->nx_win = txwin->rxwin->nx_win;
txwin->pid = attr->pid;
txwin->user_win = attr->user_win;
+ txwin->wcreds_max = attr->wcreds_max ?: VAS_WCREDS_DEFAULT;
init_winctx_for_txwin(txwin, attr, &winctx);
@@ -984,6 +1065,14 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
}
}
+ /*
+ * Now that we have a send window, ensure context switch issues
+ * CP_ABORT for this thread.
+ */
+ rc = -EINVAL;
+ if (set_thread_uses_vas() < 0)
+ goto free_window;
+
set_vinst_win(vinst, txwin);
return txwin;
@@ -1038,50 +1127,110 @@ int vas_paste_crb(struct vas_window *txwin, int offset, bool re)
else
rc = -EINVAL;
- print_fifo_msg_count(txwin);
+ pr_debug("Txwin #%d: Msg count %llu\n", txwin->winid,
+ read_hvwc_reg(txwin, VREG(LRFIFO_PUSH)));
return rc;
}
EXPORT_SYMBOL_GPL(vas_paste_crb);
+/*
+ * If credit checking is enabled for this window, poll for the return
+ * of window credits (i.e for NX engines to process any outstanding CRBs).
+ * Since NX-842 waits for the CRBs to be processed before closing the
+ * window, we should not have to wait for too long.
+ *
+ * TODO: We retry in 10ms intervals now. We could/should probably peek at
+ * the VAS_LRFIFO_PUSH_OFFSET register to get an estimate of pending
+ * CRBs on the FIFO and compute the delay dynamically on each retry.
+ * But that is not really needed until we support NX-GZIP access from
+ * user space. (NX-842 driver waits for CSB and Fast thread-wakeup
+ * doesn't use credit checking).
+ */
+static void poll_window_credits(struct vas_window *window)
+{
+ u64 val;
+ int creds, mode;
+
+ val = read_hvwc_reg(window, VREG(WINCTL));
+ if (window->tx_win)
+ mode = GET_FIELD(VAS_WINCTL_TX_WCRED_MODE, val);
+ else
+ mode = GET_FIELD(VAS_WINCTL_RX_WCRED_MODE, val);
+
+ if (!mode)
+ return;
+retry:
+ if (window->tx_win) {
+ val = read_hvwc_reg(window, VREG(TX_WCRED));
+ creds = GET_FIELD(VAS_TX_WCRED, val);
+ } else {
+ val = read_hvwc_reg(window, VREG(LRX_WCRED));
+ creds = GET_FIELD(VAS_LRX_WCRED, val);
+ }
+
+ if (creds < window->wcreds_max) {
+ val = 0;
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(msecs_to_jiffies(10));
+ goto retry;
+ }
+}
+
+/*
+ * Wait for the window to go to "not-busy" state. It should only take a
+ * short time to queue a CRB, so window should not be busy for too long.
+ * Trying 5ms intervals.
+ */
static void poll_window_busy_state(struct vas_window *window)
{
int busy;
u64 val;
retry:
- /*
- * Poll Window Busy flag
- */
val = read_hvwc_reg(window, VREG(WIN_STATUS));
busy = GET_FIELD(VAS_WIN_BUSY, val);
if (busy) {
val = 0;
set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(HZ);
+ schedule_timeout(msecs_to_jiffies(5));
goto retry;
}
}
+/*
+ * Have the hardware cast a window out of cache and wait for it to
+ * be completed.
+ *
+ * NOTE: It can take a relatively long time to cast the window context
+ * out of the cache. It is not strictly necessary to cast out if:
+ *
+ * - we clear the "Pin Window" bit (so hardware is free to evict)
+ *
+ * - we re-initialize the window context when it is reassigned.
+ *
+ * We do the former in vas_win_close() and latter in vas_win_open().
+ * So, ignoring the cast-out for now. We can add it as needed. If
+ * casting out becomes necessary we should consider offloading the
+ * job to a worker thread, so the window close can proceed quickly.
+ */
static void poll_window_castout(struct vas_window *window)
{
- int cached;
- u64 val;
+ /* stub for now */
+}
- /* Cast window context out of the cache */
-retry:
- val = read_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL));
- cached = GET_FIELD(VAS_WIN_CACHE_STATUS, val);
- if (cached) {
- val = 0ULL;
- val = SET_FIELD(VAS_CASTOUT_REQ, val, 1);
- val = SET_FIELD(VAS_PUSH_TO_MEM, val, 0);
- write_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL), val);
+/*
+ * Unpin and close a window so no new requests are accepted and the
+ * hardware can evict this window from cache if necessary.
+ */
+static void unpin_close_window(struct vas_window *window)
+{
+ u64 val;
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(HZ);
- goto retry;
- }
+ val = read_hvwc_reg(window, VREG(WINCTL));
+ val = SET_FIELD(VAS_WINCTL_PIN, val, 0);
+ val = SET_FIELD(VAS_WINCTL_OPEN, val, 0);
+ write_hvwc_reg(window, VREG(WINCTL), val);
}
/*
@@ -1098,8 +1247,6 @@ retry:
*/
int vas_win_close(struct vas_window *window)
{
- u64 val;
-
if (!window)
return 0;
@@ -1115,11 +1262,9 @@ int vas_win_close(struct vas_window *window)
poll_window_busy_state(window);
- /* Unpin window from cache and close it */
- val = read_hvwc_reg(window, VREG(WINCTL));
- val = SET_FIELD(VAS_WINCTL_PIN, val, 0);
- val = SET_FIELD(VAS_WINCTL_OPEN, val, 0);
- write_hvwc_reg(window, VREG(WINCTL), val);
+ unpin_close_window(window);
+
+ poll_window_credits(window);
poll_window_castout(window);
@@ -1132,3 +1277,12 @@ int vas_win_close(struct vas_window *window)
return 0;
}
EXPORT_SYMBOL_GPL(vas_win_close);
+
+/*
+ * Return a system-wide unique window id for the window @win.
+ */
+u32 vas_win_id(struct vas_window *win)
+{
+ return encode_pswid(win->vinst->vas_id, win->winid);
+}
+EXPORT_SYMBOL_GPL(vas_win_id);
diff --git a/arch/powerpc/platforms/powernv/vas.c b/arch/powerpc/platforms/powernv/vas.c
index 565a4878fefa..aebbe95c9230 100644
--- a/arch/powerpc/platforms/powernv/vas.c
+++ b/arch/powerpc/platforms/powernv/vas.c
@@ -18,15 +18,18 @@
#include <linux/of_platform.h>
#include <linux/of_address.h>
#include <linux/of.h>
+#include <asm/prom.h>
#include "vas.h"
-static DEFINE_MUTEX(vas_mutex);
+DEFINE_MUTEX(vas_mutex);
static LIST_HEAD(vas_instances);
+static DEFINE_PER_CPU(int, cpu_vas_id);
+
static int init_vas_instance(struct platform_device *pdev)
{
- int rc, vasid;
+ int rc, cpu, vasid;
struct resource *res;
struct vas_instance *vinst;
struct device_node *dn = pdev->dev.of_node;
@@ -74,10 +77,17 @@ static int init_vas_instance(struct platform_device *pdev)
"paste_win_id_shift 0x%llx\n", pdev->name, vasid,
vinst->paste_base_addr, vinst->paste_win_id_shift);
+ for_each_possible_cpu(cpu) {
+ if (cpu_to_chip_id(cpu) == of_get_ibm_chip_id(dn))
+ per_cpu(cpu_vas_id, cpu) = vasid;
+ }
+
mutex_lock(&vas_mutex);
list_add(&vinst->node, &vas_instances);
mutex_unlock(&vas_mutex);
+ vas_instance_init_dbgdir(vinst);
+
dev_set_drvdata(&pdev->dev, vinst);
return 0;
@@ -98,6 +108,10 @@ struct vas_instance *find_vas_instance(int vasid)
struct vas_instance *vinst;
mutex_lock(&vas_mutex);
+
+ if (vasid == -1)
+ vasid = per_cpu(cpu_vas_id, smp_processor_id());
+
list_for_each(ent, &vas_instances) {
vinst = list_entry(ent, struct vas_instance, node);
if (vinst->vas_id == vasid) {
@@ -111,6 +125,18 @@ struct vas_instance *find_vas_instance(int vasid)
return NULL;
}
+int chip_to_vas_id(int chipid)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ if (cpu_to_chip_id(cpu) == chipid)
+ return per_cpu(cpu_vas_id, cpu);
+ }
+ return -1;
+}
+EXPORT_SYMBOL(chip_to_vas_id);
+
static int vas_probe(struct platform_device *pdev)
{
return init_vas_instance(pdev);
@@ -134,6 +160,8 @@ static int __init vas_init(void)
int found = 0;
struct device_node *dn;
+ vas_init_dbgdir();
+
platform_driver_register(&vas_driver);
for_each_compatible_node(dn, NULL, "ibm,vas") {
diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h
index 38dee5d50f31..ae0100fd35bb 100644
--- a/arch/powerpc/platforms/powernv/vas.h
+++ b/arch/powerpc/platforms/powernv/vas.h
@@ -13,6 +13,8 @@
#include <linux/idr.h>
#include <asm/vas.h>
#include <linux/io.h>
+#include <linux/dcache.h>
+#include <linux/mutex.h>
/*
* Overview of Virtual Accelerator Switchboard (VAS).
@@ -106,8 +108,8 @@
*
* TODO: Needs tuning for per-process credits
*/
-#define VAS_WCREDS_MIN 16
-#define VAS_WCREDS_MAX ((64 << 10) - 1)
+#define VAS_RX_WCREDS_MAX ((64 << 10) - 1)
+#define VAS_TX_WCREDS_MAX ((4 << 10) - 1)
#define VAS_WCREDS_DEFAULT (1 << 10)
/*
@@ -259,6 +261,16 @@
#define VAS_NX_UTIL_ADDER PPC_BITMASK(32, 63)
/*
+ * VREG(x):
+ * Expand a register's short name (eg: LPID) into two parameters:
+ * - the register's short name in string form ("LPID"), and
+ * - the name of the macro (eg: VAS_LPID_OFFSET), defining the
+ * register's offset in the window context
+ */
+#define VREG_SFX(n, s) __stringify(n), VAS_##n##s
+#define VREG(r) VREG_SFX(r, _OFFSET)
+
+/*
* Local Notify Scope Control Register. (Receive windows only).
*/
enum vas_notify_scope {
@@ -307,6 +319,9 @@ struct vas_instance {
struct mutex mutex;
struct vas_window *rxwin[VAS_COP_TYPE_MAX];
struct vas_window *windows[VAS_WINDOWS_PER_CHIP];
+
+ char *dbgname;
+ struct dentry *dbgdir;
};
/*
@@ -322,6 +337,10 @@ struct vas_window {
void *hvwc_map; /* HV window context */
void *uwc_map; /* OS/User window context */
pid_t pid; /* Linux process id of owner */
+ int wcreds_max; /* Window credits */
+
+ char *dbgname;
+ struct dentry *dbgdir;
/* Fields applicable only to send windows */
void *paste_kaddr;
@@ -383,45 +402,23 @@ struct vas_winctx {
enum vas_notify_after_count notify_after_count;
};
-extern struct vas_instance *find_vas_instance(int vasid);
+extern struct mutex vas_mutex;
-/*
- * VREG(x):
- * Expand a register's short name (eg: LPID) into two parameters:
- * - the register's short name in string form ("LPID"), and
- * - the name of the macro (eg: VAS_LPID_OFFSET), defining the
- * register's offset in the window context
- */
-#define VREG_SFX(n, s) __stringify(n), VAS_##n##s
-#define VREG(r) VREG_SFX(r, _OFFSET)
-
-#ifdef vas_debug
-static inline void dump_rx_win_attr(struct vas_rx_win_attr *attr)
-{
- pr_err("fault %d, notify %d, intr %d early %d\n",
- attr->fault_win, attr->notify_disable,
- attr->intr_disable, attr->notify_early);
-
- pr_err("rx_fifo_size %d, max value %d\n",
- attr->rx_fifo_size, VAS_RX_FIFO_SIZE_MAX);
-}
+extern struct vas_instance *find_vas_instance(int vasid);
+extern void vas_init_dbgdir(void);
+extern void vas_instance_init_dbgdir(struct vas_instance *vinst);
+extern void vas_window_init_dbgdir(struct vas_window *win);
+extern void vas_window_free_dbgdir(struct vas_window *win);
static inline void vas_log_write(struct vas_window *win, char *name,
void *regptr, u64 val)
{
if (val)
- pr_err("%swin #%d: %s reg %p, val 0x%016llx\n",
+ pr_debug("%swin #%d: %s reg %p, val 0x%016llx\n",
win->tx_win ? "Tx" : "Rx", win->winid, name,
regptr, val);
}
-#else /* vas_debug */
-
-#define vas_log_write(win, name, reg, val)
-#define dump_rx_win_attr(attr)
-
-#endif /* vas_debug */
-
static inline void write_uwc_reg(struct vas_window *win, char *name,
s32 reg, u64 val)
{
@@ -450,18 +447,32 @@ static inline u64 read_hvwc_reg(struct vas_window *win,
return in_be64(win->hvwc_map+reg);
}
-#ifdef vas_debug
-
-static void print_fifo_msg_count(struct vas_window *txwin)
+/*
+ * Encode/decode the Partition Send Window ID (PSWID) for a window in
+ * a way that we can uniquely identify any window in the system. i.e.
+ * we should be able to locate the 'struct vas_window' given the PSWID.
+ *
+ * Bits Usage
+ * 0:7 VAS id (8 bits)
+ * 8:15 Unused, 0 (3 bits)
+ * 16:31 Window id (16 bits)
+ */
+static inline u32 encode_pswid(int vasid, int winid)
{
- uint64_t read_hvwc_reg(struct vas_window *w, char *n, uint64_t o);
- pr_devel("Winid %d, Msg count %llu\n", txwin->winid,
- (uint64_t)read_hvwc_reg(txwin, VREG(LRFIFO_PUSH)));
-}
-#else /* vas_debug */
+ u32 pswid = 0;
-#define print_fifo_msg_count(window)
+ pswid |= vasid << (31 - 7);
+ pswid |= winid;
-#endif /* vas_debug */
+ return pswid;
+}
+
+static inline void decode_pswid(u32 pswid, int *vasid, int *winid)
+{
+ if (vasid)
+ *vasid = pswid >> (31 - 7) & 0xFF;
+ if (winid)
+ *winid = pswid & 0xFFFF;
+}
#endif /* _VAS_H */