27 files changed, 3863 insertions, 531 deletions
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 6722073e339b..03605f8fc0dc 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -512,5 +512,6 @@ source "drivers/misc/mic/Kconfig"
 source "drivers/misc/genwqe/Kconfig"
 source "drivers/misc/echo/Kconfig"
 source "drivers/misc/cxl/Kconfig"
+source "drivers/misc/ocxl/Kconfig"
 source "drivers/misc/cardreader/Kconfig"
 endmenu
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 8d8cc096063b..c3c8624f4d95 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -55,7 +55,8 @@ obj-$(CONFIG_CXL_BASE)		+= cxl/
 obj-$(CONFIG_ASPEED_LPC_CTRL)	+= aspeed-lpc-ctrl.o
 obj-$(CONFIG_ASPEED_LPC_SNOOP)	+= aspeed-lpc-snoop.o
 obj-$(CONFIG_PCI_ENDPOINT_TEST)	+= pci_endpoint_test.o
-obj-$(CONFIG_MISC_RTSX)	+= cardreader/
+obj-$(CONFIG_OCXL)		+= ocxl/
+obj-$(CONFIG_MISC_RTSX)		+= cardreader/
 
 lkdtm-$(CONFIG_LKDTM)		+= lkdtm_core.o
 lkdtm-$(CONFIG_LKDTM)		+= lkdtm_bugs.o
diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index 12a41b2753f0..7ff315ad3692 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -45,6 +45,8 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master)
 	ctx->pid = NULL; /* Set in start work ioctl */
 	mutex_init(&ctx->mapping_lock);
 	ctx->mapping = NULL;
+	ctx->tidr = 0;
+	ctx->assign_tidr = false;
 
 	if (cxl_is_power8()) {
 		spin_lock_init(&ctx->sste_lock);
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index a798c2ccd67d..4f015da78f28 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -630,6 +630,9 @@ struct cxl_context {
 	struct list_head extra_irq_contexts;
 
 	struct mm_struct *mm;
+
+	u16 tidr;
+	bool assign_tidr;
 };
 
 struct cxl_irq_info;
diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c
index dc9bc1807fdf..30ccba436b3b 100644
--- a/drivers/misc/cxl/cxllib.c
+++ b/drivers/misc/cxl/cxllib.c
@@ -199,10 +199,11 @@ int cxllib_get_PE_attributes(struct task_struct *task,
 		 */
 		attr->pid = mm->context.id;
 		mmput(mm);
+		attr->tid = task->thread.tidr;
 	} else {
 		attr->pid = 0;
+		attr->tid = 0;
 	}
-	attr->tid = 0;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes);
diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
index 90341ccda9bd..0162516f5e57 100644
--- a/drivers/misc/cxl/file.c
+++ b/drivers/misc/cxl/file.c
@@ -173,7 +173,7 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
 	 * flags are set it's invalid
 	 */
 	if (work.reserved1 || work.reserved2 || work.reserved3 ||
-	    work.reserved4 || work.reserved5 || work.reserved6 ||
+	    work.reserved4 || work.reserved5 ||
 	    (work.flags & ~CXL_START_WORK_ALL)) {
 		rc = -EINVAL;
 		goto out;
@@ -186,12 +186,16 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
 		rc =  -EINVAL;
 		goto out;
 	}
+
 	if ((rc = afu_register_irqs(ctx, work.num_interrupts)))
 		goto out;
 
 	if (work.flags & CXL_START_WORK_AMR)
 		amr = work.amr & mfspr(SPRN_UAMOR);
 
+	if (work.flags & CXL_START_WORK_TID)
+		ctx->assign_tidr = true;
+
 	ctx->mmio_err_ff = !!(work.flags & CXL_START_WORK_ERR_FF);
 
 	/*
@@ -263,8 +267,15 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
 		goto out;
 	}
 
-	ctx->status = STARTED;
 	rc = 0;
+	if (work.flags & CXL_START_WORK_TID) {
+		work.tid = ctx->tidr;
+		if (copy_to_user(uwork, &work, sizeof(work)))
+			rc = -EFAULT;
+	}
+
+	ctx->status = STARTED;
+
 out:
 	mutex_unlock(&ctx->status_mutex);
 	return rc;
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index 02b6b45b4c20..1b3d7c65ea3f 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -16,6 +16,7 @@
 #include <linux/uaccess.h>
 #include <linux/delay.h>
 #include <asm/synch.h>
+#include <asm/switch_to.h>
 #include <misc/cxl-base.h>
 
 #include "cxl.h"
@@ -655,6 +656,7 @@ static void update_ivtes_directed(struct cxl_context *ctx)
 static int process_element_entry_psl9(struct cxl_context *ctx, u64 wed, u64 amr)
 {
 	u32 pid;
+	int rc;
 
 	cxl_assign_psn_space(ctx);
 
@@ -673,7 +675,16 @@ static int process_element_entry_psl9(struct cxl_context *ctx, u64 wed, u64 amr)
 		pid = ctx->mm->context.id;
 	}
 
-	ctx->elem->common.tid = 0;
+	/* Assign a unique TIDR (thread id) for the current thread */
+	if (!(ctx->tidr) && (ctx->assign_tidr)) {
+		rc = set_thread_tidr(current);
+		if (rc)
+			return -ENODEV;
+		ctx->tidr = current->thread.tidr;
+		pr_devel("%s: current tidr: %d\n", __func__, ctx->tidr);
+	}
+
+	ctx->elem->common.tid = cpu_to_be32(ctx->tidr);
 	ctx->elem->common.pid = cpu_to_be32(pid);
 
 	ctx->elem->sr = cpu_to_be64(calculate_sr(ctx));
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 19969ee86d6f..758842f65a1b 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -125,8 +125,6 @@ static const struct pci_device_id cxl_pci_tbl[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0601), },
 	{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0623), },
 	{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0628), },
-	{ PCI_DEVICE_CLASS(0x120000, ~0), },
-
 	{ }
 };
 MODULE_DEVICE_TABLE(pci, cxl_pci_tbl);
diff --git a/drivers/misc/eeprom/Kconfig b/drivers/misc/eeprom/Kconfig
index de58762097c4..68a1ac929917 100644
--- a/drivers/misc/eeprom/Kconfig
+++ b/drivers/misc/eeprom/Kconfig
@@ -4,6 +4,7 @@ config EEPROM_AT24
 	tristate "I2C EEPROMs / RAMs / ROMs from most vendors"
 	depends on I2C && SYSFS
 	select NVMEM
+	select REGMAP_I2C
 	help
 	  Enable this driver to get read/write support to most I2C EEPROMs
 	  and compatible devices like FRAMs, SRAMs, ROMs etc. After you
diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c
index 4d63ac8a82e0..01f9c4921c50 100644
--- a/drivers/misc/eeprom/at24.c
+++ b/drivers/misc/eeprom/at24.c
@@ -24,8 +24,10 @@
 #include <linux/acpi.h>
 #include <linux/i2c.h>
 #include <linux/nvmem-provider.h>
+#include <linux/regmap.h>
 #include <linux/platform_data/at24.h>
 #include <linux/pm_runtime.h>
+#include <linux/gpio/consumer.h>
 
 /*
  * I2C EEPROMs from most vendors are inexpensive and mostly interchangeable.
@@ -55,14 +57,13 @@
  * which won't work on pure SMBus systems.
  */
 
+struct at24_client {
+	struct i2c_client *client;
+	struct regmap *regmap;
+};
+
 struct at24_data {
 	struct at24_platform_data chip;
-	int use_smbus;
-	int use_smbus_write;
-
-	ssize_t (*read_func)(struct at24_data *, char *, unsigned int, size_t);
-	ssize_t (*write_func)(struct at24_data *,
-			      const char *, unsigned int, size_t);
 
 	/*
 	 * Lock protects against activities from other Linux tasks,
@@ -70,18 +71,20 @@ struct at24_data {
 	 */
 	struct mutex lock;
 
-	u8 *writebuf;
-	unsigned write_max;
-	unsigned num_addresses;
+	unsigned int write_max;
+	unsigned int num_addresses;
+	unsigned int offset_adj;
 
 	struct nvmem_config nvmem_config;
 	struct nvmem_device *nvmem;
 
+	struct gpio_desc *wp_gpio;
+
 	/*
 	 * Some chips tie up multiple I2C addresses; dummy devices reserve
 	 * them for us, and we'll use them with SMBus calls.
 	 */
-	struct i2c_client *client[];
+	struct at24_client client[];
 };
 
 /*
@@ -93,27 +96,17 @@ struct at24_data {
  *
  * This value is forced to be a power of two so that writes align on pages.
  */
-static unsigned io_limit = 128;
-module_param(io_limit, uint, 0);
-MODULE_PARM_DESC(io_limit, "Maximum bytes per I/O (default 128)");
+static unsigned int at24_io_limit = 128;
+module_param_named(io_limit, at24_io_limit, uint, 0);
+MODULE_PARM_DESC(at24_io_limit, "Maximum bytes per I/O (default 128)");
 
 /*
  * Specs often allow 5 msec for a page write, sometimes 20 msec;
  * it's important to recover from write timeouts.
  */
-static unsigned write_timeout = 25;
-module_param(write_timeout, uint, 0);
-MODULE_PARM_DESC(write_timeout, "Time (in ms) to try writes (default 25)");
-
-#define AT24_SIZE_BYTELEN 5
-#define AT24_SIZE_FLAGS 8
-
-#define AT24_BITMASK(x) (BIT(x) - 1)
-
-/* create non-zero magic value for given eeprom parameters */
-#define AT24_DEVICE_MAGIC(_len, _flags) 		\
-	((1 << AT24_SIZE_FLAGS | (_flags)) 		\
-	    << AT24_SIZE_BYTELEN | ilog2(_len))
+static unsigned int at24_write_timeout = 25;
+module_param_named(write_timeout, at24_write_timeout, uint, 0);
+MODULE_PARM_DESC(at24_write_timeout, "Time (in ms) to try writes (default 25)");
 
 /*
  * Both reads and writes fail if the previous write didn't complete yet. This
@@ -126,118 +119,123 @@ MODULE_PARM_DESC(write_timeout, "Time (in ms) to try writes (default 25)");
  * iteration of processing the request. Both should be unsigned integers
  * holding at least 32 bits.
  */
-#define loop_until_timeout(tout, op_time)				\
-	for (tout = jiffies + msecs_to_jiffies(write_timeout), op_time = 0; \
+#define at24_loop_until_timeout(tout, op_time)				\
+	for (tout = jiffies + msecs_to_jiffies(at24_write_timeout),	\
+	     op_time = 0;						\
 	     op_time ? time_before(op_time, tout) : true;		\
 	     usleep_range(1000, 1500), op_time = jiffies)
 
+struct at24_chip_data {
+	/*
+	 * these fields mirror their equivalents in
+	 * struct at24_platform_data
+	 */
+	u32 byte_len;
+	u8 flags;
+};
+
+#define AT24_CHIP_DATA(_name, _len, _flags)				\
+	static const struct at24_chip_data _name = {			\
+		.byte_len = _len, .flags = _flags,			\
+	}
+
+/* needs 8 addresses as A0-A2 are ignored */
+AT24_CHIP_DATA(at24_data_24c00, 128 / 8, AT24_FLAG_TAKE8ADDR);
+/* old variants can't be handled with this generic entry! */
+AT24_CHIP_DATA(at24_data_24c01, 1024 / 8, 0);
+AT24_CHIP_DATA(at24_data_24cs01, 16,
+	AT24_FLAG_SERIAL | AT24_FLAG_READONLY);
+AT24_CHIP_DATA(at24_data_24c02, 2048 / 8, 0);
+AT24_CHIP_DATA(at24_data_24cs02, 16,
+	AT24_FLAG_SERIAL | AT24_FLAG_READONLY);
+AT24_CHIP_DATA(at24_data_24mac402, 48 / 8,
+	AT24_FLAG_MAC | AT24_FLAG_READONLY);
+AT24_CHIP_DATA(at24_data_24mac602, 64 / 8,
+	AT24_FLAG_MAC | AT24_FLAG_READONLY);
+/* spd is a 24c02 in memory DIMMs */
+AT24_CHIP_DATA(at24_data_spd, 2048 / 8,
+	AT24_FLAG_READONLY | AT24_FLAG_IRUGO);
+AT24_CHIP_DATA(at24_data_24c04, 4096 / 8, 0);
+AT24_CHIP_DATA(at24_data_24cs04, 16,
+	AT24_FLAG_SERIAL | AT24_FLAG_READONLY);
+/* 24rf08 quirk is handled at i2c-core */
+AT24_CHIP_DATA(at24_data_24c08, 8192 / 8, 0);
+AT24_CHIP_DATA(at24_data_24cs08, 16,
+	AT24_FLAG_SERIAL | AT24_FLAG_READONLY);
+AT24_CHIP_DATA(at24_data_24c16, 16384 / 8, 0);
+AT24_CHIP_DATA(at24_data_24cs16, 16,
+	AT24_FLAG_SERIAL | AT24_FLAG_READONLY);
+AT24_CHIP_DATA(at24_data_24c32, 32768 / 8, AT24_FLAG_ADDR16);
+AT24_CHIP_DATA(at24_data_24cs32, 16,
+	AT24_FLAG_ADDR16 | AT24_FLAG_SERIAL | AT24_FLAG_READONLY);
+AT24_CHIP_DATA(at24_data_24c64, 65536 / 8, AT24_FLAG_ADDR16);
+AT24_CHIP_DATA(at24_data_24cs64, 16,
+	AT24_FLAG_ADDR16 | AT24_FLAG_SERIAL | AT24_FLAG_READONLY);
+AT24_CHIP_DATA(at24_data_24c128, 131072 / 8, AT24_FLAG_ADDR16);
+AT24_CHIP_DATA(at24_data_24c256, 262144 / 8, AT24_FLAG_ADDR16);
+AT24_CHIP_DATA(at24_data_24c512, 524288 / 8, AT24_FLAG_ADDR16);
+AT24_CHIP_DATA(at24_data_24c1024, 1048576 / 8, AT24_FLAG_ADDR16);
+/* identical to 24c08 ? */
+AT24_CHIP_DATA(at24_data_INT3499, 8192 / 8, 0);
+
 static const struct i2c_device_id at24_ids[] = {
-	/* needs 8 addresses as A0-A2 are ignored */
-	{ "24c00",	AT24_DEVICE_MAGIC(128 / 8,	AT24_FLAG_TAKE8ADDR) },
-	/* old variants can't be handled with this generic entry! */
-	{ "24c01",	AT24_DEVICE_MAGIC(1024 / 8,	0) },
-	{ "24cs01",	AT24_DEVICE_MAGIC(16,
-				AT24_FLAG_SERIAL | AT24_FLAG_READONLY) },
-	{ "24c02",	AT24_DEVICE_MAGIC(2048 / 8,	0) },
-	{ "24cs02",	AT24_DEVICE_MAGIC(16,
-				AT24_FLAG_SERIAL | AT24_FLAG_READONLY) },
-	{ "24mac402",	AT24_DEVICE_MAGIC(48 / 8,
-				AT24_FLAG_MAC | AT24_FLAG_READONLY) },
-	{ "24mac602",	AT24_DEVICE_MAGIC(64 / 8,
-				AT24_FLAG_MAC | AT24_FLAG_READONLY) },
-	/* spd is a 24c02 in memory DIMMs */
-	{ "spd",	AT24_DEVICE_MAGIC(2048 / 8,
-				AT24_FLAG_READONLY | AT24_FLAG_IRUGO) },
-	{ "24c04",	AT24_DEVICE_MAGIC(4096 / 8,	0) },
-	{ "24cs04",	AT24_DEVICE_MAGIC(16,
-				AT24_FLAG_SERIAL | AT24_FLAG_READONLY) },
-	/* 24rf08 quirk is handled at i2c-core */
-	{ "24c08",	AT24_DEVICE_MAGIC(8192 / 8,	0) },
-	{ "24cs08",	AT24_DEVICE_MAGIC(16,
-				AT24_FLAG_SERIAL | AT24_FLAG_READONLY) },
-	{ "24c16",	AT24_DEVICE_MAGIC(16384 / 8,	0) },
-	{ "24cs16",	AT24_DEVICE_MAGIC(16,
-				AT24_FLAG_SERIAL | AT24_FLAG_READONLY) },
-	{ "24c32",	AT24_DEVICE_MAGIC(32768 / 8,	AT24_FLAG_ADDR16) },
-	{ "24cs32",	AT24_DEVICE_MAGIC(16,
-				AT24_FLAG_ADDR16 |
-				AT24_FLAG_SERIAL |
-				AT24_FLAG_READONLY) },
-	{ "24c64",	AT24_DEVICE_MAGIC(65536 / 8,	AT24_FLAG_ADDR16) },
-	{ "24cs64",	AT24_DEVICE_MAGIC(16,
-				AT24_FLAG_ADDR16 |
-				AT24_FLAG_SERIAL |
-				AT24_FLAG_READONLY) },
-	{ "24c128",	AT24_DEVICE_MAGIC(131072 / 8,	AT24_FLAG_ADDR16) },
-	{ "24c256",	AT24_DEVICE_MAGIC(262144 / 8,	AT24_FLAG_ADDR16) },
-	{ "24c512",	AT24_DEVICE_MAGIC(524288 / 8,	AT24_FLAG_ADDR16) },
-	{ "24c1024",	AT24_DEVICE_MAGIC(1048576 / 8,	AT24_FLAG_ADDR16) },
-	{ "at24", 0 },
+	{ "24c00",	(kernel_ulong_t)&at24_data_24c00 },
+	{ "24c01",	(kernel_ulong_t)&at24_data_24c01 },
+	{ "24cs01",	(kernel_ulong_t)&at24_data_24cs01 },
+	{ "24c02",	(kernel_ulong_t)&at24_data_24c02 },
+	{ "24cs02",	(kernel_ulong_t)&at24_data_24cs02 },
+	{ "24mac402",	(kernel_ulong_t)&at24_data_24mac402 },
+	{ "24mac602",	(kernel_ulong_t)&at24_data_24mac602 },
+	{ "spd",	(kernel_ulong_t)&at24_data_spd },
+	{ "24c04",	(kernel_ulong_t)&at24_data_24c04 },
+	{ "24cs04",	(kernel_ulong_t)&at24_data_24cs04 },
+	{ "24c08",	(kernel_ulong_t)&at24_data_24c08 },
+	{ "24cs08",	(kernel_ulong_t)&at24_data_24cs08 },
+	{ "24c16",	(kernel_ulong_t)&at24_data_24c16 },
+	{ "24cs16",	(kernel_ulong_t)&at24_data_24cs16 },
+	{ "24c32",	(kernel_ulong_t)&at24_data_24c32 },
+	{ "24cs32",	(kernel_ulong_t)&at24_data_24cs32 },
+	{ "24c64",	(kernel_ulong_t)&at24_data_24c64 },
+	{ "24cs64",	(kernel_ulong_t)&at24_data_24cs64 },
+	{ "24c128",	(kernel_ulong_t)&at24_data_24c128 },
+	{ "24c256",	(kernel_ulong_t)&at24_data_24c256 },
+	{ "24c512",	(kernel_ulong_t)&at24_data_24c512 },
+	{ "24c1024",	(kernel_ulong_t)&at24_data_24c1024 },
+	{ "at24",	0 },
 	{ /* END OF LIST */ }
 };
 MODULE_DEVICE_TABLE(i2c, at24_ids);
 
 static const struct of_device_id at24_of_match[] = {
-	{
-		.compatible = "atmel,24c00",
-		.data = (void *)AT24_DEVICE_MAGIC(128 / 8, AT24_FLAG_TAKE8ADDR)
-	},
-	{
-		.compatible = "atmel,24c01",
-		.data = (void *)AT24_DEVICE_MAGIC(1024 / 8, 0)
-	},
-	{
-		.compatible = "atmel,24c02",
-		.data = (void *)AT24_DEVICE_MAGIC(2048 / 8, 0)
-	},
-	{
-		.compatible = "atmel,spd",
-		.data = (void *)AT24_DEVICE_MAGIC(2048 / 8,
-				AT24_FLAG_READONLY | AT24_FLAG_IRUGO)
-	},
-	{
-		.compatible = "atmel,24c04",
-		.data = (void *)AT24_DEVICE_MAGIC(4096 / 8, 0)
-	},
-	{
-		.compatible = "atmel,24c08",
-		.data = (void *)AT24_DEVICE_MAGIC(8192 / 8, 0)
-	},
-	{
-		.compatible = "atmel,24c16",
-		.data = (void *)AT24_DEVICE_MAGIC(16384 / 8, 0)
-	},
-	{
-		.compatible = "atmel,24c32",
-		.data = (void *)AT24_DEVICE_MAGIC(32768 / 8, AT24_FLAG_ADDR16)
-	},
-	{
-		.compatible = "atmel,24c64",
-		.data = (void *)AT24_DEVICE_MAGIC(65536 / 8, AT24_FLAG_ADDR16)
-	},
-	{
-		.compatible = "atmel,24c128",
-		.data = (void *)AT24_DEVICE_MAGIC(131072 / 8, AT24_FLAG_ADDR16)
-	},
-	{
-		.compatible = "atmel,24c256",
-		.data = (void *)AT24_DEVICE_MAGIC(262144 / 8, AT24_FLAG_ADDR16)
-	},
-	{
-		.compatible = "atmel,24c512",
-		.data = (void *)AT24_DEVICE_MAGIC(524288 / 8, AT24_FLAG_ADDR16)
-	},
-	{
-		.compatible = "atmel,24c1024",
-		.data = (void *)AT24_DEVICE_MAGIC(1048576 / 8, AT24_FLAG_ADDR16)
-	},
-	{ },
+	{ .compatible = "atmel,24c00",		.data = &at24_data_24c00 },
+	{ .compatible = "atmel,24c01",		.data = &at24_data_24c01 },
+	{ .compatible = "atmel,24cs01",		.data = &at24_data_24cs01 },
+	{ .compatible = "atmel,24c02",		.data = &at24_data_24c02 },
+	{ .compatible = "atmel,24cs02",		.data = &at24_data_24cs02 },
+	{ .compatible = "atmel,24mac402",	.data = &at24_data_24mac402 },
+	{ .compatible = "atmel,24mac602",	.data = &at24_data_24mac602 },
+	{ .compatible = "atmel,spd",		.data = &at24_data_spd },
+	{ .compatible = "atmel,24c04",		.data = &at24_data_24c04 },
+	{ .compatible = "atmel,24cs04",		.data = &at24_data_24cs04 },
+	{ .compatible = "atmel,24c08",		.data = &at24_data_24c08 },
+	{ .compatible = "atmel,24cs08",		.data = &at24_data_24cs08 },
+	{ .compatible = "atmel,24c16",		.data = &at24_data_24c16 },
+	{ .compatible = "atmel,24cs16",		.data = &at24_data_24cs16 },
+	{ .compatible = "atmel,24c32",		.data = &at24_data_24c32 },
+	{ .compatible = "atmel,24cs32",		.data = &at24_data_24cs32 },
+	{ .compatible = "atmel,24c64",		.data = &at24_data_24c64 },
+	{ .compatible = "atmel,24cs64",		.data = &at24_data_24cs64 },
+	{ .compatible = "atmel,24c128",		.data = &at24_data_24c128 },
+	{ .compatible = "atmel,24c256",		.data = &at24_data_24c256 },
+	{ .compatible = "atmel,24c512",		.data = &at24_data_24c512 },
+	{ .compatible = "atmel,24c1024",	.data = &at24_data_24c1024 },
+	{ /* END OF LIST */ },
 };
 MODULE_DEVICE_TABLE(of, at24_of_match);
 
 static const struct acpi_device_id at24_acpi_ids[] = {
-	{ "INT3499", AT24_DEVICE_MAGIC(8192 / 8, 0) },
-	{ }
+	{ "INT3499",	(kernel_ulong_t)&at24_data_INT3499 },
+	{ /* END OF LIST */ }
 };
 MODULE_DEVICE_TABLE(acpi, at24_acpi_ids);
 
@@ -251,20 +249,11 @@ MODULE_DEVICE_TABLE(acpi, at24_acpi_ids);
  * Slave address and byte offset derive from the offset. Always
  * set the byte address; on a multi-master board, another master
  * may have changed the chip's "current" address pointer.
- *
- * REVISIT some multi-address chips don't rollover page reads to
- * the next slave address, so we may need to truncate the count.
- * Those chips might need another quirk flag.
- *
- * If the real hardware used four adjacent 24c02 chips and that
- * were misconfigured as one 24c08, that would be a similar effect:
- * one "eeprom" file not four, but larger reads would fail when
- * they crossed certain pages.
  */
-static struct i2c_client *at24_translate_offset(struct at24_data *at24,
-						unsigned int *offset)
+static struct at24_client *at24_translate_offset(struct at24_data *at24,
+						 unsigned int *offset)
 {
-	unsigned i;
+	unsigned int i;
 
 	if (at24->chip.flags & AT24_FLAG_ADDR16) {
 		i = *offset >> 16;
@@ -274,168 +263,55 @@ static struct i2c_client *at24_translate_offset(struct at24_data *at24,
 		*offset &= 0xff;
 	}
 
-	return at24->client[i];
+	return &at24->client[i];
 }
 
-static ssize_t at24_eeprom_read_smbus(struct at24_data *at24, char *buf,
+static size_t at24_adjust_read_count(struct at24_data *at24,
 				      unsigned int offset, size_t count)
 {
-	unsigned long timeout, read_time;
-	struct i2c_client *client;
-	int status;
-
-	client = at24_translate_offset(at24, &offset);
-
-	if (count > io_limit)
-		count = io_limit;
-
-	/* Smaller eeproms can work given some SMBus extension calls */
-	if (count > I2C_SMBUS_BLOCK_MAX)
-		count = I2C_SMBUS_BLOCK_MAX;
-
-	loop_until_timeout(timeout, read_time) {
-		status = i2c_smbus_read_i2c_block_data_or_emulated(client,
-								   offset,
-								   count, buf);
-
-		dev_dbg(&client->dev, "read %zu@%d --> %d (%ld)\n",
-				count, offset, status, jiffies);
-
-		if (status == count)
-			return count;
-	}
-
-	return -ETIMEDOUT;
-}
-
-static ssize_t at24_eeprom_read_i2c(struct at24_data *at24, char *buf,
-				    unsigned int offset, size_t count)
-{
-	unsigned long timeout, read_time;
-	struct i2c_client *client;
-	struct i2c_msg msg[2];
-	int status, i;
-	u8 msgbuf[2];
-
-	memset(msg, 0, sizeof(msg));
-	client = at24_translate_offset(at24, &offset);
-
-	if (count > io_limit)
-		count = io_limit;
+	unsigned int bits;
+	size_t remainder;
 
 	/*
-	 * When we have a better choice than SMBus calls, use a combined I2C
-	 * message. Write address; then read up to io_limit data bytes. Note
-	 * that read page rollover helps us here (unlike writes). msgbuf is
-	 * u8 and will cast to our needs.
+	 * In case of multi-address chips that don't rollover reads to
+	 * the next slave address: truncate the count to the slave boundary,
+	 * so that the read never straddles slaves.
 	 */
-	i = 0;
-	if (at24->chip.flags & AT24_FLAG_ADDR16)
-		msgbuf[i++] = offset >> 8;
-	msgbuf[i++] = offset;
-
-	msg[0].addr = client->addr;
-	msg[0].buf = msgbuf;
-	msg[0].len = i;
-
-	msg[1].addr = client->addr;
-	msg[1].flags = I2C_M_RD;
-	msg[1].buf = buf;
-	msg[1].len = count;
-
-	loop_until_timeout(timeout, read_time) {
-		status = i2c_transfer(client->adapter, msg, 2);
-		if (status == 2)
-			status = count;
-
-		dev_dbg(&client->dev, "read %zu@%d --> %d (%ld)\n",
-				count, offset, status, jiffies);
-
-		if (status == count)
-			return count;
+	if (at24->chip.flags & AT24_FLAG_NO_RDROL) {
+		bits = (at24->chip.flags & AT24_FLAG_ADDR16) ? 16 : 8;
+		remainder = BIT(bits) - offset;
+		if (count > remainder)
+			count = remainder;
 	}
 
-	return -ETIMEDOUT;
+	if (count > at24_io_limit)
+		count = at24_io_limit;
+
+	return count;
 }
 
-static ssize_t at24_eeprom_read_serial(struct at24_data *at24, char *buf,
-				       unsigned int offset, size_t count)
+static ssize_t at24_regmap_read(struct at24_data *at24, char *buf,
+				unsigned int offset, size_t count)
 {
 	unsigned long timeout, read_time;
+	struct at24_client *at24_client;
 	struct i2c_client *client;
-	struct i2c_msg msg[2];
-	u8 addrbuf[2];
-	int status;
-
-	client = at24_translate_offset(at24, &offset);
-
-	memset(msg, 0, sizeof(msg));
-	msg[0].addr = client->addr;
-	msg[0].buf = addrbuf;
-
-	/*
-	 * The address pointer of the device is shared between the regular
-	 * EEPROM array and the serial number block. The dummy write (part of
-	 * the sequential read protocol) ensures the address pointer is reset
-	 * to the desired position.
-	 */
-	if (at24->chip.flags & AT24_FLAG_ADDR16) {
-		/*
-		 * For 16 bit address pointers, the word address must contain
-		 * a '10' sequence in bits 11 and 10 regardless of the
-		 * intended position of the address pointer.
-		 */
-		addrbuf[0] = 0x08;
-		addrbuf[1] = offset;
-		msg[0].len = 2;
-	} else {
-		/*
-		 * Otherwise the word address must begin with a '10' sequence,
-		 * regardless of the intended address.
-		 */
-		addrbuf[0] = 0x80 + offset;
-		msg[0].len = 1;
-	}
-
-	msg[1].addr = client->addr;
-	msg[1].flags = I2C_M_RD;
-	msg[1].buf = buf;
-	msg[1].len = count;
+	struct regmap *regmap;
+	int ret;
 
-	loop_until_timeout(timeout, read_time) {
-		status = i2c_transfer(client->adapter, msg, 2);
-		if (status == 2)
-			return count;
-	}
+	at24_client = at24_translate_offset(at24, &offset);
+	regmap = at24_client->regmap;
+	client = at24_client->client;
+	count = at24_adjust_read_count(at24, offset, count);
 
-	return -ETIMEDOUT;
-}
+	/* adjust offset for mac and serial read ops */
+	offset += at24->offset_adj;
 
-static ssize_t at24_eeprom_read_mac(struct at24_data *at24, char *buf,
-				    unsigned int offset, size_t count)
-{
-	unsigned long timeout, read_time;
-	struct i2c_client *client;
-	struct i2c_msg msg[2];
-	u8 addrbuf[2];
-	int status;
-
-	client = at24_translate_offset(at24, &offset);
-
-	memset(msg, 0, sizeof(msg));
-	msg[0].addr = client->addr;
-	msg[0].buf = addrbuf;
-	/* EUI-48 starts from 0x9a, EUI-64 from 0x98 */
-	addrbuf[0] = 0xa0 - at24->chip.byte_len + offset;
-	msg[0].len = 1;
-	msg[1].addr = client->addr;
-	msg[1].flags = I2C_M_RD;
-	msg[1].buf = buf;
-	msg[1].len = count;
-
-	loop_until_timeout(timeout, read_time) {
-		status = i2c_transfer(client->adapter, msg, 2);
-		if (status == 2)
+	at24_loop_until_timeout(timeout, read_time) {
+		ret = regmap_bulk_read(regmap, offset, buf, count);
+		dev_dbg(&client->dev, "read %zu@%d --> %d (%ld)\n",
+			count, offset, ret, jiffies);
+		if (!ret)
 			return count;
 	}
 
@@ -454,7 +330,7 @@ static ssize_t at24_eeprom_read_mac(struct at24_data *at24, char *buf,
 static size_t at24_adjust_write_count(struct at24_data *at24,
 				      unsigned int offset, size_t count)
 {
-	unsigned next_page;
+	unsigned int next_page;
 
 	/* write_max is at most a page */
 	if (count > at24->write_max)
@@ -468,91 +344,25 @@ static size_t at24_adjust_write_count(struct at24_data *at24,
 	return count;
 }
 
-static ssize_t at24_eeprom_write_smbus_block(struct at24_data *at24,
-					     const char *buf,
-					     unsigned int offset, size_t count)
+static ssize_t at24_regmap_write(struct at24_data *at24, const char *buf,
+				 unsigned int offset, size_t count)
 {
 	unsigned long timeout, write_time;
+	struct at24_client *at24_client;
 	struct i2c_client *client;
-	ssize_t status = 0;
-
-	client = at24_translate_offset(at24, &offset);
-	count = at24_adjust_write_count(at24, offset, count);
-
-	loop_until_timeout(timeout, write_time) {
-		status = i2c_smbus_write_i2c_block_data(client,
-							offset, count, buf);
-		if (status == 0)
-			status = count;
-
-		dev_dbg(&client->dev, "write %zu@%d --> %zd (%ld)\n",
-				count, offset, status, jiffies);
-
-		if (status == count)
-			return count;
-	}
-
-	return -ETIMEDOUT;
-}
-
-static ssize_t at24_eeprom_write_smbus_byte(struct at24_data *at24,
-					    const char *buf,
-					    unsigned int offset, size_t count)
-{
-	unsigned long timeout, write_time;
-	struct i2c_client *client;
-	ssize_t status = 0;
-
-	client = at24_translate_offset(at24, &offset);
-
-	loop_until_timeout(timeout, write_time) {
-		status = i2c_smbus_write_byte_data(client, offset, buf[0]);
-		if (status == 0)
-			status = count;
-
-		dev_dbg(&client->dev, "write %zu@%d --> %zd (%ld)\n",
-				count, offset, status, jiffies);
-
-		if (status == count)
-			return count;
-	}
-
-	return -ETIMEDOUT;
-}
-
-static ssize_t at24_eeprom_write_i2c(struct at24_data *at24, const char *buf,
-				     unsigned int offset, size_t count)
-{
-	unsigned long timeout, write_time;
-	struct i2c_client *client;
-	struct i2c_msg msg;
-	ssize_t status = 0;
-	int i = 0;
+	struct regmap *regmap;
+	int ret;
 
-	client = at24_translate_offset(at24, &offset);
+	at24_client = at24_translate_offset(at24, &offset);
+	regmap = at24_client->regmap;
+	client = at24_client->client;
 	count = at24_adjust_write_count(at24, offset, count);
 
-	msg.addr = client->addr;
-	msg.flags = 0;
-
-	/* msg.buf is u8 and casts will mask the values */
-	msg.buf = at24->writebuf;
-	if (at24->chip.flags & AT24_FLAG_ADDR16)
-		msg.buf[i++] = offset >> 8;
-
-	msg.buf[i++] = offset;
-	memcpy(&msg.buf[i], buf, count);
-	msg.len = i + count;
-
-	loop_until_timeout(timeout, write_time) {
-		status = i2c_transfer(client->adapter, &msg, 1);
-		if (status == 1)
-			status = count;
-
-		dev_dbg(&client->dev, "write %zu@%d --> %zd (%ld)\n",
-				count, offset, status, jiffies);
-
-		if (status == count)
+	at24_loop_until_timeout(timeout, write_time) {
+		ret = regmap_bulk_write(regmap, offset, buf, count);
+		dev_dbg(&client->dev, "write %zu@%d --> %d (%ld)\n",
+			count, offset, ret, jiffies);
+		if (!ret)
 			return count;
 	}
 
@@ -562,7 +372,7 @@ static ssize_t at24_eeprom_write_i2c(struct at24_data *at24, const char *buf,
 static int at24_read(void *priv, unsigned int off, void *val, size_t count)
 {
 	struct at24_data *at24 = priv;
-	struct device *dev = &at24->client[0]->dev;
+	struct device *dev = &at24->client[0].client->dev;
 	char *buf = val;
 	int ret;
 
@@ -587,7 +397,7 @@ static int at24_read(void *priv, unsigned int off, void *val, size_t count)
 	while (count) {
 		int	status;
 
-		status = at24->read_func(at24, buf, off, count);
+		status = at24_regmap_read(at24, buf, off, count);
 		if (status < 0) {
 			mutex_unlock(&at24->lock);
 			pm_runtime_put(dev);
@@ -608,7 +418,7 @@ static int at24_read(void *priv, unsigned int off, void *val, size_t count)
 static int at24_write(void *priv, unsigned int off, void *val, size_t count)
 {
 	struct at24_data *at24 = priv;
-	struct device *dev = &at24->client[0]->dev;
+	struct device *dev = &at24->client[0].client->dev;
 	char *buf = val;
 	int ret;
 
@@ -629,12 +439,14 @@ static int at24_write(void *priv, unsigned int off, void *val, size_t count)
 	 * from this host, but not from other I2C masters.
 	 */
 	mutex_lock(&at24->lock);
+	gpiod_set_value_cansleep(at24->wp_gpio, 0);
 
 	while (count) {
 		int status;
 
-		status = at24->write_func(at24, buf, off, count);
+		status = at24_regmap_write(at24, buf, off, count);
 		if (status < 0) {
+			gpiod_set_value_cansleep(at24->wp_gpio, 1);
 			mutex_unlock(&at24->lock);
 			pm_runtime_put(dev);
 			return status;
@@ -644,6 +456,7 @@ static int at24_write(void *priv, unsigned int off, void *val, size_t count)
 		count -= status;
 	}
 
+	gpiod_set_value_cansleep(at24->wp_gpio, 1);
 	mutex_unlock(&at24->lock);
 
 	pm_runtime_put(dev);
@@ -658,6 +471,8 @@ static void at24_get_pdata(struct device *dev, struct at24_platform_data *chip)
 
 	if (device_property_present(dev, "read-only"))
 		chip->flags |= AT24_FLAG_READONLY;
+	if (device_property_present(dev, "no-read-rollover"))
+		chip->flags |= AT24_FLAG_NO_RDROL;
 
 	err = device_property_read_u32(dev, "size", &val);
 	if (!err)
@@ -676,16 +491,38 @@ static void at24_get_pdata(struct device *dev, struct at24_platform_data *chip)
 	}
 }
 
+static unsigned int at24_get_offset_adj(u8 flags, unsigned int byte_len)
+{
+	if (flags & AT24_FLAG_MAC) {
+		/* EUI-48 starts from 0x9a, EUI-64 from 0x98 */
+		return 0xa0 - byte_len;
+	} else if (flags & AT24_FLAG_SERIAL && flags & AT24_FLAG_ADDR16) {
+		/*
+		 * For 16 bit address pointers, the word address must contain
+		 * a '10' sequence in bits 11 and 10 regardless of the
+		 * intended position of the address pointer.
+		 */
+		return 0x0800;
+	} else if (flags & AT24_FLAG_SERIAL) {
+		/*
+		 * Otherwise the word address must begin with a '10' sequence,
+		 * regardless of the intended address.
+		 */
+		return 0x0080;
+	} else {
+		return 0;
+	}
+}
+
 static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
 {
-	struct at24_platform_data chip;
-	kernel_ulong_t magic = 0;
+	struct at24_platform_data chip = { 0 };
+	const struct at24_chip_data *cd = NULL;
 	bool writable;
-	int use_smbus = 0;
-	int use_smbus_write = 0;
 	struct at24_data *at24;
 	int err;
-	unsigned i, num_addresses;
+	unsigned int i, num_addresses;
+	struct regmap_config regmap_config = { };
 	u8 test_byte;
 
 	if (client->dev.platform_data) {
@@ -698,28 +535,22 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
 		 */
 		if (client->dev.of_node &&
 		    of_match_device(at24_of_match, &client->dev)) {
-			magic = (kernel_ulong_t)
-				of_device_get_match_data(&client->dev);
+			cd = of_device_get_match_data(&client->dev);
 		} else if (id) {
-			magic = id->driver_data;
+			cd = (void *)id->driver_data;
 		} else {
 			const struct acpi_device_id *aid;
 
 			aid = acpi_match_device(at24_acpi_ids, &client->dev);
 			if (aid)
-				magic = aid->driver_data;
+				cd = (void *)aid->driver_data;
 		}
-		if (!magic)
+		if (!cd)
 			return -ENODEV;
 
-		chip.byte_len = BIT(magic & AT24_BITMASK(AT24_SIZE_BYTELEN));
-		magic >>= AT24_SIZE_BYTELEN;
-		chip.flags = magic & AT24_BITMASK(AT24_SIZE_FLAGS);
-
+		chip.byte_len = cd->byte_len;
+		chip.flags = cd->flags;
 		at24_get_pdata(&client->dev, &chip);
-
-		chip.setup = NULL;
-		chip.context = NULL;
 	}
 
 	if (!is_power_of_2(chip.byte_len))
@@ -733,43 +564,10 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
 		dev_warn(&client->dev,
 			"page_size looks suspicious (no power of 2)!\n");
 
-	/*
-	 * REVISIT: the size of the EUI-48 byte array is 6 in at24mac402, while
-	 * the call to ilog2() in AT24_DEVICE_MAGIC() rounds it down to 4.
-	 *
-	 * Eventually we'll get rid of the magic values altoghether in favor of
-	 * real structs, but for now just manually set the right size.
-	 */
-	if (chip.flags & AT24_FLAG_MAC && chip.byte_len == 4)
-		chip.byte_len = 6;
-
-	/* Use I2C operations unless we're stuck with SMBus extensions. */
-	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
-		if (chip.flags & AT24_FLAG_ADDR16)
-			return -EPFNOSUPPORT;
-
-		if (i2c_check_functionality(client->adapter,
-				I2C_FUNC_SMBUS_READ_I2C_BLOCK)) {
-			use_smbus = I2C_SMBUS_I2C_BLOCK_DATA;
-		} else if (i2c_check_functionality(client->adapter,
-				I2C_FUNC_SMBUS_READ_WORD_DATA)) {
-			use_smbus = I2C_SMBUS_WORD_DATA;
-		} else if (i2c_check_functionality(client->adapter,
-				I2C_FUNC_SMBUS_READ_BYTE_DATA)) {
-			use_smbus = I2C_SMBUS_BYTE_DATA;
-		} else {
-			return -EPFNOSUPPORT;
-		}
-
-		if (i2c_check_functionality(client->adapter,
-				I2C_FUNC_SMBUS_WRITE_I2C_BLOCK)) {
-			use_smbus_write = I2C_SMBUS_I2C_BLOCK_DATA;
-		} else if (i2c_check_functionality(client->adapter,
-				I2C_FUNC_SMBUS_WRITE_BYTE_DATA)) {
-			use_smbus_write = I2C_SMBUS_BYTE_DATA;
-			chip.page_size = 1;
-		}
-	}
+	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C) &&
+	    !i2c_check_functionality(client->adapter,
+				     I2C_FUNC_SMBUS_WRITE_I2C_BLOCK))
+		chip.page_size = 1;
 
 	if (chip.flags & AT24_FLAG_TAKE8ADDR)
 		num_addresses = 8;
@@ -777,16 +575,28 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
 		num_addresses =	DIV_ROUND_UP(chip.byte_len,
 			(chip.flags & AT24_FLAG_ADDR16) ? 65536 : 256);
 
+	regmap_config.val_bits = 8;
+	regmap_config.reg_bits = (chip.flags & AT24_FLAG_ADDR16) ? 16 : 8;
+
 	at24 = devm_kzalloc(&client->dev, sizeof(struct at24_data) +
-		num_addresses * sizeof(struct i2c_client *), GFP_KERNEL);
+		num_addresses * sizeof(struct at24_client), GFP_KERNEL);
 	if (!at24)
 		return -ENOMEM;
 
 	mutex_init(&at24->lock);
-	at24->use_smbus = use_smbus;
-	at24->use_smbus_write = use_smbus_write;
 	at24->chip = chip;
 	at24->num_addresses = num_addresses;
+	at24->offset_adj = at24_get_offset_adj(chip.flags, chip.byte_len);
+
+	at24->wp_gpio = devm_gpiod_get_optional(&client->dev,
+						"wp", GPIOD_OUT_HIGH);
+	if (IS_ERR(at24->wp_gpio))
+		return PTR_ERR(at24->wp_gpio);
+
+	at24->client[0].client = client;
+	at24->client[0].regmap = devm_regmap_init_i2c(client, &regmap_config);
+	if (IS_ERR(at24->client[0].regmap))
+		return PTR_ERR(at24->client[0].regmap);
 
 	if ((chip.flags & AT24_FLAG_SERIAL) && (chip.flags & AT24_FLAG_MAC)) {
 		dev_err(&client->dev,
@@ -794,59 +604,32 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
 		return -EINVAL;
 	}
 
-	if (chip.flags & AT24_FLAG_SERIAL) {
-		at24->read_func = at24_eeprom_read_serial;
-	} else if (chip.flags & AT24_FLAG_MAC) {
-		at24->read_func = at24_eeprom_read_mac;
-	} else {
-		at24->read_func = at24->use_smbus ? at24_eeprom_read_smbus
-						  : at24_eeprom_read_i2c;
-	}
-
-	if (at24->use_smbus) {
-		if (at24->use_smbus_write == I2C_SMBUS_I2C_BLOCK_DATA)
-			at24->write_func = at24_eeprom_write_smbus_block;
-		else
-			at24->write_func = at24_eeprom_write_smbus_byte;
-	} else {
-		at24->write_func = at24_eeprom_write_i2c;
-	}
-
 	writable = !(chip.flags & AT24_FLAG_READONLY);
 	if (writable) {
-		if (!use_smbus || use_smbus_write) {
-
-			unsigned write_max = chip.page_size;
-
-			if (write_max > io_limit)
-				write_max = io_limit;
-			if (use_smbus && write_max > I2C_SMBUS_BLOCK_MAX)
-				write_max = I2C_SMBUS_BLOCK_MAX;
-			at24->write_max = write_max;
-
-			/* buffer (data + address at the beginning) */
-			at24->writebuf = devm_kzalloc(&client->dev,
-				write_max + 2, GFP_KERNEL);
-			if (!at24->writebuf)
-				return -ENOMEM;
-		} else {
-			dev_warn(&client->dev,
-				"cannot write due to controller restrictions.");
-		}
+		at24->write_max = min_t(unsigned int,
+					chip.page_size, at24_io_limit);
+		if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C) &&
+		    at24->write_max > I2C_SMBUS_BLOCK_MAX)
+			at24->write_max = I2C_SMBUS_BLOCK_MAX;
 	}
 
-	at24->client[0] = client;
-
 	/* use dummy devices for multiple-address chips */
 	for (i = 1; i < num_addresses; i++) {
-		at24->client[i] = i2c_new_dummy(client->adapter,
-					client->addr + i);
-		if (!at24->client[i]) {
+		at24->client[i].client = i2c_new_dummy(client->adapter,
+						       client->addr + i);
+		if (!at24->client[i].client) {
 			dev_err(&client->dev, "address 0x%02x unavailable\n",
 					client->addr + i);
 			err = -EADDRINUSE;
 			goto err_clients;
 		}
+		at24->client[i].regmap = devm_regmap_init_i2c(
+						at24->client[i].client,
+						&regmap_config);
+		if (IS_ERR(at24->client[i].regmap)) {
+			err = PTR_ERR(at24->client[i].regmap);
+			goto err_clients;
+		}
 	}
 
 	i2c_set_clientdata(client, at24);
@@ -890,12 +673,6 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
 	dev_info(&client->dev, "%u byte %s EEPROM, %s, %u bytes/write\n",
 		chip.byte_len, client->name,
 		writable ? "writable" : "read-only", at24->write_max);
-	if (use_smbus == I2C_SMBUS_WORD_DATA ||
-	    use_smbus == I2C_SMBUS_BYTE_DATA) {
-		dev_notice(&client->dev, "Falling back to %s reads, "
-			   "performance will suffer\n", use_smbus ==
-			   I2C_SMBUS_WORD_DATA ? "word" : "byte");
-	}
 
 	/* export data to kernel code */
 	if (chip.setup)
@@ -905,8 +682,8 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
 
 err_clients:
 	for (i = 1; i < num_addresses; i++)
-		if (at24->client[i])
-			i2c_unregister_device(at24->client[i]);
+		if (at24->client[i].client)
+			i2c_unregister_device(at24->client[i].client);
 
 	pm_runtime_disable(&client->dev);
 
@@ -923,7 +700,7 @@ static int at24_remove(struct i2c_client *client)
 	nvmem_unregister(at24->nvmem);
 
 	for (i = 1; i < at24->num_addresses; i++)
-		i2c_unregister_device(at24->client[i]);
+		i2c_unregister_device(at24->client[i].client);
 
 	pm_runtime_disable(&client->dev);
 	pm_runtime_set_suspended(&client->dev);
@@ -946,12 +723,12 @@ static struct i2c_driver at24_driver = {
 
 static int __init at24_init(void)
 {
-	if (!io_limit) {
-		pr_err("at24: io_limit must not be 0!\n");
+	if (!at24_io_limit) {
+		pr_err("at24: at24_io_limit must not be 0!\n");
 		return -EINVAL;
 	}
 
-	io_limit = rounddown_pow_of_two(io_limit);
+	at24_io_limit = rounddown_pow_of_two(at24_io_limit);
 	return i2c_add_driver(&at24_driver);
 }
 module_init(at24_init);
diff --git a/drivers/misc/lkdtm.h b/drivers/misc/lkdtm.h
index 687a0dbbe199..9e513dcfd809 100644
--- a/drivers/misc/lkdtm.h
+++ b/drivers/misc/lkdtm.h
@@ -76,8 +76,8 @@ void __init lkdtm_usercopy_init(void);
 void __exit lkdtm_usercopy_exit(void);
 void lkdtm_USERCOPY_HEAP_SIZE_TO(void);
 void lkdtm_USERCOPY_HEAP_SIZE_FROM(void);
-void lkdtm_USERCOPY_HEAP_FLAG_TO(void);
-void lkdtm_USERCOPY_HEAP_FLAG_FROM(void);
+void lkdtm_USERCOPY_HEAP_WHITELIST_TO(void);
+void lkdtm_USERCOPY_HEAP_WHITELIST_FROM(void);
 void lkdtm_USERCOPY_STACK_FRAME_TO(void);
 void lkdtm_USERCOPY_STACK_FRAME_FROM(void);
 void lkdtm_USERCOPY_STACK_BEYOND(void);
diff --git a/drivers/misc/lkdtm_core.c b/drivers/misc/lkdtm_core.c
index 4942da93d066..2154d1bfd18b 100644
--- a/drivers/misc/lkdtm_core.c
+++ b/drivers/misc/lkdtm_core.c
@@ -177,8 +177,8 @@ static const struct crashtype crashtypes[] = {
 	CRASHTYPE(ATOMIC_TIMING),
 	CRASHTYPE(USERCOPY_HEAP_SIZE_TO),
 	CRASHTYPE(USERCOPY_HEAP_SIZE_FROM),
-	CRASHTYPE(USERCOPY_HEAP_FLAG_TO),
-	CRASHTYPE(USERCOPY_HEAP_FLAG_FROM),
+	CRASHTYPE(USERCOPY_HEAP_WHITELIST_TO),
+	CRASHTYPE(USERCOPY_HEAP_WHITELIST_FROM),
 	CRASHTYPE(USERCOPY_STACK_FRAME_TO),
 	CRASHTYPE(USERCOPY_STACK_FRAME_FROM),
 	CRASHTYPE(USERCOPY_STACK_BEYOND),
diff --git a/drivers/misc/lkdtm_usercopy.c b/drivers/misc/lkdtm_usercopy.c
index a64372cc148d..9725aed305bb 100644
--- a/drivers/misc/lkdtm_usercopy.c
+++ b/drivers/misc/lkdtm_usercopy.c
@@ -20,7 +20,7 @@
  */
 static volatile size_t unconst = 0;
 static volatile size_t cache_size = 1024;
-static struct kmem_cache *bad_cache;
+static struct kmem_cache *whitelist_cache;
 
 static const unsigned char test_text[] = "This is a test.\n";
 
@@ -115,10 +115,16 @@ free_user:
 	vm_munmap(user_addr, PAGE_SIZE);
 }
 
+/*
+ * This checks for whole-object size validation with hardened usercopy,
+ * with or without usercopy whitelisting.
+ */
 static void do_usercopy_heap_size(bool to_user)
 {
 	unsigned long user_addr;
 	unsigned char *one, *two;
+	void __user *test_user_addr;
+	void *test_kern_addr;
 	size_t size = unconst + 1024;
 
 	one = kmalloc(size, GFP_KERNEL);
@@ -139,27 +145,30 @@ static void do_usercopy_heap_size(bool to_user)
 	memset(one, 'A', size);
 	memset(two, 'B', size);
 
+	test_user_addr = (void __user *)(user_addr + 16);
+	test_kern_addr = one + 16;
+
 	if (to_user) {
 		pr_info("attempting good copy_to_user of correct size\n");
-		if (copy_to_user((void __user *)user_addr, one, size)) {
+		if (copy_to_user(test_user_addr, test_kern_addr, size / 2)) {
 			pr_warn("copy_to_user failed unexpectedly?!\n");
 			goto free_user;
 		}
 
 		pr_info("attempting bad copy_to_user of too large size\n");
-		if (copy_to_user((void __user *)user_addr, one, 2 * size)) {
+		if (copy_to_user(test_user_addr, test_kern_addr, size)) {
 			pr_warn("copy_to_user failed, but lacked Oops\n");
 			goto free_user;
 		}
 	} else {
 		pr_info("attempting good copy_from_user of correct size\n");
-		if (copy_from_user(one, (void __user *)user_addr, size)) {
+		if (copy_from_user(test_kern_addr, test_user_addr, size / 2)) {
 			pr_warn("copy_from_user failed unexpectedly?!\n");
 			goto free_user;
 		}
 
 		pr_info("attempting bad copy_from_user of too large size\n");
-		if (copy_from_user(one, (void __user *)user_addr, 2 * size)) {
+		if (copy_from_user(test_kern_addr, test_user_addr, size)) {
 			pr_warn("copy_from_user failed, but lacked Oops\n");
 			goto free_user;
 		}
@@ -172,77 +181,79 @@ free_kernel:
 	kfree(two);
 }
 
-static void do_usercopy_heap_flag(bool to_user)
+/*
+ * This checks for the specific whitelist window within an object. If this
+ * test passes, then do_usercopy_heap_size() tests will pass too.
+ */
+static void do_usercopy_heap_whitelist(bool to_user)
 {
-	unsigned long user_addr;
-	unsigned char *good_buf = NULL;
-	unsigned char *bad_buf = NULL;
+	unsigned long user_alloc;
+	unsigned char *buf = NULL;
+	unsigned char __user *user_addr;
+	size_t offset, size;
 
 	/* Make sure cache was prepared. */
-	if (!bad_cache) {
+	if (!whitelist_cache) {
 		pr_warn("Failed to allocate kernel cache\n");
 		return;
 	}
 
 	/*
-	 * Allocate one buffer from each cache (kmalloc will have the
-	 * SLAB_USERCOPY flag already, but "bad_cache" won't).
+	 * Allocate a buffer with a whitelisted window in the buffer.
 	 */
-	good_buf = kmalloc(cache_size, GFP_KERNEL);
-	bad_buf = kmem_cache_alloc(bad_cache, GFP_KERNEL);
-	if (!good_buf || !bad_buf) {
-		pr_warn("Failed to allocate buffers from caches\n");
+	buf = kmem_cache_alloc(whitelist_cache, GFP_KERNEL);
+	if (!buf) {
+		pr_warn("Failed to allocate buffer from whitelist cache\n");
 		goto free_alloc;
 	}
 
 	/* Allocate user memory we'll poke at. */
-	user_addr = vm_mmap(NULL, 0, PAGE_SIZE,
+	user_alloc = vm_mmap(NULL, 0, PAGE_SIZE,
 			    PROT_READ | PROT_WRITE | PROT_EXEC,
 			    MAP_ANONYMOUS | MAP_PRIVATE, 0);
-	if (user_addr >= TASK_SIZE) {
+	if (user_alloc >= TASK_SIZE) {
 		pr_warn("Failed to allocate user memory\n");
 		goto free_alloc;
 	}
+	user_addr = (void __user *)user_alloc;
 
-	memset(good_buf, 'A', cache_size);
-	memset(bad_buf, 'B', cache_size);
+	memset(buf, 'B', cache_size);
+
+	/* Whitelisted window in buffer, from kmem_cache_create_usercopy. */
+	offset = (cache_size / 4) + unconst;
+	size = (cache_size / 16) + unconst;
 
 	if (to_user) {
-		pr_info("attempting good copy_to_user with SLAB_USERCOPY\n");
-		if (copy_to_user((void __user *)user_addr, good_buf,
-				 cache_size)) {
+		pr_info("attempting good copy_to_user inside whitelist\n");
+		if (copy_to_user(user_addr, buf + offset, size)) {
 			pr_warn("copy_to_user failed unexpectedly?!\n");
 			goto free_user;
 		}
 
-		pr_info("attempting bad copy_to_user w/o SLAB_USERCOPY\n");
-		if (copy_to_user((void __user *)user_addr, bad_buf,
-				 cache_size)) {
+		pr_info("attempting bad copy_to_user outside whitelist\n");
+		if (copy_to_user(user_addr, buf + offset - 1, size)) {
 			pr_warn("copy_to_user failed, but lacked Oops\n");
 			goto free_user;
 		}
 	} else {
-		pr_info("attempting good copy_from_user with SLAB_USERCOPY\n");
-		if (copy_from_user(good_buf, (void __user *)user_addr,
-				   cache_size)) {
+		pr_info("attempting good copy_from_user inside whitelist\n");
+		if (copy_from_user(buf + offset, user_addr, size)) {
 			pr_warn("copy_from_user failed unexpectedly?!\n");
 			goto free_user;
 		}
 
-		pr_info("attempting bad copy_from_user w/o SLAB_USERCOPY\n");
-		if (copy_from_user(bad_buf, (void __user *)user_addr,
-				   cache_size)) {
+		pr_info("attempting bad copy_from_user outside whitelist\n");
+		if (copy_from_user(buf + offset - 1, user_addr, size)) {
 			pr_warn("copy_from_user failed, but lacked Oops\n");
 			goto free_user;
 		}
 	}
 
 free_user:
-	vm_munmap(user_addr, PAGE_SIZE);
+	vm_munmap(user_alloc, PAGE_SIZE);
 free_alloc:
-	if (bad_buf)
-		kmem_cache_free(bad_cache, bad_buf);
-	kfree(good_buf);
+	if (buf)
+		kmem_cache_free(whitelist_cache, buf);
 }
 
 /* Callable tests. */
@@ -256,14 +267,14 @@ void lkdtm_USERCOPY_HEAP_SIZE_FROM(void)
 	do_usercopy_heap_size(false);
 }
 
-void lkdtm_USERCOPY_HEAP_FLAG_TO(void)
+void lkdtm_USERCOPY_HEAP_WHITELIST_TO(void)
 {
-	do_usercopy_heap_flag(true);
+	do_usercopy_heap_whitelist(true);
 }
 
-void lkdtm_USERCOPY_HEAP_FLAG_FROM(void)
+void lkdtm_USERCOPY_HEAP_WHITELIST_FROM(void)
 {
-	do_usercopy_heap_flag(false);
+	do_usercopy_heap_whitelist(false);
 }
 
 void lkdtm_USERCOPY_STACK_FRAME_TO(void)
@@ -314,11 +325,15 @@ free_user:
 void __init lkdtm_usercopy_init(void)
 {
 	/* Prepare cache that lacks SLAB_USERCOPY flag. */
-	bad_cache = kmem_cache_create("lkdtm-no-usercopy", cache_size, 0,
-				      0, NULL);
+	whitelist_cache =
+		kmem_cache_create_usercopy("lkdtm-usercopy", cache_size,
+					   0, 0,
+					   cache_size / 4,
+					   cache_size / 16,
+					   NULL);
 }
 
 void __exit lkdtm_usercopy_exit(void)
 {
-	kmem_cache_destroy(bad_cache);
+	kmem_cache_destroy(whitelist_cache);
 }
diff --git a/drivers/misc/ocxl/Kconfig b/drivers/misc/ocxl/Kconfig
new file mode 100644
index 000000000000..4bbdb0d3c8ee
--- /dev/null
+++ b/drivers/misc/ocxl/Kconfig
@@ -0,0 +1,31 @@
+#
+# Open Coherent Accelerator (OCXL) compatible devices
+#
+
+config OCXL_BASE
+	bool
+	default n
+	select PPC_COPRO_BASE
+
+config OCXL
+	tristate "OpenCAPI coherent accelerator support"
+	depends on PPC_POWERNV && PCI && EEH
+	select OCXL_BASE
+	default m
+	help
+	  Select this option to enable the ocxl driver for Open
+	  Coherent Accelerator Processor Interface (OpenCAPI) devices.
+
+	  OpenCAPI allows FPGA and ASIC accelerators to be coherently
+	  attached to a CPU over an OpenCAPI link.
+
+	  The ocxl driver enables userspace programs to access these
+	  accelerators through devices in /dev/ocxl/.
+
+	  For more information, see http://opencapi.org.
+
+	  This is not to be confused with the support for IBM CAPI
+	  accelerators (CONFIG_CXL), which are PCI-based instead of a
+	  dedicated OpenCAPI link, and don't follow the same protocol.
+
+	  If unsure, say N.
diff --git a/drivers/misc/ocxl/Makefile b/drivers/misc/ocxl/Makefile
new file mode 100644
index 000000000000..5229dcda8297
--- /dev/null
+++ b/drivers/misc/ocxl/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0+
+ccflags-$(CONFIG_PPC_WERROR)	+= -Werror
+
+ocxl-y				+= main.o pci.o config.o file.o pasid.o
+ocxl-y				+= link.o context.o afu_irq.o sysfs.o trace.o
+obj-$(CONFIG_OCXL)		+= ocxl.o
+
+# For tracepoints to include our trace.h from tracepoint infrastructure:
+CFLAGS_trace.o := -I$(src)
+
+# ccflags-y += -DDEBUG
diff --git a/drivers/misc/ocxl/afu_irq.c b/drivers/misc/ocxl/afu_irq.c
new file mode 100644
index 000000000000..e70cfa24577f
--- /dev/null
+++ b/drivers/misc/ocxl/afu_irq.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <linux/interrupt.h>
+#include <linux/eventfd.h>
+#include <asm/pnv-ocxl.h>
+#include "ocxl_internal.h"
+#include "trace.h"
+
+struct afu_irq {
+	int id;
+	int hw_irq;
+	unsigned int virq;
+	char *name;
+	u64 trigger_page;
+	struct eventfd_ctx *ev_ctx;
+};
+
+static int irq_offset_to_id(struct ocxl_context *ctx, u64 offset)
+{
+	return (offset - ctx->afu->irq_base_offset) >> PAGE_SHIFT;
+}
+
+static u64 irq_id_to_offset(struct ocxl_context *ctx, int id)
+{
+	return ctx->afu->irq_base_offset + (id << PAGE_SHIFT);
+}
+
+static irqreturn_t afu_irq_handler(int virq, void *data)
+{
+	struct afu_irq *irq = (struct afu_irq *) data;
+
+	trace_ocxl_afu_irq_receive(virq);
+	if (irq->ev_ctx)
+		eventfd_signal(irq->ev_ctx, 1);
+	return IRQ_HANDLED;
+}
+
+static int setup_afu_irq(struct ocxl_context *ctx, struct afu_irq *irq)
+{
+	int rc;
+
+	irq->virq = irq_create_mapping(NULL, irq->hw_irq);
+	if (!irq->virq) {
+		pr_err("irq_create_mapping failed\n");
+		return -ENOMEM;
+	}
+	pr_debug("hw_irq %d mapped to virq %u\n", irq->hw_irq, irq->virq);
+
+	irq->name = kasprintf(GFP_KERNEL, "ocxl-afu-%u", irq->virq);
+	if (!irq->name) {
+		irq_dispose_mapping(irq->virq);
+		return -ENOMEM;
+	}
+
+	rc = request_irq(irq->virq, afu_irq_handler, 0, irq->name, irq);
+	if (rc) {
+		kfree(irq->name);
+		irq->name = NULL;
+		irq_dispose_mapping(irq->virq);
+		pr_err("request_irq failed: %d\n", rc);
+		return rc;
+	}
+	return 0;
+}
+
+static void release_afu_irq(struct afu_irq *irq)
+{
+	free_irq(irq->virq, irq);
+	irq_dispose_mapping(irq->virq);
+	kfree(irq->name);
+}
+
+int ocxl_afu_irq_alloc(struct ocxl_context *ctx, u64 *irq_offset)
+{
+	struct afu_irq *irq;
+	int rc;
+
+	irq = kzalloc(sizeof(struct afu_irq), GFP_KERNEL);
+	if (!irq)
+		return -ENOMEM;
+
+	/*
+	 * We limit the number of afu irqs per context and per link to
+	 * avoid a single process or user depleting the pool of IPIs
+	 */
+
+	mutex_lock(&ctx->irq_lock);
+
+	irq->id = idr_alloc(&ctx->irq_idr, irq, 0, MAX_IRQ_PER_CONTEXT,
+			GFP_KERNEL);
+	if (irq->id < 0) {
+		rc = -ENOSPC;
+		goto err_unlock;
+	}
+
+	rc = ocxl_link_irq_alloc(ctx->afu->fn->link, &irq->hw_irq,
+				&irq->trigger_page);
+	if (rc)
+		goto err_idr;
+
+	rc = setup_afu_irq(ctx, irq);
+	if (rc)
+		goto err_alloc;
+
+	*irq_offset = irq_id_to_offset(ctx, irq->id);
+
+	trace_ocxl_afu_irq_alloc(ctx->pasid, irq->id, irq->virq, irq->hw_irq,
+				*irq_offset);
+	mutex_unlock(&ctx->irq_lock);
+	return 0;
+
+err_alloc:
+	ocxl_link_free_irq(ctx->afu->fn->link, irq->hw_irq);
+err_idr:
+	idr_remove(&ctx->irq_idr, irq->id);
+err_unlock:
+	mutex_unlock(&ctx->irq_lock);
+	kfree(irq);
+	return rc;
+}
+
+static void afu_irq_free(struct afu_irq *irq, struct ocxl_context *ctx)
+{
+	trace_ocxl_afu_irq_free(ctx->pasid, irq->id);
+	if (ctx->mapping)
+		unmap_mapping_range(ctx->mapping,
+				irq_id_to_offset(ctx, irq->id),
+				1 << PAGE_SHIFT, 1);
+	release_afu_irq(irq);
+	if (irq->ev_ctx)
+		eventfd_ctx_put(irq->ev_ctx);
+	ocxl_link_free_irq(ctx->afu->fn->link, irq->hw_irq);
+	kfree(irq);
+}
+
+int ocxl_afu_irq_free(struct ocxl_context *ctx, u64 irq_offset)
+{
+	struct afu_irq *irq;
+	int id = irq_offset_to_id(ctx, irq_offset);
+
+	mutex_lock(&ctx->irq_lock);
+
+	irq = idr_find(&ctx->irq_idr, id);
+	if (!irq) {
+		mutex_unlock(&ctx->irq_lock);
+		return -EINVAL;
+	}
+	idr_remove(&ctx->irq_idr, irq->id);
+	afu_irq_free(irq, ctx);
+	mutex_unlock(&ctx->irq_lock);
+	return 0;
+}
+
+void ocxl_afu_irq_free_all(struct ocxl_context *ctx)
+{
+	struct afu_irq *irq;
+	int id;
+
+	mutex_lock(&ctx->irq_lock);
+	idr_for_each_entry(&ctx->irq_idr, irq, id)
+		afu_irq_free(irq, ctx);
+	mutex_unlock(&ctx->irq_lock);
+}
+
+int ocxl_afu_irq_set_fd(struct ocxl_context *ctx, u64 irq_offset, int eventfd)
+{
+	struct afu_irq *irq;
+	struct eventfd_ctx *ev_ctx;
+	int rc = 0, id = irq_offset_to_id(ctx, irq_offset);
+
+	mutex_lock(&ctx->irq_lock);
+	irq = idr_find(&ctx->irq_idr, id);
+	if (!irq) {
+		rc = -EINVAL;
+		goto unlock;
+	}
+
+	ev_ctx = eventfd_ctx_fdget(eventfd);
+	if (IS_ERR(ev_ctx)) {
+		rc = -EINVAL;
+		goto unlock;
+	}
+
+	irq->ev_ctx = ev_ctx;
+unlock:
+	mutex_unlock(&ctx->irq_lock);
+	return rc;
+}
+
+u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, u64 irq_offset)
+{
+	struct afu_irq *irq;
+	int id = irq_offset_to_id(ctx, irq_offset);
+	u64 addr = 0;
+
+	mutex_lock(&ctx->irq_lock);
+	irq = idr_find(&ctx->irq_idr, id);
+	if (irq)
+		addr = irq->trigger_page;
+	mutex_unlock(&ctx->irq_lock);
+	return addr;
+}
diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
new file mode 100644
index 000000000000..2e30de9c694a
--- /dev/null
+++ b/drivers/misc/ocxl/config.c
@@ -0,0 +1,723 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <linux/pci.h>
+#include <asm/pnv-ocxl.h>
+#include <misc/ocxl.h>
+#include <misc/ocxl-config.h>
+
+#define EXTRACT_BIT(val, bit) (!!(val & BIT(bit)))
+#define EXTRACT_BITS(val, s, e) ((val & GENMASK(e, s)) >> s)
+
+#define OCXL_DVSEC_AFU_IDX_MASK              GENMASK(5, 0)
+#define OCXL_DVSEC_ACTAG_MASK                GENMASK(11, 0)
+#define OCXL_DVSEC_PASID_MASK                GENMASK(19, 0)
+#define OCXL_DVSEC_PASID_LOG_MASK            GENMASK(4, 0)
+
+#define OCXL_DVSEC_TEMPL_VERSION         0x0
+#define OCXL_DVSEC_TEMPL_NAME            0x4
+#define OCXL_DVSEC_TEMPL_AFU_VERSION     0x1C
+#define OCXL_DVSEC_TEMPL_MMIO_GLOBAL     0x20
+#define OCXL_DVSEC_TEMPL_MMIO_GLOBAL_SZ  0x28
+#define OCXL_DVSEC_TEMPL_MMIO_PP         0x30
+#define OCXL_DVSEC_TEMPL_MMIO_PP_SZ      0x38
+#define OCXL_DVSEC_TEMPL_MEM_SZ          0x3C
+#define OCXL_DVSEC_TEMPL_WWID            0x40
+
+#define OCXL_MAX_AFU_PER_FUNCTION 64
+#define OCXL_TEMPL_LEN            0x58
+#define OCXL_TEMPL_NAME_LEN       24
+#define OCXL_CFG_TIMEOUT     3
+
+static int find_dvsec(struct pci_dev *dev, int dvsec_id)
+{
+	int vsec = 0;
+	u16 vendor, id;
+
+	while ((vsec = pci_find_next_ext_capability(dev, vsec,
+						    OCXL_EXT_CAP_ID_DVSEC))) {
+		pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET,
+				&vendor);
+		pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id);
+		if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id)
+			return vsec;
+	}
+	return 0;
+}
+
+static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx)
+{
+	int vsec = 0;
+	u16 vendor, id;
+	u8 idx;
+
+	while ((vsec = pci_find_next_ext_capability(dev, vsec,
+						    OCXL_EXT_CAP_ID_DVSEC))) {
+		pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET,
+				&vendor);
+		pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id);
+
+		if (vendor == PCI_VENDOR_ID_IBM &&
+			id == OCXL_DVSEC_AFU_CTRL_ID) {
+			pci_read_config_byte(dev,
+					vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX,
+					&idx);
+			if (idx == afu_idx)
+				return vsec;
+		}
+	}
+	return 0;
+}
+
+static int read_pasid(struct pci_dev *dev, struct ocxl_fn_config *fn)
+{
+	u16 val;
+	int pos;
+
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_PASID);
+	if (!pos) {
+		/*
+		 * PASID capability is not mandatory, but there
+		 * shouldn't be any AFU
+		 */
+		dev_dbg(&dev->dev, "Function doesn't require any PASID\n");
+		fn->max_pasid_log = -1;
+		goto out;
+	}
+	pci_read_config_word(dev, pos + PCI_PASID_CAP, &val);
+	fn->max_pasid_log = EXTRACT_BITS(val, 8, 12);
+
+out:
+	dev_dbg(&dev->dev, "PASID capability:\n");
+	dev_dbg(&dev->dev, "  Max PASID log = %d\n", fn->max_pasid_log);
+	return 0;
+}
+
+static int read_dvsec_tl(struct pci_dev *dev, struct ocxl_fn_config *fn)
+{
+	int pos;
+
+	pos = find_dvsec(dev, OCXL_DVSEC_TL_ID);
+	if (!pos && PCI_FUNC(dev->devfn) == 0) {
+		dev_err(&dev->dev, "Can't find TL DVSEC\n");
+		return -ENODEV;
+	}
+	if (pos && PCI_FUNC(dev->devfn) != 0) {
+		dev_err(&dev->dev, "TL DVSEC is only allowed on function 0\n");
+		return -ENODEV;
+	}
+	fn->dvsec_tl_pos = pos;
+	return 0;
+}
+
+static int read_dvsec_function(struct pci_dev *dev, struct ocxl_fn_config *fn)
+{
+	int pos, afu_present;
+	u32 val;
+
+	pos = find_dvsec(dev, OCXL_DVSEC_FUNC_ID);
+	if (!pos) {
+		dev_err(&dev->dev, "Can't find function DVSEC\n");
+		return -ENODEV;
+	}
+	fn->dvsec_function_pos = pos;
+
+	pci_read_config_dword(dev, pos + OCXL_DVSEC_FUNC_OFF_INDEX, &val);
+	afu_present = EXTRACT_BIT(val, 31);
+	if (!afu_present) {
+		fn->max_afu_index = -1;
+		dev_dbg(&dev->dev, "Function doesn't define any AFU\n");
+		goto out;
+	}
+	fn->max_afu_index = EXTRACT_BITS(val, 24, 29);
+
+out:
+	dev_dbg(&dev->dev, "Function DVSEC:\n");
+	dev_dbg(&dev->dev, "  Max AFU index = %d\n", fn->max_afu_index);
+	return 0;
+}
+
+static int read_dvsec_afu_info(struct pci_dev *dev, struct ocxl_fn_config *fn)
+{
+	int pos;
+
+	if (fn->max_afu_index < 0) {
+		fn->dvsec_afu_info_pos = -1;
+		return 0;
+	}
+
+	pos = find_dvsec(dev, OCXL_DVSEC_AFU_INFO_ID);
+	if (!pos) {
+		dev_err(&dev->dev, "Can't find AFU information DVSEC\n");
+		return -ENODEV;
+	}
+	fn->dvsec_afu_info_pos = pos;
+	return 0;
+}
+
+static int read_dvsec_vendor(struct pci_dev *dev)
+{
+	int pos;
+	u32 cfg, tlx, dlx;
+
+	/*
+	 * vendor specific DVSEC is optional
+	 *
+	 * It's currently only used on function 0 to specify the
+	 * version of some logic blocks. Some older images may not
+	 * even have it so we ignore any errors
+	 */
+	if (PCI_FUNC(dev->devfn) != 0)
+		return 0;
+
+	pos = find_dvsec(dev, OCXL_DVSEC_VENDOR_ID);
+	if (!pos)
+		return 0;
+
+	pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_CFG_VERS, &cfg);
+	pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_TLX_VERS, &tlx);
+	pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_DLX_VERS, &dlx);
+
+	dev_dbg(&dev->dev, "Vendor specific DVSEC:\n");
+	dev_dbg(&dev->dev, "  CFG version = 0x%x\n", cfg);
+	dev_dbg(&dev->dev, "  TLX version = 0x%x\n", tlx);
+	dev_dbg(&dev->dev, "  DLX version = 0x%x\n", dlx);
+	return 0;
+}
+
+static int validate_function(struct pci_dev *dev, struct ocxl_fn_config *fn)
+{
+	if (fn->max_pasid_log == -1 && fn->max_afu_index >= 0) {
+		dev_err(&dev->dev,
+			"AFUs are defined but no PASIDs are requested\n");
+		return -EINVAL;
+	}
+
+	if (fn->max_afu_index > OCXL_MAX_AFU_PER_FUNCTION) {
+		dev_err(&dev->dev,
+			"Max AFU index out of architectural limit (%d vs %d)\n",
+			fn->max_afu_index, OCXL_MAX_AFU_PER_FUNCTION);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+int ocxl_config_read_function(struct pci_dev *dev, struct ocxl_fn_config *fn)
+{
+	int rc;
+
+	rc = read_pasid(dev, fn);
+	if (rc) {
+		dev_err(&dev->dev, "Invalid PASID configuration: %d\n", rc);
+		return -ENODEV;
+	}
+
+	rc = read_dvsec_tl(dev, fn);
+	if (rc) {
+		dev_err(&dev->dev,
+			"Invalid Transaction Layer DVSEC configuration: %d\n",
+			rc);
+		return -ENODEV;
+	}
+
+	rc = read_dvsec_function(dev, fn);
+	if (rc) {
+		dev_err(&dev->dev,
+			"Invalid Function DVSEC configuration: %d\n", rc);
+		return -ENODEV;
+	}
+
+	rc = read_dvsec_afu_info(dev, fn);
+	if (rc) {
+		dev_err(&dev->dev, "Invalid AFU configuration: %d\n", rc);
+		return -ENODEV;
+	}
+
+	rc = read_dvsec_vendor(dev);
+	if (rc) {
+		dev_err(&dev->dev,
+			"Invalid vendor specific DVSEC configuration: %d\n",
+			rc);
+		return -ENODEV;
+	}
+
+	rc = validate_function(dev, fn);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(ocxl_config_read_function);
+
+static int read_afu_info(struct pci_dev *dev, struct ocxl_fn_config *fn,
+			int offset, u32 *data)
+{
+	u32 val;
+	unsigned long timeout = jiffies + (HZ * OCXL_CFG_TIMEOUT);
+	int pos = fn->dvsec_afu_info_pos;
+
+	/* Protect 'data valid' bit */
+	if (EXTRACT_BIT(offset, 31)) {
+		dev_err(&dev->dev, "Invalid offset in AFU info DVSEC\n");
+		return -EINVAL;
+	}
+
+	pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_OFF, offset);
+	pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_OFF, &val);
+	while (!EXTRACT_BIT(val, 31)) {
+		if (time_after_eq(jiffies, timeout)) {
+			dev_err(&dev->dev,
+				"Timeout while reading AFU info DVSEC (offset=%d)\n",
+				offset);
+			return -EBUSY;
+		}
+		cpu_relax();
+		pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_OFF, &val);
+	}
+	pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_DATA, data);
+	return 0;
+}
+
+int ocxl_config_check_afu_index(struct pci_dev *dev,
+				struct ocxl_fn_config *fn, int afu_idx)
+{
+	u32 val;
+	int rc, templ_major, templ_minor, len;
+
+	pci_write_config_word(dev, fn->dvsec_afu_info_pos, afu_idx);
+	rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_VERSION, &val);
+	if (rc)
+		return rc;
+
+	/* AFU index map can have holes */
+	if (!val)
+		return 0;
+
+	templ_major = EXTRACT_BITS(val, 8, 15);
+	templ_minor = EXTRACT_BITS(val, 0, 7);
+	dev_dbg(&dev->dev, "AFU descriptor template version %d.%d\n",
+		templ_major, templ_minor);
+
+	len = EXTRACT_BITS(val, 16, 31);
+	if (len != OCXL_TEMPL_LEN) {
+		dev_warn(&dev->dev,
+			"Unexpected template length in AFU information (%#x)\n",
+			len);
+	}
+	return 1;
+}
+EXPORT_SYMBOL_GPL(ocxl_config_check_afu_index);
+
+static int read_afu_name(struct pci_dev *dev, struct ocxl_fn_config *fn,
+			struct ocxl_afu_config *afu)
+{
+	int i, rc;
+	u32 val, *ptr;
+
+	BUILD_BUG_ON(OCXL_AFU_NAME_SZ < OCXL_TEMPL_NAME_LEN);
+	for (i = 0; i < OCXL_TEMPL_NAME_LEN; i += 4) {
+		rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_NAME + i, &val);
+		if (rc)
+			return rc;
+		ptr = (u32 *) &afu->name[i];
+		*ptr = val;
+	}
+	afu->name[OCXL_AFU_NAME_SZ - 1] = '\0'; /* play safe */
+	return 0;
+}
+
+static int read_afu_mmio(struct pci_dev *dev, struct ocxl_fn_config *fn,
+			struct ocxl_afu_config *afu)
+{
+	int rc;
+	u32 val;
+
+	/*
+	 * Global MMIO
+	 */
+	rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_GLOBAL, &val);
+	if (rc)
+		return rc;
+	afu->global_mmio_bar = EXTRACT_BITS(val, 0, 2);
+	afu->global_mmio_offset = EXTRACT_BITS(val, 16, 31) << 16;
+
+	rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_GLOBAL + 4, &val);
+	if (rc)
+		return rc;
+	afu->global_mmio_offset += (u64) val << 32;
+
+	rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_GLOBAL_SZ, &val);
+	if (rc)
+		return rc;
+	afu->global_mmio_size = val;
+
+	/*
+	 * Per-process MMIO
+	 */
+	rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_PP, &val);
+	if (rc)
+		return rc;
+	afu->pp_mmio_bar = EXTRACT_BITS(val, 0, 2);
+	afu->pp_mmio_offset = EXTRACT_BITS(val, 16, 31) << 16;
+
+	rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_PP + 4, &val);
+	if (rc)
+		return rc;
+	afu->pp_mmio_offset += (u64) val << 32;
+
+	rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_PP_SZ, &val);
+	if (rc)
+		return rc;
+	afu->pp_mmio_stride = val;
+
+	return 0;
+}
+
+static int read_afu_control(struct pci_dev *dev, struct ocxl_afu_config *afu)
+{
+	int pos;
+	u8 val8;
+	u16 val16;
+
+	pos = find_dvsec_afu_ctrl(dev, afu->idx);
+	if (!pos) {
+		dev_err(&dev->dev, "Can't find AFU control DVSEC for AFU %d\n",
+			afu->idx);
+		return -ENODEV;
+	}
+	afu->dvsec_afu_control_pos = pos;
+
+	pci_read_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_SUP, &val8);
+	afu->pasid_supported_log = EXTRACT_BITS(val8, 0, 4);
+
+	pci_read_config_word(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_SUP, &val16);
+	afu->actag_supported = EXTRACT_BITS(val16, 0, 11);
+	return 0;
+}
+
+static bool char_allowed(int c)
+{
+	/*
+	 * Permitted Characters : Alphanumeric, hyphen, underscore, comma
+	 */
+	if ((c >= 0x30 && c <= 0x39) /* digits */ ||
+		(c >= 0x41 && c <= 0x5A) /* upper case */ ||
+		(c >= 0x61 && c <= 0x7A) /* lower case */ ||
+		c == 0 /* NULL */ ||
+		c == 0x2D /* - */ ||
+		c == 0x5F /* _ */ ||
+		c == 0x2C /* , */)
+		return true;
+	return false;
+}
+
+static int validate_afu(struct pci_dev *dev, struct ocxl_afu_config *afu)
+{
+	int i;
+
+	if (!afu->name[0]) {
+		dev_err(&dev->dev, "Empty AFU name\n");
+		return -EINVAL;
+	}
+	for (i = 0; i < OCXL_TEMPL_NAME_LEN; i++) {
+		if (!char_allowed(afu->name[i])) {
+			dev_err(&dev->dev,
+				"Invalid character in AFU name\n");
+			return -EINVAL;
+		}
+	}
+
+	if (afu->global_mmio_bar != 0 &&
+		afu->global_mmio_bar != 2 &&
+		afu->global_mmio_bar != 4) {
+		dev_err(&dev->dev, "Invalid global MMIO bar number\n");
+		return -EINVAL;
+	}
+	if (afu->pp_mmio_bar != 0 &&
+		afu->pp_mmio_bar != 2 &&
+		afu->pp_mmio_bar != 4) {
+		dev_err(&dev->dev, "Invalid per-process MMIO bar number\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+int ocxl_config_read_afu(struct pci_dev *dev, struct ocxl_fn_config *fn,
+			struct ocxl_afu_config *afu, u8 afu_idx)
+{
+	int rc;
+	u32 val32;
+
+	/*
+	 * First, we need to write the AFU idx for the AFU we want to
+	 * access.
+	 */
+	WARN_ON((afu_idx & OCXL_DVSEC_AFU_IDX_MASK) != afu_idx);
+	afu->idx = afu_idx;
+	pci_write_config_byte(dev,
+			fn->dvsec_afu_info_pos + OCXL_DVSEC_AFU_INFO_AFU_IDX,
+			afu->idx);
+
+	rc = read_afu_name(dev, fn, afu);
+	if (rc)
+		return rc;
+
+	rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_AFU_VERSION, &val32);
+	if (rc)
+		return rc;
+	afu->version_major = EXTRACT_BITS(val32, 24, 31);
+	afu->version_minor = EXTRACT_BITS(val32, 16, 23);
+	afu->afuc_type = EXTRACT_BITS(val32, 14, 15);
+	afu->afum_type = EXTRACT_BITS(val32, 12, 13);
+	afu->profile = EXTRACT_BITS(val32, 0, 7);
+
+	rc = read_afu_mmio(dev, fn, afu);
+	if (rc)
+		return rc;
+
+	rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MEM_SZ, &val32);
+	if (rc)
+		return rc;
+	afu->log_mem_size = EXTRACT_BITS(val32, 0, 7);
+
+	rc = read_afu_control(dev, afu);
+	if (rc)
+		return rc;
+
+	dev_dbg(&dev->dev, "AFU configuration:\n");
+	dev_dbg(&dev->dev, "  name = %s\n", afu->name);
+	dev_dbg(&dev->dev, "  version = %d.%d\n", afu->version_major,
+		afu->version_minor);
+	dev_dbg(&dev->dev, "  global mmio bar = %hhu\n", afu->global_mmio_bar);
+	dev_dbg(&dev->dev, "  global mmio offset = %#llx\n",
+		afu->global_mmio_offset);
+	dev_dbg(&dev->dev, "  global mmio size = %#x\n", afu->global_mmio_size);
+	dev_dbg(&dev->dev, "  pp mmio bar = %hhu\n", afu->pp_mmio_bar);
+	dev_dbg(&dev->dev, "  pp mmio offset = %#llx\n", afu->pp_mmio_offset);
+	dev_dbg(&dev->dev, "  pp mmio stride = %#x\n", afu->pp_mmio_stride);
+	dev_dbg(&dev->dev, "  mem size (log) = %hhu\n", afu->log_mem_size);
+	dev_dbg(&dev->dev, "  pasid supported (log) = %u\n",
+		afu->pasid_supported_log);
+	dev_dbg(&dev->dev, "  actag supported = %u\n",
+		afu->actag_supported);
+
+	rc = validate_afu(dev, afu);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(ocxl_config_read_afu);
+
+int ocxl_config_get_actag_info(struct pci_dev *dev, u16 *base, u16 *enabled,
+			u16 *supported)
+{
+	int rc;
+
+	/*
+	 * This is really a simple wrapper for the kernel API, to
+	 * avoid an external driver using ocxl as a library to call
+	 * platform-dependent code
+	 */
+	rc = pnv_ocxl_get_actag(dev, base, enabled, supported);
+	if (rc) {
+		dev_err(&dev->dev, "Can't get actag for device: %d\n", rc);
+		return rc;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ocxl_config_get_actag_info);
+
+void ocxl_config_set_afu_actag(struct pci_dev *dev, int pos, int actag_base,
+			int actag_count)
+{
+	u16 val;
+
+	val = actag_count & OCXL_DVSEC_ACTAG_MASK;
+	pci_write_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_EN, val);
+
+	val = actag_base & OCXL_DVSEC_ACTAG_MASK;
+	pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_BASE, val);
+}
+EXPORT_SYMBOL_GPL(ocxl_config_set_afu_actag);
+
+int ocxl_config_get_pasid_info(struct pci_dev *dev, int *count)
+{
+	return pnv_ocxl_get_pasid_count(dev, count);
+}
+EXPORT_SYMBOL_GPL(ocxl_config_get_pasid_info);
+
+void ocxl_config_set_afu_pasid(struct pci_dev *dev, int pos, int pasid_base,
+			u32 pasid_count_log)
+{
+	u8 val8;
+	u32 val32;
+
+	val8 = pasid_count_log & OCXL_DVSEC_PASID_LOG_MASK;
+	pci_write_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_EN, val8);
+
+	pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_BASE,
+			&val32);
+	val32 &= ~OCXL_DVSEC_PASID_MASK;
+	val32 |= pasid_base & OCXL_DVSEC_PASID_MASK;
+	pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_BASE,
+			val32);
+}
+EXPORT_SYMBOL_GPL(ocxl_config_set_afu_pasid);
+
+void ocxl_config_set_afu_state(struct pci_dev *dev, int pos, int enable)
+{
+	u8 val;
+
+	pci_read_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_ENABLE, &val);
+	if (enable)
+		val |= 1;
+	else
+		val &= 0xFE;
+	pci_write_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_ENABLE, val);
+}
+EXPORT_SYMBOL_GPL(ocxl_config_set_afu_state);
+
+int ocxl_config_set_TL(struct pci_dev *dev, int tl_dvsec)
+{
+	u32 val;
+	__be32 *be32ptr;
+	u8 timers;
+	int i, rc;
+	long recv_cap;
+	char *recv_rate;
+
+	/*
+	 * Skip on function != 0, as the TL can only be defined on 0
+	 */
+	if (PCI_FUNC(dev->devfn) != 0)
+		return 0;
+
+	recv_rate = kzalloc(PNV_OCXL_TL_RATE_BUF_SIZE, GFP_KERNEL);
+	if (!recv_rate)
+		return -ENOMEM;
+	/*
+	 * The spec defines 64 templates for messages in the
+	 * Transaction Layer (TL).
+	 *
+	 * The host and device each support a subset, so we need to
+	 * configure the transmitters on each side to send only
+	 * templates the receiver understands, at a rate the receiver
+	 * can process.  Per the spec, template 0 must be supported by
+	 * everybody. That's the template which has been used by the
+	 * host and device so far.
+	 *
+	 * The sending rate limit must be set before the template is
+	 * enabled.
+	 */
+
+	/*
+	 * Device -> host
+	 */
+	rc = pnv_ocxl_get_tl_cap(dev, &recv_cap, recv_rate,
+				PNV_OCXL_TL_RATE_BUF_SIZE);
+	if (rc)
+		goto out;
+
+	for (i = 0; i < PNV_OCXL_TL_RATE_BUF_SIZE; i += 4) {
+		be32ptr = (__be32 *) &recv_rate[i];
+		pci_write_config_dword(dev,
+				tl_dvsec + OCXL_DVSEC_TL_SEND_RATE + i,
+				be32_to_cpu(*be32ptr));
+	}
+	val = recv_cap >> 32;
+	pci_write_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_SEND_CAP, val);
+	val = recv_cap & GENMASK(31, 0);
+	pci_write_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_SEND_CAP + 4, val);
+
+	/*
+	 * Host -> device
+	 */
+	for (i = 0; i < PNV_OCXL_TL_RATE_BUF_SIZE; i += 4) {
+		pci_read_config_dword(dev,
+				tl_dvsec + OCXL_DVSEC_TL_RECV_RATE + i,
+				&val);
+		be32ptr = (__be32 *) &recv_rate[i];
+		*be32ptr = cpu_to_be32(val);
+	}
+	pci_read_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_RECV_CAP, &val);
+	recv_cap = (long) val << 32;
+	pci_read_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_RECV_CAP + 4, &val);
+	recv_cap |= val;
+
+	rc = pnv_ocxl_set_tl_conf(dev, recv_cap, __pa(recv_rate),
+				PNV_OCXL_TL_RATE_BUF_SIZE);
+	if (rc)
+		goto out;
+
+	/*
+	 * Opencapi commands needing to be retried are classified per
+	 * the TL in 2 groups: short and long commands.
+	 *
+	 * The short back off timer it not used for now. It will be
+	 * for opencapi 4.0.
+	 *
+	 * The long back off timer is typically used when an AFU hits
+	 * a page fault but the NPU is already processing one. So the
+	 * AFU needs to wait before it can resubmit. Having a value
+	 * too low doesn't break anything, but can generate extra
+	 * traffic on the link.
+	 * We set it to 1.6 us for now. It's shorter than, but in the
+	 * same order of magnitude as the time spent to process a page
+	 * fault.
+	 */
+	timers = 0x2 << 4; /* long timer = 1.6 us */
+	pci_write_config_byte(dev, tl_dvsec + OCXL_DVSEC_TL_BACKOFF_TIMERS,
+			timers);
+
+	rc = 0;
+out:
+	kfree(recv_rate);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(ocxl_config_set_TL);
+
+int ocxl_config_terminate_pasid(struct pci_dev *dev, int afu_control, int pasid)
+{
+	u32 val;
+	unsigned long timeout;
+
+	pci_read_config_dword(dev, afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID,
+			&val);
+	if (EXTRACT_BIT(val, 20)) {
+		dev_err(&dev->dev,
+			"Can't terminate PASID %#x, previous termination didn't complete\n",
+			pasid);
+		return -EBUSY;
+	}
+
+	val &= ~OCXL_DVSEC_PASID_MASK;
+	val |= pasid & OCXL_DVSEC_PASID_MASK;
+	val |= BIT(20);
+	pci_write_config_dword(dev,
+			afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID,
+			val);
+
+	timeout = jiffies + (HZ * OCXL_CFG_TIMEOUT);
+	pci_read_config_dword(dev, afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID,
+			&val);
+	while (EXTRACT_BIT(val, 20)) {
+		if (time_after_eq(jiffies, timeout)) {
+			dev_err(&dev->dev,
+				"Timeout while waiting for AFU to terminate PASID %#x\n",
+				pasid);
+			return -EBUSY;
+		}
+		cpu_relax();
+		pci_read_config_dword(dev,
+				afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID,
+				&val);
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ocxl_config_terminate_pasid);
+
+void ocxl_config_set_actag(struct pci_dev *dev, int func_dvsec, u32 tag_first,
+			u32 tag_count)
+{
+	u32 val;
+
+	val = (tag_first & OCXL_DVSEC_ACTAG_MASK) << 16;
+	val |= tag_count & OCXL_DVSEC_ACTAG_MASK;
+	pci_write_config_dword(dev, func_dvsec + OCXL_DVSEC_FUNC_OFF_ACTAG,
+			val);
+}
+EXPORT_SYMBOL_GPL(ocxl_config_set_actag);
diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c
new file mode 100644
index 000000000000..909e8807824a
--- /dev/null
+++ b/drivers/misc/ocxl/context.c
@@ -0,0 +1,279 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <linux/sched/mm.h>
+#include "trace.h"
+#include "ocxl_internal.h"
+
+struct ocxl_context *ocxl_context_alloc(void)
+{
+	return kzalloc(sizeof(struct ocxl_context), GFP_KERNEL);
+}
+
+int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu,
+		struct address_space *mapping)
+{
+	int pasid;
+
+	ctx->afu = afu;
+	mutex_lock(&afu->contexts_lock);
+	pasid = idr_alloc(&afu->contexts_idr, ctx, afu->pasid_base,
+			afu->pasid_base + afu->pasid_max, GFP_KERNEL);
+	if (pasid < 0) {
+		mutex_unlock(&afu->contexts_lock);
+		return pasid;
+	}
+	afu->pasid_count++;
+	mutex_unlock(&afu->contexts_lock);
+
+	ctx->pasid = pasid;
+	ctx->status = OPENED;
+	mutex_init(&ctx->status_mutex);
+	ctx->mapping = mapping;
+	mutex_init(&ctx->mapping_lock);
+	init_waitqueue_head(&ctx->events_wq);
+	mutex_init(&ctx->xsl_error_lock);
+	mutex_init(&ctx->irq_lock);
+	idr_init(&ctx->irq_idr);
+	/*
+	 * Keep a reference on the AFU to make sure it's valid for the
+	 * duration of the life of the context
+	 */
+	ocxl_afu_get(afu);
+	return 0;
+}
+
+/*
+ * Callback for when a translation fault triggers an error
+ * data:	a pointer to the context which triggered the fault
+ * addr:	the address that triggered the error
+ * dsisr:	the value of the PPC64 dsisr register
+ */
+static void xsl_fault_error(void *data, u64 addr, u64 dsisr)
+{
+	struct ocxl_context *ctx = (struct ocxl_context *) data;
+
+	mutex_lock(&ctx->xsl_error_lock);
+	ctx->xsl_error.addr = addr;
+	ctx->xsl_error.dsisr = dsisr;
+	ctx->xsl_error.count++;
+	mutex_unlock(&ctx->xsl_error_lock);
+
+	wake_up_all(&ctx->events_wq);
+}
+
+int ocxl_context_attach(struct ocxl_context *ctx, u64 amr)
+{
+	int rc;
+
+	mutex_lock(&ctx->status_mutex);
+	if (ctx->status != OPENED) {
+		rc = -EIO;
+		goto out;
+	}
+
+	rc = ocxl_link_add_pe(ctx->afu->fn->link, ctx->pasid,
+			current->mm->context.id, 0, amr, current->mm,
+			xsl_fault_error, ctx);
+	if (rc)
+		goto out;
+
+	ctx->status = ATTACHED;
+out:
+	mutex_unlock(&ctx->status_mutex);
+	return rc;
+}
+
+static int map_afu_irq(struct vm_area_struct *vma, unsigned long address,
+		u64 offset, struct ocxl_context *ctx)
+{
+	u64 trigger_addr;
+
+	trigger_addr = ocxl_afu_irq_get_addr(ctx, offset);
+	if (!trigger_addr)
+		return VM_FAULT_SIGBUS;
+
+	vm_insert_pfn(vma, address, trigger_addr >> PAGE_SHIFT);
+	return VM_FAULT_NOPAGE;
+}
+
+static int map_pp_mmio(struct vm_area_struct *vma, unsigned long address,
+		u64 offset, struct ocxl_context *ctx)
+{
+	u64 pp_mmio_addr;
+	int pasid_off;
+
+	if (offset >= ctx->afu->config.pp_mmio_stride)
+		return VM_FAULT_SIGBUS;
+
+	mutex_lock(&ctx->status_mutex);
+	if (ctx->status != ATTACHED) {
+		mutex_unlock(&ctx->status_mutex);
+		pr_debug("%s: Context not attached, failing mmio mmap\n",
+			__func__);
+		return VM_FAULT_SIGBUS;
+	}
+
+	pasid_off = ctx->pasid - ctx->afu->pasid_base;
+	pp_mmio_addr = ctx->afu->pp_mmio_start +
+		pasid_off * ctx->afu->config.pp_mmio_stride +
+		offset;
+
+	vm_insert_pfn(vma, address, pp_mmio_addr >> PAGE_SHIFT);
+	mutex_unlock(&ctx->status_mutex);
+	return VM_FAULT_NOPAGE;
+}
+
+static int ocxl_mmap_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct ocxl_context *ctx = vma->vm_file->private_data;
+	u64 offset;
+	int rc;
+
+	offset = vmf->pgoff << PAGE_SHIFT;
+	pr_debug("%s: pasid %d address 0x%lx offset 0x%llx\n", __func__,
+		ctx->pasid, vmf->address, offset);
+
+	if (offset < ctx->afu->irq_base_offset)
+		rc = map_pp_mmio(vma, vmf->address, offset, ctx);
+	else
+		rc = map_afu_irq(vma, vmf->address, offset, ctx);
+	return rc;
+}
+
+static const struct vm_operations_struct ocxl_vmops = {
+	.fault = ocxl_mmap_fault,
+};
+
+static int check_mmap_afu_irq(struct ocxl_context *ctx,
+			struct vm_area_struct *vma)
+{
+	/* only one page */
+	if (vma_pages(vma) != 1)
+		return -EINVAL;
+
+	/* check offset validty */
+	if (!ocxl_afu_irq_get_addr(ctx, vma->vm_pgoff << PAGE_SHIFT))
+		return -EINVAL;
+
+	/*
+	 * trigger page should only be accessible in write mode.
+	 *
+	 * It's a bit theoretical, as a page mmaped with only
+	 * PROT_WRITE is currently readable, but it doesn't hurt.
+	 */
+	if ((vma->vm_flags & VM_READ) || (vma->vm_flags & VM_EXEC) ||
+		!(vma->vm_flags & VM_WRITE))
+		return -EINVAL;
+	vma->vm_flags &= ~(VM_MAYREAD | VM_MAYEXEC);
+	return 0;
+}
+
+static int check_mmap_mmio(struct ocxl_context *ctx,
+			struct vm_area_struct *vma)
+{
+	if ((vma_pages(vma) + vma->vm_pgoff) >
+		(ctx->afu->config.pp_mmio_stride >> PAGE_SHIFT))
+		return -EINVAL;
+	return 0;
+}
+
+int ocxl_context_mmap(struct ocxl_context *ctx, struct vm_area_struct *vma)
+{
+	int rc;
+
+	if ((vma->vm_pgoff << PAGE_SHIFT) < ctx->afu->irq_base_offset)
+		rc = check_mmap_mmio(ctx, vma);
+	else
+		rc = check_mmap_afu_irq(ctx, vma);
+	if (rc)
+		return rc;
+
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	vma->vm_ops = &ocxl_vmops;
+	return 0;
+}
+
+int ocxl_context_detach(struct ocxl_context *ctx)
+{
+	struct pci_dev *dev;
+	int afu_control_pos;
+	enum ocxl_context_status status;
+	int rc;
+
+	mutex_lock(&ctx->status_mutex);
+	status = ctx->status;
+	ctx->status = CLOSED;
+	mutex_unlock(&ctx->status_mutex);
+	if (status != ATTACHED)
+		return 0;
+
+	dev = to_pci_dev(ctx->afu->fn->dev.parent);
+	afu_control_pos = ctx->afu->config.dvsec_afu_control_pos;
+
+	mutex_lock(&ctx->afu->afu_control_lock);
+	rc = ocxl_config_terminate_pasid(dev, afu_control_pos, ctx->pasid);
+	mutex_unlock(&ctx->afu->afu_control_lock);
+	trace_ocxl_terminate_pasid(ctx->pasid, rc);
+	if (rc) {
+		/*
+		 * If we timeout waiting for the AFU to terminate the
+		 * pasid, then it's dangerous to clean up the Process
+		 * Element entry in the SPA, as it may be referenced
+		 * in the future by the AFU. In which case, we would
+		 * checkstop because of an invalid PE access (FIR
+		 * register 2, bit 42). So leave the PE
+		 * defined. Caller shouldn't free the context so that
+		 * PASID remains allocated.
+		 *
+		 * A link reset will be required to cleanup the AFU
+		 * and the SPA.
+		 */
+		if (rc == -EBUSY)
+			return rc;
+	}
+	rc = ocxl_link_remove_pe(ctx->afu->fn->link, ctx->pasid);
+	if (rc) {
+		dev_warn(&ctx->afu->dev,
+			"Couldn't remove PE entry cleanly: %d\n", rc);
+	}
+	return 0;
+}
+
+void ocxl_context_detach_all(struct ocxl_afu *afu)
+{
+	struct ocxl_context *ctx;
+	int tmp;
+
+	mutex_lock(&afu->contexts_lock);
+	idr_for_each_entry(&afu->contexts_idr, ctx, tmp) {
+		ocxl_context_detach(ctx);
+		/*
+		 * We are force detaching - remove any active mmio
+		 * mappings so userspace cannot interfere with the
+		 * card if it comes back.  Easiest way to exercise
+		 * this is to unbind and rebind the driver via sysfs
+		 * while it is in use.
+		 */
+		mutex_lock(&ctx->mapping_lock);
+		if (ctx->mapping)
+			unmap_mapping_range(ctx->mapping, 0, 0, 1);
+		mutex_unlock(&ctx->mapping_lock);
+	}
+	mutex_unlock(&afu->contexts_lock);
+}
+
+void ocxl_context_free(struct ocxl_context *ctx)
+{
+	mutex_lock(&ctx->afu->contexts_lock);
+	ctx->afu->pasid_count--;
+	idr_remove(&ctx->afu->contexts_idr, ctx->pasid);
+	mutex_unlock(&ctx->afu->contexts_lock);
+
+	ocxl_afu_irq_free_all(ctx);
+	idr_destroy(&ctx->irq_idr);
+	/* reference to the AFU taken in ocxl_context_init */
+	ocxl_afu_put(ctx->afu);
+	kfree(ctx);
+}
diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c
new file mode 100644
index 000000000000..c90c1a578d2f
--- /dev/null
+++ b/drivers/misc/ocxl/file.c
@@ -0,0 +1,432 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <linux/sched/signal.h>
+#include <linux/uaccess.h>
+#include <uapi/misc/ocxl.h>
+#include "ocxl_internal.h"
+
+
+#define OCXL_NUM_MINORS 256 /* Total to reserve */
+
+static dev_t ocxl_dev;
+static struct class *ocxl_class;
+static struct mutex minors_idr_lock;
+static struct idr minors_idr;
+
+static struct ocxl_afu *find_and_get_afu(dev_t devno)
+{
+	struct ocxl_afu *afu;
+	int afu_minor;
+
+	afu_minor = MINOR(devno);
+	/*
+	 * We don't declare an RCU critical section here, as our AFU
+	 * is protected by a reference counter on the device. By the time the
+	 * minor number of a device is removed from the idr, the ref count of
+	 * the device is already at 0, so no user API will access that AFU and
+	 * this function can't return it.
+	 */
+	afu = idr_find(&minors_idr, afu_minor);
+	if (afu)
+		ocxl_afu_get(afu);
+	return afu;
+}
+
+static int allocate_afu_minor(struct ocxl_afu *afu)
+{
+	int minor;
+
+	mutex_lock(&minors_idr_lock);
+	minor = idr_alloc(&minors_idr, afu, 0, OCXL_NUM_MINORS, GFP_KERNEL);
+	mutex_unlock(&minors_idr_lock);
+	return minor;
+}
+
+static void free_afu_minor(struct ocxl_afu *afu)
+{
+	mutex_lock(&minors_idr_lock);
+	idr_remove(&minors_idr, MINOR(afu->dev.devt));
+	mutex_unlock(&minors_idr_lock);
+}
+
+static int afu_open(struct inode *inode, struct file *file)
+{
+	struct ocxl_afu *afu;
+	struct ocxl_context *ctx;
+	int rc;
+
+	pr_debug("%s for device %x\n", __func__, inode->i_rdev);
+
+	afu = find_and_get_afu(inode->i_rdev);
+	if (!afu)
+		return -ENODEV;
+
+	ctx = ocxl_context_alloc();
+	if (!ctx) {
+		rc = -ENOMEM;
+		goto put_afu;
+	}
+
+	rc = ocxl_context_init(ctx, afu, inode->i_mapping);
+	if (rc)
+		goto put_afu;
+	file->private_data = ctx;
+	ocxl_afu_put(afu);
+	return 0;
+
+put_afu:
+	ocxl_afu_put(afu);
+	return rc;
+}
+
+static long afu_ioctl_attach(struct ocxl_context *ctx,
+			struct ocxl_ioctl_attach __user *uarg)
+{
+	struct ocxl_ioctl_attach arg;
+	u64 amr = 0;
+	int rc;
+
+	pr_debug("%s for context %d\n", __func__, ctx->pasid);
+
+	if (copy_from_user(&arg, uarg, sizeof(arg)))
+		return -EFAULT;
+
+	/* Make sure reserved fields are not set for forward compatibility */
+	if (arg.reserved1 || arg.reserved2 || arg.reserved3)
+		return -EINVAL;
+
+	amr = arg.amr & mfspr(SPRN_UAMOR);
+	rc = ocxl_context_attach(ctx, amr);
+	return rc;
+}
+
+#define CMD_STR(x) (x == OCXL_IOCTL_ATTACH ? "ATTACH" :			\
+			x == OCXL_IOCTL_IRQ_ALLOC ? "IRQ_ALLOC" :	\
+			x == OCXL_IOCTL_IRQ_FREE ? "IRQ_FREE" :		\
+			x == OCXL_IOCTL_IRQ_SET_FD ? "IRQ_SET_FD" :	\
+			"UNKNOWN")
+
+static long afu_ioctl(struct file *file, unsigned int cmd,
+		unsigned long args)
+{
+	struct ocxl_context *ctx = file->private_data;
+	struct ocxl_ioctl_irq_fd irq_fd;
+	u64 irq_offset;
+	long rc;
+
+	pr_debug("%s for context %d, command %s\n", __func__, ctx->pasid,
+		CMD_STR(cmd));
+
+	if (ctx->status == CLOSED)
+		return -EIO;
+
+	switch (cmd) {
+	case OCXL_IOCTL_ATTACH:
+		rc = afu_ioctl_attach(ctx,
+				(struct ocxl_ioctl_attach __user *) args);
+		break;
+
+	case OCXL_IOCTL_IRQ_ALLOC:
+		rc = ocxl_afu_irq_alloc(ctx, &irq_offset);
+		if (!rc) {
+			rc = copy_to_user((u64 __user *) args, &irq_offset,
+					sizeof(irq_offset));
+			if (rc)
+				ocxl_afu_irq_free(ctx, irq_offset);
+		}
+		break;
+
+	case OCXL_IOCTL_IRQ_FREE:
+		rc = copy_from_user(&irq_offset, (u64 __user *) args,
+				sizeof(irq_offset));
+		if (rc)
+			return -EFAULT;
+		rc = ocxl_afu_irq_free(ctx, irq_offset);
+		break;
+
+	case OCXL_IOCTL_IRQ_SET_FD:
+		rc = copy_from_user(&irq_fd, (u64 __user *) args,
+				sizeof(irq_fd));
+		if (rc)
+			return -EFAULT;
+		if (irq_fd.reserved)
+			return -EINVAL;
+		rc = ocxl_afu_irq_set_fd(ctx, irq_fd.irq_offset,
+					irq_fd.eventfd);
+		break;
+
+	default:
+		rc = -EINVAL;
+	}
+	return rc;
+}
+
+static long afu_compat_ioctl(struct file *file, unsigned int cmd,
+			unsigned long args)
+{
+	return afu_ioctl(file, cmd, args);
+}
+
+static int afu_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct ocxl_context *ctx = file->private_data;
+
+	pr_debug("%s for context %d\n", __func__, ctx->pasid);
+	return ocxl_context_mmap(ctx, vma);
+}
+
+static bool has_xsl_error(struct ocxl_context *ctx)
+{
+	bool ret;
+
+	mutex_lock(&ctx->xsl_error_lock);
+	ret = !!ctx->xsl_error.addr;
+	mutex_unlock(&ctx->xsl_error_lock);
+
+	return ret;
+}
+
+/*
+ * Are there any events pending on the AFU
+ * ctx: The AFU context
+ * Returns: true if there are events pending
+ */
+static bool afu_events_pending(struct ocxl_context *ctx)
+{
+	if (has_xsl_error(ctx))
+		return true;
+	return false;
+}
+
+static unsigned int afu_poll(struct file *file, struct poll_table_struct *wait)
+{
+	struct ocxl_context *ctx = file->private_data;
+	unsigned int mask = 0;
+	bool closed;
+
+	pr_debug("%s for context %d\n", __func__, ctx->pasid);
+
+	poll_wait(file, &ctx->events_wq, wait);
+
+	mutex_lock(&ctx->status_mutex);
+	closed = (ctx->status == CLOSED);
+	mutex_unlock(&ctx->status_mutex);
+
+	if (afu_events_pending(ctx))
+		mask = POLLIN | POLLRDNORM;
+	else if (closed)
+		mask = POLLERR;
+
+	return mask;
+}
+
+/*
+ * Populate the supplied buffer with a single XSL error
+ * ctx:	The AFU context to report the error from
+ * header: the event header to populate
+ * buf: The buffer to write the body into (should be at least
+ *      AFU_EVENT_BODY_XSL_ERROR_SIZE)
+ * Return: the amount of buffer that was populated
+ */
+static ssize_t append_xsl_error(struct ocxl_context *ctx,
+				struct ocxl_kernel_event_header *header,
+				char __user *buf)
+{
+	struct ocxl_kernel_event_xsl_fault_error body;
+
+	memset(&body, 0, sizeof(body));
+
+	mutex_lock(&ctx->xsl_error_lock);
+	if (!ctx->xsl_error.addr) {
+		mutex_unlock(&ctx->xsl_error_lock);
+		return 0;
+	}
+
+	body.addr = ctx->xsl_error.addr;
+	body.dsisr = ctx->xsl_error.dsisr;
+	body.count = ctx->xsl_error.count;
+
+	ctx->xsl_error.addr = 0;
+	ctx->xsl_error.dsisr = 0;
+	ctx->xsl_error.count = 0;
+
+	mutex_unlock(&ctx->xsl_error_lock);
+
+	header->type = OCXL_AFU_EVENT_XSL_FAULT_ERROR;
+
+	if (copy_to_user(buf, &body, sizeof(body)))
+		return -EFAULT;
+
+	return sizeof(body);
+}
+
+#define AFU_EVENT_BODY_MAX_SIZE sizeof(struct ocxl_kernel_event_xsl_fault_error)
+
+/*
+ * Reports events on the AFU
+ * Format:
+ *	Header (struct ocxl_kernel_event_header)
+ *	Body (struct ocxl_kernel_event_*)
+ *	Header...
+ */
+static ssize_t afu_read(struct file *file, char __user *buf, size_t count,
+			loff_t *off)
+{
+	struct ocxl_context *ctx = file->private_data;
+	struct ocxl_kernel_event_header header;
+	ssize_t rc;
+	size_t used = 0;
+	DEFINE_WAIT(event_wait);
+
+	memset(&header, 0, sizeof(header));
+
+	/* Require offset to be 0 */
+	if (*off != 0)
+		return -EINVAL;
+
+	if (count < (sizeof(struct ocxl_kernel_event_header) +
+			AFU_EVENT_BODY_MAX_SIZE))
+		return -EINVAL;
+
+	for (;;) {
+		prepare_to_wait(&ctx->events_wq, &event_wait,
+				TASK_INTERRUPTIBLE);
+
+		if (afu_events_pending(ctx))
+			break;
+
+		if (ctx->status == CLOSED)
+			break;
+
+		if (file->f_flags & O_NONBLOCK) {
+			finish_wait(&ctx->events_wq, &event_wait);
+			return -EAGAIN;
+		}
+
+		if (signal_pending(current)) {
+			finish_wait(&ctx->events_wq, &event_wait);
+			return -ERESTARTSYS;
+		}
+
+		schedule();
+	}
+
+	finish_wait(&ctx->events_wq, &event_wait);
+
+	if (has_xsl_error(ctx)) {
+		used = append_xsl_error(ctx, &header, buf + sizeof(header));
+		if (used < 0)
+			return used;
+	}
+
+	if (!afu_events_pending(ctx))
+		header.flags |= OCXL_KERNEL_EVENT_FLAG_LAST;
+
+	if (copy_to_user(buf, &header, sizeof(header)))
+		return -EFAULT;
+
+	used += sizeof(header);
+
+	rc = (ssize_t) used;
+	return rc;
+}
+
+static int afu_release(struct inode *inode, struct file *file)
+{
+	struct ocxl_context *ctx = file->private_data;
+	int rc;
+
+	pr_debug("%s for device %x\n", __func__, inode->i_rdev);
+	rc = ocxl_context_detach(ctx);
+	mutex_lock(&ctx->mapping_lock);
+	ctx->mapping = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+	wake_up_all(&ctx->events_wq);
+	if (rc != -EBUSY)
+		ocxl_context_free(ctx);
+	return 0;
+}
+
+static const struct file_operations ocxl_afu_fops = {
+	.owner		= THIS_MODULE,
+	.open           = afu_open,
+	.unlocked_ioctl = afu_ioctl,
+	.compat_ioctl   = afu_compat_ioctl,
+	.mmap           = afu_mmap,
+	.poll           = afu_poll,
+	.read           = afu_read,
+	.release        = afu_release,
+};
+
+int ocxl_create_cdev(struct ocxl_afu *afu)
+{
+	int rc;
+
+	cdev_init(&afu->cdev, &ocxl_afu_fops);
+	rc = cdev_add(&afu->cdev, afu->dev.devt, 1);
+	if (rc) {
+		dev_err(&afu->dev, "Unable to add afu char device: %d\n", rc);
+		return rc;
+	}
+	return 0;
+}
+
+void ocxl_destroy_cdev(struct ocxl_afu *afu)
+{
+	cdev_del(&afu->cdev);
+}
+
+int ocxl_register_afu(struct ocxl_afu *afu)
+{
+	int minor;
+
+	minor = allocate_afu_minor(afu);
+	if (minor < 0)
+		return minor;
+	afu->dev.devt = MKDEV(MAJOR(ocxl_dev), minor);
+	afu->dev.class = ocxl_class;
+	return device_register(&afu->dev);
+}
+
+void ocxl_unregister_afu(struct ocxl_afu *afu)
+{
+	free_afu_minor(afu);
+}
+
+static char *ocxl_devnode(struct device *dev, umode_t *mode)
+{
+	return kasprintf(GFP_KERNEL, "ocxl/%s", dev_name(dev));
+}
+
+int ocxl_file_init(void)
+{
+	int rc;
+
+	mutex_init(&minors_idr_lock);
+	idr_init(&minors_idr);
+
+	rc = alloc_chrdev_region(&ocxl_dev, 0, OCXL_NUM_MINORS, "ocxl");
+	if (rc) {
+		pr_err("Unable to allocate ocxl major number: %d\n", rc);
+		return rc;
+	}
+
+	ocxl_class = class_create(THIS_MODULE, "ocxl");
+	if (IS_ERR(ocxl_class)) {
+		pr_err("Unable to create ocxl class\n");
+		unregister_chrdev_region(ocxl_dev, OCXL_NUM_MINORS);
+		return PTR_ERR(ocxl_class);
+	}
+
+	ocxl_class->devnode = ocxl_devnode;
+	return 0;
+}
+
+void ocxl_file_exit(void)
+{
+	class_destroy(ocxl_class);
+	unregister_chrdev_region(ocxl_dev, OCXL_NUM_MINORS);
+	idr_destroy(&minors_idr);
+}
diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c
new file mode 100644
index 000000000000..f30790582dc0
--- /dev/null
+++ b/drivers/misc/ocxl/link.c
@@ -0,0 +1,647 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <linux/sched/mm.h>
+#include <linux/mutex.h>
+#include <linux/mmu_context.h>
+#include <asm/copro.h>
+#include <asm/pnv-ocxl.h>
+#include <misc/ocxl.h>
+#include "ocxl_internal.h"
+#include "trace.h"
+
+
+#define SPA_PASID_BITS		15
+#define SPA_PASID_MAX		((1 << SPA_PASID_BITS) - 1)
+#define SPA_PE_MASK		SPA_PASID_MAX
+#define SPA_SPA_SIZE_LOG	22 /* Each SPA is 4 Mb */
+
+#define SPA_CFG_SF		(1ull << (63-0))
+#define SPA_CFG_TA		(1ull << (63-1))
+#define SPA_CFG_HV		(1ull << (63-3))
+#define SPA_CFG_UV		(1ull << (63-4))
+#define SPA_CFG_XLAT_hpt	(0ull << (63-6)) /* Hashed page table (HPT) mode */
+#define SPA_CFG_XLAT_roh	(2ull << (63-6)) /* Radix on HPT mode */
+#define SPA_CFG_XLAT_ror	(3ull << (63-6)) /* Radix on Radix mode */
+#define SPA_CFG_PR		(1ull << (63-49))
+#define SPA_CFG_TC		(1ull << (63-54))
+#define SPA_CFG_DR		(1ull << (63-59))
+
+#define SPA_XSL_TF		(1ull << (63-3))  /* Translation fault */
+#define SPA_XSL_S		(1ull << (63-38)) /* Store operation */
+
+#define SPA_PE_VALID		0x80000000
+
+
+struct pe_data {
+	struct mm_struct *mm;
+	/* callback to trigger when a translation fault occurs */
+	void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr);
+	/* opaque pointer to be passed to the above callback */
+	void *xsl_err_data;
+	struct rcu_head rcu;
+};
+
+struct spa {
+	struct ocxl_process_element *spa_mem;
+	int spa_order;
+	struct mutex spa_lock;
+	struct radix_tree_root pe_tree; /* Maps PE handles to pe_data */
+	char *irq_name;
+	int virq;
+	void __iomem *reg_dsisr;
+	void __iomem *reg_dar;
+	void __iomem *reg_tfc;
+	void __iomem *reg_pe_handle;
+	/*
+	 * The following field are used by the memory fault
+	 * interrupt handler. We can only have one interrupt at a
+	 * time. The NPU won't raise another interrupt until the
+	 * previous one has been ack'd by writing to the TFC register
+	 */
+	struct xsl_fault {
+		struct work_struct fault_work;
+		u64 pe;
+		u64 dsisr;
+		u64 dar;
+		struct pe_data pe_data;
+	} xsl_fault;
+};
+
+/*
+ * A opencapi link can be used be by several PCI functions. We have
+ * one link per device slot.
+ *
+ * A linked list of opencapi links should suffice, as there's a
+ * limited number of opencapi slots on a system and lookup is only
+ * done when the device is probed
+ */
+struct link {
+	struct list_head list;
+	struct kref ref;
+	int domain;
+	int bus;
+	int dev;
+	atomic_t irq_available;
+	struct spa *spa;
+	void *platform_data;
+};
+static struct list_head links_list = LIST_HEAD_INIT(links_list);
+static DEFINE_MUTEX(links_list_lock);
+
+enum xsl_response {
+	CONTINUE,
+	ADDRESS_ERROR,
+	RESTART,
+};
+
+
+static void read_irq(struct spa *spa, u64 *dsisr, u64 *dar, u64 *pe)
+{
+	u64 reg;
+
+	*dsisr = in_be64(spa->reg_dsisr);
+	*dar = in_be64(spa->reg_dar);
+	reg = in_be64(spa->reg_pe_handle);
+	*pe = reg & SPA_PE_MASK;
+}
+
+static void ack_irq(struct spa *spa, enum xsl_response r)
+{
+	u64 reg = 0;
+
+	/* continue is not supported */
+	if (r == RESTART)
+		reg = PPC_BIT(31);
+	else if (r == ADDRESS_ERROR)
+		reg = PPC_BIT(30);
+	else
+		WARN(1, "Invalid irq response %d\n", r);
+
+	if (reg) {
+		trace_ocxl_fault_ack(spa->spa_mem, spa->xsl_fault.pe,
+				spa->xsl_fault.dsisr, spa->xsl_fault.dar, reg);
+		out_be64(spa->reg_tfc, reg);
+	}
+}
+
+static void xsl_fault_handler_bh(struct work_struct *fault_work)
+{
+	unsigned int flt = 0;
+	unsigned long access, flags, inv_flags = 0;
+	enum xsl_response r;
+	struct xsl_fault *fault = container_of(fault_work, struct xsl_fault,
+					fault_work);
+	struct spa *spa = container_of(fault, struct spa, xsl_fault);
+
+	int rc;
+
+	/*
+	 * We need to release a reference on the mm whenever exiting this
+	 * function (taken in the memory fault interrupt handler)
+	 */
+	rc = copro_handle_mm_fault(fault->pe_data.mm, fault->dar, fault->dsisr,
+				&flt);
+	if (rc) {
+		pr_debug("copro_handle_mm_fault failed: %d\n", rc);
+		if (fault->pe_data.xsl_err_cb) {
+			fault->pe_data.xsl_err_cb(
+				fault->pe_data.xsl_err_data,
+				fault->dar, fault->dsisr);
+		}
+		r = ADDRESS_ERROR;
+		goto ack;
+	}
+
+	if (!radix_enabled()) {
+		/*
+		 * update_mmu_cache() will not have loaded the hash
+		 * since current->trap is not a 0x400 or 0x300, so
+		 * just call hash_page_mm() here.
+		 */
+		access = _PAGE_PRESENT | _PAGE_READ;
+		if (fault->dsisr & SPA_XSL_S)
+			access |= _PAGE_WRITE;
+
+		if (REGION_ID(fault->dar) != USER_REGION_ID)
+			access |= _PAGE_PRIVILEGED;
+
+		local_irq_save(flags);
+		hash_page_mm(fault->pe_data.mm, fault->dar, access, 0x300,
+			inv_flags);
+		local_irq_restore(flags);
+	}
+	r = RESTART;
+ack:
+	mmdrop(fault->pe_data.mm);
+	ack_irq(spa, r);
+}
+
+static irqreturn_t xsl_fault_handler(int irq, void *data)
+{
+	struct link *link = (struct link *) data;
+	struct spa *spa = link->spa;
+	u64 dsisr, dar, pe_handle;
+	struct pe_data *pe_data;
+	struct ocxl_process_element *pe;
+	int lpid, pid, tid;
+
+	read_irq(spa, &dsisr, &dar, &pe_handle);
+	trace_ocxl_fault(spa->spa_mem, pe_handle, dsisr, dar, -1);
+
+	WARN_ON(pe_handle > SPA_PE_MASK);
+	pe = spa->spa_mem + pe_handle;
+	lpid = be32_to_cpu(pe->lpid);
+	pid = be32_to_cpu(pe->pid);
+	tid = be32_to_cpu(pe->tid);
+	/* We could be reading all null values here if the PE is being
+	 * removed while an interrupt kicks in. It's not supposed to
+	 * happen if the driver notified the AFU to terminate the
+	 * PASID, and the AFU waited for pending operations before
+	 * acknowledging. But even if it happens, we won't find a
+	 * memory context below and fail silently, so it should be ok.
+	 */
+	if (!(dsisr & SPA_XSL_TF)) {
+		WARN(1, "Invalid xsl interrupt fault register %#llx\n", dsisr);
+		ack_irq(spa, ADDRESS_ERROR);
+		return IRQ_HANDLED;
+	}
+
+	rcu_read_lock();
+	pe_data = radix_tree_lookup(&spa->pe_tree, pe_handle);
+	if (!pe_data) {
+		/*
+		 * Could only happen if the driver didn't notify the
+		 * AFU about PASID termination before removing the PE,
+		 * or the AFU didn't wait for all memory access to
+		 * have completed.
+		 *
+		 * Either way, we fail early, but we shouldn't log an
+		 * error message, as it is a valid (if unexpected)
+		 * scenario
+		 */
+		rcu_read_unlock();
+		pr_debug("Unknown mm context for xsl interrupt\n");
+		ack_irq(spa, ADDRESS_ERROR);
+		return IRQ_HANDLED;
+	}
+	WARN_ON(pe_data->mm->context.id != pid);
+
+	spa->xsl_fault.pe = pe_handle;
+	spa->xsl_fault.dar = dar;
+	spa->xsl_fault.dsisr = dsisr;
+	spa->xsl_fault.pe_data = *pe_data;
+	mmgrab(pe_data->mm); /* mm count is released by bottom half */
+
+	rcu_read_unlock();
+	schedule_work(&spa->xsl_fault.fault_work);
+	return IRQ_HANDLED;
+}
+
+static void unmap_irq_registers(struct spa *spa)
+{
+	pnv_ocxl_unmap_xsl_regs(spa->reg_dsisr, spa->reg_dar, spa->reg_tfc,
+				spa->reg_pe_handle);
+}
+
+static int map_irq_registers(struct pci_dev *dev, struct spa *spa)
+{
+	return pnv_ocxl_map_xsl_regs(dev, &spa->reg_dsisr, &spa->reg_dar,
+				&spa->reg_tfc, &spa->reg_pe_handle);
+}
+
+static int setup_xsl_irq(struct pci_dev *dev, struct link *link)
+{
+	struct spa *spa = link->spa;
+	int rc;
+	int hwirq;
+
+	rc = pnv_ocxl_get_xsl_irq(dev, &hwirq);
+	if (rc)
+		return rc;
+
+	rc = map_irq_registers(dev, spa);
+	if (rc)
+		return rc;
+
+	spa->irq_name = kasprintf(GFP_KERNEL, "ocxl-xsl-%x-%x-%x",
+				link->domain, link->bus, link->dev);
+	if (!spa->irq_name) {
+		unmap_irq_registers(spa);
+		dev_err(&dev->dev, "Can't allocate name for xsl interrupt\n");
+		return -ENOMEM;
+	}
+	/*
+	 * At some point, we'll need to look into allowing a higher
+	 * number of interrupts. Could we have an IRQ domain per link?
+	 */
+	spa->virq = irq_create_mapping(NULL, hwirq);
+	if (!spa->virq) {
+		kfree(spa->irq_name);
+		unmap_irq_registers(spa);
+		dev_err(&dev->dev,
+			"irq_create_mapping failed for translation interrupt\n");
+		return -EINVAL;
+	}
+
+	dev_dbg(&dev->dev, "hwirq %d mapped to virq %d\n", hwirq, spa->virq);
+
+	rc = request_irq(spa->virq, xsl_fault_handler, 0, spa->irq_name,
+			link);
+	if (rc) {
+		irq_dispose_mapping(spa->virq);
+		kfree(spa->irq_name);
+		unmap_irq_registers(spa);
+		dev_err(&dev->dev,
+			"request_irq failed for translation interrupt: %d\n",
+			rc);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void release_xsl_irq(struct link *link)
+{
+	struct spa *spa = link->spa;
+
+	if (spa->virq) {
+		free_irq(spa->virq, link);
+		irq_dispose_mapping(spa->virq);
+	}
+	kfree(spa->irq_name);
+	unmap_irq_registers(spa);
+}
+
+static int alloc_spa(struct pci_dev *dev, struct link *link)
+{
+	struct spa *spa;
+
+	spa = kzalloc(sizeof(struct spa), GFP_KERNEL);
+	if (!spa)
+		return -ENOMEM;
+
+	mutex_init(&spa->spa_lock);
+	INIT_RADIX_TREE(&spa->pe_tree, GFP_KERNEL);
+	INIT_WORK(&spa->xsl_fault.fault_work, xsl_fault_handler_bh);
+
+	spa->spa_order = SPA_SPA_SIZE_LOG - PAGE_SHIFT;
+	spa->spa_mem = (struct ocxl_process_element *)
+		__get_free_pages(GFP_KERNEL | __GFP_ZERO, spa->spa_order);
+	if (!spa->spa_mem) {
+		dev_err(&dev->dev, "Can't allocate Shared Process Area\n");
+		kfree(spa);
+		return -ENOMEM;
+	}
+	pr_debug("Allocated SPA for %x:%x:%x at %p\n", link->domain, link->bus,
+		link->dev, spa->spa_mem);
+
+	link->spa = spa;
+	return 0;
+}
+
+static void free_spa(struct link *link)
+{
+	struct spa *spa = link->spa;
+
+	pr_debug("Freeing SPA for %x:%x:%x\n", link->domain, link->bus,
+		link->dev);
+
+	if (spa && spa->spa_mem) {
+		free_pages((unsigned long) spa->spa_mem, spa->spa_order);
+		kfree(spa);
+		link->spa = NULL;
+	}
+}
+
+static int alloc_link(struct pci_dev *dev, int PE_mask, struct link **out_link)
+{
+	struct link *link;
+	int rc;
+
+	link = kzalloc(sizeof(struct link), GFP_KERNEL);
+	if (!link)
+		return -ENOMEM;
+
+	kref_init(&link->ref);
+	link->domain = pci_domain_nr(dev->bus);
+	link->bus = dev->bus->number;
+	link->dev = PCI_SLOT(dev->devfn);
+	atomic_set(&link->irq_available, MAX_IRQ_PER_LINK);
+
+	rc = alloc_spa(dev, link);
+	if (rc)
+		goto err_free;
+
+	rc = setup_xsl_irq(dev, link);
+	if (rc)
+		goto err_spa;
+
+	/* platform specific hook */
+	rc = pnv_ocxl_spa_setup(dev, link->spa->spa_mem, PE_mask,
+				&link->platform_data);
+	if (rc)
+		goto err_xsl_irq;
+
+	*out_link = link;
+	return 0;
+
+err_xsl_irq:
+	release_xsl_irq(link);
+err_spa:
+	free_spa(link);
+err_free:
+	kfree(link);
+	return rc;
+}
+
+static void free_link(struct link *link)
+{
+	release_xsl_irq(link);
+	free_spa(link);
+	kfree(link);
+}
+
+int ocxl_link_setup(struct pci_dev *dev, int PE_mask, void **link_handle)
+{
+	int rc = 0;
+	struct link *link;
+
+	mutex_lock(&links_list_lock);
+	list_for_each_entry(link, &links_list, list) {
+		/* The functions of a device all share the same link */
+		if (link->domain == pci_domain_nr(dev->bus) &&
+			link->bus == dev->bus->number &&
+			link->dev == PCI_SLOT(dev->devfn)) {
+			kref_get(&link->ref);
+			*link_handle = link;
+			goto unlock;
+		}
+	}
+	rc = alloc_link(dev, PE_mask, &link);
+	if (rc)
+		goto unlock;
+
+	list_add(&link->list, &links_list);
+	*link_handle = link;
+unlock:
+	mutex_unlock(&links_list_lock);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(ocxl_link_setup);
+
+static void release_xsl(struct kref *ref)
+{
+	struct link *link = container_of(ref, struct link, ref);
+
+	list_del(&link->list);
+	/* call platform code before releasing data */
+	pnv_ocxl_spa_release(link->platform_data);
+	free_link(link);
+}
+
+void ocxl_link_release(struct pci_dev *dev, void *link_handle)
+{
+	struct link *link = (struct link *) link_handle;
+
+	mutex_lock(&links_list_lock);
+	kref_put(&link->ref, release_xsl);
+	mutex_unlock(&links_list_lock);
+}
+EXPORT_SYMBOL_GPL(ocxl_link_release);
+
+static u64 calculate_cfg_state(bool kernel)
+{
+	u64 state;
+
+	state = SPA_CFG_DR;
+	if (mfspr(SPRN_LPCR) & LPCR_TC)
+		state |= SPA_CFG_TC;
+	if (radix_enabled())
+		state |= SPA_CFG_XLAT_ror;
+	else
+		state |= SPA_CFG_XLAT_hpt;
+	state |= SPA_CFG_HV;
+	if (kernel) {
+		if (mfmsr() & MSR_SF)
+			state |= SPA_CFG_SF;
+	} else {
+		state |= SPA_CFG_PR;
+		if (!test_tsk_thread_flag(current, TIF_32BIT))
+			state |= SPA_CFG_SF;
+	}
+	return state;
+}
+
+int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
+		u64 amr, struct mm_struct *mm,
+		void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr),
+		void *xsl_err_data)
+{
+	struct link *link = (struct link *) link_handle;
+	struct spa *spa = link->spa;
+	struct ocxl_process_element *pe;
+	int pe_handle, rc = 0;
+	struct pe_data *pe_data;
+
+	BUILD_BUG_ON(sizeof(struct ocxl_process_element) != 128);
+	if (pasid > SPA_PASID_MAX)
+		return -EINVAL;
+
+	mutex_lock(&spa->spa_lock);
+	pe_handle = pasid & SPA_PE_MASK;
+	pe = spa->spa_mem + pe_handle;
+
+	if (pe->software_state) {
+		rc = -EBUSY;
+		goto unlock;
+	}
+
+	pe_data = kmalloc(sizeof(*pe_data), GFP_KERNEL);
+	if (!pe_data) {
+		rc = -ENOMEM;
+		goto unlock;
+	}
+
+	pe_data->mm = mm;
+	pe_data->xsl_err_cb = xsl_err_cb;
+	pe_data->xsl_err_data = xsl_err_data;
+
+	memset(pe, 0, sizeof(struct ocxl_process_element));
+	pe->config_state = cpu_to_be64(calculate_cfg_state(pidr == 0));
+	pe->lpid = cpu_to_be32(mfspr(SPRN_LPID));
+	pe->pid = cpu_to_be32(pidr);
+	pe->tid = cpu_to_be32(tidr);
+	pe->amr = cpu_to_be64(amr);
+	pe->software_state = cpu_to_be32(SPA_PE_VALID);
+
+	mm_context_add_copro(mm);
+	/*
+	 * Barrier is to make sure PE is visible in the SPA before it
+	 * is used by the device. It also helps with the global TLBI
+	 * invalidation
+	 */
+	mb();
+	radix_tree_insert(&spa->pe_tree, pe_handle, pe_data);
+
+	/*
+	 * The mm must stay valid for as long as the device uses it. We
+	 * lower the count when the context is removed from the SPA.
+	 *
+	 * We grab mm_count (and not mm_users), as we don't want to
+	 * end up in a circular dependency if a process mmaps its
+	 * mmio, therefore incrementing the file ref count when
+	 * calling mmap(), and forgets to unmap before exiting. In
+	 * that scenario, when the kernel handles the death of the
+	 * process, the file is not cleaned because unmap was not
+	 * called, and the mm wouldn't be freed because we would still
+	 * have a reference on mm_users. Incrementing mm_count solves
+	 * the problem.
+	 */
+	mmgrab(mm);
+	trace_ocxl_context_add(current->pid, spa->spa_mem, pasid, pidr, tidr);
+unlock:
+	mutex_unlock(&spa->spa_lock);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(ocxl_link_add_pe);
+
+int ocxl_link_remove_pe(void *link_handle, int pasid)
+{
+	struct link *link = (struct link *) link_handle;
+	struct spa *spa = link->spa;
+	struct ocxl_process_element *pe;
+	struct pe_data *pe_data;
+	int pe_handle, rc;
+
+	if (pasid > SPA_PASID_MAX)
+		return -EINVAL;
+
+	/*
+	 * About synchronization with our memory fault handler:
+	 *
+	 * Before removing the PE, the driver is supposed to have
+	 * notified the AFU, which should have cleaned up and make
+	 * sure the PASID is no longer in use, including pending
+	 * interrupts. However, there's no way to be sure...
+	 *
+	 * We clear the PE and remove the context from our radix
+	 * tree. From that point on, any new interrupt for that
+	 * context will fail silently, which is ok. As mentioned
+	 * above, that's not expected, but it could happen if the
+	 * driver or AFU didn't do the right thing.
+	 *
+	 * There could still be a bottom half running, but we don't
+	 * need to wait/flush, as it is managing a reference count on
+	 * the mm it reads from the radix tree.
+	 */
+	pe_handle = pasid & SPA_PE_MASK;
+	pe = spa->spa_mem + pe_handle;
+
+	mutex_lock(&spa->spa_lock);
+
+	if (!(be32_to_cpu(pe->software_state) & SPA_PE_VALID)) {
+		rc = -EINVAL;
+		goto unlock;
+	}
+
+	trace_ocxl_context_remove(current->pid, spa->spa_mem, pasid,
+				be32_to_cpu(pe->pid), be32_to_cpu(pe->tid));
+
+	memset(pe, 0, sizeof(struct ocxl_process_element));
+	/*
+	 * The barrier makes sure the PE is removed from the SPA
+	 * before we clear the NPU context cache below, so that the
+	 * old PE cannot be reloaded erroneously.
+	 */
+	mb();
+
+	/*
+	 * hook to platform code
+	 * On powerpc, the entry needs to be cleared from the context
+	 * cache of the NPU.
+	 */
+	rc = pnv_ocxl_spa_remove_pe(link->platform_data, pe_handle);
+	WARN_ON(rc);
+
+	pe_data = radix_tree_delete(&spa->pe_tree, pe_handle);
+	if (!pe_data) {
+		WARN(1, "Couldn't find pe data when removing PE\n");
+	} else {
+		mm_context_remove_copro(pe_data->mm);
+		mmdrop(pe_data->mm);
+		kfree_rcu(pe_data, rcu);
+	}
+unlock:
+	mutex_unlock(&spa->spa_lock);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(ocxl_link_remove_pe);
+
+int ocxl_link_irq_alloc(void *link_handle, int *hw_irq, u64 *trigger_addr)
+{
+	struct link *link = (struct link *) link_handle;
+	int rc, irq;
+	u64 addr;
+
+	if (atomic_dec_if_positive(&link->irq_available) < 0)
+		return -ENOSPC;
+
+	rc = pnv_ocxl_alloc_xive_irq(&irq, &addr);
+	if (rc) {
+		atomic_inc(&link->irq_available);
+		return rc;
+	}
+
+	*hw_irq = irq;
+	*trigger_addr = addr;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ocxl_link_irq_alloc);
+
+void ocxl_link_free_irq(void *link_handle, int hw_irq)
+{
+	struct link *link = (struct link *) link_handle;
+
+	pnv_ocxl_free_xive_irq(hw_irq);
+	atomic_inc(&link->irq_available);
+}
+EXPORT_SYMBOL_GPL(ocxl_link_free_irq);
diff --git a/drivers/misc/ocxl/main.c b/drivers/misc/ocxl/main.c
new file mode 100644
index 000000000000..7210d9e059be
--- /dev/null
+++ b/drivers/misc/ocxl/main.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <linux/module.h>
+#include <linux/pci.h>
+#include "ocxl_internal.h"
+
+static int __init init_ocxl(void)
+{
+	int rc = 0;
+
+	rc = ocxl_file_init();
+	if (rc)
+		return rc;
+
+	rc = pci_register_driver(&ocxl_pci_driver);
+	if (rc) {
+		ocxl_file_exit();
+		return rc;
+	}
+	return 0;
+}
+
+static void exit_ocxl(void)
+{
+	pci_unregister_driver(&ocxl_pci_driver);
+	ocxl_file_exit();
+}
+
+module_init(init_ocxl);
+module_exit(exit_ocxl);
+
+MODULE_DESCRIPTION("Open Coherent Accelerator");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/ocxl/ocxl_internal.h b/drivers/misc/ocxl/ocxl_internal.h
new file mode 100644
index 000000000000..5d421824afd9
--- /dev/null
+++ b/drivers/misc/ocxl/ocxl_internal.h
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#ifndef _OCXL_INTERNAL_H_
+#define _OCXL_INTERNAL_H_
+
+#include <linux/pci.h>
+#include <linux/cdev.h>
+#include <linux/list.h>
+#include <misc/ocxl.h>
+
+#define MAX_IRQ_PER_LINK	2000
+#define MAX_IRQ_PER_CONTEXT	MAX_IRQ_PER_LINK
+
+#define to_ocxl_function(d) container_of(d, struct ocxl_fn, dev)
+#define to_ocxl_afu(d) container_of(d, struct ocxl_afu, dev)
+
+extern struct pci_driver ocxl_pci_driver;
+
+
+struct ocxl_fn {
+	struct device dev;
+	int bar_used[3];
+	struct ocxl_fn_config config;
+	struct list_head afu_list;
+	int pasid_base;
+	int actag_base;
+	int actag_enabled;
+	int actag_supported;
+	struct list_head pasid_list;
+	struct list_head actag_list;
+	void *link;
+};
+
+struct ocxl_afu {
+	struct ocxl_fn *fn;
+	struct list_head list;
+	struct device dev;
+	struct cdev cdev;
+	struct ocxl_afu_config config;
+	int pasid_base;
+	int pasid_count; /* opened contexts */
+	int pasid_max; /* maximum number of contexts */
+	int actag_base;
+	int actag_enabled;
+	struct mutex contexts_lock;
+	struct idr contexts_idr;
+	struct mutex afu_control_lock;
+	u64 global_mmio_start;
+	u64 irq_base_offset;
+	void __iomem *global_mmio_ptr;
+	u64 pp_mmio_start;
+	struct bin_attribute attr_global_mmio;
+};
+
+enum ocxl_context_status {
+	CLOSED,
+	OPENED,
+	ATTACHED,
+};
+
+// Contains metadata about a translation fault
+struct ocxl_xsl_error {
+	u64 addr; // The address that triggered the fault
+	u64 dsisr; // the value of the dsisr register
+	u64 count; // The number of times this fault has been triggered
+};
+
+struct ocxl_context {
+	struct ocxl_afu *afu;
+	int pasid;
+	struct mutex status_mutex;
+	enum ocxl_context_status status;
+	struct address_space *mapping;
+	struct mutex mapping_lock;
+	wait_queue_head_t events_wq;
+	struct mutex xsl_error_lock;
+	struct ocxl_xsl_error xsl_error;
+	struct mutex irq_lock;
+	struct idr irq_idr;
+};
+
+struct ocxl_process_element {
+	__be64 config_state;
+	__be32 reserved1[11];
+	__be32 lpid;
+	__be32 tid;
+	__be32 pid;
+	__be32 reserved2[10];
+	__be64 amr;
+	__be32 reserved3[3];
+	__be32 software_state;
+};
+
+
+extern struct ocxl_afu *ocxl_afu_get(struct ocxl_afu *afu);
+extern void ocxl_afu_put(struct ocxl_afu *afu);
+
+extern int ocxl_create_cdev(struct ocxl_afu *afu);
+extern void ocxl_destroy_cdev(struct ocxl_afu *afu);
+extern int ocxl_register_afu(struct ocxl_afu *afu);
+extern void ocxl_unregister_afu(struct ocxl_afu *afu);
+
+extern int ocxl_file_init(void);
+extern void ocxl_file_exit(void);
+
+extern int ocxl_pasid_afu_alloc(struct ocxl_fn *fn, u32 size);
+extern void ocxl_pasid_afu_free(struct ocxl_fn *fn, u32 start, u32 size);
+extern int ocxl_actag_afu_alloc(struct ocxl_fn *fn, u32 size);
+extern void ocxl_actag_afu_free(struct ocxl_fn *fn, u32 start, u32 size);
+
+extern struct ocxl_context *ocxl_context_alloc(void);
+extern int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu,
+			struct address_space *mapping);
+extern int ocxl_context_attach(struct ocxl_context *ctx, u64 amr);
+extern int ocxl_context_mmap(struct ocxl_context *ctx,
+			struct vm_area_struct *vma);
+extern int ocxl_context_detach(struct ocxl_context *ctx);
+extern void ocxl_context_detach_all(struct ocxl_afu *afu);
+extern void ocxl_context_free(struct ocxl_context *ctx);
+
+extern int ocxl_sysfs_add_afu(struct ocxl_afu *afu);
+extern void ocxl_sysfs_remove_afu(struct ocxl_afu *afu);
+
+extern int ocxl_afu_irq_alloc(struct ocxl_context *ctx, u64 *irq_offset);
+extern int ocxl_afu_irq_free(struct ocxl_context *ctx, u64 irq_offset);
+extern void ocxl_afu_irq_free_all(struct ocxl_context *ctx);
+extern int ocxl_afu_irq_set_fd(struct ocxl_context *ctx, u64 irq_offset,
+			int eventfd);
+extern u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, u64 irq_offset);
+
+#endif /* _OCXL_INTERNAL_H_ */
diff --git a/drivers/misc/ocxl/pasid.c b/drivers/misc/ocxl/pasid.c
new file mode 100644
index 000000000000..d14cb56e6920
--- /dev/null
+++ b/drivers/misc/ocxl/pasid.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include "ocxl_internal.h"
+
+
+struct id_range {
+	struct list_head list;
+	u32 start;
+	u32 end;
+};
+
+#ifdef DEBUG
+static void dump_list(struct list_head *head, char *type_str)
+{
+	struct id_range *cur;
+
+	pr_debug("%s ranges allocated:\n", type_str);
+	list_for_each_entry(cur, head, list) {
+		pr_debug("Range %d->%d\n", cur->start, cur->end);
+	}
+}
+#endif
+
+static int range_alloc(struct list_head *head, u32 size, int max_id,
+		char *type_str)
+{
+	struct list_head *pos;
+	struct id_range *cur, *new;
+	int rc, last_end;
+
+	new = kmalloc(sizeof(struct id_range), GFP_KERNEL);
+	if (!new)
+		return -ENOMEM;
+
+	pos = head;
+	last_end = -1;
+	list_for_each_entry(cur, head, list) {
+		if ((cur->start - last_end) > size)
+			break;
+		last_end = cur->end;
+		pos = &cur->list;
+	}
+
+	new->start = last_end + 1;
+	new->end = new->start + size - 1;
+
+	if (new->end > max_id) {
+		kfree(new);
+		rc = -ENOSPC;
+	} else {
+		list_add(&new->list, pos);
+		rc = new->start;
+	}
+
+#ifdef DEBUG
+	dump_list(head, type_str);
+#endif
+	return rc;
+}
+
+static void range_free(struct list_head *head, u32 start, u32 size,
+		char *type_str)
+{
+	bool found = false;
+	struct id_range *cur, *tmp;
+
+	list_for_each_entry_safe(cur, tmp, head, list) {
+		if (cur->start == start && cur->end == (start + size - 1)) {
+			found = true;
+			list_del(&cur->list);
+			kfree(cur);
+			break;
+		}
+	}
+	WARN_ON(!found);
+#ifdef DEBUG
+	dump_list(head, type_str);
+#endif
+}
+
+int ocxl_pasid_afu_alloc(struct ocxl_fn *fn, u32 size)
+{
+	int max_pasid;
+
+	if (fn->config.max_pasid_log < 0)
+		return -ENOSPC;
+	max_pasid = 1 << fn->config.max_pasid_log;
+	return range_alloc(&fn->pasid_list, size, max_pasid, "afu pasid");
+}
+
+void ocxl_pasid_afu_free(struct ocxl_fn *fn, u32 start, u32 size)
+{
+	return range_free(&fn->pasid_list, start, size, "afu pasid");
+}
+
+int ocxl_actag_afu_alloc(struct ocxl_fn *fn, u32 size)
+{
+	int max_actag;
+
+	max_actag = fn->actag_enabled;
+	return range_alloc(&fn->actag_list, size, max_actag, "afu actag");
+}
+
+void ocxl_actag_afu_free(struct ocxl_fn *fn, u32 start, u32 size)
+{
+	return range_free(&fn->actag_list, start, size, "afu actag");
+}
diff --git a/drivers/misc/ocxl/pci.c b/drivers/misc/ocxl/pci.c
new file mode 100644
index 000000000000..0051d9ec76cc
--- /dev/null
+++ b/drivers/misc/ocxl/pci.c
@@ -0,0 +1,585 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/idr.h>
+#include <asm/pnv-ocxl.h>
+#include "ocxl_internal.h"
+
+/*
+ * Any opencapi device which wants to use this 'generic' driver should
+ * use the 0x062B device ID. Vendors should define the subsystem
+ * vendor/device ID to help differentiate devices.
+ */
+static const struct pci_device_id ocxl_pci_tbl[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x062B), },
+	{ }
+};
+MODULE_DEVICE_TABLE(pci, ocxl_pci_tbl);
+
+
+static struct ocxl_fn *ocxl_fn_get(struct ocxl_fn *fn)
+{
+	return (get_device(&fn->dev) == NULL) ? NULL : fn;
+}
+
+static void ocxl_fn_put(struct ocxl_fn *fn)
+{
+	put_device(&fn->dev);
+}
+
+struct ocxl_afu *ocxl_afu_get(struct ocxl_afu *afu)
+{
+	return (get_device(&afu->dev) == NULL) ? NULL : afu;
+}
+
+void ocxl_afu_put(struct ocxl_afu *afu)
+{
+	put_device(&afu->dev);
+}
+
+static struct ocxl_afu *alloc_afu(struct ocxl_fn *fn)
+{
+	struct ocxl_afu *afu;
+
+	afu = kzalloc(sizeof(struct ocxl_afu), GFP_KERNEL);
+	if (!afu)
+		return NULL;
+
+	mutex_init(&afu->contexts_lock);
+	mutex_init(&afu->afu_control_lock);
+	idr_init(&afu->contexts_idr);
+	afu->fn = fn;
+	ocxl_fn_get(fn);
+	return afu;
+}
+
+static void free_afu(struct ocxl_afu *afu)
+{
+	idr_destroy(&afu->contexts_idr);
+	ocxl_fn_put(afu->fn);
+	kfree(afu);
+}
+
+static void free_afu_dev(struct device *dev)
+{
+	struct ocxl_afu *afu = to_ocxl_afu(dev);
+
+	ocxl_unregister_afu(afu);
+	free_afu(afu);
+}
+
+static int set_afu_device(struct ocxl_afu *afu, const char *location)
+{
+	struct ocxl_fn *fn = afu->fn;
+	int rc;
+
+	afu->dev.parent = &fn->dev;
+	afu->dev.release = free_afu_dev;
+	rc = dev_set_name(&afu->dev, "%s.%s.%hhu", afu->config.name, location,
+		afu->config.idx);
+	return rc;
+}
+
+static int assign_afu_actag(struct ocxl_afu *afu, struct pci_dev *dev)
+{
+	struct ocxl_fn *fn = afu->fn;
+	int actag_count, actag_offset;
+
+	/*
+	 * if there were not enough actags for the function, each afu
+	 * reduces its count as well
+	 */
+	actag_count = afu->config.actag_supported *
+		fn->actag_enabled / fn->actag_supported;
+	actag_offset = ocxl_actag_afu_alloc(fn, actag_count);
+	if (actag_offset < 0) {
+		dev_err(&afu->dev, "Can't allocate %d actags for AFU: %d\n",
+			actag_count, actag_offset);
+		return actag_offset;
+	}
+	afu->actag_base = fn->actag_base + actag_offset;
+	afu->actag_enabled = actag_count;
+
+	ocxl_config_set_afu_actag(dev, afu->config.dvsec_afu_control_pos,
+				afu->actag_base, afu->actag_enabled);
+	dev_dbg(&afu->dev, "actag base=%d enabled=%d\n",
+		afu->actag_base, afu->actag_enabled);
+	return 0;
+}
+
+static void reclaim_afu_actag(struct ocxl_afu *afu)
+{
+	struct ocxl_fn *fn = afu->fn;
+	int start_offset, size;
+
+	start_offset = afu->actag_base - fn->actag_base;
+	size = afu->actag_enabled;
+	ocxl_actag_afu_free(afu->fn, start_offset, size);
+}
+
+static int assign_afu_pasid(struct ocxl_afu *afu, struct pci_dev *dev)
+{
+	struct ocxl_fn *fn = afu->fn;
+	int pasid_count, pasid_offset;
+
+	/*
+	 * We only support the case where the function configuration
+	 * requested enough PASIDs to cover all AFUs.
+	 */
+	pasid_count = 1 << afu->config.pasid_supported_log;
+	pasid_offset = ocxl_pasid_afu_alloc(fn, pasid_count);
+	if (pasid_offset < 0) {
+		dev_err(&afu->dev, "Can't allocate %d PASIDs for AFU: %d\n",
+			pasid_count, pasid_offset);
+		return pasid_offset;
+	}
+	afu->pasid_base = fn->pasid_base + pasid_offset;
+	afu->pasid_count = 0;
+	afu->pasid_max = pasid_count;
+
+	ocxl_config_set_afu_pasid(dev, afu->config.dvsec_afu_control_pos,
+				afu->pasid_base,
+				afu->config.pasid_supported_log);
+	dev_dbg(&afu->dev, "PASID base=%d, enabled=%d\n",
+		afu->pasid_base, pasid_count);
+	return 0;
+}
+
+static void reclaim_afu_pasid(struct ocxl_afu *afu)
+{
+	struct ocxl_fn *fn = afu->fn;
+	int start_offset, size;
+
+	start_offset = afu->pasid_base - fn->pasid_base;
+	size = 1 << afu->config.pasid_supported_log;
+	ocxl_pasid_afu_free(afu->fn, start_offset, size);
+}
+
+static int reserve_fn_bar(struct ocxl_fn *fn, int bar)
+{
+	struct pci_dev *dev = to_pci_dev(fn->dev.parent);
+	int rc, idx;
+
+	if (bar != 0 && bar != 2 && bar != 4)
+		return -EINVAL;
+
+	idx = bar >> 1;
+	if (fn->bar_used[idx]++ == 0) {
+		rc = pci_request_region(dev, bar, "ocxl");
+		if (rc)
+			return rc;
+	}
+	return 0;
+}
+
+static void release_fn_bar(struct ocxl_fn *fn, int bar)
+{
+	struct pci_dev *dev = to_pci_dev(fn->dev.parent);
+	int idx;
+
+	if (bar != 0 && bar != 2 && bar != 4)
+		return;
+
+	idx = bar >> 1;
+	if (--fn->bar_used[idx] == 0)
+		pci_release_region(dev, bar);
+	WARN_ON(fn->bar_used[idx] < 0);
+}
+
+static int map_mmio_areas(struct ocxl_afu *afu, struct pci_dev *dev)
+{
+	int rc;
+
+	rc = reserve_fn_bar(afu->fn, afu->config.global_mmio_bar);
+	if (rc)
+		return rc;
+
+	rc = reserve_fn_bar(afu->fn, afu->config.pp_mmio_bar);
+	if (rc) {
+		release_fn_bar(afu->fn, afu->config.global_mmio_bar);
+		return rc;
+	}
+
+	afu->global_mmio_start =
+		pci_resource_start(dev, afu->config.global_mmio_bar) +
+		afu->config.global_mmio_offset;
+	afu->pp_mmio_start =
+		pci_resource_start(dev, afu->config.pp_mmio_bar) +
+		afu->config.pp_mmio_offset;
+
+	afu->global_mmio_ptr = ioremap(afu->global_mmio_start,
+				afu->config.global_mmio_size);
+	if (!afu->global_mmio_ptr) {
+		release_fn_bar(afu->fn, afu->config.pp_mmio_bar);
+		release_fn_bar(afu->fn, afu->config.global_mmio_bar);
+		dev_err(&dev->dev, "Error mapping global mmio area\n");
+		return -ENOMEM;
+	}
+
+	/*
+	 * Leave an empty page between the per-process mmio area and
+	 * the AFU interrupt mappings
+	 */
+	afu->irq_base_offset = afu->config.pp_mmio_stride + PAGE_SIZE;
+	return 0;
+}
+
+static void unmap_mmio_areas(struct ocxl_afu *afu)
+{
+	if (afu->global_mmio_ptr) {
+		iounmap(afu->global_mmio_ptr);
+		afu->global_mmio_ptr = NULL;
+	}
+	afu->global_mmio_start = 0;
+	afu->pp_mmio_start = 0;
+	release_fn_bar(afu->fn, afu->config.pp_mmio_bar);
+	release_fn_bar(afu->fn, afu->config.global_mmio_bar);
+}
+
+static int configure_afu(struct ocxl_afu *afu, u8 afu_idx, struct pci_dev *dev)
+{
+	int rc;
+
+	rc = ocxl_config_read_afu(dev, &afu->fn->config, &afu->config, afu_idx);
+	if (rc)
+		return rc;
+
+	rc = set_afu_device(afu, dev_name(&dev->dev));
+	if (rc)
+		return rc;
+
+	rc = assign_afu_actag(afu, dev);
+	if (rc)
+		return rc;
+
+	rc = assign_afu_pasid(afu, dev);
+	if (rc) {
+		reclaim_afu_actag(afu);
+		return rc;
+	}
+
+	rc = map_mmio_areas(afu, dev);
+	if (rc) {
+		reclaim_afu_pasid(afu);
+		reclaim_afu_actag(afu);
+		return rc;
+	}
+	return 0;
+}
+
+static void deconfigure_afu(struct ocxl_afu *afu)
+{
+	unmap_mmio_areas(afu);
+	reclaim_afu_pasid(afu);
+	reclaim_afu_actag(afu);
+}
+
+static int activate_afu(struct pci_dev *dev, struct ocxl_afu *afu)
+{
+	int rc;
+
+	ocxl_config_set_afu_state(dev, afu->config.dvsec_afu_control_pos, 1);
+	/*
+	 * Char device creation is the last step, as processes can
+	 * call our driver immediately, so all our inits must be finished.
+	 */
+	rc = ocxl_create_cdev(afu);
+	if (rc)
+		return rc;
+	return 0;
+}
+
+static void deactivate_afu(struct ocxl_afu *afu)
+{
+	struct pci_dev *dev = to_pci_dev(afu->fn->dev.parent);
+
+	ocxl_destroy_cdev(afu);
+	ocxl_config_set_afu_state(dev, afu->config.dvsec_afu_control_pos, 0);
+}
+
+static int init_afu(struct pci_dev *dev, struct ocxl_fn *fn, u8 afu_idx)
+{
+	int rc;
+	struct ocxl_afu *afu;
+
+	afu = alloc_afu(fn);
+	if (!afu)
+		return -ENOMEM;
+
+	rc = configure_afu(afu, afu_idx, dev);
+	if (rc) {
+		free_afu(afu);
+		return rc;
+	}
+
+	rc = ocxl_register_afu(afu);
+	if (rc)
+		goto err;
+
+	rc = ocxl_sysfs_add_afu(afu);
+	if (rc)
+		goto err;
+
+	rc = activate_afu(dev, afu);
+	if (rc)
+		goto err_sys;
+
+	list_add_tail(&afu->list, &fn->afu_list);
+	return 0;
+
+err_sys:
+	ocxl_sysfs_remove_afu(afu);
+err:
+	deconfigure_afu(afu);
+	device_unregister(&afu->dev);
+	return rc;
+}
+
+static void remove_afu(struct ocxl_afu *afu)
+{
+	list_del(&afu->list);
+	ocxl_context_detach_all(afu);
+	deactivate_afu(afu);
+	ocxl_sysfs_remove_afu(afu);
+	deconfigure_afu(afu);
+	device_unregister(&afu->dev);
+}
+
+static struct ocxl_fn *alloc_function(struct pci_dev *dev)
+{
+	struct ocxl_fn *fn;
+
+	fn = kzalloc(sizeof(struct ocxl_fn), GFP_KERNEL);
+	if (!fn)
+		return NULL;
+
+	INIT_LIST_HEAD(&fn->afu_list);
+	INIT_LIST_HEAD(&fn->pasid_list);
+	INIT_LIST_HEAD(&fn->actag_list);
+	return fn;
+}
+
+static void free_function(struct ocxl_fn *fn)
+{
+	WARN_ON(!list_empty(&fn->afu_list));
+	WARN_ON(!list_empty(&fn->pasid_list));
+	kfree(fn);
+}
+
+static void free_function_dev(struct device *dev)
+{
+	struct ocxl_fn *fn = to_ocxl_function(dev);
+
+	free_function(fn);
+}
+
+static int set_function_device(struct ocxl_fn *fn, struct pci_dev *dev)
+{
+	int rc;
+
+	fn->dev.parent = &dev->dev;
+	fn->dev.release = free_function_dev;
+	rc = dev_set_name(&fn->dev, "ocxlfn.%s", dev_name(&dev->dev));
+	if (rc)
+		return rc;
+	pci_set_drvdata(dev, fn);
+	return 0;
+}
+
+static int assign_function_actag(struct ocxl_fn *fn)
+{
+	struct pci_dev *dev = to_pci_dev(fn->dev.parent);
+	u16 base, enabled, supported;
+	int rc;
+
+	rc = ocxl_config_get_actag_info(dev, &base, &enabled, &supported);
+	if (rc)
+		return rc;
+
+	fn->actag_base = base;
+	fn->actag_enabled = enabled;
+	fn->actag_supported = supported;
+
+	ocxl_config_set_actag(dev, fn->config.dvsec_function_pos,
+			fn->actag_base,	fn->actag_enabled);
+	dev_dbg(&fn->dev, "actag range starting at %d, enabled %d\n",
+		fn->actag_base, fn->actag_enabled);
+	return 0;
+}
+
+static int set_function_pasid(struct ocxl_fn *fn)
+{
+	struct pci_dev *dev = to_pci_dev(fn->dev.parent);
+	int rc, desired_count, max_count;
+
+	/* A function may not require any PASID */
+	if (fn->config.max_pasid_log < 0)
+		return 0;
+
+	rc = ocxl_config_get_pasid_info(dev, &max_count);
+	if (rc)
+		return rc;
+
+	desired_count = 1 << fn->config.max_pasid_log;
+
+	if (desired_count > max_count) {
+		dev_err(&fn->dev,
+			"Function requires more PASIDs than is available (%d vs. %d)\n",
+			desired_count, max_count);
+		return -ENOSPC;
+	}
+
+	fn->pasid_base = 0;
+	return 0;
+}
+
+static int configure_function(struct ocxl_fn *fn, struct pci_dev *dev)
+{
+	int rc;
+
+	rc = pci_enable_device(dev);
+	if (rc) {
+		dev_err(&dev->dev, "pci_enable_device failed: %d\n", rc);
+		return rc;
+	}
+
+	/*
+	 * Once it has been confirmed to work on our hardware, we
+	 * should reset the function, to force the adapter to restart
+	 * from scratch.
+	 * A function reset would also reset all its AFUs.
+	 *
+	 * Some hints for implementation:
+	 *
+	 * - there's not status bit to know when the reset is done. We
+	 *   should try reading the config space to know when it's
+	 *   done.
+	 * - probably something like:
+	 *	Reset
+	 *	wait 100ms
+	 *	issue config read
+	 *	allow device up to 1 sec to return success on config
+	 *	read before declaring it broken
+	 *
+	 * Some shared logic on the card (CFG, TLX) won't be reset, so
+	 * there's no guarantee that it will be enough.
+	 */
+	rc = ocxl_config_read_function(dev, &fn->config);
+	if (rc)
+		return rc;
+
+	rc = set_function_device(fn, dev);
+	if (rc)
+		return rc;
+
+	rc = assign_function_actag(fn);
+	if (rc)
+		return rc;
+
+	rc = set_function_pasid(fn);
+	if (rc)
+		return rc;
+
+	rc = ocxl_link_setup(dev, 0, &fn->link);
+	if (rc)
+		return rc;
+
+	rc = ocxl_config_set_TL(dev, fn->config.dvsec_tl_pos);
+	if (rc) {
+		ocxl_link_release(dev, fn->link);
+		return rc;
+	}
+	return 0;
+}
+
+static void deconfigure_function(struct ocxl_fn *fn)
+{
+	struct pci_dev *dev = to_pci_dev(fn->dev.parent);
+
+	ocxl_link_release(dev, fn->link);
+	pci_disable_device(dev);
+}
+
+static struct ocxl_fn *init_function(struct pci_dev *dev)
+{
+	struct ocxl_fn *fn;
+	int rc;
+
+	fn = alloc_function(dev);
+	if (!fn)
+		return ERR_PTR(-ENOMEM);
+
+	rc = configure_function(fn, dev);
+	if (rc) {
+		free_function(fn);
+		return ERR_PTR(rc);
+	}
+
+	rc = device_register(&fn->dev);
+	if (rc) {
+		deconfigure_function(fn);
+		device_unregister(&fn->dev);
+		return ERR_PTR(rc);
+	}
+	return fn;
+}
+
+static void remove_function(struct ocxl_fn *fn)
+{
+	deconfigure_function(fn);
+	device_unregister(&fn->dev);
+}
+
+static int ocxl_probe(struct pci_dev *dev, const struct pci_device_id *id)
+{
+	int rc, afu_count = 0;
+	u8 afu;
+	struct ocxl_fn *fn;
+
+	if (!radix_enabled()) {
+		dev_err(&dev->dev, "Unsupported memory model (hash)\n");
+		return -ENODEV;
+	}
+
+	fn = init_function(dev);
+	if (IS_ERR(fn)) {
+		dev_err(&dev->dev, "function init failed: %li\n",
+			PTR_ERR(fn));
+		return PTR_ERR(fn);
+	}
+
+	for (afu = 0; afu <= fn->config.max_afu_index; afu++) {
+		rc = ocxl_config_check_afu_index(dev, &fn->config, afu);
+		if (rc > 0) {
+			rc = init_afu(dev, fn, afu);
+			if (rc) {
+				dev_err(&dev->dev,
+					"Can't initialize AFU index %d\n", afu);
+				continue;
+			}
+			afu_count++;
+		}
+	}
+	dev_info(&dev->dev, "%d AFU(s) configured\n", afu_count);
+	return 0;
+}
+
+static void ocxl_remove(struct pci_dev *dev)
+{
+	struct ocxl_afu *afu, *tmp;
+	struct ocxl_fn *fn = pci_get_drvdata(dev);
+
+	list_for_each_entry_safe(afu, tmp, &fn->afu_list, list) {
+		remove_afu(afu);
+	}
+	remove_function(fn);
+}
+
+struct pci_driver ocxl_pci_driver = {
+	.name = "ocxl",
+	.id_table = ocxl_pci_tbl,
+	.probe = ocxl_probe,
+	.remove = ocxl_remove,
+	.shutdown = ocxl_remove,
+};
diff --git a/drivers/misc/ocxl/sysfs.c b/drivers/misc/ocxl/sysfs.c
new file mode 100644
index 000000000000..d9753a1db14b
--- /dev/null
+++ b/drivers/misc/ocxl/sysfs.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <linux/sysfs.h>
+#include "ocxl_internal.h"
+
+static ssize_t global_mmio_size_show(struct device *device,
+				struct device_attribute *attr,
+				char *buf)
+{
+	struct ocxl_afu *afu = to_ocxl_afu(device);
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n",
+			afu->config.global_mmio_size);
+}
+
+static ssize_t pp_mmio_size_show(struct device *device,
+				struct device_attribute *attr,
+				char *buf)
+{
+	struct ocxl_afu *afu = to_ocxl_afu(device);
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n",
+			afu->config.pp_mmio_stride);
+}
+
+static ssize_t afu_version_show(struct device *device,
+				struct device_attribute *attr,
+				char *buf)
+{
+	struct ocxl_afu *afu = to_ocxl_afu(device);
+
+	return scnprintf(buf, PAGE_SIZE, "%hhu:%hhu\n",
+			afu->config.version_major,
+			afu->config.version_minor);
+}
+
+static ssize_t contexts_show(struct device *device,
+		struct device_attribute *attr,
+		char *buf)
+{
+	struct ocxl_afu *afu = to_ocxl_afu(device);
+
+	return scnprintf(buf, PAGE_SIZE, "%d/%d\n",
+			afu->pasid_count, afu->pasid_max);
+}
+
+static struct device_attribute afu_attrs[] = {
+	__ATTR_RO(global_mmio_size),
+	__ATTR_RO(pp_mmio_size),
+	__ATTR_RO(afu_version),
+	__ATTR_RO(contexts),
+};
+
+static ssize_t global_mmio_read(struct file *filp, struct kobject *kobj,
+				struct bin_attribute *bin_attr, char *buf,
+				loff_t off, size_t count)
+{
+	struct ocxl_afu *afu = to_ocxl_afu(kobj_to_dev(kobj));
+
+	if (count == 0 || off < 0 ||
+		off >= afu->config.global_mmio_size)
+		return 0;
+	memcpy_fromio(buf, afu->global_mmio_ptr + off, count);
+	return count;
+}
+
+static int global_mmio_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct ocxl_afu *afu = vma->vm_private_data;
+	unsigned long offset;
+
+	if (vmf->pgoff >= (afu->config.global_mmio_size >> PAGE_SHIFT))
+		return VM_FAULT_SIGBUS;
+
+	offset = vmf->pgoff;
+	offset += (afu->global_mmio_start >> PAGE_SHIFT);
+	vm_insert_pfn(vma, vmf->address, offset);
+	return VM_FAULT_NOPAGE;
+}
+
+static const struct vm_operations_struct global_mmio_vmops = {
+	.fault = global_mmio_fault,
+};
+
+static int global_mmio_mmap(struct file *filp, struct kobject *kobj,
+			struct bin_attribute *bin_attr,
+			struct vm_area_struct *vma)
+{
+	struct ocxl_afu *afu = to_ocxl_afu(kobj_to_dev(kobj));
+
+	if ((vma_pages(vma) + vma->vm_pgoff) >
+		(afu->config.global_mmio_size >> PAGE_SHIFT))
+		return -EINVAL;
+
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	vma->vm_ops = &global_mmio_vmops;
+	vma->vm_private_data = afu;
+	return 0;
+}
+
+int ocxl_sysfs_add_afu(struct ocxl_afu *afu)
+{
+	int i, rc;
+
+	for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) {
+		rc = device_create_file(&afu->dev, &afu_attrs[i]);
+		if (rc)
+			goto err;
+	}
+
+	sysfs_attr_init(&afu->attr_global_mmio.attr);
+	afu->attr_global_mmio.attr.name = "global_mmio_area";
+	afu->attr_global_mmio.attr.mode = 0600;
+	afu->attr_global_mmio.size = afu->config.global_mmio_size;
+	afu->attr_global_mmio.read = global_mmio_read;
+	afu->attr_global_mmio.mmap = global_mmio_mmap;
+	rc = device_create_bin_file(&afu->dev, &afu->attr_global_mmio);
+	if (rc) {
+		dev_err(&afu->dev,
+			"Unable to create global mmio attr for afu: %d\n",
+			rc);
+		goto err;
+	}
+
+	return 0;
+
+err:
+	for (i--; i >= 0; i--)
+		device_remove_file(&afu->dev, &afu_attrs[i]);
+	return rc;
+}
+
+void ocxl_sysfs_remove_afu(struct ocxl_afu *afu)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(afu_attrs); i++)
+		device_remove_file(&afu->dev, &afu_attrs[i]);
+	device_remove_bin_file(&afu->dev, &afu->attr_global_mmio);
+}
diff --git a/drivers/misc/ocxl/trace.c b/drivers/misc/ocxl/trace.c
new file mode 100644
index 000000000000..1e6947049697
--- /dev/null
+++ b/drivers/misc/ocxl/trace.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#ifndef __CHECKER__
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+#endif
diff --git a/drivers/misc/ocxl/trace.h b/drivers/misc/ocxl/trace.h
new file mode 100644
index 000000000000..bcb7ff330c1e
--- /dev/null
+++ b/drivers/misc/ocxl/trace.h
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM ocxl
+
+#if !defined(_TRACE_OCXL_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_OCXL_H
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(ocxl_context,
+	TP_PROTO(pid_t pid, void *spa, int pasid, u32 pidr, u32 tidr),
+	TP_ARGS(pid, spa, pasid, pidr, tidr),
+
+	TP_STRUCT__entry(
+		__field(pid_t, pid)
+		__field(void*, spa)
+		__field(int, pasid)
+		__field(u32, pidr)
+		__field(u32, tidr)
+	),
+
+	TP_fast_assign(
+		__entry->pid = pid;
+		__entry->spa = spa;
+		__entry->pasid = pasid;
+		__entry->pidr = pidr;
+		__entry->tidr = tidr;
+	),
+
+	TP_printk("linux pid=%d spa=0x%p pasid=0x%x pidr=0x%x tidr=0x%x",
+		__entry->pid,
+		__entry->spa,
+		__entry->pasid,
+		__entry->pidr,
+		__entry->tidr
+	)
+);
+
+DEFINE_EVENT(ocxl_context, ocxl_context_add,
+	TP_PROTO(pid_t pid, void *spa, int pasid, u32 pidr, u32 tidr),
+	TP_ARGS(pid, spa, pasid, pidr, tidr)
+);
+
+DEFINE_EVENT(ocxl_context, ocxl_context_remove,
+	TP_PROTO(pid_t pid, void *spa, int pasid, u32 pidr, u32 tidr),
+	TP_ARGS(pid, spa, pasid, pidr, tidr)
+);
+
+TRACE_EVENT(ocxl_terminate_pasid,
+	TP_PROTO(int pasid, int rc),
+	TP_ARGS(pasid, rc),
+
+	TP_STRUCT__entry(
+		__field(int, pasid)
+		__field(int, rc)
+	),
+
+	TP_fast_assign(
+		__entry->pasid = pasid;
+		__entry->rc = rc;
+	),
+
+	TP_printk("pasid=0x%x rc=%d",
+		__entry->pasid,
+		__entry->rc
+	)
+);
+
+DECLARE_EVENT_CLASS(ocxl_fault_handler,
+	TP_PROTO(void *spa, u64 pe, u64 dsisr, u64 dar, u64 tfc),
+	TP_ARGS(spa, pe, dsisr, dar, tfc),
+
+	TP_STRUCT__entry(
+		__field(void *, spa)
+		__field(u64, pe)
+		__field(u64, dsisr)
+		__field(u64, dar)
+		__field(u64, tfc)
+	),
+
+	TP_fast_assign(
+		__entry->spa = spa;
+		__entry->pe = pe;
+		__entry->dsisr = dsisr;
+		__entry->dar = dar;
+		__entry->tfc = tfc;
+	),
+
+	TP_printk("spa=%p pe=0x%llx dsisr=0x%llx dar=0x%llx tfc=0x%llx",
+		__entry->spa,
+		__entry->pe,
+		__entry->dsisr,
+		__entry->dar,
+		__entry->tfc
+	)
+);
+
+DEFINE_EVENT(ocxl_fault_handler, ocxl_fault,
+	TP_PROTO(void *spa, u64 pe, u64 dsisr, u64 dar, u64 tfc),
+	TP_ARGS(spa, pe, dsisr, dar, tfc)
+);
+
+DEFINE_EVENT(ocxl_fault_handler, ocxl_fault_ack,
+	TP_PROTO(void *spa, u64 pe, u64 dsisr, u64 dar, u64 tfc),
+	TP_ARGS(spa, pe, dsisr, dar, tfc)
+);
+
+TRACE_EVENT(ocxl_afu_irq_alloc,
+	TP_PROTO(int pasid, int irq_id, unsigned int virq, int hw_irq,
+		u64 irq_offset),
+	TP_ARGS(pasid, irq_id, virq, hw_irq, irq_offset),
+
+	TP_STRUCT__entry(
+		__field(int, pasid)
+		__field(int, irq_id)
+		__field(unsigned int, virq)
+		__field(int, hw_irq)
+		__field(u64, irq_offset)
+	),
+
+	TP_fast_assign(
+		__entry->pasid = pasid;
+		__entry->irq_id = irq_id;
+		__entry->virq = virq;
+		__entry->hw_irq = hw_irq;
+		__entry->irq_offset = irq_offset;
+	),
+
+	TP_printk("pasid=0x%x irq_id=%d virq=%u hw_irq=%d irq_offset=0x%llx",
+		__entry->pasid,
+		__entry->irq_id,
+		__entry->virq,
+		__entry->hw_irq,
+		__entry->irq_offset
+	)
+);
+
+TRACE_EVENT(ocxl_afu_irq_free,
+	TP_PROTO(int pasid, int irq_id),
+	TP_ARGS(pasid, irq_id),
+
+	TP_STRUCT__entry(
+		__field(int, pasid)
+		__field(int, irq_id)
+	),
+
+	TP_fast_assign(
+		__entry->pasid = pasid;
+		__entry->irq_id = irq_id;
+	),
+
+	TP_printk("pasid=0x%x irq_id=%d",
+		__entry->pasid,
+		__entry->irq_id
+	)
+);
+
+TRACE_EVENT(ocxl_afu_irq_receive,
+	TP_PROTO(int virq),
+	TP_ARGS(virq),
+
+	TP_STRUCT__entry(
+		__field(int, virq)
+	),
+
+	TP_fast_assign(
+		__entry->virq = virq;
+	),
+
+	TP_printk("virq=%d",
+		__entry->virq
+	)
+);
+
+#endif /* _TRACE_OCXL_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace
+#include <trace/define_trace.h>