Merge tag 'char-misc-5.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc

Pull char/misc driver updates from Greg KH: "Here is the large set of char/misc/whatever driver subsystem updates for 5.12-rc1. Over time it seems like this tree is collecting more and more tiny driver subsystems in one place, making it easier for those maintainers, which is why this is getting larger. Included in here are: - coresight driver updates - habanalabs driver updates - virtual acrn driver addition (proper acks from the x86 maintainers) - broadcom misc driver addition - speakup driver updates - soundwire driver updates - fpga driver updates - amba driver updates - mei driver updates - vfio driver updates - greybus driver updates - nvmem driver updates - phy driver updates - mhi driver updates - interconnect driver updates - fsl-mc bus driver updates - random driver fix - some small misc driver updates (rtsx, pvpanic, etc.) All of these have been in linux-next for a while, with the only reported issue being a merge conflict due to the dfl_device_id addition from the fpga subsystem in here" * tag 'char-misc-5.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc: (311 commits) spmi: spmi-pmic-arb: Fix hw_irq overflow Documentation: coresight: Add PID tracing description coresight: etm-perf: Support PID tracing for kernel at EL2 coresight: etm-perf: Clarify comment on perf options ACRN: update MAINTAINERS: mailing list is subscribers-only regmap: sdw-mbq: use MODULE_LICENSE("GPL") regmap: sdw: use no_pm routines for SoundWire 1.2 MBQ regmap: sdw: use _no_pm functions in regmap_read/write soundwire: intel: fix possible crash when no device is detected MAINTAINERS: replace my with email with replacements mhi: Fix double dma free uapi: map_to_7segment: Update example in documentation uio: uio_pci_generic: don't fail probe if pdev->irq equals to IRQ_NOTCONNECTED drivers/misc/vmw_vmci: restrict too big queue size in qp_host_alloc_queue firewire: replace tricky statement by two simple ones vme: make remove callback return void firmware: 
google: make coreboot driver's remove callback return void firmware: xilinx: Use explicit values for all enum values sample/acrn: Introduce a sample of HSM ioctl interface usage virt: acrn: Introduce an interface for Service VM to control vCPU ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2021-02-24 10:25:37 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2021-02-24 10:25:37 -0800
commit: e229b429bb4af24d9828758c0c851bb6a4169400 (patch)
tree: 95e49922f6c68b5f81cbf7a39349cfad42c5a0f1 /drivers
parent: 7ac1161c2789be25d0d206e831b051f43028866e (diff)
parent: d19db80a366576d3ffadf2508ed876b4c1faf959 (diff)
202 files changed, 15511 insertions, 4090 deletions
diff --git a/drivers/accessibility/speakup/serialio.c b/drivers/accessibility/speakup/serialio.c
index 403b01d66367..53580bdc5baa 100644
--- a/drivers/accessibility/speakup/serialio.c
+++ b/drivers/accessibility/speakup/serialio.c
@@ -27,11 +27,11 @@ static const struct old_serial_port *serstate;
 static int timeouts;
 
 static int spk_serial_out(struct spk_synth *in_synth, const char ch);
-static void spk_serial_send_xchar(char ch);
-static void spk_serial_tiocmset(unsigned int set, unsigned int clear);
-static unsigned char spk_serial_in(void);
-static unsigned char spk_serial_in_nowait(void);
-static void spk_serial_flush_buffer(void);
+static void spk_serial_send_xchar(struct spk_synth *in_synth, char ch);
+static void spk_serial_tiocmset(struct spk_synth *in_synth, unsigned int set, unsigned int clear);
+static unsigned char spk_serial_in(struct spk_synth *in_synth);
+static unsigned char spk_serial_in_nowait(struct spk_synth *in_synth);
+static void spk_serial_flush_buffer(struct spk_synth *in_synth);
 static int spk_serial_wait_for_xmitr(struct spk_synth *in_synth);
 
 struct spk_io_ops spk_serial_io_ops = {
@@ -150,7 +150,7 @@ static void start_serial_interrupt(int irq)
 	outb(1, speakup_info.port_tts + UART_FCR);	/* Turn FIFO On */
 }
 
-static void spk_serial_send_xchar(char ch)
+static void spk_serial_send_xchar(struct spk_synth *synth, char ch)
 {
 	int timeout = SPK_XMITR_TIMEOUT;
 
@@ -162,7 +162,7 @@ static void spk_serial_send_xchar(char ch)
 	outb(ch, speakup_info.port_tts);
 }
 
-static void spk_serial_tiocmset(unsigned int set, unsigned int clear)
+static void spk_serial_tiocmset(struct spk_synth *in_synth, unsigned int set, unsigned int clear)
 {
 	int old = inb(speakup_info.port_tts + UART_MCR);
 
@@ -251,7 +251,7 @@ static int spk_serial_wait_for_xmitr(struct spk_synth *in_synth)
 	return 1;
 }
 
-static unsigned char spk_serial_in(void)
+static unsigned char spk_serial_in(struct spk_synth *in_synth)
 {
 	int tmout = SPK_SERIAL_TIMEOUT;
 
@@ -265,7 +265,7 @@ static unsigned char spk_serial_in(void)
 	return inb_p(speakup_info.port_tts + UART_RX);
 }
 
-static unsigned char spk_serial_in_nowait(void)
+static unsigned char spk_serial_in_nowait(struct spk_synth *in_synth)
 {
 	unsigned char lsr;
 
@@ -275,7 +275,7 @@ static unsigned char spk_serial_in_nowait(void)
 	return inb_p(speakup_info.port_tts + UART_RX);
 }
 
-static void spk_serial_flush_buffer(void)
+static void spk_serial_flush_buffer(struct spk_synth *in_synth)
 {
 	/* TODO: flush the UART 16550 buffer */
 }
@@ -307,7 +307,7 @@ const char *spk_serial_synth_immediate(struct spk_synth *synth,
 }
 EXPORT_SYMBOL_GPL(spk_serial_synth_immediate);
 
-void spk_serial_release(void)
+void spk_serial_release(struct spk_synth *synth)
 {
 	spk_stop_serial_interrupt();
 	if (speakup_info.port_tts == 0)
diff --git a/drivers/accessibility/speakup/speakup_acntpc.c b/drivers/accessibility/speakup/speakup_acntpc.c
index c94328a5bd4a..c1ec087dca13 100644
--- a/drivers/accessibility/speakup/speakup_acntpc.c
+++ b/drivers/accessibility/speakup/speakup_acntpc.c
@@ -25,7 +25,7 @@
 #define PROCSPEECH '\r'
 
 static int synth_probe(struct spk_synth *synth);
-static void accent_release(void);
+static void accent_release(struct spk_synth *synth);
 static const char *synth_immediate(struct spk_synth *synth, const char *buf);
 static void do_catch_up(struct spk_synth *synth);
 static void synth_flush(struct spk_synth *synth);
@@ -294,7 +294,7 @@ static int synth_probe(struct spk_synth *synth)
 	return 0;
 }
 
-static void accent_release(void)
+static void accent_release(struct spk_synth *synth)
 {
 	spk_stop_serial_interrupt();
 	if (speakup_info.port_tts)
diff --git a/drivers/accessibility/speakup/speakup_apollo.c b/drivers/accessibility/speakup/speakup_apollo.c
index 0877b4044c28..cd63581b2e99 100644
--- a/drivers/accessibility/speakup/speakup_apollo.c
+++ b/drivers/accessibility/speakup/speakup_apollo.c
@@ -163,8 +163,8 @@ static void do_catch_up(struct spk_synth *synth)
 		full_time_val = full_time->u.n.value;
 		spin_unlock_irqrestore(&speakup_info.spinlock, flags);
 		if (!synth->io_ops->synth_out(synth, ch)) {
-			synth->io_ops->tiocmset(0, UART_MCR_RTS);
-			synth->io_ops->tiocmset(UART_MCR_RTS, 0);
+			synth->io_ops->tiocmset(synth, 0, UART_MCR_RTS);
+			synth->io_ops->tiocmset(synth, UART_MCR_RTS, 0);
 			schedule_timeout(msecs_to_jiffies(full_time_val));
 			continue;
 		}
diff --git a/drivers/accessibility/speakup/speakup_audptr.c b/drivers/accessibility/speakup/speakup_audptr.c
index e6a6a9665d8f..e89fd72579e6 100644
--- a/drivers/accessibility/speakup/speakup_audptr.c
+++ b/drivers/accessibility/speakup/speakup_audptr.c
@@ -119,8 +119,8 @@ static struct spk_synth synth_audptr = {
 
 static void synth_flush(struct spk_synth *synth)
 {
-	synth->io_ops->flush_buffer();
-	synth->io_ops->send_xchar(SYNTH_CLEAR);
+	synth->io_ops->flush_buffer(synth);
+	synth->io_ops->send_xchar(synth, SYNTH_CLEAR);
 	synth->io_ops->synth_out(synth, PROCSPEECH);
 }
 
@@ -130,11 +130,11 @@ static void synth_version(struct spk_synth *synth)
 	char synth_id[40] = "";
 
 	synth->synth_immediate(synth, "\x05[Q]");
-	synth_id[test] = synth->io_ops->synth_in();
+	synth_id[test] = synth->io_ops->synth_in(synth);
 	if (synth_id[test] == 'A') {
 		do {
 			/* read version string from synth */
-			synth_id[++test] = synth->io_ops->synth_in();
+			synth_id[++test] = synth->io_ops->synth_in(synth);
 		} while (synth_id[test] != '\n' && test < 32);
 		synth_id[++test] = 0x00;
 	}
diff --git a/drivers/accessibility/speakup/speakup_decext.c b/drivers/accessibility/speakup/speakup_decext.c
index 7408eb29cf38..092cfd08a9e1 100644
--- a/drivers/accessibility/speakup/speakup_decext.c
+++ b/drivers/accessibility/speakup/speakup_decext.c
@@ -218,7 +218,7 @@ static void do_catch_up(struct spk_synth *synth)
 static void synth_flush(struct spk_synth *synth)
 {
 	in_escape = 0;
-	synth->io_ops->flush_buffer();
+	synth->io_ops->flush_buffer(synth);
 	synth->synth_immediate(synth, "\033P;10z\033\\");
 }
 
diff --git a/drivers/accessibility/speakup/speakup_decpc.c b/drivers/accessibility/speakup/speakup_decpc.c
index 96f24c848cc5..dec314dee214 100644
--- a/drivers/accessibility/speakup/speakup_decpc.c
+++ b/drivers/accessibility/speakup/speakup_decpc.c
@@ -125,7 +125,7 @@ enum {	PRIMARY_DIC	= 0, USER_DIC, COMMAND_DIC, ABBREV_DIC };
 #define SYNTH_IO_EXTENT 8
 
 static int synth_probe(struct spk_synth *synth);
-static void dtpc_release(void);
+static void dtpc_release(struct spk_synth *synth);
 static const char *synth_immediate(struct spk_synth *synth, const char *buf);
 static void do_catch_up(struct spk_synth *synth);
 static void synth_flush(struct spk_synth *synth);
@@ -474,7 +474,7 @@ static int synth_probe(struct spk_synth *synth)
 	return 0;
 }
 
-static void dtpc_release(void)
+static void dtpc_release(struct spk_synth *synth)
 {
 	spk_stop_serial_interrupt();
 	if (speakup_info.port_tts)
diff --git a/drivers/accessibility/speakup/speakup_dectlk.c b/drivers/accessibility/speakup/speakup_dectlk.c
index ab6d61e80b1c..580ec796816b 100644
--- a/drivers/accessibility/speakup/speakup_dectlk.c
+++ b/drivers/accessibility/speakup/speakup_dectlk.c
@@ -78,6 +78,8 @@ static struct kobj_attribute direct_attribute =
 	__ATTR(direct, 0644, spk_var_show, spk_var_store);
 static struct kobj_attribute full_time_attribute =
 	__ATTR(full_time, 0644, spk_var_show, spk_var_store);
+static struct kobj_attribute flush_time_attribute =
+	__ATTR(flush_time, 0644, spk_var_show, spk_var_store);
 static struct kobj_attribute jiffy_delta_attribute =
 	__ATTR(jiffy_delta, 0644, spk_var_show, spk_var_store);
 static struct kobj_attribute trigger_time_attribute =
@@ -99,6 +101,7 @@ static struct attribute *synth_attrs[] = {
 	&delay_time_attribute.attr,
 	&direct_attribute.attr,
 	&full_time_attribute.attr,
+	&flush_time_attribute.attr,
 	&jiffy_delta_attribute.attr,
 	&trigger_time_attribute.attr,
 	NULL,	/* need to NULL terminate the list of attributes */
@@ -118,6 +121,7 @@ static struct spk_synth synth_dectlk = {
 	.trigger = 50,
 	.jiffies = 50,
 	.full = 40000,
+	.flush_time = 4000,
 	.dev_name = SYNTH_DEFAULT_DEV,
 	.startup = SYNTH_START,
 	.checkval = SYNTH_CHECK,
@@ -200,18 +204,23 @@ static void do_catch_up(struct spk_synth *synth)
 	static u_char last = '\0';
 	unsigned long flags;
 	unsigned long jiff_max;
-	unsigned long timeout = msecs_to_jiffies(4000);
+	unsigned long timeout;
 	DEFINE_WAIT(wait);
 	struct var_t *jiffy_delta;
 	struct var_t *delay_time;
+	struct var_t *flush_time;
 	int jiffy_delta_val;
 	int delay_time_val;
+	int timeout_val;
 
 	jiffy_delta = spk_get_var(JIFFY);
 	delay_time = spk_get_var(DELAY);
+	flush_time = spk_get_var(FLUSH);
 	spin_lock_irqsave(&speakup_info.spinlock, flags);
 	jiffy_delta_val = jiffy_delta->u.n.value;
+	timeout_val = flush_time->u.n.value;
 	spin_unlock_irqrestore(&speakup_info.spinlock, flags);
+	timeout = msecs_to_jiffies(timeout_val);
 	jiff_max = jiffies + jiffy_delta_val;
 
 	while (!kthread_should_stop()) {
@@ -289,7 +298,7 @@ static void synth_flush(struct spk_synth *synth)
 		synth->io_ops->synth_out(synth, ']');
 	in_escape = 0;
 	is_flushing = 1;
-	synth->io_ops->flush_buffer();
+	synth->io_ops->flush_buffer(synth);
 	synth->io_ops->synth_out(synth, SYNTH_CLEAR);
 }
 
diff --git a/drivers/accessibility/speakup/speakup_dtlk.c b/drivers/accessibility/speakup/speakup_dtlk.c
index dbebed0eeeec..92838d3ae9eb 100644
--- a/drivers/accessibility/speakup/speakup_dtlk.c
+++ b/drivers/accessibility/speakup/speakup_dtlk.c
@@ -24,7 +24,7 @@
 #define PROCSPEECH 0x00
 
 static int synth_probe(struct spk_synth *synth);
-static void dtlk_release(void);
+static void dtlk_release(struct spk_synth *synth);
 static const char *synth_immediate(struct spk_synth *synth, const char *buf);
 static void do_catch_up(struct spk_synth *synth);
 static void synth_flush(struct spk_synth *synth);
@@ -365,7 +365,7 @@ static int synth_probe(struct spk_synth *synth)
 	return 0;
 }
 
-static void dtlk_release(void)
+static void dtlk_release(struct spk_synth *synth)
 {
 	spk_stop_serial_interrupt();
 	if (speakup_info.port_tts)
diff --git a/drivers/accessibility/speakup/speakup_keypc.c b/drivers/accessibility/speakup/speakup_keypc.c
index 414827e888fc..311f4aa0be22 100644
--- a/drivers/accessibility/speakup/speakup_keypc.c
+++ b/drivers/accessibility/speakup/speakup_keypc.c
@@ -24,7 +24,7 @@
 #define SYNTH_CLEAR 0x03
 
 static int synth_probe(struct spk_synth *synth);
-static void keynote_release(void);
+static void keynote_release(struct spk_synth *synth);
 static const char *synth_immediate(struct spk_synth *synth, const char *buf);
 static void do_catch_up(struct spk_synth *synth);
 static void synth_flush(struct spk_synth *synth);
@@ -295,7 +295,7 @@ static int synth_probe(struct spk_synth *synth)
 	return 0;
 }
 
-static void keynote_release(void)
+static void keynote_release(struct spk_synth *synth)
 {
 	spk_stop_serial_interrupt();
 	if (synth_port)
diff --git a/drivers/accessibility/speakup/speakup_ltlk.c b/drivers/accessibility/speakup/speakup_ltlk.c
index 3c59519a871f..3e59b387d0c4 100644
--- a/drivers/accessibility/speakup/speakup_ltlk.c
+++ b/drivers/accessibility/speakup/speakup_ltlk.c
@@ -132,7 +132,7 @@ static void synth_interrogate(struct spk_synth *synth)
 
 	synth->synth_immediate(synth, "\x18\x01?");
 	for (i = 0; i < 50; i++) {
-		buf[i] = synth->io_ops->synth_in();
+		buf[i] = synth->io_ops->synth_in(synth);
 		if (i > 2 && buf[i] == 0x7f)
 			break;
 	}
diff --git a/drivers/accessibility/speakup/speakup_soft.c b/drivers/accessibility/speakup/speakup_soft.c
index 9a7029539f35..c3f97c572fb6 100644
--- a/drivers/accessibility/speakup/speakup_soft.c
+++ b/drivers/accessibility/speakup/speakup_soft.c
@@ -24,7 +24,7 @@
 #define CLEAR_SYNTH 0x18
 
 static int softsynth_probe(struct spk_synth *synth);
-static void softsynth_release(void);
+static void softsynth_release(struct spk_synth *synth);
 static int softsynth_is_alive(struct spk_synth *synth);
 static unsigned char get_index(struct spk_synth *synth);
 
@@ -402,7 +402,7 @@ static int softsynth_probe(struct spk_synth *synth)
 	return 0;
 }
 
-static void softsynth_release(void)
+static void softsynth_release(struct spk_synth *synth)
 {
 	misc_deregister(&synth_device);
 	misc_deregister(&synthu_device);
diff --git a/drivers/accessibility/speakup/speakup_spkout.c b/drivers/accessibility/speakup/speakup_spkout.c
index 6e933bf1de2e..bd3d8dc300ff 100644
--- a/drivers/accessibility/speakup/speakup_spkout.c
+++ b/drivers/accessibility/speakup/speakup_spkout.c
@@ -117,8 +117,8 @@ static struct spk_synth synth_spkout = {
 
 static void synth_flush(struct spk_synth *synth)
 {
-	synth->io_ops->flush_buffer();
-	synth->io_ops->send_xchar(SYNTH_CLEAR);
+	synth->io_ops->flush_buffer(synth);
+	synth->io_ops->send_xchar(synth, SYNTH_CLEAR);
 }
 
 module_param_named(ser, synth_spkout.ser, int, 0444);
diff --git a/drivers/accessibility/speakup/spk_priv.h b/drivers/accessibility/speakup/spk_priv.h
index 0f4bcbe5ddb9..9da57ead17cb 100644
--- a/drivers/accessibility/speakup/spk_priv.h
+++ b/drivers/accessibility/speakup/spk_priv.h
@@ -34,8 +34,8 @@
 
 const struct old_serial_port *spk_serial_init(int index);
 void spk_stop_serial_interrupt(void);
-void spk_serial_release(void);
-void spk_ttyio_release(void);
+void spk_serial_release(struct spk_synth *synth);
+void spk_ttyio_release(struct spk_synth *synth);
 void spk_ttyio_register_ldisc(void);
 void spk_ttyio_unregister_ldisc(void);
 
diff --git a/drivers/accessibility/speakup/spk_ttyio.c b/drivers/accessibility/speakup/spk_ttyio.c
index 835d17455fcd..9af1d4c124d3 100644
--- a/drivers/accessibility/speakup/spk_ttyio.c
+++ b/drivers/accessibility/speakup/spk_ttyio.c
@@ -12,14 +12,15 @@ struct spk_ldisc_data {
 	char buf;
 	struct completion completion;
 	bool buf_free;
+	struct spk_synth *synth;
 };
 
-static struct spk_synth *spk_ttyio_synth;
-static struct tty_struct *speakup_tty;
-/* mutex to protect against speakup_tty disappearing from underneath us while
- * we are using it. this can happen when the device physically unplugged,
- * while in use. it also serialises access to speakup_tty.
+/*
+ * This allows to catch within spk_ttyio_ldisc_open whether it is getting set
+ * on for a speakup-driven device.
  */
+static struct tty_struct *speakup_tty;
+/* This mutex serializes the use of such global speakup_tty variable */
 static DEFINE_MUTEX(speakup_tty_mutex);
 
 static int ser_to_dev(int ser, dev_t *dev_no)
@@ -67,22 +68,20 @@ static int spk_ttyio_ldisc_open(struct tty_struct *tty)
 
 static void spk_ttyio_ldisc_close(struct tty_struct *tty)
 {
-	mutex_lock(&speakup_tty_mutex);
-	kfree(speakup_tty->disc_data);
-	speakup_tty = NULL;
-	mutex_unlock(&speakup_tty_mutex);
+	kfree(tty->disc_data);
 }
 
 static int spk_ttyio_receive_buf2(struct tty_struct *tty,
 				  const unsigned char *cp, char *fp, int count)
 {
 	struct spk_ldisc_data *ldisc_data = tty->disc_data;
+	struct spk_synth *synth = ldisc_data->synth;
 
-	if (spk_ttyio_synth->read_buff_add) {
+	if (synth->read_buff_add) {
 		int i;
 
 		for (i = 0; i < count; i++)
-			spk_ttyio_synth->read_buff_add(cp[i]);
+			synth->read_buff_add(cp[i]);
 
 		return count;
 	}
@@ -114,11 +113,11 @@ static struct tty_ldisc_ops spk_ttyio_ldisc_ops = {
 
 static int spk_ttyio_out(struct spk_synth *in_synth, const char ch);
 static int spk_ttyio_out_unicode(struct spk_synth *in_synth, u16 ch);
-static void spk_ttyio_send_xchar(char ch);
-static void spk_ttyio_tiocmset(unsigned int set, unsigned int clear);
-static unsigned char spk_ttyio_in(void);
-static unsigned char spk_ttyio_in_nowait(void);
-static void spk_ttyio_flush_buffer(void);
+static void spk_ttyio_send_xchar(struct spk_synth *in_synth, char ch);
+static void spk_ttyio_tiocmset(struct spk_synth *in_synth, unsigned int set, unsigned int clear);
+static unsigned char spk_ttyio_in(struct spk_synth *in_synth);
+static unsigned char spk_ttyio_in_nowait(struct spk_synth *in_synth);
+static void spk_ttyio_flush_buffer(struct spk_synth *in_synth);
 static int spk_ttyio_wait_for_xmitr(struct spk_synth *in_synth);
 
 struct spk_io_ops spk_ttyio_ops = {
@@ -187,13 +186,17 @@ static int spk_ttyio_initialise_ldisc(struct spk_synth *synth)
 	mutex_lock(&speakup_tty_mutex);
 	speakup_tty = tty;
 	ret = tty_set_ldisc(tty, N_SPEAKUP);
-	if (ret)
-		speakup_tty = NULL;
+	speakup_tty = NULL;
 	mutex_unlock(&speakup_tty_mutex);
 
-	if (!ret)
+	if (!ret) {
 		/* Success */
+		struct spk_ldisc_data *ldisc_data = tty->disc_data;
+
+		ldisc_data->synth = synth;
+		synth->dev = tty;
 		return 0;
+	}
 
 	pr_err("speakup: Failed to set N_SPEAKUP on tty\n");
 
@@ -221,29 +224,30 @@ void spk_ttyio_unregister_ldisc(void)
 
 static int spk_ttyio_out(struct spk_synth *in_synth, const char ch)
 {
-	mutex_lock(&speakup_tty_mutex);
-	if (in_synth->alive && speakup_tty && speakup_tty->ops->write) {
-		int ret = speakup_tty->ops->write(speakup_tty, &ch, 1);
-
-		mutex_unlock(&speakup_tty_mutex);
-		if (ret == 0)
-			/* No room */
-			return 0;
-		if (ret < 0) {
-			pr_warn("%s: I/O error, deactivating speakup\n",
-				in_synth->long_name);
-			/* No synth any more, so nobody will restart TTYs,
-			 * and we thus need to do it ourselves.  Now that there
-			 * is no synth we can let application flood anyway
-			 */
-			in_synth->alive = 0;
-			speakup_start_ttys();
-			return 0;
-		}
+	struct tty_struct *tty = in_synth->dev;
+	int ret;
+
+	if (!in_synth->alive || !tty->ops->write)
+		return 0;
+
+	ret = tty->ops->write(tty, &ch, 1);
+
+	if (ret == 0)
+		/* No room */
+		return 0;
+
+	if (ret > 0)
+		/* Success */
 		return 1;
-	}
 
-	mutex_unlock(&speakup_tty_mutex);
+	pr_warn("%s: I/O error, deactivating speakup\n",
+		in_synth->long_name);
+	/* No synth any more, so nobody will restart TTYs,
+	 * and we thus need to do it ourselves.  Now that there
+	 * is no synth we can let application flood anyway
+	 */
+	in_synth->alive = 0;
+	speakup_start_ttys();
 	return 0;
 }
 
@@ -264,47 +268,20 @@ static int spk_ttyio_out_unicode(struct spk_synth *in_synth, u16 ch)
 	return ret;
 }
 
-static int check_tty(struct tty_struct *tty)
-{
-	if (!tty) {
-		pr_warn("%s: I/O error, deactivating speakup\n",
-			spk_ttyio_synth->long_name);
-		/* No synth any more, so nobody will restart TTYs, and we thus
-		 * need to do it ourselves.  Now that there is no synth we can
-		 * let application flood anyway
-		 */
-		spk_ttyio_synth->alive = 0;
-		speakup_start_ttys();
-		return 1;
-	}
-
-	return 0;
-}
-
-static void spk_ttyio_send_xchar(char ch)
+static void spk_ttyio_send_xchar(struct spk_synth *in_synth, char ch)
 {
-	mutex_lock(&speakup_tty_mutex);
-	if (check_tty(speakup_tty)) {
-		mutex_unlock(&speakup_tty_mutex);
-		return;
-	}
+	struct tty_struct *tty = in_synth->dev;
 
-	if (speakup_tty->ops->send_xchar)
-		speakup_tty->ops->send_xchar(speakup_tty, ch);
-	mutex_unlock(&speakup_tty_mutex);
+	if (tty->ops->send_xchar)
+		tty->ops->send_xchar(tty, ch);
 }
 
-static void spk_ttyio_tiocmset(unsigned int set, unsigned int clear)
+static void spk_ttyio_tiocmset(struct spk_synth *in_synth, unsigned int set, unsigned int clear)
 {
-	mutex_lock(&speakup_tty_mutex);
-	if (check_tty(speakup_tty)) {
-		mutex_unlock(&speakup_tty_mutex);
-		return;
-	}
+	struct tty_struct *tty = in_synth->dev;
 
-	if (speakup_tty->ops->tiocmset)
-		speakup_tty->ops->tiocmset(speakup_tty, set, clear);
-	mutex_unlock(&speakup_tty_mutex);
+	if (tty->ops->tiocmset)
+		tty->ops->tiocmset(tty, set, clear);
 }
 
 static int spk_ttyio_wait_for_xmitr(struct spk_synth *in_synth)
@@ -312,9 +289,10 @@ static int spk_ttyio_wait_for_xmitr(struct spk_synth *in_synth)
 	return 1;
 }
 
-static unsigned char ttyio_in(int timeout)
+static unsigned char ttyio_in(struct spk_synth *in_synth, int timeout)
 {
-	struct spk_ldisc_data *ldisc_data = speakup_tty->disc_data;
+	struct tty_struct *tty = in_synth->dev;
+	struct spk_ldisc_data *ldisc_data = tty->disc_data;
 	char rv;
 
 	if (!timeout) {
@@ -334,35 +312,29 @@ static unsigned char ttyio_in(int timeout)
 	mb();
 	ldisc_data->buf_free = true;
 	/* Let TTY push more characters */
-	tty_schedule_flip(speakup_tty->port);
+	tty_schedule_flip(tty->port);
 
 	return rv;
 }
 
-static unsigned char spk_ttyio_in(void)
+static unsigned char spk_ttyio_in(struct spk_synth *in_synth)
 {
-	return ttyio_in(SPK_SYNTH_TIMEOUT);
+	return ttyio_in(in_synth, SPK_SYNTH_TIMEOUT);
 }
 
-static unsigned char spk_ttyio_in_nowait(void)
+static unsigned char spk_ttyio_in_nowait(struct spk_synth *in_synth)
 {
-	u8 rv = ttyio_in(0);
+	u8 rv = ttyio_in(in_synth, 0);
 
 	return (rv == 0xff) ? 0 : rv;
 }
 
-static void spk_ttyio_flush_buffer(void)
+static void spk_ttyio_flush_buffer(struct spk_synth *in_synth)
 {
-	mutex_lock(&speakup_tty_mutex);
-	if (check_tty(speakup_tty)) {
-		mutex_unlock(&speakup_tty_mutex);
-		return;
-	}
+	struct tty_struct *tty = in_synth->dev;
 
-	if (speakup_tty->ops->flush_buffer)
-		speakup_tty->ops->flush_buffer(speakup_tty);
-
-	mutex_unlock(&speakup_tty_mutex);
+	if (tty->ops->flush_buffer)
+		tty->ops->flush_buffer(tty);
 }
 
 int spk_ttyio_synth_probe(struct spk_synth *synth)
@@ -373,37 +345,38 @@ int spk_ttyio_synth_probe(struct spk_synth *synth)
 		return rv;
 
 	synth->alive = 1;
-	spk_ttyio_synth = synth;
 
 	return 0;
 }
 EXPORT_SYMBOL_GPL(spk_ttyio_synth_probe);
 
-void spk_ttyio_release(void)
+void spk_ttyio_release(struct spk_synth *in_synth)
 {
-	if (!speakup_tty)
-		return;
+	struct tty_struct *tty = in_synth->dev;
 
-	tty_lock(speakup_tty);
+	tty_lock(tty);
 
-	if (speakup_tty->ops->close)
-		speakup_tty->ops->close(speakup_tty, NULL);
+	if (tty->ops->close)
+		tty->ops->close(tty, NULL);
+
+	tty_ldisc_flush(tty);
+	tty_unlock(tty);
+	tty_kclose(tty);
 
-	tty_ldisc_flush(speakup_tty);
-	tty_unlock(speakup_tty);
-	tty_kclose(speakup_tty);
+	in_synth->dev = NULL;
 }
 EXPORT_SYMBOL_GPL(spk_ttyio_release);
 
-const char *spk_ttyio_synth_immediate(struct spk_synth *synth, const char *buff)
+const char *spk_ttyio_synth_immediate(struct spk_synth *in_synth, const char *buff)
 {
+	struct tty_struct *tty = in_synth->dev;
 	u_char ch;
 
 	while ((ch = *buff)) {
 		if (ch == '\n')
-			ch = synth->procspeech;
-		if (tty_write_room(speakup_tty) < 1 ||
-		    !synth->io_ops->synth_out(synth, ch))
+			ch = in_synth->procspeech;
+		if (tty_write_room(tty) < 1 ||
+		    !in_synth->io_ops->synth_out(in_synth, ch))
 			return buff;
 		buff++;
 	}
diff --git a/drivers/accessibility/speakup/spk_types.h b/drivers/accessibility/speakup/spk_types.h
index 91fca3033a45..6a96ad94bc3f 100644
--- a/drivers/accessibility/speakup/spk_types.h
+++ b/drivers/accessibility/speakup/spk_types.h
@@ -48,7 +48,7 @@ enum var_id_t {
 	ATTRIB_BLEEP, BLEEPS,
 	RATE, PITCH, VOL, TONE, PUNCT, VOICE, FREQUENCY, LANG,
 	DIRECT, PAUSE,
-	CAPS_START, CAPS_STOP, CHARTAB, INFLECTION,
+	CAPS_START, CAPS_STOP, CHARTAB, INFLECTION, FLUSH,
 	MAXVARS
 };
 
@@ -157,11 +157,11 @@ struct spk_synth;
 struct spk_io_ops {
 	int (*synth_out)(struct spk_synth *synth, const char ch);
 	int (*synth_out_unicode)(struct spk_synth *synth, u16 ch);
-	void (*send_xchar)(char ch);
-	void (*tiocmset)(unsigned int set, unsigned int clear);
-	unsigned char (*synth_in)(void);
-	unsigned char (*synth_in_nowait)(void);
-	void (*flush_buffer)(void);
+	void (*send_xchar)(struct spk_synth *synth, char ch);
+	void (*tiocmset)(struct spk_synth *synth, unsigned int set, unsigned int clear);
+	unsigned char (*synth_in)(struct spk_synth *synth);
+	unsigned char (*synth_in_nowait)(struct spk_synth *synth);
+	void (*flush_buffer)(struct spk_synth *synth);
 	int (*wait_for_xmitr)(struct spk_synth *synth);
 };
 
@@ -178,6 +178,7 @@ struct spk_synth {
 	int trigger;
 	int jiffies;
 	int full;
+	int flush_time;
 	int ser;
 	char *dev_name;
 	short flags;
@@ -188,7 +189,7 @@ struct spk_synth {
 	int *default_vol;
 	struct spk_io_ops *io_ops;
 	int (*probe)(struct spk_synth *synth);
-	void (*release)(void);
+	void (*release)(struct spk_synth *synth);
 	const char *(*synth_immediate)(struct spk_synth *synth,
 				       const char *buff);
 	void (*catch_up)(struct spk_synth *synth);
@@ -200,6 +201,8 @@ struct spk_synth {
 	struct synth_indexing indexing;
 	int alive;
 	struct attribute_group attributes;
+
+	void *dev;
 };
 
 /*
diff --git a/drivers/accessibility/speakup/synth.c b/drivers/accessibility/speakup/synth.c
index ac47dbac7207..2b8699673bac 100644
--- a/drivers/accessibility/speakup/synth.c
+++ b/drivers/accessibility/speakup/synth.c
@@ -137,14 +137,14 @@ EXPORT_SYMBOL_GPL(spk_do_catch_up_unicode);
 
 void spk_synth_flush(struct spk_synth *synth)
 {
-	synth->io_ops->flush_buffer();
+	synth->io_ops->flush_buffer(synth);
 	synth->io_ops->synth_out(synth, synth->clear);
 }
 EXPORT_SYMBOL_GPL(spk_synth_flush);
 
 unsigned char spk_synth_get_index(struct spk_synth *synth)
 {
-	return synth->io_ops->synth_in_nowait();
+	return synth->io_ops->synth_in_nowait(synth);
 }
 EXPORT_SYMBOL_GPL(spk_synth_get_index);
 
@@ -348,6 +348,7 @@ struct var_t synth_time_vars[] = {
 	{ TRIGGER, .u.n = {NULL, 20, 10, 2000, 0, 0, NULL } },
 	{ JIFFY, .u.n = {NULL, 50, 20, 200, 0, 0, NULL } },
 	{ FULL, .u.n = {NULL, 400, 200, 60000, 0, 0, NULL } },
+	{ FLUSH, .u.n = {NULL, 4000, 100, 4000, 0, 0, NULL } },
 	V_LAST_VAR
 };
 
@@ -408,6 +409,8 @@ static int do_synth_init(struct spk_synth *in_synth)
 		synth_time_vars[2].u.n.default_val = synth->jiffies;
 	synth_time_vars[3].u.n.value =
 		synth_time_vars[3].u.n.default_val = synth->full;
+	synth_time_vars[4].u.n.value =
+		synth_time_vars[4].u.n.default_val = synth->flush_time;
 	synth_printf("%s", synth->init);
 	for (var = synth->vars;
 		(var->var_id >= 0) && (var->var_id < MAXVARS); var++)
@@ -440,7 +443,7 @@ void synth_release(void)
 		sysfs_remove_group(speakup_kobj, &synth->attributes);
 	for (var = synth->vars; var->var_id != MAXVARS; var++)
 		speakup_unregister_var(var->var_id);
-	synth->release();
+	synth->release(synth);
 	synth = NULL;
 }
 
diff --git a/drivers/accessibility/speakup/varhandlers.c b/drivers/accessibility/speakup/varhandlers.c
index d7f6bec7ff06..067c0da97dcb 100644
--- a/drivers/accessibility/speakup/varhandlers.c
+++ b/drivers/accessibility/speakup/varhandlers.c
@@ -23,6 +23,7 @@ static struct st_var_header var_headers[] = {
 	{ "trigger_time", TRIGGER, VAR_TIME, NULL, NULL },
 	{ "jiffy_delta", JIFFY, VAR_TIME, NULL, NULL },
 	{ "full_time", FULL, VAR_TIME, NULL, NULL },
+	{ "flush_time", FLUSH, VAR_TIME, NULL, NULL },
 	{ "spell_delay", SPELL_DELAY, VAR_NUM, &spk_spell_delay, NULL },
 	{ "bleeps", BLEEPS, VAR_NUM, &spk_bleeps, NULL },
 	{ "attrib_bleep", ATTRIB_BLEEP, VAR_NUM, &spk_attrib_bleep, NULL },
diff --git a/drivers/base/regmap/regmap-sdw-mbq.c b/drivers/base/regmap/regmap-sdw-mbq.c
index 8ce30650b97c..fe3ac26b66ad 100644
--- a/drivers/base/regmap/regmap-sdw-mbq.c
+++ b/drivers/base/regmap/regmap-sdw-mbq.c
@@ -15,11 +15,11 @@ static int regmap_sdw_mbq_write(void *context, unsigned int reg, unsigned int va
 	struct sdw_slave *slave = dev_to_sdw_dev(dev);
 	int ret;
 
-	ret = sdw_write(slave, SDW_SDCA_MBQ_CTL(reg), (val >> 8) & 0xff);
+	ret = sdw_write_no_pm(slave, SDW_SDCA_MBQ_CTL(reg), (val >> 8) & 0xff);
 	if (ret < 0)
 		return ret;
 
-	return sdw_write(slave, reg, val & 0xff);
+	return sdw_write_no_pm(slave, reg, val & 0xff);
 }
 
 static int regmap_sdw_mbq_read(void *context, unsigned int reg, unsigned int *val)
@@ -29,11 +29,11 @@ static int regmap_sdw_mbq_read(void *context, unsigned int reg, unsigned int *va
 	int read0;
 	int read1;
 
-	read0 = sdw_read(slave, reg);
+	read0 = sdw_read_no_pm(slave, reg);
 	if (read0 < 0)
 		return read0;
 
-	read1 = sdw_read(slave, SDW_SDCA_MBQ_CTL(reg));
+	read1 = sdw_read_no_pm(slave, SDW_SDCA_MBQ_CTL(reg));
 	if (read1 < 0)
 		return read1;
 
@@ -98,4 +98,4 @@ struct regmap *__devm_regmap_init_sdw_mbq(struct sdw_slave *sdw,
 EXPORT_SYMBOL_GPL(__devm_regmap_init_sdw_mbq);
 
 MODULE_DESCRIPTION("Regmap SoundWire MBQ Module");
-MODULE_LICENSE("GPL v2");
+MODULE_LICENSE("GPL");
diff --git a/drivers/base/regmap/regmap-sdw.c b/drivers/base/regmap/regmap-sdw.c
index c83be26434e7..966de8a136d9 100644
--- a/drivers/base/regmap/regmap-sdw.c
+++ b/drivers/base/regmap/regmap-sdw.c
@@ -13,7 +13,7 @@ static int regmap_sdw_write(void *context, unsigned int reg, unsigned int val)
 	struct device *dev = context;
 	struct sdw_slave *slave = dev_to_sdw_dev(dev);
 
-	return sdw_write(slave, reg, val);
+	return sdw_write_no_pm(slave, reg, val);
 }
 
 static int regmap_sdw_read(void *context, unsigned int reg, unsigned int *val)
@@ -22,7 +22,7 @@ static int regmap_sdw_read(void *context, unsigned int reg, unsigned int *val)
 	struct sdw_slave *slave = dev_to_sdw_dev(dev);
 	int read;
 
-	read = sdw_read(slave, reg);
+	read = sdw_read_no_pm(slave, reg);
 	if (read < 0)
 		return read;
 
diff --git a/drivers/bus/fsl-mc/Kconfig b/drivers/bus/fsl-mc/Kconfig
index c23c77c9b705..b1fd55901c50 100644
--- a/drivers/bus/fsl-mc/Kconfig
+++ b/drivers/bus/fsl-mc/Kconfig
@@ -14,3 +14,10 @@ config FSL_MC_BUS
 	  architecture.  The fsl-mc bus driver handles discovery of
 	  DPAA2 objects (which are represented as Linux devices) and
 	  binding objects to drivers.
+
+config FSL_MC_UAPI_SUPPORT
+	bool "Management Complex (MC) userspace support"
+	depends on FSL_MC_BUS
+	help
+	  Provides userspace support for interrogating, creating, destroying or
+	  configuring DPAA2 objects exported by the Management Complex.
diff --git a/drivers/bus/fsl-mc/Makefile b/drivers/bus/fsl-mc/Makefile
index 3c518c7e8374..4ae292a30e53 100644
--- a/drivers/bus/fsl-mc/Makefile
+++ b/drivers/bus/fsl-mc/Makefile
@@ -16,3 +16,6 @@ mc-bus-driver-objs := fsl-mc-bus.o \
 		      fsl-mc-allocator.o \
 		      fsl-mc-msi.o \
 		      dpmcp.o
+
+# MC userspace support
+obj-$(CONFIG_FSL_MC_UAPI_SUPPORT) += fsl-mc-uapi.o
diff --git a/drivers/bus/fsl-mc/dprc-driver.c b/drivers/bus/fsl-mc/dprc-driver.c
index 68488a7ad0d6..e3e2ae41c22b 100644
--- a/drivers/bus/fsl-mc/dprc-driver.c
+++ b/drivers/bus/fsl-mc/dprc-driver.c
@@ -237,8 +237,8 @@ static void dprc_add_new_devices(struct fsl_mc_device *mc_bus_dev,
  * populated before they can get allocation requests from probe callbacks
  * of the device drivers for the non-allocatable devices.
  */
-static int dprc_scan_objects(struct fsl_mc_device *mc_bus_dev,
-			    bool alloc_interrupts)
+int dprc_scan_objects(struct fsl_mc_device *mc_bus_dev,
+		      bool alloc_interrupts)
 {
 	int num_child_objects;
 	int dprc_get_obj_failures;
@@ -458,8 +458,9 @@ out:
 /*
  * Disable and clear interrupt for a given DPRC object
  */
-static int disable_dprc_irq(struct fsl_mc_device *mc_dev)
+int disable_dprc_irq(struct fsl_mc_device *mc_dev)
 {
+	struct fsl_mc_bus *mc_bus = to_fsl_mc_bus(mc_dev);
 	int error;
 	struct fsl_mc_io *mc_io = mc_dev->mc_io;
 
@@ -496,9 +497,18 @@ static int disable_dprc_irq(struct fsl_mc_device *mc_dev)
 		return error;
 	}
 
+	mc_bus->irq_enabled = 0;
+
 	return 0;
 }
 
+int get_dprc_irq_state(struct fsl_mc_device *mc_dev)
+{
+	struct fsl_mc_bus *mc_bus = to_fsl_mc_bus(mc_dev);
+
+	return mc_bus->irq_enabled;
+}
+
 static int register_dprc_irq_handler(struct fsl_mc_device *mc_dev)
 {
 	int error;
@@ -525,8 +535,9 @@ static int register_dprc_irq_handler(struct fsl_mc_device *mc_dev)
 	return 0;
 }
 
-static int enable_dprc_irq(struct fsl_mc_device *mc_dev)
+int enable_dprc_irq(struct fsl_mc_device *mc_dev)
 {
+	struct fsl_mc_bus *mc_bus = to_fsl_mc_bus(mc_dev);
 	int error;
 
 	/*
@@ -554,6 +565,8 @@ static int enable_dprc_irq(struct fsl_mc_device *mc_dev)
 		return error;
 	}
 
+	mc_bus->irq_enabled = 1;
+
 	return 0;
 }
 
@@ -603,6 +616,7 @@ int dprc_setup(struct fsl_mc_device *mc_dev)
 	struct irq_domain *mc_msi_domain;
 	bool mc_io_created = false;
 	bool msi_domain_set = false;
+	bool uapi_created = false;
 	u16 major_ver, minor_ver;
 	size_t region_size;
 	int error;
@@ -635,6 +649,11 @@ int dprc_setup(struct fsl_mc_device *mc_dev)
 			return error;
 
 		mc_io_created = true;
+	} else {
+		error = fsl_mc_uapi_create_device_file(mc_bus);
+		if (error < 0)
+			return -EPROBE_DEFER;
+		uapi_created = true;
 	}
 
 	mc_msi_domain = fsl_mc_find_msi_domain(&mc_dev->dev);
@@ -692,6 +711,9 @@ error_cleanup_msi_domain:
 		mc_dev->mc_io = NULL;
 	}
 
+	if (uapi_created)
+		fsl_mc_uapi_remove_device_file(mc_bus);
+
 	return error;
 }
 EXPORT_SYMBOL_GPL(dprc_setup);
@@ -763,6 +785,7 @@ static void dprc_teardown_irq(struct fsl_mc_device *mc_dev)
 
 int dprc_cleanup(struct fsl_mc_device *mc_dev)
 {
+	struct fsl_mc_bus *mc_bus = to_fsl_mc_bus(mc_dev);
 	int error;
 
 	/* this function should be called only for DPRCs, it
@@ -793,6 +816,8 @@ int dprc_cleanup(struct fsl_mc_device *mc_dev)
 	if (!fsl_mc_is_root_dprc(&mc_dev->dev)) {
 		fsl_destroy_mc_io(mc_dev->mc_io);
 		mc_dev->mc_io = NULL;
+	} else {
+		fsl_mc_uapi_remove_device_file(mc_bus);
 	}
 
 	return 0;
diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c
index 8af978bd0000..380ad1fdb745 100644
--- a/drivers/bus/fsl-mc/fsl-mc-bus.c
+++ b/drivers/bus/fsl-mc/fsl-mc-bus.c
@@ -41,7 +41,7 @@ struct fsl_mc {
 	struct fsl_mc_device *root_mc_bus_dev;
 	u8 num_translation_ranges;
 	struct fsl_mc_addr_translation_range *translation_ranges;
-	void *fsl_mc_regs;
+	void __iomem *fsl_mc_regs;
 };
 
 /**
@@ -208,12 +208,108 @@ static struct attribute *fsl_mc_dev_attrs[] = {
 
 ATTRIBUTE_GROUPS(fsl_mc_dev);
 
+/* bus_for_each_dev() callback: rescan @dev if it is a root DPRC, else skip. */
+static int scan_fsl_mc_bus(struct device *dev, void *data)
+{
+	struct fsl_mc_device *root_mc_dev;
+	struct fsl_mc_bus *root_mc_bus;
+
+	if (!fsl_mc_is_root_dprc(dev))
+		return 0;
+
+	root_mc_dev = to_fsl_mc_device(dev);
+	root_mc_bus = to_fsl_mc_bus(root_mc_dev);
+	mutex_lock(&root_mc_bus->scan_mutex);
+	dprc_scan_objects(root_mc_dev, false);	/* alloc_interrupts is a bool */
+	mutex_unlock(&root_mc_bus->scan_mutex);
+
+	return 0;
+}
+
+static ssize_t rescan_store(struct bus_type *bus,
+			    const char *buf, size_t count)
+{
+	unsigned long val;
+
+	if (kstrtoul(buf, 0, &val) < 0)
+		return -EINVAL;
+
+	if (val)
+		bus_for_each_dev(bus, NULL, NULL, scan_fsl_mc_bus);
+
+	return count;
+}
+static BUS_ATTR_WO(rescan);
+
+/* bus_for_each_dev() callback: parse @data, then enable (non-zero) or
+ * disable (zero) the IRQ of a root DPRC; other devices are skipped.
+ */
+static int fsl_mc_bus_set_autorescan(struct device *dev, void *data)
+{
+	struct fsl_mc_device *root_dprc;
+	unsigned long enable;
+
+	if (!fsl_mc_is_root_dprc(dev))
+		return 0;
+
+	root_dprc = to_fsl_mc_device(dev);
+	if (kstrtoul(data, 0, &enable) < 0)
+		return -EINVAL;
+
+	if (!enable)
+		disable_dprc_irq(root_dprc);
+	else
+		enable_dprc_irq(root_dprc);
+
+	return 0;
+}
+
+/* bus_for_each_dev() callback: print a root DPRC's IRQ state into @data. */
+static int fsl_mc_bus_get_autorescan(struct device *dev, void *data)
+{
+	struct fsl_mc_device *root_mc_dev;
+	char *buf = data;
+
+	if (!fsl_mc_is_root_dprc(dev))
+		return 0;
+
+	/* @data is the sysfs show() page buffer: use the bounded sysfs helper. */
+	root_mc_dev = to_fsl_mc_device(dev);
+	sysfs_emit(buf, "%d\n", get_dprc_irq_state(root_mc_dev));
+	return 0;
+}
+
+static ssize_t autorescan_store(struct bus_type *bus, const char *buf, size_t count)
+{
+	/* Surface the callback's -EINVAL instead of always reporting success. */
+	int err = bus_for_each_dev(bus, NULL, (void *)buf, fsl_mc_bus_set_autorescan);
+
+	return err ? err : count;
+}
+
+static ssize_t autorescan_show(struct bus_type *bus, char *buf)
+{
+	bus_for_each_dev(bus, NULL, (void *)buf, fsl_mc_bus_get_autorescan);
+	return strlen(buf);
+}
+
+static BUS_ATTR_RW(autorescan);
+
+static struct attribute *fsl_mc_bus_attrs[] = {
+	&bus_attr_rescan.attr,
+	&bus_attr_autorescan.attr,
+	NULL,
+};
+
+ATTRIBUTE_GROUPS(fsl_mc_bus);
+
 struct bus_type fsl_mc_bus_type = {
 	.name = "fsl-mc",
 	.match = fsl_mc_bus_match,
 	.uevent = fsl_mc_bus_uevent,
 	.dma_configure  = fsl_mc_dma_configure,
 	.dev_groups = fsl_mc_dev_groups,
+	.bus_groups = fsl_mc_bus_groups,
 };
 EXPORT_SYMBOL_GPL(fsl_mc_bus_type);
 
@@ -292,6 +388,11 @@ struct device_type fsl_mc_bus_dpdmai_type = {
 };
 EXPORT_SYMBOL_GPL(fsl_mc_bus_dpdmai_type);
 
+struct device_type fsl_mc_bus_dpdbg_type = {
+	.name = "fsl_mc_bus_dpdbg"
+};
+EXPORT_SYMBOL_GPL(fsl_mc_bus_dpdbg_type);
+
 static struct device_type *fsl_mc_get_device_type(const char *type)
 {
 	static const struct {
@@ -313,6 +414,7 @@ static struct device_type *fsl_mc_get_device_type(const char *type)
 		{ &fsl_mc_bus_dpaiop_type, "dpaiop" },
 		{ &fsl_mc_bus_dpci_type, "dpci" },
 		{ &fsl_mc_bus_dpdmai_type, "dpdmai" },
+		{ &fsl_mc_bus_dpdbg_type, "dpdbg" },
 		{ NULL, NULL }
 	};
 	int i;
diff --git a/drivers/bus/fsl-mc/fsl-mc-private.h b/drivers/bus/fsl-mc/fsl-mc-private.h
index c932387641fa..1958fa065360 100644
--- a/drivers/bus/fsl-mc/fsl-mc-private.h
+++ b/drivers/bus/fsl-mc/fsl-mc-private.h
@@ -10,6 +10,8 @@
 
 #include <linux/fsl/mc.h>
 #include <linux/mutex.h>
+#include <linux/ioctl.h>
+#include <linux/miscdevice.h>
 
 /*
  * Data Path Management Complex (DPMNG) General API
@@ -543,6 +545,22 @@ struct fsl_mc_resource_pool {
 };
 
 /**
+ * struct fsl_mc_uapi - information associated with a device file
+ * @misc: struct miscdevice linked to the root dprc
+ * @device: newly created device in /dev
+ * @mutex: mutex lock to serialize the open/release operations
+ * @local_instance_in_use: local MC I/O instance in use or not
+ * @static_mc_io: pointer to the static MC I/O object
+ */
+struct fsl_mc_uapi {
+	struct miscdevice misc;
+	struct device *device;
+	struct mutex mutex; /* serialize open/release operations */
+	u32 local_instance_in_use;
+	struct fsl_mc_io *static_mc_io;
+};
+
+/**
  * struct fsl_mc_bus - logical bus that corresponds to a physical DPRC
  * @mc_dev: fsl-mc device for the bus device itself.
  * @resource_pools: array of resource pools (one pool per resource type)
@@ -551,6 +569,7 @@ struct fsl_mc_resource_pool {
  * @irq_resources: Pointer to array of IRQ objects for the IRQ pool
  * @scan_mutex: Serializes bus scanning
  * @dprc_attr: DPRC attributes
+ * @uapi_misc: struct that abstracts the interaction with userspace
  */
 struct fsl_mc_bus {
 	struct fsl_mc_device mc_dev;
@@ -558,6 +577,8 @@ struct fsl_mc_bus {
 	struct fsl_mc_device_irq *irq_resources;
 	struct mutex scan_mutex;    /* serializes bus scanning */
 	struct dprc_attributes dprc_attr;
+	struct fsl_mc_uapi uapi_misc;
+	int irq_enabled;
 };
 
 #define to_fsl_mc_bus(_mc_dev) \
@@ -574,6 +595,9 @@ int __init dprc_driver_init(void);
 
 void dprc_driver_exit(void);
 
+int dprc_scan_objects(struct fsl_mc_device *mc_bus_dev,
+		      bool alloc_interrupts);
+
 int __init fsl_mc_allocator_driver_init(void);
 
 void fsl_mc_allocator_driver_exit(void);
@@ -612,4 +636,29 @@ void fsl_mc_get_root_dprc(struct device *dev,
 struct fsl_mc_device *fsl_mc_device_lookup(struct fsl_mc_obj_desc *obj_desc,
 					   struct fsl_mc_device *mc_bus_dev);
 
+u16 mc_cmd_hdr_read_cmdid(struct fsl_mc_command *cmd);
+
+#ifdef CONFIG_FSL_MC_UAPI_SUPPORT
+
+int fsl_mc_uapi_create_device_file(struct fsl_mc_bus *mc_bus);
+
+void fsl_mc_uapi_remove_device_file(struct fsl_mc_bus *mc_bus);
+
+#else
+
+static inline int fsl_mc_uapi_create_device_file(struct fsl_mc_bus *mc_bus)
+{
+	return 0;
+}
+
+static inline void fsl_mc_uapi_remove_device_file(struct fsl_mc_bus *mc_bus)
+{
+}
+
+#endif
+
+int disable_dprc_irq(struct fsl_mc_device *mc_dev);
+int enable_dprc_irq(struct fsl_mc_device *mc_dev);
+int get_dprc_irq_state(struct fsl_mc_device *mc_dev);
+
 #endif /* _FSL_MC_PRIVATE_H_ */
diff --git a/drivers/bus/fsl-mc/fsl-mc-uapi.c b/drivers/bus/fsl-mc/fsl-mc-uapi.c
new file mode 100644
index 000000000000..9c4c1395fcdb
--- /dev/null
+++ b/drivers/bus/fsl-mc/fsl-mc-uapi.c
@@ -0,0 +1,597 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Management Complex (MC) userspace support
+ *
+ * Copyright 2021 NXP
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/miscdevice.h>
+
+#include "fsl-mc-private.h"
+
+struct uapi_priv_data {
+	struct fsl_mc_uapi *uapi;
+	struct fsl_mc_io *mc_io;
+};
+
+struct fsl_mc_cmd_desc {
+	u16 cmdid_value;
+	u16 cmdid_mask;
+	int size;
+	bool token;
+	int flags;
+};
+
+#define FSL_MC_CHECK_MODULE_ID		BIT(0)
+#define FSL_MC_CAP_NET_ADMIN_NEEDED	BIT(1)
+
+enum fsl_mc_cmd_index {
+	DPDBG_DUMP = 0,
+	DPDBG_SET,
+	DPRC_GET_CONTAINER_ID,
+	DPRC_CREATE_CONT,
+	DPRC_DESTROY_CONT,
+	DPRC_ASSIGN,
+	DPRC_UNASSIGN,
+	DPRC_GET_OBJ_COUNT,
+	DPRC_GET_OBJ,
+	DPRC_GET_RES_COUNT,
+	DPRC_GET_RES_IDS,
+	DPRC_SET_OBJ_LABEL,
+	DPRC_SET_LOCKED,
+	DPRC_CONNECT,
+	DPRC_DISCONNECT,
+	DPRC_GET_POOL,
+	DPRC_GET_POOL_COUNT,
+	DPRC_GET_CONNECTION,
+	DPCI_GET_LINK_STATE,
+	DPCI_GET_PEER_ATTR,
+	DPAIOP_GET_SL_VERSION,
+	DPAIOP_GET_STATE,
+	DPMNG_GET_VERSION,
+	DPSECI_GET_TX_QUEUE,
+	DPMAC_GET_COUNTER,
+	DPMAC_GET_MAC_ADDR,
+	DPNI_SET_PRIM_MAC,
+	DPNI_GET_PRIM_MAC,
+	DPNI_GET_STATISTICS,
+	DPNI_GET_LINK_STATE,
+	DPNI_GET_MAX_FRAME_LENGTH,
+	DPSW_GET_TAILDROP,
+	DPSW_SET_TAILDROP,
+	DPSW_IF_GET_COUNTER,
+	DPSW_IF_GET_MAX_FRAME_LENGTH,
+	DPDMUX_GET_COUNTER,
+	DPDMUX_IF_GET_MAX_FRAME_LENGTH,
+	GET_ATTR,
+	GET_IRQ_MASK,
+	GET_IRQ_STATUS,
+	CLOSE,
+	OPEN,
+	GET_API_VERSION,
+	DESTROY,
+	CREATE,
+};
+
+static struct fsl_mc_cmd_desc fsl_mc_accepted_cmds[] = {
+	[DPDBG_DUMP] = {
+		.cmdid_value = 0x1300,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 28,
+	},
+	[DPDBG_SET] = {
+		.cmdid_value = 0x1400,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 28,
+	},
+	[DPRC_GET_CONTAINER_ID] = {
+		.cmdid_value = 0x8300,
+		.cmdid_mask = 0xFFF0,
+		.token = false,
+		.size = 8,
+	},
+	[DPRC_CREATE_CONT] = {
+		.cmdid_value = 0x1510,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 40,
+		.flags = FSL_MC_CAP_NET_ADMIN_NEEDED,
+	},
+	[DPRC_DESTROY_CONT] = {
+		.cmdid_value = 0x1520,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 12,
+		.flags = FSL_MC_CAP_NET_ADMIN_NEEDED,
+	},
+	[DPRC_ASSIGN] = {
+		.cmdid_value = 0x1570,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 40,
+		.flags = FSL_MC_CAP_NET_ADMIN_NEEDED,
+	},
+	[DPRC_UNASSIGN] = {
+		.cmdid_value = 0x1580,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 40,
+		.flags = FSL_MC_CAP_NET_ADMIN_NEEDED,
+	},
+	[DPRC_GET_OBJ_COUNT] = {
+		.cmdid_value = 0x1590,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 16,
+	},
+	[DPRC_GET_OBJ] = {
+		.cmdid_value = 0x15A0,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 12,
+	},
+	[DPRC_GET_RES_COUNT] = {
+		.cmdid_value = 0x15B0,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 32,
+	},
+	[DPRC_GET_RES_IDS] = {
+		.cmdid_value = 0x15C0,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 40,
+	},
+	[DPRC_SET_OBJ_LABEL] = {
+		.cmdid_value = 0x1610,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 48,
+		.flags = FSL_MC_CAP_NET_ADMIN_NEEDED,
+	},
+	[DPRC_SET_LOCKED] = {
+		.cmdid_value = 0x16B0,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 16,
+		.flags = FSL_MC_CAP_NET_ADMIN_NEEDED,
+	},
+	[DPRC_CONNECT] = {
+		.cmdid_value = 0x1670,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 56,
+		.flags = FSL_MC_CAP_NET_ADMIN_NEEDED,
+	},
+	[DPRC_DISCONNECT] = {
+		.cmdid_value = 0x1680,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 32,
+		.flags = FSL_MC_CAP_NET_ADMIN_NEEDED,
+	},
+	[DPRC_GET_POOL] = {
+		.cmdid_value = 0x1690,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 12,
+	},
+	[DPRC_GET_POOL_COUNT] = {
+		.cmdid_value = 0x16A0,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 8,
+	},
+	[DPRC_GET_CONNECTION] = {
+		.cmdid_value = 0x16C0,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 32,
+	},
+
+	[DPCI_GET_LINK_STATE] = {
+		.cmdid_value = 0x0E10,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 8,
+	},
+	[DPCI_GET_PEER_ATTR] = {
+		.cmdid_value = 0x0E20,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 8,
+	},
+	[DPAIOP_GET_SL_VERSION] = {
+		.cmdid_value = 0x2820,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 8,
+	},
+	[DPAIOP_GET_STATE] = {
+		.cmdid_value = 0x2830,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 8,
+	},
+	[DPMNG_GET_VERSION] = {
+		.cmdid_value = 0x8310,
+		.cmdid_mask = 0xFFF0,
+		.token = false,
+		.size = 8,
+	},
+	[DPSECI_GET_TX_QUEUE] = {
+		.cmdid_value = 0x1970,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 14,
+	},
+	[DPMAC_GET_COUNTER] = {
+		.cmdid_value = 0x0c40,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 9,
+	},
+	[DPMAC_GET_MAC_ADDR] = {
+		.cmdid_value = 0x0c50,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 8,
+	},
+	[DPNI_SET_PRIM_MAC] = {
+		.cmdid_value = 0x2240,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 16,
+		.flags = FSL_MC_CAP_NET_ADMIN_NEEDED,
+	},
+	[DPNI_GET_PRIM_MAC] = {
+		.cmdid_value = 0x2250,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 8,
+	},
+	[DPNI_GET_STATISTICS] = {
+		.cmdid_value = 0x25D0,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 10,
+	},
+	[DPNI_GET_LINK_STATE] = {
+		.cmdid_value = 0x2150,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 8,
+	},
+	[DPNI_GET_MAX_FRAME_LENGTH] = {
+		.cmdid_value = 0x2170,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 8,
+	},
+	[DPSW_GET_TAILDROP] = {
+		.cmdid_value = 0x0A80,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 14,
+	},
+	[DPSW_SET_TAILDROP] = {
+		.cmdid_value = 0x0A90,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 24,
+		.flags = FSL_MC_CAP_NET_ADMIN_NEEDED,
+	},
+	[DPSW_IF_GET_COUNTER] = {
+		.cmdid_value = 0x0340,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 11,
+	},
+	[DPSW_IF_GET_MAX_FRAME_LENGTH] = {
+		.cmdid_value = 0x0450,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 10,
+	},
+	[DPDMUX_GET_COUNTER] = {
+		.cmdid_value = 0x0b20,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 11,
+	},
+	[DPDMUX_IF_GET_MAX_FRAME_LENGTH] = {
+		.cmdid_value = 0x0a20,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 10,
+	},
+	[GET_ATTR] = {
+		.cmdid_value = 0x0040,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 8,
+	},
+	[GET_IRQ_MASK] = {
+		.cmdid_value = 0x0150,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 13,
+	},
+	[GET_IRQ_STATUS] = {
+		.cmdid_value = 0x0160,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 13,
+	},
+	[CLOSE] = {
+		.cmdid_value = 0x8000,
+		.cmdid_mask = 0xFFF0,
+		.token = true,
+		.size = 8,
+	},
+
+	/* Common commands amongst all types of objects. Must be checked last. */
+	[OPEN] = {
+		.cmdid_value = 0x8000,
+		.cmdid_mask = 0xFC00,
+		.token = false,
+		.size = 12,
+		.flags = FSL_MC_CHECK_MODULE_ID,
+	},
+	[GET_API_VERSION] = {
+		.cmdid_value = 0xA000,
+		.cmdid_mask = 0xFC00,
+		.token = false,
+		.size = 8,
+		.flags = FSL_MC_CHECK_MODULE_ID,
+	},
+	[DESTROY] = {
+		.cmdid_value = 0x9800,
+		.cmdid_mask = 0xFC00,
+		.token = true,
+		.size = 12,
+		.flags = FSL_MC_CHECK_MODULE_ID | FSL_MC_CAP_NET_ADMIN_NEEDED,
+	},
+	[CREATE] = {
+		.cmdid_value = 0x9000,
+		.cmdid_mask = 0xFC00,
+		.token = true,
+		.size = 64,
+		.flags = FSL_MC_CHECK_MODULE_ID | FSL_MC_CAP_NET_ADMIN_NEEDED,
+	},
+};
+
+#define FSL_MC_NUM_ACCEPTED_CMDS ARRAY_SIZE(fsl_mc_accepted_cmds)
+
+#define FSL_MC_MAX_MODULE_ID 0x10
+
+static int fsl_mc_command_check(struct fsl_mc_device *mc_dev,
+				struct fsl_mc_command *mc_cmd)
+{
+	struct fsl_mc_cmd_desc *desc = NULL;
+	int mc_cmd_max_size, i;
+	bool token_provided;
+	u16 cmdid, module_id;
+	char *mc_cmd_end;
+	char sum = 0;
+
+	/* Check if this is an accepted MC command */
+	cmdid = mc_cmd_hdr_read_cmdid(mc_cmd);
+	for (i = 0; i < FSL_MC_NUM_ACCEPTED_CMDS; i++) {
+		desc = &fsl_mc_accepted_cmds[i];
+		if ((cmdid & desc->cmdid_mask) == desc->cmdid_value)
+			break;
+	}
+	if (i == FSL_MC_NUM_ACCEPTED_CMDS) {
+		dev_err(&mc_dev->dev, "MC command 0x%04x: cmdid not accepted\n", cmdid);
+		return -EACCES;
+	}
+
+	/* Check if the size of the command is honored. Anything beyond the
+	 * last valid byte of the command should be zeroed.
+	 */
+	mc_cmd_max_size = sizeof(*mc_cmd);
+	mc_cmd_end = ((char *)mc_cmd) + desc->size;
+	for (i = desc->size; i < mc_cmd_max_size; i++)
+		sum |= *mc_cmd_end++;
+	if (sum) {
+		dev_err(&mc_dev->dev, "MC command 0x%04x: garbage beyond max size of %d bytes!\n",
+			cmdid, desc->size);
+		return -EACCES;
+	}
+
+	/* Some MC commands request a token to be passed so that object
+	 * identification is possible. Check if the token passed in the command
+	 * is as expected.
+	 */
+	token_provided = mc_cmd_hdr_read_token(mc_cmd) ? true : false;
+	if (token_provided != desc->token) {
+		dev_err(&mc_dev->dev, "MC command 0x%04x: token 0x%04x is invalid!\n",
+			cmdid, mc_cmd_hdr_read_token(mc_cmd));
+		return -EACCES;
+	}
+
+	/* If needed, check if the module ID passed is valid */
+	if (desc->flags & FSL_MC_CHECK_MODULE_ID) {
+		/* The module ID is represented by bits [4:9] from the cmdid */
+		module_id = (cmdid & GENMASK(9, 4)) >> 4;
+		if (module_id == 0 || module_id > FSL_MC_MAX_MODULE_ID) {
+			dev_err(&mc_dev->dev, "MC command 0x%04x: unknown module ID 0x%x\n",
+				cmdid, module_id);
+			return -EACCES;
+		}
+	}
+
+	/* Some commands alter how hardware resources are managed. For these
+	 * commands, check for CAP_NET_ADMIN.
+	 */
+	if (desc->flags & FSL_MC_CAP_NET_ADMIN_NEEDED) {
+		if (!capable(CAP_NET_ADMIN)) {
+			dev_err(&mc_dev->dev, "MC command 0x%04x: needs CAP_NET_ADMIN!\n",
+				cmdid);
+			return -EPERM;
+		}
+	}
+
+	return 0;
+}
+
+/* Copy an MC command in from user space, validate it, send it through
+ * @mc_io and copy the command buffer back to the user pointer in @arg.
+ */
+static int fsl_mc_uapi_send_command(struct fsl_mc_device *mc_dev, unsigned long arg,
+				    struct fsl_mc_io *mc_io)
+{
+	void __user *uptr = (void __user *)arg;
+	struct fsl_mc_command cmd;
+	int ret;
+
+	if (copy_from_user(&cmd, uptr, sizeof(cmd)))
+		return -EFAULT;
+
+	/* Only commands accepted by fsl_mc_command_check() are sent. */
+	ret = fsl_mc_command_check(mc_dev, &cmd);
+	if (ret)
+		return ret;
+
+	ret = mc_send_command(mc_io, &cmd);
+	if (ret)
+		return ret;
+
+	return copy_to_user(uptr, &cmd, sizeof(cmd)) ? -EFAULT : 0;
+}
+
+static int fsl_mc_uapi_dev_open(struct inode *inode, struct file *filep)
+{
+	struct fsl_mc_device *root_mc_device;
+	struct uapi_priv_data *priv_data;
+	struct fsl_mc_io *dynamic_mc_io;
+	struct fsl_mc_uapi *mc_uapi;
+	struct fsl_mc_bus *mc_bus;
+	int error;
+
+	priv_data = kzalloc(sizeof(*priv_data), GFP_KERNEL);
+	if (!priv_data)
+		return -ENOMEM;
+
+	mc_uapi = container_of(filep->private_data, struct fsl_mc_uapi, misc);
+	mc_bus = container_of(mc_uapi, struct fsl_mc_bus, uapi_misc);
+	root_mc_device = &mc_bus->mc_dev;
+
+	mutex_lock(&mc_uapi->mutex);
+
+	if (!mc_uapi->local_instance_in_use) {
+		priv_data->mc_io = mc_uapi->static_mc_io;
+		mc_uapi->local_instance_in_use = 1;
+	} else {
+		error = fsl_mc_portal_allocate(root_mc_device, 0,
+					       &dynamic_mc_io);
+		if (error) {
+			dev_dbg(&root_mc_device->dev,
+				"Could not allocate MC portal\n");
+			goto error_portal_allocate;
+		}
+
+		priv_data->mc_io = dynamic_mc_io;
+	}
+	priv_data->uapi = mc_uapi;
+	filep->private_data = priv_data;
+
+	mutex_unlock(&mc_uapi->mutex);
+
+	return 0;
+
+error_portal_allocate:
+	mutex_unlock(&mc_uapi->mutex);
+	kfree(priv_data);
+
+	return error;
+}
+
+/* release(): give back the MC I/O object stored in the per-file private
+ * data and free that private data.
+ */
+static int fsl_mc_uapi_dev_release(struct inode *inode, struct file *filep)
+{
+	struct uapi_priv_data *priv = filep->private_data;
+	struct fsl_mc_uapi *uapi = priv->uapi;
+	struct fsl_mc_io *io = priv->mc_io;
+
+	mutex_lock(&uapi->mutex);
+
+	/* The static instance is only marked unused; others are portal-freed. */
+	if (io != uapi->static_mc_io)
+		fsl_mc_portal_free(io);
+	else
+		uapi->local_instance_in_use = 0;
+
+	kfree(priv);
+	filep->private_data = NULL;
+
+	mutex_unlock(&uapi->mutex);
+
+	return 0;
+}
+
+/* ioctl entry point for the MC device file. Only FSL_MC_SEND_MC_COMMAND is
+ * handled, by forwarding @arg to fsl_mc_uapi_send_command(); any other
+ * request is rejected with -EINVAL.
+ */
+static long fsl_mc_uapi_dev_ioctl(struct file *file,
+				  unsigned int cmd,
+				  unsigned long arg)
+{
+	struct uapi_priv_data *priv = file->private_data;
+	struct fsl_mc_device *root_dprc;
+	struct fsl_mc_bus *bus;
+
+	/* The uapi struct is embedded in the bus, which embeds the root DPRC. */
+	bus = container_of(priv->uapi, struct fsl_mc_bus, uapi_misc);
+	root_dprc = &bus->mc_dev;
+
+	if (cmd == FSL_MC_SEND_MC_COMMAND)
+		return fsl_mc_uapi_send_command(root_dprc, arg, priv->mc_io);
+
+	dev_dbg(&root_dprc->dev, "unexpected ioctl call number\n");
+
+	return -EINVAL;
+}
+
+static const struct file_operations fsl_mc_uapi_dev_fops = {
+	.owner = THIS_MODULE,
+	.open = fsl_mc_uapi_dev_open,
+	.release = fsl_mc_uapi_dev_release,
+	.unlocked_ioctl = fsl_mc_uapi_dev_ioctl,
+};
+
+/* Register the root DPRC misc device; returns misc_register()'s result. */
+int fsl_mc_uapi_create_device_file(struct fsl_mc_bus *mc_bus)
+{
+	struct fsl_mc_device *mc_dev = &mc_bus->mc_dev;
+	struct fsl_mc_uapi *mc_uapi = &mc_bus->uapi_misc;
+
+	/*
+	 * open() locks the mutex and reads static_mc_io, and the node can be
+	 * opened as soon as misc_register() makes it visible, so both must
+	 * be initialised before registration rather than after it.
+	 */
+	mutex_init(&mc_uapi->mutex);
+	mc_uapi->static_mc_io = mc_dev->mc_io;
+
+	mc_uapi->misc.minor = MISC_DYNAMIC_MINOR;
+	mc_uapi->misc.name = dev_name(&mc_dev->dev);
+	mc_uapi->misc.fops = &fsl_mc_uapi_dev_fops;
+
+	return misc_register(&mc_uapi->misc);
+}
+
+void fsl_mc_uapi_remove_device_file(struct fsl_mc_bus *mc_bus)
+{
+	misc_deregister(&mc_bus->uapi_misc.misc);
+}
diff --git a/drivers/bus/fsl-mc/mc-sys.c b/drivers/bus/fsl-mc/mc-sys.c
index 85a0225db522..b291b35e3884 100644
--- a/drivers/bus/fsl-mc/mc-sys.c
+++ b/drivers/bus/fsl-mc/mc-sys.c
@@ -35,7 +35,7 @@ static enum mc_cmd_status mc_cmd_hdr_read_status(struct fsl_mc_command *cmd)
 	return (enum mc_cmd_status)hdr->status;
 }
 
-static u16 mc_cmd_hdr_read_cmdid(struct fsl_mc_command *cmd)
+u16 mc_cmd_hdr_read_cmdid(struct fsl_mc_command *cmd)
 {
 	struct mc_cmd_header *hdr = (struct mc_cmd_header *)&cmd->header;
 	u16 cmd_id = le16_to_cpu(hdr->cmd_id);
diff --git a/drivers/bus/mhi/core/init.c b/drivers/bus/mhi/core/init.c
index f0697f433c2f..be4eebb0971b 100644
--- a/drivers/bus/mhi/core/init.c
+++ b/drivers/bus/mhi/core/init.c
@@ -151,12 +151,17 @@ int mhi_init_irq_setup(struct mhi_controller *mhi_cntrl)
 {
 	struct mhi_event *mhi_event = mhi_cntrl->mhi_event;
 	struct device *dev = &mhi_cntrl->mhi_dev->dev;
+	unsigned long irq_flags = IRQF_SHARED | IRQF_NO_SUSPEND;
 	int i, ret;
 
+	/* if controller driver has set irq_flags, use it */
+	if (mhi_cntrl->irq_flags)
+		irq_flags = mhi_cntrl->irq_flags;
+
 	/* Setup BHI_INTVEC IRQ */
 	ret = request_threaded_irq(mhi_cntrl->irq[0], mhi_intvec_handler,
 				   mhi_intvec_threaded_handler,
-				   IRQF_SHARED | IRQF_NO_SUSPEND,
+				   irq_flags,
 				   "bhi", mhi_cntrl);
 	if (ret)
 		return ret;
@@ -174,7 +179,7 @@ int mhi_init_irq_setup(struct mhi_controller *mhi_cntrl)
 
 		ret = request_irq(mhi_cntrl->irq[mhi_event->irq],
 				  mhi_irq_handler,
-				  IRQF_SHARED | IRQF_NO_SUSPEND,
+				  irq_flags,
 				  "mhi", mhi_event);
 		if (ret) {
 			dev_err(dev, "Error requesting irq:%d for ev:%d\n",
@@ -552,6 +557,9 @@ void mhi_deinit_chan_ctxt(struct mhi_controller *mhi_cntrl,
 	tre_ring = &mhi_chan->tre_ring;
 	chan_ctxt = &mhi_cntrl->mhi_ctxt->chan_ctxt[mhi_chan->chan];
 
+	if (!chan_ctxt->rbase) /* Already uninitialized */
+		return;
+
 	mhi_free_coherent(mhi_cntrl, tre_ring->alloc_size,
 			  tre_ring->pre_aligned, tre_ring->dma_handle);
 	vfree(buf_ring->base);
diff --git a/drivers/bus/mhi/core/main.c b/drivers/bus/mhi/core/main.c
index 1202433ecf98..4e0131b94056 100644
--- a/drivers/bus/mhi/core/main.c
+++ b/drivers/bus/mhi/core/main.c
@@ -111,7 +111,14 @@ void mhi_ring_chan_db(struct mhi_controller *mhi_cntrl,
 	dma_addr_t db;
 
 	db = ring->iommu_base + (ring->wp - ring->base);
+
+	/*
+	 * Writes to the new ring element must be visible to the hardware
+	 * before letting h/w know there is new element to fetch.
+	 */
+	dma_wmb();
 	*ring->ctxt_wp = db;
+
 	mhi_chan->db_cfg.process_db(mhi_cntrl, &mhi_chan->db_cfg,
 				    ring->db_addr, db);
 }
@@ -135,6 +142,19 @@ enum mhi_state mhi_get_mhi_state(struct mhi_controller *mhi_cntrl)
 }
 EXPORT_SYMBOL_GPL(mhi_get_mhi_state);
 
+/* Reset the SoC via the controller's own hook, else the generic register. */
+void mhi_soc_reset(struct mhi_controller *mhi_cntrl)
+{
+	if (!mhi_cntrl->reset) {
+		/* No controller-specific hook: issue the generic request. */
+		mhi_write_reg(mhi_cntrl, mhi_cntrl->regs,
+			      MHI_SOC_RESET_REQ_OFFSET, MHI_SOC_RESET_REQ);
+	} else {
+		mhi_cntrl->reset(mhi_cntrl);
+	}
+}
+EXPORT_SYMBOL_GPL(mhi_soc_reset);
+
 int mhi_map_single_no_bb(struct mhi_controller *mhi_cntrl,
 			 struct mhi_buf_info *buf_info)
 {
@@ -959,118 +979,88 @@ static bool mhi_is_ring_full(struct mhi_controller *mhi_cntrl,
 	return (tmp == ring->rp);
 }
 
-int mhi_queue_skb(struct mhi_device *mhi_dev, enum dma_data_direction dir,
-		  struct sk_buff *skb, size_t len, enum mhi_flags mflags)
+static int mhi_queue(struct mhi_device *mhi_dev, struct mhi_buf_info *buf_info,
+		     enum dma_data_direction dir, enum mhi_flags mflags)
 {
 	struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
 	struct mhi_chan *mhi_chan = (dir == DMA_TO_DEVICE) ? mhi_dev->ul_chan :
 							     mhi_dev->dl_chan;
 	struct mhi_ring *tre_ring = &mhi_chan->tre_ring;
-	struct mhi_buf_info buf_info = { };
+	unsigned long flags;
 	int ret;
 
-	/* If MHI host pre-allocates buffers then client drivers cannot queue */
-	if (mhi_chan->pre_alloc)
-		return -EINVAL;
+	if (unlikely(MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state)))
+		return -EIO;
 
-	if (mhi_is_ring_full(mhi_cntrl, tre_ring))
-		return -ENOMEM;
+	read_lock_irqsave(&mhi_cntrl->pm_lock, flags);
 
-	read_lock_bh(&mhi_cntrl->pm_lock);
-	if (unlikely(MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state))) {
-		read_unlock_bh(&mhi_cntrl->pm_lock);
-		return -EIO;
+	ret = mhi_is_ring_full(mhi_cntrl, tre_ring);
+	if (unlikely(ret)) {
+		ret = -ENOMEM;
+		goto exit_unlock;
 	}
 
-	/* we're in M3 or transitioning to M3 */
+	ret = mhi_gen_tre(mhi_cntrl, mhi_chan, buf_info, mflags);
+	if (unlikely(ret))
+		goto exit_unlock;
+
+	/* trigger M3 exit if necessary */
 	if (MHI_PM_IN_SUSPEND_STATE(mhi_cntrl->pm_state))
 		mhi_trigger_resume(mhi_cntrl);
 
-	/* Toggle wake to exit out of M2 */
+	/* Assert dev_wake (to exit/prevent M1/M2)*/
 	mhi_cntrl->wake_toggle(mhi_cntrl);
 
-	buf_info.v_addr = skb->data;
-	buf_info.cb_buf = skb;
-	buf_info.len = len;
-
-	ret = mhi_gen_tre(mhi_cntrl, mhi_chan, &buf_info, mflags);
-	if (unlikely(ret)) {
-		read_unlock_bh(&mhi_cntrl->pm_lock);
-		return ret;
-	}
-
 	if (mhi_chan->dir == DMA_TO_DEVICE)
 		atomic_inc(&mhi_cntrl->pending_pkts);
 
-	if (likely(MHI_DB_ACCESS_VALID(mhi_cntrl))) {
-		read_lock_bh(&mhi_chan->lock);
-		mhi_ring_chan_db(mhi_cntrl, mhi_chan);
-		read_unlock_bh(&mhi_chan->lock);
+	if (unlikely(!MHI_DB_ACCESS_VALID(mhi_cntrl))) {
+		ret = -EIO;
+		goto exit_unlock;
 	}
 
-	read_unlock_bh(&mhi_cntrl->pm_lock);
+	mhi_ring_chan_db(mhi_cntrl, mhi_chan);
 
-	return 0;
+exit_unlock:
+	read_unlock_irqrestore(&mhi_cntrl->pm_lock, flags);
+
+	return ret;
 }
-EXPORT_SYMBOL_GPL(mhi_queue_skb);
 
-int mhi_queue_dma(struct mhi_device *mhi_dev, enum dma_data_direction dir,
-		  struct mhi_buf *mhi_buf, size_t len, enum mhi_flags mflags)
+int mhi_queue_skb(struct mhi_device *mhi_dev, enum dma_data_direction dir,
+		  struct sk_buff *skb, size_t len, enum mhi_flags mflags)
 {
-	struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
 	struct mhi_chan *mhi_chan = (dir == DMA_TO_DEVICE) ? mhi_dev->ul_chan :
 							     mhi_dev->dl_chan;
-	struct device *dev = &mhi_cntrl->mhi_dev->dev;
-	struct mhi_ring *tre_ring = &mhi_chan->tre_ring;
 	struct mhi_buf_info buf_info = { };
-	int ret;
-
-	/* If MHI host pre-allocates buffers then client drivers cannot queue */
-	if (mhi_chan->pre_alloc)
-		return -EINVAL;
 
-	if (mhi_is_ring_full(mhi_cntrl, tre_ring))
-		return -ENOMEM;
-
-	read_lock_bh(&mhi_cntrl->pm_lock);
-	if (unlikely(MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state))) {
-		dev_err(dev, "MHI is not in activate state, PM state: %s\n",
-			to_mhi_pm_state_str(mhi_cntrl->pm_state));
-		read_unlock_bh(&mhi_cntrl->pm_lock);
+	buf_info.v_addr = skb->data;
+	buf_info.cb_buf = skb;
+	buf_info.len = len;
 
-		return -EIO;
-	}
+	if (unlikely(mhi_chan->pre_alloc))
+		return -EINVAL;
 
-	/* we're in M3 or transitioning to M3 */
-	if (MHI_PM_IN_SUSPEND_STATE(mhi_cntrl->pm_state))
-		mhi_trigger_resume(mhi_cntrl);
+	return mhi_queue(mhi_dev, &buf_info, dir, mflags);
+}
+EXPORT_SYMBOL_GPL(mhi_queue_skb);
 
-	/* Toggle wake to exit out of M2 */
-	mhi_cntrl->wake_toggle(mhi_cntrl);
+int mhi_queue_dma(struct mhi_device *mhi_dev, enum dma_data_direction dir,
+		  struct mhi_buf *mhi_buf, size_t len, enum mhi_flags mflags)
+{
+	struct mhi_chan *mhi_chan = (dir == DMA_TO_DEVICE) ? mhi_dev->ul_chan :
+							     mhi_dev->dl_chan;
+	struct mhi_buf_info buf_info = { };
 
 	buf_info.p_addr = mhi_buf->dma_addr;
 	buf_info.cb_buf = mhi_buf;
 	buf_info.pre_mapped = true;
 	buf_info.len = len;
 
-	ret = mhi_gen_tre(mhi_cntrl, mhi_chan, &buf_info, mflags);
-	if (unlikely(ret)) {
-		read_unlock_bh(&mhi_cntrl->pm_lock);
-		return ret;
-	}
-
-	if (mhi_chan->dir == DMA_TO_DEVICE)
-		atomic_inc(&mhi_cntrl->pending_pkts);
-
-	if (likely(MHI_DB_ACCESS_VALID(mhi_cntrl))) {
-		read_lock_bh(&mhi_chan->lock);
-		mhi_ring_chan_db(mhi_cntrl, mhi_chan);
-		read_unlock_bh(&mhi_chan->lock);
-	}
-
-	read_unlock_bh(&mhi_cntrl->pm_lock);
+	if (unlikely(mhi_chan->pre_alloc))
+		return -EINVAL;
 
-	return 0;
+	return mhi_queue(mhi_dev, &buf_info, dir, mflags);
 }
 EXPORT_SYMBOL_GPL(mhi_queue_dma);
 
@@ -1124,57 +1114,13 @@ int mhi_gen_tre(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan,
 int mhi_queue_buf(struct mhi_device *mhi_dev, enum dma_data_direction dir,
 		  void *buf, size_t len, enum mhi_flags mflags)
 {
-	struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
-	struct mhi_chan *mhi_chan = (dir == DMA_TO_DEVICE) ? mhi_dev->ul_chan :
-							     mhi_dev->dl_chan;
-	struct mhi_ring *tre_ring;
 	struct mhi_buf_info buf_info = { };
-	unsigned long flags;
-	int ret;
-
-	/*
-	 * this check here only as a guard, it's always
-	 * possible mhi can enter error while executing rest of function,
-	 * which is not fatal so we do not need to hold pm_lock
-	 */
-	if (unlikely(MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state)))
-		return -EIO;
-
-	tre_ring = &mhi_chan->tre_ring;
-	if (mhi_is_ring_full(mhi_cntrl, tre_ring))
-		return -ENOMEM;
 
 	buf_info.v_addr = buf;
 	buf_info.cb_buf = buf;
 	buf_info.len = len;
 
-	ret = mhi_gen_tre(mhi_cntrl, mhi_chan, &buf_info, mflags);
-	if (unlikely(ret))
-		return ret;
-
-	read_lock_irqsave(&mhi_cntrl->pm_lock, flags);
-
-	/* we're in M3 or transitioning to M3 */
-	if (MHI_PM_IN_SUSPEND_STATE(mhi_cntrl->pm_state))
-		mhi_trigger_resume(mhi_cntrl);
-
-	/* Toggle wake to exit out of M2 */
-	mhi_cntrl->wake_toggle(mhi_cntrl);
-
-	if (mhi_chan->dir == DMA_TO_DEVICE)
-		atomic_inc(&mhi_cntrl->pending_pkts);
-
-	if (likely(MHI_DB_ACCESS_VALID(mhi_cntrl))) {
-		unsigned long flags;
-
-		read_lock_irqsave(&mhi_chan->lock, flags);
-		mhi_ring_chan_db(mhi_cntrl, mhi_chan);
-		read_unlock_irqrestore(&mhi_chan->lock, flags);
-	}
-
-	read_unlock_irqrestore(&mhi_cntrl->pm_lock, flags);
-
-	return 0;
+	return mhi_queue(mhi_dev, &buf_info, dir, mflags);
 }
 EXPORT_SYMBOL_GPL(mhi_queue_buf);
 
diff --git a/drivers/bus/mhi/pci_generic.c b/drivers/bus/mhi/pci_generic.c
index f5bee76ea061..20673a4b4a3c 100644
--- a/drivers/bus/mhi/pci_generic.c
+++ b/drivers/bus/mhi/pci_generic.c
@@ -8,13 +8,21 @@
  * Copyright (C) 2020 Linaro Ltd <loic.poulain@linaro.org>
  */
 
+#include <linux/aer.h>
+#include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/mhi.h>
 #include <linux/module.h>
 #include <linux/pci.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
 
 #define MHI_PCI_DEFAULT_BAR_NUM 0
 
+#define MHI_POST_RESET_DELAY_MS 500
+
+#define HEALTH_CHECK_PERIOD (HZ * 2)
+
 /**
  * struct mhi_pci_dev_info - MHI PCI device specific information
  * @config: MHI controller configuration
@@ -76,6 +84,36 @@ struct mhi_pci_dev_info {
 		.offload_channel = false,	\
 	}
 
+#define MHI_CHANNEL_CONFIG_HW_UL(ch_num, ch_name, el_count, ev_ring) \
+	{						\
+		.num = ch_num,				\
+		.name = ch_name,			\
+		.num_elements = el_count,		\
+		.event_ring = ev_ring,			\
+		.dir = DMA_TO_DEVICE,			\
+		.ee_mask = BIT(MHI_EE_AMSS),		\
+		.pollcfg = 0,				\
+		.doorbell = MHI_DB_BRST_ENABLE,	\
+		.lpm_notify = false,			\
+		.offload_channel = false,		\
+		.doorbell_mode_switch = true,		\
+	}						\
+
+#define MHI_CHANNEL_CONFIG_HW_DL(ch_num, ch_name, el_count, ev_ring) \
+	{						\
+		.num = ch_num,				\
+		.name = ch_name,			\
+		.num_elements = el_count,		\
+		.event_ring = ev_ring,			\
+		.dir = DMA_FROM_DEVICE,			\
+		.ee_mask = BIT(MHI_EE_AMSS),		\
+		.pollcfg = 0,				\
+		.doorbell = MHI_DB_BRST_ENABLE,	\
+		.lpm_notify = false,			\
+		.offload_channel = false,		\
+		.doorbell_mode_switch = true,		\
+	}
+
 #define MHI_EVENT_CONFIG_DATA(ev_ring)		\
 	{					\
 		.num_elements = 128,		\
@@ -91,8 +129,8 @@ struct mhi_pci_dev_info {
 
 #define MHI_EVENT_CONFIG_HW_DATA(ev_ring, ch_num) \
 	{					\
-		.num_elements = 128,		\
-		.irq_moderation_ms = 5,		\
+		.num_elements = 2048,		\
+		.irq_moderation_ms = 1,		\
 		.irq = (ev_ring) + 1,		\
 		.priority = 1,			\
 		.mode = MHI_DB_BRST_DISABLE,	\
@@ -104,27 +142,31 @@ struct mhi_pci_dev_info {
 	}
 
 static const struct mhi_channel_config modem_qcom_v1_mhi_channels[] = {
+	MHI_CHANNEL_CONFIG_UL(4, "DIAG", 16, 1),
+	MHI_CHANNEL_CONFIG_DL(5, "DIAG", 16, 1),
 	MHI_CHANNEL_CONFIG_UL(12, "MBIM", 4, 0),
 	MHI_CHANNEL_CONFIG_DL(13, "MBIM", 4, 0),
 	MHI_CHANNEL_CONFIG_UL(14, "QMI", 4, 0),
 	MHI_CHANNEL_CONFIG_DL(15, "QMI", 4, 0),
 	MHI_CHANNEL_CONFIG_UL(20, "IPCR", 8, 0),
 	MHI_CHANNEL_CONFIG_DL(21, "IPCR", 8, 0),
-	MHI_CHANNEL_CONFIG_UL(100, "IP_HW0", 128, 1),
-	MHI_CHANNEL_CONFIG_DL(101, "IP_HW0", 128, 2),
+	MHI_CHANNEL_CONFIG_HW_UL(100, "IP_HW0", 128, 2),
+	MHI_CHANNEL_CONFIG_HW_DL(101, "IP_HW0", 128, 3),
 };
 
-static const struct mhi_event_config modem_qcom_v1_mhi_events[] = {
+static struct mhi_event_config modem_qcom_v1_mhi_events[] = {
 	/* first ring is control+data ring */
 	MHI_EVENT_CONFIG_CTRL(0),
+	/* DIAG dedicated event ring */
+	MHI_EVENT_CONFIG_DATA(1),
 	/* Hardware channels request dedicated hardware event rings */
-	MHI_EVENT_CONFIG_HW_DATA(1, 100),
-	MHI_EVENT_CONFIG_HW_DATA(2, 101)
+	MHI_EVENT_CONFIG_HW_DATA(2, 100),
+	MHI_EVENT_CONFIG_HW_DATA(3, 101)
 };
 
-static const struct mhi_controller_config modem_qcom_v1_mhiv_config = {
+static struct mhi_controller_config modem_qcom_v1_mhiv_config = {
 	.max_channels = 128,
-	.timeout_ms = 5000,
+	.timeout_ms = 8000,
 	.num_channels = ARRAY_SIZE(modem_qcom_v1_mhi_channels),
 	.ch_cfg = modem_qcom_v1_mhi_channels,
 	.num_events = ARRAY_SIZE(modem_qcom_v1_mhi_events),
@@ -147,6 +189,18 @@ static const struct pci_device_id mhi_pci_id_table[] = {
 };
 MODULE_DEVICE_TABLE(pci, mhi_pci_id_table);
 
+enum mhi_pci_device_status {
+	MHI_PCI_DEV_STARTED,
+};
+
+struct mhi_pci_device {
+	struct mhi_controller mhi_cntrl;
+	struct pci_saved_state *pci_state;
+	struct work_struct recovery_work;
+	struct timer_list health_check_timer;
+	unsigned long status;
+};
+
 static int mhi_pci_read_reg(struct mhi_controller *mhi_cntrl,
 			    void __iomem *addr, u32 *out)
 {
@@ -163,7 +217,31 @@ static void mhi_pci_write_reg(struct mhi_controller *mhi_cntrl,
 static void mhi_pci_status_cb(struct mhi_controller *mhi_cntrl,
 			      enum mhi_callback cb)
 {
+	struct pci_dev *pdev = to_pci_dev(mhi_cntrl->cntrl_dev);
+
 	/* Nothing to do for now */
+	switch (cb) {
+	case MHI_CB_FATAL_ERROR:
+	case MHI_CB_SYS_ERROR:
+		dev_warn(&pdev->dev, "firmware crashed (%u)\n", cb);
+		break;
+	default:
+		break;
+	}
+}
+
+static bool mhi_pci_is_alive(struct mhi_controller *mhi_cntrl)
+{
+	struct pci_dev *pdev = to_pci_dev(mhi_cntrl->cntrl_dev);
+	u16 vendor = 0;
+
+	if (pci_read_config_word(pdev, PCI_VENDOR_ID, &vendor))
+		return false;
+
+	if (vendor == (u16) ~0 || vendor == 0)
+		return false;
+
+	return true;
 }
 
 static int mhi_pci_claim(struct mhi_controller *mhi_cntrl,
@@ -227,8 +305,12 @@ static int mhi_pci_get_irqs(struct mhi_controller *mhi_cntrl,
 	}
 
 	if (nr_vectors < mhi_cntrl->nr_irqs) {
-		dev_warn(&pdev->dev, "Not enough MSI vectors (%d/%d), use shared MSI\n",
-			 nr_vectors, mhi_cntrl_config->num_events);
+		dev_warn(&pdev->dev, "using shared MSI\n");
+
+		/* Patch msi vectors, use only one (shared) */
+		for (i = 0; i < mhi_cntrl_config->num_events; i++)
+			mhi_cntrl_config->event_cfg[i].irq = 0;
+		mhi_cntrl->nr_irqs = 1;
 	}
 
 	irq = devm_kcalloc(&pdev->dev, mhi_cntrl->nr_irqs, sizeof(int), GFP_KERNEL);
@@ -257,20 +339,89 @@ static void mhi_pci_runtime_put(struct mhi_controller *mhi_cntrl)
 	/* no PM for now */
 }
 
+static void mhi_pci_recovery_work(struct work_struct *work)
+{
+	struct mhi_pci_device *mhi_pdev = container_of(work, struct mhi_pci_device,
+						       recovery_work);
+	struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
+	struct pci_dev *pdev = to_pci_dev(mhi_cntrl->cntrl_dev);
+	int err;
+
+	dev_warn(&pdev->dev, "device recovery started\n");
+
+	del_timer(&mhi_pdev->health_check_timer);
+
+	/* Clean up MHI state */
+	if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {
+		mhi_power_down(mhi_cntrl, false);
+		mhi_unprepare_after_power_down(mhi_cntrl);
+	}
+
+	/* Check if we can recover without full reset */
+	pci_set_power_state(pdev, PCI_D0);
+	pci_load_saved_state(pdev, mhi_pdev->pci_state);
+	pci_restore_state(pdev);
+
+	if (!mhi_pci_is_alive(mhi_cntrl))
+		goto err_try_reset;
+
+	err = mhi_prepare_for_power_up(mhi_cntrl);
+	if (err)
+		goto err_try_reset;
+
+	err = mhi_sync_power_up(mhi_cntrl);
+	if (err)
+		goto err_unprepare;
+
+	dev_dbg(&pdev->dev, "Recovery completed\n");
+
+	set_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status);
+	mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);
+	return;
+
+err_unprepare:
+	mhi_unprepare_after_power_down(mhi_cntrl);
+err_try_reset:
+	if (pci_reset_function(pdev))
+		dev_err(&pdev->dev, "Recovery failed\n");
+}
+
+static void health_check(struct timer_list *t)
+{
+	struct mhi_pci_device *mhi_pdev = from_timer(mhi_pdev, t, health_check_timer);
+	struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
+
+	if (!mhi_pci_is_alive(mhi_cntrl)) {
+		dev_err(mhi_cntrl->cntrl_dev, "Device died\n");
+		queue_work(system_long_wq, &mhi_pdev->recovery_work);
+		return;
+	}
+
+	/* reschedule in two seconds */
+	mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);
+}
+
 static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	const struct mhi_pci_dev_info *info = (struct mhi_pci_dev_info *) id->driver_data;
 	const struct mhi_controller_config *mhi_cntrl_config;
+	struct mhi_pci_device *mhi_pdev;
 	struct mhi_controller *mhi_cntrl;
 	int err;
 
 	dev_dbg(&pdev->dev, "MHI PCI device found: %s\n", info->name);
 
-	mhi_cntrl = mhi_alloc_controller();
-	if (!mhi_cntrl)
+	/* mhi_pdev.mhi_cntrl must be zero-initialized */
+	mhi_pdev = devm_kzalloc(&pdev->dev, sizeof(*mhi_pdev), GFP_KERNEL);
+	if (!mhi_pdev)
 		return -ENOMEM;
 
+	INIT_WORK(&mhi_pdev->recovery_work, mhi_pci_recovery_work);
+	timer_setup(&mhi_pdev->health_check_timer, health_check, 0);
+
 	mhi_cntrl_config = info->config;
+	mhi_cntrl = &mhi_pdev->mhi_cntrl;
+
 	mhi_cntrl->cntrl_dev = &pdev->dev;
 	mhi_cntrl->iova_start = 0;
 	mhi_cntrl->iova_stop = (dma_addr_t)DMA_BIT_MASK(info->dma_data_width);
@@ -285,17 +436,23 @@ static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	err = mhi_pci_claim(mhi_cntrl, info->bar_num, DMA_BIT_MASK(info->dma_data_width));
 	if (err)
-		goto err_release;
+		return err;
 
 	err = mhi_pci_get_irqs(mhi_cntrl, mhi_cntrl_config);
 	if (err)
-		goto err_release;
+		return err;
+
+	pci_set_drvdata(pdev, mhi_pdev);
+
+	/* Keep a saved copy of the PCI config space to restore after a sudden PCI error */
+	pci_save_state(pdev);
+	mhi_pdev->pci_state = pci_store_saved_state(pdev);
 
-	pci_set_drvdata(pdev, mhi_cntrl);
+	pci_enable_pcie_error_reporting(pdev);
 
 	err = mhi_register_controller(mhi_cntrl, mhi_cntrl_config);
 	if (err)
-		goto err_release;
+		return err;
 
 	/* MHI bus does not power up the controller by default */
 	err = mhi_prepare_for_power_up(mhi_cntrl);
@@ -310,33 +467,209 @@ static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto err_unprepare;
 	}
 
+	set_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status);
+
+	/* start health check */
+	mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);
+
 	return 0;
 
 err_unprepare:
 	mhi_unprepare_after_power_down(mhi_cntrl);
 err_unregister:
 	mhi_unregister_controller(mhi_cntrl);
-err_release:
-	mhi_free_controller(mhi_cntrl);
 
 	return err;
 }
 
 static void mhi_pci_remove(struct pci_dev *pdev)
 {
-	struct mhi_controller *mhi_cntrl = pci_get_drvdata(pdev);
+	struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
+	struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
+
+	del_timer(&mhi_pdev->health_check_timer);
+	cancel_work_sync(&mhi_pdev->recovery_work);
+
+	if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {
+		mhi_power_down(mhi_cntrl, true);
+		mhi_unprepare_after_power_down(mhi_cntrl);
+	}
 
-	mhi_power_down(mhi_cntrl, true);
-	mhi_unprepare_after_power_down(mhi_cntrl);
 	mhi_unregister_controller(mhi_cntrl);
-	mhi_free_controller(mhi_cntrl);
 }
 
+static void mhi_pci_reset_prepare(struct pci_dev *pdev)
+{
+	struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
+	struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
+
+	dev_info(&pdev->dev, "reset\n");
+
+	del_timer(&mhi_pdev->health_check_timer);
+
+	/* Clean up MHI state */
+	if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {
+		mhi_power_down(mhi_cntrl, false);
+		mhi_unprepare_after_power_down(mhi_cntrl);
+	}
+
+	/* cause internal device reset */
+	mhi_soc_reset(mhi_cntrl);
+
+	/* Be sure device reset has been executed */
+	msleep(MHI_POST_RESET_DELAY_MS);
+}
+
+static void mhi_pci_reset_done(struct pci_dev *pdev)
+{
+	struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
+	struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
+	int err;
+
+	/* Restore initial known working PCI state */
+	pci_load_saved_state(pdev, mhi_pdev->pci_state);
+	pci_restore_state(pdev);
+
+	/* Is device status available ? */
+	if (!mhi_pci_is_alive(mhi_cntrl)) {
+		dev_err(&pdev->dev, "reset failed\n");
+		return;
+	}
+
+	err = mhi_prepare_for_power_up(mhi_cntrl);
+	if (err) {
+		dev_err(&pdev->dev, "failed to prepare MHI controller\n");
+		return;
+	}
+
+	err = mhi_sync_power_up(mhi_cntrl);
+	if (err) {
+		dev_err(&pdev->dev, "failed to power up MHI controller\n");
+		mhi_unprepare_after_power_down(mhi_cntrl);
+		return;
+	}
+
+	set_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status);
+	mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);
+}
+
+static pci_ers_result_t mhi_pci_error_detected(struct pci_dev *pdev,
+					       pci_channel_state_t state)
+{
+	struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
+	struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
+
+	dev_err(&pdev->dev, "PCI error detected, state = %u\n", state);
+
+	if (state == pci_channel_io_perm_failure)
+		return PCI_ERS_RESULT_DISCONNECT;
+
+	/* Clean up MHI state */
+	if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {
+		mhi_power_down(mhi_cntrl, false);
+		mhi_unprepare_after_power_down(mhi_cntrl);
+	} else {
+		/* Nothing to do */
+		return PCI_ERS_RESULT_RECOVERED;
+	}
+
+	pci_disable_device(pdev);
+
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+static pci_ers_result_t mhi_pci_slot_reset(struct pci_dev *pdev)
+{
+	if (pci_enable_device(pdev)) {
+		dev_err(&pdev->dev, "Cannot re-enable PCI device after reset.\n");
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
+static void mhi_pci_io_resume(struct pci_dev *pdev)
+{
+	struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
+
+	dev_err(&pdev->dev, "PCI slot reset done\n");
+
+	queue_work(system_long_wq, &mhi_pdev->recovery_work);
+}
+
+static const struct pci_error_handlers mhi_pci_err_handler = {
+	.error_detected = mhi_pci_error_detected,
+	.slot_reset = mhi_pci_slot_reset,
+	.resume = mhi_pci_io_resume,
+	.reset_prepare = mhi_pci_reset_prepare,
+	.reset_done = mhi_pci_reset_done,
+};
+
+static int  __maybe_unused mhi_pci_suspend(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct mhi_pci_device *mhi_pdev = dev_get_drvdata(dev);
+	struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
+
+	del_timer(&mhi_pdev->health_check_timer);
+	cancel_work_sync(&mhi_pdev->recovery_work);
+
+	/* Transition to M3 state */
+	mhi_pm_suspend(mhi_cntrl);
+
+	pci_save_state(pdev);
+	pci_disable_device(pdev);
+	pci_wake_from_d3(pdev, true);
+	pci_set_power_state(pdev, PCI_D3hot);
+
+	return 0;
+}
+
+static int __maybe_unused mhi_pci_resume(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct mhi_pci_device *mhi_pdev = dev_get_drvdata(dev);
+	struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
+	int err;
+
+	pci_set_power_state(pdev, PCI_D0);
+	pci_restore_state(pdev);
+	pci_set_master(pdev);
+
+	err = pci_enable_device(pdev);
+	if (err)
+		goto err_recovery;
+
+	/* Exit M3, transition to M0 state */
+	err = mhi_pm_resume(mhi_cntrl);
+	if (err) {
+		dev_err(&pdev->dev, "failed to resume device: %d\n", err);
+		goto err_recovery;
+	}
+
+	/* Resume health check */
+	mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);
+
+	return 0;
+
+err_recovery:
+	/* The device may have lost power or crashed, try recovering it */
+	queue_work(system_long_wq, &mhi_pdev->recovery_work);
+
+	return err;
+}
+
+static const struct dev_pm_ops mhi_pci_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(mhi_pci_suspend, mhi_pci_resume)
+};
+
 static struct pci_driver mhi_pci_driver = {
 	.name		= "mhi-pci-generic",
 	.id_table	= mhi_pci_id_table,
 	.probe		= mhi_pci_probe,
-	.remove		= mhi_pci_remove
+	.remove		= mhi_pci_remove,
+	.err_handler	= &mhi_pci_err_handler,
+	.driver.pm	= &mhi_pci_pm_ops
 };
 module_pci_driver(mhi_pci_driver);
 
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 84e24986a97a..0fe9e200e4c8 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1959,7 +1959,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 			return -EPERM;
 		if (crng_init < 2)
 			return -ENODATA;
-		crng_reseed(&primary_crng, NULL);
+		crng_reseed(&primary_crng, &input_pool);
 		crng_global_init_time = jiffies - 1;
 		return 0;
 	default:
diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c
index 80db43a22069..68216988391f 100644
--- a/drivers/firewire/core-device.c
+++ b/drivers/firewire/core-device.c
@@ -192,7 +192,9 @@ static int fw_unit_remove(struct device *dev)
 	struct fw_driver *driver =
 			container_of(dev->driver, struct fw_driver, driver);
 
-	return driver->remove(fw_unit(dev)), 0;
+	driver->remove(fw_unit(dev));
+
+	return 0;
 }
 
 static int get_modalias(struct fw_unit *unit, char *buffer, size_t buffer_size)
diff --git a/drivers/firmware/google/coreboot_table.c b/drivers/firmware/google/coreboot_table.c
index 0205987a4fd4..dc83ea118c67 100644
--- a/drivers/firmware/google/coreboot_table.c
+++ b/drivers/firmware/google/coreboot_table.c
@@ -46,14 +46,13 @@ static int coreboot_bus_probe(struct device *dev)
 
 static int coreboot_bus_remove(struct device *dev)
 {
-	int ret = 0;
 	struct coreboot_device *device = CB_DEV(dev);
 	struct coreboot_driver *driver = CB_DRV(dev->driver);
 
 	if (driver->remove)
-		ret = driver->remove(device);
+		driver->remove(device);
 
-	return ret;
+	return 0;
 }
 
 static struct bus_type coreboot_bus_type = {
diff --git a/drivers/firmware/google/coreboot_table.h b/drivers/firmware/google/coreboot_table.h
index 7b7b4a6eedda..beb778674acd 100644
--- a/drivers/firmware/google/coreboot_table.h
+++ b/drivers/firmware/google/coreboot_table.h
@@ -72,7 +72,7 @@ struct coreboot_device {
 /* A driver for handling devices described in coreboot tables. */
 struct coreboot_driver {
 	int (*probe)(struct coreboot_device *);
-	int (*remove)(struct coreboot_device *);
+	void (*remove)(struct coreboot_device *);
 	struct device_driver drv;
 	u32 tag;
 };
diff --git a/drivers/firmware/google/framebuffer-coreboot.c b/drivers/firmware/google/framebuffer-coreboot.c
index 916f26adc595..c6dcc1ef93ac 100644
--- a/drivers/firmware/google/framebuffer-coreboot.c
+++ b/drivers/firmware/google/framebuffer-coreboot.c
@@ -72,13 +72,11 @@ static int framebuffer_probe(struct coreboot_device *dev)
 	return PTR_ERR_OR_ZERO(pdev);
 }
 
-static int framebuffer_remove(struct coreboot_device *dev)
+static void framebuffer_remove(struct coreboot_device *dev)
 {
 	struct platform_device *pdev = dev_get_drvdata(&dev->dev);
 
 	platform_device_unregister(pdev);
-
-	return 0;
 }
 
 static struct coreboot_driver framebuffer_driver = {
diff --git a/drivers/firmware/google/memconsole-coreboot.c b/drivers/firmware/google/memconsole-coreboot.c
index d17e4d6ac9bc..74b5286518ee 100644
--- a/drivers/firmware/google/memconsole-coreboot.c
+++ b/drivers/firmware/google/memconsole-coreboot.c
@@ -91,11 +91,9 @@ static int memconsole_probe(struct coreboot_device *dev)
 	return memconsole_sysfs_init();
 }
 
-static int memconsole_remove(struct coreboot_device *dev)
+static void memconsole_remove(struct coreboot_device *dev)
 {
 	memconsole_exit();
-
-	return 0;
 }
 
 static struct coreboot_driver memconsole_driver = {
diff --git a/drivers/firmware/google/vpd.c b/drivers/firmware/google/vpd.c
index d23c5c69ab52..ee6e08c0592b 100644
--- a/drivers/firmware/google/vpd.c
+++ b/drivers/firmware/google/vpd.c
@@ -298,14 +298,12 @@ static int vpd_probe(struct coreboot_device *dev)
 	return 0;
 }
 
-static int vpd_remove(struct coreboot_device *dev)
+static void vpd_remove(struct coreboot_device *dev)
 {
 	vpd_section_destroy(&ro_vpd);
 	vpd_section_destroy(&rw_vpd);
 
 	kobject_put(vpd_kobj);
-
-	return 0;
 }
 
 static struct coreboot_driver vpd_driver = {
diff --git a/drivers/fpga/Kconfig b/drivers/fpga/Kconfig
index 5645226ca3ce..5ff9438b7b46 100644
--- a/drivers/fpga/Kconfig
+++ b/drivers/fpga/Kconfig
@@ -192,6 +192,17 @@ config FPGA_DFL_AFU
 	  to the FPGA infrastructure via a Port. There may be more than one
 	  Port/AFU per DFL based FPGA device.
 
+config FPGA_DFL_NIOS_INTEL_PAC_N3000
+	tristate "FPGA DFL NIOS Driver for Intel PAC N3000"
+	depends on FPGA_DFL
+	select REGMAP
+	help
+	  This is the driver for the N3000 Nios private feature on Intel
+	  PAC (Programmable Acceleration Card) N3000. It communicates
+	  with the embedded Nios processor to configure the retimers on
+	  the card. It also instantiates the SPI master (spi-altera) for
+	  the card's BMC (Board Management Controller).
+
 config FPGA_DFL_PCI
 	tristate "FPGA DFL PCIe Device Driver"
 	depends on PCI && FPGA_DFL
diff --git a/drivers/fpga/Makefile b/drivers/fpga/Makefile
index d8e21dfc6778..18dc9885883a 100644
--- a/drivers/fpga/Makefile
+++ b/drivers/fpga/Makefile
@@ -44,5 +44,7 @@ dfl-fme-objs += dfl-fme-perf.o
 dfl-afu-objs := dfl-afu-main.o dfl-afu-region.o dfl-afu-dma-region.o
 dfl-afu-objs += dfl-afu-error.o
 
+obj-$(CONFIG_FPGA_DFL_NIOS_INTEL_PAC_N3000)	+= dfl-n3000-nios.o
+
 # Drivers for FPGAs which implement DFL
 obj-$(CONFIG_FPGA_DFL_PCI)		+= dfl-pci.o
diff --git a/drivers/fpga/dfl-fme-perf.c b/drivers/fpga/dfl-fme-perf.c
index 531266287eee..4299145ef347 100644
--- a/drivers/fpga/dfl-fme-perf.c
+++ b/drivers/fpga/dfl-fme-perf.c
@@ -192,7 +192,7 @@ static struct attribute *fme_perf_cpumask_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group fme_perf_cpumask_group = {
+static const struct attribute_group fme_perf_cpumask_group = {
 	.attrs = fme_perf_cpumask_attrs,
 };
 
@@ -225,7 +225,7 @@ static struct attribute *fme_perf_format_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group fme_perf_format_group = {
+static const struct attribute_group fme_perf_format_group = {
 	.name = "format",
 	.attrs = fme_perf_format_attrs,
 };
@@ -239,7 +239,7 @@ static struct attribute *fme_perf_events_attrs_empty[] = {
 	NULL,
 };
 
-static struct attribute_group fme_perf_events_group = {
+static const struct attribute_group fme_perf_events_group = {
 	.name = "events",
 	.attrs = fme_perf_events_attrs_empty,
 };
diff --git a/drivers/fpga/dfl-n3000-nios.c b/drivers/fpga/dfl-n3000-nios.c
new file mode 100644
index 000000000000..7a95366f6516
--- /dev/null
+++ b/drivers/fpga/dfl-n3000-nios.c
@@ -0,0 +1,588 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DFL device driver for Nios private feature on Intel PAC (Programmable
+ * Acceleration Card) N3000
+ *
+ * Copyright (C) 2019-2020 Intel Corporation, Inc.
+ *
+ * Authors:
+ *   Wu Hao <hao.wu@intel.com>
+ *   Xu Yilun <yilun.xu@intel.com>
+ */
+#include <linux/bitfield.h>
+#include <linux/dfl.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/stddef.h>
+#include <linux/spi/altera.h>
+#include <linux/spi/spi.h>
+#include <linux/types.h>
+
+/*
+ * N3000 Nios private feature registers, named as NIOS_SPI_XX on spec.
+ * NS is the abbreviation of NIOS_SPI.
+ */
+#define N3000_NS_PARAM				0x8
+#define N3000_NS_PARAM_SHIFT_MODE_MSK		BIT_ULL(1)
+#define N3000_NS_PARAM_SHIFT_MODE_MSB		0
+#define N3000_NS_PARAM_SHIFT_MODE_LSB		1
+#define N3000_NS_PARAM_DATA_WIDTH		GENMASK_ULL(7, 2)
+#define N3000_NS_PARAM_NUM_CS			GENMASK_ULL(13, 8)
+#define N3000_NS_PARAM_CLK_POL			BIT_ULL(14)
+#define N3000_NS_PARAM_CLK_PHASE		BIT_ULL(15)
+#define N3000_NS_PARAM_PERIPHERAL_ID		GENMASK_ULL(47, 32)
+
+#define N3000_NS_CTRL				0x10
+#define N3000_NS_CTRL_WR_DATA			GENMASK_ULL(31, 0)
+#define N3000_NS_CTRL_ADDR			GENMASK_ULL(44, 32)
+#define N3000_NS_CTRL_CMD_MSK			GENMASK_ULL(63, 62)
+#define N3000_NS_CTRL_CMD_NOP			0
+#define N3000_NS_CTRL_CMD_RD			1
+#define N3000_NS_CTRL_CMD_WR			2
+
+#define N3000_NS_STAT				0x18
+#define N3000_NS_STAT_RD_DATA			GENMASK_ULL(31, 0)
+#define N3000_NS_STAT_RW_VAL			BIT_ULL(32)
+
+/* Nios handshake registers, indirect access */
+#define N3000_NIOS_INIT				0x1000
+#define N3000_NIOS_INIT_DONE			BIT(0)
+#define N3000_NIOS_INIT_START			BIT(1)
+/* Mode for retimer A, link 0, the same below */
+#define N3000_NIOS_INIT_REQ_FEC_MODE_A0_MSK	GENMASK(9, 8)
+#define N3000_NIOS_INIT_REQ_FEC_MODE_A1_MSK	GENMASK(11, 10)
+#define N3000_NIOS_INIT_REQ_FEC_MODE_A2_MSK	GENMASK(13, 12)
+#define N3000_NIOS_INIT_REQ_FEC_MODE_A3_MSK	GENMASK(15, 14)
+#define N3000_NIOS_INIT_REQ_FEC_MODE_B0_MSK	GENMASK(17, 16)
+#define N3000_NIOS_INIT_REQ_FEC_MODE_B1_MSK	GENMASK(19, 18)
+#define N3000_NIOS_INIT_REQ_FEC_MODE_B2_MSK	GENMASK(21, 20)
+#define N3000_NIOS_INIT_REQ_FEC_MODE_B3_MSK	GENMASK(23, 22)
+#define N3000_NIOS_INIT_REQ_FEC_MODE_NO		0x0
+#define N3000_NIOS_INIT_REQ_FEC_MODE_KR		0x1
+#define N3000_NIOS_INIT_REQ_FEC_MODE_RS		0x2
+
+#define N3000_NIOS_FW_VERSION			0x1004
+#define N3000_NIOS_FW_VERSION_PATCH		GENMASK(23, 20)
+#define N3000_NIOS_FW_VERSION_MINOR		GENMASK(27, 24)
+#define N3000_NIOS_FW_VERSION_MAJOR		GENMASK(31, 28)
+
+/* The retimers used on Intel PAC N3000 are Parkvale, abbreviated to PKVL */
+#define N3000_NIOS_PKVL_A_MODE_STS		0x1020
+#define N3000_NIOS_PKVL_B_MODE_STS		0x1024
+#define N3000_NIOS_PKVL_MODE_STS_GROUP_MSK	GENMASK(15, 8)
+#define N3000_NIOS_PKVL_MODE_STS_GROUP_OK	0x0
+#define N3000_NIOS_PKVL_MODE_STS_ID_MSK		GENMASK(7, 0)
+/* When GROUP MASK field == GROUP_OK  */
+#define N3000_NIOS_PKVL_MODE_ID_RESET		0x0
+#define N3000_NIOS_PKVL_MODE_ID_4X10G		0x1
+#define N3000_NIOS_PKVL_MODE_ID_4X25G		0x2
+#define N3000_NIOS_PKVL_MODE_ID_2X25G		0x3
+#define N3000_NIOS_PKVL_MODE_ID_2X25G_2X10G	0x4
+#define N3000_NIOS_PKVL_MODE_ID_1X25G		0x5
+
+#define N3000_NIOS_REGBUS_RETRY_COUNT		10000	/* loop count */
+
+#define N3000_NIOS_INIT_TIMEOUT			10000000	/* usec */
+#define N3000_NIOS_INIT_TIME_INTV		100000		/* usec */
+
+#define N3000_NIOS_INIT_REQ_FEC_MODE_MSK_ALL	\
+	(N3000_NIOS_INIT_REQ_FEC_MODE_A0_MSK |	\
+	 N3000_NIOS_INIT_REQ_FEC_MODE_A1_MSK |	\
+	 N3000_NIOS_INIT_REQ_FEC_MODE_A2_MSK |	\
+	 N3000_NIOS_INIT_REQ_FEC_MODE_A3_MSK |	\
+	 N3000_NIOS_INIT_REQ_FEC_MODE_B0_MSK |	\
+	 N3000_NIOS_INIT_REQ_FEC_MODE_B1_MSK |	\
+	 N3000_NIOS_INIT_REQ_FEC_MODE_B2_MSK |	\
+	 N3000_NIOS_INIT_REQ_FEC_MODE_B3_MSK)
+
+#define N3000_NIOS_INIT_REQ_FEC_MODE_NO_ALL			\
+	(FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_A0_MSK,	\
+		    N3000_NIOS_INIT_REQ_FEC_MODE_NO) |		\
+	 FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_A1_MSK,	\
+		    N3000_NIOS_INIT_REQ_FEC_MODE_NO) |		\
+	 FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_A2_MSK,	\
+		    N3000_NIOS_INIT_REQ_FEC_MODE_NO) |		\
+	 FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_A3_MSK,	\
+		    N3000_NIOS_INIT_REQ_FEC_MODE_NO) |		\
+	 FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_B0_MSK,	\
+		    N3000_NIOS_INIT_REQ_FEC_MODE_NO) |		\
+	 FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_B1_MSK,	\
+		    N3000_NIOS_INIT_REQ_FEC_MODE_NO) |		\
+	 FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_B2_MSK,	\
+		    N3000_NIOS_INIT_REQ_FEC_MODE_NO) |		\
+	 FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_B3_MSK,	\
+		    N3000_NIOS_INIT_REQ_FEC_MODE_NO))
+
+#define N3000_NIOS_INIT_REQ_FEC_MODE_KR_ALL			\
+	(FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_A0_MSK,	\
+		    N3000_NIOS_INIT_REQ_FEC_MODE_KR) |		\
+	 FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_A1_MSK,	\
+		    N3000_NIOS_INIT_REQ_FEC_MODE_KR) |		\
+	 FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_A2_MSK,	\
+		    N3000_NIOS_INIT_REQ_FEC_MODE_KR) |		\
+	 FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_A3_MSK,	\
+		    N3000_NIOS_INIT_REQ_FEC_MODE_KR) |		\
+	 FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_B0_MSK,	\
+		    N3000_NIOS_INIT_REQ_FEC_MODE_KR) |		\
+	 FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_B1_MSK,	\
+		    N3000_NIOS_INIT_REQ_FEC_MODE_KR) |		\
+	 FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_B2_MSK,	\
+		    N3000_NIOS_INIT_REQ_FEC_MODE_KR) |		\
+	 FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_B3_MSK,	\
+		    N3000_NIOS_INIT_REQ_FEC_MODE_KR))
+
+#define N3000_NIOS_INIT_REQ_FEC_MODE_RS_ALL			\
+	(FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_A0_MSK,	\
+		    N3000_NIOS_INIT_REQ_FEC_MODE_RS) |		\
+	 FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_A1_MSK,	\
+		    N3000_NIOS_INIT_REQ_FEC_MODE_RS) |		\
+	 FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_A2_MSK,	\
+		    N3000_NIOS_INIT_REQ_FEC_MODE_RS) |		\
+	 FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_A3_MSK,	\
+		    N3000_NIOS_INIT_REQ_FEC_MODE_RS) |		\
+	 FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_B0_MSK,	\
+		    N3000_NIOS_INIT_REQ_FEC_MODE_RS) |		\
+	 FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_B1_MSK,	\
+		    N3000_NIOS_INIT_REQ_FEC_MODE_RS) |		\
+	 FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_B2_MSK,	\
+		    N3000_NIOS_INIT_REQ_FEC_MODE_RS) |		\
+	 FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_B3_MSK,	\
+		    N3000_NIOS_INIT_REQ_FEC_MODE_RS))
+
+struct n3000_nios {
+	void __iomem *base;
+	struct regmap *regmap;
+	struct device *dev;
+	struct platform_device *altera_spi;
+};
+
+static ssize_t nios_fw_version_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct n3000_nios *nn = dev_get_drvdata(dev);
+	unsigned int val;
+	int ret;
+
+	ret = regmap_read(nn->regmap, N3000_NIOS_FW_VERSION, &val);
+	if (ret)
+		return ret;
+
+	return sysfs_emit(buf, "%x.%x.%x\n",
+			  (u8)FIELD_GET(N3000_NIOS_FW_VERSION_MAJOR, val),
+			  (u8)FIELD_GET(N3000_NIOS_FW_VERSION_MINOR, val),
+			  (u8)FIELD_GET(N3000_NIOS_FW_VERSION_PATCH, val));
+}
+static DEVICE_ATTR_RO(nios_fw_version);
+
+#define IS_MODE_STATUS_OK(mode_stat)					\
+	(FIELD_GET(N3000_NIOS_PKVL_MODE_STS_GROUP_MSK, (mode_stat)) ==	\
+	 N3000_NIOS_PKVL_MODE_STS_GROUP_OK)
+
+#define IS_RETIMER_FEC_SUPPORTED(retimer_mode)			\
+	((retimer_mode) != N3000_NIOS_PKVL_MODE_ID_RESET &&	\
+	 (retimer_mode) != N3000_NIOS_PKVL_MODE_ID_4X10G)
+
+static int get_retimer_mode(struct n3000_nios *nn, unsigned int mode_stat_reg,
+			    unsigned int *retimer_mode)
+{
+	unsigned int val;
+	int ret;
+
+	ret = regmap_read(nn->regmap, mode_stat_reg, &val);
+	if (ret)
+		return ret;
+
+	if (!IS_MODE_STATUS_OK(val))
+		return -EFAULT;
+
+	*retimer_mode = FIELD_GET(N3000_NIOS_PKVL_MODE_STS_ID_MSK, val);
+
+	return 0;
+}
+
+static ssize_t retimer_A_mode_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct n3000_nios *nn = dev_get_drvdata(dev);
+	unsigned int mode;
+	int ret;
+
+	ret = get_retimer_mode(nn, N3000_NIOS_PKVL_A_MODE_STS, &mode);
+	if (ret)
+		return ret;
+
+	return sysfs_emit(buf, "0x%x\n", mode);
+}
+static DEVICE_ATTR_RO(retimer_A_mode);
+
+static ssize_t retimer_B_mode_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct n3000_nios *nn = dev_get_drvdata(dev);
+	unsigned int mode;
+	int ret;
+
+	ret = get_retimer_mode(nn, N3000_NIOS_PKVL_B_MODE_STS, &mode);
+	if (ret)
+		return ret;
+
+	return sysfs_emit(buf, "0x%x\n", mode);
+}
+static DEVICE_ATTR_RO(retimer_B_mode);
+
+static ssize_t fec_mode_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	unsigned int val, retimer_a_mode, retimer_b_mode, fec_modes;
+	struct n3000_nios *nn = dev_get_drvdata(dev);
+	int ret;
+
+	/* FEC mode setting is not supported in early FW versions */
+	ret = regmap_read(nn->regmap, N3000_NIOS_FW_VERSION, &val);
+	if (ret)
+		return ret;
+
+	if (FIELD_GET(N3000_NIOS_FW_VERSION_MAJOR, val) < 3)
+		return sysfs_emit(buf, "not supported\n");
+
+	/* If no 25G links, FEC mode setting is not supported either */
+	ret = get_retimer_mode(nn, N3000_NIOS_PKVL_A_MODE_STS, &retimer_a_mode);
+	if (ret)
+		return ret;
+
+	ret = get_retimer_mode(nn, N3000_NIOS_PKVL_B_MODE_STS, &retimer_b_mode);
+	if (ret)
+		return ret;
+
+	if (!IS_RETIMER_FEC_SUPPORTED(retimer_a_mode) &&
+	    !IS_RETIMER_FEC_SUPPORTED(retimer_b_mode))
+		return sysfs_emit(buf, "not supported\n");
+
+	/* get the valid FEC mode for 25G links */
+	ret = regmap_read(nn->regmap, N3000_NIOS_INIT, &val);
+	if (ret)
+		return ret;
+
+	/*
+	 * FEC mode should always be the same for all links, as we set them
+	 * in this way.
+	 */
+	fec_modes = (val & N3000_NIOS_INIT_REQ_FEC_MODE_MSK_ALL);
+	if (fec_modes == N3000_NIOS_INIT_REQ_FEC_MODE_NO_ALL)
+		return sysfs_emit(buf, "no\n");
+	else if (fec_modes == N3000_NIOS_INIT_REQ_FEC_MODE_KR_ALL)
+		return sysfs_emit(buf, "kr\n");
+	else if (fec_modes == N3000_NIOS_INIT_REQ_FEC_MODE_RS_ALL)
+		return sysfs_emit(buf, "rs\n");
+
+	return -EFAULT;
+}
+static DEVICE_ATTR_RO(fec_mode);
+
+static struct attribute *n3000_nios_attrs[] = {
+	&dev_attr_nios_fw_version.attr,
+	&dev_attr_retimer_A_mode.attr,
+	&dev_attr_retimer_B_mode.attr,
+	&dev_attr_fec_mode.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(n3000_nios);
+
+static int n3000_nios_init_done_check(struct n3000_nios *nn)
+{
+	unsigned int val, state_a, state_b;
+	struct device *dev = nn->dev;
+	int ret, ret2;
+
+	/*
+	 * The SPI is shared with the Nios core inside the FPGA. Nios will use
+	 * this SPI master to do some one-time initialization after power up,
+	 * and then release control to the OS. The driver needs to poll on
+	 * INIT_DONE to see when the driver can take control.
+	 *
+	 * Please note that INIT_START was introduced in Nios firmware version
+	 * 3.0.0, so the driver needs to trigger START first and then check
+	 * INIT_DONE.
+	 */
+
+	ret = regmap_read(nn->regmap, N3000_NIOS_FW_VERSION, &val);
+	if (ret)
+		return ret;
+
+	/*
+	 * If the Nios version register is totally uninitialized (== 0x0), the
+	 * Nios firmware is missing. The host can then take control of the SPI
+	 * master safely, but the initialization work for Nios is not done. To
+	 * restore the card, new Nios firmware must be programmed via the BMC
+	 * chip on the SPI bus. So the driver doesn't error out; it continues
+	 * to create the spi controller device and spi_board_info for the BMC.
+	 */
+	if (val == 0) {
+		dev_err(dev, "Nios version reg = 0x%x, skip INIT_DONE check, but the retimer may be uninitialized\n",
+			val);
+		return 0;
+	}
+
+	if (FIELD_GET(N3000_NIOS_FW_VERSION_MAJOR, val) >= 3) {
+		/* read NIOS_INIT to check if retimer initialization is done */
+		ret = regmap_read(nn->regmap, N3000_NIOS_INIT, &val);
+		if (ret)
+			return ret;
+
+		/* check if retimers are initialized already */
+		if (val & (N3000_NIOS_INIT_DONE | N3000_NIOS_INIT_START))
+			goto nios_init_done;
+
+		/* configure FEC mode per module param */
+		val = N3000_NIOS_INIT_START;
+
+		/*
+		 * When the retimer is to be set to 10G mode, there is no FEC
+		 * mode setting, so the REQ_FEC_MODE field will be ignored by
+		 * Nios firmware in this case. But we should still fill the FEC
+		 * mode field because the host cannot get the retimer working
+		 * mode until the Nios init is done.
+		 *
+		 * For now the driver doesn't support the retimer FEC mode
+		 * switching per user's request. It is always set to Reed
+		 * Solomon FEC.
+		 *
+		 * The driver will set the same FEC mode for all links.
+		 */
+		val |= N3000_NIOS_INIT_REQ_FEC_MODE_RS_ALL;
+
+		ret = regmap_write(nn->regmap, N3000_NIOS_INIT, val);
+		if (ret)
+			return ret;
+	}
+
+nios_init_done:
+	/* polls on NIOS_INIT_DONE */
+	ret = regmap_read_poll_timeout(nn->regmap, N3000_NIOS_INIT, val,
+				       val & N3000_NIOS_INIT_DONE,
+				       N3000_NIOS_INIT_TIME_INTV,
+				       N3000_NIOS_INIT_TIMEOUT);
+	if (ret)
+		dev_err(dev, "NIOS_INIT_DONE %s\n",
+			(ret == -ETIMEDOUT) ? "timed out" : "check error");
+
+	ret2 = regmap_read(nn->regmap, N3000_NIOS_PKVL_A_MODE_STS, &state_a);
+	if (ret2)
+		return ret2;
+
+	ret2 = regmap_read(nn->regmap, N3000_NIOS_PKVL_B_MODE_STS, &state_b);
+	if (ret2)
+		return ret2;
+
+	if (!ret) {
+		/*
+		 * After INIT_DONE is detected, it still needs to check if the
+		 * Nios firmware reports any error during the retimer
+		 * configuration.
+		 */
+		if (IS_MODE_STATUS_OK(state_a) && IS_MODE_STATUS_OK(state_b))
+			return 0;
+
+		/*
+		 * If the retimer configuration has failed, the Nios firmware
+		 * will still release the spi controller so that the host can
+		 * communicate with the BMC. This makes it possible to
+		 * reprogram new Nios firmware and restore the card. So the
+		 * driver doesn't error out; it continues to create the spi
+		 * controller device and spi_board_info for the BMC.
+		 */
+		dev_err(dev, "NIOS_INIT_DONE OK, but err on retimer init\n");
+	}
+
+	dev_err(nn->dev, "PKVL_A_MODE_STS 0x%x\n", state_a);
+	dev_err(nn->dev, "PKVL_B_MODE_STS 0x%x\n", state_b);
+
+	return ret;
+}
+
+static struct spi_board_info m10_n3000_info = {
+	.modalias = "m10-n3000",
+	.max_speed_hz = 12500000,
+	.bus_num = 0,
+	.chip_select = 0,
+};
+
+static int create_altera_spi_controller(struct n3000_nios *nn)
+{
+	struct altera_spi_platform_data pdata = { 0 };
+	struct platform_device_info pdevinfo = { 0 };
+	void __iomem *base = nn->base;
+	u64 v;
+
+	v = readq(base + N3000_NS_PARAM);
+
+	pdata.mode_bits = SPI_CS_HIGH;
+	if (FIELD_GET(N3000_NS_PARAM_CLK_POL, v))
+		pdata.mode_bits |= SPI_CPOL;
+	if (FIELD_GET(N3000_NS_PARAM_CLK_PHASE, v))
+		pdata.mode_bits |= SPI_CPHA;
+
+	pdata.num_chipselect = FIELD_GET(N3000_NS_PARAM_NUM_CS, v);
+	pdata.bits_per_word_mask =
+		SPI_BPW_RANGE_MASK(1, FIELD_GET(N3000_NS_PARAM_DATA_WIDTH, v));
+
+	pdata.num_devices = 1;
+	pdata.devices = &m10_n3000_info;
+
+	dev_dbg(nn->dev, "%s cs %u bpm 0x%x mode 0x%x\n", __func__,
+		pdata.num_chipselect, pdata.bits_per_word_mask,
+		pdata.mode_bits);
+
+	pdevinfo.name = "subdev_spi_altera";
+	pdevinfo.id = PLATFORM_DEVID_AUTO;
+	pdevinfo.parent = nn->dev;
+	pdevinfo.data = &pdata;
+	pdevinfo.size_data = sizeof(pdata);
+
+	nn->altera_spi = platform_device_register_full(&pdevinfo);
+	return PTR_ERR_OR_ZERO(nn->altera_spi);
+}
+
+static void destroy_altera_spi_controller(struct n3000_nios *nn)
+{
+	platform_device_unregister(nn->altera_spi);
+}
+
+static int n3000_nios_poll_stat_timeout(void __iomem *base, u64 *v)
+{
+	int loops;
+
+	/*
+	 * We don't use the time based timeout here for performance.
+	 *
+	 * The regbus read/write is on the critical path of Intel PAC N3000
+	 * image programming. Time based timeout checking would add too much
+	 * overhead to it. Usually the state changes in 1 or 2 loops on the
+	 * test server, and we allow up to 10000 loops here for safety.
+	 */
+	for (loops = N3000_NIOS_REGBUS_RETRY_COUNT; loops > 0 ; loops--) {
+		*v = readq(base + N3000_NS_STAT);
+		if (*v & N3000_NS_STAT_RW_VAL)
+			break;
+		cpu_relax();
+	}
+
+	return (loops > 0) ? 0 : -ETIMEDOUT;
+}
+
+static int n3000_nios_reg_write(void *context, unsigned int reg, unsigned int val)
+{
+	struct n3000_nios *nn = context;
+	u64 v;
+	int ret;
+
+	v = FIELD_PREP(N3000_NS_CTRL_CMD_MSK, N3000_NS_CTRL_CMD_WR) |
+	    FIELD_PREP(N3000_NS_CTRL_ADDR, reg) |
+	    FIELD_PREP(N3000_NS_CTRL_WR_DATA, val);
+	writeq(v, nn->base + N3000_NS_CTRL);
+
+	ret = n3000_nios_poll_stat_timeout(nn->base, &v);
+	if (ret)
+		dev_err(nn->dev, "fail to write reg 0x%x val 0x%x: %d\n",
+			reg, val, ret);
+
+	return ret;
+}
+
+static int n3000_nios_reg_read(void *context, unsigned int reg, unsigned int *val)
+{
+	struct n3000_nios *nn = context;
+	u64 v;
+	int ret;
+
+	v = FIELD_PREP(N3000_NS_CTRL_CMD_MSK, N3000_NS_CTRL_CMD_RD) |
+	    FIELD_PREP(N3000_NS_CTRL_ADDR, reg);
+	writeq(v, nn->base + N3000_NS_CTRL);
+
+	ret = n3000_nios_poll_stat_timeout(nn->base, &v);
+	if (ret)
+		dev_err(nn->dev, "fail to read reg 0x%x: %d\n", reg, ret);
+	else
+		*val = FIELD_GET(N3000_NS_STAT_RD_DATA, v);
+
+	return ret;
+}
+
+static const struct regmap_config n3000_nios_regbus_cfg = {
+	.reg_bits = 32,
+	.reg_stride = 4,
+	.val_bits = 32,
+	.fast_io = true,
+
+	.reg_write = n3000_nios_reg_write,
+	.reg_read = n3000_nios_reg_read,
+};
+
+static int n3000_nios_probe(struct dfl_device *ddev)
+{
+	struct device *dev = &ddev->dev;
+	struct n3000_nios *nn;
+	int ret;
+
+	nn = devm_kzalloc(dev, sizeof(*nn), GFP_KERNEL);
+	if (!nn)
+		return -ENOMEM;
+
+	dev_set_drvdata(&ddev->dev, nn);
+
+	nn->dev = dev;
+
+	nn->base = devm_ioremap_resource(&ddev->dev, &ddev->mmio_res);
+	if (IS_ERR(nn->base))
+		return PTR_ERR(nn->base);
+
+	nn->regmap = devm_regmap_init(dev, NULL, nn, &n3000_nios_regbus_cfg);
+	if (IS_ERR(nn->regmap))
+		return PTR_ERR(nn->regmap);
+
+	ret = n3000_nios_init_done_check(nn);
+	if (ret)
+		return ret;
+
+	ret = create_altera_spi_controller(nn);
+	if (ret)
+		dev_err(dev, "altera spi controller create failed: %d\n", ret);
+
+	return ret;
+}
+
+static void n3000_nios_remove(struct dfl_device *ddev)
+{
+	struct n3000_nios *nn = dev_get_drvdata(&ddev->dev);
+
+	destroy_altera_spi_controller(nn);
+}
+
+#define FME_FEATURE_ID_N3000_NIOS	0xd
+
+static const struct dfl_device_id n3000_nios_ids[] = {
+	{ FME_ID, FME_FEATURE_ID_N3000_NIOS },
+	{ }
+};
+MODULE_DEVICE_TABLE(dfl, n3000_nios_ids);
+
+static struct dfl_driver n3000_nios_driver = {
+	.drv	= {
+		.name       = "dfl-n3000-nios",
+		.dev_groups = n3000_nios_groups,
+	},
+	.id_table = n3000_nios_ids,
+	.probe   = n3000_nios_probe,
+	.remove  = n3000_nios_remove,
+};
+
+module_dfl_driver(n3000_nios_driver);
+
+MODULE_DESCRIPTION("Driver for Nios private feature on Intel PAC N3000");
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/fpga/dfl-pci.c b/drivers/fpga/dfl-pci.c
index a2203d03c9e2..04e47e266f26 100644
--- a/drivers/fpga/dfl-pci.c
+++ b/drivers/fpga/dfl-pci.c
@@ -27,6 +27,14 @@
 #define DRV_VERSION	"0.8"
 #define DRV_NAME	"dfl-pci"
 
+#define PCI_VSEC_ID_INTEL_DFLS 0x43
+
+#define PCI_VNDR_DFLS_CNT 0x8
+#define PCI_VNDR_DFLS_RES 0xc
+
+#define PCI_VNDR_DFLS_RES_BAR_MASK GENMASK(2, 0)
+#define PCI_VNDR_DFLS_RES_OFF_MASK GENMASK(31, 3)
+
 struct cci_drvdata {
 	struct dfl_fpga_cdev *cdev;	/* container device */
 };
@@ -119,49 +127,94 @@ static int *cci_pci_create_irq_table(struct pci_dev *pcidev, unsigned int nvec)
 	return table;
 }
 
-/* enumerate feature devices under pci device */
-static int cci_enumerate_feature_devs(struct pci_dev *pcidev)
+static int find_dfls_by_vsec(struct pci_dev *pcidev, struct dfl_fpga_enum_info *info)
 {
-	struct cci_drvdata *drvdata = pci_get_drvdata(pcidev);
-	int port_num, bar, i, nvec, ret = 0;
-	struct dfl_fpga_enum_info *info;
-	struct dfl_fpga_cdev *cdev;
+	u32 bir, offset, vndr_hdr, dfl_cnt, dfl_res;
+	int dfl_res_off, i, bars, voff = 0;
 	resource_size_t start, len;
-	void __iomem *base;
-	int *irq_table;
-	u32 offset;
-	u64 v;
 
-	/* allocate enumeration info via pci_dev */
-	info = dfl_fpga_enum_info_alloc(&pcidev->dev);
-	if (!info)
-		return -ENOMEM;
+	while ((voff = pci_find_next_ext_capability(pcidev, voff, PCI_EXT_CAP_ID_VNDR))) {
+		vndr_hdr = 0;
+		pci_read_config_dword(pcidev, voff + PCI_VNDR_HEADER, &vndr_hdr);
 
-	/* add irq info for enumeration if the device support irq */
-	nvec = cci_pci_alloc_irq(pcidev);
-	if (nvec < 0) {
-		dev_err(&pcidev->dev, "Fail to alloc irq %d.\n", nvec);
-		ret = nvec;
-		goto enum_info_free_exit;
-	} else if (nvec) {
-		irq_table = cci_pci_create_irq_table(pcidev, nvec);
-		if (!irq_table) {
-			ret = -ENOMEM;
-			goto irq_free_exit;
+		if (PCI_VNDR_HEADER_ID(vndr_hdr) == PCI_VSEC_ID_INTEL_DFLS &&
+		    pcidev->vendor == PCI_VENDOR_ID_INTEL)
+			break;
+	}
+
+	if (!voff) {
+		dev_dbg(&pcidev->dev, "%s no DFL VSEC found\n", __func__);
+		return -ENODEV;
+	}
+
+	dfl_cnt = 0;
+	pci_read_config_dword(pcidev, voff + PCI_VNDR_DFLS_CNT, &dfl_cnt);
+	if (dfl_cnt > PCI_STD_NUM_BARS) {
+		dev_err(&pcidev->dev, "%s too many DFLs %d > %d\n",
+			__func__, dfl_cnt, PCI_STD_NUM_BARS);
+		return -EINVAL;
+	}
+
+	dfl_res_off = voff + PCI_VNDR_DFLS_RES;
+	if (dfl_res_off + (dfl_cnt * sizeof(u32)) > PCI_CFG_SPACE_EXP_SIZE) {
+		dev_err(&pcidev->dev, "%s DFL VSEC too big for PCIe config space\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	for (i = 0, bars = 0; i < dfl_cnt; i++, dfl_res_off += sizeof(u32)) {
+		dfl_res = GENMASK(31, 0);
+		pci_read_config_dword(pcidev, dfl_res_off, &dfl_res);
+
+		bir = dfl_res & PCI_VNDR_DFLS_RES_BAR_MASK;
+		if (bir >= PCI_STD_NUM_BARS) {
+			dev_err(&pcidev->dev, "%s bad bir number %d\n",
+				__func__, bir);
+			return -EINVAL;
 		}
 
-		ret = dfl_fpga_enum_info_add_irq(info, nvec, irq_table);
-		kfree(irq_table);
-		if (ret)
-			goto irq_free_exit;
+		if (bars & BIT(bir)) {
+			dev_err(&pcidev->dev, "%s DFL for BAR %d already specified\n",
+				__func__, bir);
+			return -EINVAL;
+		}
+
+		bars |= BIT(bir);
+
+		len = pci_resource_len(pcidev, bir);
+		offset = dfl_res & PCI_VNDR_DFLS_RES_OFF_MASK;
+		if (offset >= len) {
+			dev_err(&pcidev->dev, "%s bad offset %u >= %pa\n",
+				__func__, offset, &len);
+			return -EINVAL;
+		}
+
+		dev_dbg(&pcidev->dev, "%s BAR %d offset 0x%x\n", __func__, bir, offset);
+
+		len -= offset;
+
+		start = pci_resource_start(pcidev, bir) + offset;
+
+		dfl_fpga_enum_info_add_dfl(info, start, len);
 	}
 
-	/* start to find Device Feature List in Bar 0 */
+	return 0;
+}
+
+/* default method of finding dfls starting at offset 0 of bar 0 */
+static int find_dfls_by_default(struct pci_dev *pcidev,
+				struct dfl_fpga_enum_info *info)
+{
+	int port_num, bar, i, ret = 0;
+	resource_size_t start, len;
+	void __iomem *base;
+	u32 offset;
+	u64 v;
+
+	/* start to find Device Feature List from Bar 0 */
 	base = cci_pci_ioremap_bar0(pcidev);
-	if (!base) {
-		ret = -ENOMEM;
-		goto irq_free_exit;
-	}
+	if (!base)
+		return -ENOMEM;
 
 	/*
 	 * PF device has FME and Ports/AFUs, and VF device only has one
@@ -208,12 +261,54 @@ static int cci_enumerate_feature_devs(struct pci_dev *pcidev)
 		dfl_fpga_enum_info_add_dfl(info, start, len);
 	} else {
 		ret = -ENODEV;
-		goto irq_free_exit;
 	}
 
 	/* release I/O mappings for next step enumeration */
 	pcim_iounmap_regions(pcidev, BIT(0));
 
+	return ret;
+}
+
+/* enumerate feature devices under pci device */
+static int cci_enumerate_feature_devs(struct pci_dev *pcidev)
+{
+	struct cci_drvdata *drvdata = pci_get_drvdata(pcidev);
+	struct dfl_fpga_enum_info *info;
+	struct dfl_fpga_cdev *cdev;
+	int nvec, ret = 0;
+	int *irq_table;
+
+	/* allocate enumeration info via pci_dev */
+	info = dfl_fpga_enum_info_alloc(&pcidev->dev);
+	if (!info)
+		return -ENOMEM;
+
+	/* add irq info for enumeration if the device support irq */
+	nvec = cci_pci_alloc_irq(pcidev);
+	if (nvec < 0) {
+		dev_err(&pcidev->dev, "Fail to alloc irq %d.\n", nvec);
+		ret = nvec;
+		goto enum_info_free_exit;
+	} else if (nvec) {
+		irq_table = cci_pci_create_irq_table(pcidev, nvec);
+		if (!irq_table) {
+			ret = -ENOMEM;
+			goto irq_free_exit;
+		}
+
+		ret = dfl_fpga_enum_info_add_irq(info, nvec, irq_table);
+		kfree(irq_table);
+		if (ret)
+			goto irq_free_exit;
+	}
+
+	ret = find_dfls_by_vsec(pcidev, info);
+	if (ret == -ENODEV)
+		ret = find_dfls_by_default(pcidev, info);
+
+	if (ret)
+		goto irq_free_exit;
+
 	/* start enumeration with prepared enumeration information */
 	cdev = dfl_fpga_feature_devs_enumerate(info);
 	if (IS_ERR(cdev)) {
diff --git a/drivers/fpga/dfl.c b/drivers/fpga/dfl.c
index b450870b75ed..511b20ff35a3 100644
--- a/drivers/fpga/dfl.c
+++ b/drivers/fpga/dfl.c
@@ -10,6 +10,7 @@
  *   Wu Hao <hao.wu@intel.com>
  *   Xiao Guangrong <guangrong.xiao@linux.intel.com>
  */
+#include <linux/dfl.h>
 #include <linux/fpga-dfl.h>
 #include <linux/module.h>
 #include <linux/uaccess.h>
@@ -298,8 +299,7 @@ static int dfl_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
 {
 	struct dfl_device *ddev = to_dfl_dev(dev);
 
-	/* The type has 4 valid bits and feature_id has 12 valid bits */
-	return add_uevent_var(env, "MODALIAS=dfl:t%01Xf%03X",
+	return add_uevent_var(env, "MODALIAS=dfl:t%04Xf%04X",
 			      ddev->type, ddev->feature_id);
 }
 
diff --git a/drivers/fpga/dfl.h b/drivers/fpga/dfl.h
index 5dc758f655b7..2b82c96ba56c 100644
--- a/drivers/fpga/dfl.h
+++ b/drivers/fpga/dfl.h
@@ -22,6 +22,7 @@
 #include <linux/interrupt.h>
 #include <linux/iopoll.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/uuid.h>
@@ -516,88 +517,4 @@ long dfl_feature_ioctl_set_irq(struct platform_device *pdev,
 			       struct dfl_feature *feature,
 			       unsigned long arg);
 
-/**
- * enum dfl_id_type - define the DFL FIU types
- */
-enum dfl_id_type {
-	FME_ID,
-	PORT_ID,
-	DFL_ID_MAX,
-};
-
-/**
- * struct dfl_device_id -  dfl device identifier
- * @type: contains 4 bits DFL FIU type of the device. See enum dfl_id_type.
- * @feature_id: contains 12 bits feature identifier local to its DFL FIU type.
- * @driver_data: driver specific data.
- */
-struct dfl_device_id {
-	u8 type;
-	u16 feature_id;
-	unsigned long driver_data;
-};
-
-/**
- * struct dfl_device - represent an dfl device on dfl bus
- *
- * @dev: generic device interface.
- * @id: id of the dfl device.
- * @type: type of DFL FIU of the device. See enum dfl_id_type.
- * @feature_id: 16 bits feature identifier local to its DFL FIU type.
- * @mmio_res: mmio resource of this dfl device.
- * @irqs: list of Linux IRQ numbers of this dfl device.
- * @num_irqs: number of IRQs supported by this dfl device.
- * @cdev: pointer to DFL FPGA container device this dfl device belongs to.
- * @id_entry: matched id entry in dfl driver's id table.
- */
-struct dfl_device {
-	struct device dev;
-	int id;
-	u8 type;
-	u16 feature_id;
-	struct resource mmio_res;
-	int *irqs;
-	unsigned int num_irqs;
-	struct dfl_fpga_cdev *cdev;
-	const struct dfl_device_id *id_entry;
-};
-
-/**
- * struct dfl_driver - represent an dfl device driver
- *
- * @drv: driver model structure.
- * @id_table: pointer to table of device IDs the driver is interested in.
- *	      { } member terminated.
- * @probe: mandatory callback for device binding.
- * @remove: callback for device unbinding.
- */
-struct dfl_driver {
-	struct device_driver drv;
-	const struct dfl_device_id *id_table;
-
-	int (*probe)(struct dfl_device *dfl_dev);
-	void (*remove)(struct dfl_device *dfl_dev);
-};
-
-#define to_dfl_dev(d) container_of(d, struct dfl_device, dev)
-#define to_dfl_drv(d) container_of(d, struct dfl_driver, drv)
-
-/*
- * use a macro to avoid include chaining to get THIS_MODULE.
- */
-#define dfl_driver_register(drv) \
-	__dfl_driver_register(drv, THIS_MODULE)
-int __dfl_driver_register(struct dfl_driver *dfl_drv, struct module *owner);
-void dfl_driver_unregister(struct dfl_driver *dfl_drv);
-
-/*
- * module_dfl_driver() - Helper macro for drivers that don't do
- * anything special in module init/exit.  This eliminates a lot of
- * boilerplate.  Each module may only use this macro once, and
- * calling it replaces module_init() and module_exit().
- */
-#define module_dfl_driver(__dfl_driver) \
-	module_driver(__dfl_driver, dfl_driver_register, \
-		      dfl_driver_unregister)
-
 #endif /* __FPGA_DFL_H */
diff --git a/drivers/fpga/fpga-bridge.c b/drivers/fpga/fpga-bridge.c
index 2deccacc3aa7..e9266b2a357f 100644
--- a/drivers/fpga/fpga-bridge.c
+++ b/drivers/fpga/fpga-bridge.c
@@ -17,7 +17,7 @@ static DEFINE_IDA(fpga_bridge_ida);
 static struct class *fpga_bridge_class;
 
 /* Lock for adding/removing bridges to linked lists*/
-static spinlock_t bridge_list_lock;
+static DEFINE_SPINLOCK(bridge_list_lock);
 
 /**
  * fpga_bridge_enable - Enable transactions on the bridge
@@ -479,8 +479,6 @@ static void fpga_bridge_dev_release(struct device *dev)
 
 static int __init fpga_bridge_dev_init(void)
 {
-	spin_lock_init(&bridge_list_lock);
-
 	fpga_bridge_class = class_create(THIS_MODULE, "fpga_bridge");
 	if (IS_ERR(fpga_bridge_class))
 		return PTR_ERR(fpga_bridge_class);
diff --git a/drivers/greybus/es2.c b/drivers/greybus/es2.c
index 1df6ab5d339d..48ad154df3a7 100644
--- a/drivers/greybus/es2.c
+++ b/drivers/greybus/es2.c
@@ -567,12 +567,9 @@ static int cport_enable(struct gb_host_device *hd, u16 cport_id,
 			      USB_DIR_OUT | USB_TYPE_VENDOR |
 			      USB_RECIP_INTERFACE, cport_id, 0,
 			      req, sizeof(*req), ES2_USB_CTRL_TIMEOUT);
-	if (ret != sizeof(*req)) {
+	if (ret < 0) {
 		dev_err(&udev->dev, "failed to set cport flags for port %d\n",
 			cport_id);
-		if (ret >= 0)
-			ret = -EIO;
-
 		goto out;
 	}
 
@@ -961,12 +958,10 @@ static int arpc_send(struct es2_ap_dev *es2, struct arpc *rpc, int timeout)
 				 0, 0,
 				 rpc->req, le16_to_cpu(rpc->req->size),
 				 ES2_USB_CTRL_TIMEOUT);
-	if (retval != le16_to_cpu(rpc->req->size)) {
+	if (retval < 0) {
 		dev_err(&udev->dev,
 			"failed to send ARPC request %d: %d\n",
 			rpc->req->type, retval);
-		if (retval > 0)
-			retval = -EIO;
 		return retval;
 	}
 
diff --git a/drivers/greybus/greybus_trace.h b/drivers/greybus/greybus_trace.h
index 1bc9f1275c65..616a3bd61aa6 100644
--- a/drivers/greybus/greybus_trace.h
+++ b/drivers/greybus/greybus_trace.h
@@ -40,7 +40,7 @@ DECLARE_EVENT_CLASS(gb_message,
 		__entry->result = message->header->result;
 	),
 
-	TP_printk("size=%hu operation_id=0x%04x type=0x%02x result=0x%02x",
+	TP_printk("size=%u operation_id=0x%04x type=0x%02x result=0x%02x",
 		  __entry->size, __entry->operation_id,
 		  __entry->type, __entry->result)
 );
@@ -317,7 +317,7 @@ DECLARE_EVENT_CLASS(gb_interface,
 		__entry->mode_switch = intf->mode_switch;
 	),
 
-	TP_printk("intf_id=%hhu device_id=%hhu module_id=%hhu D=%d J=%d A=%d E=%d M=%d",
+	TP_printk("intf_id=%u device_id=%u module_id=%u D=%d J=%d A=%d E=%d M=%d",
 		__entry->id, __entry->device_id, __entry->module_id,
 		__entry->disconnected, __entry->ejected, __entry->active,
 		__entry->enabled, __entry->mode_switch)
@@ -391,7 +391,7 @@ DECLARE_EVENT_CLASS(gb_module,
 		__entry->disconnected = module->disconnected;
 	),
 
-	TP_printk("hd_bus_id=%d module_id=%hhu num_interfaces=%zu disconnected=%d",
+	TP_printk("hd_bus_id=%d module_id=%u num_interfaces=%zu disconnected=%d",
 		__entry->hd_bus_id, __entry->module_id,
 		__entry->num_interfaces, __entry->disconnected)
 );
diff --git a/drivers/hwtracing/coresight/coresight-catu.c b/drivers/hwtracing/coresight/coresight-catu.c
index 8e19e8cdcce5..e0740c6dbd54 100644
--- a/drivers/hwtracing/coresight/coresight-catu.c
+++ b/drivers/hwtracing/coresight/coresight-catu.c
@@ -401,8 +401,9 @@ static const struct attribute_group *catu_groups[] = {
 
 static inline int catu_wait_for_ready(struct catu_drvdata *drvdata)
 {
-	return coresight_timeout(drvdata->base,
-				 CATU_STATUS, CATU_STATUS_READY, 1);
+	struct csdev_access *csa = &drvdata->csdev->access;
+
+	return coresight_timeout(csa, CATU_STATUS, CATU_STATUS_READY, 1);
 }
 
 static int catu_enable_hw(struct catu_drvdata *drvdata, void *data)
@@ -411,6 +412,7 @@ static int catu_enable_hw(struct catu_drvdata *drvdata, void *data)
 	u32 control, mode;
 	struct etr_buf *etr_buf = data;
 	struct device *dev = &drvdata->csdev->dev;
+	struct coresight_device *csdev = drvdata->csdev;
 
 	if (catu_wait_for_ready(drvdata))
 		dev_warn(dev, "Timeout while waiting for READY\n");
@@ -421,7 +423,7 @@ static int catu_enable_hw(struct catu_drvdata *drvdata, void *data)
 		return -EBUSY;
 	}
 
-	rc = coresight_claim_device_unlocked(drvdata->base);
+	rc = coresight_claim_device_unlocked(csdev);
 	if (rc)
 		return rc;
 
@@ -465,9 +467,10 @@ static int catu_disable_hw(struct catu_drvdata *drvdata)
 {
 	int rc = 0;
 	struct device *dev = &drvdata->csdev->dev;
+	struct coresight_device *csdev = drvdata->csdev;
 
 	catu_write_control(drvdata, 0);
-	coresight_disclaim_device_unlocked(drvdata->base);
+	coresight_disclaim_device_unlocked(csdev);
 	if (catu_wait_for_ready(drvdata)) {
 		dev_info(dev, "Timeout while waiting for READY\n");
 		rc = -EAGAIN;
@@ -551,6 +554,7 @@ static int catu_probe(struct amba_device *adev, const struct amba_id *id)
 	dev->platform_data = pdata;
 
 	drvdata->base = base;
+	catu_desc.access = CSDEV_ACCESS_IOMEM(base);
 	catu_desc.pdata = pdata;
 	catu_desc.dev = dev;
 	catu_desc.groups = catu_groups;
diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c
index 4ba801dffcb7..0062c8935653 100644
--- a/drivers/hwtracing/coresight/coresight-core.c
+++ b/drivers/hwtracing/coresight/coresight-core.c
@@ -145,30 +145,32 @@ static int coresight_find_link_outport(struct coresight_device *csdev,
 	return -ENODEV;
 }
 
-static inline u32 coresight_read_claim_tags(void __iomem *base)
+static inline u32 coresight_read_claim_tags(struct coresight_device *csdev)
 {
-	return readl_relaxed(base + CORESIGHT_CLAIMCLR);
+	return csdev_access_relaxed_read32(&csdev->access, CORESIGHT_CLAIMCLR);
 }
 
-static inline bool coresight_is_claimed_self_hosted(void __iomem *base)
+static inline bool coresight_is_claimed_self_hosted(struct coresight_device *csdev)
 {
-	return coresight_read_claim_tags(base) == CORESIGHT_CLAIM_SELF_HOSTED;
+	return coresight_read_claim_tags(csdev) == CORESIGHT_CLAIM_SELF_HOSTED;
 }
 
-static inline bool coresight_is_claimed_any(void __iomem *base)
+static inline bool coresight_is_claimed_any(struct coresight_device *csdev)
 {
-	return coresight_read_claim_tags(base) != 0;
+	return coresight_read_claim_tags(csdev) != 0;
 }
 
-static inline void coresight_set_claim_tags(void __iomem *base)
+static inline void coresight_set_claim_tags(struct coresight_device *csdev)
 {
-	writel_relaxed(CORESIGHT_CLAIM_SELF_HOSTED, base + CORESIGHT_CLAIMSET);
+	csdev_access_relaxed_write32(&csdev->access, CORESIGHT_CLAIM_SELF_HOSTED,
+				     CORESIGHT_CLAIMSET);
 	isb();
 }
 
-static inline void coresight_clear_claim_tags(void __iomem *base)
+static inline void coresight_clear_claim_tags(struct coresight_device *csdev)
 {
-	writel_relaxed(CORESIGHT_CLAIM_SELF_HOSTED, base + CORESIGHT_CLAIMCLR);
+	csdev_access_relaxed_write32(&csdev->access, CORESIGHT_CLAIM_SELF_HOSTED,
+				     CORESIGHT_CLAIMCLR);
 	isb();
 }
 
@@ -182,27 +184,33 @@ static inline void coresight_clear_claim_tags(void __iomem *base)
  * Called with CS_UNLOCKed for the component.
  * Returns : 0 on success
  */
-int coresight_claim_device_unlocked(void __iomem *base)
+int coresight_claim_device_unlocked(struct coresight_device *csdev)
 {
-	if (coresight_is_claimed_any(base))
+	if (WARN_ON(!csdev))
+		return -EINVAL;
+
+	if (coresight_is_claimed_any(csdev))
 		return -EBUSY;
 
-	coresight_set_claim_tags(base);
-	if (coresight_is_claimed_self_hosted(base))
+	coresight_set_claim_tags(csdev);
+	if (coresight_is_claimed_self_hosted(csdev))
 		return 0;
 	/* There was a race setting the tags, clean up and fail */
-	coresight_clear_claim_tags(base);
+	coresight_clear_claim_tags(csdev);
 	return -EBUSY;
 }
 EXPORT_SYMBOL_GPL(coresight_claim_device_unlocked);
 
-int coresight_claim_device(void __iomem *base)
+int coresight_claim_device(struct coresight_device *csdev)
 {
 	int rc;
 
-	CS_UNLOCK(base);
-	rc = coresight_claim_device_unlocked(base);
-	CS_LOCK(base);
+	if (WARN_ON(!csdev))
+		return -EINVAL;
+
+	CS_UNLOCK(csdev->access.base);
+	rc = coresight_claim_device_unlocked(csdev);
+	CS_LOCK(csdev->access.base);
 
 	return rc;
 }
@@ -212,11 +220,14 @@ EXPORT_SYMBOL_GPL(coresight_claim_device);
  * coresight_disclaim_device_unlocked : Clear the claim tags for the device.
  * Called with CS_UNLOCKed for the component.
  */
-void coresight_disclaim_device_unlocked(void __iomem *base)
+void coresight_disclaim_device_unlocked(struct coresight_device *csdev)
 {
 
-	if (coresight_is_claimed_self_hosted(base))
-		coresight_clear_claim_tags(base);
+	if (WARN_ON(!csdev))
+		return;
+
+	if (coresight_is_claimed_self_hosted(csdev))
+		coresight_clear_claim_tags(csdev);
 	else
 		/*
 		 * The external agent may have not honoured our claim
@@ -227,11 +238,14 @@ void coresight_disclaim_device_unlocked(void __iomem *base)
 }
 EXPORT_SYMBOL_GPL(coresight_disclaim_device_unlocked);
 
-void coresight_disclaim_device(void __iomem *base)
+void coresight_disclaim_device(struct coresight_device *csdev)
 {
-	CS_UNLOCK(base);
-	coresight_disclaim_device_unlocked(base);
-	CS_LOCK(base);
+	if (WARN_ON(!csdev))
+		return;
+
+	CS_UNLOCK(csdev->access.base);
+	coresight_disclaim_device_unlocked(csdev);
+	CS_LOCK(csdev->access.base);
 }
 EXPORT_SYMBOL_GPL(coresight_disclaim_device);
 
@@ -1418,23 +1432,24 @@ static void coresight_remove_conns(struct coresight_device *csdev)
 }
 
 /**
- * coresight_timeout - loop until a bit has changed to a specific state.
- * @addr: base address of the area of interest.
- * @offset: address of a register, starting from @addr.
+ * coresight_timeout - loop until a bit in a register has changed to a
+ *			specific state.
+ * @csa: coresight device access for the device
+ * @offset: Offset of the register from the base of the device.
  * @position: the position of the bit of interest.
  * @value: the value the bit should have.
  *
  * Return: 0 as soon as the bit has taken the desired state or -EAGAIN if
  * TIMEOUT_US has elapsed, which ever happens first.
  */
-
-int coresight_timeout(void __iomem *addr, u32 offset, int position, int value)
+int coresight_timeout(struct csdev_access *csa, u32 offset,
+		      int position, int value)
 {
 	int i;
 	u32 val;
 
 	for (i = TIMEOUT_US; i > 0; i--) {
-		val = __raw_readl(addr + offset);
+		val = csdev_access_read32(csa, offset);
 		/* waiting on the bit to go from 0 to 1 */
 		if (value) {
 			if (val & BIT(position))
@@ -1458,6 +1473,48 @@ int coresight_timeout(void __iomem *addr, u32 offset, int position, int value)
 }
 EXPORT_SYMBOL_GPL(coresight_timeout);
 
+u32 coresight_relaxed_read32(struct coresight_device *csdev, u32 offset)
+{
+	return csdev_access_relaxed_read32(&csdev->access, offset);
+}
+
+u32 coresight_read32(struct coresight_device *csdev, u32 offset)
+{
+	return csdev_access_read32(&csdev->access, offset);
+}
+
+void coresight_relaxed_write32(struct coresight_device *csdev,
+			       u32 val, u32 offset)
+{
+	csdev_access_relaxed_write32(&csdev->access, val, offset);
+}
+
+void coresight_write32(struct coresight_device *csdev, u32 val, u32 offset)
+{
+	csdev_access_write32(&csdev->access, val, offset);
+}
+
+u64 coresight_relaxed_read64(struct coresight_device *csdev, u32 offset)
+{
+	return csdev_access_relaxed_read64(&csdev->access, offset);
+}
+
+u64 coresight_read64(struct coresight_device *csdev, u32 offset)
+{
+	return csdev_access_read64(&csdev->access, offset);
+}
+
+void coresight_relaxed_write64(struct coresight_device *csdev,
+			       u64 val, u32 offset)
+{
+	csdev_access_relaxed_write64(&csdev->access, val, offset);
+}
+
+void coresight_write64(struct coresight_device *csdev, u64 val, u32 offset)
+{
+	csdev_access_write64(&csdev->access, val, offset);
+}
+
 /*
  * coresight_release_platform_data: Release references to the devices connected
  * to the output port of this device.
@@ -1522,6 +1579,7 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
 	csdev->type = desc->type;
 	csdev->subtype = desc->subtype;
 	csdev->ops = desc->ops;
+	csdev->access = desc->access;
 	csdev->orphan = false;
 
 	csdev->dev.type = &coresight_dev_type[desc->type];
diff --git a/drivers/hwtracing/coresight/coresight-cti-core.c b/drivers/hwtracing/coresight/coresight-cti-core.c
index 30e48809ba00..e2a3620cbf48 100644
--- a/drivers/hwtracing/coresight/coresight-cti-core.c
+++ b/drivers/hwtracing/coresight/coresight-cti-core.c
@@ -102,7 +102,7 @@ static int cti_enable_hw(struct cti_drvdata *drvdata)
 		goto cti_state_unchanged;
 
 	/* claim the device */
-	rc = coresight_claim_device(drvdata->base);
+	rc = coresight_claim_device(drvdata->csdev);
 	if (rc)
 		goto cti_err_not_enabled;
 
@@ -136,7 +136,7 @@ static void cti_cpuhp_enable_hw(struct cti_drvdata *drvdata)
 		goto cti_hp_not_enabled;
 
 	/* try to claim the device */
-	if (coresight_claim_device(drvdata->base))
+	if (coresight_claim_device(drvdata->csdev))
 		goto cti_hp_not_enabled;
 
 	cti_write_all_hw_regs(drvdata);
@@ -154,6 +154,7 @@ static int cti_disable_hw(struct cti_drvdata *drvdata)
 {
 	struct cti_config *config = &drvdata->config;
 	struct device *dev = &drvdata->csdev->dev;
+	struct coresight_device *csdev = drvdata->csdev;
 
 	spin_lock(&drvdata->spinlock);
 
@@ -171,7 +172,7 @@ static int cti_disable_hw(struct cti_drvdata *drvdata)
 	writel_relaxed(0, drvdata->base + CTICONTROL);
 	config->hw_enabled = false;
 
-	coresight_disclaim_device_unlocked(drvdata->base);
+	coresight_disclaim_device_unlocked(csdev);
 	CS_LOCK(drvdata->base);
 	spin_unlock(&drvdata->spinlock);
 	pm_runtime_put(dev);
@@ -655,6 +656,7 @@ static int cti_cpu_pm_notify(struct notifier_block *nb, unsigned long cmd,
 			     void *v)
 {
 	struct cti_drvdata *drvdata;
+	struct coresight_device *csdev;
 	unsigned int cpu = smp_processor_id();
 	int notify_res = NOTIFY_OK;
 
@@ -662,6 +664,7 @@ static int cti_cpu_pm_notify(struct notifier_block *nb, unsigned long cmd,
 		return NOTIFY_OK;
 
 	drvdata = cti_cpu_drvdata[cpu];
+	csdev = drvdata->csdev;
 
 	if (WARN_ON_ONCE(drvdata->ctidev.cpu != cpu))
 		return NOTIFY_BAD;
@@ -673,13 +676,13 @@ static int cti_cpu_pm_notify(struct notifier_block *nb, unsigned long cmd,
 		/* CTI regs all static - we have a copy & nothing to save */
 		drvdata->config.hw_powered = false;
 		if (drvdata->config.hw_enabled)
-			coresight_disclaim_device(drvdata->base);
+			coresight_disclaim_device(csdev);
 		break;
 
 	case CPU_PM_ENTER_FAILED:
 		drvdata->config.hw_powered = true;
 		if (drvdata->config.hw_enabled) {
-			if (coresight_claim_device(drvdata->base))
+			if (coresight_claim_device(csdev))
 				drvdata->config.hw_enabled = false;
 		}
 		break;
@@ -692,7 +695,7 @@ static int cti_cpu_pm_notify(struct notifier_block *nb, unsigned long cmd,
 		/* check enable reference count to enable HW */
 		if (atomic_read(&drvdata->config.enable_req_count)) {
 			/* check we can claim the device as we re-power */
-			if (coresight_claim_device(drvdata->base))
+			if (coresight_claim_device(csdev))
 				goto cti_notify_exit;
 
 			drvdata->config.hw_enabled = true;
@@ -736,7 +739,7 @@ static int cti_dying_cpu(unsigned int cpu)
 	spin_lock(&drvdata->spinlock);
 	drvdata->config.hw_powered = false;
 	if (drvdata->config.hw_enabled)
-		coresight_disclaim_device(drvdata->base);
+		coresight_disclaim_device(drvdata->csdev);
 	spin_unlock(&drvdata->spinlock);
 	return 0;
 }
@@ -868,6 +871,7 @@ static int cti_probe(struct amba_device *adev, const struct amba_id *id)
 		return PTR_ERR(base);
 
 	drvdata->base = base;
+	cti_desc.access = CSDEV_ACCESS_IOMEM(base);
 
 	dev_set_drvdata(dev, drvdata);
 
diff --git a/drivers/hwtracing/coresight/coresight-cti-platform.c b/drivers/hwtracing/coresight/coresight-cti-platform.c
index 98f830c6ed50..ccef04f27f12 100644
--- a/drivers/hwtracing/coresight/coresight-cti-platform.c
+++ b/drivers/hwtracing/coresight/coresight-cti-platform.c
@@ -343,7 +343,6 @@ static int cti_plat_create_connection(struct device *dev,
 {
 	struct cti_trig_con *tc = NULL;
 	int cpuid = -1, err = 0;
-	struct fwnode_handle *cs_fwnode = NULL;
 	struct coresight_device *csdev = NULL;
 	const char *assoc_name = "unknown";
 	char cpu_name_str[16];
@@ -397,8 +396,9 @@ static int cti_plat_create_connection(struct device *dev,
 		assoc_name = cpu_name_str;
 	} else {
 		/* associated device ? */
-		cs_fwnode = fwnode_find_reference(fwnode,
-						  CTI_DT_CSDEV_ASSOC, 0);
+		struct fwnode_handle *cs_fwnode = fwnode_find_reference(fwnode,
+									CTI_DT_CSDEV_ASSOC,
+									0);
 		if (!IS_ERR(cs_fwnode)) {
 			assoc_name = cti_plat_get_csdev_or_node_name(cs_fwnode,
 								     &csdev);
diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c
index 51c801c05e5c..f775cbee12b8 100644
--- a/drivers/hwtracing/coresight/coresight-etb10.c
+++ b/drivers/hwtracing/coresight/coresight-etb10.c
@@ -132,7 +132,7 @@ static void __etb_enable_hw(struct etb_drvdata *drvdata)
 
 static int etb_enable_hw(struct etb_drvdata *drvdata)
 {
-	int rc = coresight_claim_device(drvdata->base);
+	int rc = coresight_claim_device(drvdata->csdev);
 
 	if (rc)
 		return rc;
@@ -252,6 +252,7 @@ static void __etb_disable_hw(struct etb_drvdata *drvdata)
 {
 	u32 ffcr;
 	struct device *dev = &drvdata->csdev->dev;
+	struct csdev_access *csa = &drvdata->csdev->access;
 
 	CS_UNLOCK(drvdata->base);
 
@@ -263,7 +264,7 @@ static void __etb_disable_hw(struct etb_drvdata *drvdata)
 	ffcr |= ETB_FFCR_FON_MAN;
 	writel_relaxed(ffcr, drvdata->base + ETB_FFCR);
 
-	if (coresight_timeout(drvdata->base, ETB_FFCR, ETB_FFCR_BIT, 0)) {
+	if (coresight_timeout(csa, ETB_FFCR, ETB_FFCR_BIT, 0)) {
 		dev_err(dev,
 		"timeout while waiting for completion of Manual Flush\n");
 	}
@@ -271,7 +272,7 @@ static void __etb_disable_hw(struct etb_drvdata *drvdata)
 	/* disable trace capture */
 	writel_relaxed(0x0, drvdata->base + ETB_CTL_REG);
 
-	if (coresight_timeout(drvdata->base, ETB_FFSR, ETB_FFSR_BIT, 1)) {
+	if (coresight_timeout(csa, ETB_FFSR, ETB_FFSR_BIT, 1)) {
 		dev_err(dev,
 			"timeout while waiting for Formatter to Stop\n");
 	}
@@ -344,7 +345,7 @@ static void etb_disable_hw(struct etb_drvdata *drvdata)
 {
 	__etb_disable_hw(drvdata);
 	etb_dump_hw(drvdata);
-	coresight_disclaim_device(drvdata->base);
+	coresight_disclaim_device(drvdata->csdev);
 }
 
 static int etb_disable(struct coresight_device *csdev)
@@ -757,6 +758,7 @@ static int etb_probe(struct amba_device *adev, const struct amba_id *id)
 		return PTR_ERR(base);
 
 	drvdata->base = base;
+	desc.access = CSDEV_ACCESS_IOMEM(base);
 
 	spin_lock_init(&drvdata->spinlock);
 
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
index bdc34ca449f7..0f603b4094f2 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -27,17 +27,45 @@ static bool etm_perf_up;
 static DEFINE_PER_CPU(struct perf_output_handle, ctx_handle);
 static DEFINE_PER_CPU(struct coresight_device *, csdev_src);
 
-/* ETMv3.5/PTM's ETMCR is 'config' */
+/*
+ * The PMU formats were originally for ETMv3.5/PTM's ETMCR 'config';
+ * now take them as general formats and apply on all ETMs.
+ */
 PMU_FORMAT_ATTR(cycacc,		"config:" __stringify(ETM_OPT_CYCACC));
-PMU_FORMAT_ATTR(contextid,	"config:" __stringify(ETM_OPT_CTXTID));
+/* contextid1 enables tracing CONTEXTIDR_EL1 for ETMv4 */
+PMU_FORMAT_ATTR(contextid1,	"config:" __stringify(ETM_OPT_CTXTID));
+/* contextid2 enables tracing CONTEXTIDR_EL2 for ETMv4 */
+PMU_FORMAT_ATTR(contextid2,	"config:" __stringify(ETM_OPT_CTXTID2));
 PMU_FORMAT_ATTR(timestamp,	"config:" __stringify(ETM_OPT_TS));
 PMU_FORMAT_ATTR(retstack,	"config:" __stringify(ETM_OPT_RETSTK));
 /* Sink ID - same for all ETMs */
 PMU_FORMAT_ATTR(sinkid,		"config2:0-31");
 
+/*
+ * contextid always traces the "PID": it shows ETM_OPT_CTXTID2 when an ETMv4
+ * kernel runs at EL2 (PID in CONTEXTIDR_EL2), else ETM_OPT_CTXTID (EL1).
+ * IS_ENABLED() is used so a modular (=m) ETM4X build is detected as well.
+ */
+static ssize_t format_attr_contextid_show(struct device *dev,
+					  struct device_attribute *attr,
+					  char *page)
+{
+	int pid_fmt = ETM_OPT_CTXTID;
+
+#if IS_ENABLED(CONFIG_CORESIGHT_SOURCE_ETM4X)
+	pid_fmt = is_kernel_in_hyp_mode() ? ETM_OPT_CTXTID2 : ETM_OPT_CTXTID;
+#endif
+	return sprintf(page, "config:%d\n", pid_fmt);
+}
+
+struct device_attribute format_attr_contextid =
+	__ATTR(contextid, 0444, format_attr_contextid_show, NULL);
+
 static struct attribute *etm_config_formats_attr[] = {
 	&format_attr_cycacc.attr,
 	&format_attr_contextid.attr,
+	&format_attr_contextid1.attr,
+	&format_attr_contextid2.attr,
 	&format_attr_timestamp.attr,
 	&format_attr_retstack.attr,
 	&format_attr_sinkid.attr,
diff --git a/drivers/hwtracing/coresight/coresight-etm3x-core.c b/drivers/hwtracing/coresight/coresight-etm3x-core.c
index 683a69e88efd..cf64ce73a741 100644
--- a/drivers/hwtracing/coresight/coresight-etm3x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm3x-core.c
@@ -358,10 +358,11 @@ static int etm_enable_hw(struct etm_drvdata *drvdata)
 	int i, rc;
 	u32 etmcr;
 	struct etm_config *config = &drvdata->config;
+	struct coresight_device *csdev = drvdata->csdev;
 
 	CS_UNLOCK(drvdata->base);
 
-	rc = coresight_claim_device_unlocked(drvdata->base);
+	rc = coresight_claim_device_unlocked(csdev);
 	if (rc)
 		goto done;
 
@@ -566,6 +567,7 @@ static void etm_disable_hw(void *info)
 	int i;
 	struct etm_drvdata *drvdata = info;
 	struct etm_config *config = &drvdata->config;
+	struct coresight_device *csdev = drvdata->csdev;
 
 	CS_UNLOCK(drvdata->base);
 	etm_set_prog(drvdata);
@@ -577,7 +579,7 @@ static void etm_disable_hw(void *info)
 		config->cntr_val[i] = etm_readl(drvdata, ETMCNTVRn(i));
 
 	etm_set_pwrdwn(drvdata);
-	coresight_disclaim_device_unlocked(drvdata->base);
+	coresight_disclaim_device_unlocked(csdev);
 
 	CS_LOCK(drvdata->base);
 
@@ -602,7 +604,7 @@ static void etm_disable_perf(struct coresight_device *csdev)
 	 * power down the tracer.
 	 */
 	etm_set_pwrdwn(drvdata);
-	coresight_disclaim_device_unlocked(drvdata->base);
+	coresight_disclaim_device_unlocked(csdev);
 
 	CS_LOCK(drvdata->base);
 }
@@ -839,6 +841,7 @@ static int etm_probe(struct amba_device *adev, const struct amba_id *id)
 		return PTR_ERR(base);
 
 	drvdata->base = base;
+	desc.access = CSDEV_ACCESS_IOMEM(base);
 
 	spin_lock_init(&drvdata->spinlock);
 
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index 82787cba537d..15016f757828 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -27,6 +27,7 @@
 #include <linux/seq_file.h>
 #include <linux/uaccess.h>
 #include <linux/perf_event.h>
+#include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/property.h>
 
@@ -59,32 +60,99 @@ static u64 etm4_get_access_type(struct etmv4_config *config);
 
 static enum cpuhp_state hp_online;
 
-static void etm4_os_unlock(struct etmv4_drvdata *drvdata)
+struct etm4_init_arg {
+	unsigned int		pid;
+	struct etmv4_drvdata	*drvdata;
+	struct csdev_access	*csa;
+};
+
+/*
+ * Check if TRCSSPCICRn(i) is implemented for a given instance.
+ *
+ * TRCSSPCICR<n> is implemented (present) only if all of the following
+ * are true:
+ *		TRCIDR4.NUMSSCC > n.
+ *		TRCIDR4.NUMPC > 0b0000 .
+ *		TRCSSCSR<n>.PC == 0b1
+ */
+static inline bool etm4x_sspcicrn_present(struct etmv4_drvdata *drvdata, int n)
+{
+	return (n < drvdata->nr_ss_cmp) &&
+	       drvdata->nr_pe &&
+	       (drvdata->config.ss_status[n] & TRCSSCSRn_PC);
+}
+
+u64 etm4x_sysreg_read(u32 offset, bool _relaxed, bool _64bit)
+{
+	u64 res = 0;
+
+	switch (offset) {
+	ETM4x_READ_SYSREG_CASES(res)
+	default :
+		pr_warn_ratelimited("etm4x: trying to read unsupported register @%x\n",
+			 offset);
+	}
+
+	if (!_relaxed)
+		__iormb(res);	/* Imitate the !relaxed I/O helpers */
+
+	return res;
+}
+
+void etm4x_sysreg_write(u64 val, u32 offset, bool _relaxed, bool _64bit)
+{
+	if (!_relaxed)
+		__iowmb();	/* Imitate the !relaxed I/O helpers */
+	if (!_64bit)
+		val &= GENMASK(31, 0);
+
+	switch (offset) {
+	ETM4x_WRITE_SYSREG_CASES(val)
+	default :
+		pr_warn_ratelimited("etm4x: trying to write to unsupported register @%x\n",
+			offset);
+	}
+}
+
+static void etm4_os_unlock_csa(struct etmv4_drvdata *drvdata, struct csdev_access *csa)
 {
 	/* Writing 0 to TRCOSLAR unlocks the trace registers */
-	writel_relaxed(0x0, drvdata->base + TRCOSLAR);
+	etm4x_relaxed_write32(csa, 0x0, TRCOSLAR);
 	drvdata->os_unlock = true;
 	isb();
 }
 
+static void etm4_os_unlock(struct etmv4_drvdata *drvdata)
+{
+	if (!WARN_ON(!drvdata->csdev))
+		etm4_os_unlock_csa(drvdata, &drvdata->csdev->access);
+
+}
+
 static void etm4_os_lock(struct etmv4_drvdata *drvdata)
 {
+	if (WARN_ON(!drvdata->csdev))
+		return;
+
 	/* Writing 0x1 to TRCOSLAR locks the trace registers */
-	writel_relaxed(0x1, drvdata->base + TRCOSLAR);
+	etm4x_relaxed_write32(&drvdata->csdev->access, 0x1, TRCOSLAR);
 	drvdata->os_unlock = false;
 	isb();
 }
 
-static bool etm4_arch_supported(u8 arch)
+static void etm4_cs_lock(struct etmv4_drvdata *drvdata,
+			 struct csdev_access *csa)
 {
-	/* Mask out the minor version number */
-	switch (arch & 0xf0) {
-	case ETM_ARCH_V4:
-		break;
-	default:
-		return false;
-	}
-	return true;
+	/* Software Lock is only accessible via memory mapped interface */
+	if (csa->io_mem)
+		CS_LOCK(csa->base);
+}
+
+static void etm4_cs_unlock(struct etmv4_drvdata *drvdata,
+			   struct csdev_access *csa)
+{
+	if (csa->io_mem)
+		CS_UNLOCK(csa->base);
 }
 
 static int etm4_cpu_id(struct coresight_device *csdev)
@@ -201,57 +269,64 @@ static int etm4_enable_hw(struct etmv4_drvdata *drvdata)
 {
 	int i, rc;
 	struct etmv4_config *config = &drvdata->config;
-	struct device *etm_dev = &drvdata->csdev->dev;
+	struct coresight_device *csdev = drvdata->csdev;
+	struct device *etm_dev = &csdev->dev;
+	struct csdev_access *csa = &csdev->access;
+
 
-	CS_UNLOCK(drvdata->base);
+	etm4_cs_unlock(drvdata, csa);
 	etm4_enable_arch_specific(drvdata);
 
 	etm4_os_unlock(drvdata);
 
-	rc = coresight_claim_device_unlocked(drvdata->base);
+	rc = coresight_claim_device_unlocked(csdev);
 	if (rc)
 		goto done;
 
 	/* Disable the trace unit before programming trace registers */
-	writel_relaxed(0, drvdata->base + TRCPRGCTLR);
+	etm4x_relaxed_write32(csa, 0, TRCPRGCTLR);
+
+	/*
+	 * If we use system instructions, we need to synchronize the
+	 * write to the TRCPRGCTLR, before accessing the TRCSTATR.
+	 * See ARM IHI0064F, section
+	 * "4.3.7 Synchronization of register updates"
+	 */
+	if (!csa->io_mem)
+		isb();
 
 	/* wait for TRCSTATR.IDLE to go up */
-	if (coresight_timeout(drvdata->base, TRCSTATR, TRCSTATR_IDLE_BIT, 1))
+	if (coresight_timeout(csa, TRCSTATR, TRCSTATR_IDLE_BIT, 1))
 		dev_err(etm_dev,
 			"timeout while waiting for Idle Trace Status\n");
 	if (drvdata->nr_pe)
-		writel_relaxed(config->pe_sel, drvdata->base + TRCPROCSELR);
-	writel_relaxed(config->cfg, drvdata->base + TRCCONFIGR);
+		etm4x_relaxed_write32(csa, config->pe_sel, TRCPROCSELR);
+	etm4x_relaxed_write32(csa, config->cfg, TRCCONFIGR);
 	/* nothing specific implemented */
-	writel_relaxed(0x0, drvdata->base + TRCAUXCTLR);
-	writel_relaxed(config->eventctrl0, drvdata->base + TRCEVENTCTL0R);
-	writel_relaxed(config->eventctrl1, drvdata->base + TRCEVENTCTL1R);
-	writel_relaxed(config->stall_ctrl, drvdata->base + TRCSTALLCTLR);
-	writel_relaxed(config->ts_ctrl, drvdata->base + TRCTSCTLR);
-	writel_relaxed(config->syncfreq, drvdata->base + TRCSYNCPR);
-	writel_relaxed(config->ccctlr, drvdata->base + TRCCCCTLR);
-	writel_relaxed(config->bb_ctrl, drvdata->base + TRCBBCTLR);
-	writel_relaxed(drvdata->trcid, drvdata->base + TRCTRACEIDR);
-	writel_relaxed(config->vinst_ctrl, drvdata->base + TRCVICTLR);
-	writel_relaxed(config->viiectlr, drvdata->base + TRCVIIECTLR);
-	writel_relaxed(config->vissctlr,
-		       drvdata->base + TRCVISSCTLR);
+	etm4x_relaxed_write32(csa, 0x0, TRCAUXCTLR);
+	etm4x_relaxed_write32(csa, config->eventctrl0, TRCEVENTCTL0R);
+	etm4x_relaxed_write32(csa, config->eventctrl1, TRCEVENTCTL1R);
+	if (drvdata->stallctl)
+		etm4x_relaxed_write32(csa, config->stall_ctrl, TRCSTALLCTLR);
+	etm4x_relaxed_write32(csa, config->ts_ctrl, TRCTSCTLR);
+	etm4x_relaxed_write32(csa, config->syncfreq, TRCSYNCPR);
+	etm4x_relaxed_write32(csa, config->ccctlr, TRCCCCTLR);
+	etm4x_relaxed_write32(csa, config->bb_ctrl, TRCBBCTLR);
+	etm4x_relaxed_write32(csa, drvdata->trcid, TRCTRACEIDR);
+	etm4x_relaxed_write32(csa, config->vinst_ctrl, TRCVICTLR);
+	etm4x_relaxed_write32(csa, config->viiectlr, TRCVIIECTLR);
+	etm4x_relaxed_write32(csa, config->vissctlr, TRCVISSCTLR);
 	if (drvdata->nr_pe_cmp)
-		writel_relaxed(config->vipcssctlr,
-			       drvdata->base + TRCVIPCSSCTLR);
+		etm4x_relaxed_write32(csa, config->vipcssctlr, TRCVIPCSSCTLR);
 	for (i = 0; i < drvdata->nrseqstate - 1; i++)
-		writel_relaxed(config->seq_ctrl[i],
-			       drvdata->base + TRCSEQEVRn(i));
-	writel_relaxed(config->seq_rst, drvdata->base + TRCSEQRSTEVR);
-	writel_relaxed(config->seq_state, drvdata->base + TRCSEQSTR);
-	writel_relaxed(config->ext_inp, drvdata->base + TRCEXTINSELR);
+		etm4x_relaxed_write32(csa, config->seq_ctrl[i], TRCSEQEVRn(i));
+	etm4x_relaxed_write32(csa, config->seq_rst, TRCSEQRSTEVR);
+	etm4x_relaxed_write32(csa, config->seq_state, TRCSEQSTR);
+	etm4x_relaxed_write32(csa, config->ext_inp, TRCEXTINSELR);
 	for (i = 0; i < drvdata->nr_cntr; i++) {
-		writel_relaxed(config->cntrldvr[i],
-			       drvdata->base + TRCCNTRLDVRn(i));
-		writel_relaxed(config->cntr_ctrl[i],
-			       drvdata->base + TRCCNTCTLRn(i));
-		writel_relaxed(config->cntr_val[i],
-			       drvdata->base + TRCCNTVRn(i));
+		etm4x_relaxed_write32(csa, config->cntrldvr[i], TRCCNTRLDVRn(i));
+		etm4x_relaxed_write32(csa, config->cntr_ctrl[i], TRCCNTCTLRn(i));
+		etm4x_relaxed_write32(csa, config->cntr_val[i], TRCCNTVRn(i));
 	}
 
 	/*
@@ -259,54 +334,52 @@ static int etm4_enable_hw(struct etmv4_drvdata *drvdata)
 	 * such start at 2.
 	 */
 	for (i = 2; i < drvdata->nr_resource * 2; i++)
-		writel_relaxed(config->res_ctrl[i],
-			       drvdata->base + TRCRSCTLRn(i));
+		etm4x_relaxed_write32(csa, config->res_ctrl[i], TRCRSCTLRn(i));
 
 	for (i = 0; i < drvdata->nr_ss_cmp; i++) {
 		/* always clear status bit on restart if using single-shot */
 		if (config->ss_ctrl[i] || config->ss_pe_cmp[i])
 			config->ss_status[i] &= ~BIT(31);
-		writel_relaxed(config->ss_ctrl[i],
-			       drvdata->base + TRCSSCCRn(i));
-		writel_relaxed(config->ss_status[i],
-			       drvdata->base + TRCSSCSRn(i));
-		writel_relaxed(config->ss_pe_cmp[i],
-			       drvdata->base + TRCSSPCICRn(i));
+		etm4x_relaxed_write32(csa, config->ss_ctrl[i], TRCSSCCRn(i));
+		etm4x_relaxed_write32(csa, config->ss_status[i], TRCSSCSRn(i));
+		if (etm4x_sspcicrn_present(drvdata, i))
+			etm4x_relaxed_write32(csa, config->ss_pe_cmp[i], TRCSSPCICRn(i));
 	}
 	for (i = 0; i < drvdata->nr_addr_cmp; i++) {
-		writeq_relaxed(config->addr_val[i],
-			       drvdata->base + TRCACVRn(i));
-		writeq_relaxed(config->addr_acc[i],
-			       drvdata->base + TRCACATRn(i));
+		etm4x_relaxed_write64(csa, config->addr_val[i], TRCACVRn(i));
+		etm4x_relaxed_write64(csa, config->addr_acc[i], TRCACATRn(i));
 	}
 	for (i = 0; i < drvdata->numcidc; i++)
-		writeq_relaxed(config->ctxid_pid[i],
-			       drvdata->base + TRCCIDCVRn(i));
-	writel_relaxed(config->ctxid_mask0, drvdata->base + TRCCIDCCTLR0);
+		etm4x_relaxed_write64(csa, config->ctxid_pid[i], TRCCIDCVRn(i));
+	etm4x_relaxed_write32(csa, config->ctxid_mask0, TRCCIDCCTLR0);
 	if (drvdata->numcidc > 4)
-		writel_relaxed(config->ctxid_mask1, drvdata->base + TRCCIDCCTLR1);
+		etm4x_relaxed_write32(csa, config->ctxid_mask1, TRCCIDCCTLR1);
 
 	for (i = 0; i < drvdata->numvmidc; i++)
-		writeq_relaxed(config->vmid_val[i],
-			       drvdata->base + TRCVMIDCVRn(i));
-	writel_relaxed(config->vmid_mask0, drvdata->base + TRCVMIDCCTLR0);
+		etm4x_relaxed_write64(csa, config->vmid_val[i], TRCVMIDCVRn(i));
+	etm4x_relaxed_write32(csa, config->vmid_mask0, TRCVMIDCCTLR0);
 	if (drvdata->numvmidc > 4)
-		writel_relaxed(config->vmid_mask1, drvdata->base + TRCVMIDCCTLR1);
+		etm4x_relaxed_write32(csa, config->vmid_mask1, TRCVMIDCCTLR1);
 
 	if (!drvdata->skip_power_up) {
+		u32 trcpdcr = etm4x_relaxed_read32(csa, TRCPDCR);
+
 		/*
 		 * Request to keep the trace unit powered and also
 		 * emulation of powerdown
 		 */
-		writel_relaxed(readl_relaxed(drvdata->base + TRCPDCR) |
-			       TRCPDCR_PU, drvdata->base + TRCPDCR);
+		etm4x_relaxed_write32(csa, trcpdcr | TRCPDCR_PU, TRCPDCR);
 	}
 
 	/* Enable the trace unit */
-	writel_relaxed(1, drvdata->base + TRCPRGCTLR);
+	etm4x_relaxed_write32(csa, 1, TRCPRGCTLR);
+
+	/* Synchronize the register updates for sysreg access */
+	if (!csa->io_mem)
+		isb();
 
 	/* wait for TRCSTATR.IDLE to go back down to '0' */
-	if (coresight_timeout(drvdata->base, TRCSTATR, TRCSTATR_IDLE_BIT, 0))
+	if (coresight_timeout(csa, TRCSTATR, TRCSTATR_IDLE_BIT, 0))
 		dev_err(etm_dev,
 			"timeout while waiting for Idle Trace Status\n");
 
@@ -318,7 +391,7 @@ static int etm4_enable_hw(struct etmv4_drvdata *drvdata)
 	isb();
 
 done:
-	CS_LOCK(drvdata->base);
+	etm4_cs_lock(drvdata, csa);
 
 	dev_dbg(etm_dev, "cpu: %d enable smp call done: %d\n",
 		drvdata->cpu, rc);
@@ -477,6 +550,19 @@ static int etm4_parse_event_config(struct etmv4_drvdata *drvdata,
 		/* bit[6], Context ID tracing bit */
 		config->cfg |= BIT(ETM4_CFG_BIT_CTXTID);
 
+	/*
+	 * If bit ETM_OPT_CTXTID2 is set in the perf config, this asks to trace
+	 * the VMID for recording CONTEXTIDR_EL2.  Reject the request with
+	 * -EINVAL if the kernel is not running at EL2.
+	 */
+	if (attr->config & BIT(ETM_OPT_CTXTID2)) {
+		if (!is_kernel_in_hyp_mode()) {
+			ret = -EINVAL;
+			goto out;
+		}
+		config->cfg |= BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT);
+	}
+
 	/* return stack - enable if selected and supported */
 	if ((attr->config & BIT(ETM_OPT_RETSTK)) && drvdata->retstack)
 		/* bit[12], Return stack enable bit */
@@ -570,20 +656,22 @@ static void etm4_disable_hw(void *info)
 	u32 control;
 	struct etmv4_drvdata *drvdata = info;
 	struct etmv4_config *config = &drvdata->config;
-	struct device *etm_dev = &drvdata->csdev->dev;
+	struct coresight_device *csdev = drvdata->csdev;
+	struct device *etm_dev = &csdev->dev;
+	struct csdev_access *csa = &csdev->access;
 	int i;
 
-	CS_UNLOCK(drvdata->base);
+	etm4_cs_unlock(drvdata, csa);
 	etm4_disable_arch_specific(drvdata);
 
 	if (!drvdata->skip_power_up) {
 		/* power can be removed from the trace unit now */
-		control = readl_relaxed(drvdata->base + TRCPDCR);
+		control = etm4x_relaxed_read32(csa, TRCPDCR);
 		control &= ~TRCPDCR_PU;
-		writel_relaxed(control, drvdata->base + TRCPDCR);
+		etm4x_relaxed_write32(csa, control, TRCPDCR);
 	}
 
-	control = readl_relaxed(drvdata->base + TRCPRGCTLR);
+	control = etm4x_relaxed_read32(csa, TRCPRGCTLR);
 
 	/* EN, bit[0] Trace unit enable bit */
 	control &= ~0x1;
@@ -595,29 +683,27 @@ static void etm4_disable_hw(void *info)
 	 */
 	dsb(sy);
 	isb();
-	writel_relaxed(control, drvdata->base + TRCPRGCTLR);
+	etm4x_relaxed_write32(csa, control, TRCPRGCTLR);
 
 	/* wait for TRCSTATR.PMSTABLE to go to '1' */
-	if (coresight_timeout(drvdata->base, TRCSTATR,
-			      TRCSTATR_PMSTABLE_BIT, 1))
+	if (coresight_timeout(csa, TRCSTATR, TRCSTATR_PMSTABLE_BIT, 1))
 		dev_err(etm_dev,
 			"timeout while waiting for PM stable Trace Status\n");
 
 	/* read the status of the single shot comparators */
 	for (i = 0; i < drvdata->nr_ss_cmp; i++) {
 		config->ss_status[i] =
-			readl_relaxed(drvdata->base + TRCSSCSRn(i));
+			etm4x_relaxed_read32(csa, TRCSSCSRn(i));
 	}
 
 	/* read back the current counter values */
 	for (i = 0; i < drvdata->nr_cntr; i++) {
 		config->cntr_val[i] =
-			readl_relaxed(drvdata->base + TRCCNTVRn(i));
+			etm4x_relaxed_read32(csa, TRCCNTVRn(i));
 	}
 
-	coresight_disclaim_device_unlocked(drvdata->base);
-
-	CS_LOCK(drvdata->base);
+	coresight_disclaim_device_unlocked(csdev);
+	etm4_cs_lock(drvdata, csa);
 
 	dev_dbg(&drvdata->csdev->dev,
 		"cpu: %d disable smp call done\n", drvdata->cpu);
@@ -641,7 +727,7 @@ static int etm4_disable_perf(struct coresight_device *csdev,
 	 * scheduled again.  Configuration of the start/stop logic happens in
 	 * function etm4_set_event_filters().
 	 */
-	control = readl_relaxed(drvdata->base + TRCVICTLR);
+	control = etm4x_relaxed_read32(&csdev->access, TRCVICTLR);
 	/* TRCVICTLR::SSSTATUS, bit[9] */
 	filters->ssstatus = (control & BIT(9));
 
@@ -712,24 +798,136 @@ static const struct coresight_ops etm4_cs_ops = {
 	.source_ops	= &etm4_source_ops,
 };
 
+static inline bool cpu_supports_sysreg_trace(void)
+{
+	u64 dfr0 = read_sysreg_s(SYS_ID_AA64DFR0_EL1);
+
+	return ((dfr0 >> ID_AA64DFR0_TRACEVER_SHIFT) & 0xfUL) > 0;
+}
+
+static bool etm4_init_sysreg_access(struct etmv4_drvdata *drvdata,
+				    struct csdev_access *csa)
+{
+	u32 devarch;
+
+	if (!cpu_supports_sysreg_trace())
+		return false;
+
+	/*
+	 * ETMs implementing sysreg access must implement TRCDEVARCH.
+	 */
+	devarch = read_etm4x_sysreg_const_offset(TRCDEVARCH);
+	if ((devarch & ETM_DEVARCH_ID_MASK) != ETM_DEVARCH_ETMv4x_ARCH)
+		return false;
+	*csa = (struct csdev_access) {
+		.io_mem	= false,
+		.read	= etm4x_sysreg_read,
+		.write	= etm4x_sysreg_write,
+	};
+
+	drvdata->arch = etm_devarch_to_arch(devarch);
+	return true;
+}
+
+static bool etm4_init_iomem_access(struct etmv4_drvdata *drvdata,
+				   struct csdev_access *csa)
+{
+	u32 devarch = readl_relaxed(drvdata->base + TRCDEVARCH);
+	u32 idr1 = readl_relaxed(drvdata->base + TRCIDR1);
+
+	/*
+	 * All ETMs must implement TRCDEVARCH to indicate that
+	 * the component is an ETMv4. To support any broken
+	 * implementations we fall back to TRCIDR1 check, which
+	 * is not really reliable.
+	 */
+	if ((devarch & ETM_DEVARCH_ID_MASK) == ETM_DEVARCH_ETMv4x_ARCH) {
+		drvdata->arch = etm_devarch_to_arch(devarch);
+	} else {
+		pr_warn("CPU%d: ETM4x incompatible TRCDEVARCH: %x, falling back to TRCIDR1\n",
+			smp_processor_id(), devarch);
+
+		if (ETM_TRCIDR1_ARCH_MAJOR(idr1) != ETM_TRCIDR1_ARCH_ETMv4)
+			return false;
+		drvdata->arch = etm_trcidr_to_arch(idr1);
+	}
+
+	*csa = CSDEV_ACCESS_IOMEM(drvdata->base);
+	return true;
+}
+
+static bool etm4_init_csdev_access(struct etmv4_drvdata *drvdata,
+				   struct csdev_access *csa)
+{
+	/*
+	 * If there is a memory map, always choose memory
+	 * mapped I/O, to prevent sysreg access on broken
+	 * systems.
+	 */
+	if (drvdata->base)
+		return etm4_init_iomem_access(drvdata, csa);
+
+	if (etm4_init_sysreg_access(drvdata, csa))
+		return true;
+
+	return false;
+}
+
+static void cpu_enable_tracing(void)
+{
+	u64 dfr0 = read_sysreg(id_aa64dfr0_el1);
+	u64 trfcr;
+
+	if (!cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_TRACE_FILT_SHIFT))
+		return;
+
+	/*
+	 * If the CPU supports v8.4 SelfHosted Tracing, enable
+	 * tracing at the kernel EL and EL0, forcing to use the
+	 * virtual time as the timestamp.
+	 */
+	trfcr = (TRFCR_ELx_TS_VIRTUAL |
+		 TRFCR_ELx_ExTRE |
+		 TRFCR_ELx_E0TRE);
+
+	/* If we are running at EL2, allow tracing the CONTEXTIDR_EL2. */
+	if (is_kernel_in_hyp_mode())
+		trfcr |= TRFCR_EL2_CX;
+
+	write_sysreg_s(trfcr, SYS_TRFCR_EL1);
+}
+
 static void etm4_init_arch_data(void *info)
 {
 	u32 etmidr0;
-	u32 etmidr1;
 	u32 etmidr2;
 	u32 etmidr3;
 	u32 etmidr4;
 	u32 etmidr5;
-	struct etmv4_drvdata *drvdata = info;
+	struct etm4_init_arg *init_arg = info;
+	struct etmv4_drvdata *drvdata;
+	struct csdev_access *csa;
 	int i;
 
+	drvdata = init_arg->drvdata;
+	csa = init_arg->csa;
+
+	/*
+	 * If we are unable to detect the access mechanism,
+	 * or unable to detect the trace unit type, fail
+	 * early.
+	 */
+	if (!etm4_init_csdev_access(drvdata, csa))
+		return;
+
 	/* Make sure all registers are accessible */
-	etm4_os_unlock(drvdata);
+	etm4_os_unlock_csa(drvdata, csa);
+	etm4_cs_unlock(drvdata, csa);
 
-	CS_UNLOCK(drvdata->base);
+	etm4_check_arch_features(drvdata, init_arg->pid);
 
 	/* find all capabilities of the tracing unit */
-	etmidr0 = readl_relaxed(drvdata->base + TRCIDR0);
+	etmidr0 = etm4x_relaxed_read32(csa, TRCIDR0);
 
 	/* INSTP0, bits[2:1] P0 tracing support field */
 	if (BMVAL(etmidr0, 1, 1) && BMVAL(etmidr0, 2, 2))
@@ -768,17 +966,8 @@ static void etm4_init_arch_data(void *info)
 	/* TSSIZE, bits[28:24] Global timestamp size field */
 	drvdata->ts_size = BMVAL(etmidr0, 24, 28);
 
-	/* base architecture of trace unit */
-	etmidr1 = readl_relaxed(drvdata->base + TRCIDR1);
-	/*
-	 * TRCARCHMIN, bits[7:4] architecture the minor version number
-	 * TRCARCHMAJ, bits[11:8] architecture major versin number
-	 */
-	drvdata->arch = BMVAL(etmidr1, 4, 11);
-	drvdata->config.arch = drvdata->arch;
-
 	/* maximum size of resources */
-	etmidr2 = readl_relaxed(drvdata->base + TRCIDR2);
+	etmidr2 = etm4x_relaxed_read32(csa, TRCIDR2);
 	/* CIDSIZE, bits[9:5] Indicates the Context ID size */
 	drvdata->ctxid_size = BMVAL(etmidr2, 5, 9);
 	/* VMIDSIZE, bits[14:10] Indicates the VMID size */
@@ -786,11 +975,12 @@ static void etm4_init_arch_data(void *info)
 	/* CCSIZE, bits[28:25] size of the cycle counter in bits minus 12 */
 	drvdata->ccsize = BMVAL(etmidr2, 25, 28);
 
-	etmidr3 = readl_relaxed(drvdata->base + TRCIDR3);
+	etmidr3 = etm4x_relaxed_read32(csa, TRCIDR3);
 	/* CCITMIN, bits[11:0] minimum threshold value that can be programmed */
 	drvdata->ccitmin = BMVAL(etmidr3, 0, 11);
 	/* EXLEVEL_S, bits[19:16] Secure state instruction tracing */
 	drvdata->s_ex_level = BMVAL(etmidr3, 16, 19);
+	drvdata->config.s_ex_level = drvdata->s_ex_level;
 	/* EXLEVEL_NS, bits[23:20] Non-secure state instruction tracing */
 	drvdata->ns_ex_level = BMVAL(etmidr3, 20, 23);
 
@@ -836,7 +1026,7 @@ static void etm4_init_arch_data(void *info)
 		drvdata->nooverflow = false;
 
 	/* number of resources trace unit supports */
-	etmidr4 = readl_relaxed(drvdata->base + TRCIDR4);
+	etmidr4 = etm4x_relaxed_read32(csa, TRCIDR4);
 	/* NUMACPAIRS, bits[0:3] number of addr comparator pairs for tracing */
 	drvdata->nr_addr_cmp = BMVAL(etmidr4, 0, 3);
 	/* NUMPC, bits[15:12] number of PE comparator inputs for tracing */
@@ -852,7 +1042,7 @@ static void etm4_init_arch_data(void *info)
 	 * Otherwise for values 0x1 and above the number is N + 1 as per v4.2.
 	 */
 	drvdata->nr_resource = BMVAL(etmidr4, 16, 19);
-	if ((drvdata->arch < ETM4X_ARCH_4V3) || (drvdata->nr_resource > 0))
+	if ((drvdata->arch < ETM_ARCH_V4_3) || (drvdata->nr_resource > 0))
 		drvdata->nr_resource += 1;
 	/*
 	 * NUMSSCC, bits[23:20] the number of single-shot
@@ -862,14 +1052,14 @@ static void etm4_init_arch_data(void *info)
 	drvdata->nr_ss_cmp = BMVAL(etmidr4, 20, 23);
 	for (i = 0; i < drvdata->nr_ss_cmp; i++) {
 		drvdata->config.ss_status[i] =
-			readl_relaxed(drvdata->base + TRCSSCSRn(i));
+			etm4x_relaxed_read32(csa, TRCSSCSRn(i));
 	}
 	/* NUMCIDC, bits[27:24] number of Context ID comparators for tracing */
 	drvdata->numcidc = BMVAL(etmidr4, 24, 27);
 	/* NUMVMIDC, bits[31:28] number of VMID comparators for tracing */
 	drvdata->numvmidc = BMVAL(etmidr4, 28, 31);
 
-	etmidr5 = readl_relaxed(drvdata->base + TRCIDR5);
+	etmidr5 = etm4x_relaxed_read32(csa, TRCIDR5);
 	/* NUMEXTIN, bits[8:0] number of external inputs implemented */
 	drvdata->nr_ext_inp = BMVAL(etmidr5, 0, 8);
 	/* TRACEIDSIZE, bits[21:16] indicates the trace ID width */
@@ -891,23 +1081,20 @@ static void etm4_init_arch_data(void *info)
 	drvdata->nrseqstate = BMVAL(etmidr5, 25, 27);
 	/* NUMCNTR, bits[30:28] number of counters available for tracing */
 	drvdata->nr_cntr = BMVAL(etmidr5, 28, 30);
-	CS_LOCK(drvdata->base);
+	etm4_cs_lock(drvdata, csa);
+	cpu_enable_tracing();
+}
+
+static inline u32 etm4_get_victlr_access_type(struct etmv4_config *config)
+{
+	return etm4_get_access_type(config) << TRCVICTLR_EXLEVEL_SHIFT;
 }
 
 /* Set ELx trace filter access in the TRCVICTLR register */
 static void etm4_set_victlr_access(struct etmv4_config *config)
 {
-	u64 access_type;
-
-	config->vinst_ctrl &= ~(ETM_EXLEVEL_S_VICTLR_MASK | ETM_EXLEVEL_NS_VICTLR_MASK);
-
-	/*
-	 * TRCVICTLR::EXLEVEL_NS:EXLEVELS: Set kernel / user filtering
-	 * bits in vinst_ctrl, same bit pattern as TRCACATRn values returned by
-	 * etm4_get_access_type() but with a relative shift in this register.
-	 */
-	access_type = etm4_get_access_type(config) << ETM_EXLEVEL_LSHIFT_TRCVICTLR;
-	config->vinst_ctrl |= (u32)access_type;
+	config->vinst_ctrl &= ~TRCVICTLR_EXLEVEL_MASK;
+	config->vinst_ctrl |= etm4_get_victlr_access_type(config);
 }
 
 static void etm4_set_default_config(struct etmv4_config *config)
@@ -937,12 +1124,9 @@ static u64 etm4_get_ns_access_type(struct etmv4_config *config)
 	u64 access_type = 0;
 
 	/*
-	 * EXLEVEL_NS, bits[15:12]
-	 * The Exception levels are:
-	 *   Bit[12] Exception level 0 - Application
-	 *   Bit[13] Exception level 1 - OS
-	 *   Bit[14] Exception level 2 - Hypervisor
-	 *   Bit[15] Never implemented
+	 * EXLEVEL_NS, for NonSecure Exception levels.
+	 * The mask here is a generic value and must be
+	 * shifted to the corresponding field for the registers
 	 */
 	if (!is_kernel_in_hyp_mode()) {
 		/* Stay away from hypervisor mode for non-VHE */
@@ -959,27 +1143,26 @@ static u64 etm4_get_ns_access_type(struct etmv4_config *config)
 	return access_type;
 }
 
+/*
+ * Construct the exception level masks for a given config.
+ * This must be shifted to the corresponding register field
+ * for usage.
+ */
 static u64 etm4_get_access_type(struct etmv4_config *config)
 {
-	u64 access_type = etm4_get_ns_access_type(config);
-	u64 s_hyp = (config->arch & 0x0f) >= 0x4 ? ETM_EXLEVEL_S_HYP : 0;
-
-	/*
-	 * EXLEVEL_S, bits[11:8], don't trace anything happening
-	 * in secure state.
-	 */
-	access_type |= (ETM_EXLEVEL_S_APP	|
-			ETM_EXLEVEL_S_OS	|
-			s_hyp			|
-			ETM_EXLEVEL_S_MON);
+	/* All Secure exception levels are excluded from the trace */
+	return etm4_get_ns_access_type(config) | (u64)config->s_ex_level;
+}
 
-	return access_type;
+static u64 etm4_get_comparator_access_type(struct etmv4_config *config)
+{
+	return etm4_get_access_type(config) << TRCACATR_EXLEVEL_SHIFT;
 }
 
 static void etm4_set_comparator_filter(struct etmv4_config *config,
 				       u64 start, u64 stop, int comparator)
 {
-	u64 access_type = etm4_get_access_type(config);
+	u64 access_type = etm4_get_comparator_access_type(config);
 
 	/* First half of default address comparator */
 	config->addr_val[comparator] = start;
@@ -1014,7 +1197,7 @@ static void etm4_set_start_stop_filter(struct etmv4_config *config,
 				       enum etm_addr_type type)
 {
 	int shift;
-	u64 access_type = etm4_get_access_type(config);
+	u64 access_type = etm4_get_comparator_access_type(config);
 
 	/* Configure the comparator */
 	config->addr_val[comparator] = address;
@@ -1255,7 +1438,15 @@ static int etm4_cpu_save(struct etmv4_drvdata *drvdata)
 {
 	int i, ret = 0;
 	struct etmv4_save_state *state;
-	struct device *etm_dev = &drvdata->csdev->dev;
+	struct coresight_device *csdev = drvdata->csdev;
+	struct csdev_access *csa;
+	struct device *etm_dev;
+
+	if (WARN_ON(!csdev))
+		return -ENODEV;
+
+	etm_dev = &csdev->dev;
+	csa = &csdev->access;
 
 	/*
 	 * As recommended by 3.4.1 ("The procedure when powering down the PE")
@@ -1264,14 +1455,12 @@ static int etm4_cpu_save(struct etmv4_drvdata *drvdata)
 	dsb(sy);
 	isb();
 
-	CS_UNLOCK(drvdata->base);
-
+	etm4_cs_unlock(drvdata, csa);
 	/* Lock the OS lock to disable trace and external debugger access */
 	etm4_os_lock(drvdata);
 
 	/* wait for TRCSTATR.PMSTABLE to go up */
-	if (coresight_timeout(drvdata->base, TRCSTATR,
-			      TRCSTATR_PMSTABLE_BIT, 1)) {
+	if (coresight_timeout(csa, TRCSTATR, TRCSTATR_PMSTABLE_BIT, 1)) {
 		dev_err(etm_dev,
 			"timeout while waiting for PM Stable Status\n");
 		etm4_os_unlock(drvdata);
@@ -1281,55 +1470,57 @@ static int etm4_cpu_save(struct etmv4_drvdata *drvdata)
 
 	state = drvdata->save_state;
 
-	state->trcprgctlr = readl(drvdata->base + TRCPRGCTLR);
+	state->trcprgctlr = etm4x_read32(csa, TRCPRGCTLR);
 	if (drvdata->nr_pe)
-		state->trcprocselr = readl(drvdata->base + TRCPROCSELR);
-	state->trcconfigr = readl(drvdata->base + TRCCONFIGR);
-	state->trcauxctlr = readl(drvdata->base + TRCAUXCTLR);
-	state->trceventctl0r = readl(drvdata->base + TRCEVENTCTL0R);
-	state->trceventctl1r = readl(drvdata->base + TRCEVENTCTL1R);
-	state->trcstallctlr = readl(drvdata->base + TRCSTALLCTLR);
-	state->trctsctlr = readl(drvdata->base + TRCTSCTLR);
-	state->trcsyncpr = readl(drvdata->base + TRCSYNCPR);
-	state->trcccctlr = readl(drvdata->base + TRCCCCTLR);
-	state->trcbbctlr = readl(drvdata->base + TRCBBCTLR);
-	state->trctraceidr = readl(drvdata->base + TRCTRACEIDR);
-	state->trcqctlr = readl(drvdata->base + TRCQCTLR);
-
-	state->trcvictlr = readl(drvdata->base + TRCVICTLR);
-	state->trcviiectlr = readl(drvdata->base + TRCVIIECTLR);
-	state->trcvissctlr = readl(drvdata->base + TRCVISSCTLR);
+		state->trcprocselr = etm4x_read32(csa, TRCPROCSELR);
+	state->trcconfigr = etm4x_read32(csa, TRCCONFIGR);
+	state->trcauxctlr = etm4x_read32(csa, TRCAUXCTLR);
+	state->trceventctl0r = etm4x_read32(csa, TRCEVENTCTL0R);
+	state->trceventctl1r = etm4x_read32(csa, TRCEVENTCTL1R);
+	if (drvdata->stallctl)
+		state->trcstallctlr = etm4x_read32(csa, TRCSTALLCTLR);
+	state->trctsctlr = etm4x_read32(csa, TRCTSCTLR);
+	state->trcsyncpr = etm4x_read32(csa, TRCSYNCPR);
+	state->trcccctlr = etm4x_read32(csa, TRCCCCTLR);
+	state->trcbbctlr = etm4x_read32(csa, TRCBBCTLR);
+	state->trctraceidr = etm4x_read32(csa, TRCTRACEIDR);
+	state->trcqctlr = etm4x_read32(csa, TRCQCTLR);
+
+	state->trcvictlr = etm4x_read32(csa, TRCVICTLR);
+	state->trcviiectlr = etm4x_read32(csa, TRCVIIECTLR);
+	state->trcvissctlr = etm4x_read32(csa, TRCVISSCTLR);
 	if (drvdata->nr_pe_cmp)
-		state->trcvipcssctlr = readl(drvdata->base + TRCVIPCSSCTLR);
-	state->trcvdctlr = readl(drvdata->base + TRCVDCTLR);
-	state->trcvdsacctlr = readl(drvdata->base + TRCVDSACCTLR);
-	state->trcvdarcctlr = readl(drvdata->base + TRCVDARCCTLR);
+		state->trcvipcssctlr = etm4x_read32(csa, TRCVIPCSSCTLR);
+	state->trcvdctlr = etm4x_read32(csa, TRCVDCTLR);
+	state->trcvdsacctlr = etm4x_read32(csa, TRCVDSACCTLR);
+	state->trcvdarcctlr = etm4x_read32(csa, TRCVDARCCTLR);
 
 	for (i = 0; i < drvdata->nrseqstate - 1; i++)
-		state->trcseqevr[i] = readl(drvdata->base + TRCSEQEVRn(i));
+		state->trcseqevr[i] = etm4x_read32(csa, TRCSEQEVRn(i));
 
-	state->trcseqrstevr = readl(drvdata->base + TRCSEQRSTEVR);
-	state->trcseqstr = readl(drvdata->base + TRCSEQSTR);
-	state->trcextinselr = readl(drvdata->base + TRCEXTINSELR);
+	state->trcseqrstevr = etm4x_read32(csa, TRCSEQRSTEVR);
+	state->trcseqstr = etm4x_read32(csa, TRCSEQSTR);
+	state->trcextinselr = etm4x_read32(csa, TRCEXTINSELR);
 
 	for (i = 0; i < drvdata->nr_cntr; i++) {
-		state->trccntrldvr[i] = readl(drvdata->base + TRCCNTRLDVRn(i));
-		state->trccntctlr[i] = readl(drvdata->base + TRCCNTCTLRn(i));
-		state->trccntvr[i] = readl(drvdata->base + TRCCNTVRn(i));
+		state->trccntrldvr[i] = etm4x_read32(csa, TRCCNTRLDVRn(i));
+		state->trccntctlr[i] = etm4x_read32(csa, TRCCNTCTLRn(i));
+		state->trccntvr[i] = etm4x_read32(csa, TRCCNTVRn(i));
 	}
 
 	for (i = 0; i < drvdata->nr_resource * 2; i++)
-		state->trcrsctlr[i] = readl(drvdata->base + TRCRSCTLRn(i));
+		state->trcrsctlr[i] = etm4x_read32(csa, TRCRSCTLRn(i));
 
 	for (i = 0; i < drvdata->nr_ss_cmp; i++) {
-		state->trcssccr[i] = readl(drvdata->base + TRCSSCCRn(i));
-		state->trcsscsr[i] = readl(drvdata->base + TRCSSCSRn(i));
-		state->trcsspcicr[i] = readl(drvdata->base + TRCSSPCICRn(i));
+		state->trcssccr[i] = etm4x_read32(csa, TRCSSCCRn(i));
+		state->trcsscsr[i] = etm4x_read32(csa, TRCSSCSRn(i));
+		if (etm4x_sspcicrn_present(drvdata, i))
+			state->trcsspcicr[i] = etm4x_read32(csa, TRCSSPCICRn(i));
 	}
 
 	for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
-		state->trcacvr[i] = readq(drvdata->base + TRCACVRn(i));
-		state->trcacatr[i] = readq(drvdata->base + TRCACATRn(i));
+		state->trcacvr[i] = etm4x_read64(csa, TRCACVRn(i));
+		state->trcacatr[i] = etm4x_read64(csa, TRCACATRn(i));
 	}
 
 	/*
@@ -1340,25 +1531,26 @@ static int etm4_cpu_save(struct etmv4_drvdata *drvdata)
 	 */
 
 	for (i = 0; i < drvdata->numcidc; i++)
-		state->trccidcvr[i] = readq(drvdata->base + TRCCIDCVRn(i));
+		state->trccidcvr[i] = etm4x_read64(csa, TRCCIDCVRn(i));
 
 	for (i = 0; i < drvdata->numvmidc; i++)
-		state->trcvmidcvr[i] = readq(drvdata->base + TRCVMIDCVRn(i));
+		state->trcvmidcvr[i] = etm4x_read64(csa, TRCVMIDCVRn(i));
 
-	state->trccidcctlr0 = readl(drvdata->base + TRCCIDCCTLR0);
+	state->trccidcctlr0 = etm4x_read32(csa, TRCCIDCCTLR0);
 	if (drvdata->numcidc > 4)
-		state->trccidcctlr1 = readl(drvdata->base + TRCCIDCCTLR1);
+		state->trccidcctlr1 = etm4x_read32(csa, TRCCIDCCTLR1);
 
-	state->trcvmidcctlr0 = readl(drvdata->base + TRCVMIDCCTLR0);
+	state->trcvmidcctlr0 = etm4x_read32(csa, TRCVMIDCCTLR0);
 	if (drvdata->numvmidc > 4)
-		state->trcvmidcctlr1 = readl(drvdata->base + TRCVMIDCCTLR1);
+		state->trcvmidcctlr1 = etm4x_read32(csa, TRCVMIDCCTLR1);
 
-	state->trcclaimset = readl(drvdata->base + TRCCLAIMCLR);
+	state->trcclaimset = etm4x_read32(csa, TRCCLAIMCLR);
 
-	state->trcpdcr = readl(drvdata->base + TRCPDCR);
+	if (!drvdata->skip_power_up)
+		state->trcpdcr = etm4x_read32(csa, TRCPDCR);
 
 	/* wait for TRCSTATR.IDLE to go up */
-	if (coresight_timeout(drvdata->base, TRCSTATR, TRCSTATR_IDLE_BIT, 1)) {
+	if (coresight_timeout(csa, TRCSTATR, TRCSTATR_IDLE_BIT, 1)) {
 		dev_err(etm_dev,
 			"timeout while waiting for Idle Trace Status\n");
 		etm4_os_unlock(drvdata);
@@ -1373,11 +1565,11 @@ static int etm4_cpu_save(struct etmv4_drvdata *drvdata)
 	 * potentially save power on systems that respect the TRCPDCR_PU
 	 * despite requesting software to save/restore state.
 	 */
-	writel_relaxed((state->trcpdcr & ~TRCPDCR_PU),
-			drvdata->base + TRCPDCR);
-
+	if (!drvdata->skip_power_up)
+		etm4x_relaxed_write32(csa, (state->trcpdcr & ~TRCPDCR_PU),
+				      TRCPDCR);
 out:
-	CS_LOCK(drvdata->base);
+	etm4_cs_lock(drvdata, csa);
 	return ret;
 }
 
@@ -1385,91 +1577,83 @@ static void etm4_cpu_restore(struct etmv4_drvdata *drvdata)
 {
 	int i;
 	struct etmv4_save_state *state = drvdata->save_state;
+	struct csdev_access tmp_csa = CSDEV_ACCESS_IOMEM(drvdata->base);
+	struct csdev_access *csa = &tmp_csa;
 
-	CS_UNLOCK(drvdata->base);
-
-	writel_relaxed(state->trcclaimset, drvdata->base + TRCCLAIMSET);
+	etm4_cs_unlock(drvdata, csa);
+	etm4x_relaxed_write32(csa, state->trcclaimset, TRCCLAIMSET);
 
-	writel_relaxed(state->trcprgctlr, drvdata->base + TRCPRGCTLR);
+	etm4x_relaxed_write32(csa, state->trcprgctlr, TRCPRGCTLR);
 	if (drvdata->nr_pe)
-		writel_relaxed(state->trcprocselr, drvdata->base + TRCPROCSELR);
-	writel_relaxed(state->trcconfigr, drvdata->base + TRCCONFIGR);
-	writel_relaxed(state->trcauxctlr, drvdata->base + TRCAUXCTLR);
-	writel_relaxed(state->trceventctl0r, drvdata->base + TRCEVENTCTL0R);
-	writel_relaxed(state->trceventctl1r, drvdata->base + TRCEVENTCTL1R);
-	writel_relaxed(state->trcstallctlr, drvdata->base + TRCSTALLCTLR);
-	writel_relaxed(state->trctsctlr, drvdata->base + TRCTSCTLR);
-	writel_relaxed(state->trcsyncpr, drvdata->base + TRCSYNCPR);
-	writel_relaxed(state->trcccctlr, drvdata->base + TRCCCCTLR);
-	writel_relaxed(state->trcbbctlr, drvdata->base + TRCBBCTLR);
-	writel_relaxed(state->trctraceidr, drvdata->base + TRCTRACEIDR);
-	writel_relaxed(state->trcqctlr, drvdata->base + TRCQCTLR);
-
-	writel_relaxed(state->trcvictlr, drvdata->base + TRCVICTLR);
-	writel_relaxed(state->trcviiectlr, drvdata->base + TRCVIIECTLR);
-	writel_relaxed(state->trcvissctlr, drvdata->base + TRCVISSCTLR);
+		etm4x_relaxed_write32(csa, state->trcprocselr, TRCPROCSELR);
+	etm4x_relaxed_write32(csa, state->trcconfigr, TRCCONFIGR);
+	etm4x_relaxed_write32(csa, state->trcauxctlr, TRCAUXCTLR);
+	etm4x_relaxed_write32(csa, state->trceventctl0r, TRCEVENTCTL0R);
+	etm4x_relaxed_write32(csa, state->trceventctl1r, TRCEVENTCTL1R);
+	if (drvdata->stallctl)
+		etm4x_relaxed_write32(csa, state->trcstallctlr, TRCSTALLCTLR);
+	etm4x_relaxed_write32(csa, state->trctsctlr, TRCTSCTLR);
+	etm4x_relaxed_write32(csa, state->trcsyncpr, TRCSYNCPR);
+	etm4x_relaxed_write32(csa, state->trcccctlr, TRCCCCTLR);
+	etm4x_relaxed_write32(csa, state->trcbbctlr, TRCBBCTLR);
+	etm4x_relaxed_write32(csa, state->trctraceidr, TRCTRACEIDR);
+	etm4x_relaxed_write32(csa, state->trcqctlr, TRCQCTLR);
+
+	etm4x_relaxed_write32(csa, state->trcvictlr, TRCVICTLR);
+	etm4x_relaxed_write32(csa, state->trcviiectlr, TRCVIIECTLR);
+	etm4x_relaxed_write32(csa, state->trcvissctlr, TRCVISSCTLR);
 	if (drvdata->nr_pe_cmp)
-		writel_relaxed(state->trcvipcssctlr, drvdata->base + TRCVIPCSSCTLR);
-	writel_relaxed(state->trcvdctlr, drvdata->base + TRCVDCTLR);
-	writel_relaxed(state->trcvdsacctlr, drvdata->base + TRCVDSACCTLR);
-	writel_relaxed(state->trcvdarcctlr, drvdata->base + TRCVDARCCTLR);
+		etm4x_relaxed_write32(csa, state->trcvipcssctlr, TRCVIPCSSCTLR);
+	etm4x_relaxed_write32(csa, state->trcvdctlr, TRCVDCTLR);
+	etm4x_relaxed_write32(csa, state->trcvdsacctlr, TRCVDSACCTLR);
+	etm4x_relaxed_write32(csa, state->trcvdarcctlr, TRCVDARCCTLR);
 
 	for (i = 0; i < drvdata->nrseqstate - 1; i++)
-		writel_relaxed(state->trcseqevr[i],
-			       drvdata->base + TRCSEQEVRn(i));
+		etm4x_relaxed_write32(csa, state->trcseqevr[i], TRCSEQEVRn(i));
 
-	writel_relaxed(state->trcseqrstevr, drvdata->base + TRCSEQRSTEVR);
-	writel_relaxed(state->trcseqstr, drvdata->base + TRCSEQSTR);
-	writel_relaxed(state->trcextinselr, drvdata->base + TRCEXTINSELR);
+	etm4x_relaxed_write32(csa, state->trcseqrstevr, TRCSEQRSTEVR);
+	etm4x_relaxed_write32(csa, state->trcseqstr, TRCSEQSTR);
+	etm4x_relaxed_write32(csa, state->trcextinselr, TRCEXTINSELR);
 
 	for (i = 0; i < drvdata->nr_cntr; i++) {
-		writel_relaxed(state->trccntrldvr[i],
-			       drvdata->base + TRCCNTRLDVRn(i));
-		writel_relaxed(state->trccntctlr[i],
-			       drvdata->base + TRCCNTCTLRn(i));
-		writel_relaxed(state->trccntvr[i],
-			       drvdata->base + TRCCNTVRn(i));
+		etm4x_relaxed_write32(csa, state->trccntrldvr[i], TRCCNTRLDVRn(i));
+		etm4x_relaxed_write32(csa, state->trccntctlr[i], TRCCNTCTLRn(i));
+		etm4x_relaxed_write32(csa, state->trccntvr[i], TRCCNTVRn(i));
 	}
 
 	for (i = 0; i < drvdata->nr_resource * 2; i++)
-		writel_relaxed(state->trcrsctlr[i],
-			       drvdata->base + TRCRSCTLRn(i));
+		etm4x_relaxed_write32(csa, state->trcrsctlr[i], TRCRSCTLRn(i));
 
 	for (i = 0; i < drvdata->nr_ss_cmp; i++) {
-		writel_relaxed(state->trcssccr[i],
-			       drvdata->base + TRCSSCCRn(i));
-		writel_relaxed(state->trcsscsr[i],
-			       drvdata->base + TRCSSCSRn(i));
-		writel_relaxed(state->trcsspcicr[i],
-			       drvdata->base + TRCSSPCICRn(i));
+		etm4x_relaxed_write32(csa, state->trcssccr[i], TRCSSCCRn(i));
+		etm4x_relaxed_write32(csa, state->trcsscsr[i], TRCSSCSRn(i));
+		if (etm4x_sspcicrn_present(drvdata, i))
+			etm4x_relaxed_write32(csa, state->trcsspcicr[i], TRCSSPCICRn(i));
 	}
 
 	for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
-		writeq_relaxed(state->trcacvr[i],
-			       drvdata->base + TRCACVRn(i));
-		writeq_relaxed(state->trcacatr[i],
-			       drvdata->base + TRCACATRn(i));
+		etm4x_relaxed_write64(csa, state->trcacvr[i], TRCACVRn(i));
+		etm4x_relaxed_write64(csa, state->trcacatr[i], TRCACATRn(i));
 	}
 
 	for (i = 0; i < drvdata->numcidc; i++)
-		writeq_relaxed(state->trccidcvr[i],
-			       drvdata->base + TRCCIDCVRn(i));
+		etm4x_relaxed_write64(csa, state->trccidcvr[i], TRCCIDCVRn(i));
 
 	for (i = 0; i < drvdata->numvmidc; i++)
-		writeq_relaxed(state->trcvmidcvr[i],
-			       drvdata->base + TRCVMIDCVRn(i));
+		etm4x_relaxed_write64(csa, state->trcvmidcvr[i], TRCVMIDCVRn(i));
 
-	writel_relaxed(state->trccidcctlr0, drvdata->base + TRCCIDCCTLR0);
+	etm4x_relaxed_write32(csa, state->trccidcctlr0, TRCCIDCCTLR0);
 	if (drvdata->numcidc > 4)
-		writel_relaxed(state->trccidcctlr1, drvdata->base + TRCCIDCCTLR1);
+		etm4x_relaxed_write32(csa, state->trccidcctlr1, TRCCIDCCTLR1);
 
-	writel_relaxed(state->trcvmidcctlr0, drvdata->base + TRCVMIDCCTLR0);
+	etm4x_relaxed_write32(csa, state->trcvmidcctlr0, TRCVMIDCCTLR0);
 	if (drvdata->numvmidc > 4)
-		writel_relaxed(state->trcvmidcctlr1, drvdata->base + TRCVMIDCCTLR1);
+		etm4x_relaxed_write32(csa, state->trcvmidcctlr1, TRCVMIDCCTLR1);
 
-	writel_relaxed(state->trcclaimset, drvdata->base + TRCCLAIMSET);
+	etm4x_relaxed_write32(csa, state->trcclaimset, TRCCLAIMSET);
 
-	writel_relaxed(state->trcpdcr, drvdata->base + TRCPDCR);
+	if (!drvdata->skip_power_up)
+		etm4x_relaxed_write32(csa, state->trcpdcr, TRCPDCR);
 
 	drvdata->state_needs_restore = false;
 
@@ -1482,7 +1666,7 @@ static void etm4_cpu_restore(struct etmv4_drvdata *drvdata)
 
 	/* Unlock the OS lock to re-enable trace and external debug access */
 	etm4_os_unlock(drvdata);
-	CS_LOCK(drvdata->base);
+	etm4_cs_lock(drvdata, csa);
 }
 
 static int etm4_cpu_pm_notify(struct notifier_block *nb, unsigned long cmd,
@@ -1569,15 +1753,13 @@ static void etm4_pm_clear(void)
 	}
 }
 
-static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
+static int etm4_probe(struct device *dev, void __iomem *base, u32 etm_pid)
 {
 	int ret;
-	void __iomem *base;
-	struct device *dev = &adev->dev;
 	struct coresight_platform_data *pdata = NULL;
 	struct etmv4_drvdata *drvdata;
-	struct resource *res = &adev->res;
 	struct coresight_desc desc = { 0 };
+	struct etm4_init_arg init_arg = { 0 };
 
 	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
 	if (!drvdata)
@@ -1596,14 +1778,6 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
 			return -ENOMEM;
 	}
 
-	if (fwnode_property_present(dev_fwnode(dev), "qcom,skip-power-up"))
-		drvdata->skip_power_up = true;
-
-	/* Validity for the resource is already checked by the AMBA core */
-	base = devm_ioremap_resource(dev, res);
-	if (IS_ERR(base))
-		return PTR_ERR(base);
-
 	drvdata->base = base;
 
 	spin_lock_init(&drvdata->spinlock);
@@ -1616,13 +1790,22 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
 	if (!desc.name)
 		return -ENOMEM;
 
+	init_arg.drvdata = drvdata;
+	init_arg.csa = &desc.access;
+	init_arg.pid = etm_pid;
+
 	if (smp_call_function_single(drvdata->cpu,
-				etm4_init_arch_data,  drvdata, 1))
+				etm4_init_arch_data,  &init_arg, 1))
 		dev_err(dev, "ETM arch init failed\n");
 
-	if (etm4_arch_supported(drvdata->arch) == false)
+	if (!drvdata->arch)
 		return -EINVAL;
 
+	/* TRCPDCR is not accessible with system instructions. */
+	if (!desc.access.io_mem ||
+	    fwnode_property_present(dev_fwnode(dev), "qcom,skip-power-up"))
+		drvdata->skip_power_up = true;
+
 	etm4_init_trace_id(drvdata);
 	etm4_set_default(&drvdata->config);
 
@@ -1630,7 +1813,7 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
 	if (IS_ERR(pdata))
 		return PTR_ERR(pdata);
 
-	adev->dev.platform_data = pdata;
+	dev->platform_data = pdata;
 
 	desc.type = CORESIGHT_DEV_TYPE_SOURCE;
 	desc.subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_PROC;
@@ -1650,25 +1833,61 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
 
 	etmdrvdata[drvdata->cpu] = drvdata;
 
-	pm_runtime_put(&adev->dev);
 	dev_info(&drvdata->csdev->dev, "CPU%d: ETM v%d.%d initialized\n",
-		 drvdata->cpu, drvdata->arch >> 4, drvdata->arch & 0xf);
+		 drvdata->cpu, ETM_ARCH_MAJOR_VERSION(drvdata->arch),
+		 ETM_ARCH_MINOR_VERSION(drvdata->arch));
 
 	if (boot_enable) {
 		coresight_enable(drvdata->csdev);
 		drvdata->boot_enable = true;
 	}
 
-	etm4_check_arch_features(drvdata, id->id);
-
 	return 0;
 }
 
+static int etm4_probe_amba(struct amba_device *adev, const struct amba_id *id)
+{
+	void __iomem *base;
+	struct device *dev = &adev->dev;
+	struct resource *res = &adev->res;
+	int ret;
+
+	/* Validity for the resource is already checked by the AMBA core */
+	base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	ret = etm4_probe(dev, base, id->id);
+	if (!ret)
+		pm_runtime_put(&adev->dev);
+
+	return ret;
+}
+
+static int etm4_probe_platform_dev(struct platform_device *pdev)
+{
+	int ret;
+
+	pm_runtime_get_noresume(&pdev->dev);
+	pm_runtime_set_active(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
+
+	/*
+	 * System register based devices could match the
+	 * HW by reading appropriate registers on the HW
+	 * and thus we could skip the PID.
+	 */
+	ret = etm4_probe(&pdev->dev, NULL, 0);
+
+	pm_runtime_put(&pdev->dev);
+	return ret;
+}
+
 static struct amba_cs_uci_id uci_id_etm4[] = {
 	{
 		/*  ETMv4 UCI data */
-		.devarch	= 0x47704a13,
-		.devarch_mask	= 0xfff0ffff,
+		.devarch	= ETM_DEVARCH_ETMv4x_ARCH,
+		.devarch_mask	= ETM_DEVARCH_ID_MASK,
 		.devtype	= 0x00000013,
 	}
 };
@@ -1680,15 +1899,12 @@ static void clear_etmdrvdata(void *info)
 	etmdrvdata[cpu] = NULL;
 }
 
-static void etm4_remove(struct amba_device *adev)
+static int etm4_remove_dev(struct etmv4_drvdata *drvdata)
 {
-	struct etmv4_drvdata *drvdata = dev_get_drvdata(&adev->dev);
-
 	etm_perf_symlink(drvdata->csdev, false);
-
 	/*
-	 * Taking hotplug lock here to avoid racing between etm4_remove and
-	 * CPU hotplug call backs.
+	 * Taking hotplug lock here to avoid racing between etm4_remove_dev()
+	 * and CPU hotplug call backs.
 	 */
 	cpus_read_lock();
 	/*
@@ -1703,6 +1919,27 @@ static void etm4_remove(struct amba_device *adev)
 	cpus_read_unlock();
 
 	coresight_unregister(drvdata->csdev);
+
+	return 0;
+}
+
+static void etm4_remove_amba(struct amba_device *adev)
+{
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(&adev->dev);
+
+	if (drvdata)
+		etm4_remove_dev(drvdata);
+}
+
+static int etm4_remove_platform_dev(struct platform_device *pdev)
+{
+	int ret = 0;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(&pdev->dev);
+
+	if (drvdata)
+		ret = etm4_remove_dev(drvdata);
+	pm_runtime_disable(&pdev->dev);
+	return ret;
 }
 
 static const struct amba_id etm4_ids[] = {
@@ -1711,6 +1948,8 @@ static const struct amba_id etm4_ids[] = {
 	CS_AMBA_ID(0x000bb95a),			/* Cortex-A72 */
 	CS_AMBA_ID(0x000bb959),			/* Cortex-A73 */
 	CS_AMBA_UCI_ID(0x000bb9da, uci_id_etm4),/* Cortex-A35 */
+	CS_AMBA_UCI_ID(0x000bbd05, uci_id_etm4),/* Cortex-A55 */
+	CS_AMBA_UCI_ID(0x000bbd0a, uci_id_etm4),/* Cortex-A75 */
 	CS_AMBA_UCI_ID(0x000bbd0c, uci_id_etm4),/* Neoverse N1 */
 	CS_AMBA_UCI_ID(0x000f0205, uci_id_etm4),/* Qualcomm Kryo */
 	CS_AMBA_UCI_ID(0x000f0211, uci_id_etm4),/* Qualcomm Kryo */
@@ -1726,17 +1965,32 @@ static const struct amba_id etm4_ids[] = {
 
 MODULE_DEVICE_TABLE(amba, etm4_ids);
 
-static struct amba_driver etm4x_driver = {
+static struct amba_driver etm4x_amba_driver = {
 	.drv = {
 		.name   = "coresight-etm4x",
 		.owner  = THIS_MODULE,
 		.suppress_bind_attrs = true,
 	},
-	.probe		= etm4_probe,
-	.remove         = etm4_remove,
+	.probe		= etm4_probe_amba,
+	.remove         = etm4_remove_amba,
 	.id_table	= etm4_ids,
 };
 
+static const struct of_device_id etm4_sysreg_match[] = {
+	{ .compatible	= "arm,coresight-etm4x-sysreg" },
+	{}
+};
+
+static struct platform_driver etm4_platform_driver = {
+	.probe		= etm4_probe_platform_dev,
+	.remove		= etm4_remove_platform_dev,
+	.driver			= {
+		.name			= "coresight-etm4x",
+		.of_match_table		= etm4_sysreg_match,
+		.suppress_bind_attrs	= true,
+	},
+};
+
 static int __init etm4x_init(void)
 {
 	int ret;
@@ -1747,18 +2001,28 @@ static int __init etm4x_init(void)
 	if (ret)
 		return ret;
 
-	ret = amba_driver_register(&etm4x_driver);
+	ret = amba_driver_register(&etm4x_amba_driver);
 	if (ret) {
-		pr_err("Error registering etm4x driver\n");
-		etm4_pm_clear();
+		pr_err("Error registering etm4x AMBA driver\n");
+		goto clear_pm;
 	}
 
+	ret = platform_driver_register(&etm4_platform_driver);
+	if (!ret)
+		return 0;
+
+	pr_err("Error registering etm4x platform driver\n");
+	amba_driver_unregister(&etm4x_amba_driver);
+
+clear_pm:
+	etm4_pm_clear();
 	return ret;
 }
 
 static void __exit etm4x_exit(void)
 {
-	amba_driver_unregister(&etm4x_driver);
+	amba_driver_unregister(&etm4x_amba_driver);
+	platform_driver_unregister(&etm4_platform_driver);
 	etm4_pm_clear();
 }
 
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
index 989ce7b8ade7..0995a10790f4 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
@@ -389,7 +389,7 @@ static ssize_t mode_store(struct device *dev,
 		config->eventctrl1 &= ~BIT(12);
 
 	/* bit[8], Instruction stall bit */
-	if (config->mode & ETM_MODE_ISTALL_EN)
+	if ((config->mode & ETM_MODE_ISTALL_EN) && (drvdata->stallctl == true))
 		config->stall_ctrl |= BIT(8);
 	else
 		config->stall_ctrl &= ~BIT(8);
@@ -743,7 +743,7 @@ static ssize_t s_exlevel_vinst_show(struct device *dev,
 	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
 	struct etmv4_config *config = &drvdata->config;
 
-	val = (config->vinst_ctrl & ETM_EXLEVEL_S_VICTLR_MASK) >> 16;
+	val = (config->vinst_ctrl & TRCVICTLR_EXLEVEL_S_MASK) >> TRCVICTLR_EXLEVEL_S_SHIFT;
 	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
 }
 
@@ -760,10 +760,10 @@ static ssize_t s_exlevel_vinst_store(struct device *dev,
 
 	spin_lock(&drvdata->spinlock);
 	/* clear all EXLEVEL_S bits  */
-	config->vinst_ctrl &= ~(ETM_EXLEVEL_S_VICTLR_MASK);
+	config->vinst_ctrl &= ~(TRCVICTLR_EXLEVEL_S_MASK);
 	/* enable instruction tracing for corresponding exception level */
 	val &= drvdata->s_ex_level;
-	config->vinst_ctrl |= (val << 16);
+	config->vinst_ctrl |= (val << TRCVICTLR_EXLEVEL_S_SHIFT);
 	spin_unlock(&drvdata->spinlock);
 	return size;
 }
@@ -778,7 +778,7 @@ static ssize_t ns_exlevel_vinst_show(struct device *dev,
 	struct etmv4_config *config = &drvdata->config;
 
 	/* EXLEVEL_NS, bits[23:20] */
-	val = (config->vinst_ctrl & ETM_EXLEVEL_NS_VICTLR_MASK) >> 20;
+	val = (config->vinst_ctrl & TRCVICTLR_EXLEVEL_NS_MASK) >> TRCVICTLR_EXLEVEL_NS_SHIFT;
 	return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
 }
 
@@ -795,10 +795,10 @@ static ssize_t ns_exlevel_vinst_store(struct device *dev,
 
 	spin_lock(&drvdata->spinlock);
 	/* clear EXLEVEL_NS bits  */
-	config->vinst_ctrl &= ~(ETM_EXLEVEL_NS_VICTLR_MASK);
+	config->vinst_ctrl &= ~(TRCVICTLR_EXLEVEL_NS_MASK);
 	/* enable instruction tracing for corresponding exception level */
 	val &= drvdata->ns_ex_level;
-	config->vinst_ctrl |= (val << 20);
+	config->vinst_ctrl |= (val << TRCVICTLR_EXLEVEL_NS_SHIFT);
 	spin_unlock(&drvdata->spinlock);
 	return size;
 }
@@ -2319,7 +2319,8 @@ static struct attribute *coresight_etmv4_attrs[] = {
 };
 
 struct etmv4_reg {
-	void __iomem *addr;
+	struct coresight_device *csdev;
+	u32 offset;
 	u32 data;
 };
 
@@ -2327,15 +2328,16 @@ static void do_smp_cross_read(void *data)
 {
 	struct etmv4_reg *reg = data;
 
-	reg->data = readl_relaxed(reg->addr);
+	reg->data = etm4x_relaxed_read32(&reg->csdev->access, reg->offset);
 }
 
-static u32 etmv4_cross_read(const struct device *dev, u32 offset)
+static u32 etmv4_cross_read(const struct etmv4_drvdata *drvdata, u32 offset)
 {
-	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev);
 	struct etmv4_reg reg;
 
-	reg.addr = drvdata->base + offset;
+	reg.offset = offset;
+	reg.csdev = drvdata->csdev;
+
 	/*
 	 * smp cross call ensures the CPU will be powered up before
 	 * accessing the ETMv4 trace core registers
@@ -2344,72 +2346,120 @@ static u32 etmv4_cross_read(const struct device *dev, u32 offset)
 	return reg.data;
 }
 
-#define coresight_etm4x_reg(name, offset)			\
-	coresight_simple_reg32(struct etmv4_drvdata, name, offset)
+static inline u32 coresight_etm4x_attr_to_offset(struct device_attribute *attr)
+{
+	struct dev_ext_attribute *eattr;
+
+	eattr = container_of(attr, struct dev_ext_attribute, attr);
+	return (u32)(unsigned long)eattr->var;
+}
+
+static ssize_t coresight_etm4x_reg_show(struct device *dev,
+					struct device_attribute *d_attr,
+					char *buf)
+{
+	u32 val, offset;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
 
-#define coresight_etm4x_cross_read(name, offset)			\
-	coresight_simple_func(struct etmv4_drvdata, etmv4_cross_read,	\
-			      name, offset)
+	offset = coresight_etm4x_attr_to_offset(d_attr);
 
-coresight_etm4x_reg(trcpdcr, TRCPDCR);
-coresight_etm4x_reg(trcpdsr, TRCPDSR);
-coresight_etm4x_reg(trclsr, TRCLSR);
-coresight_etm4x_reg(trcauthstatus, TRCAUTHSTATUS);
-coresight_etm4x_reg(trcdevid, TRCDEVID);
-coresight_etm4x_reg(trcdevtype, TRCDEVTYPE);
-coresight_etm4x_reg(trcpidr0, TRCPIDR0);
-coresight_etm4x_reg(trcpidr1, TRCPIDR1);
-coresight_etm4x_reg(trcpidr2, TRCPIDR2);
-coresight_etm4x_reg(trcpidr3, TRCPIDR3);
-coresight_etm4x_cross_read(trcoslsr, TRCOSLSR);
-coresight_etm4x_cross_read(trcconfig, TRCCONFIGR);
-coresight_etm4x_cross_read(trctraceid, TRCTRACEIDR);
+	pm_runtime_get_sync(dev->parent);
+	val = etmv4_cross_read(drvdata, offset);
+	pm_runtime_put_sync(dev->parent);
+
+	return scnprintf(buf, PAGE_SIZE, "0x%x\n", val);
+}
+
+static inline bool
+etm4x_register_implemented(struct etmv4_drvdata *drvdata, u32 offset)
+{
+	switch (offset) {
+	ETM4x_SYSREG_LIST_CASES
+		/*
+		 * Registers accessible via system instructions are always
+		 * implemented.
+		 */
+		return true;
+	ETM4x_MMAP_LIST_CASES
+		/*
+		 * Registers accessible only via memory-mapped registers
+		 * must not be accessed via system instructions.
+		 * We cannot access the drvdata->csdev here, as this
+		 * function is called during the device creation, via
+		 * coresight_register() and the csdev is not initialized
+		 * until that is done. So rely on the drvdata->base to
+		 * detect if we have a memory mapped access.
+		 */
+		return !!drvdata->base;
+	}
+
+	return false;
+}
+
+/*
+ * Hide the ETM4x registers that may not be available on the
+ * hardware.
+ * There are certain management registers unavailable via system
+ * instructions. Make those sysfs attributes hidden on such
+ * systems.
+ */
+static umode_t
+coresight_etm4x_attr_reg_implemented(struct kobject *kobj,
+				     struct attribute *attr, int unused)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+	struct device_attribute *d_attr;
+	u32 offset;
+
+	d_attr = container_of(attr, struct device_attribute, attr);
+	offset = coresight_etm4x_attr_to_offset(d_attr);
+
+	if (etm4x_register_implemented(drvdata, offset))
+		return attr->mode;
+	return 0;
+}
+
+#define coresight_etm4x_reg(name, offset)				\
+	&((struct dev_ext_attribute[]) {				\
+	   {								\
+		__ATTR(name, 0444, coresight_etm4x_reg_show, NULL),	\
+		(void *)(unsigned long)offset				\
+	   }								\
+	})[0].attr.attr
 
 static struct attribute *coresight_etmv4_mgmt_attrs[] = {
-	&dev_attr_trcoslsr.attr,
-	&dev_attr_trcpdcr.attr,
-	&dev_attr_trcpdsr.attr,
-	&dev_attr_trclsr.attr,
-	&dev_attr_trcconfig.attr,
-	&dev_attr_trctraceid.attr,
-	&dev_attr_trcauthstatus.attr,
-	&dev_attr_trcdevid.attr,
-	&dev_attr_trcdevtype.attr,
-	&dev_attr_trcpidr0.attr,
-	&dev_attr_trcpidr1.attr,
-	&dev_attr_trcpidr2.attr,
-	&dev_attr_trcpidr3.attr,
+	coresight_etm4x_reg(trcpdcr, TRCPDCR),
+	coresight_etm4x_reg(trcpdsr, TRCPDSR),
+	coresight_etm4x_reg(trclsr, TRCLSR),
+	coresight_etm4x_reg(trcauthstatus, TRCAUTHSTATUS),
+	coresight_etm4x_reg(trcdevid, TRCDEVID),
+	coresight_etm4x_reg(trcdevtype, TRCDEVTYPE),
+	coresight_etm4x_reg(trcpidr0, TRCPIDR0),
+	coresight_etm4x_reg(trcpidr1, TRCPIDR1),
+	coresight_etm4x_reg(trcpidr2, TRCPIDR2),
+	coresight_etm4x_reg(trcpidr3, TRCPIDR3),
+	coresight_etm4x_reg(trcoslsr, TRCOSLSR),
+	coresight_etm4x_reg(trcconfig, TRCCONFIGR),
+	coresight_etm4x_reg(trctraceid, TRCTRACEIDR),
+	coresight_etm4x_reg(trcdevarch, TRCDEVARCH),
 	NULL,
 };
 
-coresight_etm4x_cross_read(trcidr0, TRCIDR0);
-coresight_etm4x_cross_read(trcidr1, TRCIDR1);
-coresight_etm4x_cross_read(trcidr2, TRCIDR2);
-coresight_etm4x_cross_read(trcidr3, TRCIDR3);
-coresight_etm4x_cross_read(trcidr4, TRCIDR4);
-coresight_etm4x_cross_read(trcidr5, TRCIDR5);
-/* trcidr[6,7] are reserved */
-coresight_etm4x_cross_read(trcidr8, TRCIDR8);
-coresight_etm4x_cross_read(trcidr9, TRCIDR9);
-coresight_etm4x_cross_read(trcidr10, TRCIDR10);
-coresight_etm4x_cross_read(trcidr11, TRCIDR11);
-coresight_etm4x_cross_read(trcidr12, TRCIDR12);
-coresight_etm4x_cross_read(trcidr13, TRCIDR13);
-
 static struct attribute *coresight_etmv4_trcidr_attrs[] = {
-	&dev_attr_trcidr0.attr,
-	&dev_attr_trcidr1.attr,
-	&dev_attr_trcidr2.attr,
-	&dev_attr_trcidr3.attr,
-	&dev_attr_trcidr4.attr,
-	&dev_attr_trcidr5.attr,
+	coresight_etm4x_reg(trcidr0, TRCIDR0),
+	coresight_etm4x_reg(trcidr1, TRCIDR1),
+	coresight_etm4x_reg(trcidr2, TRCIDR2),
+	coresight_etm4x_reg(trcidr3, TRCIDR3),
+	coresight_etm4x_reg(trcidr4, TRCIDR4),
+	coresight_etm4x_reg(trcidr5, TRCIDR5),
 	/* trcidr[6,7] are reserved */
-	&dev_attr_trcidr8.attr,
-	&dev_attr_trcidr9.attr,
-	&dev_attr_trcidr10.attr,
-	&dev_attr_trcidr11.attr,
-	&dev_attr_trcidr12.attr,
-	&dev_attr_trcidr13.attr,
+	coresight_etm4x_reg(trcidr8, TRCIDR8),
+	coresight_etm4x_reg(trcidr9, TRCIDR9),
+	coresight_etm4x_reg(trcidr10, TRCIDR10),
+	coresight_etm4x_reg(trcidr11, TRCIDR11),
+	coresight_etm4x_reg(trcidr12, TRCIDR12),
+	coresight_etm4x_reg(trcidr13, TRCIDR13),
 	NULL,
 };
 
@@ -2418,6 +2468,7 @@ static const struct attribute_group coresight_etmv4_group = {
 };
 
 static const struct attribute_group coresight_etmv4_mgmt_group = {
+	.is_visible = coresight_etm4x_attr_reg_implemented,
 	.attrs = coresight_etmv4_mgmt_attrs,
 	.name = "mgmt",
 };
diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h
index 3dd3e0633328..0af60571aa23 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x.h
+++ b/drivers/hwtracing/coresight/coresight-etm4x.h
@@ -45,13 +45,13 @@
 #define TRCVDSACCTLR			0x0A4
 #define TRCVDARCCTLR			0x0A8
 /* Derived resources registers */
-#define TRCSEQEVRn(n)			(0x100 + (n * 4))
+#define TRCSEQEVRn(n)			(0x100 + (n * 4)) /* n = 0-2 */
 #define TRCSEQRSTEVR			0x118
 #define TRCSEQSTR			0x11C
 #define TRCEXTINSELR			0x120
-#define TRCCNTRLDVRn(n)			(0x140 + (n * 4))
-#define TRCCNTCTLRn(n)			(0x150 + (n * 4))
-#define TRCCNTVRn(n)			(0x160 + (n * 4))
+#define TRCCNTRLDVRn(n)			(0x140 + (n * 4)) /* n = 0-3 */
+#define TRCCNTCTLRn(n)			(0x150 + (n * 4)) /* n = 0-3 */
+#define TRCCNTVRn(n)			(0x160 + (n * 4)) /* n = 0-3 */
 /* ID registers */
 #define TRCIDR8				0x180
 #define TRCIDR9				0x184
@@ -60,7 +60,7 @@
 #define TRCIDR12			0x190
 #define TRCIDR13			0x194
 #define TRCIMSPEC0			0x1C0
-#define TRCIMSPECn(n)			(0x1C0 + (n * 4))
+#define TRCIMSPECn(n)			(0x1C0 + (n * 4)) /* n = 1-7 */
 #define TRCIDR0				0x1E0
 #define TRCIDR1				0x1E4
 #define TRCIDR2				0x1E8
@@ -69,9 +69,12 @@
 #define TRCIDR5				0x1F4
 #define TRCIDR6				0x1F8
 #define TRCIDR7				0x1FC
-/* Resource selection registers */
+/*
+ * Resource selection registers, n = 2-31.
+ * First pair (regs 0, 1) is always present and is reserved.
+ */
 #define TRCRSCTLRn(n)			(0x200 + (n * 4))
-/* Single-shot comparator registers */
+/* Single-shot comparator registers, n = 0-7 */
 #define TRCSSCCRn(n)			(0x280 + (n * 4))
 #define TRCSSCSRn(n)			(0x2A0 + (n * 4))
 #define TRCSSPCICRn(n)			(0x2C0 + (n * 4))
@@ -81,11 +84,13 @@
 #define TRCPDCR				0x310
 #define TRCPDSR				0x314
 /* Trace registers (0x318-0xEFC) */
-/* Comparator registers */
+/* Address Comparator registers, n = 0-15 */
 #define TRCACVRn(n)			(0x400 + (n * 8))
 #define TRCACATRn(n)			(0x480 + (n * 8))
+/* Data Value Comparator Value registers, n = 0-7 */
 #define TRCDVCVRn(n)			(0x500 + (n * 16))
 #define TRCDVCMRn(n)			(0x580 + (n * 16))
+/* ContextID/Virtual ContextID comparators, n = 0-7 */
 #define TRCCIDCVRn(n)			(0x600 + (n * 8))
 #define TRCVMIDCVRn(n)			(0x640 + (n * 8))
 #define TRCCIDCCTLR0			0x680
@@ -121,6 +126,332 @@
 #define TRCCIDR2			0xFF8
 #define TRCCIDR3			0xFFC
 
+/*
+ * System instructions to access ETM registers.
+ * See ETMv4.4 spec ARM IHI0064F section 4.3.6 System instructions
+ */
+#define ETM4x_OFFSET_TO_REG(x)		((x) >> 2)
+
+#define ETM4x_CRn(n)			(((n) >> 7) & 0x7)
+#define ETM4x_Op2(n)			(((n) >> 4) & 0x7)
+#define ETM4x_CRm(n)			((n) & 0xf)
+
+#include <asm/sysreg.h>
+#define ETM4x_REG_NUM_TO_SYSREG(n)				\
+	sys_reg(2, 1, ETM4x_CRn(n), ETM4x_CRm(n), ETM4x_Op2(n))
+
+#define READ_ETM4x_REG(reg)					\
+	read_sysreg_s(ETM4x_REG_NUM_TO_SYSREG((reg)))
+#define WRITE_ETM4x_REG(val, reg)				\
+	write_sysreg_s(val, ETM4x_REG_NUM_TO_SYSREG((reg)))
+
+#define read_etm4x_sysreg_const_offset(offset)			\
+	READ_ETM4x_REG(ETM4x_OFFSET_TO_REG(offset))
+
+#define write_etm4x_sysreg_const_offset(val, offset)		\
+	WRITE_ETM4x_REG(val, ETM4x_OFFSET_TO_REG(offset))
+
+#define CASE_READ(res, x)					\
+	case (x): { (res) = read_etm4x_sysreg_const_offset((x)); break; }
+
+#define CASE_WRITE(val, x)					\
+	case (x): { write_etm4x_sysreg_const_offset((val), (x)); break; }
+
+#define CASE_NOP(__unused, x)					\
+	case (x):	/* fall through */
+
+/* List of registers accessible via System instructions */
+#define ETM_SYSREG_LIST(op, val)		\
+	CASE_##op((val), TRCPRGCTLR)		\
+	CASE_##op((val), TRCPROCSELR)		\
+	CASE_##op((val), TRCSTATR)		\
+	CASE_##op((val), TRCCONFIGR)		\
+	CASE_##op((val), TRCAUXCTLR)		\
+	CASE_##op((val), TRCEVENTCTL0R)		\
+	CASE_##op((val), TRCEVENTCTL1R)		\
+	CASE_##op((val), TRCSTALLCTLR)		\
+	CASE_##op((val), TRCTSCTLR)		\
+	CASE_##op((val), TRCSYNCPR)		\
+	CASE_##op((val), TRCCCCTLR)		\
+	CASE_##op((val), TRCBBCTLR)		\
+	CASE_##op((val), TRCTRACEIDR)		\
+	CASE_##op((val), TRCQCTLR)		\
+	CASE_##op((val), TRCVICTLR)		\
+	CASE_##op((val), TRCVIIECTLR)		\
+	CASE_##op((val), TRCVISSCTLR)		\
+	CASE_##op((val), TRCVIPCSSCTLR)		\
+	CASE_##op((val), TRCVDCTLR)		\
+	CASE_##op((val), TRCVDSACCTLR)		\
+	CASE_##op((val), TRCVDARCCTLR)		\
+	CASE_##op((val), TRCSEQEVRn(0))		\
+	CASE_##op((val), TRCSEQEVRn(1))		\
+	CASE_##op((val), TRCSEQEVRn(2))		\
+	CASE_##op((val), TRCSEQRSTEVR)		\
+	CASE_##op((val), TRCSEQSTR)		\
+	CASE_##op((val), TRCEXTINSELR)		\
+	CASE_##op((val), TRCCNTRLDVRn(0))	\
+	CASE_##op((val), TRCCNTRLDVRn(1))	\
+	CASE_##op((val), TRCCNTRLDVRn(2))	\
+	CASE_##op((val), TRCCNTRLDVRn(3))	\
+	CASE_##op((val), TRCCNTCTLRn(0))	\
+	CASE_##op((val), TRCCNTCTLRn(1))	\
+	CASE_##op((val), TRCCNTCTLRn(2))	\
+	CASE_##op((val), TRCCNTCTLRn(3))	\
+	CASE_##op((val), TRCCNTVRn(0))		\
+	CASE_##op((val), TRCCNTVRn(1))		\
+	CASE_##op((val), TRCCNTVRn(2))		\
+	CASE_##op((val), TRCCNTVRn(3))		\
+	CASE_##op((val), TRCIDR8)		\
+	CASE_##op((val), TRCIDR9)		\
+	CASE_##op((val), TRCIDR10)		\
+	CASE_##op((val), TRCIDR11)		\
+	CASE_##op((val), TRCIDR12)		\
+	CASE_##op((val), TRCIDR13)		\
+	CASE_##op((val), TRCIMSPECn(0))		\
+	CASE_##op((val), TRCIMSPECn(1))		\
+	CASE_##op((val), TRCIMSPECn(2))		\
+	CASE_##op((val), TRCIMSPECn(3))		\
+	CASE_##op((val), TRCIMSPECn(4))		\
+	CASE_##op((val), TRCIMSPECn(5))		\
+	CASE_##op((val), TRCIMSPECn(6))		\
+	CASE_##op((val), TRCIMSPECn(7))		\
+	CASE_##op((val), TRCIDR0)		\
+	CASE_##op((val), TRCIDR1)		\
+	CASE_##op((val), TRCIDR2)		\
+	CASE_##op((val), TRCIDR3)		\
+	CASE_##op((val), TRCIDR4)		\
+	CASE_##op((val), TRCIDR5)		\
+	CASE_##op((val), TRCIDR6)		\
+	CASE_##op((val), TRCIDR7)		\
+	CASE_##op((val), TRCRSCTLRn(2))		\
+	CASE_##op((val), TRCRSCTLRn(3))		\
+	CASE_##op((val), TRCRSCTLRn(4))		\
+	CASE_##op((val), TRCRSCTLRn(5))		\
+	CASE_##op((val), TRCRSCTLRn(6))		\
+	CASE_##op((val), TRCRSCTLRn(7))		\
+	CASE_##op((val), TRCRSCTLRn(8))		\
+	CASE_##op((val), TRCRSCTLRn(9))		\
+	CASE_##op((val), TRCRSCTLRn(10))	\
+	CASE_##op((val), TRCRSCTLRn(11))	\
+	CASE_##op((val), TRCRSCTLRn(12))	\
+	CASE_##op((val), TRCRSCTLRn(13))	\
+	CASE_##op((val), TRCRSCTLRn(14))	\
+	CASE_##op((val), TRCRSCTLRn(15))	\
+	CASE_##op((val), TRCRSCTLRn(16))	\
+	CASE_##op((val), TRCRSCTLRn(17))	\
+	CASE_##op((val), TRCRSCTLRn(18))	\
+	CASE_##op((val), TRCRSCTLRn(19))	\
+	CASE_##op((val), TRCRSCTLRn(20))	\
+	CASE_##op((val), TRCRSCTLRn(21))	\
+	CASE_##op((val), TRCRSCTLRn(22))	\
+	CASE_##op((val), TRCRSCTLRn(23))	\
+	CASE_##op((val), TRCRSCTLRn(24))	\
+	CASE_##op((val), TRCRSCTLRn(25))	\
+	CASE_##op((val), TRCRSCTLRn(26))	\
+	CASE_##op((val), TRCRSCTLRn(27))	\
+	CASE_##op((val), TRCRSCTLRn(28))	\
+	CASE_##op((val), TRCRSCTLRn(29))	\
+	CASE_##op((val), TRCRSCTLRn(30))	\
+	CASE_##op((val), TRCRSCTLRn(31))	\
+	CASE_##op((val), TRCSSCCRn(0))		\
+	CASE_##op((val), TRCSSCCRn(1))		\
+	CASE_##op((val), TRCSSCCRn(2))		\
+	CASE_##op((val), TRCSSCCRn(3))		\
+	CASE_##op((val), TRCSSCCRn(4))		\
+	CASE_##op((val), TRCSSCCRn(5))		\
+	CASE_##op((val), TRCSSCCRn(6))		\
+	CASE_##op((val), TRCSSCCRn(7))		\
+	CASE_##op((val), TRCSSCSRn(0))		\
+	CASE_##op((val), TRCSSCSRn(1))		\
+	CASE_##op((val), TRCSSCSRn(2))		\
+	CASE_##op((val), TRCSSCSRn(3))		\
+	CASE_##op((val), TRCSSCSRn(4))		\
+	CASE_##op((val), TRCSSCSRn(5))		\
+	CASE_##op((val), TRCSSCSRn(6))		\
+	CASE_##op((val), TRCSSCSRn(7))		\
+	CASE_##op((val), TRCSSPCICRn(0))	\
+	CASE_##op((val), TRCSSPCICRn(1))	\
+	CASE_##op((val), TRCSSPCICRn(2))	\
+	CASE_##op((val), TRCSSPCICRn(3))	\
+	CASE_##op((val), TRCSSPCICRn(4))	\
+	CASE_##op((val), TRCSSPCICRn(5))	\
+	CASE_##op((val), TRCSSPCICRn(6))	\
+	CASE_##op((val), TRCSSPCICRn(7))	\
+	CASE_##op((val), TRCOSLAR)		\
+	CASE_##op((val), TRCOSLSR)		\
+	CASE_##op((val), TRCACVRn(0))		\
+	CASE_##op((val), TRCACVRn(1))		\
+	CASE_##op((val), TRCACVRn(2))		\
+	CASE_##op((val), TRCACVRn(3))		\
+	CASE_##op((val), TRCACVRn(4))		\
+	CASE_##op((val), TRCACVRn(5))		\
+	CASE_##op((val), TRCACVRn(6))		\
+	CASE_##op((val), TRCACVRn(7))		\
+	CASE_##op((val), TRCACVRn(8))		\
+	CASE_##op((val), TRCACVRn(9))		\
+	CASE_##op((val), TRCACVRn(10))		\
+	CASE_##op((val), TRCACVRn(11))		\
+	CASE_##op((val), TRCACVRn(12))		\
+	CASE_##op((val), TRCACVRn(13))		\
+	CASE_##op((val), TRCACVRn(14))		\
+	CASE_##op((val), TRCACVRn(15))		\
+	CASE_##op((val), TRCACATRn(0))		\
+	CASE_##op((val), TRCACATRn(1))		\
+	CASE_##op((val), TRCACATRn(2))		\
+	CASE_##op((val), TRCACATRn(3))		\
+	CASE_##op((val), TRCACATRn(4))		\
+	CASE_##op((val), TRCACATRn(5))		\
+	CASE_##op((val), TRCACATRn(6))		\
+	CASE_##op((val), TRCACATRn(7))		\
+	CASE_##op((val), TRCACATRn(8))		\
+	CASE_##op((val), TRCACATRn(9))		\
+	CASE_##op((val), TRCACATRn(10))		\
+	CASE_##op((val), TRCACATRn(11))		\
+	CASE_##op((val), TRCACATRn(12))		\
+	CASE_##op((val), TRCACATRn(13))		\
+	CASE_##op((val), TRCACATRn(14))		\
+	CASE_##op((val), TRCACATRn(15))		\
+	CASE_##op((val), TRCDVCVRn(0))		\
+	CASE_##op((val), TRCDVCVRn(1))		\
+	CASE_##op((val), TRCDVCVRn(2))		\
+	CASE_##op((val), TRCDVCVRn(3))		\
+	CASE_##op((val), TRCDVCVRn(4))		\
+	CASE_##op((val), TRCDVCVRn(5))		\
+	CASE_##op((val), TRCDVCVRn(6))		\
+	CASE_##op((val), TRCDVCVRn(7))		\
+	CASE_##op((val), TRCDVCMRn(0))		\
+	CASE_##op((val), TRCDVCMRn(1))		\
+	CASE_##op((val), TRCDVCMRn(2))		\
+	CASE_##op((val), TRCDVCMRn(3))		\
+	CASE_##op((val), TRCDVCMRn(4))		\
+	CASE_##op((val), TRCDVCMRn(5))		\
+	CASE_##op((val), TRCDVCMRn(6))		\
+	CASE_##op((val), TRCDVCMRn(7))		\
+	CASE_##op((val), TRCCIDCVRn(0))		\
+	CASE_##op((val), TRCCIDCVRn(1))		\
+	CASE_##op((val), TRCCIDCVRn(2))		\
+	CASE_##op((val), TRCCIDCVRn(3))		\
+	CASE_##op((val), TRCCIDCVRn(4))		\
+	CASE_##op((val), TRCCIDCVRn(5))		\
+	CASE_##op((val), TRCCIDCVRn(6))		\
+	CASE_##op((val), TRCCIDCVRn(7))		\
+	CASE_##op((val), TRCVMIDCVRn(0))	\
+	CASE_##op((val), TRCVMIDCVRn(1))	\
+	CASE_##op((val), TRCVMIDCVRn(2))	\
+	CASE_##op((val), TRCVMIDCVRn(3))	\
+	CASE_##op((val), TRCVMIDCVRn(4))	\
+	CASE_##op((val), TRCVMIDCVRn(5))	\
+	CASE_##op((val), TRCVMIDCVRn(6))	\
+	CASE_##op((val), TRCVMIDCVRn(7))	\
+	CASE_##op((val), TRCCIDCCTLR0)		\
+	CASE_##op((val), TRCCIDCCTLR1)		\
+	CASE_##op((val), TRCVMIDCCTLR0)		\
+	CASE_##op((val), TRCVMIDCCTLR1)		\
+	CASE_##op((val), TRCCLAIMSET)		\
+	CASE_##op((val), TRCCLAIMCLR)		\
+	CASE_##op((val), TRCAUTHSTATUS)		\
+	CASE_##op((val), TRCDEVARCH)		\
+	CASE_##op((val), TRCDEVID)
+
+/* List of registers only accessible via memory-mapped interface */
+#define ETM_MMAP_LIST(op, val)			\
+	CASE_##op((val), TRCDEVTYPE)		\
+	CASE_##op((val), TRCPDCR)		\
+	CASE_##op((val), TRCPDSR)		\
+	CASE_##op((val), TRCDEVAFF0)		\
+	CASE_##op((val), TRCDEVAFF1)		\
+	CASE_##op((val), TRCLAR)		\
+	CASE_##op((val), TRCLSR)		\
+	CASE_##op((val), TRCITCTRL)		\
+	CASE_##op((val), TRCPIDR4)		\
+	CASE_##op((val), TRCPIDR0)		\
+	CASE_##op((val), TRCPIDR1)		\
+	CASE_##op((val), TRCPIDR2)		\
+	CASE_##op((val), TRCPIDR3)
+
+#define ETM4x_READ_SYSREG_CASES(res)	ETM_SYSREG_LIST(READ, (res))
+#define ETM4x_WRITE_SYSREG_CASES(val)	ETM_SYSREG_LIST(WRITE, (val))
+
+#define ETM4x_SYSREG_LIST_CASES		ETM_SYSREG_LIST(NOP, __unused)
+#define ETM4x_MMAP_LIST_CASES		ETM_MMAP_LIST(NOP, __unused)
+
+#define read_etm4x_sysreg_offset(offset, _64bit)				\
+	({									\
+		u64 __val;							\
+										\
+		if (__builtin_constant_p((offset)))				\
+			__val = read_etm4x_sysreg_const_offset((offset));	\
+		else								\
+			__val = etm4x_sysreg_read((offset), true, (_64bit));	\
+		__val;								\
+	 })
+
+#define write_etm4x_sysreg_offset(val, offset, _64bit)			\
+	do {								\
+		if (__builtin_constant_p((offset)))			\
+			write_etm4x_sysreg_const_offset((val),		\
+							(offset));	\
+		else							\
+			etm4x_sysreg_write((val), (offset), true,	\
+					   (_64bit));			\
+	} while (0)
+
+
+#define etm4x_relaxed_read32(csa, offset)				\
+	((u32)((csa)->io_mem ?						\
+		 readl_relaxed((csa)->base + (offset)) :		\
+		 read_etm4x_sysreg_offset((offset), false)))
+
+#define etm4x_relaxed_read64(csa, offset)				\
+	((u64)((csa)->io_mem ?						\
+		 readq_relaxed((csa)->base + (offset)) :		\
+		 read_etm4x_sysreg_offset((offset), true)))
+
+#define etm4x_read32(csa, offset)					\
+	({								\
+		u32 __val = etm4x_relaxed_read32((csa), (offset));	\
+		__iormb(__val);						\
+		__val;							\
+	 })
+
+#define etm4x_read64(csa, offset)					\
+	({								\
+		u64 __val = etm4x_relaxed_read64((csa), (offset));	\
+		__iormb(__val);						\
+		__val;							\
+	 })
+
+#define etm4x_relaxed_write32(csa, val, offset)				\
+	do {								\
+		if ((csa)->io_mem)					\
+			writel_relaxed((val), (csa)->base + (offset));	\
+		else							\
+			write_etm4x_sysreg_offset((val), (offset),	\
+						  false);		\
+	} while (0)
+
+#define etm4x_relaxed_write64(csa, val, offset)				\
+	do {								\
+		if ((csa)->io_mem)					\
+			writeq_relaxed((val), (csa)->base + (offset));	\
+		else							\
+			write_etm4x_sysreg_offset((val), (offset),	\
+						  true);		\
+	} while (0)
+
+#define etm4x_write32(csa, val, offset)					\
+	do {								\
+		__iowmb();						\
+		etm4x_relaxed_write32((csa), (val), (offset));		\
+	} while (0)
+
+#define etm4x_write64(csa, val, offset)					\
+	do {								\
+		__iowmb();						\
+		etm4x_relaxed_write64((csa), (val), (offset));		\
+	} while (0)
+
+
 /* ETMv4 resources */
 #define ETM_MAX_NR_PE			8
 #define ETMv4_MAX_CNTR			4
@@ -137,7 +468,6 @@
 #define ETM_MAX_RES_SEL			32
 #define ETM_MAX_SS_CMP			8
 
-#define ETM_ARCH_V4			0x40
 #define ETMv4_SYNC_MASK			0x1F
 #define ETM_CYC_THRESHOLD_MASK		0xFFF
 #define ETM_CYC_THRESHOLD_DEFAULT       0x100
@@ -175,34 +505,150 @@
 					 ETM_MODE_EXCL_KERN | \
 					 ETM_MODE_EXCL_USER)
 
+/*
+ * TRCDEVARCH Bit field definitions
+ * Bits[31:21]	- ARCHITECT = Always Arm Ltd.
+ *                * Bits[31:28] = 0x4
+ *                * Bits[27:21] = 0b0111011
+ * Bit[20]	- PRESENT,  Indicates the presence of this register.
+ *
+ * Bits[19:16]	- REVISION, Revision of the architecture.
+ *
+ * Bits[15:0]	- ARCHID, Identifies this component as an ETM
+ *                * Bits[15:12] - architecture version of ETM
+ *                *             = 4 for ETMv4
+ *                * Bits[11:0] = 0xA13, architecture part number for ETM.
+ */
+#define ETM_DEVARCH_ARCHITECT_MASK		GENMASK(31, 21)
+#define ETM_DEVARCH_ARCHITECT_ARM		((0x4 << 28) | (0b0111011 << 21))
+#define ETM_DEVARCH_PRESENT			BIT(20)
+#define ETM_DEVARCH_REVISION_SHIFT		16
+#define ETM_DEVARCH_REVISION_MASK		GENMASK(19, 16)
+#define ETM_DEVARCH_REVISION(x)			\
+	(((x) & ETM_DEVARCH_REVISION_MASK) >> ETM_DEVARCH_REVISION_SHIFT)
+#define ETM_DEVARCH_ARCHID_MASK			GENMASK(15, 0)
+#define ETM_DEVARCH_ARCHID_ARCH_VER_SHIFT	12
+#define ETM_DEVARCH_ARCHID_ARCH_VER_MASK	GENMASK(15, 12)
+#define ETM_DEVARCH_ARCHID_ARCH_VER(x)		\
+	(((x) & ETM_DEVARCH_ARCHID_ARCH_VER_MASK) >> ETM_DEVARCH_ARCHID_ARCH_VER_SHIFT)
+
+#define ETM_DEVARCH_MAKE_ARCHID_ARCH_VER(ver)			\
+	(((ver) << ETM_DEVARCH_ARCHID_ARCH_VER_SHIFT) & ETM_DEVARCH_ARCHID_ARCH_VER_MASK)
+
+#define ETM_DEVARCH_ARCHID_ARCH_PART(x)		((x) & 0xfffUL)
+
+#define ETM_DEVARCH_MAKE_ARCHID(major)			\
+	((ETM_DEVARCH_MAKE_ARCHID_ARCH_VER(major)) | ETM_DEVARCH_ARCHID_ARCH_PART(0xA13))
+
+#define ETM_DEVARCH_ARCHID_ETMv4x		ETM_DEVARCH_MAKE_ARCHID(0x4)
+
+#define ETM_DEVARCH_ID_MASK						\
+	(ETM_DEVARCH_ARCHITECT_MASK | ETM_DEVARCH_ARCHID_MASK | ETM_DEVARCH_PRESENT)
+#define ETM_DEVARCH_ETMv4x_ARCH						\
+	(ETM_DEVARCH_ARCHITECT_ARM | ETM_DEVARCH_ARCHID_ETMv4x | ETM_DEVARCH_PRESENT)
+
 #define TRCSTATR_IDLE_BIT		0
 #define TRCSTATR_PMSTABLE_BIT		1
 #define ETM_DEFAULT_ADDR_COMP		0
 
+#define TRCSSCSRn_PC			BIT(3)
+
 /* PowerDown Control Register bits */
 #define TRCPDCR_PU			BIT(3)
 
-/* secure state access levels - TRCACATRn */
-#define ETM_EXLEVEL_S_APP		BIT(8)
-#define ETM_EXLEVEL_S_OS		BIT(9)
-#define ETM_EXLEVEL_S_HYP		BIT(10)
-#define ETM_EXLEVEL_S_MON		BIT(11)
-/* non-secure state access levels - TRCACATRn */
-#define ETM_EXLEVEL_NS_APP		BIT(12)
-#define ETM_EXLEVEL_NS_OS		BIT(13)
-#define ETM_EXLEVEL_NS_HYP		BIT(14)
-#define ETM_EXLEVEL_NS_NA		BIT(15)
+#define TRCACATR_EXLEVEL_SHIFT		8
+
+/*
+ * Exception level mask for Secure and Non-Secure ELs.
+ * ETM defines the bits for EL control (e.g., TRCVICTLR, TRCACATRn).
+ * The Secure and Non-Secure ELs are always together.
+ * Non-secure EL3 is never implemented.
+ * We use the following generic mask as they appear in different
+ * registers and this can be shifted for the appropriate
+ * fields.
+ */
+#define ETM_EXLEVEL_S_APP		BIT(0)	/* Secure EL0		*/
+#define ETM_EXLEVEL_S_OS		BIT(1)	/* Secure EL1		*/
+#define ETM_EXLEVEL_S_HYP		BIT(2)	/* Secure EL2		*/
+#define ETM_EXLEVEL_S_MON		BIT(3)	/* Secure EL3/Monitor	*/
+#define ETM_EXLEVEL_NS_APP		BIT(4)	/* NonSecure EL0	*/
+#define ETM_EXLEVEL_NS_OS		BIT(5)	/* NonSecure EL1	*/
+#define ETM_EXLEVEL_NS_HYP		BIT(6)	/* NonSecure EL2	*/
+
+#define ETM_EXLEVEL_MASK		(GENMASK(6, 0))
+#define ETM_EXLEVEL_S_MASK		(GENMASK(3, 0))
+#define ETM_EXLEVEL_NS_MASK		(GENMASK(6, 4))
 
-/* access level control in TRCVICTLR - same bits as TRCACATRn but shifted */
-#define ETM_EXLEVEL_LSHIFT_TRCVICTLR	8
+/* access level controls in TRCACATRn */
+#define TRCACATR_EXLEVEL_SHIFT		8
+
+/* access level control in TRCVICTLR */
+#define TRCVICTLR_EXLEVEL_SHIFT		16
+#define TRCVICTLR_EXLEVEL_S_SHIFT	16
+#define TRCVICTLR_EXLEVEL_NS_SHIFT	20
 
 /* secure / non secure masks - TRCVICTLR, IDR3 */
-#define ETM_EXLEVEL_S_VICTLR_MASK	GENMASK(19, 16)
-/* NS MON (EL3) mode never implemented */
-#define ETM_EXLEVEL_NS_VICTLR_MASK	GENMASK(22, 20)
+#define TRCVICTLR_EXLEVEL_MASK		(ETM_EXLEVEL_MASK << TRCVICTLR_EXLEVEL_SHIFT)
+#define TRCVICTLR_EXLEVEL_S_MASK	(ETM_EXLEVEL_S_MASK << TRCVICTLR_EXLEVEL_SHIFT)
+#define TRCVICTLR_EXLEVEL_NS_MASK	(ETM_EXLEVEL_NS_MASK << TRCVICTLR_EXLEVEL_SHIFT)
+
+#define ETM_TRCIDR1_ARCH_MAJOR_SHIFT	8
+#define ETM_TRCIDR1_ARCH_MAJOR_MASK	(0xfU << ETM_TRCIDR1_ARCH_MAJOR_SHIFT)
+#define ETM_TRCIDR1_ARCH_MAJOR(x)	\
+	(((x) & ETM_TRCIDR1_ARCH_MAJOR_MASK) >> ETM_TRCIDR1_ARCH_MAJOR_SHIFT)
+#define ETM_TRCIDR1_ARCH_MINOR_SHIFT	4
+#define ETM_TRCIDR1_ARCH_MINOR_MASK	(0xfU << ETM_TRCIDR1_ARCH_MINOR_SHIFT)
+#define ETM_TRCIDR1_ARCH_MINOR(x)	\
+	(((x) & ETM_TRCIDR1_ARCH_MINOR_MASK) >> ETM_TRCIDR1_ARCH_MINOR_SHIFT)
+#define ETM_TRCIDR1_ARCH_SHIFT		ETM_TRCIDR1_ARCH_MINOR_SHIFT
+#define ETM_TRCIDR1_ARCH_MASK		\
+	(ETM_TRCIDR1_ARCH_MAJOR_MASK | ETM_TRCIDR1_ARCH_MINOR_MASK)
 
+#define ETM_TRCIDR1_ARCH_ETMv4		0x4
+
+/*
+ * Driver representation of the ETM architecture.
+ * The version of an ETM component can be detected from
+ *
+ * TRCDEVARCH	- CoreSight architected register
+ *                - Bits[15:12] - Major version
+ *                - Bits[19:16] - Minor version
+ * TRCIDR1	- ETM architected register
+ *                - Bits[11:8] - Major version
+ *                - Bits[7:4]  - Minor version
+ * We must rely on TRCDEVARCH for the version information,
+ * however we don't want to break the support for potential
+ * old implementations which might not implement it. Thus
+ * we fall back to TRCIDR1 if TRCDEVARCH is not implemented
+ * for memory mapped components.
+ * Now to make certain decisions easier based on the version
+ * we use an internal representation of the version in the
+ * driver, as follows :
+ *
+ * ETM_ARCH_VERSION[7:0], where :
+ *      Bits[7:4] - Major version
+ *      Bits[3:0] - Minor version
+ */
+#define ETM_ARCH_VERSION(major, minor)		\
+	((((major) & 0xfU) << 4) | (((minor) & 0xfU)))
+#define ETM_ARCH_MAJOR_VERSION(arch)	(((arch) >> 4) & 0xfU)
+#define ETM_ARCH_MINOR_VERSION(arch)	((arch) & 0xfU)
+
+#define ETM_ARCH_V4	ETM_ARCH_VERSION(4, 0)
 /* Interpretation of resource numbers change at ETM v4.3 architecture */
-#define ETM4X_ARCH_4V3	0x43
+#define ETM_ARCH_V4_3	ETM_ARCH_VERSION(4, 3)
+
+static inline u8 etm_devarch_to_arch(u32 devarch)
+{
+	return ETM_ARCH_VERSION(ETM_DEVARCH_ARCHID_ARCH_VER(devarch),
+				ETM_DEVARCH_REVISION(devarch));
+}
+
+static inline u8 etm_trcidr_to_arch(u32 trcidr1)
+{
+	return ETM_ARCH_VERSION(ETM_TRCIDR1_ARCH_MAJOR(trcidr1),
+				ETM_TRCIDR1_ARCH_MINOR(trcidr1));
+}
 
 enum etm_impdef_type {
 	ETM4_IMPDEF_HISI_CORE_COMMIT,
@@ -256,7 +702,7 @@ enum etm_impdef_type {
  * @vmid_mask0:	VM ID comparator mask for comparator 0-3.
  * @vmid_mask1:	VM ID comparator mask for comparator 4-7.
  * @ext_inp:	External input selection.
- * @arch:	ETM architecture version (for arch dependent config).
+ * @s_ex_level: Secure ELs where tracing is supported.
  */
 struct etmv4_config {
 	u32				mode;
@@ -300,7 +746,7 @@ struct etmv4_config {
 	u32				vmid_mask0;
 	u32				vmid_mask1;
 	u32				ext_inp;
-	u8				arch;
+	u8				s_ex_level;
 };
 
 /**
@@ -369,7 +815,7 @@ struct etmv4_save_state {
  * @spinlock:   Only one at a time pls.
  * @mode:	This tracer's mode, i.e sysFS, Perf or disabled.
  * @cpu:        The cpu this component is affined to.
- * @arch:       ETM version number.
+ * @arch:       ETM architecture version.
  * @nr_pe:	The number of processing entity available for tracing.
  * @nr_pe_cmp:	The number of processing entity comparator inputs that are
  *		available for tracing.
@@ -491,4 +937,7 @@ enum etm_addr_ctxtype {
 
 extern const struct attribute_group *coresight_etmv4_groups[];
 void etm4_config_trace_mode(struct etmv4_config *config);
+
+u64 etm4x_sysreg_read(u32 offset, bool _relaxed, bool _64bit);
+void etm4x_sysreg_write(u64 val, u32 offset, bool _relaxed, bool _64bit);
 #endif
diff --git a/drivers/hwtracing/coresight/coresight-funnel.c b/drivers/hwtracing/coresight/coresight-funnel.c
index 01f8f9285168..b363dd6bc510 100644
--- a/drivers/hwtracing/coresight/coresight-funnel.c
+++ b/drivers/hwtracing/coresight/coresight-funnel.c
@@ -52,13 +52,14 @@ static int dynamic_funnel_enable_hw(struct funnel_drvdata *drvdata, int port)
 {
 	u32 functl;
 	int rc = 0;
+	struct coresight_device *csdev = drvdata->csdev;
 
 	CS_UNLOCK(drvdata->base);
 
 	functl = readl_relaxed(drvdata->base + FUNNEL_FUNCTL);
 	/* Claim the device only when we enable the first slave */
 	if (!(functl & FUNNEL_ENSx_MASK)) {
-		rc = coresight_claim_device_unlocked(drvdata->base);
+		rc = coresight_claim_device_unlocked(csdev);
 		if (rc)
 			goto done;
 	}
@@ -101,6 +102,7 @@ static void dynamic_funnel_disable_hw(struct funnel_drvdata *drvdata,
 				      int inport)
 {
 	u32 functl;
+	struct coresight_device *csdev = drvdata->csdev;
 
 	CS_UNLOCK(drvdata->base);
 
@@ -110,7 +112,7 @@ static void dynamic_funnel_disable_hw(struct funnel_drvdata *drvdata,
 
 	/* Disclaim the device if none of the slaves are now active */
 	if (!(functl & FUNNEL_ENSx_MASK))
-		coresight_disclaim_device_unlocked(drvdata->base);
+		coresight_disclaim_device_unlocked(csdev);
 
 	CS_LOCK(drvdata->base);
 }
@@ -242,6 +244,7 @@ static int funnel_probe(struct device *dev, struct resource *res)
 		}
 		drvdata->base = base;
 		desc.groups = coresight_funnel_groups;
+		desc.access = CSDEV_ACCESS_IOMEM(base);
 	}
 
 	dev_set_drvdata(dev, drvdata);
diff --git a/drivers/hwtracing/coresight/coresight-replicator.c b/drivers/hwtracing/coresight/coresight-replicator.c
index 34fc2f6f3ea9..b86acbc74cf0 100644
--- a/drivers/hwtracing/coresight/coresight-replicator.c
+++ b/drivers/hwtracing/coresight/coresight-replicator.c
@@ -45,12 +45,14 @@ struct replicator_drvdata {
 
 static void dynamic_replicator_reset(struct replicator_drvdata *drvdata)
 {
+	struct coresight_device *csdev = drvdata->csdev;
+
 	CS_UNLOCK(drvdata->base);
 
-	if (!coresight_claim_device_unlocked(drvdata->base)) {
+	if (!coresight_claim_device_unlocked(csdev)) {
 		writel_relaxed(0xff, drvdata->base + REPLICATOR_IDFILTER0);
 		writel_relaxed(0xff, drvdata->base + REPLICATOR_IDFILTER1);
-		coresight_disclaim_device_unlocked(drvdata->base);
+		coresight_disclaim_device_unlocked(csdev);
 	}
 
 	CS_LOCK(drvdata->base);
@@ -70,6 +72,7 @@ static int dynamic_replicator_enable(struct replicator_drvdata *drvdata,
 {
 	int rc = 0;
 	u32 id0val, id1val;
+	struct coresight_device *csdev = drvdata->csdev;
 
 	CS_UNLOCK(drvdata->base);
 
@@ -84,7 +87,7 @@ static int dynamic_replicator_enable(struct replicator_drvdata *drvdata,
 		id0val = id1val = 0xff;
 
 	if (id0val == 0xff && id1val == 0xff)
-		rc = coresight_claim_device_unlocked(drvdata->base);
+		rc = coresight_claim_device_unlocked(csdev);
 
 	if (!rc) {
 		switch (outport) {
@@ -140,6 +143,7 @@ static void dynamic_replicator_disable(struct replicator_drvdata *drvdata,
 				       int inport, int outport)
 {
 	u32 reg;
+	struct coresight_device *csdev = drvdata->csdev;
 
 	switch (outport) {
 	case 0:
@@ -160,7 +164,7 @@ static void dynamic_replicator_disable(struct replicator_drvdata *drvdata,
 
 	if ((readl_relaxed(drvdata->base + REPLICATOR_IDFILTER0) == 0xff) &&
 	    (readl_relaxed(drvdata->base + REPLICATOR_IDFILTER1) == 0xff))
-		coresight_disclaim_device_unlocked(drvdata->base);
+		coresight_disclaim_device_unlocked(csdev);
 	CS_LOCK(drvdata->base);
 }
 
@@ -254,6 +258,7 @@ static int replicator_probe(struct device *dev, struct resource *res)
 		}
 		drvdata->base = base;
 		desc.groups = replicator_groups;
+		desc.access = CSDEV_ACCESS_IOMEM(base);
 	}
 
 	if (fwnode_property_present(dev_fwnode(dev),
diff --git a/drivers/hwtracing/coresight/coresight-stm.c b/drivers/hwtracing/coresight/coresight-stm.c
index 423df0d30d9c..58062a5a8238 100644
--- a/drivers/hwtracing/coresight/coresight-stm.c
+++ b/drivers/hwtracing/coresight/coresight-stm.c
@@ -258,6 +258,7 @@ static void stm_disable(struct coresight_device *csdev,
 			struct perf_event *event)
 {
 	struct stm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	struct csdev_access *csa = &csdev->access;
 
 	/*
 	 * For as long as the tracer isn't disabled another entity can't
@@ -270,7 +271,7 @@ static void stm_disable(struct coresight_device *csdev,
 		spin_unlock(&drvdata->spinlock);
 
 		/* Wait until the engine has completely stopped */
-		coresight_timeout(drvdata->base, STMTCSR, STMTCSR_BUSY_BIT, 0);
+		coresight_timeout(csa, STMTCSR, STMTCSR_BUSY_BIT, 0);
 
 		pm_runtime_put(csdev->dev.parent);
 
@@ -884,6 +885,7 @@ static int stm_probe(struct amba_device *adev, const struct amba_id *id)
 	if (IS_ERR(base))
 		return PTR_ERR(base);
 	drvdata->base = base;
+	desc.access = CSDEV_ACCESS_IOMEM(base);
 
 	ret = stm_get_stimulus_area(dev, &ch_res);
 	if (ret)
diff --git a/drivers/hwtracing/coresight/coresight-tmc-core.c b/drivers/hwtracing/coresight/coresight-tmc-core.c
index e29b3914fc0f..74c6323d4d6a 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-core.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-core.c
@@ -33,16 +33,20 @@ DEFINE_CORESIGHT_DEVLIST(etr_devs, "tmc_etr");
 
 void tmc_wait_for_tmcready(struct tmc_drvdata *drvdata)
 {
+	struct coresight_device *csdev = drvdata->csdev;
+	struct csdev_access *csa = &csdev->access;
+
 	/* Ensure formatter, unformatter and hardware fifo are empty */
-	if (coresight_timeout(drvdata->base,
-			      TMC_STS, TMC_STS_TMCREADY_BIT, 1)) {
-		dev_err(&drvdata->csdev->dev,
+	if (coresight_timeout(csa, TMC_STS, TMC_STS_TMCREADY_BIT, 1)) {
+		dev_err(&csdev->dev,
 			"timeout while waiting for TMC to be Ready\n");
 	}
 }
 
 void tmc_flush_and_stop(struct tmc_drvdata *drvdata)
 {
+	struct coresight_device *csdev = drvdata->csdev;
+	struct csdev_access *csa = &csdev->access;
 	u32 ffcr;
 
 	ffcr = readl_relaxed(drvdata->base + TMC_FFCR);
@@ -51,9 +55,8 @@ void tmc_flush_and_stop(struct tmc_drvdata *drvdata)
 	ffcr |= BIT(TMC_FFCR_FLUSHMAN_BIT);
 	writel_relaxed(ffcr, drvdata->base + TMC_FFCR);
 	/* Ensure flush completes */
-	if (coresight_timeout(drvdata->base,
-			      TMC_FFCR, TMC_FFCR_FLUSHMAN_BIT, 0)) {
-		dev_err(&drvdata->csdev->dev,
+	if (coresight_timeout(csa, TMC_FFCR, TMC_FFCR_FLUSHMAN_BIT, 0)) {
+		dev_err(&csdev->dev,
 		"timeout while waiting for completion of Manual Flush\n");
 	}
 
@@ -456,6 +459,7 @@ static int tmc_probe(struct amba_device *adev, const struct amba_id *id)
 	}
 
 	drvdata->base = base;
+	desc.access = CSDEV_ACCESS_IOMEM(base);
 
 	spin_lock_init(&drvdata->spinlock);
 
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c
index 989d965f3d90..45b85edfc690 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-etf.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c
@@ -37,7 +37,7 @@ static void __tmc_etb_enable_hw(struct tmc_drvdata *drvdata)
 
 static int tmc_etb_enable_hw(struct tmc_drvdata *drvdata)
 {
-	int rc = coresight_claim_device(drvdata->base);
+	int rc = coresight_claim_device(drvdata->csdev);
 
 	if (rc)
 		return rc;
@@ -88,7 +88,7 @@ static void __tmc_etb_disable_hw(struct tmc_drvdata *drvdata)
 static void tmc_etb_disable_hw(struct tmc_drvdata *drvdata)
 {
 	__tmc_etb_disable_hw(drvdata);
-	coresight_disclaim_device(drvdata->base);
+	coresight_disclaim_device(drvdata->csdev);
 }
 
 static void __tmc_etf_enable_hw(struct tmc_drvdata *drvdata)
@@ -109,7 +109,7 @@ static void __tmc_etf_enable_hw(struct tmc_drvdata *drvdata)
 
 static int tmc_etf_enable_hw(struct tmc_drvdata *drvdata)
 {
-	int rc = coresight_claim_device(drvdata->base);
+	int rc = coresight_claim_device(drvdata->csdev);
 
 	if (rc)
 		return rc;
@@ -120,11 +120,13 @@ static int tmc_etf_enable_hw(struct tmc_drvdata *drvdata)
 
 static void tmc_etf_disable_hw(struct tmc_drvdata *drvdata)
 {
+	struct coresight_device *csdev = drvdata->csdev;
+
 	CS_UNLOCK(drvdata->base);
 
 	tmc_flush_and_stop(drvdata);
 	tmc_disable_hw(drvdata);
-	coresight_disclaim_device_unlocked(drvdata->base);
+	coresight_disclaim_device_unlocked(csdev);
 	CS_LOCK(drvdata->base);
 }
 
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c
index bf5230e39c5b..acdb59e0e661 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
@@ -1040,7 +1040,7 @@ static int tmc_etr_enable_hw(struct tmc_drvdata *drvdata,
 	rc = tmc_etr_enable_catu(drvdata, etr_buf);
 	if (rc)
 		return rc;
-	rc = coresight_claim_device(drvdata->base);
+	rc = coresight_claim_device(drvdata->csdev);
 	if (!rc) {
 		drvdata->etr_buf = etr_buf;
 		__tmc_etr_enable_hw(drvdata);
@@ -1134,7 +1134,7 @@ void tmc_etr_disable_hw(struct tmc_drvdata *drvdata)
 	__tmc_etr_disable_hw(drvdata);
 	/* Disable CATU device if this ETR is connected to one */
 	tmc_etr_disable_catu(drvdata);
-	coresight_disclaim_device(drvdata->base);
+	coresight_disclaim_device(drvdata->csdev);
 	/* Reset the ETR buf used by hardware */
 	drvdata->etr_buf = NULL;
 }
diff --git a/drivers/hwtracing/coresight/coresight-tpiu.c b/drivers/hwtracing/coresight/coresight-tpiu.c
index f77c4b0ea4aa..34d37abd2c8d 100644
--- a/drivers/hwtracing/coresight/coresight-tpiu.c
+++ b/drivers/hwtracing/coresight/coresight-tpiu.c
@@ -60,49 +60,45 @@ struct tpiu_drvdata {
 	struct coresight_device	*csdev;
 };
 
-static void tpiu_enable_hw(struct tpiu_drvdata *drvdata)
+static void tpiu_enable_hw(struct csdev_access *csa)
 {
-	CS_UNLOCK(drvdata->base);
+	CS_UNLOCK(csa->base);
 
 	/* TODO: fill this up */
 
-	CS_LOCK(drvdata->base);
+	CS_LOCK(csa->base);
 }
 
 static int tpiu_enable(struct coresight_device *csdev, u32 mode, void *__unused)
 {
-	struct tpiu_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
-
-	tpiu_enable_hw(drvdata);
+	tpiu_enable_hw(&csdev->access);
 	atomic_inc(csdev->refcnt);
 	dev_dbg(&csdev->dev, "TPIU enabled\n");
 	return 0;
 }
 
-static void tpiu_disable_hw(struct tpiu_drvdata *drvdata)
+static void tpiu_disable_hw(struct csdev_access *csa)
 {
-	CS_UNLOCK(drvdata->base);
+	CS_UNLOCK(csa->base);
 
 	/* Clear formatter and stop on flush */
-	writel_relaxed(FFCR_STOP_FI, drvdata->base + TPIU_FFCR);
+	csdev_access_relaxed_write32(csa, FFCR_STOP_FI, TPIU_FFCR);
 	/* Generate manual flush */
-	writel_relaxed(FFCR_STOP_FI | FFCR_FON_MAN, drvdata->base + TPIU_FFCR);
+	csdev_access_relaxed_write32(csa, FFCR_STOP_FI | FFCR_FON_MAN, TPIU_FFCR);
 	/* Wait for flush to complete */
-	coresight_timeout(drvdata->base, TPIU_FFCR, FFCR_FON_MAN_BIT, 0);
+	coresight_timeout(csa, TPIU_FFCR, FFCR_FON_MAN_BIT, 0);
 	/* Wait for formatter to stop */
-	coresight_timeout(drvdata->base, TPIU_FFSR, FFSR_FT_STOPPED_BIT, 1);
+	coresight_timeout(csa, TPIU_FFSR, FFSR_FT_STOPPED_BIT, 1);
 
-	CS_LOCK(drvdata->base);
+	CS_LOCK(csa->base);
 }
 
 static int tpiu_disable(struct coresight_device *csdev)
 {
-	struct tpiu_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
-
 	if (atomic_dec_return(csdev->refcnt))
 		return -EBUSY;
 
-	tpiu_disable_hw(drvdata);
+	tpiu_disable_hw(&csdev->access);
 
 	dev_dbg(&csdev->dev, "TPIU disabled\n");
 	return 0;
@@ -149,9 +145,10 @@ static int tpiu_probe(struct amba_device *adev, const struct amba_id *id)
 		return PTR_ERR(base);
 
 	drvdata->base = base;
+	desc.access = CSDEV_ACCESS_IOMEM(base);
 
 	/* Disable tpiu to support older devices */
-	tpiu_disable_hw(drvdata);
+	tpiu_disable_hw(&desc.access);
 
 	pdata = coresight_get_platform_data(dev);
 	if (IS_ERR(pdata))
diff --git a/drivers/interconnect/qcom/Kconfig b/drivers/interconnect/qcom/Kconfig
index b3fb5b02bcf1..ca52647f8955 100644
--- a/drivers/interconnect/qcom/Kconfig
+++ b/drivers/interconnect/qcom/Kconfig
@@ -17,6 +17,15 @@ config INTERCONNECT_QCOM_MSM8916
 	  This is a driver for the Qualcomm Network-on-Chip on msm8916-based
 	  platforms.
 
+config INTERCONNECT_QCOM_MSM8939
+	tristate "Qualcomm MSM8939 interconnect driver"
+	depends on INTERCONNECT_QCOM
+	depends on QCOM_SMD_RPM
+	select INTERCONNECT_QCOM_SMD_RPM
+	help
+	  This is a driver for the Qualcomm Network-on-Chip on msm8939-based
+	  platforms.
+
 config INTERCONNECT_QCOM_MSM8974
 	tristate "Qualcomm MSM8974 interconnect driver"
 	depends on INTERCONNECT_QCOM
@@ -74,6 +83,15 @@ config INTERCONNECT_QCOM_SDM845
 	  This is a driver for the Qualcomm Network-on-Chip on sdm845-based
 	  platforms.
 
+config INTERCONNECT_QCOM_SDX55
+	tristate "Qualcomm SDX55 interconnect driver"
+	depends on INTERCONNECT_QCOM_RPMH_POSSIBLE
+	select INTERCONNECT_QCOM_RPMH
+	select INTERCONNECT_QCOM_BCM_VOTER
+	help
+	  This is a driver for the Qualcomm Network-on-Chip on sdx55-based
+	  platforms.
+
 config INTERCONNECT_QCOM_SM8150
 	tristate "Qualcomm SM8150 interconnect driver"
 	depends on INTERCONNECT_QCOM_RPMH_POSSIBLE
diff --git a/drivers/interconnect/qcom/Makefile b/drivers/interconnect/qcom/Makefile
index cf628f7990cd..c6a735df067e 100644
--- a/drivers/interconnect/qcom/Makefile
+++ b/drivers/interconnect/qcom/Makefile
@@ -2,24 +2,28 @@
 
 icc-bcm-voter-objs			:= bcm-voter.o
 qnoc-msm8916-objs			:= msm8916.o
+qnoc-msm8939-objs			:= msm8939.o
 qnoc-msm8974-objs			:= msm8974.o
 icc-osm-l3-objs				:= osm-l3.o
 qnoc-qcs404-objs			:= qcs404.o
 icc-rpmh-obj				:= icc-rpmh.o
 qnoc-sc7180-objs			:= sc7180.o
 qnoc-sdm845-objs			:= sdm845.o
+qnoc-sdx55-objs				:= sdx55.o
 qnoc-sm8150-objs			:= sm8150.o
 qnoc-sm8250-objs			:= sm8250.o
-icc-smd-rpm-objs			:= smd-rpm.o
+icc-smd-rpm-objs			:= smd-rpm.o icc-rpm.o
 
 obj-$(CONFIG_INTERCONNECT_QCOM_BCM_VOTER) += icc-bcm-voter.o
 obj-$(CONFIG_INTERCONNECT_QCOM_MSM8916) += qnoc-msm8916.o
+obj-$(CONFIG_INTERCONNECT_QCOM_MSM8939) += qnoc-msm8939.o
 obj-$(CONFIG_INTERCONNECT_QCOM_MSM8974) += qnoc-msm8974.o
 obj-$(CONFIG_INTERCONNECT_QCOM_OSM_L3) += icc-osm-l3.o
 obj-$(CONFIG_INTERCONNECT_QCOM_QCS404) += qnoc-qcs404.o
 obj-$(CONFIG_INTERCONNECT_QCOM_RPMH) += icc-rpmh.o
 obj-$(CONFIG_INTERCONNECT_QCOM_SC7180) += qnoc-sc7180.o
 obj-$(CONFIG_INTERCONNECT_QCOM_SDM845) += qnoc-sdm845.o
+obj-$(CONFIG_INTERCONNECT_QCOM_SDX55) += qnoc-sdx55.o
 obj-$(CONFIG_INTERCONNECT_QCOM_SM8150) += qnoc-sm8150.o
 obj-$(CONFIG_INTERCONNECT_QCOM_SM8250) += qnoc-sm8250.o
 obj-$(CONFIG_INTERCONNECT_QCOM_SMD_RPM) += icc-smd-rpm.o
diff --git a/drivers/interconnect/qcom/icc-rpm.c b/drivers/interconnect/qcom/icc-rpm.c
new file mode 100644
index 000000000000..cc6095492cbe
--- /dev/null
+++ b/drivers/interconnect/qcom/icc-rpm.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 Linaro Ltd
+ */
+
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/interconnect-provider.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "smd-rpm.h"
+#include "icc-rpm.h"
+
+/* Vote the aggregated bandwidth to the RPM, then rescale the bus clocks. */
+static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
+{
+	struct qcom_icc_provider *qp;
+	struct qcom_icc_node *qn;
+	struct icc_provider *provider;
+	struct icc_node *n;
+	u64 sum_bw, max_peak_bw, rate;
+	u32 agg_avg = 0, agg_peak = 0;
+	int ret, i;
+
+	qn = src->data;
+	provider = src->provider;
+	qp = to_qcom_provider(provider);
+
+	/* fold every node of this provider into one avg/peak pair */
+	list_for_each_entry(n, &provider->nodes, node_list)
+		provider->aggregate(n, 0, n->avg_bw, n->peak_bw,
+				    &agg_avg, &agg_peak);
+
+	sum_bw = icc_units_to_bps(agg_avg);
+	max_peak_bw = icc_units_to_bps(agg_peak);
+
+	/* send bandwidth request message to the RPM processor; id -1 skips it */
+	if (qn->mas_rpm_id != -1) {
+		ret = qcom_icc_rpm_smd_send(QCOM_SMD_RPM_ACTIVE_STATE,
+					    RPM_BUS_MASTER_REQ,
+					    qn->mas_rpm_id,
+					    sum_bw);
+		if (ret) {
+			pr_err("qcom_icc_rpm_smd_send mas %d error %d\n",
+			       qn->mas_rpm_id, ret);
+			return ret;
+		}
+	}
+
+	if (qn->slv_rpm_id != -1) {
+		ret = qcom_icc_rpm_smd_send(QCOM_SMD_RPM_ACTIVE_STATE,
+					    RPM_BUS_SLAVE_REQ,
+					    qn->slv_rpm_id,
+					    sum_bw);
+		if (ret) {
+			pr_err("qcom_icc_rpm_smd_send slv %d error %d\n",
+			       qn->slv_rpm_id, ret);
+			return ret;
+		}
+	}
+
+	rate = max(sum_bw, max_peak_bw);
+
+	do_div(rate, qn->buswidth);
+
+	/* the clocks were already set to this rate on a previous call */
+	if (qn->rate == rate)
+		return 0;
+
+	for (i = 0; i < qp->num_clks; i++) {
+		ret = clk_set_rate(qp->bus_clks[i].clk, rate);
+		if (ret) {
+			pr_err("%s clk_set_rate error: %d\n",
+			       qp->bus_clks[i].id, ret);
+			return ret;
+		}
+	}
+
+	qn->rate = rate;
+
+	return 0;
+}
+
+int qnoc_probe(struct platform_device *pdev, size_t cd_size, int cd_num,
+	       const struct clk_bulk_data *cd)
+{
+	const struct qcom_icc_desc *desc;
+	struct qcom_icc_provider *qp;
+	struct icc_onecell_data *data;
+	struct icc_provider *provider;
+	struct device *dev = &pdev->dev;
+	size_t idx;
+	int ret;
+
+	/* defer until the RPM proxy is available */
+	if (!qcom_icc_rpm_smd_available())
+		return -EPROBE_DEFER;
+
+	desc = of_device_get_match_data(dev);
+	if (!desc)
+		return -EINVAL;
+
+	qp = devm_kzalloc(dev, sizeof(*qp), GFP_KERNEL);
+	if (!qp)
+		return -ENOMEM;
+
+	data = devm_kzalloc(dev, struct_size(data, nodes, desc->num_nodes),
+			    GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	/* work on a device-managed copy of the caller's const clock table */
+	qp->bus_clks = devm_kmemdup(dev, cd, cd_size, GFP_KERNEL);
+	if (!qp->bus_clks)
+		return -ENOMEM;
+	qp->num_clks = cd_num;
+
+	ret = devm_clk_bulk_get(dev, qp->num_clks, qp->bus_clks);
+	if (ret)
+		return ret;
+
+	ret = clk_bulk_prepare_enable(qp->num_clks, qp->bus_clks);
+	if (ret)
+		return ret;
+
+	provider = &qp->provider;
+	INIT_LIST_HEAD(&provider->nodes);
+	provider->dev = dev;
+	provider->set = qcom_icc_set;
+	provider->aggregate = icc_std_aggregate;
+	provider->xlate = of_icc_xlate_onecell;
+	provider->data = data;
+
+	ret = icc_provider_add(provider);
+	if (ret) {
+		dev_err(dev, "error adding interconnect provider: %d\n", ret);
+		goto err_disable_clks;
+	}
+
+	/* create each node of the table and wire up its outgoing links */
+	for (idx = 0; idx < desc->num_nodes; idx++) {
+		struct qcom_icc_node *qn = desc->nodes[idx];
+		struct icc_node *node;
+		size_t l;
+
+		node = icc_node_create(qn->id);
+		if (IS_ERR(node)) {
+			ret = PTR_ERR(node);
+			goto err_remove_nodes;
+		}
+
+		node->name = qn->name;
+		node->data = qn;
+		icc_node_add(node, provider);
+
+		for (l = 0; l < qn->num_links; l++)
+			icc_link_create(node, qn->links[l]);
+
+		data->nodes[idx] = node;
+	}
+	data->num_nodes = desc->num_nodes;
+
+	platform_set_drvdata(pdev, qp);
+	return 0;
+
+err_remove_nodes:
+	icc_nodes_remove(provider);
+	clk_bulk_disable_unprepare(qp->num_clks, qp->bus_clks);
+	icc_provider_del(provider);
+	return ret;
+
+err_disable_clks:
+	clk_bulk_disable_unprepare(qp->num_clks, qp->bus_clks);
+	return ret;
+}
+EXPORT_SYMBOL(qnoc_probe);
+
+int qnoc_remove(struct platform_device *pdev)
+{
+	struct qcom_icc_provider *qp = platform_get_drvdata(pdev);
+
+	icc_nodes_remove(&qp->provider);
+	clk_bulk_disable_unprepare(qp->num_clks, qp->bus_clks);
+	return icc_provider_del(&qp->provider);
+}
+EXPORT_SYMBOL(qnoc_remove);
diff --git a/drivers/interconnect/qcom/icc-rpm.h b/drivers/interconnect/qcom/icc-rpm.h
new file mode 100644
index 000000000000..79a6f68249c1
--- /dev/null
+++ b/drivers/interconnect/qcom/icc-rpm.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Linaro Ltd
+ */
+
+#ifndef __DRIVERS_INTERCONNECT_QCOM_ICC_RPM_H
+#define __DRIVERS_INTERCONNECT_QCOM_ICC_RPM_H
+
+#define RPM_BUS_MASTER_REQ	0x73616d62
+#define RPM_BUS_SLAVE_REQ	0x766c7362
+
+#define QCOM_MAX_LINKS 12
+
+#define to_qcom_provider(_provider) \
+	container_of(_provider, struct qcom_icc_provider, provider)
+
+/**
+ * struct qcom_icc_provider - Qualcomm specific interconnect provider
+ * @provider: generic interconnect provider
+ * @bus_clks: device-managed copy of the clk_bulk_data table of bus clocks
+ * @num_clks: the number of entries in @bus_clks
+ */
+struct qcom_icc_provider {
+	struct icc_provider provider;
+	struct clk_bulk_data *bus_clks;
+	int num_clks;
+};
+
+/**
+ * struct qcom_icc_node - Qualcomm specific interconnect nodes
+ * @name: the node name used in debugfs
+ * @id: a unique node identifier
+ * @links: ids of the nodes where we can go next while traversing
+ * @num_links: the number of valid entries in @links
+ * @buswidth: width of the interconnect between a node and the bus (bytes)
+ * @mas_rpm_id:	RPM id for bus master requests, -1 to skip the request
+ * @slv_rpm_id:	RPM id for bus slave requests, -1 to skip the request
+ * @rate: bus clock rate last set for this node, in Hz
+ */
+struct qcom_icc_node {
+	unsigned char *name;
+	u16 id;
+	u16 links[QCOM_MAX_LINKS];
+	u16 num_links;
+	u16 buswidth;
+	int mas_rpm_id;
+	int slv_rpm_id;
+	u64 rate;
+};
+
+struct qcom_icc_desc {
+	struct qcom_icc_node **nodes;
+	size_t num_nodes;
+};
+
+#define DEFINE_QNODE(_name, _id, _buswidth, _mas_rpm_id, _slv_rpm_id,	\
+		     ...)						\
+		static struct qcom_icc_node _name = {			\
+		.name = #_name,						\
+		.id = _id,						\
+		.buswidth = _buswidth,					\
+		.mas_rpm_id = _mas_rpm_id,				\
+		.slv_rpm_id = _slv_rpm_id,				\
+		.num_links = ARRAY_SIZE(((int[]){ __VA_ARGS__ })),	\
+		.links = { __VA_ARGS__ },				\
+	}
+
+
+int qnoc_probe(struct platform_device *pdev, size_t cd_size, int cd_num,
+	       const struct clk_bulk_data *cd);
+int qnoc_remove(struct platform_device *pdev);
+
+#endif
diff --git a/drivers/interconnect/qcom/msm8916.c b/drivers/interconnect/qcom/msm8916.c
index e8371d40ab8d..fc3689c8947a 100644
--- a/drivers/interconnect/qcom/msm8916.c
+++ b/drivers/interconnect/qcom/msm8916.c
@@ -15,9 +15,7 @@
 #include <dt-bindings/interconnect/qcom,msm8916.h>
 
 #include "smd-rpm.h"
-
-#define RPM_BUS_MASTER_REQ      0x73616d62
-#define RPM_BUS_SLAVE_REQ       0x766c7362
+#include "icc-rpm.h"
 
 enum {
 	MSM8916_BIMC_SNOC_MAS = 1,
@@ -107,67 +105,11 @@ enum {
 	MSM8916_SNOC_PNOC_SLV,
 };
 
-#define to_msm8916_provider(_provider) \
-	container_of(_provider, struct msm8916_icc_provider, provider)
-
 static const struct clk_bulk_data msm8916_bus_clocks[] = {
 	{ .id = "bus" },
 	{ .id = "bus_a" },
 };
 
-/**
- * struct msm8916_icc_provider - Qualcomm specific interconnect provider
- * @provider: generic interconnect provider
- * @bus_clks: the clk_bulk_data table of bus clocks
- * @num_clks: the total number of clk_bulk_data entries
- */
-struct msm8916_icc_provider {
-	struct icc_provider provider;
-	struct clk_bulk_data *bus_clks;
-	int num_clks;
-};
-
-#define MSM8916_MAX_LINKS	8
-
-/**
- * struct msm8916_icc_node - Qualcomm specific interconnect nodes
- * @name: the node name used in debugfs
- * @id: a unique node identifier
- * @links: an array of nodes where we can go next while traversing
- * @num_links: the total number of @links
- * @buswidth: width of the interconnect between a node and the bus (bytes)
- * @mas_rpm_id:	RPM ID for devices that are bus masters
- * @slv_rpm_id:	RPM ID for devices that are bus slaves
- * @rate: current bus clock rate in Hz
- */
-struct msm8916_icc_node {
-	unsigned char *name;
-	u16 id;
-	u16 links[MSM8916_MAX_LINKS];
-	u16 num_links;
-	u16 buswidth;
-	int mas_rpm_id;
-	int slv_rpm_id;
-	u64 rate;
-};
-
-struct msm8916_icc_desc {
-	struct msm8916_icc_node **nodes;
-	size_t num_nodes;
-};
-
-#define DEFINE_QNODE(_name, _id, _buswidth, _mas_rpm_id, _slv_rpm_id,	\
-					...)				\
-		static struct msm8916_icc_node _name = {		\
-		.name = #_name,						\
-		.id = _id,						\
-		.buswidth = _buswidth,					\
-		.mas_rpm_id = _mas_rpm_id,				\
-		.slv_rpm_id = _slv_rpm_id,				\
-		.num_links = ARRAY_SIZE(((int[]){ __VA_ARGS__ })),	\
-		.links = { __VA_ARGS__ },				\
-	}
-
 DEFINE_QNODE(bimc_snoc_mas, MSM8916_BIMC_SNOC_MAS, 8, -1, -1, MSM8916_BIMC_SNOC_SLV);
 DEFINE_QNODE(bimc_snoc_slv, MSM8916_BIMC_SNOC_SLV, 8, -1, -1, MSM8916_SNOC_INT_0, MSM8916_SNOC_INT_1);
 DEFINE_QNODE(mas_apss, MSM8916_MASTER_AMPSS_M0, 8, -1, -1, MSM8916_SLAVE_EBI_CH0, MSM8916_BIMC_SNOC_MAS, MSM8916_SLAVE_AMPSS_L2);
@@ -254,7 +196,7 @@ DEFINE_QNODE(snoc_int_bimc, MSM8916_SNOC_INT_BIMC, 8, 101, 132, MSM8916_SNOC_BIM
 DEFINE_QNODE(snoc_pcnoc_mas, MSM8916_SNOC_PNOC_MAS, 8, -1, -1, MSM8916_SNOC_PNOC_SLV);
 DEFINE_QNODE(snoc_pcnoc_slv, MSM8916_SNOC_PNOC_SLV, 8, -1, -1, MSM8916_PNOC_INT_0);
 
-static struct msm8916_icc_node *msm8916_snoc_nodes[] = {
+static struct qcom_icc_node *msm8916_snoc_nodes[] = {
 	[BIMC_SNOC_SLV] = &bimc_snoc_slv,
 	[MASTER_JPEG] = &mas_jpeg,
 	[MASTER_MDP_PORT0] = &mas_mdp,
@@ -283,12 +225,12 @@ static struct msm8916_icc_node *msm8916_snoc_nodes[] = {
 	[SNOC_QDSS_INT] = &qdss_int,
 };
 
-static struct msm8916_icc_desc msm8916_snoc = {
+static struct qcom_icc_desc msm8916_snoc = {
 	.nodes = msm8916_snoc_nodes,
 	.num_nodes = ARRAY_SIZE(msm8916_snoc_nodes),
 };
 
-static struct msm8916_icc_node *msm8916_bimc_nodes[] = {
+static struct qcom_icc_node *msm8916_bimc_nodes[] = {
 	[BIMC_SNOC_MAS] = &bimc_snoc_mas,
 	[MASTER_AMPSS_M0] = &mas_apss,
 	[MASTER_GRAPHICS_3D] = &mas_gfx,
@@ -300,12 +242,12 @@ static struct msm8916_icc_node *msm8916_bimc_nodes[] = {
 	[SNOC_BIMC_1_SLV] = &snoc_bimc_1_slv,
 };
 
-static struct msm8916_icc_desc msm8916_bimc = {
+static struct qcom_icc_desc msm8916_bimc = {
 	.nodes = msm8916_bimc_nodes,
 	.num_nodes = ARRAY_SIZE(msm8916_bimc_nodes),
 };
 
-static struct msm8916_icc_node *msm8916_pcnoc_nodes[] = {
+static struct qcom_icc_node *msm8916_pcnoc_nodes[] = {
 	[MASTER_BLSP_1] = &mas_blsp_1,
 	[MASTER_DEHR] = &mas_dehr,
 	[MASTER_LPASS] = &mas_audio,
@@ -358,178 +300,15 @@ static struct msm8916_icc_node *msm8916_pcnoc_nodes[] = {
 	[SNOC_PCNOC_SLV] = &snoc_pcnoc_slv,
 };
 
-static struct msm8916_icc_desc msm8916_pcnoc = {
+static struct qcom_icc_desc msm8916_pcnoc = {
 	.nodes = msm8916_pcnoc_nodes,
 	.num_nodes = ARRAY_SIZE(msm8916_pcnoc_nodes),
 };
 
-static int msm8916_icc_set(struct icc_node *src, struct icc_node *dst)
-{
-	struct msm8916_icc_provider *qp;
-	struct msm8916_icc_node *qn;
-	u64 sum_bw, max_peak_bw, rate;
-	u32 agg_avg = 0, agg_peak = 0;
-	struct icc_provider *provider;
-	struct icc_node *n;
-	int ret, i;
-
-	qn = src->data;
-	provider = src->provider;
-	qp = to_msm8916_provider(provider);
-
-	list_for_each_entry(n, &provider->nodes, node_list)
-		provider->aggregate(n, 0, n->avg_bw, n->peak_bw,
-				    &agg_avg, &agg_peak);
-
-	sum_bw = icc_units_to_bps(agg_avg);
-	max_peak_bw = icc_units_to_bps(agg_peak);
-
-	/* send bandwidth request message to the RPM processor */
-	if (qn->mas_rpm_id != -1) {
-		ret = qcom_icc_rpm_smd_send(QCOM_SMD_RPM_ACTIVE_STATE,
-					    RPM_BUS_MASTER_REQ,
-					    qn->mas_rpm_id,
-					    sum_bw);
-		if (ret) {
-			pr_err("qcom_icc_rpm_smd_send mas %d error %d\n",
-			       qn->mas_rpm_id, ret);
-			return ret;
-		}
-	}
-
-	if (qn->slv_rpm_id != -1) {
-		ret = qcom_icc_rpm_smd_send(QCOM_SMD_RPM_ACTIVE_STATE,
-					    RPM_BUS_SLAVE_REQ,
-					    qn->slv_rpm_id,
-					    sum_bw);
-		if (ret) {
-			pr_err("qcom_icc_rpm_smd_send slv error %d\n",
-			       ret);
-			return ret;
-		}
-	}
-
-	rate = max(sum_bw, max_peak_bw);
-
-	do_div(rate, qn->buswidth);
-
-	if (qn->rate == rate)
-		return 0;
-
-	for (i = 0; i < qp->num_clks; i++) {
-		ret = clk_set_rate(qp->bus_clks[i].clk, rate);
-		if (ret) {
-			pr_err("%s clk_set_rate error: %d\n",
-			       qp->bus_clks[i].id, ret);
-			return ret;
-		}
-	}
-
-	qn->rate = rate;
-
-	return 0;
-}
-
 static int msm8916_qnoc_probe(struct platform_device *pdev)
 {
-	const struct msm8916_icc_desc *desc;
-	struct msm8916_icc_node **qnodes;
-	struct msm8916_icc_provider *qp;
-	struct device *dev = &pdev->dev;
-	struct icc_onecell_data *data;
-	struct icc_provider *provider;
-	struct icc_node *node;
-	size_t num_nodes, i;
-	int ret;
-
-	/* wait for the RPM proxy */
-	if (!qcom_icc_rpm_smd_available())
-		return -EPROBE_DEFER;
-
-	desc = of_device_get_match_data(dev);
-	if (!desc)
-		return -EINVAL;
-
-	qnodes = desc->nodes;
-	num_nodes = desc->num_nodes;
-
-	qp = devm_kzalloc(dev, sizeof(*qp), GFP_KERNEL);
-	if (!qp)
-		return -ENOMEM;
-
-	data = devm_kzalloc(dev, struct_size(data, nodes, num_nodes),
-			    GFP_KERNEL);
-	if (!data)
-		return -ENOMEM;
-
-	qp->bus_clks = devm_kmemdup(dev, msm8916_bus_clocks,
-				    sizeof(msm8916_bus_clocks), GFP_KERNEL);
-	if (!qp->bus_clks)
-		return -ENOMEM;
-
-	qp->num_clks = ARRAY_SIZE(msm8916_bus_clocks);
-	ret = devm_clk_bulk_get(dev, qp->num_clks, qp->bus_clks);
-	if (ret)
-		return ret;
-
-	ret = clk_bulk_prepare_enable(qp->num_clks, qp->bus_clks);
-	if (ret)
-		return ret;
-
-	provider = &qp->provider;
-	INIT_LIST_HEAD(&provider->nodes);
-	provider->dev = dev;
-	provider->set = msm8916_icc_set;
-	provider->aggregate = icc_std_aggregate;
-	provider->xlate = of_icc_xlate_onecell;
-	provider->data = data;
-
-	ret = icc_provider_add(provider);
-	if (ret) {
-		dev_err(dev, "error adding interconnect provider: %d\n", ret);
-		clk_bulk_disable_unprepare(qp->num_clks, qp->bus_clks);
-		return ret;
-	}
-
-	for (i = 0; i < num_nodes; i++) {
-		size_t j;
-
-		node = icc_node_create(qnodes[i]->id);
-		if (IS_ERR(node)) {
-			ret = PTR_ERR(node);
-			goto err;
-		}
-
-		node->name = qnodes[i]->name;
-		node->data = qnodes[i];
-		icc_node_add(node, provider);
-
-		for (j = 0; j < qnodes[i]->num_links; j++)
-			icc_link_create(node, qnodes[i]->links[j]);
-
-		data->nodes[i] = node;
-	}
-	data->num_nodes = num_nodes;
-
-	platform_set_drvdata(pdev, qp);
-
-	return 0;
-
-err:
-	icc_nodes_remove(provider);
-	icc_provider_del(provider);
-	clk_bulk_disable_unprepare(qp->num_clks, qp->bus_clks);
-
-	return ret;
-}
-
-static int msm8916_qnoc_remove(struct platform_device *pdev)
-{
-	struct msm8916_icc_provider *qp = platform_get_drvdata(pdev);
-
-	icc_nodes_remove(&qp->provider);
-	clk_bulk_disable_unprepare(qp->num_clks, qp->bus_clks);
-	return icc_provider_del(&qp->provider);
+	return qnoc_probe(pdev, sizeof(msm8916_bus_clocks),
+			  ARRAY_SIZE(msm8916_bus_clocks), msm8916_bus_clocks);
 }
 
 static const struct of_device_id msm8916_noc_of_match[] = {
@@ -542,7 +321,7 @@ MODULE_DEVICE_TABLE(of, msm8916_noc_of_match);
 
 static struct platform_driver msm8916_noc_driver = {
 	.probe = msm8916_qnoc_probe,
-	.remove = msm8916_qnoc_remove,
+	.remove = qnoc_remove,
 	.driver = {
 		.name = "qnoc-msm8916",
 		.of_match_table = msm8916_noc_of_match,
diff --git a/drivers/interconnect/qcom/msm8939.c b/drivers/interconnect/qcom/msm8939.c
new file mode 100644
index 000000000000..dfbec30ed149
--- /dev/null
+++ b/drivers/interconnect/qcom/msm8939.c
@@ -0,0 +1,355 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 Linaro Ltd
+ * Author: Jun Nie <jun.nie@linaro.org>
+ * Based on the msm8916 interconnect driver by Georgi Djakov.
+ */
+
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/interconnect-provider.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/of_device.h>
+
+#include <dt-bindings/interconnect/qcom,msm8939.h>
+
+#include "smd-rpm.h"
+#include "icc-rpm.h"
+
+enum {
+	MSM8939_BIMC_SNOC_MAS = 1,
+	MSM8939_BIMC_SNOC_SLV,
+	MSM8939_MASTER_AMPSS_M0,
+	MSM8939_MASTER_LPASS,
+	MSM8939_MASTER_BLSP_1,
+	MSM8939_MASTER_DEHR,
+	MSM8939_MASTER_GRAPHICS_3D,
+	MSM8939_MASTER_JPEG,
+	MSM8939_MASTER_MDP_PORT0,
+	MSM8939_MASTER_MDP_PORT1,
+	MSM8939_MASTER_CPP,
+	MSM8939_MASTER_CRYPTO_CORE0,
+	MSM8939_MASTER_SDCC_1,
+	MSM8939_MASTER_SDCC_2,
+	MSM8939_MASTER_QDSS_BAM,
+	MSM8939_MASTER_QDSS_ETR,
+	MSM8939_MASTER_SNOC_CFG,
+	MSM8939_MASTER_SPDM,
+	MSM8939_MASTER_TCU0,
+	MSM8939_MASTER_USB_HS1,
+	MSM8939_MASTER_USB_HS2,
+	MSM8939_MASTER_VFE,
+	MSM8939_MASTER_VIDEO_P0,
+	MSM8939_SNOC_MM_INT_0,
+	MSM8939_SNOC_MM_INT_1,
+	MSM8939_SNOC_MM_INT_2,
+	MSM8939_PNOC_INT_0,
+	MSM8939_PNOC_INT_1,
+	MSM8939_PNOC_MAS_0,
+	MSM8939_PNOC_MAS_1,
+	MSM8939_PNOC_SLV_0,
+	MSM8939_PNOC_SLV_1,
+	MSM8939_PNOC_SLV_2,
+	MSM8939_PNOC_SLV_3,
+	MSM8939_PNOC_SLV_4,
+	MSM8939_PNOC_SLV_8,
+	MSM8939_PNOC_SLV_9,
+	MSM8939_PNOC_SNOC_MAS,
+	MSM8939_PNOC_SNOC_SLV,
+	MSM8939_SNOC_QDSS_INT,
+	MSM8939_SLAVE_AMPSS_L2,
+	MSM8939_SLAVE_APSS,
+	MSM8939_SLAVE_LPASS,
+	MSM8939_SLAVE_BIMC_CFG,
+	MSM8939_SLAVE_BLSP_1,
+	MSM8939_SLAVE_BOOT_ROM,
+	MSM8939_SLAVE_CAMERA_CFG,
+	MSM8939_SLAVE_CATS_128,
+	MSM8939_SLAVE_OCMEM_64,
+	MSM8939_SLAVE_CLK_CTL,
+	MSM8939_SLAVE_CRYPTO_0_CFG,
+	MSM8939_SLAVE_DEHR_CFG,
+	MSM8939_SLAVE_DISPLAY_CFG,
+	MSM8939_SLAVE_EBI_CH0,
+	MSM8939_SLAVE_GRAPHICS_3D_CFG,
+	MSM8939_SLAVE_IMEM_CFG,
+	MSM8939_SLAVE_IMEM,
+	MSM8939_SLAVE_MPM,
+	MSM8939_SLAVE_MSG_RAM,
+	MSM8939_SLAVE_MSS,
+	MSM8939_SLAVE_PDM,
+	MSM8939_SLAVE_PMIC_ARB,
+	MSM8939_SLAVE_PNOC_CFG,
+	MSM8939_SLAVE_PRNG,
+	MSM8939_SLAVE_QDSS_CFG,
+	MSM8939_SLAVE_QDSS_STM,
+	MSM8939_SLAVE_RBCPR_CFG,
+	MSM8939_SLAVE_SDCC_1,
+	MSM8939_SLAVE_SDCC_2,
+	MSM8939_SLAVE_SECURITY,
+	MSM8939_SLAVE_SNOC_CFG,
+	MSM8939_SLAVE_SPDM,
+	MSM8939_SLAVE_SRVC_SNOC,
+	MSM8939_SLAVE_TCSR,
+	MSM8939_SLAVE_TLMM,
+	MSM8939_SLAVE_USB_HS1,
+	MSM8939_SLAVE_USB_HS2,
+	MSM8939_SLAVE_VENUS_CFG,
+	MSM8939_SNOC_BIMC_0_MAS,
+	MSM8939_SNOC_BIMC_0_SLV,
+	MSM8939_SNOC_BIMC_1_MAS,
+	MSM8939_SNOC_BIMC_1_SLV,
+	MSM8939_SNOC_BIMC_2_MAS,
+	MSM8939_SNOC_BIMC_2_SLV,
+	MSM8939_SNOC_INT_0,
+	MSM8939_SNOC_INT_1,
+	MSM8939_SNOC_INT_BIMC,
+	MSM8939_SNOC_PNOC_MAS,
+	MSM8939_SNOC_PNOC_SLV,
+};
+
+static const struct clk_bulk_data msm8939_bus_clocks[] = {
+	{ .id = "bus" },
+	{ .id = "bus_a" },
+};
+
+DEFINE_QNODE(bimc_snoc_mas, MSM8939_BIMC_SNOC_MAS, 8, -1, -1, MSM8939_BIMC_SNOC_SLV);
+DEFINE_QNODE(bimc_snoc_slv, MSM8939_BIMC_SNOC_SLV, 16, -1, 2, MSM8939_SNOC_INT_0, MSM8939_SNOC_INT_1);
+DEFINE_QNODE(mas_apss, MSM8939_MASTER_AMPSS_M0, 16, -1, -1, MSM8939_SLAVE_EBI_CH0, MSM8939_BIMC_SNOC_MAS, MSM8939_SLAVE_AMPSS_L2);
+DEFINE_QNODE(mas_audio, MSM8939_MASTER_LPASS, 4, -1, -1, MSM8939_PNOC_MAS_0);
+DEFINE_QNODE(mas_blsp_1, MSM8939_MASTER_BLSP_1, 4, -1, -1, MSM8939_PNOC_MAS_1);
+DEFINE_QNODE(mas_dehr, MSM8939_MASTER_DEHR, 4, -1, -1, MSM8939_PNOC_MAS_0);
+DEFINE_QNODE(mas_gfx, MSM8939_MASTER_GRAPHICS_3D, 16, -1, -1, MSM8939_SLAVE_EBI_CH0, MSM8939_BIMC_SNOC_MAS, MSM8939_SLAVE_AMPSS_L2);
+DEFINE_QNODE(mas_jpeg, MSM8939_MASTER_JPEG, 16, -1, -1, MSM8939_SNOC_MM_INT_0, MSM8939_SNOC_MM_INT_2);
+DEFINE_QNODE(mas_mdp0, MSM8939_MASTER_MDP_PORT0, 16, -1, -1, MSM8939_SNOC_MM_INT_1, MSM8939_SNOC_MM_INT_2);
+DEFINE_QNODE(mas_mdp1, MSM8939_MASTER_MDP_PORT1, 16, -1, -1, MSM8939_SNOC_MM_INT_0, MSM8939_SNOC_MM_INT_2);
+DEFINE_QNODE(mas_cpp, MSM8939_MASTER_CPP, 16, -1, -1, MSM8939_SNOC_MM_INT_0, MSM8939_SNOC_MM_INT_2);
+DEFINE_QNODE(mas_pcnoc_crypto_0, MSM8939_MASTER_CRYPTO_CORE0, 8, -1, -1, MSM8939_PNOC_INT_1);
+DEFINE_QNODE(mas_pcnoc_sdcc_1, MSM8939_MASTER_SDCC_1, 8, -1, -1, MSM8939_PNOC_INT_1);
+DEFINE_QNODE(mas_pcnoc_sdcc_2, MSM8939_MASTER_SDCC_2, 8, -1, -1, MSM8939_PNOC_INT_1);
+DEFINE_QNODE(mas_qdss_bam, MSM8939_MASTER_QDSS_BAM, 8, -1, -1, MSM8939_SNOC_QDSS_INT);
+DEFINE_QNODE(mas_qdss_etr, MSM8939_MASTER_QDSS_ETR, 8, -1, -1, MSM8939_SNOC_QDSS_INT);
+DEFINE_QNODE(mas_snoc_cfg, MSM8939_MASTER_SNOC_CFG, 4, 20, -1, MSM8939_SLAVE_SRVC_SNOC);
+DEFINE_QNODE(mas_spdm, MSM8939_MASTER_SPDM, 4, -1, -1, MSM8939_PNOC_MAS_0);
+DEFINE_QNODE(mas_tcu0, MSM8939_MASTER_TCU0, 16, -1, -1, MSM8939_SLAVE_EBI_CH0, MSM8939_BIMC_SNOC_MAS, MSM8939_SLAVE_AMPSS_L2);
+DEFINE_QNODE(mas_usb_hs1, MSM8939_MASTER_USB_HS1, 4, -1, -1, MSM8939_PNOC_MAS_1);
+DEFINE_QNODE(mas_usb_hs2, MSM8939_MASTER_USB_HS2, 4, -1, -1, MSM8939_PNOC_MAS_1);
+DEFINE_QNODE(mas_vfe, MSM8939_MASTER_VFE, 16, -1, -1, MSM8939_SNOC_MM_INT_1, MSM8939_SNOC_MM_INT_2);
+DEFINE_QNODE(mas_video, MSM8939_MASTER_VIDEO_P0, 16, -1, -1, MSM8939_SNOC_MM_INT_0, MSM8939_SNOC_MM_INT_2);
+DEFINE_QNODE(mm_int_0, MSM8939_SNOC_MM_INT_0, 16, -1, -1, MSM8939_SNOC_BIMC_2_MAS);
+DEFINE_QNODE(mm_int_1, MSM8939_SNOC_MM_INT_1, 16, -1, -1, MSM8939_SNOC_BIMC_1_MAS);
+DEFINE_QNODE(mm_int_2, MSM8939_SNOC_MM_INT_2, 16, -1, -1, MSM8939_SNOC_INT_0);
+DEFINE_QNODE(pcnoc_int_0, MSM8939_PNOC_INT_0, 8, -1, -1, MSM8939_PNOC_SNOC_MAS, MSM8939_PNOC_SLV_0, MSM8939_PNOC_SLV_1, MSM8939_PNOC_SLV_2, MSM8939_PNOC_SLV_3, MSM8939_PNOC_SLV_4, MSM8939_PNOC_SLV_8, MSM8939_PNOC_SLV_9);
+DEFINE_QNODE(pcnoc_int_1, MSM8939_PNOC_INT_1, 8, -1, -1, MSM8939_PNOC_SNOC_MAS);
+DEFINE_QNODE(pcnoc_m_0, MSM8939_PNOC_MAS_0, 8, -1, -1, MSM8939_PNOC_INT_0);
+DEFINE_QNODE(pcnoc_m_1, MSM8939_PNOC_MAS_1, 8, -1, -1, MSM8939_PNOC_SNOC_MAS);
+DEFINE_QNODE(pcnoc_s_0, MSM8939_PNOC_SLV_0, 4, -1, -1, MSM8939_SLAVE_CLK_CTL, MSM8939_SLAVE_TLMM, MSM8939_SLAVE_TCSR, MSM8939_SLAVE_SECURITY, MSM8939_SLAVE_MSS);
+DEFINE_QNODE(pcnoc_s_1, MSM8939_PNOC_SLV_1, 4, -1, -1, MSM8939_SLAVE_IMEM_CFG, MSM8939_SLAVE_CRYPTO_0_CFG, MSM8939_SLAVE_MSG_RAM, MSM8939_SLAVE_PDM, MSM8939_SLAVE_PRNG);
+DEFINE_QNODE(pcnoc_s_2, MSM8939_PNOC_SLV_2, 4, -1, -1, MSM8939_SLAVE_SPDM, MSM8939_SLAVE_BOOT_ROM, MSM8939_SLAVE_BIMC_CFG, MSM8939_SLAVE_PNOC_CFG, MSM8939_SLAVE_PMIC_ARB);
+DEFINE_QNODE(pcnoc_s_3, MSM8939_PNOC_SLV_3, 4, -1, -1, MSM8939_SLAVE_MPM, MSM8939_SLAVE_SNOC_CFG, MSM8939_SLAVE_RBCPR_CFG, MSM8939_SLAVE_QDSS_CFG, MSM8939_SLAVE_DEHR_CFG);
+DEFINE_QNODE(pcnoc_s_4, MSM8939_PNOC_SLV_4, 4, -1, -1, MSM8939_SLAVE_VENUS_CFG, MSM8939_SLAVE_CAMERA_CFG, MSM8939_SLAVE_DISPLAY_CFG);
+DEFINE_QNODE(pcnoc_s_8, MSM8939_PNOC_SLV_8, 4, -1, -1, MSM8939_SLAVE_USB_HS1, MSM8939_SLAVE_SDCC_1, MSM8939_SLAVE_BLSP_1);
+DEFINE_QNODE(pcnoc_s_9, MSM8939_PNOC_SLV_9, 4, -1, -1, MSM8939_SLAVE_SDCC_2, MSM8939_SLAVE_LPASS, MSM8939_SLAVE_USB_HS2);
+DEFINE_QNODE(pcnoc_snoc_mas, MSM8939_PNOC_SNOC_MAS, 8, 29, -1, MSM8939_PNOC_SNOC_SLV);
+DEFINE_QNODE(pcnoc_snoc_slv, MSM8939_PNOC_SNOC_SLV, 8, -1, 45, MSM8939_SNOC_INT_0, MSM8939_SNOC_INT_BIMC, MSM8939_SNOC_INT_1);
+DEFINE_QNODE(qdss_int, MSM8939_SNOC_QDSS_INT, 8, -1, -1, MSM8939_SNOC_INT_0, MSM8939_SNOC_INT_BIMC);
+DEFINE_QNODE(slv_apps_l2, MSM8939_SLAVE_AMPSS_L2, 16, -1, -1, 0);
+DEFINE_QNODE(slv_apss, MSM8939_SLAVE_APSS, 4, -1, 20, 0);
+DEFINE_QNODE(slv_audio, MSM8939_SLAVE_LPASS, 4, -1, -1, 0);
+DEFINE_QNODE(slv_bimc_cfg, MSM8939_SLAVE_BIMC_CFG, 4, -1, -1, 0);
+DEFINE_QNODE(slv_blsp_1, MSM8939_SLAVE_BLSP_1, 4, -1, -1, 0);
+DEFINE_QNODE(slv_boot_rom, MSM8939_SLAVE_BOOT_ROM, 4, -1, -1, 0);
+DEFINE_QNODE(slv_camera_cfg, MSM8939_SLAVE_CAMERA_CFG, 4, -1, -1, 0);
+DEFINE_QNODE(slv_cats_0, MSM8939_SLAVE_CATS_128, 16, -1, 106, 0);
+DEFINE_QNODE(slv_cats_1, MSM8939_SLAVE_OCMEM_64, 8, -1, 107, 0);
+DEFINE_QNODE(slv_clk_ctl, MSM8939_SLAVE_CLK_CTL, 4, -1, -1, 0);
+DEFINE_QNODE(slv_crypto_0_cfg, MSM8939_SLAVE_CRYPTO_0_CFG, 4, -1, -1, 0);
+DEFINE_QNODE(slv_dehr_cfg, MSM8939_SLAVE_DEHR_CFG, 4, -1, -1, 0);
+DEFINE_QNODE(slv_display_cfg, MSM8939_SLAVE_DISPLAY_CFG, 4, -1, -1, 0);
+DEFINE_QNODE(slv_ebi_ch0, MSM8939_SLAVE_EBI_CH0, 16, -1, 0, 0);
+DEFINE_QNODE(slv_gfx_cfg, MSM8939_SLAVE_GRAPHICS_3D_CFG, 4, -1, -1, 0);
+DEFINE_QNODE(slv_imem_cfg, MSM8939_SLAVE_IMEM_CFG, 4, -1, -1, 0);
+DEFINE_QNODE(slv_imem, MSM8939_SLAVE_IMEM, 8, -1, 26, 0);
+DEFINE_QNODE(slv_mpm, MSM8939_SLAVE_MPM, 4, -1, -1, 0);
+DEFINE_QNODE(slv_msg_ram, MSM8939_SLAVE_MSG_RAM, 4, -1, -1, 0);
+DEFINE_QNODE(slv_mss, MSM8939_SLAVE_MSS, 4, -1, -1, 0);
+DEFINE_QNODE(slv_pdm, MSM8939_SLAVE_PDM, 4, -1, -1, 0);
+DEFINE_QNODE(slv_pmic_arb, MSM8939_SLAVE_PMIC_ARB, 4, -1, -1, 0);
+DEFINE_QNODE(slv_pcnoc_cfg, MSM8939_SLAVE_PNOC_CFG, 4, -1, -1, 0);
+DEFINE_QNODE(slv_prng, MSM8939_SLAVE_PRNG, 4, -1, -1, 0);
+DEFINE_QNODE(slv_qdss_cfg, MSM8939_SLAVE_QDSS_CFG, 4, -1, -1, 0);
+DEFINE_QNODE(slv_qdss_stm, MSM8939_SLAVE_QDSS_STM, 4, -1, 30, 0);
+DEFINE_QNODE(slv_rbcpr_cfg, MSM8939_SLAVE_RBCPR_CFG, 4, -1, -1, 0);
+DEFINE_QNODE(slv_sdcc_1, MSM8939_SLAVE_SDCC_1, 4, -1, -1, 0);
+DEFINE_QNODE(slv_sdcc_2, MSM8939_SLAVE_SDCC_2, 4, -1, -1, 0);
+DEFINE_QNODE(slv_security, MSM8939_SLAVE_SECURITY, 4, -1, -1, 0);
+DEFINE_QNODE(slv_snoc_cfg, MSM8939_SLAVE_SNOC_CFG, 4, -1, -1, 0);
+DEFINE_QNODE(slv_spdm, MSM8939_SLAVE_SPDM, 4, -1, -1, 0);
+DEFINE_QNODE(slv_srvc_snoc, MSM8939_SLAVE_SRVC_SNOC, 8, -1, 29, 0);
+DEFINE_QNODE(slv_tcsr, MSM8939_SLAVE_TCSR, 4, -1, -1, 0);
+DEFINE_QNODE(slv_tlmm, MSM8939_SLAVE_TLMM, 4, -1, -1, 0);
+DEFINE_QNODE(slv_usb_hs1, MSM8939_SLAVE_USB_HS1, 4, -1, -1, 0);
+DEFINE_QNODE(slv_usb_hs2, MSM8939_SLAVE_USB_HS2, 4, -1, -1, 0);
+DEFINE_QNODE(slv_venus_cfg, MSM8939_SLAVE_VENUS_CFG, 4, -1, -1, 0);
+DEFINE_QNODE(snoc_bimc_0_mas, MSM8939_SNOC_BIMC_0_MAS, 16, 3, -1, MSM8939_SNOC_BIMC_0_SLV);
+DEFINE_QNODE(snoc_bimc_0_slv, MSM8939_SNOC_BIMC_0_SLV, 16, -1, 24, MSM8939_SLAVE_EBI_CH0);
+DEFINE_QNODE(snoc_bimc_1_mas, MSM8939_SNOC_BIMC_1_MAS, 16, 76, -1, MSM8939_SNOC_BIMC_1_SLV);
+DEFINE_QNODE(snoc_bimc_1_slv, MSM8939_SNOC_BIMC_1_SLV, 16, -1, 104, MSM8939_SLAVE_EBI_CH0);
+DEFINE_QNODE(snoc_bimc_2_mas, MSM8939_SNOC_BIMC_2_MAS, 16, -1, -1, MSM8939_SNOC_BIMC_2_SLV);
+DEFINE_QNODE(snoc_bimc_2_slv, MSM8939_SNOC_BIMC_2_SLV, 16, -1, -1, MSM8939_SLAVE_EBI_CH0);
+DEFINE_QNODE(snoc_int_0, MSM8939_SNOC_INT_0, 8, 99, 130, MSM8939_SLAVE_QDSS_STM, MSM8939_SLAVE_IMEM, MSM8939_SNOC_PNOC_MAS);
+DEFINE_QNODE(snoc_int_1, MSM8939_SNOC_INT_1, 8, 100, 131, MSM8939_SLAVE_APSS, MSM8939_SLAVE_CATS_128, MSM8939_SLAVE_OCMEM_64);
+DEFINE_QNODE(snoc_int_bimc, MSM8939_SNOC_INT_BIMC, 8, 101, 132, MSM8939_SNOC_BIMC_1_MAS);
+DEFINE_QNODE(snoc_pcnoc_mas, MSM8939_SNOC_PNOC_MAS, 8, -1, -1, MSM8939_SNOC_PNOC_SLV);
+DEFINE_QNODE(snoc_pcnoc_slv, MSM8939_SNOC_PNOC_SLV, 8, -1, -1, MSM8939_PNOC_INT_0);
+
+static struct qcom_icc_node *msm8939_snoc_nodes[] = {
+	[BIMC_SNOC_SLV] = &bimc_snoc_slv,
+	[MASTER_QDSS_BAM] = &mas_qdss_bam,
+	[MASTER_QDSS_ETR] = &mas_qdss_etr,
+	[MASTER_SNOC_CFG] = &mas_snoc_cfg,
+	[PCNOC_SNOC_SLV] = &pcnoc_snoc_slv,
+	[SLAVE_APSS] = &slv_apss,
+	[SLAVE_CATS_128] = &slv_cats_0,
+	[SLAVE_OCMEM_64] = &slv_cats_1,
+	[SLAVE_IMEM] = &slv_imem,
+	[SLAVE_QDSS_STM] = &slv_qdss_stm,
+	[SLAVE_SRVC_SNOC] = &slv_srvc_snoc,
+	[SNOC_BIMC_0_MAS] = &snoc_bimc_0_mas,
+	[SNOC_BIMC_1_MAS] = &snoc_bimc_1_mas,
+	[SNOC_BIMC_2_MAS] = &snoc_bimc_2_mas,
+	[SNOC_INT_0] = &snoc_int_0,
+	[SNOC_INT_1] = &snoc_int_1,
+	[SNOC_INT_BIMC] = &snoc_int_bimc,
+	[SNOC_PCNOC_MAS] = &snoc_pcnoc_mas,
+	[SNOC_QDSS_INT] = &qdss_int,
+};
+
+static struct qcom_icc_desc msm8939_snoc = {
+	.nodes = msm8939_snoc_nodes,
+	.num_nodes = ARRAY_SIZE(msm8939_snoc_nodes),
+};
+
+static struct qcom_icc_node *msm8939_snoc_mm_nodes[] = {
+	[MASTER_VIDEO_P0] = &mas_video,
+	[MASTER_JPEG] = &mas_jpeg,
+	[MASTER_VFE] = &mas_vfe,
+	[MASTER_MDP_PORT0] = &mas_mdp0,
+	[MASTER_MDP_PORT1] = &mas_mdp1,
+	[MASTER_CPP] = &mas_cpp,
+	[SNOC_MM_INT_0] = &mm_int_0,
+	[SNOC_MM_INT_1] = &mm_int_1,
+	[SNOC_MM_INT_2] = &mm_int_2,
+};
+
+static struct qcom_icc_desc msm8939_snoc_mm = {
+	.nodes = msm8939_snoc_mm_nodes,
+	.num_nodes = ARRAY_SIZE(msm8939_snoc_mm_nodes),
+};
+
+static struct qcom_icc_node *msm8939_bimc_nodes[] = {
+	[BIMC_SNOC_MAS] = &bimc_snoc_mas,
+	[MASTER_AMPSS_M0] = &mas_apss,
+	[MASTER_GRAPHICS_3D] = &mas_gfx,
+	[MASTER_TCU0] = &mas_tcu0,
+	[SLAVE_AMPSS_L2] = &slv_apps_l2,
+	[SLAVE_EBI_CH0] = &slv_ebi_ch0,
+	[SNOC_BIMC_0_SLV] = &snoc_bimc_0_slv,
+	[SNOC_BIMC_1_SLV] = &snoc_bimc_1_slv,
+	[SNOC_BIMC_2_SLV] = &snoc_bimc_2_slv,
+};
+
+static struct qcom_icc_desc msm8939_bimc = {
+	.nodes = msm8939_bimc_nodes,
+	.num_nodes = ARRAY_SIZE(msm8939_bimc_nodes),
+};
+
+static struct qcom_icc_node *msm8939_pcnoc_nodes[] = {
+	[MASTER_BLSP_1] = &mas_blsp_1,
+	[MASTER_DEHR] = &mas_dehr,
+	[MASTER_LPASS] = &mas_audio,
+	[MASTER_CRYPTO_CORE0] = &mas_pcnoc_crypto_0,
+	[MASTER_SDCC_1] = &mas_pcnoc_sdcc_1,
+	[MASTER_SDCC_2] = &mas_pcnoc_sdcc_2,
+	[MASTER_SPDM] = &mas_spdm,
+	[MASTER_USB_HS1] = &mas_usb_hs1,
+	[MASTER_USB_HS2] = &mas_usb_hs2,
+	[PCNOC_INT_0] = &pcnoc_int_0,
+	[PCNOC_INT_1] = &pcnoc_int_1,
+	[PCNOC_MAS_0] = &pcnoc_m_0,
+	[PCNOC_MAS_1] = &pcnoc_m_1,
+	[PCNOC_SLV_0] = &pcnoc_s_0,
+	[PCNOC_SLV_1] = &pcnoc_s_1,
+	[PCNOC_SLV_2] = &pcnoc_s_2,
+	[PCNOC_SLV_3] = &pcnoc_s_3,
+	[PCNOC_SLV_4] = &pcnoc_s_4,
+	[PCNOC_SLV_8] = &pcnoc_s_8,
+	[PCNOC_SLV_9] = &pcnoc_s_9,
+	[PCNOC_SNOC_MAS] = &pcnoc_snoc_mas,
+	[SLAVE_BIMC_CFG] = &slv_bimc_cfg,
+	[SLAVE_BLSP_1] = &slv_blsp_1,
+	[SLAVE_BOOT_ROM] = &slv_boot_rom,
+	[SLAVE_CAMERA_CFG] = &slv_camera_cfg,
+	[SLAVE_CLK_CTL] = &slv_clk_ctl,
+	[SLAVE_CRYPTO_0_CFG] = &slv_crypto_0_cfg,
+	[SLAVE_DEHR_CFG] = &slv_dehr_cfg,
+	[SLAVE_DISPLAY_CFG] = &slv_display_cfg,
+	[SLAVE_GRAPHICS_3D_CFG] = &slv_gfx_cfg,
+	[SLAVE_IMEM_CFG] = &slv_imem_cfg,
+	[SLAVE_LPASS] = &slv_audio,
+	[SLAVE_MPM] = &slv_mpm,
+	[SLAVE_MSG_RAM] = &slv_msg_ram,
+	[SLAVE_MSS] = &slv_mss,
+	[SLAVE_PDM] = &slv_pdm,
+	[SLAVE_PMIC_ARB] = &slv_pmic_arb,
+	[SLAVE_PCNOC_CFG] = &slv_pcnoc_cfg,
+	[SLAVE_PRNG] = &slv_prng,
+	[SLAVE_QDSS_CFG] = &slv_qdss_cfg,
+	[SLAVE_RBCPR_CFG] = &slv_rbcpr_cfg,
+	[SLAVE_SDCC_1] = &slv_sdcc_1,
+	[SLAVE_SDCC_2] = &slv_sdcc_2,
+	[SLAVE_SECURITY] = &slv_security,
+	[SLAVE_SNOC_CFG] = &slv_snoc_cfg,
+	[SLAVE_SPDM] = &slv_spdm,
+	[SLAVE_TCSR] = &slv_tcsr,
+	[SLAVE_TLMM] = &slv_tlmm,
+	[SLAVE_USB_HS1] = &slv_usb_hs1,
+	[SLAVE_USB_HS2] = &slv_usb_hs2,
+	[SLAVE_VENUS_CFG] = &slv_venus_cfg,
+	[SNOC_PCNOC_SLV] = &snoc_pcnoc_slv,
+};
+
+static struct qcom_icc_desc msm8939_pcnoc = {
+	.nodes = msm8939_pcnoc_nodes,
+	.num_nodes = ARRAY_SIZE(msm8939_pcnoc_nodes),
+};
+
+static int msm8939_qnoc_probe(struct platform_device *pdev)
+{
+	return qnoc_probe(pdev, sizeof(msm8939_bus_clocks),
+			  ARRAY_SIZE(msm8939_bus_clocks), msm8939_bus_clocks);
+}
+
+static const struct of_device_id msm8939_noc_of_match[] = {
+	{ .compatible = "qcom,msm8939-bimc", .data = &msm8939_bimc },
+	{ .compatible = "qcom,msm8939-pcnoc", .data = &msm8939_pcnoc },
+	{ .compatible = "qcom,msm8939-snoc", .data = &msm8939_snoc },
+	{ .compatible = "qcom,msm8939-snoc-mm", .data = &msm8939_snoc_mm },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, msm8939_noc_of_match);
+
+static struct platform_driver msm8939_noc_driver = {
+	.probe = msm8939_qnoc_probe,
+	.remove = qnoc_remove,
+	.driver = {
+		.name = "qnoc-msm8939",
+		.of_match_table = msm8939_noc_of_match,
+	},
+};
+module_platform_driver(msm8939_noc_driver);
+MODULE_AUTHOR("Jun Nie <jun.nie@linaro.org>");
+MODULE_DESCRIPTION("Qualcomm MSM8939 NoC driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/interconnect/qcom/qcs404.c b/drivers/interconnect/qcom/qcs404.c
index 9820709b43db..36a7e30a00be 100644
--- a/drivers/interconnect/qcom/qcs404.c
+++ b/drivers/interconnect/qcom/qcs404.c
@@ -9,15 +9,12 @@
 #include <linux/interconnect-provider.h>
 #include <linux/io.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
-#include <linux/of_platform.h>
 #include <linux/platform_device.h>
-#include <linux/slab.h>
+#include <linux/of_device.h>
 
-#include "smd-rpm.h"
 
-#define RPM_BUS_MASTER_REQ	0x73616d62
-#define RPM_BUS_SLAVE_REQ	0x766c7362
+#include "smd-rpm.h"
+#include "icc-rpm.h"
 
 enum {
 	QCS404_MASTER_AMPSS_M0 = 1,
@@ -95,67 +92,11 @@ enum {
 	QCS404_SLAVE_LPASS,
 };
 
-#define to_qcom_provider(_provider) \
-	container_of(_provider, struct qcom_icc_provider, provider)
-
-static const struct clk_bulk_data bus_clocks[] = {
+static const struct clk_bulk_data qcs404_bus_clocks[] = {
 	{ .id = "bus" },
 	{ .id = "bus_a" },
 };
 
-/**
- * struct qcom_icc_provider - Qualcomm specific interconnect provider
- * @provider: generic interconnect provider
- * @bus_clks: the clk_bulk_data table of bus clocks
- * @num_clks: the total number of clk_bulk_data entries
- */
-struct qcom_icc_provider {
-	struct icc_provider provider;
-	struct clk_bulk_data *bus_clks;
-	int num_clks;
-};
-
-#define QCS404_MAX_LINKS	12
-
-/**
- * struct qcom_icc_node - Qualcomm specific interconnect nodes
- * @name: the node name used in debugfs
- * @id: a unique node identifier
- * @links: an array of nodes where we can go next while traversing
- * @num_links: the total number of @links
- * @buswidth: width of the interconnect between a node and the bus (bytes)
- * @mas_rpm_id:	RPM id for devices that are bus masters
- * @slv_rpm_id:	RPM id for devices that are bus slaves
- * @rate: current bus clock rate in Hz
- */
-struct qcom_icc_node {
-	unsigned char *name;
-	u16 id;
-	u16 links[QCS404_MAX_LINKS];
-	u16 num_links;
-	u16 buswidth;
-	int mas_rpm_id;
-	int slv_rpm_id;
-	u64 rate;
-};
-
-struct qcom_icc_desc {
-	struct qcom_icc_node **nodes;
-	size_t num_nodes;
-};
-
-#define DEFINE_QNODE(_name, _id, _buswidth, _mas_rpm_id, _slv_rpm_id,	\
-		     ...)						\
-		static struct qcom_icc_node _name = {			\
-		.name = #_name,						\
-		.id = _id,						\
-		.buswidth = _buswidth,					\
-		.mas_rpm_id = _mas_rpm_id,				\
-		.slv_rpm_id = _slv_rpm_id,				\
-		.num_links = ARRAY_SIZE(((int[]){ __VA_ARGS__ })),	\
-		.links = { __VA_ARGS__ },				\
-	}
-
 DEFINE_QNODE(mas_apps_proc, QCS404_MASTER_AMPSS_M0, 8, 0, -1, QCS404_SLAVE_EBI_CH0, QCS404_BIMC_SNOC_SLV);
 DEFINE_QNODE(mas_oxili, QCS404_MASTER_GRAPHICS_3D, 8, -1, -1, QCS404_SLAVE_EBI_CH0, QCS404_BIMC_SNOC_SLV);
 DEFINE_QNODE(mas_mdp, QCS404_MASTER_MDP_PORT0, 8, -1, -1, QCS404_SLAVE_EBI_CH0, QCS404_BIMC_SNOC_SLV);
@@ -327,178 +268,11 @@ static struct qcom_icc_desc qcs404_snoc = {
 	.num_nodes = ARRAY_SIZE(qcs404_snoc_nodes),
 };
 
-static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
-{
-	struct qcom_icc_provider *qp;
-	struct qcom_icc_node *qn;
-	struct icc_provider *provider;
-	struct icc_node *n;
-	u64 sum_bw;
-	u64 max_peak_bw;
-	u64 rate;
-	u32 agg_avg = 0;
-	u32 agg_peak = 0;
-	int ret, i;
-
-	qn = src->data;
-	provider = src->provider;
-	qp = to_qcom_provider(provider);
-
-	list_for_each_entry(n, &provider->nodes, node_list)
-		provider->aggregate(n, 0, n->avg_bw, n->peak_bw,
-				    &agg_avg, &agg_peak);
-
-	sum_bw = icc_units_to_bps(agg_avg);
-	max_peak_bw = icc_units_to_bps(agg_peak);
-
-	/* send bandwidth request message to the RPM processor */
-	if (qn->mas_rpm_id != -1) {
-		ret = qcom_icc_rpm_smd_send(QCOM_SMD_RPM_ACTIVE_STATE,
-					    RPM_BUS_MASTER_REQ,
-					    qn->mas_rpm_id,
-					    sum_bw);
-		if (ret) {
-			pr_err("qcom_icc_rpm_smd_send mas %d error %d\n",
-			       qn->mas_rpm_id, ret);
-			return ret;
-		}
-	}
-
-	if (qn->slv_rpm_id != -1) {
-		ret = qcom_icc_rpm_smd_send(QCOM_SMD_RPM_ACTIVE_STATE,
-					    RPM_BUS_SLAVE_REQ,
-					    qn->slv_rpm_id,
-					    sum_bw);
-		if (ret) {
-			pr_err("qcom_icc_rpm_smd_send slv error %d\n",
-			       ret);
-			return ret;
-		}
-	}
-
-	rate = max(sum_bw, max_peak_bw);
-
-	do_div(rate, qn->buswidth);
-
-	if (qn->rate == rate)
-		return 0;
-
-	for (i = 0; i < qp->num_clks; i++) {
-		ret = clk_set_rate(qp->bus_clks[i].clk, rate);
-		if (ret) {
-			pr_err("%s clk_set_rate error: %d\n",
-			       qp->bus_clks[i].id, ret);
-			return ret;
-		}
-	}
-
-	qn->rate = rate;
 
-	return 0;
-}
-
-static int qnoc_probe(struct platform_device *pdev)
+static int qcs404_qnoc_probe(struct platform_device *pdev)
 {
-	struct device *dev = &pdev->dev;
-	const struct qcom_icc_desc *desc;
-	struct icc_onecell_data *data;
-	struct icc_provider *provider;
-	struct qcom_icc_node **qnodes;
-	struct qcom_icc_provider *qp;
-	struct icc_node *node;
-	size_t num_nodes, i;
-	int ret;
-
-	/* wait for the RPM proxy */
-	if (!qcom_icc_rpm_smd_available())
-		return -EPROBE_DEFER;
-
-	desc = of_device_get_match_data(dev);
-	if (!desc)
-		return -EINVAL;
-
-	qnodes = desc->nodes;
-	num_nodes = desc->num_nodes;
-
-	qp = devm_kzalloc(dev, sizeof(*qp), GFP_KERNEL);
-	if (!qp)
-		return -ENOMEM;
-
-	data = devm_kzalloc(dev, struct_size(data, nodes, num_nodes),
-			    GFP_KERNEL);
-	if (!data)
-		return -ENOMEM;
-
-	qp->bus_clks = devm_kmemdup(dev, bus_clocks, sizeof(bus_clocks),
-				    GFP_KERNEL);
-	if (!qp->bus_clks)
-		return -ENOMEM;
-
-	qp->num_clks = ARRAY_SIZE(bus_clocks);
-	ret = devm_clk_bulk_get(dev, qp->num_clks, qp->bus_clks);
-	if (ret)
-		return ret;
-
-	ret = clk_bulk_prepare_enable(qp->num_clks, qp->bus_clks);
-	if (ret)
-		return ret;
-
-	provider = &qp->provider;
-	INIT_LIST_HEAD(&provider->nodes);
-	provider->dev = dev;
-	provider->set = qcom_icc_set;
-	provider->aggregate = icc_std_aggregate;
-	provider->xlate = of_icc_xlate_onecell;
-	provider->data = data;
-
-	ret = icc_provider_add(provider);
-	if (ret) {
-		dev_err(dev, "error adding interconnect provider: %d\n", ret);
-		clk_bulk_disable_unprepare(qp->num_clks, qp->bus_clks);
-		return ret;
-	}
-
-	for (i = 0; i < num_nodes; i++) {
-		size_t j;
-
-		node = icc_node_create(qnodes[i]->id);
-		if (IS_ERR(node)) {
-			ret = PTR_ERR(node);
-			goto err;
-		}
-
-		node->name = qnodes[i]->name;
-		node->data = qnodes[i];
-		icc_node_add(node, provider);
-
-		dev_dbg(dev, "registered node %s\n", node->name);
-
-		/* populate links */
-		for (j = 0; j < qnodes[i]->num_links; j++)
-			icc_link_create(node, qnodes[i]->links[j]);
-
-		data->nodes[i] = node;
-	}
-	data->num_nodes = num_nodes;
-
-	platform_set_drvdata(pdev, qp);
-
-	return 0;
-err:
-	icc_nodes_remove(provider);
-	clk_bulk_disable_unprepare(qp->num_clks, qp->bus_clks);
-	icc_provider_del(provider);
-
-	return ret;
-}
-
-static int qnoc_remove(struct platform_device *pdev)
-{
-	struct qcom_icc_provider *qp = platform_get_drvdata(pdev);
-
-	icc_nodes_remove(&qp->provider);
-	clk_bulk_disable_unprepare(qp->num_clks, qp->bus_clks);
-	return icc_provider_del(&qp->provider);
+	return qnoc_probe(pdev, sizeof(qcs404_bus_clocks),
+			  ARRAY_SIZE(qcs404_bus_clocks), qcs404_bus_clocks);
 }
 
 static const struct of_device_id qcs404_noc_of_match[] = {
@@ -510,7 +284,7 @@ static const struct of_device_id qcs404_noc_of_match[] = {
 MODULE_DEVICE_TABLE(of, qcs404_noc_of_match);
 
 static struct platform_driver qcs404_noc_driver = {
-	.probe = qnoc_probe,
+	.probe = qcs404_qnoc_probe,
 	.remove = qnoc_remove,
 	.driver = {
 		.name = "qnoc-qcs404",
diff --git a/drivers/interconnect/qcom/sdx55.c b/drivers/interconnect/qcom/sdx55.c
new file mode 100644
index 000000000000..a5a122ee3d21
--- /dev/null
+++ b/drivers/interconnect/qcom/sdx55.c
@@ -0,0 +1,356 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Qualcomm SDX55 interconnect driver
+ * Author: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+ *
+ * Copyright (c) 2021, Linaro Ltd.
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/interconnect.h>
+#include <linux/interconnect-provider.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <dt-bindings/interconnect/qcom,sdx55.h>
+
+#include "bcm-voter.h"
+#include "icc-rpmh.h"
+#include "sdx55.h"
+
+DEFINE_QNODE(ipa_core_master, SDX55_MASTER_IPA_CORE, 1, 8, SDX55_SLAVE_IPA_CORE);
+DEFINE_QNODE(llcc_mc, SDX55_MASTER_LLCC, 4, 4, SDX55_SLAVE_EBI_CH0);
+DEFINE_QNODE(acm_tcu, SDX55_MASTER_TCU_0, 1, 8, SDX55_SLAVE_LLCC, SDX55_SLAVE_MEM_NOC_SNOC, SDX55_SLAVE_MEM_NOC_PCIE_SNOC);
+DEFINE_QNODE(qnm_snoc_gc, SDX55_MASTER_SNOC_GC_MEM_NOC, 1, 8, SDX55_SLAVE_LLCC);
+DEFINE_QNODE(xm_apps_rdwr, SDX55_MASTER_AMPSS_M0, 1, 16, SDX55_SLAVE_LLCC, SDX55_SLAVE_MEM_NOC_SNOC, SDX55_SLAVE_MEM_NOC_PCIE_SNOC);
+DEFINE_QNODE(qhm_audio, SDX55_MASTER_AUDIO, 1, 4, SDX55_SLAVE_ANOC_SNOC);
+DEFINE_QNODE(qhm_blsp1, SDX55_MASTER_BLSP_1, 1, 4, SDX55_SLAVE_ANOC_SNOC);
+DEFINE_QNODE(qhm_qdss_bam, SDX55_MASTER_QDSS_BAM, 1, 4, SDX55_SLAVE_SNOC_CFG, SDX55_SLAVE_EMAC_CFG, SDX55_SLAVE_USB3, SDX55_SLAVE_TLMM, SDX55_SLAVE_SPMI_FETCHER, SDX55_SLAVE_QDSS_CFG, SDX55_SLAVE_PDM, SDX55_SLAVE_SNOC_MEM_NOC_GC, SDX55_SLAVE_TCSR, SDX55_SLAVE_CNOC_DDRSS, SDX55_SLAVE_SPMI_VGI_COEX, SDX55_SLAVE_QPIC, SDX55_SLAVE_OCIMEM, SDX55_SLAVE_IPA_CFG, SDX55_SLAVE_USB3_PHY_CFG, SDX55_SLAVE_AOP, SDX55_SLAVE_BLSP_1, SDX55_SLAVE_SDCC_1, SDX55_SLAVE_CNOC_MSS, SDX55_SLAVE_PCIE_PARF, SDX55_SLAVE_ECC_CFG, SDX55_SLAVE_AUDIO, SDX55_SLAVE_AOSS, SDX55_SLAVE_PRNG, SDX55_SLAVE_CRYPTO_0_CFG, SDX55_SLAVE_TCU, SDX55_SLAVE_CLK_CTL, SDX55_SLAVE_IMEM_CFG);
+DEFINE_QNODE(qhm_qpic, SDX55_MASTER_QPIC, 1, 4, SDX55_SLAVE_AOSS, SDX55_SLAVE_IPA_CFG, SDX55_SLAVE_ANOC_SNOC, SDX55_SLAVE_AOP, SDX55_SLAVE_AUDIO);
+DEFINE_QNODE(qhm_snoc_cfg, SDX55_MASTER_SNOC_CFG, 1, 4, SDX55_SLAVE_SERVICE_SNOC);
+DEFINE_QNODE(qhm_spmi_fetcher1, SDX55_MASTER_SPMI_FETCHER, 1, 4, SDX55_SLAVE_AOSS, SDX55_SLAVE_ANOC_SNOC, SDX55_SLAVE_AOP);
+DEFINE_QNODE(qnm_aggre_noc, SDX55_MASTER_ANOC_SNOC, 1, 8, SDX55_SLAVE_PCIE_0, SDX55_SLAVE_SNOC_CFG, SDX55_SLAVE_SDCC_1, SDX55_SLAVE_TLMM, SDX55_SLAVE_SPMI_FETCHER, SDX55_SLAVE_QDSS_CFG, SDX55_SLAVE_PDM, SDX55_SLAVE_SNOC_MEM_NOC_GC, SDX55_SLAVE_TCSR, SDX55_SLAVE_CNOC_DDRSS, SDX55_SLAVE_SPMI_VGI_COEX, SDX55_SLAVE_QDSS_STM, SDX55_SLAVE_QPIC, SDX55_SLAVE_OCIMEM, SDX55_SLAVE_IPA_CFG, SDX55_SLAVE_USB3_PHY_CFG, SDX55_SLAVE_AOP, SDX55_SLAVE_BLSP_1, SDX55_SLAVE_USB3, SDX55_SLAVE_CNOC_MSS, SDX55_SLAVE_PCIE_PARF, SDX55_SLAVE_ECC_CFG, SDX55_SLAVE_APPSS, SDX55_SLAVE_AUDIO, SDX55_SLAVE_AOSS, SDX55_SLAVE_PRNG, SDX55_SLAVE_CRYPTO_0_CFG, SDX55_SLAVE_TCU, SDX55_SLAVE_CLK_CTL, SDX55_SLAVE_IMEM_CFG);
+DEFINE_QNODE(qnm_ipa, SDX55_MASTER_IPA, 1, 8, SDX55_SLAVE_SNOC_CFG, SDX55_SLAVE_EMAC_CFG, SDX55_SLAVE_USB3, SDX55_SLAVE_AOSS, SDX55_SLAVE_SPMI_FETCHER, SDX55_SLAVE_QDSS_CFG, SDX55_SLAVE_PDM, SDX55_SLAVE_SNOC_MEM_NOC_GC, SDX55_SLAVE_TCSR, SDX55_SLAVE_CNOC_DDRSS, SDX55_SLAVE_QDSS_STM, SDX55_SLAVE_QPIC, SDX55_SLAVE_OCIMEM, SDX55_SLAVE_IPA_CFG, SDX55_SLAVE_USB3_PHY_CFG, SDX55_SLAVE_AOP, SDX55_SLAVE_BLSP_1, SDX55_SLAVE_SDCC_1, SDX55_SLAVE_CNOC_MSS, SDX55_SLAVE_PCIE_PARF, SDX55_SLAVE_ECC_CFG, SDX55_SLAVE_AUDIO, SDX55_SLAVE_TLMM, SDX55_SLAVE_PRNG, SDX55_SLAVE_CRYPTO_0_CFG, SDX55_SLAVE_CLK_CTL, SDX55_SLAVE_IMEM_CFG);
+DEFINE_QNODE(qnm_memnoc, SDX55_MASTER_MEM_NOC_SNOC, 1, 8, SDX55_SLAVE_SNOC_CFG, SDX55_SLAVE_EMAC_CFG, SDX55_SLAVE_USB3, SDX55_SLAVE_TLMM, SDX55_SLAVE_SPMI_FETCHER, SDX55_SLAVE_QDSS_CFG, SDX55_SLAVE_PDM, SDX55_SLAVE_TCSR, SDX55_SLAVE_CNOC_DDRSS, SDX55_SLAVE_SPMI_VGI_COEX, SDX55_SLAVE_QDSS_STM, SDX55_SLAVE_QPIC, SDX55_SLAVE_OCIMEM, SDX55_SLAVE_IPA_CFG, SDX55_SLAVE_USB3_PHY_CFG, SDX55_SLAVE_AOP, SDX55_SLAVE_BLSP_1, SDX55_SLAVE_SDCC_1, SDX55_SLAVE_CNOC_MSS, SDX55_SLAVE_PCIE_PARF, SDX55_SLAVE_ECC_CFG, SDX55_SLAVE_APPSS,  SDX55_SLAVE_AUDIO, SDX55_SLAVE_AOSS, SDX55_SLAVE_PRNG, SDX55_SLAVE_CRYPTO_0_CFG, SDX55_SLAVE_TCU, SDX55_SLAVE_CLK_CTL, SDX55_SLAVE_IMEM_CFG);
+DEFINE_QNODE(qnm_memnoc_pcie, SDX55_MASTER_MEM_NOC_PCIE_SNOC, 1, 8, SDX55_SLAVE_PCIE_0);
+DEFINE_QNODE(qxm_crypto, SDX55_MASTER_CRYPTO_CORE_0, 1, 8, SDX55_SLAVE_AOSS, SDX55_SLAVE_ANOC_SNOC, SDX55_SLAVE_AOP);
+DEFINE_QNODE(xm_emac, SDX55_MASTER_EMAC, 1, 8, SDX55_SLAVE_ANOC_SNOC);
+DEFINE_QNODE(xm_ipa2pcie_slv, SDX55_MASTER_IPA_PCIE, 1, 8, SDX55_SLAVE_PCIE_0);
+DEFINE_QNODE(xm_pcie, SDX55_MASTER_PCIE, 1, 8, SDX55_SLAVE_ANOC_SNOC);
+DEFINE_QNODE(xm_qdss_etr, SDX55_MASTER_QDSS_ETR, 1, 8, SDX55_SLAVE_SNOC_CFG, SDX55_SLAVE_EMAC_CFG, SDX55_SLAVE_USB3, SDX55_SLAVE_AOSS, SDX55_SLAVE_SPMI_FETCHER, SDX55_SLAVE_QDSS_CFG, SDX55_SLAVE_PDM, SDX55_SLAVE_SNOC_MEM_NOC_GC, SDX55_SLAVE_TCSR, SDX55_SLAVE_CNOC_DDRSS, SDX55_SLAVE_SPMI_VGI_COEX, SDX55_SLAVE_QPIC, SDX55_SLAVE_OCIMEM, SDX55_SLAVE_IPA_CFG, SDX55_SLAVE_USB3_PHY_CFG, SDX55_SLAVE_AOP, SDX55_SLAVE_BLSP_1, SDX55_SLAVE_SDCC_1, SDX55_SLAVE_CNOC_MSS, SDX55_SLAVE_PCIE_PARF, SDX55_SLAVE_ECC_CFG, SDX55_SLAVE_AUDIO, SDX55_SLAVE_AOSS, SDX55_SLAVE_PRNG, SDX55_SLAVE_CRYPTO_0_CFG, SDX55_SLAVE_TCU, SDX55_SLAVE_CLK_CTL, SDX55_SLAVE_IMEM_CFG);
+DEFINE_QNODE(xm_sdc1, SDX55_MASTER_SDCC_1, 1, 8, SDX55_SLAVE_AOSS, SDX55_SLAVE_IPA_CFG, SDX55_SLAVE_ANOC_SNOC, SDX55_SLAVE_AOP, SDX55_SLAVE_AUDIO);
+DEFINE_QNODE(xm_usb3, SDX55_MASTER_USB3, 1, 8, SDX55_SLAVE_ANOC_SNOC);
+DEFINE_QNODE(ipa_core_slave, SDX55_SLAVE_IPA_CORE, 1, 8);
+DEFINE_QNODE(ebi, SDX55_SLAVE_EBI_CH0, 1, 4);
+DEFINE_QNODE(qns_llcc, SDX55_SLAVE_LLCC, 1, 16, SDX55_SLAVE_EBI_CH0);
+DEFINE_QNODE(qns_memnoc_snoc, SDX55_SLAVE_MEM_NOC_SNOC, 1, 8, SDX55_MASTER_MEM_NOC_SNOC);
+DEFINE_QNODE(qns_sys_pcie, SDX55_SLAVE_MEM_NOC_PCIE_SNOC, 1, 8, SDX55_MASTER_MEM_NOC_PCIE_SNOC);
+DEFINE_QNODE(qhs_aop, SDX55_SLAVE_AOP, 1, 4);
+DEFINE_QNODE(qhs_aoss, SDX55_SLAVE_AOSS, 1, 4);
+DEFINE_QNODE(qhs_apss, SDX55_SLAVE_APPSS, 1, 4);
+DEFINE_QNODE(qhs_audio, SDX55_SLAVE_AUDIO, 1, 4);
+DEFINE_QNODE(qhs_blsp1, SDX55_SLAVE_BLSP_1, 1, 4);
+DEFINE_QNODE(qhs_clk_ctl, SDX55_SLAVE_CLK_CTL, 1, 4);
+DEFINE_QNODE(qhs_crypto0_cfg, SDX55_SLAVE_CRYPTO_0_CFG, 1, 4);
+DEFINE_QNODE(qhs_ddrss_cfg, SDX55_SLAVE_CNOC_DDRSS, 1, 4);
+DEFINE_QNODE(qhs_ecc_cfg, SDX55_SLAVE_ECC_CFG, 1, 4);
+DEFINE_QNODE(qhs_emac_cfg, SDX55_SLAVE_EMAC_CFG, 1, 4);
+DEFINE_QNODE(qhs_imem_cfg, SDX55_SLAVE_IMEM_CFG, 1, 4);
+DEFINE_QNODE(qhs_ipa, SDX55_SLAVE_IPA_CFG, 1, 4);
+DEFINE_QNODE(qhs_mss_cfg, SDX55_SLAVE_CNOC_MSS, 1, 4);
+DEFINE_QNODE(qhs_pcie_parf, SDX55_SLAVE_PCIE_PARF, 1, 4);
+DEFINE_QNODE(qhs_pdm, SDX55_SLAVE_PDM, 1, 4);
+DEFINE_QNODE(qhs_prng, SDX55_SLAVE_PRNG, 1, 4);
+DEFINE_QNODE(qhs_qdss_cfg, SDX55_SLAVE_QDSS_CFG, 1, 4);
+DEFINE_QNODE(qhs_qpic, SDX55_SLAVE_QPIC, 1, 4);
+DEFINE_QNODE(qhs_sdc1, SDX55_SLAVE_SDCC_1, 1, 4);
+DEFINE_QNODE(qhs_snoc_cfg, SDX55_SLAVE_SNOC_CFG, 1, 4, SDX55_MASTER_SNOC_CFG);
+DEFINE_QNODE(qhs_spmi_fetcher, SDX55_SLAVE_SPMI_FETCHER, 1, 4);
+DEFINE_QNODE(qhs_spmi_vgi_coex, SDX55_SLAVE_SPMI_VGI_COEX, 1, 4);
+DEFINE_QNODE(qhs_tcsr, SDX55_SLAVE_TCSR, 1, 4);
+DEFINE_QNODE(qhs_tlmm, SDX55_SLAVE_TLMM, 1, 4);
+DEFINE_QNODE(qhs_usb3, SDX55_SLAVE_USB3, 1, 4);
+DEFINE_QNODE(qhs_usb3_phy, SDX55_SLAVE_USB3_PHY_CFG, 1, 4);
+DEFINE_QNODE(qns_aggre_noc, SDX55_SLAVE_ANOC_SNOC, 1, 8, SDX55_MASTER_ANOC_SNOC);
+DEFINE_QNODE(qns_snoc_memnoc, SDX55_SLAVE_SNOC_MEM_NOC_GC, 1, 8, SDX55_MASTER_SNOC_GC_MEM_NOC);
+DEFINE_QNODE(qxs_imem, SDX55_SLAVE_OCIMEM, 1, 8);
+DEFINE_QNODE(srvc_snoc, SDX55_SLAVE_SERVICE_SNOC, 1, 4);
+DEFINE_QNODE(xs_pcie, SDX55_SLAVE_PCIE_0, 1, 8);
+DEFINE_QNODE(xs_qdss_stm, SDX55_SLAVE_QDSS_STM, 1, 4);
+DEFINE_QNODE(xs_sys_tcu_cfg, SDX55_SLAVE_TCU, 1, 8);
+
+DEFINE_QBCM(bcm_mc0, "MC0", true, &ebi);
+DEFINE_QBCM(bcm_sh0, "SH0", true, &qns_llcc);
+DEFINE_QBCM(bcm_ce0, "CE0", false, &qxm_crypto);
+DEFINE_QBCM(bcm_ip0, "IP0", false, &ipa_core_slave);
+DEFINE_QBCM(bcm_pn0, "PN0", false, &qhm_snoc_cfg);
+DEFINE_QBCM(bcm_sh3, "SH3", false, &xm_apps_rdwr);
+DEFINE_QBCM(bcm_sh4, "SH4", false, &qns_memnoc_snoc, &qns_sys_pcie);
+DEFINE_QBCM(bcm_sn0, "SN0", true, &qns_snoc_memnoc);
+DEFINE_QBCM(bcm_sn1, "SN1", false, &qxs_imem);
+DEFINE_QBCM(bcm_pn1, "PN1", false, &xm_sdc1);
+DEFINE_QBCM(bcm_pn2, "PN2", false, &qhm_audio, &qhm_spmi_fetcher1);
+DEFINE_QBCM(bcm_sn3, "SN3", false, &xs_qdss_stm);
+DEFINE_QBCM(bcm_pn3, "PN3", false, &qhm_blsp1, &qhm_qpic);
+DEFINE_QBCM(bcm_sn4, "SN4", false, &xs_sys_tcu_cfg);
+DEFINE_QBCM(bcm_pn5, "PN5", false, &qxm_crypto);
+DEFINE_QBCM(bcm_sn6, "SN6", false, &xs_pcie);
+DEFINE_QBCM(bcm_sn7, "SN7", false, &qnm_aggre_noc, &xm_emac, &xm_emac, &xm_usb3,
+	    &qns_aggre_noc);
+DEFINE_QBCM(bcm_sn8, "SN8", false, &qhm_qdss_bam, &xm_qdss_etr);
+DEFINE_QBCM(bcm_sn9, "SN9", false, &qnm_memnoc);
+DEFINE_QBCM(bcm_sn10, "SN10", false, &qnm_memnoc_pcie);
+DEFINE_QBCM(bcm_sn11, "SN11", false, &qnm_ipa, &xm_ipa2pcie_slv);
+
+static struct qcom_icc_bcm *mc_virt_bcms[] = {
+	&bcm_mc0,
+};
+
+static struct qcom_icc_node *mc_virt_nodes[] = {
+	[MASTER_LLCC] = &llcc_mc,
+	[SLAVE_EBI_CH0] = &ebi,
+};
+
+static const struct qcom_icc_desc sdx55_mc_virt = {
+	.nodes = mc_virt_nodes,
+	.num_nodes = ARRAY_SIZE(mc_virt_nodes),
+	.bcms = mc_virt_bcms,
+	.num_bcms = ARRAY_SIZE(mc_virt_bcms),
+};
+
+static struct qcom_icc_bcm *mem_noc_bcms[] = {
+	&bcm_sh0,
+	&bcm_sh3,
+	&bcm_sh4,
+};
+
+static struct qcom_icc_node *mem_noc_nodes[] = {
+	[MASTER_TCU_0] = &acm_tcu,
+	[MASTER_SNOC_GC_MEM_NOC] = &qnm_snoc_gc,
+	[MASTER_AMPSS_M0] = &xm_apps_rdwr,
+	[SLAVE_LLCC] = &qns_llcc,
+	[SLAVE_MEM_NOC_SNOC] = &qns_memnoc_snoc,
+	[SLAVE_MEM_NOC_PCIE_SNOC] = &qns_sys_pcie,
+};
+
+static const struct qcom_icc_desc sdx55_mem_noc = {
+	.nodes = mem_noc_nodes,
+	.num_nodes = ARRAY_SIZE(mem_noc_nodes),
+	.bcms = mem_noc_bcms,
+	.num_bcms = ARRAY_SIZE(mem_noc_bcms),
+};
+
+static struct qcom_icc_bcm *system_noc_bcms[] = {
+	&bcm_ce0,
+	&bcm_pn0,
+	&bcm_pn1,
+	&bcm_pn2,
+	&bcm_pn3,
+	&bcm_pn5,
+	&bcm_sn0,
+	&bcm_sn1,
+	&bcm_sn3,
+	&bcm_sn4,
+	&bcm_sn6,
+	&bcm_sn7,
+	&bcm_sn8,
+	&bcm_sn9,
+	&bcm_sn10,
+	&bcm_sn11,
+};
+
+static struct qcom_icc_node *system_noc_nodes[] = {
+	[MASTER_AUDIO] = &qhm_audio,
+	[MASTER_BLSP_1] = &qhm_blsp1,
+	[MASTER_QDSS_BAM] = &qhm_qdss_bam,
+	[MASTER_QPIC] = &qhm_qpic,
+	[MASTER_SNOC_CFG] = &qhm_snoc_cfg,
+	[MASTER_SPMI_FETCHER] = &qhm_spmi_fetcher1,
+	[MASTER_ANOC_SNOC] = &qnm_aggre_noc,
+	[MASTER_IPA] = &qnm_ipa,
+	[MASTER_MEM_NOC_SNOC] = &qnm_memnoc,
+	[MASTER_MEM_NOC_PCIE_SNOC] = &qnm_memnoc_pcie,
+	[MASTER_CRYPTO_CORE_0] = &qxm_crypto,
+	[MASTER_EMAC] = &xm_emac,
+	[MASTER_IPA_PCIE] = &xm_ipa2pcie_slv,
+	[MASTER_PCIE] = &xm_pcie,
+	[MASTER_QDSS_ETR] = &xm_qdss_etr,
+	[MASTER_SDCC_1] = &xm_sdc1,
+	[MASTER_USB3] = &xm_usb3,
+	[SLAVE_AOP] = &qhs_aop,
+	[SLAVE_AOSS] = &qhs_aoss,
+	[SLAVE_APPSS] = &qhs_apss,
+	[SLAVE_AUDIO] = &qhs_audio,
+	[SLAVE_BLSP_1] = &qhs_blsp1,
+	[SLAVE_CLK_CTL] = &qhs_clk_ctl,
+	[SLAVE_CRYPTO_0_CFG] = &qhs_crypto0_cfg,
+	[SLAVE_CNOC_DDRSS] = &qhs_ddrss_cfg,
+	[SLAVE_ECC_CFG] = &qhs_ecc_cfg,
+	[SLAVE_EMAC_CFG] = &qhs_emac_cfg,
+	[SLAVE_IMEM_CFG] = &qhs_imem_cfg,
+	[SLAVE_IPA_CFG] = &qhs_ipa,
+	[SLAVE_CNOC_MSS] = &qhs_mss_cfg,
+	[SLAVE_PCIE_PARF] = &qhs_pcie_parf,
+	[SLAVE_PDM] = &qhs_pdm,
+	[SLAVE_PRNG] = &qhs_prng,
+	[SLAVE_QDSS_CFG] = &qhs_qdss_cfg,
+	[SLAVE_QPIC] = &qhs_qpic,
+	[SLAVE_SDCC_1] = &qhs_sdc1,
+	[SLAVE_SNOC_CFG] = &qhs_snoc_cfg,
+	[SLAVE_SPMI_FETCHER] = &qhs_spmi_fetcher,
+	[SLAVE_SPMI_VGI_COEX] = &qhs_spmi_vgi_coex,
+	[SLAVE_TCSR] = &qhs_tcsr,
+	[SLAVE_TLMM] = &qhs_tlmm,
+	[SLAVE_USB3] = &qhs_usb3,
+	[SLAVE_USB3_PHY_CFG] = &qhs_usb3_phy,
+	[SLAVE_ANOC_SNOC] = &qns_aggre_noc,
+	[SLAVE_SNOC_MEM_NOC_GC] = &qns_snoc_memnoc,
+	[SLAVE_OCIMEM] = &qxs_imem,
+	[SLAVE_SERVICE_SNOC] = &srvc_snoc,
+	[SLAVE_PCIE_0] = &xs_pcie,
+	[SLAVE_QDSS_STM] = &xs_qdss_stm,
+	[SLAVE_TCU] = &xs_sys_tcu_cfg,
+};
+
+static const struct qcom_icc_desc sdx55_system_noc = {
+	.nodes = system_noc_nodes,
+	.num_nodes = ARRAY_SIZE(system_noc_nodes),
+	.bcms = system_noc_bcms,
+	.num_bcms = ARRAY_SIZE(system_noc_bcms),
+};
+
+static struct qcom_icc_bcm *ipa_virt_bcms[] = {
+	&bcm_ip0,
+};
+
+static struct qcom_icc_node *ipa_virt_nodes[] = {
+	[MASTER_IPA_CORE] = &ipa_core_master,
+	[SLAVE_IPA_CORE] = &ipa_core_slave,
+};
+
+static const struct qcom_icc_desc sdx55_ipa_virt = {
+	.nodes = ipa_virt_nodes,
+	.num_nodes = ARRAY_SIZE(ipa_virt_nodes),
+	.bcms = ipa_virt_bcms,
+	.num_bcms = ARRAY_SIZE(ipa_virt_bcms),
+};
+
+/* Register this NoC's provider, BCMs and nodes from the matched descriptor. */
+static int qnoc_probe(struct platform_device *pdev)
+{
+	const struct qcom_icc_desc *desc;
+	struct icc_onecell_data *data;
+	struct icc_provider *provider;
+	struct qcom_icc_node **qnodes;
+	struct qcom_icc_provider *qp;
+	struct icc_node *node;
+	size_t num_nodes, i, j;
+	int ret;
+
+	desc = device_get_match_data(&pdev->dev);
+	if (!desc)
+		return -EINVAL;
+
+	qnodes = desc->nodes;
+	num_nodes = desc->num_nodes;
+
+	qp = devm_kzalloc(&pdev->dev, sizeof(*qp), GFP_KERNEL);
+	if (!qp)
+		return -ENOMEM;
+
+	/* Header plus one nodes[] pointer per index, not an array of icc_node. */
+	data = devm_kzalloc(&pdev->dev, struct_size(data, nodes, num_nodes), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	provider = &qp->provider;
+	provider->dev = &pdev->dev;
+	provider->set = qcom_icc_set;
+	provider->pre_aggregate = qcom_icc_pre_aggregate;
+	provider->aggregate = qcom_icc_aggregate;
+	provider->xlate = of_icc_xlate_onecell;
+	INIT_LIST_HEAD(&provider->nodes);
+	provider->data = data;
+
+	qp->dev = &pdev->dev;
+	qp->bcms = desc->bcms;
+	qp->num_bcms = desc->num_bcms;
+
+	qp->voter = of_bcm_voter_get(qp->dev, NULL);
+	if (IS_ERR(qp->voter))
+		return PTR_ERR(qp->voter);
+
+	ret = icc_provider_add(provider);
+	if (ret) {
+		dev_err(&pdev->dev, "error adding interconnect provider\n");
+		return ret;
+	}
+
+	for (i = 0; i < qp->num_bcms; i++)
+		qcom_icc_bcm_init(qp->bcms[i], &pdev->dev);
+
+	for (i = 0; i < num_nodes; i++) {
+		if (!qnodes[i])
+			continue;
+
+		node = icc_node_create(qnodes[i]->id);
+		if (IS_ERR(node)) {
+			ret = PTR_ERR(node);
+			goto err;
+		}
+
+		node->name = qnodes[i]->name;
+		node->data = qnodes[i];
+		icc_node_add(node, provider);
+
+		for (j = 0; j < qnodes[i]->num_links; j++)
+			icc_link_create(node, qnodes[i]->links[j]);
+
+		data->nodes[i] = node;
+	}
+	data->num_nodes = num_nodes;
+
+	platform_set_drvdata(pdev, qp);
+
+	return 0;
+err:
+	icc_nodes_remove(provider);
+	icc_provider_del(provider);
+	return ret;
+}
+
+static int qnoc_remove(struct platform_device *pdev)
+{
+	struct qcom_icc_provider *qp = platform_get_drvdata(pdev);
+
+	icc_nodes_remove(&qp->provider);
+	return icc_provider_del(&qp->provider);
+}
+
+static const struct of_device_id qnoc_of_match[] = {
+	{ .compatible = "qcom,sdx55-mc-virt",
+	  .data = &sdx55_mc_virt},
+	{ .compatible = "qcom,sdx55-mem-noc",
+	  .data = &sdx55_mem_noc},
+	{ .compatible = "qcom,sdx55-system-noc",
+	  .data = &sdx55_system_noc},
+	{ .compatible = "qcom,sdx55-ipa-virt",
+	  .data = &sdx55_ipa_virt},
+	{ }
+};
+MODULE_DEVICE_TABLE(of, qnoc_of_match);
+
+static struct platform_driver qnoc_driver = {
+	.probe = qnoc_probe,
+	.remove = qnoc_remove,
+	.driver = {
+		.name = "qnoc-sdx55",
+		.of_match_table = qnoc_of_match,
+		.sync_state = icc_sync_state,
+	},
+};
+module_platform_driver(qnoc_driver);
+
+MODULE_DESCRIPTION("Qualcomm SDX55 NoC driver");
+MODULE_AUTHOR("Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/interconnect/qcom/sdx55.h b/drivers/interconnect/qcom/sdx55.h
new file mode 100644
index 000000000000..deff8afe0631
--- /dev/null
+++ b/drivers/interconnect/qcom/sdx55.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2021, Linaro Ltd.
+ */
+
+#ifndef __DRIVERS_INTERCONNECT_QCOM_SDX55_H
+#define __DRIVERS_INTERCONNECT_QCOM_SDX55_H
+
+#define SDX55_MASTER_IPA_CORE			0
+#define SDX55_MASTER_LLCC			1
+#define SDX55_MASTER_TCU_0			2
+#define SDX55_MASTER_SNOC_GC_MEM_NOC		3
+#define SDX55_MASTER_AMPSS_M0			4
+#define SDX55_MASTER_AUDIO			5
+#define SDX55_MASTER_BLSP_1			6
+#define SDX55_MASTER_QDSS_BAM			7
+#define SDX55_MASTER_QPIC			8
+#define SDX55_MASTER_SNOC_CFG			9
+#define SDX55_MASTER_SPMI_FETCHER		10
+#define SDX55_MASTER_ANOC_SNOC			11
+#define SDX55_MASTER_IPA			12
+#define SDX55_MASTER_MEM_NOC_SNOC		13
+#define SDX55_MASTER_MEM_NOC_PCIE_SNOC		14
+#define SDX55_MASTER_CRYPTO_CORE_0		15
+#define SDX55_MASTER_EMAC			16
+#define SDX55_MASTER_IPA_PCIE			17
+#define SDX55_MASTER_PCIE			18
+#define SDX55_MASTER_QDSS_ETR			19
+#define SDX55_MASTER_SDCC_1			20
+#define SDX55_MASTER_USB3			21
+#define SDX55_SLAVE_IPA_CORE			22
+#define SDX55_SLAVE_EBI_CH0			23
+#define SDX55_SLAVE_LLCC			24
+#define SDX55_SLAVE_MEM_NOC_SNOC		25
+#define SDX55_SLAVE_MEM_NOC_PCIE_SNOC		26
+#define SDX55_SLAVE_ANOC_SNOC			27
+#define SDX55_SLAVE_SNOC_CFG			28
+#define SDX55_SLAVE_EMAC_CFG			29
+#define SDX55_SLAVE_USB3			30
+#define SDX55_SLAVE_TLMM			31
+#define SDX55_SLAVE_SPMI_FETCHER		32
+#define SDX55_SLAVE_QDSS_CFG			33
+#define SDX55_SLAVE_PDM				34
+#define SDX55_SLAVE_SNOC_MEM_NOC_GC		35
+#define SDX55_SLAVE_TCSR			36
+#define SDX55_SLAVE_CNOC_DDRSS			37
+#define SDX55_SLAVE_SPMI_VGI_COEX		38
+#define SDX55_SLAVE_QPIC			39
+#define SDX55_SLAVE_OCIMEM			40
+#define SDX55_SLAVE_IPA_CFG			41
+#define SDX55_SLAVE_USB3_PHY_CFG		42
+#define SDX55_SLAVE_AOP				43
+#define SDX55_SLAVE_BLSP_1			44
+#define SDX55_SLAVE_SDCC_1			45
+#define SDX55_SLAVE_CNOC_MSS			46
+#define SDX55_SLAVE_PCIE_PARF			47
+#define SDX55_SLAVE_ECC_CFG			48
+#define SDX55_SLAVE_AUDIO			49
+#define SDX55_SLAVE_AOSS			51
+#define SDX55_SLAVE_PRNG			52
+#define SDX55_SLAVE_CRYPTO_0_CFG		53
+#define SDX55_SLAVE_TCU				54
+#define SDX55_SLAVE_CLK_CTL			55
+#define SDX55_SLAVE_IMEM_CFG			56
+#define SDX55_SLAVE_SERVICE_SNOC		57
+#define SDX55_SLAVE_PCIE_0			58
+#define SDX55_SLAVE_QDSS_STM			59
+#define SDX55_SLAVE_APPSS			60
+
+#endif
diff --git a/drivers/ipack/ipack.c b/drivers/ipack/ipack.c
index 9267a85fee18..7de9605cac4f 100644
--- a/drivers/ipack/ipack.c
+++ b/drivers/ipack/ipack.c
@@ -64,9 +64,6 @@ static int ipack_bus_probe(struct device *device)
 	struct ipack_device *dev = to_ipack_dev(device);
 	struct ipack_driver *drv = to_ipack_driver(device->driver);
 
-	if (!drv->ops->probe)
-		return -EINVAL;
-
 	return drv->ops->probe(dev);
 }
 
@@ -75,10 +72,9 @@ static int ipack_bus_remove(struct device *device)
 	struct ipack_device *dev = to_ipack_dev(device);
 	struct ipack_driver *drv = to_ipack_driver(device->driver);
 
-	if (!drv->ops->remove)
-		return -EINVAL;
+	if (drv->ops->remove)
+		drv->ops->remove(dev);
 
-	drv->ops->remove(dev);
 	return 0;
 }
 
@@ -252,6 +248,9 @@ EXPORT_SYMBOL_GPL(ipack_bus_unregister);
 int ipack_driver_register(struct ipack_driver *edrv, struct module *owner,
 			  const char *name)
 {
+	if (!edrv->ops->probe)
+		return -EINVAL;
+
 	edrv->driver.owner = owner;
 	edrv->driver.name = name;
 	edrv->driver.bus = &ipack_bus_type;
diff --git a/drivers/memory/Kconfig b/drivers/memory/Kconfig
index 7d9d33d8ebf6..72c0df129d5c 100644
--- a/drivers/memory/Kconfig
+++ b/drivers/memory/Kconfig
@@ -137,6 +137,15 @@ config TI_EMIF_SRAM
 	  sequence so this driver provides several relocatable PM functions
 	  for the SoC PM code to use.
 
+config FPGA_DFL_EMIF
+	tristate "FPGA DFL EMIF Driver"
+	depends on FPGA_DFL && HAS_IOMEM
+	help
+	  This driver is for the EMIF private feature implemented under
+	  FPGA Device Feature List (DFL) framework. It is used to expose
+	  memory interface status information as well as memory clearing
+	  control.
+
 config MVEBU_DEVBUS
 	bool "Marvell EBU Device Bus Controller"
 	default y if PLAT_ORION
diff --git a/drivers/memory/Makefile b/drivers/memory/Makefile
index e71cf7b99641..bc7663ed1c25 100644
--- a/drivers/memory/Makefile
+++ b/drivers/memory/Makefile
@@ -28,6 +28,8 @@ obj-$(CONFIG_STM32_FMC2_EBI)	+= stm32-fmc2-ebi.o
 obj-$(CONFIG_SAMSUNG_MC)	+= samsung/
 obj-$(CONFIG_TEGRA_MC)		+= tegra/
 obj-$(CONFIG_TI_EMIF_SRAM)	+= ti-emif-sram.o
+obj-$(CONFIG_FPGA_DFL_EMIF)	+= dfl-emif.o
+
 ti-emif-sram-objs		:= ti-emif-pm.o ti-emif-sram-pm.o
 
 AFLAGS_ti-emif-sram-pm.o	:=-Wa,-march=armv7-a
diff --git a/drivers/memory/dfl-emif.c b/drivers/memory/dfl-emif.c
new file mode 100644
index 000000000000..3f719816771d
--- /dev/null
+++ b/drivers/memory/dfl-emif.c
@@ -0,0 +1,207 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DFL device driver for EMIF private feature
+ *
+ * Copyright (C) 2020 Intel Corporation, Inc.
+ *
+ */
+#include <linux/bitfield.h>
+#include <linux/dfl.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#define FME_FEATURE_ID_EMIF		0x9
+
+#define EMIF_STAT			0x8
+#define EMIF_STAT_INIT_DONE_SFT		0
+#define EMIF_STAT_CALC_FAIL_SFT		8
+#define EMIF_STAT_CLEAR_BUSY_SFT	16
+#define EMIF_CTRL			0x10
+#define EMIF_CTRL_CLEAR_EN_SFT		0
+#define EMIF_CTRL_CLEAR_EN_MSK		GENMASK_ULL(3, 0)
+
+#define EMIF_POLL_INVL			10000 /* us */
+#define EMIF_POLL_TIMEOUT		5000000 /* us */
+
+struct dfl_emif {
+	struct device *dev;
+	void __iomem *base;
+	spinlock_t lock;	/* Serialises access to EMIF_CTRL reg */
+};
+
+struct emif_attr {
+	struct device_attribute attr;
+	u32 shift;
+	u32 index;
+};
+
+#define to_emif_attr(dev_attr) \
+	container_of(dev_attr, struct emif_attr, attr)
+
+static ssize_t emif_state_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct emif_attr *eattr = to_emif_attr(attr);
+	struct dfl_emif *de = dev_get_drvdata(dev);
+	u64 val;
+
+	val = readq(de->base + EMIF_STAT);
+
+	return sysfs_emit(buf, "%u\n",
+			  !!(val & BIT_ULL(eattr->shift + eattr->index)));
+}
+
+static ssize_t emif_clear_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	struct emif_attr *eattr = to_emif_attr(attr);
+	struct dfl_emif *de = dev_get_drvdata(dev);
+	u64 clear_busy_msk, clear_en_msk, val;
+	void __iomem *base = de->base;
+
+	if (!sysfs_streq(buf, "1"))
+		return -EINVAL;
+
+	clear_busy_msk = BIT_ULL(EMIF_STAT_CLEAR_BUSY_SFT + eattr->index);
+	clear_en_msk = BIT_ULL(EMIF_CTRL_CLEAR_EN_SFT + eattr->index);
+
+	spin_lock(&de->lock);
+	/* The CLEAR_EN field is WO, but other fields are RW */
+	val = readq(base + EMIF_CTRL);
+	val &= ~EMIF_CTRL_CLEAR_EN_MSK;
+	val |= clear_en_msk;
+	writeq(val, base + EMIF_CTRL);
+	spin_unlock(&de->lock);
+
+	if (readq_poll_timeout(base + EMIF_STAT, val,
+			       !(val & clear_busy_msk),
+			       EMIF_POLL_INVL, EMIF_POLL_TIMEOUT)) {
+		dev_err(de->dev, "timeout, fail to clear\n");
+		return -ETIMEDOUT;
+	}
+
+	return count;
+}
+
+#define emif_state_attr(_name, _shift, _index)				\
+	static struct emif_attr emif_attr_##inf##_index##_##_name =	\
+		{ .attr = __ATTR(inf##_index##_##_name, 0444,		\
+				 emif_state_show, NULL),		\
+		  .shift = (_shift), .index = (_index) }
+
+#define emif_clear_attr(_index)						\
+	static struct emif_attr emif_attr_##inf##_index##_clear =	\
+		{ .attr = __ATTR(inf##_index##_clear, 0200,		\
+				 NULL, emif_clear_store),		\
+		  .index = (_index) }
+
+emif_state_attr(init_done, EMIF_STAT_INIT_DONE_SFT, 0);
+emif_state_attr(init_done, EMIF_STAT_INIT_DONE_SFT, 1);
+emif_state_attr(init_done, EMIF_STAT_INIT_DONE_SFT, 2);
+emif_state_attr(init_done, EMIF_STAT_INIT_DONE_SFT, 3);
+
+emif_state_attr(cal_fail, EMIF_STAT_CALC_FAIL_SFT, 0);
+emif_state_attr(cal_fail, EMIF_STAT_CALC_FAIL_SFT, 1);
+emif_state_attr(cal_fail, EMIF_STAT_CALC_FAIL_SFT, 2);
+emif_state_attr(cal_fail, EMIF_STAT_CALC_FAIL_SFT, 3);
+
+emif_clear_attr(0);
+emif_clear_attr(1);
+emif_clear_attr(2);
+emif_clear_attr(3);
+
+static struct attribute *dfl_emif_attrs[] = {
+	&emif_attr_inf0_init_done.attr.attr,
+	&emif_attr_inf0_cal_fail.attr.attr,
+	&emif_attr_inf0_clear.attr.attr,
+
+	&emif_attr_inf1_init_done.attr.attr,
+	&emif_attr_inf1_cal_fail.attr.attr,
+	&emif_attr_inf1_clear.attr.attr,
+
+	&emif_attr_inf2_init_done.attr.attr,
+	&emif_attr_inf2_cal_fail.attr.attr,
+	&emif_attr_inf2_clear.attr.attr,
+
+	&emif_attr_inf3_init_done.attr.attr,
+	&emif_attr_inf3_cal_fail.attr.attr,
+	&emif_attr_inf3_clear.attr.attr,
+
+	NULL,
+};
+
+static umode_t dfl_emif_visible(struct kobject *kobj,
+				struct attribute *attr, int n)
+{
+	struct dfl_emif *de = dev_get_drvdata(kobj_to_dev(kobj));
+	struct emif_attr *eattr = container_of(attr, struct emif_attr,
+					       attr.attr);
+	u64 val;
+
+	/*
+	 * This device supports up to 4 memory interfaces, but not every
+	 * platform uses all of them. The read-out value of the CLEAR_EN field
+	 * (a bitmap) tells which interfaces are present; attributes of an
+	 * absent interface are hidden by returning mode 0.
+	 */
+	val = FIELD_GET(EMIF_CTRL_CLEAR_EN_MSK, readq(de->base + EMIF_CTRL));
+
+	return (val & BIT_ULL(eattr->index)) ? attr->mode : 0;
+}
+
+static const struct attribute_group dfl_emif_group = {
+	.is_visible = dfl_emif_visible,
+	.attrs = dfl_emif_attrs,
+};
+
+static const struct attribute_group *dfl_emif_groups[] = {
+	&dfl_emif_group,
+	NULL,
+};
+
+static int dfl_emif_probe(struct dfl_device *ddev)
+{
+	struct device *dev = &ddev->dev;
+	struct dfl_emif *de;
+
+	de = devm_kzalloc(dev, sizeof(*de), GFP_KERNEL);
+	if (!de)
+		return -ENOMEM;
+
+	de->base = devm_ioremap_resource(dev, &ddev->mmio_res);
+	if (IS_ERR(de->base))
+		return PTR_ERR(de->base);
+
+	de->dev = dev;
+	spin_lock_init(&de->lock);
+	dev_set_drvdata(dev, de);
+
+	return 0;
+}
+
+static const struct dfl_device_id dfl_emif_ids[] = {
+	{ FME_ID, FME_FEATURE_ID_EMIF },
+	{ }
+};
+MODULE_DEVICE_TABLE(dfl, dfl_emif_ids);
+
+static struct dfl_driver dfl_emif_driver = {
+	.drv	= {
+		.name       = "dfl-emif",
+		.dev_groups = dfl_emif_groups,
+	},
+	.id_table = dfl_emif_ids,
+	.probe   = dfl_emif_probe,
+};
+module_dfl_driver(dfl_emif_driver);
+
+MODULE_DESCRIPTION("DFL EMIF driver");
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index fafa8b0d8099..f532c59bb59b 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -50,14 +50,6 @@ config AD525X_DPOT_SPI
 	  To compile this driver as a module, choose M here: the
 	  module will be called ad525x_dpot-spi.
 
-config ATMEL_TCLIB
-	bool "Atmel AT32/AT91 Timer/Counter Library"
-	depends on ARCH_AT91
-	help
-	  Select this if you want a library to allocate the Timer/Counter
-	  blocks found on many Atmel processors.  This facilitates using
-	  these blocks by different drivers despite processor differences.
-
 config DUMMY_IRQ
 	tristate "Dummy IRQ handler"
 	help
@@ -112,19 +104,6 @@ config PHANTOM
 	  If you choose to build module, its name will be phantom. If unsure,
 	  say N here.
 
-config INTEL_MID_PTI
-	tristate "Parallel Trace Interface for MIPI P1149.7 cJTAG standard"
-	depends on PCI && TTY && (X86_INTEL_MID || COMPILE_TEST)
-	help
-	  The PTI (Parallel Trace Interface) driver directs
-	  trace data routed from various parts in the system out
-	  through an Intel Penwell PTI port and out of the mobile
-	  device for analysis with a debugging tool (Lauterbach or Fido).
-
-	  You should select this driver if the target kernel is meant for
-	  an Intel Atom (non-netbook) mobile device containing a MIPI
-	  P1149.7 standard implementation.
-
 config TIFM_CORE
 	tristate "TI Flash Media interface support"
 	depends on PCI
@@ -478,6 +457,7 @@ source "drivers/misc/genwqe/Kconfig"
 source "drivers/misc/echo/Kconfig"
 source "drivers/misc/cxl/Kconfig"
 source "drivers/misc/ocxl/Kconfig"
+source "drivers/misc/bcm-vk/Kconfig"
 source "drivers/misc/cardreader/Kconfig"
 source "drivers/misc/habanalabs/Kconfig"
 source "drivers/misc/uacce/Kconfig"
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index d23231e73330..99b6f15a3c70 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -8,9 +8,7 @@ obj-$(CONFIG_IBMVMC)		+= ibmvmc.o
 obj-$(CONFIG_AD525X_DPOT)	+= ad525x_dpot.o
 obj-$(CONFIG_AD525X_DPOT_I2C)	+= ad525x_dpot-i2c.o
 obj-$(CONFIG_AD525X_DPOT_SPI)	+= ad525x_dpot-spi.o
-obj-$(CONFIG_INTEL_MID_PTI)	+= pti.o
 obj-$(CONFIG_ATMEL_SSC)		+= atmel-ssc.o
-obj-$(CONFIG_ATMEL_TCLIB)	+= atmel_tclib.o
 obj-$(CONFIG_DUMMY_IRQ)		+= dummy-irq.o
 obj-$(CONFIG_ICS932S401)	+= ics932s401.o
 obj-$(CONFIG_LKDTM)		+= lkdtm/
@@ -51,6 +49,7 @@ obj-$(CONFIG_ECHO)		+= echo/
 obj-$(CONFIG_CXL_BASE)		+= cxl/
 obj-$(CONFIG_PCI_ENDPOINT_TEST)	+= pci_endpoint_test.o
 obj-$(CONFIG_OCXL)		+= ocxl/
+obj-$(CONFIG_BCM_VK)		+= bcm-vk/
 obj-y				+= cardreader/
 obj-$(CONFIG_PVPANIC)   	+= pvpanic.o
 obj-$(CONFIG_HABANA_AI)		+= habanalabs/
diff --git a/drivers/misc/atmel_tclib.c b/drivers/misc/atmel_tclib.c
deleted file mode 100644
index 7de7840f613c..000000000000
--- a/drivers/misc/atmel_tclib.c
+++ /dev/null
@@ -1,200 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-#include <linux/clk.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/io.h>
-#include <linux/ioport.h>
-#include <linux/kernel.h>
-#include <linux/platform_device.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/export.h>
-#include <linux/of.h>
-#include <soc/at91/atmel_tcb.h>
-
-/*
- * This is a thin library to solve the problem of how to portably allocate
- * one of the TC blocks.  For simplicity, it doesn't currently expect to
- * share individual timers between different drivers.
- */
-
-#if defined(CONFIG_AVR32)
-/* AVR32 has these divide PBB */
-const u8 atmel_tc_divisors[5] = { 0, 4, 8, 16, 32, };
-EXPORT_SYMBOL(atmel_tc_divisors);
-
-#elif defined(CONFIG_ARCH_AT91)
-/* AT91 has these divide MCK */
-const u8 atmel_tc_divisors[5] = { 2, 8, 32, 128, 0, };
-EXPORT_SYMBOL(atmel_tc_divisors);
-
-#endif
-
-static DEFINE_SPINLOCK(tc_list_lock);
-static LIST_HEAD(tc_list);
-
-/**
- * atmel_tc_alloc - allocate a specified TC block
- * @block: which block to allocate
- *
- * Caller allocates a block.  If it is available, a pointer to a
- * pre-initialized struct atmel_tc is returned. The caller can access
- * the registers directly through the "regs" field.
- */
-struct atmel_tc *atmel_tc_alloc(unsigned block)
-{
-	struct atmel_tc		*tc;
-	struct platform_device	*pdev = NULL;
-
-	spin_lock(&tc_list_lock);
-	list_for_each_entry(tc, &tc_list, node) {
-		if (tc->allocated)
-			continue;
-
-		if ((tc->pdev->dev.of_node && tc->id == block) ||
-		    (tc->pdev->id == block)) {
-			pdev = tc->pdev;
-			tc->allocated = true;
-			break;
-		}
-	}
-	spin_unlock(&tc_list_lock);
-
-	return pdev ? tc : NULL;
-}
-EXPORT_SYMBOL_GPL(atmel_tc_alloc);
-
-/**
- * atmel_tc_free - release a specified TC block
- * @tc: Timer/counter block that was returned by atmel_tc_alloc()
- *
- * This reverses the effect of atmel_tc_alloc(), invalidating the resource
- * returned by that routine and making the TC available to other drivers.
- */
-void atmel_tc_free(struct atmel_tc *tc)
-{
-	spin_lock(&tc_list_lock);
-	if (tc->allocated)
-		tc->allocated = false;
-	spin_unlock(&tc_list_lock);
-}
-EXPORT_SYMBOL_GPL(atmel_tc_free);
-
-#if defined(CONFIG_OF)
-static struct atmel_tcb_config tcb_rm9200_config = {
-	.counter_width = 16,
-};
-
-static struct atmel_tcb_config tcb_sam9x5_config = {
-	.counter_width = 32,
-};
-
-static const struct of_device_id atmel_tcb_dt_ids[] = {
-	{
-		.compatible = "atmel,at91rm9200-tcb",
-		.data = &tcb_rm9200_config,
-	}, {
-		.compatible = "atmel,at91sam9x5-tcb",
-		.data = &tcb_sam9x5_config,
-	}, {
-		/* sentinel */
-	}
-};
-
-MODULE_DEVICE_TABLE(of, atmel_tcb_dt_ids);
-#endif
-
-static int __init tc_probe(struct platform_device *pdev)
-{
-	struct atmel_tc *tc;
-	struct clk	*clk;
-	int		irq;
-	unsigned int	i;
-
-	if (of_get_child_count(pdev->dev.of_node))
-		return -EBUSY;
-
-	irq = platform_get_irq(pdev, 0);
-	if (irq < 0)
-		return -EINVAL;
-
-	tc = devm_kzalloc(&pdev->dev, sizeof(struct atmel_tc), GFP_KERNEL);
-	if (!tc)
-		return -ENOMEM;
-
-	tc->pdev = pdev;
-
-	clk = devm_clk_get(&pdev->dev, "t0_clk");
-	if (IS_ERR(clk))
-		return PTR_ERR(clk);
-
-	tc->slow_clk = devm_clk_get(&pdev->dev, "slow_clk");
-	if (IS_ERR(tc->slow_clk))
-		return PTR_ERR(tc->slow_clk);
-
-	tc->regs = devm_platform_ioremap_resource(pdev, 0);
-	if (IS_ERR(tc->regs))
-		return PTR_ERR(tc->regs);
-
-	/* Now take SoC information if available */
-	if (pdev->dev.of_node) {
-		const struct of_device_id *match;
-		match = of_match_node(atmel_tcb_dt_ids, pdev->dev.of_node);
-		if (match)
-			tc->tcb_config = match->data;
-
-		tc->id = of_alias_get_id(tc->pdev->dev.of_node, "tcb");
-	} else {
-		tc->id = pdev->id;
-	}
-
-	tc->clk[0] = clk;
-	tc->clk[1] = devm_clk_get(&pdev->dev, "t1_clk");
-	if (IS_ERR(tc->clk[1]))
-		tc->clk[1] = clk;
-	tc->clk[2] = devm_clk_get(&pdev->dev, "t2_clk");
-	if (IS_ERR(tc->clk[2]))
-		tc->clk[2] = clk;
-
-	tc->irq[0] = irq;
-	tc->irq[1] = platform_get_irq(pdev, 1);
-	if (tc->irq[1] < 0)
-		tc->irq[1] = irq;
-	tc->irq[2] = platform_get_irq(pdev, 2);
-	if (tc->irq[2] < 0)
-		tc->irq[2] = irq;
-
-	for (i = 0; i < 3; i++)
-		writel(ATMEL_TC_ALL_IRQ, tc->regs + ATMEL_TC_REG(i, IDR));
-
-	spin_lock(&tc_list_lock);
-	list_add_tail(&tc->node, &tc_list);
-	spin_unlock(&tc_list_lock);
-
-	platform_set_drvdata(pdev, tc);
-
-	return 0;
-}
-
-static void tc_shutdown(struct platform_device *pdev)
-{
-	int i;
-	struct atmel_tc *tc = platform_get_drvdata(pdev);
-
-	for (i = 0; i < 3; i++)
-		writel(ATMEL_TC_ALL_IRQ, tc->regs + ATMEL_TC_REG(i, IDR));
-}
-
-static struct platform_driver tc_driver = {
-	.driver = {
-		.name	= "atmel_tcb",
-		.of_match_table	= of_match_ptr(atmel_tcb_dt_ids),
-	},
-	.shutdown = tc_shutdown,
-};
-
-static int __init tc_init(void)
-{
-	return platform_driver_probe(&tc_driver, tc_probe);
-}
-arch_initcall(tc_init);
diff --git a/drivers/misc/bcm-vk/Kconfig b/drivers/misc/bcm-vk/Kconfig
new file mode 100644
index 000000000000..68a972772b99
--- /dev/null
+++ b/drivers/misc/bcm-vk/Kconfig
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Broadcom VK device
+#
+config BCM_VK
+	tristate "Support for Broadcom VK Accelerators"
+	depends on PCI_MSI
+	help
+	  Select this option to enable support for Broadcom
+	  VK Accelerators.  VK is used for performing
+	  multiple specific offload processing tasks in parallel.
+	  Such offload tasks assist in such operations as video
+	  transcoding, compression, and crypto tasks.
+	  This driver enables userspace programs to access these
+	  accelerators via /dev/bcm-vk.N devices.
+
+	  If unsure, say N.
+
+config BCM_VK_TTY
+	bool "Enable tty ports on a Broadcom VK Accelerator device"
+	depends on TTY
+	depends on BCM_VK
+	help
+	  Select this option to enable tty support to allow console
+	  access to Broadcom VK Accelerator cards from host.
+
+	  Device node will be in the form /dev/bcm-vk.x_ttyVKy where:
+	  x is the instance of the VK card
+	  y is the tty device number on the VK card.
diff --git a/drivers/misc/bcm-vk/Makefile b/drivers/misc/bcm-vk/Makefile
new file mode 100644
index 000000000000..1df2ebe851ca
--- /dev/null
+++ b/drivers/misc/bcm-vk/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Broadcom VK driver
+#
+
+obj-$(CONFIG_BCM_VK) += bcm_vk.o
+bcm_vk-objs := \
+	bcm_vk_dev.o \
+	bcm_vk_msg.o \
+	bcm_vk_sg.o
+
+bcm_vk-$(CONFIG_BCM_VK_TTY) += bcm_vk_tty.o
diff --git a/drivers/misc/bcm-vk/bcm_vk.h b/drivers/misc/bcm-vk/bcm_vk.h
new file mode 100644
index 000000000000..a1338f375589
--- /dev/null
+++ b/drivers/misc/bcm-vk/bcm_vk.h
@@ -0,0 +1,549 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2018-2020 Broadcom.
+ */
+
+#ifndef BCM_VK_H
+#define BCM_VK_H
+
+#include <linux/atomic.h>
+#include <linux/firmware.h>
+#include <linux/irq.h>
+#include <linux/kref.h>
+#include <linux/miscdevice.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/poll.h>
+#include <linux/sched/signal.h>
+#include <linux/tty.h>
+#include <linux/uaccess.h>
+#include <uapi/linux/misc/bcm_vk.h>
+
+#include "bcm_vk_msg.h"
+
+#define DRV_MODULE_NAME		"bcm-vk"
+
+/*
+ * Load Image is completed in two stages:
+ *
+ * 1) When the VK device boots up, the M7 CPU runs and executes the BootROM.
+ * The Secure Boot Loader (SBL) as part of the BootROM will run
+ * to open up ITCM for host to push BOOT1 image.
+ * SBL will authenticate the image before jumping to BOOT1 image.
+ *
+ * 2) Because the BOOT1 image is a secured image, it is also called the
+ * Secure Boot Image (SBI). At second stage, SBI will initialize DDR
+ * and wait for host to push BOOT2 image to DDR.
+ * SBI will authenticate the image before jumping to BOOT2 image.
+ *
+ */
+/* Location of registers of interest in BAR0 */
+
+/* Request register for Secure Boot Loader (SBL) download */
+#define BAR_CODEPUSH_SBL		0x400
+/* Start of ITCM */
+#define CODEPUSH_BOOT1_ENTRY		0x00400000
+#define CODEPUSH_MASK		        0xfffff000
+#define CODEPUSH_BOOTSTART		BIT(0)
+
+/* Boot Status register */
+#define BAR_BOOT_STATUS			0x404
+
+#define SRAM_OPEN			BIT(16)
+#define DDR_OPEN			BIT(17)
+
+/* Firmware loader progress status definitions */
+#define FW_LOADER_ACK_SEND_MORE_DATA	BIT(18)
+#define FW_LOADER_ACK_IN_PROGRESS	BIT(19)
+#define FW_LOADER_ACK_RCVD_ALL_DATA	BIT(20)
+
+/* Boot1/2 is running in standalone mode */
+#define BOOT_STDALONE_RUNNING		BIT(21)
+
+/* definitions for boot status register */
+#define BOOT_STATE_MASK			(0xffffffff & \
+					 ~(FW_LOADER_ACK_SEND_MORE_DATA | \
+					   FW_LOADER_ACK_IN_PROGRESS | \
+					   BOOT_STDALONE_RUNNING))
+
+#define BOOT_ERR_SHIFT			4
+#define BOOT_ERR_MASK			(0xf << BOOT_ERR_SHIFT)
+#define BOOT_PROG_MASK			0xf
+
+#define BROM_STATUS_NOT_RUN		0x2
+#define BROM_NOT_RUN			(SRAM_OPEN | BROM_STATUS_NOT_RUN)
+#define BROM_STATUS_COMPLETE		0x6
+#define BROM_RUNNING			(SRAM_OPEN | BROM_STATUS_COMPLETE)
+#define BOOT1_STATUS_COMPLETE		0x6
+#define BOOT1_RUNNING			(DDR_OPEN | BOOT1_STATUS_COMPLETE)
+#define BOOT2_STATUS_COMPLETE		0x6
+#define BOOT2_RUNNING			(FW_LOADER_ACK_RCVD_ALL_DATA | \
+					 BOOT2_STATUS_COMPLETE)
+
+/* Boot request for Secure Boot Image (SBI) */
+#define BAR_CODEPUSH_SBI		0x408
+/* 64M mapped to BAR2 */
+#define CODEPUSH_BOOT2_ENTRY		0x60000000
+
+#define BAR_CARD_STATUS			0x410
+/* CARD_STATUS definitions */
+#define CARD_STATUS_TTYVK0_READY	BIT(0)
+#define CARD_STATUS_TTYVK1_READY	BIT(1)
+
+#define BAR_BOOT1_STDALONE_PROGRESS	0x420
+#define BOOT1_STDALONE_SUCCESS		(BIT(13) | BIT(14))
+#define BOOT1_STDALONE_PROGRESS_MASK	BOOT1_STDALONE_SUCCESS
+
+#define BAR_METADATA_VERSION		0x440
+#define BAR_OS_UPTIME			0x444
+#define BAR_CHIP_ID			0x448
+#define MAJOR_SOC_REV(_chip_id)		(((_chip_id) >> 20) & 0xf)
+
+#define BAR_CARD_TEMPERATURE		0x45c
+/* defines for all temperature sensor */
+#define BCM_VK_TEMP_FIELD_MASK		0xff
+#define BCM_VK_CPU_TEMP_SHIFT		0
+#define BCM_VK_DDR0_TEMP_SHIFT		8
+#define BCM_VK_DDR1_TEMP_SHIFT		16
+
+#define BAR_CARD_VOLTAGE		0x460
+/* defines for voltage rail conversion */
+#define BCM_VK_VOLT_RAIL_MASK		0xffff
+#define BCM_VK_3P3_VOLT_REG_SHIFT	16
+
+#define BAR_CARD_ERR_LOG		0x464
+/* Error log register bit definition - register for error alerts */
+#define ERR_LOG_UECC			BIT(0)
+#define ERR_LOG_SSIM_BUSY		BIT(1)
+#define ERR_LOG_AFBC_BUSY		BIT(2)
+#define ERR_LOG_HIGH_TEMP_ERR		BIT(3)
+#define ERR_LOG_WDOG_TIMEOUT		BIT(4)
+#define ERR_LOG_SYS_FAULT		BIT(5)
+#define ERR_LOG_RAMDUMP			BIT(6)
+#define ERR_LOG_COP_WDOG_TIMEOUT	BIT(7)
+/* warnings */
+#define ERR_LOG_MEM_ALLOC_FAIL		BIT(8)
+#define ERR_LOG_LOW_TEMP_WARN		BIT(9)
+#define ERR_LOG_ECC			BIT(10)
+#define ERR_LOG_IPC_DWN			BIT(11)
+
+/* Alert bit definitions detected on host */
+#define ERR_LOG_HOST_INTF_V_FAIL	BIT(13)
+#define ERR_LOG_HOST_HB_FAIL		BIT(14)
+#define ERR_LOG_HOST_PCIE_DWN		BIT(15)
+
+#define BAR_CARD_ERR_MEM		0x468
+/* defines for mem err, all fields have same width */
+#define BCM_VK_MEM_ERR_FIELD_MASK	0xff
+#define BCM_VK_ECC_MEM_ERR_SHIFT	0
+#define BCM_VK_UECC_MEM_ERR_SHIFT	8
+/* number of event occurrences at which logs start to come out */
+#define BCM_VK_ECC_THRESHOLD		10
+#define BCM_VK_UECC_THRESHOLD		1
+
+#define BAR_CARD_PWR_AND_THRE		0x46c
+/* defines for power and temp threshold, all fields have same width */
+#define BCM_VK_PWR_AND_THRE_FIELD_MASK	0xff
+#define BCM_VK_LOW_TEMP_THRE_SHIFT	0
+#define BCM_VK_HIGH_TEMP_THRE_SHIFT	8
+#define BCM_VK_PWR_STATE_SHIFT		16
+
+#define BAR_CARD_STATIC_INFO		0x470
+
+#define BAR_INTF_VER			0x47c
+#define BAR_INTF_VER_MAJOR_SHIFT	16
+#define BAR_INTF_VER_MASK		0xffff
+/*
+ * major and minor semantic version numbers supported
+ * Please update as required on interface changes
+ */
+#define SEMANTIC_MAJOR			1
+#define SEMANTIC_MINOR			0
+
+/*
+ * first door bell reg, ie for queue = 0.  Only need the first one, as
+ * we will use the queue number to derive the others
+ */
+#define VK_BAR0_REGSEG_DB_BASE		0x484
+#define VK_BAR0_REGSEG_DB_REG_GAP	8 /*
+					   * DB register gap,
+					   * DB1 at 0x48c and DB2 at 0x494
+					   */
+
+/* reset register and specific values */
+#define VK_BAR0_RESET_DB_NUM		3
+#define VK_BAR0_RESET_DB_SOFT		0xffffffff
+#define VK_BAR0_RESET_DB_HARD		0xfffffffd
+#define VK_BAR0_RESET_RAMPDUMP		0xa0000000
+
+#define VK_BAR0_Q_DB_BASE(q_num)	(VK_BAR0_REGSEG_DB_BASE + \
+					 ((q_num) * VK_BAR0_REGSEG_DB_REG_GAP))
+#define VK_BAR0_RESET_DB_BASE		(VK_BAR0_REGSEG_DB_BASE + \
+					 (VK_BAR0_RESET_DB_NUM * VK_BAR0_REGSEG_DB_REG_GAP))
+
+#define BAR_BOOTSRC_SELECT		0xc78
+/* BOOTSRC definitions */
+#define BOOTSRC_SOFT_ENABLE		BIT(14)
+
+/* Card OS Firmware version size */
+#define BAR_FIRMWARE_TAG_SIZE		50
+#define FIRMWARE_STATUS_PRE_INIT_DONE	0x1f
+
+/* VK MSG_ID defines */
+#define VK_MSG_ID_BITMAP_SIZE		4096
+#define VK_MSG_ID_BITMAP_MASK		(VK_MSG_ID_BITMAP_SIZE - 1)
+#define VK_MSG_ID_OVERFLOW		0xffff
+
+/*
+ * BAR1
+ */
+
+/* BAR1 message q definition */
+
+/* indicate if msgq ctrl in BAR1 is populated */
+#define VK_BAR1_MSGQ_DEF_RDY		0x60c0
+/* ready marker value for the above location, normal boot2 */
+#define VK_BAR1_MSGQ_RDY_MARKER		0xbeefcafe
+/* diag ready marker value for the above location */
+#define VK_BAR1_DIAG_RDY_MARKER		0xdeadcafe
+/* number of msgqs in BAR1 */
+#define VK_BAR1_MSGQ_NR			0x60c4
+/* BAR1 queue control structure offset */
+#define VK_BAR1_MSGQ_CTRL_OFF		0x60c8
+
+/* BAR1 ucode and boot1 version tag */
+#define VK_BAR1_UCODE_VER_TAG		0x6170
+#define VK_BAR1_BOOT1_VER_TAG		0x61b0
+#define VK_BAR1_VER_TAG_SIZE		64
+
+/* Memory to hold the DMA buffer memory address allocated for boot2 download */
+#define VK_BAR1_DMA_BUF_OFF_HI		0x61e0
+#define VK_BAR1_DMA_BUF_OFF_LO		(VK_BAR1_DMA_BUF_OFF_HI + 4)
+#define VK_BAR1_DMA_BUF_SZ		(VK_BAR1_DMA_BUF_OFF_HI + 8)
+
+/* Scratch memory allocated on host for VK */
+#define VK_BAR1_SCRATCH_OFF_HI		0x61f0
+#define VK_BAR1_SCRATCH_OFF_LO		(VK_BAR1_SCRATCH_OFF_HI + 4)
+#define VK_BAR1_SCRATCH_SZ_ADDR		(VK_BAR1_SCRATCH_OFF_HI + 8)
+#define VK_BAR1_SCRATCH_DEF_NR_PAGES	32
+
+/* BAR1 DAUTH info */
+#define VK_BAR1_DAUTH_BASE_ADDR		0x6200
+#define VK_BAR1_DAUTH_STORE_SIZE	0x48
+#define VK_BAR1_DAUTH_VALID_SIZE	0x8
+#define VK_BAR1_DAUTH_MAX		4
+#define VK_BAR1_DAUTH_STORE_ADDR(x) \
+		(VK_BAR1_DAUTH_BASE_ADDR + \
+		 (x) * (VK_BAR1_DAUTH_STORE_SIZE + VK_BAR1_DAUTH_VALID_SIZE))
+#define VK_BAR1_DAUTH_VALID_ADDR(x) \
+		(VK_BAR1_DAUTH_STORE_ADDR(x) + VK_BAR1_DAUTH_STORE_SIZE)
+
+/* BAR1 SOTP AUTH and REVID info */
+#define VK_BAR1_SOTP_REVID_BASE_ADDR	0x6340
+#define VK_BAR1_SOTP_REVID_SIZE		0x10
+#define VK_BAR1_SOTP_REVID_MAX		2
+#define VK_BAR1_SOTP_REVID_ADDR(x) \
+		(VK_BAR1_SOTP_REVID_BASE_ADDR + (x) * VK_BAR1_SOTP_REVID_SIZE)
+
+/* VK device supports a maximum of 3 bars */
+#define MAX_BAR	3
+
+/* default number of msg blk for inband SGL */
+#define BCM_VK_DEF_IB_SGL_BLK_LEN	 16
+#define BCM_VK_IB_SGL_BLK_MAX		 24
+
+enum pci_barno {
+	BAR_0 = 0,
+	BAR_1,
+	BAR_2
+};
+
+#ifdef CONFIG_BCM_VK_TTY
+#define BCM_VK_NUM_TTY 2
+#else
+#define BCM_VK_NUM_TTY 0
+#endif
+
+struct bcm_vk_tty {
+	struct tty_port port;
+	u32 to_offset;	/* bar offset to use */
+	u32 to_size;	/* to VK buffer size */
+	u32 wr;		/* write offset shadow */
+	u32 from_offset;	/* bar offset to use */
+	u32 from_size;	/* from VK buffer size */
+	u32 rd;		/* read offset shadow */
+	pid_t pid;
+	bool irq_enabled;
+	bool is_opened;		/* tracks tty open/close */
+};
+
+/* VK device max power state, supports 3, full, reduced and low */
+#define MAX_OPP 3
+#define MAX_CARD_INFO_TAG_SIZE 64
+
+struct bcm_vk_card_info {
+	u32 version;
+	char os_tag[MAX_CARD_INFO_TAG_SIZE];
+	char cmpt_tag[MAX_CARD_INFO_TAG_SIZE];
+	u32 cpu_freq_mhz;
+	u32 cpu_scale[MAX_OPP];
+	u32 ddr_freq_mhz;
+	u32 ddr_size_MB;
+	u32 video_core_freq_mhz;
+};
+
+/* DAUTH related info */
+struct bcm_vk_dauth_key {
+	char store[VK_BAR1_DAUTH_STORE_SIZE];
+	char valid[VK_BAR1_DAUTH_VALID_SIZE];
+};
+
+struct bcm_vk_dauth_info {
+	struct bcm_vk_dauth_key keys[VK_BAR1_DAUTH_MAX];
+};
+
+/*
+ * Control structure of logging messages from the card.  This
+ * buffer is for logmsg that comes from vk
+ */
+struct bcm_vk_peer_log {
+	u32 rd_idx;
+	u32 wr_idx;
+	u32 buf_size;
+	u32 mask;
+	char data[];	/* flexible array member, adds nothing to sizeof() */
+};
+
+/* max buf size allowed */
+#define BCM_VK_PEER_LOG_BUF_MAX SZ_16K
+/* max size per line of peer log */
+#define BCM_VK_PEER_LOG_LINE_MAX  256
+
+/*
+ * single entry for processing type + utilization
+ */
+#define BCM_VK_PROC_TYPE_TAG_LEN 8
+struct bcm_vk_proc_mon_entry_t {
+	char tag[BCM_VK_PROC_TYPE_TAG_LEN];
+	u32 used;
+	u32 max; /**< max capacity */
+};
+
+/*
+ * Structure for run time utilization
+ */
+#define BCM_VK_PROC_MON_MAX 8 /* max entries supported */
+struct bcm_vk_proc_mon_info {
+	u32 num; /**< no of entries */
+	u32 entry_size; /**< per entry size */
+	struct bcm_vk_proc_mon_entry_t entries[BCM_VK_PROC_MON_MAX];
+};
+
+struct bcm_vk_hb_ctrl {
+	struct timer_list timer;
+	u32 last_uptime;
+	u32 lost_cnt;
+};
+
+struct bcm_vk_alert {
+	u16 flags;
+	u16 notfs;
+};
+
+/* some alert counters that the driver will keep track of */
+struct bcm_vk_alert_cnts {
+	u16 ecc;
+	u16 uecc;
+};
+
+struct bcm_vk {
+	struct pci_dev *pdev;
+	void __iomem *bar[MAX_BAR];
+	int num_irqs;
+
+	struct bcm_vk_card_info card_info;
+	struct bcm_vk_proc_mon_info proc_mon_info;
+	struct bcm_vk_dauth_info dauth_info;
+
+	/* mutex to protect the ioctls */
+	struct mutex mutex;
+	struct miscdevice miscdev;
+	int devid; /* dev id allocated */
+
+#ifdef CONFIG_BCM_VK_TTY
+	struct tty_driver *tty_drv;
+	struct timer_list serial_timer;
+	struct bcm_vk_tty tty[BCM_VK_NUM_TTY];
+	struct workqueue_struct *tty_wq_thread;
+	struct work_struct tty_wq_work;
+#endif
+
+	/* Reference-counting to handle file operations */
+	struct kref kref;
+
+	spinlock_t msg_id_lock; /* Spinlock for msg_id */
+	u16 msg_id;
+	DECLARE_BITMAP(bmap, VK_MSG_ID_BITMAP_SIZE);
+	spinlock_t ctx_lock; /* Spinlock for component context */
+	struct bcm_vk_ctx ctx[VK_CMPT_CTX_MAX];
+	struct bcm_vk_ht_entry pid_ht[VK_PID_HT_SZ];
+	pid_t reset_pid; /* process that issue reset */
+
+	atomic_t msgq_inited; /* indicate if info has been synced with vk */
+	struct bcm_vk_msg_chan to_v_msg_chan;
+	struct bcm_vk_msg_chan to_h_msg_chan;
+
+	struct workqueue_struct *wq_thread;
+	struct work_struct wq_work; /* work queue for deferred job */
+	unsigned long wq_offload[1]; /* various flags on wq requested */
+	void *tdma_vaddr; /* test dma segment virtual addr */
+	dma_addr_t tdma_addr; /* test dma segment bus addr */
+
+	struct notifier_block panic_nb;
+	u32 ib_sgl_size; /* size allocated for inband sgl insertion */
+
+	/* heart beat mechanism control structure */
+	struct bcm_vk_hb_ctrl hb_ctrl;
+	/* house-keeping variable of error logs */
+	spinlock_t host_alert_lock; /* protection to access host_alert struct */
+	struct bcm_vk_alert host_alert;
+	struct bcm_vk_alert peer_alert; /* bits set by the card */
+	struct bcm_vk_alert_cnts alert_cnts;
+
+	/* offset of the peer log control in BAR2 */
+	u32 peerlog_off;
+	struct bcm_vk_peer_log peerlog_info; /* record of peer log info */
+	/* offset of processing monitoring info in BAR2 */
+	u32 proc_mon_off;
+};
+
+/* wq offload work items bits definitions */
+enum bcm_vk_wq_offload_flags {
+	BCM_VK_WQ_DWNLD_PEND = 0,
+	BCM_VK_WQ_DWNLD_AUTO = 1,
+	BCM_VK_WQ_NOTF_PEND  = 2,
+};
+
+/* a macro to get an individual field with mask and shift */
+#define BCM_VK_EXTRACT_FIELD(_field, _reg, _mask, _shift) \
+		(_field = (((_reg) >> (_shift)) & (_mask)))
+
+struct bcm_vk_entry {
+	const u32 mask;
+	const u32 exp_val;
+	const char *str;
+};
+
+/* alerts that could be generated from peer */
+#define BCM_VK_PEER_ERR_NUM 12
+extern struct bcm_vk_entry const bcm_vk_peer_err[BCM_VK_PEER_ERR_NUM];
+/* alerts detected by the host */
+#define BCM_VK_HOST_ERR_NUM 3
+extern struct bcm_vk_entry const bcm_vk_host_err[BCM_VK_HOST_ERR_NUM];
+
+/*
+ * check if PCIe interface is down on read.  Use it when it is
+ * certain that val should never be all ones.
+ */
+#define BCM_VK_INTF_IS_DOWN(val) ((val) == 0xffffffff)
+
+/* Read 32 bits at byte @offset within the mapping held in vk->bar[@bar]. */
+static inline u32 vkread32(struct bcm_vk *vk, enum pci_barno bar, u64 offset)
+{
+	return readl(vk->bar[bar] + offset);
+}
+
+/* Write the 32-bit @value at byte @offset within vk->bar[@bar]. */
+static inline void vkwrite32(struct bcm_vk *vk, u32 value,
+			     enum pci_barno bar, u64 offset)
+{
+	writel(value, vk->bar[bar] + offset);
+}
+
+/* Read 8 bits at byte @offset within the mapping held in vk->bar[@bar]. */
+static inline u8 vkread8(struct bcm_vk *vk, enum pci_barno bar, u64 offset)
+{
+	return readb(vk->bar[bar] + offset);
+}
+
+/* Write the 8-bit @value at byte @offset within vk->bar[@bar]. */
+static inline void vkwrite8(struct bcm_vk *vk, u8 value,
+			    enum pci_barno bar, u64 offset)
+{
+	writeb(value, vk->bar[bar] + offset);
+}
+
+static inline bool bcm_vk_msgq_marker_valid(struct bcm_vk *vk)
+{
+	u32 fw_sts = vkread32(vk, BAR_0, VK_BAR_FWSTS);
+
+	/* BAR1 marker is only read once every VK_FWSTS_READY bit is set */
+	if ((fw_sts & VK_FWSTS_READY) != VK_FWSTS_READY)
+		return false;
+
+	/* true only if BAR1 holds the msgq ready marker value */
+	return vkread32(vk, BAR_1, VK_BAR1_MSGQ_DEF_RDY) ==
+	       VK_BAR1_MSGQ_RDY_MARKER;
+}
+
+int bcm_vk_open(struct inode *inode, struct file *p_file);
+ssize_t bcm_vk_read(struct file *p_file, char __user *buf, size_t count,
+		    loff_t *f_pos);
+ssize_t bcm_vk_write(struct file *p_file, const char __user *buf,
+		     size_t count, loff_t *f_pos);
+__poll_t bcm_vk_poll(struct file *p_file, struct poll_table_struct *wait);
+int bcm_vk_release(struct inode *inode, struct file *p_file);
+void bcm_vk_release_data(struct kref *kref);
+irqreturn_t bcm_vk_msgq_irqhandler(int irq, void *dev_id);
+irqreturn_t bcm_vk_notf_irqhandler(int irq, void *dev_id);
+irqreturn_t bcm_vk_tty_irqhandler(int irq, void *dev_id);
+int bcm_vk_msg_init(struct bcm_vk *vk);
+void bcm_vk_msg_remove(struct bcm_vk *vk);
+void bcm_vk_drain_msg_on_reset(struct bcm_vk *vk);
+int bcm_vk_sync_msgq(struct bcm_vk *vk, bool force_sync);
+void bcm_vk_blk_drv_access(struct bcm_vk *vk);
+s32 bcm_to_h_msg_dequeue(struct bcm_vk *vk);
+int bcm_vk_send_shutdown_msg(struct bcm_vk *vk, u32 shut_type,
+			     const pid_t pid, const u32 q_num);
+void bcm_to_v_q_doorbell(struct bcm_vk *vk, u32 q_num, u32 db_val);
+int bcm_vk_auto_load_all_images(struct bcm_vk *vk);
+void bcm_vk_hb_init(struct bcm_vk *vk);
+void bcm_vk_hb_deinit(struct bcm_vk *vk);
+void bcm_vk_handle_notf(struct bcm_vk *vk);
+bool bcm_vk_drv_access_ok(struct bcm_vk *vk);
+void bcm_vk_set_host_alert(struct bcm_vk *vk, u32 bit_mask);
+
+#ifdef CONFIG_BCM_VK_TTY
+int bcm_vk_tty_init(struct bcm_vk *vk, char *name);
+void bcm_vk_tty_exit(struct bcm_vk *vk);
+void bcm_vk_tty_terminate_tty_user(struct bcm_vk *vk);
+void bcm_vk_tty_wq_exit(struct bcm_vk *vk);
+
+static inline void bcm_vk_tty_set_irq_enabled(struct bcm_vk *vk, int index)
+{
+	vk->tty[index].irq_enabled = true;
+}
+#else
+static inline int bcm_vk_tty_init(struct bcm_vk *vk, char *name)
+{
+	return 0;
+}
+
+static inline void bcm_vk_tty_exit(struct bcm_vk *vk)
+{
+}
+
+static inline void bcm_vk_tty_terminate_tty_user(struct bcm_vk *vk)
+{
+}
+
+static inline void bcm_vk_tty_wq_exit(struct bcm_vk *vk)
+{
+}
+
+static inline void bcm_vk_tty_set_irq_enabled(struct bcm_vk *vk, int index)
+{
+}
+#endif /* CONFIG_BCM_VK_TTY */
+
+#endif
diff --git a/drivers/misc/bcm-vk/bcm_vk_dev.c b/drivers/misc/bcm-vk/bcm_vk_dev.c
new file mode 100644
index 000000000000..6bfea3210389
--- /dev/null
+++ b/drivers/misc/bcm-vk/bcm_vk_dev.c
@@ -0,0 +1,1652 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2018-2020 Broadcom.
+ */
+
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/firmware.h>
+#include <linux/fs.h>
+#include <linux/idr.h>
+#include <linux/interrupt.h>
+#include <linux/kref.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/pci_regs.h>
+#include <uapi/linux/misc/bcm_vk.h>
+
+#include "bcm_vk.h"
+
+#define PCI_DEVICE_ID_VALKYRIE	0x5e87
+#define PCI_DEVICE_ID_VIPER	0x5e88
+
+static DEFINE_IDA(bcm_vk_ida);
+
+/*
+ * SoC variants known to the driver.  The values are used as the first
+ * index into image_tab[]; VK_IDX_INVALID is what get_soc_idx() returns for
+ * a PCI device id that has no firmware images.
+ */
+enum soc_idx {
+	VALKYRIE_A0 = 0,
+	VALKYRIE_B0,
+	VIPER,
+	VK_IDX_INVALID
+};
+
+/*
+ * Slots of candidate file names per image type.  get_load_fw_name() probes
+ * them in ascending order, so IMG_PRI is tried before IMG_SEC.
+ */
+enum img_idx {
+	IMG_PRI = 0,
+	IMG_SEC,
+	IMG_PER_TYPE_MAX
+};
+
+/* one boot stage: the image type and the candidate firmware file names */
+struct load_image_entry {
+	const u32 image_type;	/* VK_IMAGE_TYPE_BOOT1 or VK_IMAGE_TYPE_BOOT2 */
+	const char *image_name[IMG_PER_TYPE_MAX]; /* indexed by enum img_idx */
+};
+
+#define NUM_BOOT_STAGES 2
+/*
+ * default firmware images names
+ *
+ * First index is enum soc_idx, second index is the boot stage (boot1 first,
+ * then boot2).  Each stage lists a primary and a secondary file name; the
+ * VIPER entries have an empty string as the secondary name.
+ */
+static const struct load_image_entry image_tab[][NUM_BOOT_STAGES] = {
+	[VALKYRIE_A0] = {
+		{VK_IMAGE_TYPE_BOOT1, {"vk_a0-boot1.bin", "vk-boot1.bin"}},
+		{VK_IMAGE_TYPE_BOOT2, {"vk_a0-boot2.bin", "vk-boot2.bin"}}
+	},
+	[VALKYRIE_B0] = {
+		{VK_IMAGE_TYPE_BOOT1, {"vk_b0-boot1.bin", "vk-boot1.bin"}},
+		{VK_IMAGE_TYPE_BOOT2, {"vk_b0-boot2.bin", "vk-boot2.bin"}}
+	},
+
+	[VIPER] = {
+		{VK_IMAGE_TYPE_BOOT1, {"vp-boot1.bin", ""}},
+		{VK_IMAGE_TYPE_BOOT2, {"vp-boot2.bin", ""}}
+	},
+};
+
+/* Location of memory base addresses of interest in BAR1 */
+/* Load Boot1 to start of ITCM */
+#define BAR1_CODEPUSH_BASE_BOOT1	0x100000
+
+/* Allow minimum 1s for Load Image timeout responses */
+#define LOAD_IMAGE_TIMEOUT_MS		(1 * MSEC_PER_SEC)
+
+/* Image startup timeouts */
+#define BOOT1_STARTUP_TIMEOUT_MS	(5 * MSEC_PER_SEC)
+#define BOOT2_STARTUP_TIMEOUT_MS	(10 * MSEC_PER_SEC)
+
+/* 1ms wait for checking the transfer complete status */
+#define TXFR_COMPLETE_TIMEOUT_MS	1
+
+/* MSIX usages */
+#define VK_MSIX_MSGQ_MAX		3
+#define VK_MSIX_NOTF_MAX		1
+#define VK_MSIX_TTY_MAX			BCM_VK_NUM_TTY
+#define VK_MSIX_IRQ_MAX			(VK_MSIX_MSGQ_MAX + VK_MSIX_NOTF_MAX + \
+					 VK_MSIX_TTY_MAX)
+#define VK_MSIX_IRQ_MIN_REQ             (VK_MSIX_MSGQ_MAX + VK_MSIX_NOTF_MAX)
+
+/* Number of bits set in DMA mask */
+#define BCM_VK_DMA_BITS			64
+
+/* Ucode boot wait time */
+#define BCM_VK_UCODE_BOOT_US            (100 * USEC_PER_MSEC)
+/* 50% margin */
+#define BCM_VK_UCODE_BOOT_MAX_US        ((BCM_VK_UCODE_BOOT_US * 3) >> 1)
+
+/* deinit time for the card os after receiving doorbell */
+#define BCM_VK_DEINIT_TIME_MS		(2 * MSEC_PER_SEC)
+
+/*
+ * module parameters
+ *
+ * All three are registered with mode 0444, i.e. they are exposed read-only
+ * and can only be chosen at module load time.
+ */
+/* when true, firmware images are loaded automatically at probe time */
+static bool auto_load = true;
+module_param(auto_load, bool, 0444);
+MODULE_PARM_DESC(auto_load,
+		 "Load images automatically at PCIe probe time.\n");
+/* size, in pages, of the scratch area advertised to the card via BAR1 */
+static uint nr_scratch_pages = VK_BAR1_SCRATCH_DEF_NR_PAGES;
+module_param(nr_scratch_pages, uint, 0444);
+MODULE_PARM_DESC(nr_scratch_pages,
+		 "Number of pre allocated DMAable coherent pages.\n");
+/* number of in-band message blocks usable for a short SGL */
+static uint nr_ib_sgl_blk = BCM_VK_DEF_IB_SGL_BLK_LEN;
+module_param(nr_ib_sgl_blk, uint, 0444);
+MODULE_PARM_DESC(nr_ib_sgl_blk,
+		 "Number of in-band msg blks for short SGL.\n");
+
+/*
+ * alerts that could be generated from peer
+ *
+ * Each entry names one ERR_LOG_* bit of the card error log register; the
+ * string is what bcm_vk_log_notf() prints when that bit changes.  The first
+ * two initializers of every entry carry the same bit (the field layout is
+ * declared with struct bcm_vk_entry in bcm_vk.h).
+ */
+const struct bcm_vk_entry bcm_vk_peer_err[BCM_VK_PEER_ERR_NUM] = {
+	{ERR_LOG_UECC, ERR_LOG_UECC, "uecc"},
+	{ERR_LOG_SSIM_BUSY, ERR_LOG_SSIM_BUSY, "ssim_busy"},
+	{ERR_LOG_AFBC_BUSY, ERR_LOG_AFBC_BUSY, "afbc_busy"},
+	{ERR_LOG_HIGH_TEMP_ERR, ERR_LOG_HIGH_TEMP_ERR, "high_temp"},
+	{ERR_LOG_WDOG_TIMEOUT, ERR_LOG_WDOG_TIMEOUT, "wdog_timeout"},
+	{ERR_LOG_SYS_FAULT, ERR_LOG_SYS_FAULT, "sys_fault"},
+	{ERR_LOG_RAMDUMP, ERR_LOG_RAMDUMP, "ramdump"},
+	{ERR_LOG_COP_WDOG_TIMEOUT, ERR_LOG_COP_WDOG_TIMEOUT,
+	 "cop_wdog_timeout"},
+	{ERR_LOG_MEM_ALLOC_FAIL, ERR_LOG_MEM_ALLOC_FAIL, "malloc_fail warn"},
+	{ERR_LOG_LOW_TEMP_WARN, ERR_LOG_LOW_TEMP_WARN, "low_temp warn"},
+	{ERR_LOG_ECC, ERR_LOG_ECC, "ecc"},
+	{ERR_LOG_IPC_DWN, ERR_LOG_IPC_DWN, "ipc_down"},
+};
+
+/*
+ * alerts detected by the host
+ *
+ * Same entry format as bcm_vk_peer_err[]; these bits live in
+ * vk->host_alert rather than in a card register.
+ */
+const struct bcm_vk_entry bcm_vk_host_err[BCM_VK_HOST_ERR_NUM] = {
+	{ERR_LOG_HOST_PCIE_DWN, ERR_LOG_HOST_PCIE_DWN, "PCIe_down"},
+	{ERR_LOG_HOST_HB_FAIL, ERR_LOG_HOST_HB_FAIL, "hb_fail"},
+	{ERR_LOG_HOST_INTF_V_FAIL, ERR_LOG_HOST_INTF_V_FAIL, "intf_ver_fail"},
+};
+
+/*
+ * Interrupt handler for card notifications.
+ *
+ * The actual processing is deferred to the driver work queue: the handler
+ * only flags BCM_VK_WQ_NOTF_PEND and queues vk->wq_work, and it does so at
+ * most once while a notification is still pending.  If driver access is not
+ * currently allowed the interrupt is logged and otherwise ignored.
+ *
+ * Always returns IRQ_HANDLED.
+ */
+irqreturn_t bcm_vk_notf_irqhandler(int irq, void *dev_id)
+{
+	struct bcm_vk *vk = dev_id;
+
+	if (bcm_vk_drv_access_ok(vk)) {
+		/* queue the work only when the pending bit was not yet set */
+		if (!test_and_set_bit(BCM_VK_WQ_NOTF_PEND, vk->wq_offload))
+			queue_work(vk->wq_thread, &vk->wq_work);
+	} else {
+		dev_err(&vk->pdev->dev,
+			"Interrupt %d received when msgq not inited\n", irq);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Compare the interface version published by the card in BAR_INTF_VER
+ * with the version this driver was built for.
+ *
+ * A major number of 0 denotes a pre-release card and is accepted with a
+ * warning.  Any other major number must equal SEMANTIC_MAJOR; on mismatch
+ * the ERR_LOG_HOST_INTF_V_FAIL host alert is raised.
+ *
+ * Returns 0 when the card is accepted, -EPFNOSUPPORT otherwise.
+ */
+static int bcm_vk_intf_ver_chk(struct bcm_vk *vk)
+{
+	struct device *dev = &vk->pdev->dev;
+	u32 intf_ver;
+	u16 major, minor;
+
+	/* read interface register and split it into its two fields */
+	intf_ver = vkread32(vk, BAR_0, BAR_INTF_VER);
+	major = (intf_ver >> BAR_INTF_VER_MAJOR_SHIFT) & BAR_INTF_VER_MASK;
+	minor = intf_ver & BAR_INTF_VER_MASK;
+
+	/* pre-release card: let it through, but make it visible */
+	if (!major) {
+		dev_warn(dev, "Pre-release major.minor=%d.%d - drv %d.%d\n",
+			 major, minor, SEMANTIC_MAJOR, SEMANTIC_MINOR);
+		return 0;
+	}
+
+	if (major == SEMANTIC_MAJOR) {
+		dev_dbg(dev,
+			"Intf major.minor=%d.%d passed - drv %d.%d\n",
+			major, minor, SEMANTIC_MAJOR, SEMANTIC_MINOR);
+		return 0;
+	}
+
+	dev_err(dev,
+		"Intf major.minor=%d.%d rejected - drv %d.%d\n",
+		major, minor, SEMANTIC_MAJOR, SEMANTIC_MINOR);
+	bcm_vk_set_host_alert(vk, ERR_LOG_HOST_INTF_V_FAIL);
+
+	return -EPFNOSUPPORT;
+}
+
+/*
+ * Log the alerts described by @entry_tab for one alert record.
+ *
+ * @vk:         device context
+ * @alert:      record to report; ->notfs holds the current bits, ->flags
+ *              the bits as of the previous report (callers copy notfs into
+ *              flags after logging)
+ * @entry_tab:  table of alert bits and their printable names
+ * @table_size: number of entries in @entry_tab
+ *
+ * For most entries a line is printed only when the bit changed, stating
+ * whether it was RAISED or CLEARED.  The UECC and ECC bits are handled
+ * differently: while set, the matching error count is read from
+ * BAR_CARD_ERR_MEM, reported when it both differs from the last stored
+ * count and has reached its threshold, and then stored in vk->alert_cnts.
+ */
+static void bcm_vk_log_notf(struct bcm_vk *vk,
+			    struct bcm_vk_alert *alert,
+			    struct bcm_vk_entry const *entry_tab,
+			    const u32 table_size)
+{
+	u32 i;
+	u32 masked_val, latched_val;
+	struct bcm_vk_entry const *entry;
+	u32 reg;
+	u16 ecc_mem_err, uecc_mem_err;
+	struct device *dev = &vk->pdev->dev;
+
+	for (i = 0; i < table_size; i++) {
+		entry = &entry_tab[i];
+		/* current vs. previously reported state of this entry's bit */
+		masked_val = entry->mask & alert->notfs;
+		latched_val = entry->mask & alert->flags;
+
+		if (masked_val == ERR_LOG_UECC) {
+			/*
+			 * if there is difference between stored cnt and it
+			 * is greater than threshold, log it.
+			 */
+			reg = vkread32(vk, BAR_0, BAR_CARD_ERR_MEM);
+			BCM_VK_EXTRACT_FIELD(uecc_mem_err, reg,
+					     BCM_VK_MEM_ERR_FIELD_MASK,
+					     BCM_VK_UECC_MEM_ERR_SHIFT);
+			if ((uecc_mem_err != vk->alert_cnts.uecc) &&
+			    (uecc_mem_err >= BCM_VK_UECC_THRESHOLD))
+				dev_info(dev,
+					 "ALERT! %s.%d uecc RAISED - ErrCnt %d\n",
+					 DRV_MODULE_NAME, vk->devid,
+					 uecc_mem_err);
+			/* remember the count so the same value is not re-logged */
+			vk->alert_cnts.uecc = uecc_mem_err;
+		} else if (masked_val == ERR_LOG_ECC) {
+			/* same scheme as UECC, using the ECC count field */
+			reg = vkread32(vk, BAR_0, BAR_CARD_ERR_MEM);
+			BCM_VK_EXTRACT_FIELD(ecc_mem_err, reg,
+					     BCM_VK_MEM_ERR_FIELD_MASK,
+					     BCM_VK_ECC_MEM_ERR_SHIFT);
+			if ((ecc_mem_err != vk->alert_cnts.ecc) &&
+			    (ecc_mem_err >= BCM_VK_ECC_THRESHOLD))
+				dev_info(dev, "ALERT! %s.%d ecc RAISED - ErrCnt %d\n",
+					 DRV_MODULE_NAME, vk->devid,
+					 ecc_mem_err);
+			vk->alert_cnts.ecc = ecc_mem_err;
+		} else if (masked_val != latched_val) {
+			/* print a log as info */
+			dev_info(dev, "ALERT! %s.%d %s %s\n",
+				 DRV_MODULE_NAME, vk->devid, entry->str,
+				 masked_val ? "RAISED" : "CLEARED");
+		}
+	}
+}
+
+/*
+ * Dump the card's log ring buffer (located in BAR2 at vk->peerlog_off) to
+ * the kernel log.
+ *
+ * The ring header is re-read from the card and compared with the copy taken
+ * in vk->peerlog_info; nothing is dumped if peer logging is disabled
+ * (recorded buf_size of 0) or if the header looks corrupted.  The text
+ * between the read and write index is then printed one line at a time,
+ * where a line ends at a '\0' byte or when the local line buffer is full.
+ * Finally the advanced read index is written back to the card.
+ *
+ * A trailing fragment that has not been terminated by a '\0' yet is not
+ * printed.
+ */
+static void bcm_vk_dump_peer_log(struct bcm_vk *vk)
+{
+	struct bcm_vk_peer_log log;
+	struct bcm_vk_peer_log *log_info = &vk->peerlog_info;
+	char loc_buf[BCM_VK_PEER_LOG_LINE_MAX];
+	int cnt;
+	struct device *dev = &vk->pdev->dev;
+	unsigned int data_offset;
+
+	memcpy_fromio(&log, vk->bar[BAR_2] + vk->peerlog_off, sizeof(log));
+
+	dev_dbg(dev, "Peer PANIC: Size 0x%x(0x%x), [Rd Wr] = [%d %d]\n",
+		log.buf_size, log.mask, log.rd_idx, log.wr_idx);
+
+	if (!log_info->buf_size) {
+		dev_err(dev, "Peer log dump disabled - skipped!\n");
+		return;
+	}
+
+	/* perform range checking for rd/wr idx */
+	if ((log.rd_idx > log_info->mask) ||
+	    (log.wr_idx > log_info->mask) ||
+	    (log.buf_size != log_info->buf_size) ||
+	    (log.mask != log_info->mask)) {
+		dev_err(dev,
+			"Corrupted Ptrs: Size 0x%x(0x%x) Mask 0x%x(0x%x) [Rd Wr] = [%d %d], skip log dump.\n",
+			log_info->buf_size, log.buf_size,
+			log_info->mask, log.mask,
+			log.rd_idx, log.wr_idx);
+		return;
+	}
+
+	cnt = 0;
+	data_offset = vk->peerlog_off + sizeof(struct bcm_vk_peer_log);
+	/* the last slot is reserved for the terminator and never overwritten */
+	loc_buf[BCM_VK_PEER_LOG_LINE_MAX - 1] = '\0';
+	while (log.rd_idx != log.wr_idx) {
+		loc_buf[cnt] = vkread8(vk, BAR_2, data_offset + log.rd_idx);
+
+		/*
+		 * Flush on end of string, or once the last data slot has been
+		 * filled.  Data must stop one slot short of the buffer end,
+		 * otherwise the terminator set above would be overwritten
+		 * and an unterminated string handed to dev_err().
+		 */
+		if ((loc_buf[cnt] == '\0') ||
+		    (cnt >= (BCM_VK_PEER_LOG_LINE_MAX - 2))) {
+			dev_err(dev, "%s", loc_buf);
+			cnt = 0;
+		} else {
+			cnt++;
+		}
+		log.rd_idx = (log.rd_idx + 1) & log.mask;
+	}
+	/* update rd idx at the end */
+	vkwrite32(vk, log.rd_idx, BAR_2,
+		  vk->peerlog_off + offsetof(struct bcm_vk_peer_log, rd_idx));
+}
+
+/*
+ * Process pending notifications; called from the driver work queue.
+ *
+ * Peer alerts are taken from BAR_CARD_ERR_LOG and logged, unless the
+ * register value indicates that the interface is down, in which case
+ * driver access is blocked and ERR_LOG_HOST_PCIE_DWN is raised as a host
+ * alert instead.  Host alerts are then logged from a snapshot taken under
+ * host_alert_lock.  When the interface is up and either a heartbeat failure
+ * or a peer system fault is flagged, the card's log buffer is dumped too.
+ */
+void bcm_vk_handle_notf(struct bcm_vk *vk)
+{
+	u32 reg;
+	struct bcm_vk_alert alert;
+	bool intf_down;
+	unsigned long flags;
+
+	/* handle peer alerts and then locally detected ones */
+	reg = vkread32(vk, BAR_0, BAR_CARD_ERR_LOG);
+	intf_down = BCM_VK_INTF_IS_DOWN(reg);
+	if (!intf_down) {
+		vk->peer_alert.notfs = reg;
+		bcm_vk_log_notf(vk, &vk->peer_alert, bcm_vk_peer_err,
+				ARRAY_SIZE(bcm_vk_peer_err));
+		/* latch what has just been reported for the next comparison */
+		vk->peer_alert.flags = vk->peer_alert.notfs;
+	} else {
+		/* turn off access */
+		bcm_vk_blk_drv_access(vk);
+	}
+
+	/* check and make copy of alert with lock and then free lock */
+	spin_lock_irqsave(&vk->host_alert_lock, flags);
+	if (intf_down)
+		vk->host_alert.notfs |= ERR_LOG_HOST_PCIE_DWN;
+
+	/* the copy keeps the old flags, so the logger still sees the change */
+	alert = vk->host_alert;
+	vk->host_alert.flags = vk->host_alert.notfs;
+	spin_unlock_irqrestore(&vk->host_alert_lock, flags);
+
+	/* call display with copy */
+	bcm_vk_log_notf(vk, &alert, bcm_vk_host_err,
+			ARRAY_SIZE(bcm_vk_host_err));
+
+	/*
+	 * If it is a sys fault or heartbeat timeout, we would like extract
+	 * log msg from the card so that we would know what is the last fault
+	 */
+	/* NOTE(review): host_alert.flags is read here without host_alert_lock */
+	if (!intf_down &&
+	    ((vk->host_alert.flags & ERR_LOG_HOST_HB_FAIL) ||
+	     (vk->peer_alert.flags & ERR_LOG_SYS_FAULT)))
+		bcm_vk_dump_peer_log(vk);
+}
+
+/*
+ * Poll a card register until (value read & @mask) == @value.
+ *
+ * @vk:         device context
+ * @bar:        BAR holding the register to poll
+ * @offset:     register offset within @bar
+ * @mask:       bits of interest
+ * @value:      expected value of the masked bits
+ * @timeout_ms: how long to keep polling
+ *
+ * BAR_BOOT_STATUS is sampled on every iteration as well, and a boot error
+ * reported there aborts the wait immediately.
+ *
+ * Returns 0 once the condition is met, -EFAULT on a boot error and
+ * -ETIMEDOUT when the condition was not met in time.  The condition is
+ * evaluated before the deadline, so a register that already holds the
+ * expected value is never reported as a timeout merely because the
+ * deadline elapsed while it was being read.
+ */
+static inline int bcm_vk_wait(struct bcm_vk *vk, enum pci_barno bar,
+			      u64 offset, u32 mask, u32 value,
+			      unsigned long timeout_ms)
+{
+	struct device *dev = &vk->pdev->dev;
+	unsigned long start_time;
+	unsigned long timeout;
+	u32 rd_val, boot_status;
+
+	start_time = jiffies;
+	timeout = start_time + msecs_to_jiffies(timeout_ms);
+
+	for (;;) {
+		rd_val = vkread32(vk, bar, offset);
+		dev_dbg(dev, "BAR%d Offset=0x%llx: 0x%x\n",
+			bar, offset, rd_val);
+
+		/* check for any boot err condition */
+		boot_status = vkread32(vk, BAR_0, BAR_BOOT_STATUS);
+		if (boot_status & BOOT_ERR_MASK) {
+			dev_err(dev, "Boot Err 0x%x, progress 0x%x after %d ms\n",
+				(boot_status & BOOT_ERR_MASK) >> BOOT_ERR_SHIFT,
+				boot_status & BOOT_PROG_MASK,
+				jiffies_to_msecs(jiffies - start_time));
+			return -EFAULT;
+		}
+
+		/* success takes precedence over an expired deadline */
+		if ((rd_val & mask) == value)
+			return 0;
+
+		if (time_after(jiffies, timeout))
+			return -ETIMEDOUT;
+
+		cpu_relax();
+		cond_resched();
+	}
+}
+
+/*
+ * Read the static card information block from BAR2 into vk->card_info and
+ * record where the peer log header starts.
+ *
+ * The location of the block is published by the card in
+ * BAR_CARD_STATIC_INFO.  The peer log header follows the card info block
+ * directly, so the offset reached after copying the block is stored as
+ * vk->peerlog_off and the header is snapshotted into vk->peerlog_info.
+ * If that header fails the range checks it is zeroed, which disables peer
+ * log dumping.
+ */
+static void bcm_vk_get_card_info(struct bcm_vk *vk)
+{
+	struct device *dev = &vk->pdev->dev;
+	u32 offset;
+	int i;
+	u8 *dst;
+	struct bcm_vk_card_info *info = &vk->card_info;
+
+	/* first read the offset from spare register */
+	offset = vkread32(vk, BAR_0, BAR_CARD_STATIC_INFO);
+	/* keep the offset inside the BAR (assumes a power-of-2 BAR length) */
+	offset &= (pci_resource_len(vk->pdev, BAR_2 * 2) - 1);
+
+	/* based on the offset, read info to internal card info structure */
+	dst = (u8 *)info;
+	for (i = 0; i < sizeof(*info); i++)
+		*dst++ = vkread8(vk, BAR_2, offset++);
+
+	/* NOTE(review): the %s tags come from the card - confirm termination */
+#define CARD_INFO_LOG_FMT "version   : %x\n" \
+			  "os_tag    : %s\n" \
+			  "cmpt_tag  : %s\n" \
+			  "cpu_freq  : %d MHz\n" \
+			  "cpu_scale : %d full, %d lowest\n" \
+			  "ddr_freq  : %d MHz\n" \
+			  "ddr_size  : %d MB\n" \
+			  "video_freq: %d MHz\n"
+	dev_dbg(dev, CARD_INFO_LOG_FMT, info->version, info->os_tag,
+		info->cmpt_tag, info->cpu_freq_mhz, info->cpu_scale[0],
+		info->cpu_scale[MAX_OPP - 1], info->ddr_freq_mhz,
+		info->ddr_size_MB, info->video_core_freq_mhz);
+
+	/*
+	 * get the peer log pointer, only need the offset, and get record
+	 * of the log buffer information which would be used for checking
+	 * before dump, in case the BAR2 memory has been corrupted.
+	 */
+	vk->peerlog_off = offset;
+	memcpy_fromio(&vk->peerlog_info, vk->bar[BAR_2] + vk->peerlog_off,
+		      sizeof(vk->peerlog_info));
+
+	/*
+	 * Do a range checking and if out of bound, the record will be zeroed
+	 * which guarantees that nothing would be dumped.  In other words,
+	 * peer dump is disabled.
+	 */
+	if ((vk->peerlog_info.buf_size > BCM_VK_PEER_LOG_BUF_MAX) ||
+	    (vk->peerlog_info.mask != (vk->peerlog_info.buf_size - 1)) ||
+	    (vk->peerlog_info.rd_idx > vk->peerlog_info.mask) ||
+	    (vk->peerlog_info.wr_idx > vk->peerlog_info.mask)) {
+		dev_err(dev, "Peer log disabled - range error: Size 0x%x(0x%x), [Rd Wr] = [%d %d]\n",
+			vk->peerlog_info.buf_size,
+			vk->peerlog_info.mask,
+			vk->peerlog_info.rd_idx,
+			vk->peerlog_info.wr_idx);
+		memset(&vk->peerlog_info, 0, sizeof(vk->peerlog_info));
+	} else {
+		dev_dbg(dev, "Peer log: Size 0x%x(0x%x), [Rd Wr] = [%d %d]\n",
+			vk->peerlog_info.buf_size,
+			vk->peerlog_info.mask,
+			vk->peerlog_info.rd_idx,
+			vk->peerlog_info.wr_idx);
+	}
+}
+
+/*
+ * Read the process monitoring table from BAR2 into vk->proc_mon_info.
+ *
+ * The table is located behind the peer log: its offset is the peer log
+ * offset plus the peer log header plus the log buffer size reported by the
+ * card.  It starts with two 32-bit words, the number of entries and the
+ * size of one entry, followed by the entries themselves.
+ *
+ * Both words come from the card and are therefore validated before they
+ * are used as a copy length: the table is ignored (vk->proc_mon_info left
+ * untouched) when it has more than BCM_VK_PROC_MON_MAX entries or when an
+ * entry is larger than the host-side entry structure, as either would
+ * overrun mon->entries.
+ */
+static void bcm_vk_get_proc_mon_info(struct bcm_vk *vk)
+{
+	struct device *dev = &vk->pdev->dev;
+	struct bcm_vk_proc_mon_info *mon = &vk->proc_mon_info;
+	u32 num, entry_size, offset, buf_size;
+	u8 *dst;
+
+	/* calculate offset which is based on peerlog offset */
+	buf_size = vkread32(vk, BAR_2,
+			    vk->peerlog_off
+			    + offsetof(struct bcm_vk_peer_log, buf_size));
+	offset = vk->peerlog_off + sizeof(struct bcm_vk_peer_log)
+		 + buf_size;
+
+	/* first read the num and entry size */
+	num = vkread32(vk, BAR_2, offset);
+	entry_size = vkread32(vk, BAR_2, offset + sizeof(num));
+
+	/* check for max allowed */
+	if (num > BCM_VK_PROC_MON_MAX) {
+		dev_err(dev, "Processing monitoring entry %d exceeds max %d\n",
+			num, BCM_VK_PROC_MON_MAX);
+		return;
+	}
+	/* an oversized entry would make the copy below overrun mon->entries */
+	if (entry_size > sizeof(mon->entries[0])) {
+		dev_err(dev,
+			"Processing monitoring entry size %u exceeds max %zu\n",
+			entry_size, sizeof(mon->entries[0]));
+		return;
+	}
+	mon->num = num;
+	mon->entry_size = entry_size;
+
+	vk->proc_mon_off = offset;
+
+	/* read it once that will capture those static info */
+	dst = (u8 *)&mon->entries[0];
+	offset += sizeof(num) + sizeof(entry_size);
+	memcpy_fromio(dst, vk->bar[BAR_2] + offset, num * entry_size);
+}
+
+/*
+ * Exchange static information with the card once boot2 is up.
+ *
+ * When the message queue marker is not valid nothing is synced; this is
+ * only tolerated if the card reports the diagnostics ready marker.
+ * Otherwise the scratch DMA area (if one was allocated) is announced to
+ * the card and the card info and process monitoring tables are read.
+ *
+ * Returns 0 on success or when the card is in diagnostics mode, -EINVAL
+ * when the marker is neither valid nor the diagnostics marker.
+ */
+static int bcm_vk_sync_card_info(struct bcm_vk *vk)
+{
+	u32 rdy_marker = vkread32(vk, BAR_1, VK_BAR1_MSGQ_DEF_RDY);
+	u64 scratch_addr;
+
+	/* check for marker, but allow diags mode to skip sync */
+	if (!bcm_vk_msgq_marker_valid(vk)) {
+		if (rdy_marker == VK_BAR1_DIAG_RDY_MARKER)
+			return 0;
+
+		return -EINVAL;
+	}
+
+	/*
+	 * Tell the card where the DMA scratch area lives: upper word, lower
+	 * word, then size.  For signed part, BAR1 is accessible only after
+	 * boot2 has come up.
+	 */
+	if (vk->tdma_addr) {
+		scratch_addr = (u64)vk->tdma_addr;
+
+		vkwrite32(vk, scratch_addr >> 32, BAR_1,
+			  VK_BAR1_SCRATCH_OFF_HI);
+		vkwrite32(vk, (u32)scratch_addr, BAR_1,
+			  VK_BAR1_SCRATCH_OFF_LO);
+		vkwrite32(vk, nr_scratch_pages * PAGE_SIZE, BAR_1,
+			  VK_BAR1_SCRATCH_SZ_ADDR);
+	}
+
+	/* both tables are static, reading them once is sufficient */
+	bcm_vk_get_card_info(vk);
+	bcm_vk_get_proc_mon_info(vk);
+
+	return 0;
+}
+
+/*
+ * Block further driver access to the card and get rid of its users.
+ *
+ * Under ctx_lock, msgq_inited is cleared, SIGKILL is sent to the process
+ * of every open context except the one recorded in vk->reset_pid, and the
+ * TTY users are terminated.
+ */
+void bcm_vk_blk_drv_access(struct bcm_vk *vk)
+{
+	int i;
+
+	/*
+	 * kill all the apps except for the process that is resetting.
+	 * If not called during reset, reset_pid will be 0, and all will be
+	 * killed.
+	 */
+	spin_lock(&vk->ctx_lock);
+
+	/* set msgq_inited to 0 so that all rd/wr will be blocked */
+	atomic_set(&vk->msgq_inited, 0);
+
+	/* walk every bucket of the pid hash table */
+	for (i = 0; i < VK_PID_HT_SZ; i++) {
+		struct bcm_vk_ctx *ctx;
+
+		list_for_each_entry(ctx, &vk->pid_ht[i].head, node) {
+			if (ctx->pid != vk->reset_pid) {
+				dev_dbg(&vk->pdev->dev,
+					"Send kill signal to pid %d\n",
+					ctx->pid);
+				kill_pid(find_vpid(ctx->pid), SIGKILL, 1);
+			}
+		}
+	}
+	bcm_vk_tty_terminate_tty_user(vk);
+	spin_unlock(&vk->ctx_lock);
+}
+
+/*
+ * Announce a host DMA buffer to the card through BAR1.
+ *
+ * @vk:            device context
+ * @bufp:          CPU address of the buffer (not used here)
+ * @host_buf_addr: DMA address of the buffer
+ * @buf_size:      size of the buffer in bytes
+ *
+ * The address is written as two 32-bit halves, upper half first, followed
+ * by the size.
+ */
+static void bcm_vk_buf_notify(struct bcm_vk *vk, void *bufp,
+			      dma_addr_t host_buf_addr, u32 buf_size)
+{
+	const u64 dma_addr = (u64)host_buf_addr;
+	const u32 addr_hi = dma_addr >> 32;
+	const u32 addr_lo = (u32)dma_addr;
+
+	vkwrite32(vk, addr_hi, BAR_1, VK_BAR1_DMA_BUF_OFF_HI);
+	vkwrite32(vk, addr_lo, BAR_1, VK_BAR1_DMA_BUF_OFF_LO);
+	vkwrite32(vk, buf_size, BAR_1, VK_BAR1_DMA_BUF_SZ);
+}
+
+/*
+ * Download one boot image to the card and wait for it to come up.
+ *
+ * @vk:        device context
+ * @load_type: VK_IMAGE_TYPE_BOOT1 or VK_IMAGE_TYPE_BOOT2
+ * @filename:  firmware file to request
+ *
+ * boot1: soft boot source is enabled, the card is asked to open its SRAM,
+ * the image is read into a 256K bounce buffer and copied into BAR1, and
+ * the codepush register is written to start it.  The function then waits
+ * for BOOT1_RUNNING, or accepts a successful standalone boot1.
+ *
+ * boot2: once the card reports DDR open, a 4M DMA buffer is announced to
+ * the card and the image is fed through it chunk by chunk; each chunk is
+ * signalled via the codepush register and the next one is read when the
+ * card has cleared it.  After the card acknowledges all data, the function
+ * waits for the firmware ready status and, unless the card runs standalone,
+ * checks the interface version and syncs message queues and card info.
+ *
+ * Returns 0 on success or a negative errno.  The firmware handle and the
+ * DMA buffer are released on every path.
+ */
+static int bcm_vk_load_image_by_type(struct bcm_vk *vk, u32 load_type,
+				     const char *filename)
+{
+	struct device *dev = &vk->pdev->dev;
+	const struct firmware *fw = NULL;
+	void *bufp = NULL;
+	size_t max_buf, offset;
+	int ret;
+	u64 offset_codepush;
+	u32 codepush;
+	u32 value;
+	dma_addr_t boot_dma_addr;
+	bool is_stdalone;
+
+	if (load_type == VK_IMAGE_TYPE_BOOT1) {
+		/*
+		 * After POR, enable VK soft BOOTSRC so bootrom do not clear
+		 * the pushed image (the TCM memories).
+		 */
+		value = vkread32(vk, BAR_0, BAR_BOOTSRC_SELECT);
+		value |= BOOTSRC_SOFT_ENABLE;
+		vkwrite32(vk, value, BAR_0, BAR_BOOTSRC_SELECT);
+
+		codepush = CODEPUSH_BOOTSTART + CODEPUSH_BOOT1_ENTRY;
+		offset_codepush = BAR_CODEPUSH_SBL;
+
+		/* Write a 1 to request SRAM open bit */
+		vkwrite32(vk, CODEPUSH_BOOTSTART, BAR_0, offset_codepush);
+
+		/* Wait for VK to respond */
+		ret = bcm_vk_wait(vk, BAR_0, BAR_BOOT_STATUS, SRAM_OPEN,
+				  SRAM_OPEN, LOAD_IMAGE_TIMEOUT_MS);
+		if (ret < 0) {
+			dev_err(dev, "boot1 wait SRAM err - ret(%d)\n", ret);
+			goto err_buf_out;
+		}
+
+		/* bounce buffer: the image is copied to BAR1 by the CPU below */
+		max_buf = SZ_256K;
+		bufp = dma_alloc_coherent(dev,
+					  max_buf,
+					  &boot_dma_addr, GFP_KERNEL);
+		if (!bufp) {
+			dev_err(dev, "Error allocating 0x%zx\n", max_buf);
+			ret = -ENOMEM;
+			goto err_buf_out;
+		}
+	} else if (load_type == VK_IMAGE_TYPE_BOOT2) {
+		codepush = CODEPUSH_BOOT2_ENTRY;
+		offset_codepush = BAR_CODEPUSH_SBI;
+
+		/* Wait for VK to respond */
+		ret = bcm_vk_wait(vk, BAR_0, BAR_BOOT_STATUS, DDR_OPEN,
+				  DDR_OPEN, LOAD_IMAGE_TIMEOUT_MS);
+		if (ret < 0) {
+			dev_err(dev, "boot2 wait DDR open error - ret(%d)\n",
+				ret);
+			goto err_buf_out;
+		}
+
+		max_buf = SZ_4M;
+		bufp = dma_alloc_coherent(dev,
+					  max_buf,
+					  &boot_dma_addr, GFP_KERNEL);
+		if (!bufp) {
+			dev_err(dev, "Error allocating 0x%zx\n", max_buf);
+			ret = -ENOMEM;
+			goto err_buf_out;
+		}
+
+		/* unlike boot1, the buffer's DMA address is handed to the card */
+		bcm_vk_buf_notify(vk, bufp, boot_dma_addr, max_buf);
+	} else {
+		dev_err(dev, "Error invalid image type 0x%x\n", load_type);
+		ret = -EINVAL;
+		goto err_buf_out;
+	}
+
+	/* read the first (for boot1: the only) chunk of the image */
+	offset = 0;
+	ret = request_partial_firmware_into_buf(&fw, filename, dev,
+						bufp, max_buf, offset);
+	if (ret) {
+		dev_err(dev, "Error %d requesting firmware file: %s\n",
+			ret, filename);
+		goto err_firmware_out;
+	}
+	dev_dbg(dev, "size=0x%zx\n", fw->size);
+	if (load_type == VK_IMAGE_TYPE_BOOT1)
+		memcpy_toio(vk->bar[BAR_1] + BAR1_CODEPUSH_BASE_BOOT1,
+			    bufp,
+			    fw->size);
+
+	/* kick the card: start boot1, or consume the first boot2 chunk */
+	dev_dbg(dev, "Signaling 0x%x to 0x%llx\n", codepush, offset_codepush);
+	vkwrite32(vk, codepush, BAR_0, offset_codepush);
+
+	if (load_type == VK_IMAGE_TYPE_BOOT1) {
+		u32 boot_status;
+
+		/* wait until done */
+		ret = bcm_vk_wait(vk, BAR_0, BAR_BOOT_STATUS,
+				  BOOT1_RUNNING,
+				  BOOT1_RUNNING,
+				  BOOT1_STARTUP_TIMEOUT_MS);
+
+		/* a standalone boot1 is judged by its own progress register */
+		boot_status = vkread32(vk, BAR_0, BAR_BOOT_STATUS);
+		is_stdalone = !BCM_VK_INTF_IS_DOWN(boot_status) &&
+			      (boot_status & BOOT_STDALONE_RUNNING);
+		if (ret && !is_stdalone) {
+			dev_err(dev,
+				"Timeout %ld ms waiting for boot1 to come up - ret(%d)\n",
+				BOOT1_STARTUP_TIMEOUT_MS, ret);
+			goto err_firmware_out;
+		} else if (is_stdalone) {
+			u32 reg;
+
+			reg = vkread32(vk, BAR_0, BAR_BOOT1_STDALONE_PROGRESS);
+			if ((reg & BOOT1_STDALONE_PROGRESS_MASK) ==
+				     BOOT1_STDALONE_SUCCESS) {
+				dev_info(dev, "Boot1 standalone success\n");
+				ret = 0;
+			} else {
+				dev_err(dev, "Timeout %ld ms - Boot1 standalone failure\n",
+					BOOT1_STARTUP_TIMEOUT_MS);
+				ret = -EINVAL;
+				goto err_firmware_out;
+			}
+		}
+	} else if (load_type == VK_IMAGE_TYPE_BOOT2) {
+		unsigned long timeout;
+
+		timeout = jiffies + msecs_to_jiffies(LOAD_IMAGE_TIMEOUT_MS);
+
+		/* To send more data to VK than max_buf allowed at a time */
+		do {
+			/*
+			 * Check for ack from card. when Ack is received,
+			 * it means all the data is received by card.
+			 * Exit the loop after ack is received.
+			 */
+			ret = bcm_vk_wait(vk, BAR_0, BAR_BOOT_STATUS,
+					  FW_LOADER_ACK_RCVD_ALL_DATA,
+					  FW_LOADER_ACK_RCVD_ALL_DATA,
+					  TXFR_COMPLETE_TIMEOUT_MS);
+			if (ret == 0) {
+				dev_dbg(dev, "Exit boot2 download\n");
+				break;
+			} else if (ret == -EFAULT) {
+				dev_err(dev, "Error detected during ACK waiting");
+				goto err_firmware_out;
+			}
+
+			/* exit the loop, if there is no response from card */
+			if (time_after(jiffies, timeout)) {
+				dev_err(dev, "Error. No reply from card\n");
+				ret = -ETIMEDOUT;
+				goto err_firmware_out;
+			}
+
+			/* Wait for VK to open BAR space to copy new data */
+			ret = bcm_vk_wait(vk, BAR_0, offset_codepush,
+					  codepush, 0,
+					  TXFR_COMPLETE_TIMEOUT_MS);
+			if (ret == 0) {
+				/* card consumed the chunk: fetch the next one */
+				offset += max_buf;
+				ret = request_partial_firmware_into_buf
+						(&fw,
+						 filename,
+						 dev, bufp,
+						 max_buf,
+						 offset);
+				if (ret) {
+					dev_err(dev,
+						"Error %d requesting firmware file: %s offset: 0x%zx\n",
+						ret, filename, offset);
+					goto err_firmware_out;
+				}
+				dev_dbg(dev, "size=0x%zx\n", fw->size);
+				dev_dbg(dev, "Signaling 0x%x to 0x%llx\n",
+					codepush, offset_codepush);
+				vkwrite32(vk, codepush, BAR_0, offset_codepush);
+				/* reload timeout after every codepush */
+				timeout = jiffies +
+				    msecs_to_jiffies(LOAD_IMAGE_TIMEOUT_MS);
+			} else if (ret == -EFAULT) {
+				dev_err(dev, "Error detected waiting for transfer\n");
+				goto err_firmware_out;
+			}
+		} while (1);
+
+		/* wait for fw status bits to indicate app ready */
+		ret = bcm_vk_wait(vk, BAR_0, VK_BAR_FWSTS,
+				  VK_FWSTS_READY,
+				  VK_FWSTS_READY,
+				  BOOT2_STARTUP_TIMEOUT_MS);
+		if (ret < 0) {
+			dev_err(dev, "Boot2 not ready - ret(%d)\n", ret);
+			goto err_firmware_out;
+		}
+
+		/* the host interface is only brought up when not standalone */
+		is_stdalone = vkread32(vk, BAR_0, BAR_BOOT_STATUS) &
+			      BOOT_STDALONE_RUNNING;
+		if (!is_stdalone) {
+			ret = bcm_vk_intf_ver_chk(vk);
+			if (ret) {
+				dev_err(dev, "failure in intf version check\n");
+				goto err_firmware_out;
+			}
+
+			/*
+			 * Next, initialize Message Q if we are loading boot2.
+			 * Do a force sync
+			 */
+			ret = bcm_vk_sync_msgq(vk, true);
+			if (ret) {
+				dev_err(dev, "Boot2 Error reading comm msg Q info\n");
+				ret = -EIO;
+				goto err_firmware_out;
+			}
+
+			/* sync & channel other info */
+			ret = bcm_vk_sync_card_info(vk);
+			if (ret) {
+				dev_err(dev, "Syncing Card Info failure\n");
+				goto err_firmware_out;
+			}
+		}
+	}
+
+	/* the success path falls through the cleanup labels as well */
+err_firmware_out:
+	release_firmware(fw);
+
+err_buf_out:
+	/* max_buf and boot_dma_addr are only valid once bufp was allocated */
+	if (bufp)
+		dma_free_coherent(dev, max_buf, bufp, boot_dma_addr);
+
+	return ret;
+}
+
+/*
+ * Work out from the card's boot status which image it expects next.
+ *
+ * Returns VK_IMAGE_TYPE_BOOT1 when the interface is up and SRAM is open,
+ * VK_IMAGE_TYPE_BOOT2 when the boot status equals BOOT1_RUNNING, and 0
+ * when the state is not recognized.  Boot and firmware status are logged
+ * on every call.
+ */
+static u32 bcm_vk_next_boot_image(struct bcm_vk *vk)
+{
+	const u32 boot_status = vkread32(vk, BAR_0, BAR_BOOT_STATUS);
+	const u32 fw_status = vkread32(vk, BAR_0, VK_BAR_FWSTS);
+	u32 next_type;
+
+	if (!BCM_VK_INTF_IS_DOWN(boot_status) && (boot_status & SRAM_OPEN))
+		next_type = VK_IMAGE_TYPE_BOOT1;
+	else if (boot_status == BOOT1_RUNNING)
+		next_type = VK_IMAGE_TYPE_BOOT2;
+	else
+		next_type = 0;	/* unknown state */
+
+	/* Log status so that we know different stages */
+	dev_info(&vk->pdev->dev,
+		 "boot-status value for next image: 0x%x : fw-status 0x%x\n",
+		 boot_status, fw_status);
+
+	return next_type;
+}
+
+/*
+ * Map the PCI device (and, for Valkyrie, the chip revision) to the row of
+ * image_tab[] holding its firmware names.
+ *
+ * Valkyrie revisions without a table entry fall back to the A0 images
+ * with a warning.  Returns VK_IDX_INVALID for unknown device ids.
+ */
+static enum soc_idx get_soc_idx(struct bcm_vk *vk)
+{
+	static enum soc_idx const vk_soc_tab[] = { VALKYRIE_A0, VALKYRIE_B0 };
+	struct pci_dev *pdev = vk->pdev;
+	u32 rev;
+
+	if (pdev->device == PCI_DEVICE_ID_VIPER)
+		return VIPER;
+
+	if (pdev->device != PCI_DEVICE_ID_VALKYRIE) {
+		dev_err(&pdev->dev, "no images for 0x%x\n", pdev->device);
+		return VK_IDX_INVALID;
+	}
+
+	/* Valkyrie: the chip id decides the sub-class */
+	rev = MAJOR_SOC_REV(vkread32(vk, BAR_0, BAR_CHIP_ID));
+	if (rev < ARRAY_SIZE(vk_soc_tab))
+		return vk_soc_tab[rev];
+
+	/* Default to A0 firmware for all other chip revs */
+	dev_warn(&pdev->dev,
+		 "Rev %d not in image lookup table, default to idx=%d\n",
+		 rev, VALKYRIE_A0);
+
+	return VALKYRIE_A0;
+}
+
+/*
+ * Pick the first firmware file of @entry that can actually be requested.
+ *
+ * Each candidate name is probed, in table order, by requesting just the
+ * first few bytes of the file into a throw-away buffer.
+ *
+ * Returns the name of the first file found, or NULL if none exists.
+ */
+static const char *get_load_fw_name(struct bcm_vk *vk,
+				    const struct load_image_entry *entry)
+{
+	struct device *dev = &vk->pdev->dev;
+	const struct firmware *fw;
+	const char *candidate;
+	unsigned long probe_buf;
+	int err;
+	int i;
+
+	for (i = 0; i < IMG_PER_TYPE_MAX; i++) {
+		candidate = entry->image_name[i];
+
+		fw = NULL;
+		err = request_partial_firmware_into_buf(&fw, candidate, dev,
+							&probe_buf,
+							sizeof(probe_buf), 0);
+		release_firmware(fw);
+		if (err)
+			continue;
+
+		return candidate;
+	}
+
+	return NULL;
+}
+
+/*
+ * Load the default boot images for this card, one boot stage after the
+ * other.
+ *
+ * A stage is loaded only if the card currently asks for that image type
+ * (see bcm_vk_next_boot_image()); stages the card does not ask for are
+ * skipped.
+ *
+ * Returns the result of the last load that was attempted, -ENOENT if no
+ * firmware file exists for a required stage, and -1 if the SoC is not
+ * recognized or no stage was loaded at all.
+ */
+int bcm_vk_auto_load_all_images(struct bcm_vk *vk)
+{
+	struct device *dev = &vk->pdev->dev;
+	const struct load_image_entry *stage;
+	const char *fw_name;
+	enum soc_idx idx;
+	int ret = -1;
+	int i;
+
+	idx = get_soc_idx(vk);
+	if (idx == VK_IDX_INVALID)
+		return ret;
+
+	/* log a message to know the relative loading order */
+	dev_dbg(dev, "Load All for device %d\n", vk->devid);
+
+	for (i = 0; i < NUM_BOOT_STAGES; i++) {
+		stage = &image_tab[idx][i];
+
+		/* skip stages the card is not waiting for */
+		if (bcm_vk_next_boot_image(vk) != stage->image_type)
+			continue;
+
+		fw_name = get_load_fw_name(vk, stage);
+		if (!fw_name) {
+			dev_err(dev, "No suitable firmware exists for type %d",
+				stage->image_type);
+			return -ENOENT;
+		}
+
+		ret = bcm_vk_load_image_by_type(vk, stage->image_type,
+						fw_name);
+		dev_info(dev, "Auto load %s, ret %d\n", fw_name, ret);
+		if (ret) {
+			dev_err(dev, "Error loading default %s\n", fw_name);
+			return ret;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Schedule an automatic image download on the driver work queue.
+ *
+ * Returns 0 when the download was queued, -EPERM when another download is
+ * already pending.
+ */
+static int bcm_vk_trigger_autoload(struct bcm_vk *vk)
+{
+	/* claim the download slot; back off if somebody else holds it */
+	if (test_and_set_bit(BCM_VK_WQ_DWNLD_PEND, vk->wq_offload))
+		return -EPERM;
+
+	/* tell the work handler that this is an auto download, then run it */
+	set_bit(BCM_VK_WQ_DWNLD_AUTO, vk->wq_offload);
+	queue_work(vk->wq_thread, &vk->wq_work);
+
+	return 0;
+}
+
+/*
+ * deferred work queue for draining and auto download.
+ *
+ * Runs whatever has been flagged in vk->wq_offload: notification handling
+ * and/or the automatic image download.  Afterwards it always tries to
+ * drain the to-host message queues; a negative result from the drain
+ * blocks further driver access.
+ */
+static void bcm_vk_wq_handler(struct work_struct *work)
+{
+	struct bcm_vk *vk = container_of(work, struct bcm_vk, wq_work);
+	struct device *dev = &vk->pdev->dev;
+	s32 ret;
+
+	/* check wq offload bit map to perform various operations */
+	if (test_bit(BCM_VK_WQ_NOTF_PEND, vk->wq_offload)) {
+		/*
+		 * clear the bit right away, before handling, so that a
+		 * notification arriving meanwhile queues the work again
+		 */
+		clear_bit(BCM_VK_WQ_NOTF_PEND, vk->wq_offload);
+		bcm_vk_handle_notf(vk);
+	}
+	if (test_bit(BCM_VK_WQ_DWNLD_AUTO, vk->wq_offload)) {
+		/* the result is not checked here; failures are logged inside */
+		bcm_vk_auto_load_all_images(vk);
+
+		/*
+		 * at the end of operation, clear AUTO bit and pending
+		 * bit
+		 */
+		clear_bit(BCM_VK_WQ_DWNLD_AUTO, vk->wq_offload);
+		clear_bit(BCM_VK_WQ_DWNLD_PEND, vk->wq_offload);
+	}
+
+	/* next, try to drain */
+	ret = bcm_to_h_msg_dequeue(vk);
+
+	if (ret == 0)
+		dev_dbg(dev, "Spurious trigger for workqueue\n");
+	else if (ret < 0)
+		bcm_vk_blk_drv_access(vk);
+}
+
+/*
+ * Load a boot image on request from user space.
+ *
+ * @vk:  device context
+ * @arg: user pointer to a struct vk_image naming the image type and,
+ *       optionally, the firmware file
+ *
+ * The request is refused with -EPERM when the type is not boot1/boot2,
+ * when it is not the image the card expects next, or when another
+ * download is already pending.  An empty file name selects the default
+ * image for the detected SoC.
+ *
+ * Returns 0 on success, -EACCES if @arg cannot be copied, -ENOENT if no
+ * default image is available, -EPERM as described above (also for an
+ * unknown SoC), or the error from the actual download.
+ */
+static long bcm_vk_load_image(struct bcm_vk *vk,
+			      const struct vk_image __user *arg)
+{
+	struct device *dev = &vk->pdev->dev;
+	const char *image_name;
+	struct vk_image image;
+	u32 next_loadable;
+	enum soc_idx idx;
+	int image_idx;
+	int ret = -EPERM;
+
+	if (copy_from_user(&image, arg, sizeof(image)))
+		return -EACCES;
+
+	if ((image.type != VK_IMAGE_TYPE_BOOT1) &&
+	    (image.type != VK_IMAGE_TYPE_BOOT2)) {
+		dev_err(dev, "invalid image.type %u\n", image.type);
+		return ret;
+	}
+
+	/* images must be loaded in the order the card asks for them */
+	next_loadable = bcm_vk_next_boot_image(vk);
+	if (next_loadable != image.type) {
+		dev_err(dev, "Next expected image %u, Loading %u\n",
+			next_loadable, image.type);
+		return ret;
+	}
+
+	/*
+	 * if something is pending download already.  This could only happen
+	 * for now when the driver is being loaded, or if someone has issued
+	 * another download command in another shell.
+	 */
+	if (test_and_set_bit(BCM_VK_WQ_DWNLD_PEND, vk->wq_offload) != 0) {
+		dev_err(dev, "Download operation already pending.\n");
+		return ret;
+	}
+
+	/* from here on the pending bit is owned and must be cleared on exit */
+	image_name = image.filename;
+	if (image_name[0] == '\0') {
+		/* Use default image name if the supplied name is empty */
+		idx = get_soc_idx(vk);
+		if (idx == VK_IDX_INVALID)
+			goto err_idx;
+
+		/* Image idx starts with boot1 */
+		image_idx = image.type - VK_IMAGE_TYPE_BOOT1;
+		image_name = get_load_fw_name(vk, &image_tab[idx][image_idx]);
+		if (!image_name) {
+			dev_err(dev, "No suitable image found for type %d",
+				image.type);
+			ret = -ENOENT;
+			goto err_idx;
+		}
+	} else {
+		/* Ensure filename is NULL terminated */
+		image.filename[sizeof(image.filename) - 1] = '\0';
+	}
+	ret = bcm_vk_load_image_by_type(vk, image.type, image_name);
+	dev_info(dev, "Load %s, ret %d\n", image_name, ret);
+err_idx:
+	clear_bit(BCM_VK_WQ_DWNLD_PEND, vk->wq_offload);
+
+	return ret;
+}
+
+/*
+ * Judge from the firmware status register whether a reset went through.
+ *
+ * Reset could be triggered when the card is in several states:
+ *   i)   in bootROM
+ *   ii)  after boot1
+ *   iii) boot2 running
+ *
+ * i) & ii) - no status bits will be updated.  If vkboot1 runs
+ * automatically after reset, it will update the reason to be unknown
+ * reason.
+ * iii) - reboot reason match + deinit done.
+ *
+ * Returns 0 when the reset reason is the mailbox doorbell or unknown and
+ * no deinit is left unfinished, -EAGAIN otherwise (including when the
+ * interface is down).
+ */
+static int bcm_vk_reset_successful(struct bcm_vk *vk)
+{
+	struct device *dev = &vk->pdev->dev;
+	u32 fw_status, reset_reason;
+	bool reason_ok, deinit_unfinished;
+	int ret = -EAGAIN;
+
+	fw_status = vkread32(vk, BAR_0, VK_BAR_FWSTS);
+
+	if (BCM_VK_INTF_IS_DOWN(fw_status)) {
+		/* nothing else can be trusted when the interface is down */
+		dev_err(dev, "PCIe Intf Down!\n");
+	} else {
+		reset_reason = fw_status & VK_FWSTS_RESET_REASON_MASK;
+		reason_ok = (reset_reason == VK_FWSTS_RESET_MBOX_DB) ||
+			    (reset_reason == VK_FWSTS_RESET_UNKNOWN);
+
+		/*
+		 * deinit bits set without the done bit is a failure if the
+		 * reset was triggered while boot2 is running
+		 */
+		deinit_unfinished = (fw_status & VK_FWSTS_DEINIT_TRIGGERED) &&
+				    !(fw_status & VK_FWSTS_RESET_DONE);
+
+		if (reason_ok && !deinit_unfinished)
+			ret = 0;
+	}
+
+	dev_dbg(dev, "FW status = 0x%x ret %d\n", fw_status, ret);
+
+	return ret;
+}
+
+/* ring the card's reset doorbell in BAR0 with the given value */
+static void bcm_to_v_reset_doorbell(struct bcm_vk *vk, u32 db_val)
+{
+	vkwrite32(vk, db_val, BAR_0, VK_BAR0_RESET_DB_BASE);
+}
+
+/*
+ * Clean up host and card state and ask the card to reset.
+ *
+ * Pending messages are drained, the BAR1 marker, version tag, DAUTH and
+ * SOTP areas are cleared and the cached card information is wiped before
+ * the reset is requested.
+ *
+ * Returns 0 when a regular soft reset was requested through the doorbell,
+ * or the special reset code that was used instead:
+ * VK_BAR0_RESET_RAMPDUMP if the card is in ramdump mode, or
+ * VK_BAR0_RESET_DB_HARD for a standalone card that is not running boot2.
+ * On both special paths the function returns before the alert records and
+ * the message id bitmap are cleared.
+ */
+static int bcm_vk_trigger_reset(struct bcm_vk *vk)
+{
+	u32 i;
+	u32 value, boot_status;
+	bool is_stdalone, is_boot2;
+	/* BAR0 registers that are zeroed after a soft reset was requested */
+	static const u32 bar0_reg_clr_list[] = { BAR_OS_UPTIME,
+						 BAR_INTF_VER,
+						 BAR_CARD_VOLTAGE,
+						 BAR_CARD_TEMPERATURE,
+						 BAR_CARD_PWR_AND_THRE };
+
+	/* clean up before pressing the door bell */
+	bcm_vk_drain_msg_on_reset(vk);
+	vkwrite32(vk, 0, BAR_1, VK_BAR1_MSGQ_DEF_RDY);
+	/* make tag '\0' terminated */
+	vkwrite32(vk, 0, BAR_1, VK_BAR1_BOOT1_VER_TAG);
+
+	for (i = 0; i < VK_BAR1_DAUTH_MAX; i++) {
+		vkwrite32(vk, 0, BAR_1, VK_BAR1_DAUTH_STORE_ADDR(i));
+		vkwrite32(vk, 0, BAR_1, VK_BAR1_DAUTH_VALID_ADDR(i));
+	}
+	for (i = 0; i < VK_BAR1_SOTP_REVID_MAX; i++)
+		vkwrite32(vk, 0, BAR_1, VK_BAR1_SOTP_REVID_ADDR(i));
+
+	/* forget everything that was read from the card */
+	memset(&vk->card_info, 0, sizeof(vk->card_info));
+	memset(&vk->peerlog_info, 0, sizeof(vk->peerlog_info));
+	memset(&vk->proc_mon_info, 0, sizeof(vk->proc_mon_info));
+	memset(&vk->alert_cnts, 0, sizeof(vk->alert_cnts));
+
+	/*
+	 * When boot request fails, the CODE_PUSH_OFFSET stays persistent.
+	 * Allowing us to debug the failure. When we call reset,
+	 * we should clear CODE_PUSH_OFFSET so ROM does not execute
+	 * boot again (and fails again) and instead waits for a new
+	 * codepush.  And, if previous boot has encountered error, need
+	 * to clear the entry values
+	 */
+	boot_status = vkread32(vk, BAR_0, BAR_BOOT_STATUS);
+	if (boot_status & BOOT_ERR_MASK) {
+		dev_info(&vk->pdev->dev,
+			 "Card in boot error 0x%x, clear CODEPUSH val\n",
+			 boot_status);
+		value = 0;
+	} else {
+		value = vkread32(vk, BAR_0, BAR_CODEPUSH_SBL);
+		value &= CODEPUSH_MASK;
+	}
+	vkwrite32(vk, value, BAR_0, BAR_CODEPUSH_SBL);
+
+	/* special reset handling */
+	is_stdalone = boot_status & BOOT_STDALONE_RUNNING;
+	is_boot2 = (boot_status & BOOT_STATE_MASK) == BOOT2_RUNNING;
+	if (vk->peer_alert.flags & ERR_LOG_RAMDUMP) {
+		/*
+		 * if card is in ramdump mode, it is hitting an error.  Don't
+		 * reset the reboot reason as it will contain valid info that
+		 * is important - simply use special reset
+		 */
+		vkwrite32(vk, VK_BAR0_RESET_RAMPDUMP, BAR_0, VK_BAR_FWSTS);
+		return VK_BAR0_RESET_RAMPDUMP;
+	} else if (is_stdalone && !is_boot2) {
+		dev_info(&vk->pdev->dev, "Hard reset on Standalone mode");
+		bcm_to_v_reset_doorbell(vk, VK_BAR0_RESET_DB_HARD);
+		return VK_BAR0_RESET_DB_HARD;
+	}
+
+	/* reset fw_status with proper reason, and press db */
+	vkwrite32(vk, VK_FWSTS_RESET_MBOX_DB, BAR_0, VK_BAR_FWSTS);
+	bcm_to_v_reset_doorbell(vk, VK_BAR0_RESET_DB_SOFT);
+
+	/* clear other necessary registers and alert records */
+	for (i = 0; i < ARRAY_SIZE(bar0_reg_clr_list); i++)
+		vkwrite32(vk, 0, BAR_0, bar0_reg_clr_list[i]);
+	memset(&vk->host_alert, 0, sizeof(vk->host_alert));
+	memset(&vk->peer_alert, 0, sizeof(vk->peer_alert));
+	/* clear 4096 bits of bitmap */
+	bitmap_clear(vk->bmap, 0, VK_MSG_ID_BITMAP_SIZE);
+
+	return 0;
+}
+
+/*
+ * Handle a user-requested card reset.
+ *
+ * Copies the request from user space, refuses if a download is pending,
+ * sends a graceful shutdown message, claims the reset for the calling
+ * process and then triggers the reset.  For a special (non-zero) reset type
+ * the type is handed back to user space in reset.arg2; otherwise the result
+ * of bcm_vk_reset_successful() is returned.
+ *
+ * Return: 0 on success, -EFAULT on a failed user copy, -EPERM when a
+ * download is pending, -EACCES when another reset is already in flight, or
+ * the value returned by bcm_vk_reset_successful().
+ */
+static long bcm_vk_reset(struct bcm_vk *vk, struct vk_reset __user *arg)
+{
+	struct device *dev = &vk->pdev->dev;
+	struct vk_reset reset;
+	u32 ramdump_flag;
+	int special_reset;
+	int ret = 0;
+
+	if (copy_from_user(&reset, arg, sizeof(reset)))
+		return -EFAULT;
+
+	/* a download in progress holds the pending bit - bail out */
+	if (test_and_set_bit(BCM_VK_WQ_DWNLD_PEND, vk->wq_offload)) {
+		dev_err(dev, "Download operation pending - skip reset.\n");
+		return -EPERM;
+	}
+
+	ramdump_flag = vk->peer_alert.flags & ERR_LOG_RAMDUMP;
+	dev_info(dev, "Issue Reset %s\n",
+		 ramdump_flag ? "in ramdump mode" : "");
+
+	/*
+	 * Reset sequence:
+	 * - ask the card for a graceful shut down
+	 * - give VK time to wind down (stop DMA etc)
+	 * - kill host apps
+	 * - raise the interrupt through the doorbell
+	 */
+	bcm_vk_send_shutdown_msg(vk, VK_SHUTDOWN_GRACEFUL, 0, 0);
+
+	/* only one process may own the reset at a time */
+	spin_lock(&vk->ctx_lock);
+	if (vk->reset_pid) {
+		dev_err(dev, "Reset already launched by process pid %d\n",
+			vk->reset_pid);
+		ret = -EACCES;
+	} else {
+		vk->reset_pid = task_pid_nr(current);
+	}
+	spin_unlock(&vk->ctx_lock);
+
+	if (!ret) {
+		bcm_vk_blk_drv_access(vk);
+		special_reset = bcm_vk_trigger_reset(vk);
+
+		/* let the card OS deinit and fill in the reset reason */
+		msleep(BCM_VK_DEINIT_TIME_MS);
+
+		if (!special_reset) {
+			ret = bcm_vk_reset_successful(vk);
+		} else {
+			/* special reset: report its type back to the user */
+			reset.arg2 = special_reset;
+			if (copy_to_user(arg, &reset, sizeof(reset)))
+				ret = -EFAULT;
+		}
+	}
+
+	clear_bit(BCM_VK_WQ_DWNLD_PEND, vk->wq_offload);
+	return ret;
+}
+
+/*
+ * mmap handler: maps a window of PCI resource 4 into user space with
+ * non-cached page protection.
+ *
+ * Return: -EINVAL when the requested page range extends past the resource,
+ * otherwise the result of io_remap_pfn_range().
+ */
+static int bcm_vk_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct bcm_vk_ctx *ctx = file->private_data;
+	struct bcm_vk *vk = container_of(ctx->miscdev, struct bcm_vk, miscdev);
+	unsigned long bar_pages;
+	unsigned long last_page;
+
+	/* only BAR2 is mmap possible, which is bar num 4 due to 64bit */
+#define VK_MMAPABLE_BAR 4
+
+	/* resource length expressed in pages, rounded up */
+	bar_pages = 1 + ((pci_resource_len(vk->pdev, VK_MMAPABLE_BAR) - 1)
+			 >> PAGE_SHIFT);
+
+	last_page = vma->vm_pgoff + vma_pages(vma);
+	if (last_page > bar_pages)
+		return -EINVAL;
+
+	/* turn the BAR-relative page offset into an absolute pfn */
+	vma->vm_pgoff += (pci_resource_start(vk->pdev, VK_MMAPABLE_BAR)
+			  >> PAGE_SHIFT);
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+				  vma->vm_end - vma->vm_start,
+				  vma->vm_page_prot);
+}
+
+/*
+ * ioctl entry point.  Dispatches VK_IOCTL_LOAD_IMAGE and VK_IOCTL_RESET
+ * under vk->mutex; any other command yields -EINVAL.
+ */
+static long bcm_vk_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct bcm_vk_ctx *ctx = file->private_data;
+	struct bcm_vk *vk = container_of(ctx->miscdev, struct bcm_vk, miscdev);
+	void __user *argp = (void __user *)arg;
+	long ret;
+
+	dev_dbg(&vk->pdev->dev,
+		"ioctl, cmd=0x%02x, arg=0x%02lx\n",
+		cmd, arg);
+
+	mutex_lock(&vk->mutex);
+
+	if (cmd == VK_IOCTL_LOAD_IMAGE)
+		ret = bcm_vk_load_image(vk, argp);
+	else if (cmd == VK_IOCTL_RESET)
+		ret = bcm_vk_reset(vk, argp);
+	else
+		ret = -EINVAL;
+
+	mutex_unlock(&vk->mutex);
+
+	return ret;
+}
+
+/* File operations table; bcm_vk_probe() installs it on the misc device. */
+static const struct file_operations bcm_vk_fops = {
+	.owner = THIS_MODULE,
+	.open = bcm_vk_open,
+	.read = bcm_vk_read,
+	.write = bcm_vk_write,
+	.poll = bcm_vk_poll,
+	.release = bcm_vk_release,
+	.mmap = bcm_vk_mmap,
+	.unlocked_ioctl = bcm_vk_ioctl,
+};
+
+/*
+ * Panic notifier callback: presses the reset doorbell with
+ * VK_BAR0_RESET_DB_HARD.  The event code and data pointer are unused.
+ * Always returns 0.
+ */
+static int bcm_vk_on_panic(struct notifier_block *nb,
+			   unsigned long e, void *p)
+{
+	struct bcm_vk *vk = container_of(nb, struct bcm_vk, panic_nb);
+
+	bcm_to_v_reset_doorbell(vk, VK_BAR0_RESET_DB_HARD);
+
+	return 0;
+}
+
+/*
+ * PCI probe: allocate the per-card structure, enable the device, claim
+ * its regions, set up DMA and interrupts, map the BARs, register the misc
+ * device, create the workqueue, initialise messaging and the TTY, and
+ * optionally kick off an automatic firmware download.
+ *
+ * Return: 0 on success or a negative errno.  On failure every resource
+ * acquired so far is released through the unwind labels at the end.
+ */
+static int bcm_vk_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	int err;
+	int i;
+	int id;
+	int irq;
+	char name[20];
+	struct bcm_vk *vk;
+	struct device *dev = &pdev->dev;
+	struct miscdevice *misc_device;
+	u32 boot_status;
+
+	/* allocate vk structure which is tied to kref for freeing */
+	vk = kzalloc(sizeof(*vk), GFP_KERNEL);
+	if (!vk)
+		return -ENOMEM;
+
+	kref_init(&vk->kref);
+	if (nr_ib_sgl_blk > BCM_VK_IB_SGL_BLK_MAX) {
+		dev_warn(dev, "Inband SGL blk %d limited to max %d\n",
+			 nr_ib_sgl_blk, BCM_VK_IB_SGL_BLK_MAX);
+		nr_ib_sgl_blk = BCM_VK_IB_SGL_BLK_MAX;
+	}
+	vk->ib_sgl_size = nr_ib_sgl_blk * VK_MSGQ_BLK_SIZE;
+	mutex_init(&vk->mutex);
+
+	err = pci_enable_device(pdev);
+	if (err) {
+		dev_err(dev, "Cannot enable PCI device\n");
+		goto err_free_exit;
+	}
+	vk->pdev = pci_dev_get(pdev);
+
+	err = pci_request_regions(pdev, DRV_MODULE_NAME);
+	if (err) {
+		dev_err(dev, "Cannot obtain PCI resources\n");
+		goto err_disable_pdev;
+	}
+
+	/*
+	 * From here on the PCI regions are owned by us: every failure must
+	 * unwind through err_release_regions (or a label above it).
+	 */
+
+	/* make sure DMA is good */
+	err = dma_set_mask_and_coherent(&pdev->dev,
+					DMA_BIT_MASK(BCM_VK_DMA_BITS));
+	if (err) {
+		dev_err(dev, "failed to set DMA mask\n");
+		goto err_release_regions;
+	}
+
+	/* The tdma is a scratch area for some DMA testings. */
+	if (nr_scratch_pages) {
+		vk->tdma_vaddr = dma_alloc_coherent
+					(dev,
+					 nr_scratch_pages * PAGE_SIZE,
+					 &vk->tdma_addr, GFP_KERNEL);
+		if (!vk->tdma_vaddr) {
+			err = -ENOMEM;
+			goto err_release_regions;
+		}
+	}
+
+	pci_set_master(pdev);
+	pci_set_drvdata(pdev, vk);
+
+	irq = pci_alloc_irq_vectors(pdev,
+				    1,
+				    VK_MSIX_IRQ_MAX,
+				    PCI_IRQ_MSI | PCI_IRQ_MSIX);
+
+	if (irq < VK_MSIX_IRQ_MIN_REQ) {
+		dev_err(dev, "failed to get min %d MSIX interrupts, irq(%d)\n",
+			VK_MSIX_IRQ_MIN_REQ, irq);
+		/* too few vectors is an error even when the call succeeded */
+		err = (irq >= 0) ? -EINVAL : irq;
+		goto err_release_regions;
+	}
+
+	if (irq != VK_MSIX_IRQ_MAX)
+		dev_warn(dev, "Number of IRQs %d allocated - requested(%d).\n",
+			 irq, VK_MSIX_IRQ_MAX);
+
+	for (i = 0; i < MAX_BAR; i++) {
+		/* multiple by 2 for 64 bit BAR mapping */
+		vk->bar[i] = pci_ioremap_bar(pdev, i * 2);
+		if (!vk->bar[i]) {
+			dev_err(dev, "failed to remap BAR%d\n", i);
+			err = -ENOMEM;
+			goto err_iounmap;
+		}
+	}
+
+	/* vk->num_irqs counts successfully requested IRQs for the unwind */
+	for (vk->num_irqs = 0;
+	     vk->num_irqs < VK_MSIX_MSGQ_MAX;
+	     vk->num_irqs++) {
+		err = devm_request_irq(dev, pci_irq_vector(pdev, vk->num_irqs),
+				       bcm_vk_msgq_irqhandler,
+				       IRQF_SHARED, DRV_MODULE_NAME, vk);
+		if (err) {
+			dev_err(dev, "failed to request msgq IRQ %d for MSIX %d\n",
+				pdev->irq + vk->num_irqs, vk->num_irqs + 1);
+			goto err_irq;
+		}
+	}
+	/* one irq for notification from VK */
+	err = devm_request_irq(dev, pci_irq_vector(pdev, vk->num_irqs),
+			       bcm_vk_notf_irqhandler,
+			       IRQF_SHARED, DRV_MODULE_NAME, vk);
+	if (err) {
+		dev_err(dev, "failed to request notf IRQ %d for MSIX %d\n",
+			pdev->irq + vk->num_irqs, vk->num_irqs + 1);
+		goto err_irq;
+	}
+	vk->num_irqs++;
+
+	/* remaining vectors, if any, are handed to the TTY channels */
+	for (i = 0;
+	     (i < VK_MSIX_TTY_MAX) && (vk->num_irqs < irq);
+	     i++, vk->num_irqs++) {
+		err = devm_request_irq(dev, pci_irq_vector(pdev, vk->num_irqs),
+				       bcm_vk_tty_irqhandler,
+				       IRQF_SHARED, DRV_MODULE_NAME, vk);
+		if (err) {
+			dev_err(dev, "failed request tty IRQ %d for MSIX %d\n",
+				pdev->irq + vk->num_irqs, vk->num_irqs + 1);
+			goto err_irq;
+		}
+		bcm_vk_tty_set_irq_enabled(vk, i);
+	}
+
+	id = ida_simple_get(&bcm_vk_ida, 0, 0, GFP_KERNEL);
+	if (id < 0) {
+		err = id;
+		dev_err(dev, "unable to get id\n");
+		goto err_irq;
+	}
+
+	vk->devid = id;
+	snprintf(name, sizeof(name), DRV_MODULE_NAME ".%d", id);
+	misc_device = &vk->miscdev;
+	misc_device->minor = MISC_DYNAMIC_MINOR;
+	misc_device->name = kstrdup(name, GFP_KERNEL);
+	if (!misc_device->name) {
+		err = -ENOMEM;
+		goto err_ida_remove;
+	}
+	misc_device->fops = &bcm_vk_fops;
+
+	err = misc_register(misc_device);
+	if (err) {
+		dev_err(dev, "failed to register device\n");
+		goto err_kfree_name;
+	}
+
+	INIT_WORK(&vk->wq_work, bcm_vk_wq_handler);
+
+	/* create dedicated workqueue */
+	vk->wq_thread = create_singlethread_workqueue(name);
+	if (!vk->wq_thread) {
+		dev_err(dev, "Fail to create workqueue thread\n");
+		err = -ENOMEM;
+		goto err_misc_deregister;
+	}
+
+	err = bcm_vk_msg_init(vk);
+	if (err) {
+		dev_err(dev, "failed to init msg queue info\n");
+		goto err_destroy_workqueue;
+	}
+
+	/* sync other info */
+	bcm_vk_sync_card_info(vk);
+
+	/* register for panic notifier */
+	vk->panic_nb.notifier_call = bcm_vk_on_panic;
+	err = atomic_notifier_chain_register(&panic_notifier_list,
+					     &vk->panic_nb);
+	if (err) {
+		dev_err(dev, "Fail to register panic notifier\n");
+		goto err_destroy_workqueue;
+	}
+
+	snprintf(name, sizeof(name), KBUILD_MODNAME ".%d_ttyVK", id);
+	err = bcm_vk_tty_init(vk, name);
+	if (err)
+		goto err_unregister_panic_notifier;
+
+	/*
+	 * lets trigger an auto download.  We don't want to do it serially here
+	 * because at probing time, it is not supposed to block for a long time.
+	 */
+	boot_status = vkread32(vk, BAR_0, BAR_BOOT_STATUS);
+	if (auto_load) {
+		if ((boot_status & BOOT_STATE_MASK) == BROM_RUNNING) {
+			err = bcm_vk_trigger_autoload(vk);
+			if (err)
+				goto err_bcm_vk_tty_exit;
+		} else {
+			dev_err(dev,
+				"Auto-load skipped - BROM not in proper state (0x%x)\n",
+				boot_status);
+		}
+	}
+
+	/* enable hb */
+	bcm_vk_hb_init(vk);
+
+	dev_dbg(dev, "BCM-VK:%u created\n", id);
+
+	return 0;
+
+err_bcm_vk_tty_exit:
+	bcm_vk_tty_exit(vk);
+
+err_unregister_panic_notifier:
+	atomic_notifier_chain_unregister(&panic_notifier_list,
+					 &vk->panic_nb);
+
+err_destroy_workqueue:
+	destroy_workqueue(vk->wq_thread);
+
+err_misc_deregister:
+	misc_deregister(misc_device);
+
+err_kfree_name:
+	kfree(misc_device->name);
+	misc_device->name = NULL;
+
+err_ida_remove:
+	ida_simple_remove(&bcm_vk_ida, id);
+
+err_irq:
+	for (i = 0; i < vk->num_irqs; i++)
+		devm_free_irq(dev, pci_irq_vector(pdev, i), vk);
+
+	pci_disable_msix(pdev);
+	pci_disable_msi(pdev);
+
+err_iounmap:
+	/* vk came from kzalloc(), so BARs never mapped are still NULL */
+	for (i = 0; i < MAX_BAR; i++) {
+		if (vk->bar[i])
+			pci_iounmap(pdev, vk->bar[i]);
+	}
+
+err_release_regions:
+	pci_release_regions(pdev);
+
+err_disable_pdev:
+	if (vk->tdma_vaddr)
+		dma_free_coherent(&pdev->dev, nr_scratch_pages * PAGE_SIZE,
+				  vk->tdma_vaddr, vk->tdma_addr);
+
+	pci_free_irq_vectors(pdev);
+	pci_disable_device(pdev);
+	pci_dev_put(pdev);
+
+err_free_exit:
+	kfree(vk);
+
+	return err;
+}
+
+/*
+ * kref release callback for struct bcm_vk: drops the PCI device reference
+ * held in vk->pdev and frees the structure.
+ */
+void bcm_vk_release_data(struct kref *kref)
+{
+	struct bcm_vk *vk = container_of(kref, struct bcm_vk, kref);
+	struct pci_dev *pdev = vk->pdev;
+
+	dev_dbg(&pdev->dev, "BCM-VK:%d release data 0x%p\n", vk->devid, vk);
+	pci_dev_put(pdev);
+	kfree(vk);
+}
+
+/*
+ * PCI remove: stop the heartbeat, reset the card, then tear down the
+ * notifier, messaging, TTY, scratch DMA, misc device, IRQs, workqueue and
+ * BAR mappings before releasing the PCI resources.  The vk structure is
+ * only freed once the last kref is dropped.
+ */
+static void bcm_vk_remove(struct pci_dev *pdev)
+{
+	int i;
+	struct bcm_vk *vk = pci_get_drvdata(pdev);
+	struct miscdevice *misc_device = &vk->miscdev;
+
+	bcm_vk_hb_deinit(vk);
+
+	/*
+	 * Trigger a reset to card and wait enough time for UCODE to rerun,
+	 * which re-initialize the card into its default state.
+	 * This ensures when driver is re-enumerated it will start from
+	 * a completely clean state.
+	 */
+	bcm_vk_trigger_reset(vk);
+	usleep_range(BCM_VK_UCODE_BOOT_US, BCM_VK_UCODE_BOOT_MAX_US);
+
+	/* unregister panic notifier */
+	atomic_notifier_chain_unregister(&panic_notifier_list,
+					 &vk->panic_nb);
+
+	bcm_vk_msg_remove(vk);
+	bcm_vk_tty_exit(vk);
+
+	if (vk->tdma_vaddr)
+		dma_free_coherent(&pdev->dev, nr_scratch_pages * PAGE_SIZE,
+				  vk->tdma_vaddr, vk->tdma_addr);
+
+	/* remove if name is set which means misc dev registered */
+	if (misc_device->name) {
+		misc_deregister(misc_device);
+		kfree(misc_device->name);
+		ida_simple_remove(&bcm_vk_ida, vk->devid);
+	}
+	/* release every IRQ that probe requested (counted in num_irqs) */
+	for (i = 0; i < vk->num_irqs; i++)
+		devm_free_irq(&pdev->dev, pci_irq_vector(pdev, i), vk);
+
+	pci_disable_msix(pdev);
+	pci_disable_msi(pdev);
+
+	/* make sure no work item is still running before the wq goes away */
+	cancel_work_sync(&vk->wq_work);
+	destroy_workqueue(vk->wq_thread);
+	bcm_vk_tty_wq_exit(vk);
+
+	for (i = 0; i < MAX_BAR; i++) {
+		if (vk->bar[i])
+			pci_iounmap(pdev, vk->bar[i]);
+	}
+
+	dev_dbg(&pdev->dev, "BCM-VK:%d released\n", vk->devid);
+
+	pci_release_regions(pdev);
+	pci_free_irq_vectors(pdev);
+	pci_disable_device(pdev);
+
+	/* drop the initial reference taken by kref_init() in probe */
+	kref_put(&vk->kref, bcm_vk_release_data);
+}
+
+/*
+ * PCI shutdown hook.
+ *
+ * If the card is running boot1, trigger a reset to park it.  If the boot
+ * status says BROM has not run, use the current PCIe link speed to decide
+ * whether to mark BROM as complete in the boot status register.
+ */
+static void bcm_vk_shutdown(struct pci_dev *pdev)
+{
+	struct bcm_vk *vk = pci_get_drvdata(pdev);
+	u32 status = vkread32(vk, BAR_0, BAR_BOOT_STATUS);
+	u32 state = status & BOOT_STATE_MASK;
+	u16 lnksta;
+
+	if (state == BOOT1_RUNNING) {
+		/* simply trigger a reset interrupt to park it */
+		bcm_vk_trigger_reset(vk);
+		return;
+	}
+
+	if (state != BROM_NOT_RUN)
+		return;
+
+	/*
+	 * The boot status only reflects boot condition since last reset.
+	 * As ucode will run only once to configure pcie, if multiple
+	 * resets happen, we lose track of whether ucode has run or not.
+	 * Read the current link speed and use that to sync up the
+	 * bootstatus so that on reboot-back-up it has the proper state
+	 * to start with autoload.
+	 */
+	if (pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnksta))
+		return;
+
+	if ((lnksta & PCI_EXP_LNKSTA_CLS) == PCI_EXP_LNKSTA_CLS_2_5GB)
+		return;
+
+	vkwrite32(vk, status | BROM_STATUS_COMPLETE, BAR_0, BAR_BOOT_STATUS);
+}
+
+/* PCI IDs this driver binds to; the empty entry terminates the table. */
+static const struct pci_device_id bcm_vk_ids[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_VALKYRIE), },
+	{ PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_VIPER), },
+	{ }
+};
+MODULE_DEVICE_TABLE(pci, bcm_vk_ids);
+
+/* PCI driver descriptor wiring the probe/remove/shutdown callbacks. */
+static struct pci_driver pci_driver = {
+	.name     = DRV_MODULE_NAME,
+	.id_table = bcm_vk_ids,
+	.probe    = bcm_vk_probe,
+	.remove   = bcm_vk_remove,
+	.shutdown = bcm_vk_shutdown,
+};
+module_pci_driver(pci_driver);
+
+/* module metadata */
+MODULE_DESCRIPTION("Broadcom VK Host Driver");
+MODULE_AUTHOR("Scott Branden <scott.branden@broadcom.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_VERSION("1.0");
diff --git a/drivers/misc/bcm-vk/bcm_vk_msg.c b/drivers/misc/bcm-vk/bcm_vk_msg.c
new file mode 100644
index 000000000000..f40cf08a6192
--- /dev/null
+++ b/drivers/misc/bcm-vk/bcm_vk_msg.c
@@ -0,0 +1,1357 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2018-2020 Broadcom.
+ */
+
+#include <linux/delay.h>
+#include <linux/fs.h>
+#include <linux/hash.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/poll.h>
+#include <linux/sizes.h>
+#include <linux/spinlock.h>
+#include <linux/timer.h>
+
+#include "bcm_vk.h"
+#include "bcm_vk_msg.h"
+#include "bcm_vk_sg.h"
+
+/*
+ * Constants used by the helpers below to manipulate the transport id
+ * (trans_id) in a msg block: the queue number is taken from the bits
+ * covered by BCM_VK_MSG_Q_MASK, and the message id from the bits above
+ * BCM_VK_MSG_Q_SHIFT, masked with BCM_VK_MSG_ID_MASK.
+ */
+#define BCM_VK_MSG_Q_SHIFT	 4
+#define BCM_VK_MSG_Q_MASK	 0xF
+#define BCM_VK_MSG_ID_MASK	 0xFFF
+
+/* NOTE(review): presumably the max DMA drain wait in ms; user not visible here */
+#define BCM_VK_DMA_DRAIN_MAX_MS	  2000
+
+/* number x q_size will be the max number of msg processed per loop */
+#define BCM_VK_MSG_PROC_MAX_LOOP 2
+
+/* module parameters (read-only via sysfs, mode 0444) */
+static bool hb_mon = true;
+module_param(hb_mon, bool, 0444);
+MODULE_PARM_DESC(hb_mon, "Monitoring heartbeat continuously.\n");
+/* cap on the per-entry "Drained:" log lines emitted by one drain pass */
+static int batch_log = 1;
+module_param(batch_log, int, 0444);
+MODULE_PARM_DESC(batch_log, "Max num of logs per batch operation.\n");
+
+/* Return the current value of the hb_mon module parameter. */
+static bool hb_mon_is_on(void)
+{
+	return hb_mon;
+}
+
+/*
+ * Extract the queue number from the message's trans_id.  Values at or
+ * above VK_MSGQ_PER_CHAN_MAX are replaced by VK_MSGQ_NUM_DEFAULT.
+ */
+static u32 get_q_num(const struct vk_msg_blk *msg)
+{
+	const u32 q = msg->trans_id & BCM_VK_MSG_Q_MASK;
+
+	return (q < VK_MSGQ_PER_CHAN_MAX) ? q : VK_MSGQ_NUM_DEFAULT;
+}
+
+/*
+ * Store a queue number in the message's trans_id, leaving the other bits
+ * untouched.  A q_num at or above VK_MSGQ_PER_CHAN_MAX is replaced by
+ * VK_MSGQ_NUM_DEFAULT.
+ */
+static void set_q_num(struct vk_msg_blk *msg, u32 q_num)
+{
+	const u32 q = (q_num < VK_MSGQ_PER_CHAN_MAX) ? q_num
+						      : VK_MSGQ_NUM_DEFAULT;
+
+	msg->trans_id = (msg->trans_id & ~BCM_VK_MSG_Q_MASK) | q;
+}
+
+/* Extract the message id field from the message's trans_id. */
+static u32 get_msg_id(const struct vk_msg_blk *msg)
+{
+	const u32 id_bits = msg->trans_id >> BCM_VK_MSG_Q_SHIFT;
+
+	return id_bits & BCM_VK_MSG_ID_MASK;
+}
+
+/*
+ * Store a message id in trans_id.  The queue number is re-read through
+ * get_q_num(), so an out-of-range queue number is normalised as well.
+ */
+static void set_msg_id(struct vk_msg_blk *msg, u32 val)
+{
+	const u32 q = get_q_num(msg);
+
+	msg->trans_id = (val << BCM_VK_MSG_Q_SHIFT) | q;
+}
+
+/* Advance a queue index by inc, wrapping it with the queue's q_mask. */
+static u32 msgq_inc(const struct bcm_vk_sync_qinfo *qinfo, u32 idx, u32 inc)
+{
+	const u32 next = idx + inc;
+
+	return next & qinfo->q_mask;
+}
+
+/*
+ * Return the address of block number idx within the queue: q_start plus
+ * idx blocks of VK_MSGQ_BLK_SIZE bytes each.  No range check is done here.
+ */
+static
+struct vk_msg_blk __iomem *msgq_blk_addr(const struct bcm_vk_sync_qinfo *qinfo,
+					 u32 idx)
+{
+	return qinfo->q_start + (VK_MSGQ_BLK_SIZE * idx);
+}
+
+/*
+ * Number of blocks currently in use: the distance from rd_idx to wr_idx,
+ * wrapped with q_mask.  Both indices are read from the queue header.
+ */
+static u32 msgq_occupied(const struct bcm_vk_msgq __iomem *msgq,
+			 const struct bcm_vk_sync_qinfo *qinfo)
+{
+	const u32 wr = readl_relaxed(&msgq->wr_idx);
+	const u32 rd = readl_relaxed(&msgq->rd_idx);
+
+	return (wr - rd) & qinfo->q_mask;
+}
+
+/* Free blocks in the queue: q_size minus the occupied count, minus one. */
+static
+u32 msgq_avail_space(const struct bcm_vk_msgq __iomem *msgq,
+		     const struct bcm_vk_sync_qinfo *qinfo)
+{
+	const u32 used = msgq_occupied(msgq, qinfo);
+
+	return qinfo->q_size - used - 1;
+}
+
+/* number of retries when enqueue message fails before returning EAGAIN */
+#define BCM_VK_H2VK_ENQ_RETRY 10
+/* sleep between two enqueue retries, in milliseconds */
+#define BCM_VK_H2VK_ENQ_RETRY_DELAY_MS 50
+
+/* True when vk->msgq_inited is non-zero (set by bcm_vk_sync_msgq()). */
+bool bcm_vk_drv_access_ok(struct bcm_vk *vk)
+{
+	return atomic_read(&vk->msgq_inited) != 0;
+}
+
+/*
+ * OR bit_mask into the host alert notification bits and queue the driver
+ * work item unless a notification is already pending.
+ */
+void bcm_vk_set_host_alert(struct bcm_vk *vk, u32 bit_mask)
+{
+	unsigned long irq_flags;
+
+	/* irqsave variant: this may run from the heartbeat timer */
+	spin_lock_irqsave(&vk->host_alert_lock, irq_flags);
+	vk->host_alert.notfs |= bit_mask;
+	spin_unlock_irqrestore(&vk->host_alert_lock, irq_flags);
+
+	/* someone else already flagged a pending notification */
+	if (test_and_set_bit(BCM_VK_WQ_NOTF_PEND, vk->wq_offload))
+		return;
+
+	queue_work(vk->wq_thread, &vk->wq_work);
+}
+
+/*
+ * Heartbeat related defines
+ * The heartbeat from host is a last resort.  If stuck condition happens
+ * on the card, firmware is supposed to detect it.  Therefore, the heartbeat
+ * values used will be more relaxed on the driver, which need to be bigger
+ * than the watchdog timeout on the card.  The watchdog timeout on the card
+ * is 20s, with a jitter of 2s => 22s.  We use a value of 27s here.
+ */
+#define BCM_VK_HB_TIMER_S 3
+#define BCM_VK_HB_TIMER_VALUE (BCM_VK_HB_TIMER_S * HZ)
+#define BCM_VK_HB_LOST_MAX (27 / BCM_VK_HB_TIMER_S)
+
+/*
+ * Heartbeat timer callback.
+ *
+ * While driver access is allowed and hb_mon is on, compares the card
+ * uptime register against the last sample and counts consecutive
+ * unchanged readings.  Once the count exceeds BCM_VK_HB_LOST_MAX, driver
+ * access is blocked and a host heartbeat-fail alert is raised.  The timer
+ * always re-arms itself.
+ */
+static void bcm_vk_hb_poll(struct timer_list *t)
+{
+	struct bcm_vk_hb_ctrl *hb = container_of(t, struct bcm_vk_hb_ctrl,
+						 timer);
+	struct bcm_vk *vk = container_of(hb, struct bcm_vk, hb_ctrl);
+	u32 now_s;
+
+	if (!bcm_vk_drv_access_ok(vk) || !hb_mon_is_on()) {
+		/* not monitoring: reset heart beat lost cnt */
+		hb->lost_cnt = 0;
+	} else {
+		/* sample the uptime register and compare with last sample */
+		now_s = vkread32(vk, BAR_0, BAR_OS_UPTIME);
+
+		if (now_s != hb->last_uptime)
+			hb->lost_cnt = 0; /* reset to avoid accumulation */
+		else
+			hb->lost_cnt++;
+
+		dev_dbg(&vk->pdev->dev, "Last uptime %d current %d, lost %d\n",
+			hb->last_uptime, now_s, hb->lost_cnt);
+
+		/*
+		 * If the interface goes down without any activity, the read
+		 * keeps returning 0xFFFFFFFF, so the loss is still detected
+		 * eventually.
+		 */
+		hb->last_uptime = now_s;
+	}
+
+	/* too many consecutive misses: declare the heartbeat lost */
+	if (hb->lost_cnt > BCM_VK_HB_LOST_MAX) {
+		dev_err(&vk->pdev->dev, "Heartbeat Misses %d times, %d s!\n",
+			BCM_VK_HB_LOST_MAX,
+			BCM_VK_HB_LOST_MAX * BCM_VK_HB_TIMER_S);
+
+		bcm_vk_blk_drv_access(vk);
+		bcm_vk_set_host_alert(vk, ERR_LOG_HOST_HB_FAIL);
+	}
+
+	/* schedule the next poll */
+	mod_timer(&hb->timer, jiffies + BCM_VK_HB_TIMER_VALUE);
+}
+
+/*
+ * Set up the heartbeat timer with bcm_vk_hb_poll() as its callback and
+ * arm it to fire BCM_VK_HB_TIMER_VALUE jiffies from now.
+ */
+void bcm_vk_hb_init(struct bcm_vk *vk)
+{
+	struct bcm_vk_hb_ctrl *hb = &vk->hb_ctrl;
+
+	timer_setup(&hb->timer, bcm_vk_hb_poll, 0);
+	mod_timer(&hb->timer, jiffies + BCM_VK_HB_TIMER_VALUE);
+}
+
+/*
+ * Stop the heartbeat timer.
+ *
+ * bcm_vk_hb_poll() re-arms the timer on every run, so a plain del_timer()
+ * can return while the handler is still executing on another CPU and about
+ * to queue the timer again.  del_timer_sync() waits for a running handler
+ * to finish, so the timer is inactive on return and can no longer touch
+ * the bcm_vk structure.  Must not be called from interrupt context.
+ */
+void bcm_vk_hb_deinit(struct bcm_vk *vk)
+{
+	struct bcm_vk_hb_ctrl *hb = &vk->hb_ctrl;
+
+	del_timer_sync(&hb->timer);
+}
+
+/* Clear nbits bits of the msg id bitmap from start, under msg_id_lock. */
+static void bcm_vk_msgid_bitmap_clear(struct bcm_vk *vk,
+				      unsigned int start,
+				      unsigned int nbits)
+{
+	spin_lock(&vk->msg_id_lock);
+	bitmap_clear(vk->bmap, start, nbits);
+	spin_unlock(&vk->msg_id_lock);
+}
+
+/*
+ * Allocate a context (one per file struct) for the given pid.
+ *
+ * Takes the first free slot in vk->ctx[], links it into the pid hash
+ * table, takes a reference on vk and resets the context counters.
+ *
+ * Return: the context, or NULL when a reset is in progress or every slot
+ * is already in use.
+ */
+static struct bcm_vk_ctx *bcm_vk_get_ctx(struct bcm_vk *vk, const pid_t pid)
+{
+	const u32 hash_idx = hash_32(pid, VK_PID_HT_SHIFT_BIT);
+	struct bcm_vk_ctx *ctx = NULL;
+	u32 slot;
+
+	spin_lock(&vk->ctx_lock);
+
+	/* no new context while a reset is in flight */
+	if (vk->reset_pid) {
+		dev_err(&vk->pdev->dev,
+			"No context allowed during reset by pid %d\n",
+			vk->reset_pid);
+
+		goto unlock;
+	}
+
+	/* grab the first slot that is not in use */
+	for (slot = 0; slot < ARRAY_SIZE(vk->ctx); slot++) {
+		if (vk->ctx[slot].in_use)
+			continue;
+
+		vk->ctx[slot].in_use = true;
+		ctx = &vk->ctx[slot];
+		break;
+	}
+
+	if (!ctx) {
+		dev_err(&vk->pdev->dev, "All context in use\n");
+
+		goto unlock;
+	}
+
+	/* record the owner and hook the context into the pid hash table */
+	ctx->pid = pid;
+	ctx->hash_idx = hash_idx;
+	list_add_tail(&ctx->node, &vk->pid_ht[hash_idx].head);
+
+	/* each live context holds a reference on vk */
+	kref_get(&vk->kref);
+
+	/* start with clean counters and a fresh wait queue */
+	atomic_set(&ctx->pend_cnt, 0);
+	atomic_set(&ctx->dma_cnt, 0);
+	init_waitqueue_head(&ctx->rd_wq);
+
+unlock:
+	spin_unlock(&vk->ctx_lock);
+
+	return ctx;
+}
+
+/*
+ * Allocate the next free message id from the bitmap.
+ *
+ * Candidates are probed round-robin starting after the last id handed
+ * out, wrapping from VK_MSG_ID_BITMAP_SIZE back to 1.
+ *
+ * Return: the allocated id, or VK_MSG_ID_OVERFLOW when all
+ * VK_MSG_ID_BITMAP_SIZE - 1 candidates are taken.
+ */
+static u16 bcm_vk_get_msg_id(struct bcm_vk *vk)
+{
+	u16 id = VK_MSG_ID_OVERFLOW;
+	u16 tried;
+
+	spin_lock(&vk->msg_id_lock);
+	for (tried = 0; tried < (VK_MSG_ID_BITMAP_SIZE - 1); tried++) {
+		/*
+		 * On the very first call msg_id is 0, so the first id
+		 * probed is 1.  VK_SIMPLEX_MSG_ID (0) is skipped as it is
+		 * used for one way host2vk communication.
+		 */
+		vk->msg_id++;
+		if (vk->msg_id == VK_MSG_ID_BITMAP_SIZE)
+			vk->msg_id = 1;
+
+		if (!test_bit(vk->msg_id, vk->bmap)) {
+			id = vk->msg_id;
+			bitmap_set(vk->bmap, vk->msg_id, 1);
+			break;
+		}
+	}
+	spin_unlock(&vk->msg_id_lock);
+
+	return id;
+}
+
+/*
+ * Release a context slot and unlink it from the pid hash table.
+ *
+ * Return: -EINVAL for a NULL context; otherwise the number of contexts
+ * with the same pid that remain in the same hash bucket (0 also when the
+ * slot was not marked in use).
+ */
+static int bcm_vk_free_ctx(struct bcm_vk *vk, struct bcm_vk_ctx *ctx)
+{
+	struct bcm_vk_ctx *other;
+	int remaining = 0;
+	u32 hash_idx;
+	pid_t pid;
+	u32 idx;
+
+	if (!ctx) {
+		dev_err(&vk->pdev->dev, "NULL context detected\n");
+		return -EINVAL;
+	}
+	idx = ctx->idx;
+	pid = ctx->pid;
+
+	spin_lock(&vk->ctx_lock);
+
+	if (vk->ctx[idx].in_use) {
+		vk->ctx[idx].in_use = false;
+		vk->ctx[idx].miscdev = NULL;
+
+		/* unlink, then count the contexts this pid still owns */
+		list_del(&ctx->node);
+		hash_idx = ctx->hash_idx;
+		list_for_each_entry(other, &vk->pid_ht[hash_idx].head, node) {
+			if (other->pid == pid)
+				remaining++;
+		}
+	} else {
+		dev_err(&vk->pdev->dev, "context[%d] not in use!\n", idx);
+	}
+
+	spin_unlock(&vk->ctx_lock);
+
+	return remaining;
+}
+
+/*
+ * Free a work entry: release its DMA scatter lists, drop the owning
+ * context's dma_cnt if bcm_vk_sg_free() reported any processed, then free
+ * the response message and the entry itself.
+ */
+static void bcm_vk_free_wkent(struct device *dev, struct bcm_vk_wkent *entry)
+{
+	int proc_cnt;
+
+	bcm_vk_sg_free(dev, entry->dma, VK_DMA_MAX_ADDRS, &proc_cnt);
+	if (proc_cnt)
+		atomic_dec(&entry->ctx->dma_cnt);
+
+	kfree(entry->to_h_msg);
+	kfree(entry);
+}
+
+/*
+ * Remove and free pending work entries from a message channel.
+ *
+ * @dev:  device used for logging and DMA release
+ * @chan: channel whose pending queues are drained
+ * @ctx:  when non-NULL, only entries owned by this context are drained and
+ *        each one is logged/accounted; when NULL every entry is drained
+ *        silently.
+ *
+ * Entries are first moved to a private list under pendq_lock and then
+ * freed outside the lock.
+ */
+static void bcm_vk_drain_all_pend(struct device *dev,
+				  struct bcm_vk_msg_chan *chan,
+				  struct bcm_vk_ctx *ctx)
+{
+	u32 num;
+	struct bcm_vk_wkent *entry, *tmp;
+	/* only dereferenced on the ctx != NULL paths below */
+	struct bcm_vk *vk = NULL;
+	struct list_head del_q;
+
+	if (ctx)
+		vk = container_of(ctx->miscdev, struct bcm_vk, miscdev);
+
+	INIT_LIST_HEAD(&del_q);
+	spin_lock(&chan->pendq_lock);
+	for (num = 0; num < chan->q_nr; num++) {
+		list_for_each_entry_safe(entry, tmp, &chan->pendq[num], node) {
+			if ((!ctx) || (entry->ctx->idx == ctx->idx)) {
+				list_del(&entry->node);
+				list_add_tail(&entry->node, &del_q);
+			}
+		}
+	}
+	spin_unlock(&chan->pendq_lock);
+
+	/* batch clean up */
+	num = 0;
+	list_for_each_entry_safe(entry, tmp, &del_q, node) {
+		list_del(&entry->node);
+		num++;
+		if (ctx) {
+			struct vk_msg_blk *msg;
+			int bit_set;
+			bool responded;
+			u32 msg_id;
+
+			/* if it is specific ctx, log for any stuck */
+			msg = entry->to_v_msg;
+			msg_id = get_msg_id(msg);
+			bit_set = test_bit(msg_id, vk->bmap);
+			responded = entry->to_h_msg ? true : false;
+			/* only the first batch_log entries are logged */
+			if (num <= batch_log)
+				dev_info(dev,
+					 "Drained: fid %u size %u msg 0x%x(seq-%x) ctx 0x%x[fd-%d] args:[0x%x 0x%x] resp %s, bmap %d\n",
+					 msg->function_id, msg->size,
+					 msg_id, entry->seq_num,
+					 msg->context_id, entry->ctx->idx,
+					 msg->cmd, msg->arg,
+					 responded ? "T" : "F", bit_set);
+			/*
+			 * a responded entry was counted in pend_cnt; an
+			 * unanswered one may still hold its msg id bit
+			 */
+			if (responded)
+				atomic_dec(&ctx->pend_cnt);
+			else if (bit_set)
+				bcm_vk_msgid_bitmap_clear(vk, msg_id, 1);
+		}
+		bcm_vk_free_wkent(dev, entry);
+	}
+	if (num && ctx)
+		dev_info(dev, "Total drained items %d [fd-%d]\n",
+			 num, ctx->idx);
+}
+
+/* Drain every pending entry, for all contexts, from both message channels. */
+void bcm_vk_drain_msg_on_reset(struct bcm_vk *vk)
+{
+	bcm_vk_drain_all_pend(&vk->pdev->dev, &vk->to_v_msg_chan, NULL);
+	bcm_vk_drain_all_pend(&vk->pdev->dev, &vk->to_h_msg_chan, NULL);
+}
+
+/*
+ * Function to sync up the messages queue info that is provided by BAR1
+ *
+ * Reads the msgq control offset and the queue count from BAR1, then walks
+ * the queue descriptors (all to_v queues first, then the to_h queues),
+ * caching start address, size, low-water mark, index mask and doorbell
+ * offset in each channel's sync_qinfo[].  On success msgq_inited is set.
+ *
+ * Return: 0 on success, -EAGAIN if the BAR1 msgq marker is not valid yet,
+ * -EINVAL if the advertised per-channel queue count is 0 or above
+ * VK_MSGQ_PER_CHAN_MAX, -EPERM if the queues are already in sync and
+ * force_sync is false.
+ */
+int bcm_vk_sync_msgq(struct bcm_vk *vk, bool force_sync)
+{
+	struct bcm_vk_msgq __iomem *msgq;
+	struct device *dev = &vk->pdev->dev;
+	u32 msgq_off;
+	u32 num_q;
+	struct bcm_vk_msg_chan *chan_list[] = {&vk->to_v_msg_chan,
+					       &vk->to_h_msg_chan};
+	struct bcm_vk_msg_chan *chan;
+	int i, j;
+	int ret = 0;
+
+	/*
+	 * If the driver is loaded at startup where vk OS is not up yet,
+	 * the msgq-info may not be available until a later time.  In
+	 * this case, we skip and the sync function is supposed to be
+	 * called again.
+	 */
+	if (!bcm_vk_msgq_marker_valid(vk)) {
+		dev_info(dev, "BAR1 msgq marker not initialized.\n");
+		return -EAGAIN;
+	}
+
+	msgq_off = vkread32(vk, BAR_1, VK_BAR1_MSGQ_CTRL_OFF);
+
+	/* each side is always half the total  */
+	num_q = vkread32(vk, BAR_1, VK_BAR1_MSGQ_NR) / 2;
+	if (!num_q || (num_q > VK_MSGQ_PER_CHAN_MAX)) {
+		dev_err(dev,
+			"Advertised msgq %d error - max %d allowed\n",
+			num_q, VK_MSGQ_PER_CHAN_MAX);
+		return -EINVAL;
+	}
+
+	/* note: q_nr is updated before the already-inited check below */
+	vk->to_v_msg_chan.q_nr = num_q;
+	vk->to_h_msg_chan.q_nr = num_q;
+
+	/* first msgq location */
+	msgq = vk->bar[BAR_1] + msgq_off;
+
+	/*
+	 * if this function is called when it is already inited,
+	 * something is wrong
+	 */
+	if (bcm_vk_drv_access_ok(vk) && !force_sync) {
+		dev_err(dev, "Msgq info already in sync\n");
+		return -EPERM;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(chan_list); i++) {
+		chan = chan_list[i];
+		memset(chan->sync_qinfo, 0, sizeof(chan->sync_qinfo));
+
+		for (j = 0; j < num_q; j++) {
+			struct bcm_vk_sync_qinfo *qinfo;
+			u32 msgq_start;
+			u32 msgq_size;
+			u32 msgq_nxt;
+			u32 msgq_db_offset, q_db_offset;
+
+			chan->msgq[j] = msgq;
+			msgq_start = readl_relaxed(&msgq->start);
+			msgq_size = readl_relaxed(&msgq->size);
+			msgq_nxt = readl_relaxed(&msgq->nxt);
+			msgq_db_offset = readl_relaxed(&msgq->db_offset);
+			/*
+			 * db_offset is self-checking: accept its low DB_SHIFT
+			 * bits only when they equal the complement of the
+			 * register shifted right by DB_SHIFT.
+			 */
+			q_db_offset = (msgq_db_offset & ((1 << DB_SHIFT) - 1));
+			if (q_db_offset  == (~msgq_db_offset >> DB_SHIFT))
+				msgq_db_offset = q_db_offset;
+			else
+				/* fall back to default */
+				msgq_db_offset = VK_BAR0_Q_DB_BASE(j);
+
+			dev_info(dev,
+				 "MsgQ[%d] type %d num %d, @ 0x%x, db_offset 0x%x rd_idx %d wr_idx %d, size %d, nxt 0x%x\n",
+				 j,
+				 readw_relaxed(&msgq->type),
+				 readw_relaxed(&msgq->num),
+				 msgq_start,
+				 msgq_db_offset,
+				 readl_relaxed(&msgq->rd_idx),
+				 readl_relaxed(&msgq->wr_idx),
+				 msgq_size,
+				 msgq_nxt);
+
+			qinfo = &chan->sync_qinfo[j];
+			/* formulate and record static info */
+			qinfo->q_start = vk->bar[BAR_1] + msgq_start;
+			qinfo->q_size = msgq_size;
+			/* set low threshold as 50% or 1/2 */
+			qinfo->q_low = qinfo->q_size >> 1;
+			/* NOTE(review): mask assumes size is a power of two - confirm */
+			qinfo->q_mask = qinfo->q_size - 1;
+			qinfo->q_db_offset = msgq_db_offset;
+
+			/* descriptors are contiguous; to_h ones follow to_v */
+			msgq++;
+		}
+	}
+	atomic_set(&vk->msgq_inited, 1);
+
+	return ret;
+}
+
+/*
+ * Initialise a message channel's mutex, pending-queue lock and the
+ * VK_MSGQ_MAX_NR pending list heads.  Always returns 0.
+ */
+static int bcm_vk_msg_chan_init(struct bcm_vk_msg_chan *chan)
+{
+	u32 i;
+
+	mutex_init(&chan->msgq_mutex);
+	spin_lock_init(&chan->pendq_lock);
+	for (i = 0; i < VK_MSGQ_MAX_NR; i++)
+		INIT_LIST_HEAD(&chan->pendq[i]);
+
+	return 0;
+}
+
+/*
+ * Queue an entry at the tail of pendq[q_num].  When the entry already
+ * carries a response (to_h_msg), bump the owning context's pending count
+ * and wake any reader waiting on it.
+ */
+static void bcm_vk_append_pendq(struct bcm_vk_msg_chan *chan, u16 q_num,
+				struct bcm_vk_wkent *entry)
+{
+	spin_lock(&chan->pendq_lock);
+
+	list_add_tail(&entry->node, &chan->pendq[q_num]);
+	if (entry->to_h_msg) {
+		struct bcm_vk_ctx *owner = entry->ctx;
+
+		atomic_inc(&owner->pend_cnt);
+		wake_up_interruptible(&owner->rd_wq);
+	}
+
+	spin_unlock(&chan->pendq_lock);
+}
+
+/*
+ * Append in-band scatter-gather lists after the message blocks of entry.
+ *
+ * Nothing is appended when the target queue's free space is below its low
+ * threshold.  Otherwise, for each plane with a non-zero address whose SGL
+ * still fits in vk->ib_sgl_size, the SGL is copied behind the message.
+ *
+ * Return: the appended size in message blocks, rounded up (0 if skipped).
+ */
+static u32 bcm_vk_append_ib_sgl(struct bcm_vk *vk,
+				struct bcm_vk_wkent *entry,
+				struct _vk_data *data,
+				unsigned int num_planes)
+{
+	struct device *dev = &vk->pdev->dev;
+	struct bcm_vk_msg_chan *chan = &vk->to_v_msg_chan;
+	struct vk_msg_blk *msg = &entry->to_v_msg[0];
+	u8 *dst = (u8 *)&entry->to_v_msg[entry->to_v_blks];
+	struct bcm_vk_sync_qinfo *qinfo;
+	struct bcm_vk_msgq __iomem *msgq;
+	unsigned int copied = 0;
+	unsigned int plane;
+	u32 ib_sgl_size = 0;
+	u32 q_num;
+	u32 avail;
+
+	/* check if high watermark is hit, and if so, skip */
+	q_num = get_q_num(msg);
+	msgq = chan->msgq[q_num];
+	qinfo = &chan->sync_qinfo[q_num];
+	avail = msgq_avail_space(msgq, qinfo);
+	if (avail < qinfo->q_low) {
+		dev_dbg(dev, "Skip inserting inband SGL, [0x%x/0x%x]\n",
+			avail, qinfo->q_size);
+		return 0;
+	}
+
+	for (plane = 0; plane < num_planes; plane++) {
+		/* unused plane */
+		if (!data[plane].address)
+			continue;
+		/* this SGL would overflow the in-band room */
+		if ((ib_sgl_size + data[plane].size) > vk->ib_sgl_size)
+			continue;
+
+		copied++;
+		memcpy(dst, entry->dma[plane].sglist, data[plane].size);
+		ib_sgl_size += data[plane].size;
+		dst += data[plane].size;
+	}
+
+	dev_dbg(dev, "Num %u sgl items appended, size 0x%x, room 0x%x\n",
+		copied, ib_sgl_size, vk->ib_sgl_size);
+
+	/* convert bytes to blocks, rounding up */
+	ib_sgl_size = (ib_sgl_size + VK_MSGQ_BLK_SIZE - 1)
+		       >> VK_MSGQ_BLK_SZ_SHIFT;
+
+	return ib_sgl_size;
+}
+
+/*
+ * Write db_val to the BAR0 doorbell register of to_v queue q_num, using
+ * the doorbell offset cached in that queue's sync_qinfo.
+ */
+void bcm_to_v_q_doorbell(struct bcm_vk *vk, u32 q_num, u32 db_val)
+{
+	struct bcm_vk_msg_chan *chan = &vk->to_v_msg_chan;
+	struct bcm_vk_sync_qinfo *qinfo = &chan->sync_qinfo[q_num];
+
+	vkwrite32(vk, db_val, BAR_0, qinfo->q_db_offset);
+}
+
+/*
+ * Copy a host-to-card message into its to_v queue and ring the doorbell.
+ *
+ * Waits up to BCM_VK_H2VK_ENQ_RETRY times, sleeping
+ * BCM_VK_H2VK_ENQ_RETRY_DELAY_MS each time, for enough free blocks.
+ *
+ * Return: 0 on success, -EMSGSIZE when entry->to_v_blks disagrees with the
+ * size in the message header, -EAGAIN when the queue stays full.
+ * NOTE(review): an invalid wr_idx read back from the card blocks driver
+ * access and raises a host alert, but this function still returns 0 on
+ * that path - confirm callers expect that.
+ */
+static int bcm_to_v_msg_enqueue(struct bcm_vk *vk, struct bcm_vk_wkent *entry)
+{
+	static u32 seq_num;
+	struct bcm_vk_msg_chan *chan = &vk->to_v_msg_chan;
+	struct device *dev = &vk->pdev->dev;
+	struct vk_msg_blk *src = &entry->to_v_msg[0];
+
+	struct vk_msg_blk __iomem *dst;
+	struct bcm_vk_msgq __iomem *msgq;
+	struct bcm_vk_sync_qinfo *qinfo;
+	u32 q_num = get_q_num(src);
+	u32 wr_idx; /* local copy */
+	u32 i;
+	u32 avail;
+	u32 retry;
+
+	if (entry->to_v_blks != src->size + 1) {
+		dev_err(dev, "number of blks %d not matching %d MsgId[0x%x]: func %d ctx 0x%x\n",
+			entry->to_v_blks,
+			src->size + 1,
+			get_msg_id(src),
+			src->function_id,
+			src->context_id);
+		return -EMSGSIZE;
+	}
+
+	msgq = chan->msgq[q_num];
+	qinfo = &chan->sync_qinfo[q_num];
+
+	mutex_lock(&chan->msgq_mutex);
+
+	avail = msgq_avail_space(msgq, qinfo);
+
+	/* if not enough space, return EAGAIN and let app handles it */
+	retry = 0;
+	while ((avail < entry->to_v_blks) &&
+	       (retry++ < BCM_VK_H2VK_ENQ_RETRY)) {
+		/* drop the mutex while sleeping so others can make progress */
+		mutex_unlock(&chan->msgq_mutex);
+
+		msleep(BCM_VK_H2VK_ENQ_RETRY_DELAY_MS);
+		mutex_lock(&chan->msgq_mutex);
+		avail = msgq_avail_space(msgq, qinfo);
+	}
+	/* retry only exceeds the limit when the last attempt also failed */
+	if (retry > BCM_VK_H2VK_ENQ_RETRY) {
+		mutex_unlock(&chan->msgq_mutex);
+		return -EAGAIN;
+	}
+
+	/* at this point, mutex is taken and there is enough space */
+	entry->seq_num = seq_num++; /* update debug seq number */
+	wr_idx = readl_relaxed(&msgq->wr_idx);
+
+	if (wr_idx >= qinfo->q_size) {
+		dev_crit(dev, "Invalid wr_idx 0x%x => max 0x%x!",
+			 wr_idx, qinfo->q_size);
+		bcm_vk_blk_drv_access(vk);
+		bcm_vk_set_host_alert(vk, ERR_LOG_HOST_PCIE_DWN);
+		goto idx_err;
+	}
+
+	/* copy block by block, wrapping the write index as needed */
+	dst = msgq_blk_addr(qinfo, wr_idx);
+	for (i = 0; i < entry->to_v_blks; i++) {
+		memcpy_toio(dst, src, sizeof(*dst));
+
+		src++;
+		wr_idx = msgq_inc(qinfo, wr_idx, 1);
+		dst = msgq_blk_addr(qinfo, wr_idx);
+	}
+
+	/* flush the write pointer */
+	writel(wr_idx, &msgq->wr_idx);
+
+	/*
+	 * log new info for debugging; msgq->num is read with a 16-bit
+	 * accessor, matching how bcm_vk_sync_msgq() reads the same field
+	 */
+	dev_dbg(dev,
+		"MsgQ[%d] [Rd Wr] = [%d %d] blks inserted %d - Q = [u-%d a-%d]/%d\n",
+		readw_relaxed(&msgq->num),
+		readl_relaxed(&msgq->rd_idx),
+		wr_idx,
+		entry->to_v_blks,
+		msgq_occupied(msgq, qinfo),
+		msgq_avail_space(msgq, qinfo),
+		readl_relaxed(&msgq->size));
+	/*
+	 * press door bell based on queue number. 1 is added to the wr_idx
+	 * to avoid the value of 0 appearing on the VK side to distinguish
+	 * from initial value.
+	 */
+	bcm_to_v_q_doorbell(vk, q_num, wr_idx + 1);
+idx_err:
+	mutex_unlock(&chan->msgq_mutex);
+	return 0;
+}
+
+int bcm_vk_send_shutdown_msg(struct bcm_vk *vk, u32 shut_type,
+			     const pid_t pid, const u32 q_num)
+{
+	struct device *dev = &vk->pdev->dev;
+	struct bcm_vk_wkent *entry;
+	struct vk_msg_blk *msg;
+	int rc;
+
+	/*
+	 * Verify the marker first: the PCIe interface may have gone down,
+	 * and shipping a message down based on broken values could make the
+	 * kernel panic.
+	 */
+	if (!bcm_vk_msgq_marker_valid(vk)) {
+		dev_info(dev, "PCIe comm chan - invalid marker (0x%x)!\n",
+			 vkread32(vk, BAR_1, VK_BAR1_MSGQ_DEF_RDY));
+		return -EINVAL;
+	}
+
+	/* work entry followed by exactly one message block */
+	entry = kzalloc(sizeof(*entry) + sizeof(*msg), GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+	msg = &entry->to_v_msg[0];
+
+	/* fill in the single-block shutdown request */
+	msg->function_id = VK_FID_SHUTDOWN;
+	set_q_num(msg, q_num);
+	set_msg_id(msg, VK_SIMPLEX_MSG_ID);
+	msg->cmd = shut_type;
+	msg->arg = pid;
+	entry->to_v_blks = 1;
+
+	rc = bcm_to_v_msg_enqueue(vk, entry);
+	if (rc)
+		dev_err(dev,
+			"Sending shutdown message to q %d for pid %d fails.\n",
+			get_q_num(msg), pid);
+
+	kfree(entry);
+	return rc;
+}
+
+/*
+ * Handle a pid that has no more sessions: send a VK_SHUTDOWN_PID message for
+ * it, unless pid is the recorded reset process, in which case reset_pid is
+ * simply cleared.  Returns -EPERM when driver access is not ok.
+ */
+static int bcm_vk_handle_last_sess(struct bcm_vk *vk, const pid_t pid,
+				   const u32 q_num)
+{
+	struct device *dev = &vk->pdev->dev;
+
+	/* message queue not usable: only clear a matching reset_pid */
+	if (!bcm_vk_drv_access_ok(vk)) {
+		if (vk->reset_pid == pid)
+			vk->reset_pid = 0;
+		return -EPERM;
+	}
+
+	dev_dbg(dev, "No more sessions, shut down pid %d\n", pid);
+
+	/* the reset process exiting its last session just clears reset_pid */
+	if (vk->reset_pid == pid) {
+		vk->reset_pid = 0;
+		return 0;
+	}
+
+	return bcm_vk_send_shutdown_msg(vk, VK_SHUTDOWN_PID, pid, q_num);
+}
+
+/* Unlink and return the pendq[q_num] entry carrying msg_id, else NULL. */
+static struct bcm_vk_wkent *bcm_vk_dequeue_pending(struct bcm_vk *vk,
+						   struct bcm_vk_msg_chan *chan,
+						   u16 q_num,
+						   u16 msg_id)
+{
+	struct bcm_vk_wkent *entry, *match = NULL;
+
+	spin_lock(&chan->pendq_lock);
+	list_for_each_entry(entry, &chan->pendq[q_num], node) {
+		if (get_msg_id(&entry->to_v_msg[0]) != msg_id)
+			continue;
+		list_del(&entry->node);
+		bcm_vk_msgid_bitmap_clear(vk, msg_id, 1);
+		match = entry;
+		break;
+	}
+	spin_unlock(&chan->pendq_lock);
+	return match;
+}
+
+s32 bcm_to_h_msg_dequeue(struct bcm_vk *vk)
+{
+	struct device *dev = &vk->pdev->dev;
+	struct bcm_vk_msg_chan *chan = &vk->to_h_msg_chan;
+	struct vk_msg_blk *data;
+	struct vk_msg_blk __iomem *src;
+	struct vk_msg_blk *dst;
+	struct bcm_vk_msgq __iomem *msgq;
+	struct bcm_vk_sync_qinfo *qinfo;
+	struct bcm_vk_wkent *entry;
+	u32 rd_idx, wr_idx;
+	u32 q_num, msg_id, j;
+	u32 num_blks;
+	s32 total = 0; /* messages dequeued; -ENOMEM on allocation failure */
+	int cnt = 0; /* unmatched responses seen; limits logging via batch_log */
+	int msg_processed = 0;
+	int max_msg_to_process;
+	bool exit_loop;
+
+	/*
+	 * Drain all the messages from the to_h queues.  For each message,
+	 * find its pending entry on the to_v side by msg_id & q_num and move
+	 * that entry to the to_h pending queue, where it waits for the user
+	 * space program to read it.  Returns the number drained or -ENOMEM.
+	 */
+	mutex_lock(&chan->msgq_mutex);
+
+	for (q_num = 0; q_num < chan->q_nr; q_num++) {
+		msgq = chan->msgq[q_num];
+		qinfo = &chan->sync_qinfo[q_num];
+		max_msg_to_process = BCM_VK_MSG_PROC_MAX_LOOP * qinfo->q_size;
+
+		rd_idx = readl_relaxed(&msgq->rd_idx);
+		wr_idx = readl_relaxed(&msgq->wr_idx);
+		msg_processed = 0;
+		exit_loop = false;
+		while ((rd_idx != wr_idx) && !exit_loop) {
+			u8 src_size;
+
+			/*
+			 * Make a local copy and get pointer to src blk
+			 * The rd_idx is masked before getting the pointer to
+			 * avoid out of bound access in case the interface goes
+			 * down.  It will end up pointing to the last block in
+			 * the buffer, but subsequent src->size check would be
+			 * able to catch this.
+			 */
+			src = msgq_blk_addr(qinfo, rd_idx & qinfo->q_mask);
+			src_size = readb(&src->size);
+
+			if ((rd_idx >= qinfo->q_size) ||
+			    (src_size > (qinfo->q_size - 1))) {
+				dev_crit(dev,
+					 "Invalid rd_idx 0x%x or size 0x%x => max 0x%x!",
+					 rd_idx, src_size, qinfo->q_size);
+				bcm_vk_blk_drv_access(vk);
+				bcm_vk_set_host_alert(vk,
+						      ERR_LOG_HOST_PCIE_DWN);
+				goto idx_err;
+			}
+
+			num_blks = src_size + 1;
+			data = kzalloc(num_blks * VK_MSGQ_BLK_SIZE, GFP_KERNEL);
+			if (data) {
+				/* copy messages and linearize it */
+				dst = data;
+				for (j = 0; j < num_blks; j++) {
+					memcpy_fromio(dst, src, sizeof(*dst));
+
+					dst++;
+					rd_idx = msgq_inc(qinfo, rd_idx, 1);
+					src = msgq_blk_addr(qinfo, rd_idx);
+				}
+				total++;
+			} else {
+				/*
+				 * if we could not allocate memory in kernel,
+				 * that is fatal.
+				 */
+				dev_crit(dev, "Kernel mem allocation failure.\n");
+				total = -ENOMEM;
+				goto idx_err;
+			}
+
+			/* flush rd pointer after a message is dequeued */
+			writel(rd_idx, &msgq->rd_idx);
+
+			/* log new info for debugging */
+			dev_dbg(dev,
+				"MsgQ[%d] [Rd Wr] = [%d %d] blks extracted %d - Q = [u-%d a-%d]/%d\n",
+				readl_relaxed(&msgq->num),
+				rd_idx,
+				wr_idx,
+				num_blks,
+				msgq_occupied(msgq, qinfo),
+				msgq_avail_space(msgq, qinfo),
+				readl_relaxed(&msgq->size));
+
+			/*
+			 * No need to search if it is an autonomous one-way
+			 * message from driver, as these messages do not bear
+			 * a to_v pending item. Currently, only the shutdown
+			 * message falls into this category.
+			 */
+			if (data->function_id == VK_FID_SHUTDOWN) {
+				kfree(data);
+				continue;
+			}
+
+			msg_id = get_msg_id(data);
+			/* lookup original message in to_v direction */
+			entry = bcm_vk_dequeue_pending(vk,
+						       &vk->to_v_msg_chan,
+						       q_num,
+						       msg_id);
+
+			/*
+			 * a response without a matching to_v entry is logged
+			 * and dropped; handle unsolicited messages here
+			 */
+			if (entry) {
+				entry->to_h_blks = num_blks;
+				entry->to_h_msg = data;
+				bcm_vk_append_pendq(&vk->to_h_msg_chan,
+						    q_num, entry);
+
+			} else {
+				if (cnt++ < batch_log)
+					dev_info(dev,
+						 "Could not find MsgId[0x%x] for resp func %d bmap %d\n",
+						 msg_id, data->function_id,
+						 test_bit(msg_id, vk->bmap));
+				kfree(data);
+			}
+			/* Fetch wr_idx to handle more back-to-back events */
+			wr_idx = readl(&msgq->wr_idx);
+
+			/*
+			 * Cap the number of messages handled per queue so that
+			 * back-to-back events cannot hold the CPU too long and
+			 * corrupted rd/wr indexes cannot cause an endless loop.
+			 */
+			if (++msg_processed >= max_msg_to_process) {
+				dev_warn(dev, "Q[%d] Per loop processing exceeds %d\n",
+					 q_num, max_msg_to_process);
+				exit_loop = true;
+			}
+		}
+	}
+idx_err:
+	mutex_unlock(&chan->msgq_mutex);
+	dev_dbg(dev, "total %d drained from queues\n", total);
+
+	return total;
+}
+
+/* Initialize locks, context slots and the pid hash table; returns 0. */
+static int bcm_vk_data_init(struct bcm_vk *vk)
+{
+	int slot, bucket;
+
+	spin_lock_init(&vk->ctx_lock);
+	spin_lock_init(&vk->msg_id_lock);
+	spin_lock_init(&vk->host_alert_lock);
+	vk->msg_id = 0;
+
+	/* every context slot starts unused and records its own index */
+	for (slot = 0; slot < ARRAY_SIZE(vk->ctx); slot++) {
+		vk->ctx[slot].miscdev = NULL;
+		vk->ctx[slot].idx = slot;
+		vk->ctx[slot].in_use = false;
+	}
+
+	/* empty list head for each pid hash bucket */
+	for (bucket = 0; bucket < VK_PID_HT_SZ; bucket++)
+		INIT_LIST_HEAD(&vk->pid_ht[bucket].head);
+
+	return 0;
+}
+
+/*
+ * IRQ handler: queue vk->wq_work on vk->wq_thread, or only log an error
+ * when driver access is not ok.  Always returns IRQ_HANDLED.
+ */
+irqreturn_t bcm_vk_msgq_irqhandler(int irq, void *dev_id)
+{
+	struct bcm_vk *vk = dev_id;
+
+	if (bcm_vk_drv_access_ok(vk))
+		queue_work(vk->wq_thread, &vk->wq_work);
+	else
+		dev_err(&vk->pdev->dev,
+			"Interrupt %d received when msgq not inited\n", irq);
+	return IRQ_HANDLED;
+}
+
+/* open(): bind a context to the file; -ENOMEM if no context is returned. */
+int bcm_vk_open(struct inode *inode, struct file *p_file)
+{
+	struct miscdevice *miscdev = (struct miscdevice *)p_file->private_data;
+	struct bcm_vk *vk = container_of(miscdev, struct bcm_vk, miscdev);
+	struct device *dev = &vk->pdev->dev;
+	struct bcm_vk_ctx *ctx;
+
+	/* get a context for the opening thread group */
+	ctx = bcm_vk_get_ctx(vk, task_tgid_nr(current));
+	if (!ctx) {
+		dev_err(dev, "Error allocating context\n");
+		return -ENOMEM;
+	}
+
+	/*
+	 * Replace private_data with the context so the other file methods
+	 * can find it.  Several sessions may have the device open at once,
+	 * and when the upper layer queries a response, only those tied to
+	 * that specific open should be returned; ctx->idx is what the read
+	 * method compares for that binding.
+	 */
+	ctx->miscdev = miscdev;
+	p_file->private_data = ctx;
+	dev_dbg(dev, "ctx_returned with idx %d, pid %d\n",
+		ctx->idx, ctx->pid);
+
+	return 0;
+}
+
+/*
+ * read() handler: return one pending to_h response of this file's context.
+ * Returns its byte length, -EPERM if driver access is not ok, -ENOMSG if none
+ * is pending, -EFAULT if the copy-out fails, or -EMSGSIZE if @count is too
+ * small (the first block, with size set, is then still copied to @buf).
+ */
+ssize_t bcm_vk_read(struct file *p_file, char __user *buf,
+		    size_t count, loff_t *f_pos)
+{
+	ssize_t rc = -ENOMSG;
+	struct bcm_vk_ctx *ctx = p_file->private_data;
+	struct bcm_vk *vk = container_of(ctx->miscdev, struct bcm_vk, miscdev);
+	struct device *dev = &vk->pdev->dev;
+	struct bcm_vk_msg_chan *chan = &vk->to_h_msg_chan;
+	struct bcm_vk_wkent *entry = NULL;
+	struct vk_msg_blk tmp_msg = { 0 };
+	u32 q_num, rsp_length;
+	bool found = false;
+
+	if (!bcm_vk_drv_access_ok(vk))
+		return -EPERM;
+
+	dev_dbg(dev, "Buf count %zu\n", count);
+
+	/*
+	 * Scan the to_h pendq for the first entry of this context.  Search is
+	 * always from the high to the low priority queues.
+	 */
+	spin_lock(&chan->pendq_lock);
+	for (q_num = 0; q_num < chan->q_nr; q_num++) {
+		list_for_each_entry(entry, &chan->pendq[q_num], node) {
+			if (entry->ctx->idx != ctx->idx)
+				continue;
+			if (count >= (entry->to_h_blks * VK_MSGQ_BLK_SIZE)) {
+				list_del(&entry->node);
+				atomic_dec(&ctx->pend_cnt);
+				found = true;
+			} else {
+				/*
+				 * too small: snapshot the first block while
+				 * the lock still protects the queued entry
+				 */
+				rc = -EMSGSIZE;
+				tmp_msg = entry->to_h_msg[0];
+				set_msg_id(&tmp_msg, entry->usr_msg_id);
+				tmp_msg.size = entry->to_h_blks - 1;
+			}
+			goto read_loop_exit;
+		}
+	}
+read_loop_exit:
+	spin_unlock(&chan->pendq_lock);
+
+	if (found) {
+		/* retrieve the passed down msg_id */
+		set_msg_id(&entry->to_h_msg[0], entry->usr_msg_id);
+		rsp_length = entry->to_h_blks * VK_MSGQ_BLK_SIZE;
+		if (copy_to_user(buf, entry->to_h_msg, rsp_length) == 0)
+			rc = rsp_length;
+		else
+			rc = -EFAULT;
+		bcm_vk_free_wkent(dev, entry);
+	} else if (rc == -EMSGSIZE) {
+		/* return just the first block so the app learns the size */
+		if (copy_to_user(buf, &tmp_msg, VK_MSGQ_BLK_SIZE) != 0) {
+			dev_err(dev, "Error return 1st block in -EMSGSIZE\n");
+			rc = -EFAULT;
+		}
+	}
+	return rc;
+}
+
+/*
+ * write() handler: queue one user message to the card.  Returns @count on
+ * success or a negative errno.
+ */
+ssize_t bcm_vk_write(struct file *p_file, const char __user *buf,
+		     size_t count, loff_t *f_pos)
+{
+	ssize_t rc;
+	struct bcm_vk_ctx *ctx = p_file->private_data;
+	struct bcm_vk *vk = container_of(ctx->miscdev, struct bcm_vk, miscdev);
+	struct bcm_vk_msgq __iomem *msgq;
+	struct device *dev = &vk->pdev->dev;
+	struct bcm_vk_wkent *entry;
+	u32 sgl_extra_blks, q_num;
+	u32 msg_size, msgq_size;
+
+	if (!bcm_vk_drv_access_ok(vk))
+		return -EPERM;
+
+	dev_dbg(dev, "Msg count %zu\n", count);
+
+	/* first, do sanity check where count should be multiple of basic blk */
+	if (count & (VK_MSGQ_BLK_SIZE - 1)) {
+		dev_err(dev, "Failure with size %zu not multiple of %zu\n",
+			count, VK_MSGQ_BLK_SIZE);
+		rc = -EINVAL;
+		goto write_err;
+	}
+
+	/* allocate the work entry + buffer for size count and inband sgl */
+	entry = kzalloc(sizeof(*entry) + count + vk->ib_sgl_size, GFP_KERNEL);
+	if (!entry) {
+		rc = -ENOMEM;
+		goto write_err;
+	}
+
+	/* now copy msg from user space, and then formulate the work entry */
+	if (copy_from_user(&entry->to_v_msg[0], buf, count)) {
+		rc = -EFAULT;
+		goto write_free_ent;
+	}
+
+	entry->to_v_blks = count >> VK_MSGQ_BLK_SZ_SHIFT;
+	entry->ctx = ctx;
+
+	/* do a check on the blk size which could not exceed queue space */
+	q_num = get_q_num(&entry->to_v_msg[0]);
+	msgq = vk->to_v_msg_chan.msgq[q_num];
+	msgq_size = readl_relaxed(&msgq->size);
+	if (entry->to_v_blks + (vk->ib_sgl_size >> VK_MSGQ_BLK_SZ_SHIFT)
+	    > (msgq_size - 1)) {
+		dev_err(dev, "Blk size %d exceed max queue size allowed %d\n",
+			entry->to_v_blks, msgq_size - 1);
+		rc = -EINVAL;
+		goto write_free_ent;
+	}
+
+	/* Use internal message id */
+	entry->usr_msg_id = get_msg_id(&entry->to_v_msg[0]);
+	rc = bcm_vk_get_msg_id(vk);
+	if (rc == VK_MSG_ID_OVERFLOW) {
+		dev_err(dev, "msg_id overflow\n");
+		rc = -EOVERFLOW;
+		goto write_free_ent;
+	}
+	set_msg_id(&entry->to_v_msg[0], rc);
+	ctx->q_num = q_num;
+
+	dev_dbg(dev,
+		"[Q-%d]Message ctx id %d, usr_msg_id 0x%x sent msg_id 0x%x\n",
+		ctx->q_num, ctx->idx, entry->usr_msg_id,
+		get_msg_id(&entry->to_v_msg[0]));
+
+	if (entry->to_v_msg[0].function_id == VK_FID_TRANS_BUF) {
+		/* Convert any pointers to sg list */
+		unsigned int num_planes;
+		int dir;
+		struct _vk_data *data;
+
+		/* no buffer transfer is allowed during a reset */
+		if (vk->reset_pid) {
+			dev_dbg(dev, "No Transfer allowed during reset, pid %d.\n",
+				ctx->pid);
+			rc = -EACCES;
+			goto write_free_msgid;
+		}
+
+		num_planes = entry->to_v_msg[0].cmd & VK_CMD_PLANES_MASK;
+		if ((entry->to_v_msg[0].cmd & VK_CMD_MASK) == VK_CMD_DOWNLOAD)
+			dir = DMA_FROM_DEVICE;
+		else
+			dir = DMA_TO_DEVICE;
+
+		/* size is the block count minus one: must fit what was copied */
+		msg_size = entry->to_v_msg[0].size;
+		if (msg_size >= entry->to_v_blks) {
+			rc = -EMSGSIZE;
+			goto write_free_msgid;
+		}
+
+		/*
+		 * num_planes comes from user space: it must fit entry->dma[]
+		 * and the descriptors must not start before the message.
+		 */
+		if (num_planes > VK_DMA_MAX_ADDRS ||
+		    num_planes * sizeof(*data) >
+		    (msg_size + 1) * VK_MSGQ_BLK_SIZE) {
+			rc = -EINVAL;
+			goto write_free_msgid;
+		}
+
+		/* Go to end of the message, then back up over the pointers */
+		data = (struct _vk_data *)&entry->to_v_msg[msg_size + 1];
+		data -= num_planes;
+
+		/* Convert user addresses to DMA SG List */
+		rc = bcm_vk_sg_alloc(dev, entry->dma, dir, data, num_planes);
+		if (rc)
+			goto write_free_msgid;
+
+		atomic_inc(&ctx->dma_cnt);
+		/* try to embed inband sgl */
+		sgl_extra_blks = bcm_vk_append_ib_sgl(vk, entry, data,
+						      num_planes);
+		entry->to_v_blks += sgl_extra_blks;
+		entry->to_v_msg[0].size += sgl_extra_blks;
+	} else if (entry->to_v_msg[0].function_id == VK_FID_INIT &&
+		   entry->to_v_msg[0].context_id == VK_NEW_CTX) {
+		/*
+		 * Init happens in 2 stages, only the first stage contains the
+		 * pid that needs translating.
+		 */
+		pid_t org_pid, pid;
+
+		/*
+		 * translate the pid into the unique host space as user
+		 * may run sessions inside containers or process
+		 * namespaces.
+		 */
+#define VK_MSG_PID_MASK 0xffffff00
+#define VK_MSG_PID_SH   8
+		org_pid = (entry->to_v_msg[0].arg & VK_MSG_PID_MASK)
+			   >> VK_MSG_PID_SH;
+
+		pid = task_tgid_nr(current);
+		entry->to_v_msg[0].arg =
+			(entry->to_v_msg[0].arg & ~VK_MSG_PID_MASK) |
+			(pid << VK_MSG_PID_SH);
+		if (org_pid != pid)
+			dev_dbg(dev, "In PID 0x%x(%d), converted PID 0x%x(%d)\n",
+				org_pid, org_pid, pid, pid);
+	}
+
+	/*
+	 * store work entry to pending queue until a response is received.
+	 * This needs to be done before enqueuing the message
+	 */
+	bcm_vk_append_pendq(&vk->to_v_msg_chan, q_num, entry);
+
+	rc = bcm_to_v_msg_enqueue(vk, entry);
+	if (rc) {
+		dev_err(dev, "Fail to enqueue msg to to_v queue\n");
+		/* remove message from pending list */
+		entry = bcm_vk_dequeue_pending(vk, &vk->to_v_msg_chan, q_num,
+					       get_msg_id(&entry->to_v_msg[0]));
+		goto write_free_ent;
+	}
+
+	return count;
+
+write_free_msgid:
+	bcm_vk_msgid_bitmap_clear(vk, get_msg_id(&entry->to_v_msg[0]), 1);
+write_free_ent:
+	kfree(entry);
+write_err:
+	return rc;
+}
+
+/* poll(): report readable whenever the context's pend_cnt is non-zero. */
+__poll_t bcm_vk_poll(struct file *p_file, struct poll_table_struct *wait)
+{
+	struct bcm_vk_ctx *ctx = p_file->private_data;
+	struct bcm_vk *vk = container_of(ctx->miscdev, struct bcm_vk, miscdev);
+	struct device *dev = &vk->pdev->dev;
+	int pending;
+
+	poll_wait(p_file, &ctx->rd_wq, wait);
+
+	pending = atomic_read(&ctx->pend_cnt);
+	if (!pending)
+		return 0;
+
+	/* a negative count is logged and reset; readable is still reported */
+	if (pending < 0) {
+		dev_err(dev, "Error cnt %d, setting back to 0", pending);
+		atomic_set(&ctx->pend_cnt, 0);
+	}
+	return (__force __poll_t)(POLLIN | POLLRDNORM);
+}
+
+/* release(): wait for in-flight DMA, drain pending items, free the context. */
+int bcm_vk_release(struct inode *inode, struct file *p_file)
+{
+	int ret, dma_cnt;
+	struct bcm_vk_ctx *ctx = p_file->private_data;
+	struct bcm_vk *vk = container_of(ctx->miscdev, struct bcm_vk, miscdev);
+	struct device *dev = &vk->pdev->dev;
+	pid_t pid = ctx->pid;
+	unsigned long timeout, start_time;
+
+	/*
+	 * if there are outstanding DMA transactions, need to delay long enough
+	 * to ensure that the card side would have stopped touching the host buffer
+	 * and its SGL list.  A race condition could happen if the host app is killed
+	 * abruptly, eg kill -9, while some DMA transfer orders are still inflight.
+	 * Nothing could be done except for a delay as host side is running in a
+	 * completely async fashion.
+	 */
+	start_time = jiffies;
+	timeout = start_time + msecs_to_jiffies(BCM_VK_DMA_DRAIN_MAX_MS);
+	do {
+		/* sample first so the warning never prints an unset value */
+		dma_cnt = atomic_read(&ctx->dma_cnt);
+		if (dma_cnt && time_after(jiffies, timeout)) {
+			dev_warn(dev, "%d dma still pending for [fd-%d] pid %d\n",
+				 dma_cnt, ctx->idx, pid);
+			break;
+		}
+		cpu_relax();
+		cond_resched();
+	} while (dma_cnt);
+	dev_dbg(dev, "Draining for [fd-%d] pid %d - delay %d ms\n",
+		ctx->idx, pid, jiffies_to_msecs(jiffies - start_time));
+
+	bcm_vk_drain_all_pend(dev, &vk->to_v_msg_chan, ctx);
+	bcm_vk_drain_all_pend(dev, &vk->to_h_msg_chan, ctx);
+
+	ret = bcm_vk_free_ctx(vk, ctx);
+	if (ret == 0)
+		ret = bcm_vk_handle_last_sess(vk, pid, ctx->q_num);
+	else
+		ret = 0;
+
+	kref_put(&vk->kref, bcm_vk_release_data);
+	return ret;
+}
+
+/*
+ * Set up internal data and both message channels, then read the msgq info.
+ * Returns 0 (also when the sync reports -EAGAIN), -EINVAL or -EIO.
+ */
+int bcm_vk_msg_init(struct bcm_vk *vk)
+{
+	struct device *dev = &vk->pdev->dev;
+	int sync_rc;
+
+	if (bcm_vk_data_init(vk)) {
+		dev_err(dev, "Error initializing internal data structures\n");
+		return -EINVAL;
+	}
+	if (bcm_vk_msg_chan_init(&vk->to_v_msg_chan) ||
+	    bcm_vk_msg_chan_init(&vk->to_h_msg_chan)) {
+		dev_err(dev, "Error initializing communication channel\n");
+		return -EIO;
+	}
+
+	sync_rc = bcm_vk_sync_msgq(vk, false);
+	if (sync_rc == 0 || sync_rc == -EAGAIN)
+		return 0;
+	dev_err(dev, "Error reading comm msg Q info\n");
+	return -EIO;
+}
+
+void bcm_vk_msg_remove(struct bcm_vk *vk)
+{
+	bcm_vk_blk_drv_access(vk);
+
+	/* drain all pending items of both channels (ctx argument is NULL) */
+	bcm_vk_drain_all_pend(&vk->pdev->dev, &vk->to_v_msg_chan, NULL);
+	bcm_vk_drain_all_pend(&vk->pdev->dev, &vk->to_h_msg_chan, NULL);
+}
+
diff --git a/drivers/misc/bcm-vk/bcm_vk_msg.h b/drivers/misc/bcm-vk/bcm_vk_msg.h
new file mode 100644
index 000000000000..4eaad84825d6
--- /dev/null
+++ b/drivers/misc/bcm-vk/bcm_vk_msg.h
@@ -0,0 +1,163 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2018-2020 Broadcom.
+ */
+
+#ifndef BCM_VK_MSG_H
+#define BCM_VK_MSG_H
+
+#include <uapi/linux/misc/bcm_vk.h>
+#include "bcm_vk_sg.h"
+
+/* Single message queue control structure; the driver reads it via __iomem */
+struct bcm_vk_msgq {
+	u16 type;	/* queue type */
+	u16 num;	/* queue number */
+	u32 start;	/* offset in BAR1 where the queue memory starts */
+
+	u32 rd_idx; /* read idx, counted in blocks */
+	u32 wr_idx; /* write idx, counted in blocks */
+
+	u32 size;	/*
+			 * size, which is in number of 16byte blocks,
+			 * to align with the message data structure.
+			 */
+	u32 nxt;	/*
+			 * nxt offset to the next msg queue struct.
+			 * This is to provide flexibility for alignment purposes.
+			 */
+
+/* Least significant 16 bits in below field hold doorbell register offset */
+#define DB_SHIFT 16
+
+	u32 db_offset; /* queue doorbell register offset in BAR0 */
+
+	u32 rsvd;
+};
+
+/*
+ * Structure to record static info from the msgq sync.  We keep local copy
+ * for some of these variables for both performance + checking purpose.
+ */
+struct bcm_vk_sync_qinfo {
+	void __iomem *q_start; /* presumably base of the queue blocks - confirm */
+	u32 q_size; /* number of blocks; rd/wr indexes must stay below it */
+	u32 q_mask; /* mask applied to rd_idx before addressing a block */
+	u32 q_low; /* NOTE(review): not used in the code shown here - confirm */
+	u32 q_db_offset; /* BAR0 offset written when ringing the doorbell */
+};
+
+#define VK_MSGQ_MAX_NR 4 /* Maximum number of message queues */
+
+/*
+ * message block - basic unit in the message where a message's size is always
+ *		   N x sizeof(basic_block)
+ */
+struct vk_msg_blk {
+	u8 function_id;
+#define VK_FID_TRANS_BUF	5
+#define VK_FID_SHUTDOWN		8
+#define VK_FID_INIT		9
+	u8 size; /* blocks following this one: a message is size + 1 blks */
+	u16 trans_id; /* transport id, queue & msg_id */
+	u32 context_id;
+#define VK_NEW_CTX		0
+	u32 cmd;
+#define VK_CMD_PLANES_MASK	0x000f /* number of planes to up/download */
+#define VK_CMD_UPLOAD		0x0400 /* memory transfer to vk */
+#define VK_CMD_DOWNLOAD		0x0500 /* memory transfer from vk */
+#define VK_CMD_MASK		0x0f00 /* command mask */
+	u32 arg;
+};
+
+/* vk_msg_blk is 16 bytes fixed */
+#define VK_MSGQ_BLK_SIZE   (sizeof(struct vk_msg_blk))
+/* shift for fast division of basic msg blk size */
+#define VK_MSGQ_BLK_SZ_SHIFT 4
+
+/* use msg_id 0 for any simplex host2vk communication */
+#define VK_SIMPLEX_MSG_ID 0
+
+/* context per open() of the device file (bound in bcm_vk_open()) */
+struct bcm_vk_ctx {
+	struct list_head node; /* use for linkage in Hash Table */
+	unsigned int idx; /* own index in vk->ctx[]; matched against by read */
+	bool in_use;
+	pid_t pid;
+	u32 hash_idx;
+	u32 q_num; /* queue number used by the stream; set on each write */
+	struct miscdevice *miscdev;
+	atomic_t pend_cnt; /* number of items pending to be read from host */
+	atomic_t dma_cnt; /* any dma transaction outstanding */
+	wait_queue_head_t rd_wq; /* wait queue handed to poll_wait() */
+};
+
+/* pid hash table entry */
+struct bcm_vk_ht_entry {
+	struct list_head head; /* initialized empty in bcm_vk_data_init() */
+};
+
+#define VK_DMA_MAX_ADDRS 4 /* Max 4 DMA Addresses */
+/* structure for house keeping a single work entry */
+struct bcm_vk_wkent {
+	struct list_head node; /* for linking purpose */
+	struct bcm_vk_ctx *ctx;
+
+	/* Store up to 4 dma pointers */
+	struct bcm_vk_dma dma[VK_DMA_MAX_ADDRS];
+
+	u32 to_h_blks; /* response */
+	struct vk_msg_blk *to_h_msg;
+
+	/*
+	 * to_v_msg is a flexible array member at the end, so the to_v msg is
+	 * simply appended to the end of the allocated block
+	 */
+	u32 usr_msg_id;
+	u32 to_v_blks;
+	u32 seq_num;
+	struct vk_msg_blk to_v_msg[];
+};
+
+/* queue stats counters */
+struct bcm_vk_qs_cnts {
+	u32 cnt; /* general counter, used to limit output */
+	u32 acc_sum; /* NOTE(review): presumably an accumulated sum - confirm */
+	u32 max_occ; /* max during a sampling period */
+	u32 max_abs; /* the abs max since reset */
+};
+
+/* control channel structure for either to_v or to_h communication */
+struct bcm_vk_msg_chan {
+	u32 q_nr; /* number of queues iterated in msgq[] / pendq[] */
+	/* Mutex held while reading or writing the msgq indexes and blocks */
+	struct mutex msgq_mutex;
+	/* pointing to BAR locations */
+	struct bcm_vk_msgq __iomem *msgq[VK_MSGQ_MAX_NR];
+	/* Spinlock held while walking or changing the pending queues */
+	spinlock_t pendq_lock;
+	/* for temporary storing pending items, one for each queue */
+	struct list_head pendq[VK_MSGQ_MAX_NR];
+	/* static queue info from the sync */
+	struct bcm_vk_sync_qinfo sync_qinfo[VK_MSGQ_MAX_NR];
+};
+
+/* total number of message queues allowed by the driver */
+#define VK_MSGQ_PER_CHAN_MAX	3
+#define VK_MSGQ_NUM_DEFAULT	(VK_MSGQ_PER_CHAN_MAX - 1)
+
+/* total number of supported ctx, 32 ctx each for 5 components */
+#define VK_CMPT_CTX_MAX		(32 * 5)
+
+/* hash table defines to store the opened FDs */
+#define VK_PID_HT_SHIFT_BIT	7 /* 128 */
+#define VK_PID_HT_SZ		BIT(VK_PID_HT_SHIFT_BIT)
+
+/* The following are offsets of DDR info provided by the vk card */
+#define VK_BAR0_SEG_SIZE	(4 * SZ_1K) /* segment size for BAR0 */
+
+/* shutdown types supported */
+#define VK_SHUTDOWN_PID		1
+#define VK_SHUTDOWN_GRACEFUL	2
+
+#endif
diff --git a/drivers/misc/bcm-vk/bcm_vk_sg.c b/drivers/misc/bcm-vk/bcm_vk_sg.c
new file mode 100644
index 000000000000..2e9daaf3e492
--- /dev/null
+++ b/drivers/misc/bcm-vk/bcm_vk_sg.c
@@ -0,0 +1,275 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2018-2020 Broadcom.
+ */
+#include <linux/dma-mapping.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/pgtable.h>
+#include <linux/vmalloc.h>
+
+#include <asm/page.h>
+#include <asm/unaligned.h>
+
+#include <uapi/linux/misc/bcm_vk.h>
+
+#include "bcm_vk.h"
+#include "bcm_vk_msg.h"
+#include "bcm_vk_sg.h"
+
+/*
+ * Valkyrie has a hardware limitation of 16M transfer size.
+ * So limit the SGL chunks to 16M.
+ */
+#define BCM_VK_MAX_SGL_CHUNK SZ_16M
+
+static int bcm_vk_dma_alloc(struct device *dev,
+			    struct bcm_vk_dma *dma,
+			    int dir,
+			    struct _vk_data *vkdata);
+static int bcm_vk_dma_free(struct device *dev, struct bcm_vk_dma *dma);
+
+/* Uncomment to dump SGLIST */
+/* #define BCM_VK_DUMP_SGLIST */
+
+/*
+ * Pin the user buffer described by @vkdata, map its pages for DMA and build
+ * the sglist; on success @vkdata is rewritten to describe the sglist.
+ * Returns 0 or a negative errno, releasing what was acquired on failure.
+ */
+static int bcm_vk_dma_alloc(struct device *dev, struct bcm_vk_dma *dma,
+			    int direction, struct _vk_data *vkdata)
+{
+	dma_addr_t addr, sg_addr;
+	int err, i, offset;
+	u32 size, remaining_size, transfer_size;
+	u64 data;
+	unsigned long first, last;
+	struct _vk_data *sgdata;
+
+	/* Get 64-bit user address */
+	data = get_unaligned(&vkdata->address);
+	/* offset into first page */
+	offset = offset_in_page(data);
+	/* Calculate number of pages */
+	first = (data & PAGE_MASK) >> PAGE_SHIFT;
+	last  = ((data + vkdata->size - 1) & PAGE_MASK) >> PAGE_SHIFT;
+	dma->nr_pages = last - first + 1;
+
+	/* Allocate DMA pages */
+	dma->pages = kmalloc_array(dma->nr_pages, sizeof(struct page *),
+				   GFP_KERNEL);
+	if (!dma->pages)
+		return -ENOMEM;
+
+	dev_dbg(dev, "Alloc DMA Pages [0x%llx+0x%x => %d pages]\n",
+		data, vkdata->size, dma->nr_pages);
+
+	dma->direction = direction;
+
+	/* Get user pages into memory */
+	err = get_user_pages_fast(data & PAGE_MASK, dma->nr_pages,
+				  direction == DMA_FROM_DEVICE, dma->pages);
+	if (err != dma->nr_pages) {
+		dma->nr_pages = (err >= 0) ? err : 0;
+		dev_err(dev, "get_user_pages_fast, err=%d [%d]\n",
+			err, dma->nr_pages);
+		err = err < 0 ? err : -EINVAL;
+		goto err_put_pages;
+	}
+
+	/* Max size of sg list is 1 per mapped page + fields at start */
+	dma->sglen = (dma->nr_pages * sizeof(*sgdata)) +
+		     (sizeof(u32) * SGLIST_VKDATA_START);
+
+	dma->sglist = dma_alloc_coherent(dev, dma->sglen, &dma->handle,
+					 GFP_KERNEL);
+	if (!dma->sglist) {
+		err = -ENOMEM;
+		goto err_put_pages;
+	}
+
+	dma->sglist[SGLIST_NUM_SG] = 0;
+	dma->sglist[SGLIST_TOTALSIZE] = vkdata->size;
+	remaining_size = vkdata->size;
+	sgdata = (struct _vk_data *)&dma->sglist[SGLIST_VKDATA_START];
+
+	/* Map all pages into DMA */
+	size = min_t(size_t, PAGE_SIZE - offset, remaining_size);
+	remaining_size -= size;
+	sg_addr = dma_map_page(dev, dma->pages[0], offset, size,
+			       dma->direction);
+	transfer_size = size;
+	if (unlikely(dma_mapping_error(dev, sg_addr))) {
+		/* nothing mapped yet (num_sg == 0): just release resources */
+		bcm_vk_dma_free(dev, dma);
+		return -EIO;
+	}
+
+	for (i = 1; i < dma->nr_pages; i++) {
+		size = min_t(size_t, PAGE_SIZE, remaining_size);
+		remaining_size -= size;
+		addr = dma_map_page(dev, dma->pages[i], 0, size,
+				    dma->direction);
+		if (unlikely(dma_mapping_error(dev, addr))) {
+			/*
+			 * Record the run mapped so far as an sg entry so
+			 * that bcm_vk_dma_free() unmaps it with the rest.
+			 */
+			sgdata->size = transfer_size;
+			put_unaligned(sg_addr, (u64 *)&sgdata->address);
+			dma->sglist[SGLIST_NUM_SG]++;
+			bcm_vk_dma_free(dev, dma);
+			return -EIO;
+		}
+
+		/*
+		 * Compress SG list entry when pages are contiguous
+		 * and transfer size less or equal to BCM_VK_MAX_SGL_CHUNK
+		 */
+		if ((addr == (sg_addr + transfer_size)) &&
+		    ((transfer_size + size) <= BCM_VK_MAX_SGL_CHUNK)) {
+			/* pages are contiguous, add to same sg entry */
+			transfer_size += size;
+		} else {
+			/* pages are not contiguous, write sg entry */
+			sgdata->size = transfer_size;
+			put_unaligned(sg_addr, (u64 *)&sgdata->address);
+			dma->sglist[SGLIST_NUM_SG]++;
+
+			/* start new sg entry */
+			sgdata++;
+			sg_addr = addr;
+			transfer_size = size;
+		}
+	}
+	/* Write last sg list entry */
+	sgdata->size = transfer_size;
+	put_unaligned(sg_addr, (u64 *)&sgdata->address);
+	dma->sglist[SGLIST_NUM_SG]++;
+
+	/* Update pointers and size field to point to sglist */
+	put_unaligned((u64)dma->handle, &vkdata->address);
+	vkdata->size = (dma->sglist[SGLIST_NUM_SG] * sizeof(*sgdata)) +
+		       (sizeof(u32) * SGLIST_VKDATA_START);
+
+#ifdef BCM_VK_DUMP_SGLIST
+	dev_dbg(dev, "sgl 0x%llx handle 0x%llx, sglen: 0x%x sgsize: 0x%x\n",
+		(u64)dma->sglist, dma->handle, dma->sglen, vkdata->size);
+	for (i = 0; i < vkdata->size / sizeof(u32); i++)
+		dev_dbg(dev, "i:0x%x 0x%x\n", i, dma->sglist[i]);
+#endif
+
+	return 0;
+
+err_put_pages:
+	for (i = 0; i < dma->nr_pages; i++)
+		put_page(dma->pages[i]);
+	kfree(dma->pages);
+	dma->pages = NULL;
+	dma->nr_pages = 0;
+	return err;
+}
+
+/*
+ * Convert @num user buffer descriptors in @vkdata into DMA SG lists, one
+ * bcm_vk_dma entry per descriptor.
+ *
+ * A descriptor whose size and address are both zero is skipped; exactly one
+ * of them being zero is an error.  On failure the lists already built for
+ * earlier descriptors are freed again.
+ * Returns 0 on success, else a negative errno (-EINVAL also when @num is 0).
+ */
+int bcm_vk_sg_alloc(struct device *dev,
+		    struct bcm_vk_dma *dma,
+		    int dir,
+		    struct _vk_data *vkdata,
+		    int num)
+{
+	int i;
+	int rc = -EINVAL;
+
+	for (i = 0; i < num; i++) {
+		struct _vk_data *cur = &vkdata[i];
+
+		if (!cur->size && !cur->address) {
+			/* empty descriptor: nothing to convert */
+			rc = 0;
+			continue;
+		}
+
+		if (!cur->size || !cur->address) {
+			/* only one of size and address is set */
+			dev_err(dev,
+				"Invalid vkdata %x 0x%x 0x%llx\n",
+				i, cur->size, cur->address);
+			rc = -EINVAL;
+			goto fail_alloc;
+		}
+
+		rc = bcm_vk_dma_alloc(dev, &dma[i], dir, cur);
+		if (rc)
+			goto fail_alloc;
+	}
+
+	return rc;
+
+fail_alloc:
+	/* unwind in reverse, freeing only entries that hold an sglist */
+	while (i-- > 0) {
+		if (dma[i].sglist)
+			bcm_vk_dma_free(dev, &dma[i]);
+	}
+	return rc;
+}
+
+/*
+ * Release one SG list: unmap every sg entry, free the coherent sglist,
+ * drop the reference on each page and free the page array.  dma->sglist is
+ * reset to NULL, which the callers test before freeing an entry.
+ * Always returns 0.
+ */
+static int bcm_vk_dma_free(struct device *dev, struct bcm_vk_dma *dma)
+{
+	struct _vk_data *sg_ent;
+	int num_sg;
+	int i;
+
+	dev_dbg(dev, "free sglist=%p sglen=0x%x\n", dma->sglist, dma->sglen);
+
+	/* Unmap all pages in the sglist */
+	num_sg = dma->sglist[SGLIST_NUM_SG];
+	sg_ent = (struct _vk_data *)&dma->sglist[SGLIST_VKDATA_START];
+	for (i = 0; i < num_sg; i++, sg_ent++)
+		dma_unmap_page(dev, get_unaligned(&sg_ent->address),
+			       sg_ent->size, dma->direction);
+
+	/* Free allocated sglist */
+	dma_free_coherent(dev, dma->sglen, dma->sglist, dma->handle);
+	dma->sglist = NULL;
+
+	/* Drop the reference held on each page */
+	for (i = 0; i < dma->nr_pages; i++)
+		put_page(dma->pages[i]);
+	/* Free allocated dma pages */
+	kfree(dma->pages);
+
+	return 0;
+}
+
+/* Free each allocated sglist in dma[0..num); *proc_cnt = number freed. */
+int bcm_vk_sg_free(struct device *dev, struct bcm_vk_dma *dma, int num,
+		   int *proc_cnt)
+{
+	int i;
+
+	*proc_cnt = 0;
+	for (i = 0; i < num; i++) {
+		if (!dma[i].sglist)
+			continue;
+		bcm_vk_dma_free(dev, &dma[i]);
+		(*proc_cnt)++;
+	}
+
+	return 0;
+}
diff --git a/drivers/misc/bcm-vk/bcm_vk_sg.h b/drivers/misc/bcm-vk/bcm_vk_sg.h
new file mode 100644
index 000000000000..81b3d0976ddb
--- /dev/null
+++ b/drivers/misc/bcm-vk/bcm_vk_sg.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2018-2020 Broadcom.
+ */
+
+#ifndef BCM_VK_SG_H
+#define BCM_VK_SG_H
+
+#include <linux/dma-mapping.h>
+
+struct bcm_vk_dma {
+	/* for userland buffer */
+	struct page **pages;
+	int nr_pages;
+
+	/* common */
+	dma_addr_t handle;
+	/*
+	 * sglist is of the following LE format
+	 * [U32] num_sg  = number of sg addresses (N)
+	 * [U32] totalsize = totalsize of data being transferred in sglist
+	 * [U32] size[0] = size of data in address0
+	 * [U32] addr_l[0] = lower 32-bits of address0
+	 * [U32] addr_h[0] = higher 32-bits of address0
+	 * ..
+	 * [U32] size[N-1] = size of data in addressN-1
+	 * [U32] addr_l[N-1] = lower 32-bits of addressN-1
+	 * [U32] addr_h[N-1] = higher 32-bits of addressN-1
+	 */
+	u32 *sglist;
+#define SGLIST_NUM_SG		0
+#define SGLIST_TOTALSIZE	1
+#define SGLIST_VKDATA_START	2
+
+	int sglen; /* Length (bytes) of sglist */
+	int direction;
+};
+
+struct _vk_data {
+	u32 size;    /* data size in bytes */
+	u64 address; /* Pointer to data     */
+} __packed;
+
+/*
+ * Scatter-gather DMA buffer API.
+ *
+ * These functions provide a simple way to create a page list and a
+ * scatter-gather list from userspace address and map the memory
+ * for DMA operation.
+ */
+int bcm_vk_sg_alloc(struct device *dev,
+		    struct bcm_vk_dma *dma,
+		    int dir,
+		    struct _vk_data *vkdata,
+		    int num);
+
+int bcm_vk_sg_free(struct device *dev, struct bcm_vk_dma *dma, int num,
+		   int *proc_cnt);
+
+#endif
+
diff --git a/drivers/misc/bcm-vk/bcm_vk_tty.c b/drivers/misc/bcm-vk/bcm_vk_tty.c
new file mode 100644
index 000000000000..4d02692ecfc7
--- /dev/null
+++ b/drivers/misc/bcm-vk/bcm_vk_tty.c
@@ -0,0 +1,339 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2018-2020 Broadcom.
+ */
+
+#include <linux/tty.h>
+#include <linux/tty_driver.h>
+#include <linux/tty_flip.h>
+
+#include "bcm_vk.h"
+
+/* TTYVK base offset is 0x300000 into BAR1 */
+#define BAR1_TTYVK_BASE_OFFSET	0x300000
+/* Each TTYVK channel (TO or FROM) is 0x100000 */
+#define BAR1_TTYVK_CHAN_OFFSET	0x100000
+/* Each TTYVK channel has TO and FROM, hence the * 2 */
+#define BAR1_TTYVK_BASE(index)	(BAR1_TTYVK_BASE_OFFSET + \
+				 ((index) * BAR1_TTYVK_CHAN_OFFSET * 2))
+/* TO TTYVK channel base comes before FROM for each index */
+#define TO_TTYK_BASE(index)	BAR1_TTYVK_BASE(index)
+#define FROM_TTYK_BASE(index)	(BAR1_TTYVK_BASE(index) + \
+				 BAR1_TTYVK_CHAN_OFFSET)
+
+struct bcm_vk_tty_chan {
+	u32 reserved;
+	u32 size;
+	u32 wr;
+	u32 rd;
+	u32 *data;
+};
+
+#define VK_BAR_CHAN(v, DIR, e)	((v)->DIR##_offset \
+				 + offsetof(struct bcm_vk_tty_chan, e))
+#define VK_BAR_CHAN_SIZE(v, DIR)	VK_BAR_CHAN(v, DIR, size)
+#define VK_BAR_CHAN_WR(v, DIR)		VK_BAR_CHAN(v, DIR, wr)
+#define VK_BAR_CHAN_RD(v, DIR)		VK_BAR_CHAN(v, DIR, rd)
+#define VK_BAR_CHAN_DATA(v, DIR, off)	(VK_BAR_CHAN(v, DIR, data) + (off))
+
+#define VK_BAR0_REGSEG_TTY_DB_OFFSET	0x86c
+
+/* Poll every 1/10 of second - temp hack till we use MSI interrupt */
+#define SERIAL_TIMER_VALUE (HZ / 10)
+
+static void bcm_vk_tty_poll(struct timer_list *t)
+{
+	struct bcm_vk *vk = from_timer(vk, t, serial_timer);
+
+	queue_work(vk->tty_wq_thread, &vk->tty_wq_work);
+	mod_timer(&vk->serial_timer, jiffies + SERIAL_TIMER_VALUE);
+}
+
+irqreturn_t bcm_vk_tty_irqhandler(int irq, void *dev_id)
+{
+	struct bcm_vk *vk = dev_id;
+
+	queue_work(vk->tty_wq_thread, &vk->tty_wq_work);
+
+	return IRQ_HANDLED;
+}
+
+static void bcm_vk_tty_wq_handler(struct work_struct *work)
+{
+	struct bcm_vk *vk = container_of(work, struct bcm_vk, tty_wq_work);
+	struct bcm_vk_tty *vktty;
+	int card_status;
+	int count;
+	unsigned char c;
+	int i;
+	int wr;
+
+	card_status = vkread32(vk, BAR_0, BAR_CARD_STATUS);
+	if (BCM_VK_INTF_IS_DOWN(card_status))
+		return;
+
+	for (i = 0; i < BCM_VK_NUM_TTY; i++) {
+		count = 0;
+		/* Check the card status that the tty channel is ready */
+		if ((card_status & BIT(i)) == 0)
+			continue;
+
+		vktty = &vk->tty[i];
+
+		/* Don't increment read index if tty app is closed */
+		if (!vktty->is_opened)
+			continue;
+
+		/* Fetch the wr offset in buffer from VK */
+		wr = vkread32(vk, BAR_1, VK_BAR_CHAN_WR(vktty, from));
+
+		/* safe to ignore until bar read gives proper size */
+		if (vktty->from_size == 0)
+			continue;
+
+		if (wr >= vktty->from_size) {
+			dev_err(&vk->pdev->dev,
+				"ERROR: wq handler ttyVK%d wr:0x%x > 0x%x\n",
+				i, wr, vktty->from_size);
+			/* Need to signal and close device in this case */
+			continue;
+		}
+
+		/*
+		 * Simple read of circular buffer and
+		 * insert into tty flip buffer
+		 */
+		while (vk->tty[i].rd != wr) {
+			c = vkread8(vk, BAR_1,
+				    VK_BAR_CHAN_DATA(vktty, from, vktty->rd));
+			vktty->rd++;
+			if (vktty->rd >= vktty->from_size)
+				vktty->rd = 0;
+			tty_insert_flip_char(&vktty->port, c, TTY_NORMAL);
+			count++;
+		}
+
+		if (count) {
+			tty_flip_buffer_push(&vktty->port);
+
+			/* Update read offset from shadow register to card */
+			vkwrite32(vk, vktty->rd, BAR_1,
+				  VK_BAR_CHAN_RD(vktty, from));
+		}
+	}
+}
+
+static int bcm_vk_tty_open(struct tty_struct *tty, struct file *file)
+{
+	int card_status;
+	struct bcm_vk *vk;
+	struct bcm_vk_tty *vktty;
+	int index;
+
+	/* initialize the pointer in case something fails */
+	tty->driver_data = NULL;
+
+	vk = (struct bcm_vk *)dev_get_drvdata(tty->dev);
+	index = tty->index;
+
+	if (index >= BCM_VK_NUM_TTY)
+		return -EINVAL;
+
+	vktty = &vk->tty[index];
+
+	vktty->pid = task_pid_nr(current);
+	vktty->to_offset = TO_TTYK_BASE(index);
+	vktty->from_offset = FROM_TTYK_BASE(index);
+
+	/* Do not allow tty device to be opened if tty on card not ready */
+	card_status = vkread32(vk, BAR_0, BAR_CARD_STATUS);
+	if (BCM_VK_INTF_IS_DOWN(card_status) || ((card_status & BIT(index)) == 0))
+		return -EBUSY;
+
+	/*
+	 * Get shadow registers of the buffer sizes and the "to" write offset
+	 * and "from" read offset
+	 */
+	vktty->to_size = vkread32(vk, BAR_1, VK_BAR_CHAN_SIZE(vktty, to));
+	vktty->wr = vkread32(vk, BAR_1,  VK_BAR_CHAN_WR(vktty, to));
+	vktty->from_size = vkread32(vk, BAR_1, VK_BAR_CHAN_SIZE(vktty, from));
+	vktty->rd = vkread32(vk, BAR_1,  VK_BAR_CHAN_RD(vktty, from));
+	vktty->is_opened = true;
+
+	if (tty->count == 1 && !vktty->irq_enabled) {
+		timer_setup(&vk->serial_timer, bcm_vk_tty_poll, 0);
+		mod_timer(&vk->serial_timer, jiffies + SERIAL_TIMER_VALUE);
+	}
+	return 0;
+}
+
+static void bcm_vk_tty_close(struct tty_struct *tty, struct file *file)
+{
+	struct bcm_vk *vk = dev_get_drvdata(tty->dev);
+
+	if (tty->index >= BCM_VK_NUM_TTY)
+		return;
+
+	vk->tty[tty->index].is_opened = false;
+
+	if (tty->count == 1)
+		del_timer_sync(&vk->serial_timer);
+}
+
+static void bcm_vk_tty_doorbell(struct bcm_vk *vk, u32 db_val)
+{
+	vkwrite32(vk, db_val, BAR_0,
+		  VK_BAR0_REGSEG_DB_BASE + VK_BAR0_REGSEG_TTY_DB_OFFSET);
+}
+
+static int bcm_vk_tty_write(struct tty_struct *tty,
+			    const unsigned char *buffer,
+			    int count)
+{
+	int index;
+	struct bcm_vk *vk;
+	struct bcm_vk_tty *vktty;
+	int i;
+
+	index = tty->index;
+	vk = dev_get_drvdata(tty->dev);
+	vktty = &vk->tty[index];
+
+	/* Simple write each byte to circular buffer */
+	for (i = 0; i < count; i++) {
+		vkwrite8(vk, buffer[i], BAR_1,
+			 VK_BAR_CHAN_DATA(vktty, to, vktty->wr));
+		vktty->wr++;
+		if (vktty->wr >= vktty->to_size)
+			vktty->wr = 0;
+	}
+	/* Update write offset from shadow register to card */
+	vkwrite32(vk, vktty->wr, BAR_1, VK_BAR_CHAN_WR(vktty, to));
+	bcm_vk_tty_doorbell(vk, 0);
+
+	return count;
+}
+
+static int bcm_vk_tty_write_room(struct tty_struct *tty)
+{
+	struct bcm_vk *vk = dev_get_drvdata(tty->dev);
+
+	return vk->tty[tty->index].to_size - 1;
+}
+
+static const struct tty_operations serial_ops = {
+	.open = bcm_vk_tty_open,
+	.close = bcm_vk_tty_close,
+	.write = bcm_vk_tty_write,
+	.write_room = bcm_vk_tty_write_room,
+};
+
+/* Set up the tty driver, ports and workqueue; returns 0 or negative errno. */
+int bcm_vk_tty_init(struct bcm_vk *vk, char *name)
+{
+	int i, err;
+	struct tty_driver *tty_drv;
+	struct device *dev = &vk->pdev->dev;
+
+	tty_drv = tty_alloc_driver(BCM_VK_NUM_TTY, TTY_DRIVER_REAL_RAW |
+				   TTY_DRIVER_DYNAMIC_DEV);
+	if (IS_ERR(tty_drv))
+		return PTR_ERR(tty_drv);
+
+	/* Save struct tty_driver for uninstalling the device */
+	vk->tty_drv = tty_drv;
+
+	/* initialize the tty driver */
+	tty_drv->driver_name = KBUILD_MODNAME;
+	tty_drv->name = kstrdup(name, GFP_KERNEL);
+	if (!tty_drv->name) {
+		err = -ENOMEM;
+		goto err_put_tty_driver;
+	}
+	tty_drv->type = TTY_DRIVER_TYPE_SERIAL;
+	tty_drv->subtype = SERIAL_TYPE_NORMAL;
+	tty_drv->init_termios = tty_std_termios;
+	tty_set_operations(tty_drv, &serial_ops);
+
+	/* register the tty driver */
+	err = tty_register_driver(tty_drv);
+	if (err) {
+		dev_err(dev, "tty_register_driver failed\n");
+		goto err_kfree_tty_name;
+	}
+
+	for (i = 0; i < BCM_VK_NUM_TTY; i++) {
+		struct device *tty_dev;
+
+		tty_port_init(&vk->tty[i].port);
+		tty_dev = tty_port_register_device(&vk->tty[i].port, tty_drv,
+						   i, dev);
+		if (IS_ERR(tty_dev)) {
+			tty_port_destroy(&vk->tty[i].port);
+			err = PTR_ERR(tty_dev);
+			goto unwind;
+		}
+		dev_set_drvdata(tty_dev, vk);
+		vk->tty[i].is_opened = false;
+	}
+
+	INIT_WORK(&vk->tty_wq_work, bcm_vk_tty_wq_handler);
+	vk->tty_wq_thread = create_singlethread_workqueue("tty");
+	if (!vk->tty_wq_thread) {
+		dev_err(dev, "Fail to create tty workqueue thread\n");
+		err = -ENOMEM;
+		goto unwind;
+	}
+	return 0;
+
+unwind:
+	/* Ports [0, i) are registered: undo them like bcm_vk_tty_exit() */
+	while (--i >= 0) {
+		tty_port_unregister_device(&vk->tty[i].port, tty_drv, i);
+		tty_port_destroy(&vk->tty[i].port);
+	}
+	tty_unregister_driver(tty_drv);
+err_kfree_tty_name:
+	kfree(tty_drv->name);
+	tty_drv->name = NULL;
+err_put_tty_driver:
+	put_tty_driver(tty_drv);
+	return err;
+}
+
+void bcm_vk_tty_exit(struct bcm_vk *vk)
+{
+	int i;
+
+	del_timer_sync(&vk->serial_timer);
+	for (i = 0; i < BCM_VK_NUM_TTY; ++i) {
+		tty_port_unregister_device(&vk->tty[i].port,
+					   vk->tty_drv,
+					   i);
+		tty_port_destroy(&vk->tty[i].port);
+	}
+	tty_unregister_driver(vk->tty_drv);
+
+	kfree(vk->tty_drv->name);
+	vk->tty_drv->name = NULL;
+
+	put_tty_driver(vk->tty_drv);
+}
+
+void bcm_vk_tty_terminate_tty_user(struct bcm_vk *vk)
+{
+	struct bcm_vk_tty *vktty;
+	int i;
+
+	for (i = 0; i < BCM_VK_NUM_TTY; ++i) {
+		vktty = &vk->tty[i];
+		if (vktty->pid)
+			kill_pid(find_vpid(vktty->pid), SIGKILL, 1);
+	}
+}
+
+void bcm_vk_tty_wq_exit(struct bcm_vk *vk)
+{
+	cancel_work_sync(&vk->tty_wq_work);
+	destroy_workqueue(vk->tty_wq_thread);
+}
diff --git a/drivers/misc/cardreader/rts5227.c b/drivers/misc/cardreader/rts5227.c
index 8859011672cb..8200af22b529 100644
--- a/drivers/misc/cardreader/rts5227.c
+++ b/drivers/misc/cardreader/rts5227.c
@@ -398,6 +398,11 @@ static int rts522a_extra_init_hw(struct rtsx_pcr *pcr)
 {
 	rts5227_extra_init_hw(pcr);
 
+	/* Power down OCP for power consumption */
+	if (!pcr->card_exist)
+		rtsx_pci_write_register(pcr, FPDCTL, OC_POWER_DOWN,
+				OC_POWER_DOWN);
+
 	rtsx_pci_write_register(pcr, FUNC_FORCE_CTL, FUNC_FORCE_UPME_XMT_DBG,
 		FUNC_FORCE_UPME_XMT_DBG);
 	rtsx_pci_write_register(pcr, PCLK_CTL, 0x04, 0x04);
diff --git a/drivers/misc/cardreader/rtsx_pcr.c b/drivers/misc/cardreader/rtsx_pcr.c
index 5a491d2cd1ae..273311184669 100644
--- a/drivers/misc/cardreader/rtsx_pcr.c
+++ b/drivers/misc/cardreader/rtsx_pcr.c
@@ -59,12 +59,6 @@ static const struct pci_device_id rtsx_pci_ids[] = {
 
 MODULE_DEVICE_TABLE(pci, rtsx_pci_ids);
 
-static inline void rtsx_pci_disable_aspm(struct rtsx_pcr *pcr)
-{
-	pcie_capability_clear_and_set_word(pcr->pci, PCI_EXP_LNKCTL,
-					   PCI_EXP_LNKCTL_ASPMC, 0);
-}
-
 static int rtsx_comm_set_ltr_latency(struct rtsx_pcr *pcr, u32 latency)
 {
 	rtsx_pci_write_register(pcr, MSGTXDATA0,
@@ -1805,7 +1799,6 @@ static int rtsx_pci_runtime_resume(struct device *device)
 	struct pci_dev *pcidev = to_pci_dev(device);
 	struct pcr_handle *handle;
 	struct rtsx_pcr *pcr;
-	int ret = 0;
 
 	handle = pci_get_drvdata(pcidev);
 	pcr = handle->pcr;
@@ -1830,7 +1823,7 @@ static int rtsx_pci_runtime_resume(struct device *device)
 	schedule_delayed_work(&pcr->idle_work, msecs_to_jiffies(200));
 
 	mutex_unlock(&pcr->pcr_mutex);
-	return ret;
+	return 0;
 }
 
 #else /* CONFIG_PM */
diff --git a/drivers/misc/cxl/sysfs.c b/drivers/misc/cxl/sysfs.c
index d97a243ad30c..c173a5e88c91 100644
--- a/drivers/misc/cxl/sysfs.c
+++ b/drivers/misc/cxl/sysfs.c
@@ -178,7 +178,7 @@ static ssize_t perst_reloads_same_image_store(struct device *device,
 	if ((rc != 1) || !(val == 1 || val == 0))
 		return -EINVAL;
 
-	adapter->perst_same_image = (val == 1 ? true : false);
+	adapter->perst_same_image = (val == 1);
 	return count;
 }
 
diff --git a/drivers/misc/eeprom/eeprom_93xx46.c b/drivers/misc/eeprom/eeprom_93xx46.c
index 7c45f82b4302..80114f4c80ad 100644
--- a/drivers/misc/eeprom/eeprom_93xx46.c
+++ b/drivers/misc/eeprom/eeprom_93xx46.c
@@ -35,6 +35,10 @@ static const struct eeprom_93xx46_devtype_data atmel_at93c46d_data = {
 		  EEPROM_93XX46_QUIRK_INSTRUCTION_LENGTH,
 };
 
+static const struct eeprom_93xx46_devtype_data microchip_93lc46b_data = {
+	.quirks = EEPROM_93XX46_QUIRK_EXTRA_READ_CYCLE,
+};
+
 struct eeprom_93xx46_dev {
 	struct spi_device *spi;
 	struct eeprom_93xx46_platform_data *pdata;
@@ -55,6 +59,11 @@ static inline bool has_quirk_instruction_length(struct eeprom_93xx46_dev *edev)
 	return edev->pdata->quirks & EEPROM_93XX46_QUIRK_INSTRUCTION_LENGTH;
 }
 
+static inline bool has_quirk_extra_read_cycle(struct eeprom_93xx46_dev *edev)
+{
+	return edev->pdata->quirks & EEPROM_93XX46_QUIRK_EXTRA_READ_CYCLE;
+}
+
 static int eeprom_93xx46_read(void *priv, unsigned int off,
 			      void *val, size_t count)
 {
@@ -96,6 +105,11 @@ static int eeprom_93xx46_read(void *priv, unsigned int off,
 		dev_dbg(&edev->spi->dev, "read cmd 0x%x, %d Hz\n",
 			cmd_addr, edev->spi->max_speed_hz);
 
+		if (has_quirk_extra_read_cycle(edev)) {
+			cmd_addr <<= 1;
+			bits += 1;
+		}
+
 		spi_message_init(&m);
 
 		t[0].tx_buf = (char *)&cmd_addr;
@@ -363,6 +377,7 @@ static void select_deassert(void *context)
 static const struct of_device_id eeprom_93xx46_of_table[] = {
 	{ .compatible = "eeprom-93xx46", },
 	{ .compatible = "atmel,at93c46d", .data = &atmel_at93c46d_data, },
+	{ .compatible = "microchip,93lc46b", .data = &microchip_93lc46b_data, },
 	{}
 };
 MODULE_DEVICE_TABLE(of, eeprom_93xx46_of_table);
@@ -512,3 +527,5 @@ MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Driver for 93xx46 EEPROMs");
 MODULE_AUTHOR("Anatolij Gustschin <agust@denx.de>");
 MODULE_ALIAS("spi:93xx46");
+MODULE_ALIAS("spi:eeprom-93xx46");
+MODULE_ALIAS("spi:93lc46b");
diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
index 70eb5ed942d0..f12e909034ac 100644
--- a/drivers/misc/fastrpc.c
+++ b/drivers/misc/fastrpc.c
@@ -520,12 +520,13 @@ fastrpc_map_dma_buf(struct dma_buf_attachment *attachment,
 {
 	struct fastrpc_dma_buf_attachment *a = attachment->priv;
 	struct sg_table *table;
+	int ret;
 
 	table = &a->sgt;
 
-	if (!dma_map_sgtable(attachment->dev, table, dir, 0))
-		return ERR_PTR(-ENOMEM);
-
+	ret = dma_map_sgtable(attachment->dev, table, dir, 0);
+	if (ret)
+		table = ERR_PTR(ret);
 	return table;
 }
 
diff --git a/drivers/misc/habanalabs/common/Makefile b/drivers/misc/habanalabs/common/Makefile
index eccd8c7dc62d..5d8b48288cf4 100644
--- a/drivers/misc/habanalabs/common/Makefile
+++ b/drivers/misc/habanalabs/common/Makefile
@@ -1,7 +1,13 @@
 # SPDX-License-Identifier: GPL-2.0-only
+
+include $(src)/common/mmu/Makefile
+habanalabs-y += $(HL_COMMON_MMU_FILES)
+
+include $(src)/common/pci/Makefile
+habanalabs-y += $(HL_COMMON_PCI_FILES)
+
 HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
 		common/asid.o common/habanalabs_ioctl.o \
 		common/command_buffer.o common/hw_queue.o common/irq.o \
 		common/sysfs.o common/hwmon.o common/memory.o \
-		common/command_submission.o common/mmu.o common/mmu_v1.o \
-		common/firmware_if.o common/pci.o
+		common/command_submission.o common/firmware_if.o
diff --git a/drivers/misc/habanalabs/common/asid.c b/drivers/misc/habanalabs/common/asid.c
index a2fdf31cf27c..ede04c032b6e 100644
--- a/drivers/misc/habanalabs/common/asid.c
+++ b/drivers/misc/habanalabs/common/asid.c
@@ -50,8 +50,10 @@ unsigned long hl_asid_alloc(struct hl_device *hdev)
 
 void hl_asid_free(struct hl_device *hdev, unsigned long asid)
 {
-	if (WARN((asid == 0 || asid >= hdev->asic_prop.max_asid),
-						"Invalid ASID %lu", asid))
+	if (asid == HL_KERNEL_ASID_ID || asid >= hdev->asic_prop.max_asid) {
+		dev_crit(hdev->dev, "Invalid ASID %lu", asid);
 		return;
+	}
+
 	clear_bit(asid, hdev->asid_bitmap);
 }
diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c
index 6f6a904ab6ca..d9adb9a5e4d8 100644
--- a/drivers/misc/habanalabs/common/command_buffer.c
+++ b/drivers/misc/habanalabs/common/command_buffer.c
@@ -635,10 +635,12 @@ struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
 
 	cb_handle >>= PAGE_SHIFT;
 	cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr, (u32) cb_handle);
-	/* hl_cb_get should never fail here so use kernel WARN */
-	WARN(!cb, "Kernel CB handle invalid 0x%x\n", (u32) cb_handle);
-	if (!cb)
+	/* hl_cb_get should never fail here */
+	if (!cb) {
+		dev_crit(hdev->dev, "Kernel CB handle invalid 0x%x\n",
+				(u32) cb_handle);
 		goto destroy_cb;
+	}
 
 	return cb;
 
diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index b2b3d2b0f808..7bd4a03b3429 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -48,8 +48,8 @@ void hl_sob_reset_error(struct kref *ref)
 	struct hl_device *hdev = hw_sob->hdev;
 
 	dev_crit(hdev->dev,
-			"SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n",
-			hw_sob->q_idx, hw_sob->sob_id);
+		"SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n",
+		hw_sob->q_idx, hw_sob->sob_id);
 }
 
 /**
@@ -149,9 +149,10 @@ void hl_fence_get(struct hl_fence *fence)
 		kref_get(&fence->refcount);
 }
 
-static void hl_fence_init(struct hl_fence *fence)
+static void hl_fence_init(struct hl_fence *fence, u64 sequence)
 {
 	kref_init(&fence->refcount);
+	fence->cs_sequence = sequence;
 	fence->error = 0;
 	fence->timestamp = ktime_set(0, 0);
 	init_completion(&fence->completion);
@@ -184,6 +185,28 @@ static void cs_job_put(struct hl_cs_job *job)
 	kref_put(&job->refcount, cs_job_do_release);
 }
 
+bool cs_needs_completion(struct hl_cs *cs)
+{
+	/* In case this is a staged CS, only the last CS in sequence should
+	 * get a completion, any non staged CS will always get a completion
+	 */
+	if (cs->staged_cs && !cs->staged_last)
+		return false;
+
+	return true;
+}
+
+bool cs_needs_timeout(struct hl_cs *cs)
+{
+	/* In case this is a staged CS, only the first CS in sequence should
+	 * get a timeout, any non staged CS will always get a timeout
+	 */
+	if (cs->staged_cs && !cs->staged_first)
+		return false;
+
+	return true;
+}
+
 static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
 {
 	/*
@@ -225,6 +248,7 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
 	parser.queue_type = job->queue_type;
 	parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb;
 	job->patched_cb = NULL;
+	parser.completion = cs_needs_completion(job->cs);
 
 	rc = hdev->asic_funcs->cs_parser(hdev, &parser);
 
@@ -290,13 +314,153 @@ static void complete_job(struct hl_device *hdev, struct hl_cs_job *job)
 
 	hl_debugfs_remove_job(hdev, job);
 
-	if (job->queue_type == QUEUE_TYPE_EXT ||
-			job->queue_type == QUEUE_TYPE_HW)
+	/* We decrement reference only for a CS that gets completion
+	 * because the reference was incremented only for this kind of CS
+	 * right before it was scheduled.
+	 *
+	 * In staged submission, only the last CS marked as 'staged_last'
+	 * gets completion, hence its release function will be called from here.
+	 * As for all the rest CS's in the staged submission which do not get
+	 * completion, their CS reference will be decremented by the
+	 * 'staged_last' CS during the CS release flow.
+	 * All relevant PQ CI counters will be incremented during the CS release
+	 * flow by calling 'hl_hw_queue_update_ci'.
+	 */
+	if (cs_needs_completion(cs) &&
+		(job->queue_type == QUEUE_TYPE_EXT ||
+			job->queue_type == QUEUE_TYPE_HW))
 		cs_put(cs);
 
 	cs_job_put(job);
 }
 
+/*
+ * hl_staged_cs_find_first - locate the first CS in this staged submission
+ *
+ * @hdev: pointer to device structure
+ * @cs_seq: staged submission sequence number
+ *
+ * @note: This function must be called under 'hdev->cs_mirror_lock'
+ *
+ * Find and return a CS pointer with the given sequence
+ */
+struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq)
+{
+	struct hl_cs *cs;
+
+	list_for_each_entry_reverse(cs, &hdev->cs_mirror_list, mirror_node)
+		if (cs->staged_cs && cs->staged_first &&
+				cs->sequence == cs_seq)
+			return cs;
+
+	return NULL;
+}
+
+/*
+ * is_staged_cs_last_exists - returns true if the last CS in sequence exists
+ *
+ * @hdev: pointer to device structure
+ * @cs: staged submission member
+ *
+ */
+bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs)
+{
+	struct hl_cs *last_entry;
+
+	last_entry = list_last_entry(&cs->staged_cs_node, struct hl_cs,
+								staged_cs_node);
+
+	if (last_entry->staged_last)
+		return true;
+
+	return false;
+}
+
+/*
+ * staged_cs_get - get CS reference if this CS is a part of a staged CS
+ *
+ * @hdev: pointer to device structure
+ * @cs: current CS
+ *
+ * Increment the reference of @cs unless it is marked 'staged_last', i.e.
+ * for every CS in this staged submission except for the one which gets
+ * the completion.
+ */
+static void staged_cs_get(struct hl_device *hdev, struct hl_cs *cs)
+{
+	/* Only the last CS in this staged submission will get a completion.
+	 * We must increment the reference for all other CS's in this
+	 * staged submission.
+	 * Once we get a completion we will release the whole staged submission.
+	 */
+	if (!cs->staged_last)
+		cs_get(cs);
+}
+
+/*
+ * staged_cs_put - put a CS in case it is part of staged submission
+ *
+ * @hdev: pointer to device structure
+ * @cs: CS to put
+ *
+ * Decrement the reference of @cs when cs_needs_completion() is false for it
+ */
+static void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs)
+{
+	/* We release all CS's in a staged submission except the last
+	 * CS, whose reference we never incremented.
+	 */
+	if (!cs_needs_completion(cs))
+		cs_put(cs);
+}
+
+static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
+{
+	bool next_entry_found = false;
+	struct hl_cs *next;
+
+	if (!cs_needs_timeout(cs))
+		return;
+
+	spin_lock(&hdev->cs_mirror_lock);
+
+	/* We need to handle tdr only once for the complete staged submission.
+	 * Hence, we choose the CS that reaches this function first which is
+	 * the CS marked as 'staged_last'.
+	 */
+	if (cs->staged_cs && cs->staged_last)
+		cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
+
+	spin_unlock(&hdev->cs_mirror_lock);
+
+	/* Don't cancel TDR in case this CS was timedout because we might be
+	 * running from the TDR context
+	 */
+	if (cs && (cs->timedout ||
+			hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT))
+		return;
+
+	if (cs && cs->tdr_active)
+		cancel_delayed_work_sync(&cs->work_tdr);
+
+	spin_lock(&hdev->cs_mirror_lock);
+
+	/* queue TDR for next CS */
+	list_for_each_entry(next, &hdev->cs_mirror_list, mirror_node)
+		if (cs_needs_timeout(next)) {
+			next_entry_found = true;
+			break;
+		}
+
+	if (next_entry_found && !next->tdr_active) {
+		next->tdr_active = true;
+		schedule_delayed_work(&next->work_tdr,
+					hdev->timeout_jiffies);
+	}
+
+	spin_unlock(&hdev->cs_mirror_lock);
+}
+
 static void cs_do_release(struct kref *ref)
 {
 	struct hl_cs *cs = container_of(ref, struct hl_cs, refcount);
@@ -346,36 +510,37 @@ static void cs_do_release(struct kref *ref)
 
 	hdev->asic_funcs->hw_queues_unlock(hdev);
 
-	/* Need to update CI for internal queues */
-	hl_int_hw_queue_update_ci(cs);
+	/* Need to update CI for all queue jobs that do not get completion */
+	hl_hw_queue_update_ci(cs);
 
 	/* remove CS from CS mirror list */
 	spin_lock(&hdev->cs_mirror_lock);
 	list_del_init(&cs->mirror_node);
 	spin_unlock(&hdev->cs_mirror_lock);
 
-	/* Don't cancel TDR in case this CS was timedout because we might be
-	 * running from the TDR context
-	 */
-	if (!cs->timedout && hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) {
-		struct hl_cs *next;
-
-		if (cs->tdr_active)
-			cancel_delayed_work_sync(&cs->work_tdr);
+	cs_handle_tdr(hdev, cs);
 
-		spin_lock(&hdev->cs_mirror_lock);
-
-		/* queue TDR for next CS */
-		next = list_first_entry_or_null(&hdev->cs_mirror_list,
-						struct hl_cs, mirror_node);
+	if (cs->staged_cs) {
+		/* the completion CS decrements reference for the entire
+		 * staged submission
+		 */
+		if (cs->staged_last) {
+			struct hl_cs *staged_cs, *tmp;
 
-		if (next && !next->tdr_active) {
-			next->tdr_active = true;
-			schedule_delayed_work(&next->work_tdr,
-						hdev->timeout_jiffies);
+			list_for_each_entry_safe(staged_cs, tmp,
+					&cs->staged_cs_node, staged_cs_node)
+				staged_cs_put(hdev, staged_cs);
 		}
 
-		spin_unlock(&hdev->cs_mirror_lock);
+		/* A staged CS will be a member in the list only after it
+		 * was submitted. We used 'cs_mirror_lock' when inserting
+		 * it to list so we will use it again when removing it
+		 */
+		if (cs->submitted) {
+			spin_lock(&hdev->cs_mirror_lock);
+			list_del(&cs->staged_cs_node);
+			spin_unlock(&hdev->cs_mirror_lock);
+		}
 	}
 
 out:
@@ -461,7 +626,8 @@ static void cs_timedout(struct work_struct *work)
 }
 
 static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
-			enum hl_cs_type cs_type, struct hl_cs **cs_new)
+			enum hl_cs_type cs_type, u64 user_sequence,
+			struct hl_cs **cs_new)
 {
 	struct hl_cs_counters_atomic *cntr;
 	struct hl_fence *other = NULL;
@@ -478,6 +644,9 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 		return -ENOMEM;
 	}
 
+	/* increment refcnt for context */
+	hl_ctx_get(hdev, ctx);
+
 	cs->ctx = ctx;
 	cs->submitted = false;
 	cs->completed = false;
@@ -507,6 +676,18 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 				(hdev->asic_prop.max_pending_cs - 1)];
 
 	if (other && !completion_done(&other->completion)) {
+		/* If the following statement is true, it means we have reached
+		 * a point in which only part of the staged submission was
+		 * submitted and we don't have enough room in the 'cs_pending'
+		 * array for the rest of the submission.
+		 * This causes a deadlock because this CS will never be
+		 * completed as it depends on future CS's for completion.
+		 */
+		if (other->cs_sequence == user_sequence)
+			dev_crit_ratelimited(hdev->dev,
+				"Staged CS %llu deadlock due to lack of resources",
+				user_sequence);
+
 		dev_dbg_ratelimited(hdev->dev,
 			"Rejecting CS because of too many in-flights CS\n");
 		atomic64_inc(&ctx->cs_counters.max_cs_in_flight_drop_cnt);
@@ -525,7 +706,7 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 	}
 
 	/* init hl_fence */
-	hl_fence_init(&cs_cmpl->base_fence);
+	hl_fence_init(&cs_cmpl->base_fence, cs_cmpl->cs_seq);
 
 	cs->sequence = cs_cmpl->cs_seq;
 
@@ -549,6 +730,7 @@ free_fence:
 	kfree(cs_cmpl);
 free_cs:
 	kfree(cs);
+	hl_ctx_put(ctx);
 	return rc;
 }
 
@@ -556,6 +738,8 @@ static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
 {
 	struct hl_cs_job *job, *tmp;
 
+	staged_cs_put(hdev, cs);
+
 	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
 		complete_job(hdev, job);
 }
@@ -565,7 +749,9 @@ void hl_cs_rollback_all(struct hl_device *hdev)
 	int i;
 	struct hl_cs *cs, *tmp;
 
-	/* flush all completions */
+	/* flush all completions before iterating over the CS mirror list in
+	 * order to avoid a race with the release functions
+	 */
 	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
 		flush_workqueue(hdev->cq_wq[i]);
 
@@ -574,12 +760,24 @@ void hl_cs_rollback_all(struct hl_device *hdev)
 		cs_get(cs);
 		cs->aborted = true;
 		dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n",
-					cs->ctx->asid, cs->sequence);
+				cs->ctx->asid, cs->sequence);
 		cs_rollback(hdev, cs);
 		cs_put(cs);
 	}
 }
 
+void hl_pending_cb_list_flush(struct hl_ctx *ctx)
+{
+	struct hl_pending_cb *pending_cb, *tmp;
+
+	list_for_each_entry_safe(pending_cb, tmp,
+			&ctx->pending_cb_list, cb_node) {
+		list_del(&pending_cb->cb_node);
+		hl_cb_put(pending_cb->cb);
+		kfree(pending_cb);
+	}
+}
+
 static void job_wq_completion(struct work_struct *work)
 {
 	struct hl_cs_job *job = container_of(work, struct hl_cs_job,
@@ -734,6 +932,12 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
 		return -EBUSY;
 	}
 
+	if ((args->in.cs_flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
+			!hdev->supports_staged_submission) {
+		dev_err(hdev->dev, "staged submission not supported");
+		return -EPERM;
+	}
+
 	cs_type_flags = args->in.cs_flags & HL_CS_FLAGS_TYPE_MASK;
 
 	if (unlikely(cs_type_flags && !is_power_of_2(cs_type_flags))) {
@@ -805,10 +1009,38 @@ static int hl_cs_copy_chunk_array(struct hl_device *hdev,
 	return 0;
 }
 
+static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs,
+				u64 sequence, u32 flags)
+{
+	if (!(flags & HL_CS_FLAGS_STAGED_SUBMISSION))
+		return 0;
+
+	cs->staged_last = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_LAST);
+	cs->staged_first = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST);
+
+	if (cs->staged_first) {
+		/* Staged CS sequence is the first CS sequence */
+		INIT_LIST_HEAD(&cs->staged_cs_node);
+		cs->staged_sequence = cs->sequence;
+	} else {
+		/* User sequence will be validated in 'hl_hw_queue_schedule_cs'
+		 * under the cs_mirror_lock
+		 */
+		cs->staged_sequence = sequence;
+	}
+
+	/* Increment CS reference if needed */
+	staged_cs_get(hdev, cs);
+
+	cs->staged_cs = true;
+
+	return 0;
+}
+
 static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
-				u32 num_chunks, u64 *cs_seq, bool timestamp)
+				u32 num_chunks, u64 *cs_seq, u32 flags)
 {
-	bool int_queues_only = true;
+	bool staged_mid, int_queues_only = true;
 	struct hl_device *hdev = hpriv->hdev;
 	struct hl_cs_chunk *cs_chunk_array;
 	struct hl_cs_counters_atomic *cntr;
@@ -816,9 +1048,11 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 	struct hl_cs_job *job;
 	struct hl_cs *cs;
 	struct hl_cb *cb;
+	u64 user_sequence;
 	int rc, i;
 
 	cntr = &hdev->aggregated_cs_counters;
+	user_sequence = *cs_seq;
 	*cs_seq = ULLONG_MAX;
 
 	rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
@@ -826,20 +1060,26 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 	if (rc)
 		goto out;
 
-	/* increment refcnt for context */
-	hl_ctx_get(hdev, hpriv->ctx);
+	if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
+			!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
+		staged_mid = true;
+	else
+		staged_mid = false;
 
-	rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT, &cs);
-	if (rc) {
-		hl_ctx_put(hpriv->ctx);
+	rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT,
+			staged_mid ? user_sequence : ULLONG_MAX, &cs);
+	if (rc)
 		goto free_cs_chunk_array;
-	}
 
-	cs->timestamp = !!timestamp;
+	cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP);
 	*cs_seq = cs->sequence;
 
 	hl_debugfs_add_cs(cs);
 
+	rc = cs_staged_submission(hdev, cs, user_sequence, flags);
+	if (rc)
+		goto free_cs_object;
+
 	/* Validate ALL the CS chunks before submitting the CS */
 	for (i = 0 ; i < num_chunks ; i++) {
 		struct hl_cs_chunk *chunk = &cs_chunk_array[i];
@@ -899,8 +1139,9 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 		 * Only increment for JOB on external or H/W queues, because
 		 * only for those JOBs we get completion
 		 */
-		if (job->queue_type == QUEUE_TYPE_EXT ||
-				job->queue_type == QUEUE_TYPE_HW)
+		if (cs_needs_completion(cs) &&
+			(job->queue_type == QUEUE_TYPE_EXT ||
+				job->queue_type == QUEUE_TYPE_HW))
 			cs_get(cs);
 
 		hl_debugfs_add_job(hdev, job);
@@ -916,11 +1157,14 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 		}
 	}
 
-	if (int_queues_only) {
+	/* We allow a CS with any queue type combination as long as it does
+	 * not get a completion
+	 */
+	if (int_queues_only && cs_needs_completion(cs)) {
 		atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
 		atomic64_inc(&cntr->validation_drop_cnt);
 		dev_err(hdev->dev,
-			"Reject CS %d.%llu because only internal queues jobs are present\n",
+			"Reject CS %d.%llu since it contains only internal queues jobs and needs completion\n",
 			cs->ctx->asid, cs->sequence);
 		rc = -EINVAL;
 		goto free_cs_object;
@@ -954,6 +1198,129 @@ out:
 	return rc;
 }
 
+static int pending_cb_create_job(struct hl_device *hdev, struct hl_ctx *ctx,
+		struct hl_cs *cs, struct hl_cb *cb, u32 size, u32 hw_queue_id)
+{
+	struct hw_queue_properties *hw_queue_prop;
+	struct hl_cs_counters_atomic *cntr;
+	struct hl_cs_job *job;
+
+	hw_queue_prop = &hdev->asic_prop.hw_queues_props[hw_queue_id];
+	cntr = &hdev->aggregated_cs_counters;
+
+	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
+	if (!job) {
+		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
+		atomic64_inc(&cntr->out_of_mem_drop_cnt);
+		dev_err(hdev->dev, "Failed to allocate a new job\n");
+		return -ENOMEM;
+	}
+
+	job->id = 0;
+	job->cs = cs;
+	job->user_cb = cb;
+	atomic_inc(&job->user_cb->cs_cnt);
+	job->user_cb_size = size;
+	job->hw_queue_id = hw_queue_id;
+	job->patched_cb = job->user_cb;
+	job->job_cb_size = job->user_cb_size;
+
+	/* increment refcount as for external queues we get completion */
+	cs_get(cs);
+
+	cs->jobs_in_queue_cnt[job->hw_queue_id]++;
+
+	list_add_tail(&job->cs_node, &cs->job_list);
+
+	hl_debugfs_add_job(hdev, job);
+
+	return 0;
+}
+
+static int hl_submit_pending_cb(struct hl_fpriv *hpriv)
+{
+	struct hl_device *hdev = hpriv->hdev;
+	struct hl_ctx *ctx = hpriv->ctx;
+	struct hl_pending_cb *pending_cb, *tmp;
+	struct list_head local_cb_list;
+	struct hl_cs *cs;
+	struct hl_cb *cb;
+	u32 hw_queue_id;
+	u32 cb_size;
+	int process_list, rc = 0;
+
+	if (list_empty(&ctx->pending_cb_list))
+		return 0;
+
+	process_list = atomic_cmpxchg(&ctx->thread_pending_cb_token, 1, 0);
+
+	/* Only a single thread is allowed to process the list */
+	if (!process_list)
+		return 0;
+
+	if (list_empty(&ctx->pending_cb_list))
+		goto free_pending_cb_token;
+
+	/* move all list elements to a local list */
+	INIT_LIST_HEAD(&local_cb_list);
+	spin_lock(&ctx->pending_cb_lock);
+	list_for_each_entry_safe(pending_cb, tmp, &ctx->pending_cb_list,
+								cb_node)
+		list_move_tail(&pending_cb->cb_node, &local_cb_list);
+	spin_unlock(&ctx->pending_cb_lock);
+
+	rc = allocate_cs(hdev, ctx, CS_TYPE_DEFAULT, ULLONG_MAX, &cs);
+	if (rc)
+		goto add_list_elements;
+
+	hl_debugfs_add_cs(cs);
+
+	/* Iterate through pending cb list, create jobs and add to CS */
+	list_for_each_entry(pending_cb, &local_cb_list, cb_node) {
+		cb = pending_cb->cb;
+		cb_size = pending_cb->cb_size;
+		hw_queue_id = pending_cb->hw_queue_id;
+
+		rc = pending_cb_create_job(hdev, ctx, cs, cb, cb_size,
+								hw_queue_id);
+		if (rc)
+			goto free_cs_object;
+	}
+
+	rc = hl_hw_queue_schedule_cs(cs);
+	if (rc) {
+		if (rc != -EAGAIN)
+			dev_err(hdev->dev,
+				"Failed to submit CS %d.%llu (%d)\n",
+				ctx->asid, cs->sequence, rc);
+		goto free_cs_object;
+	}
+
+	/* pending cb was scheduled successfully */
+	list_for_each_entry_safe(pending_cb, tmp, &local_cb_list, cb_node) {
+		list_del(&pending_cb->cb_node);
+		kfree(pending_cb);
+	}
+
+	cs_put(cs);
+
+	goto free_pending_cb_token;
+
+free_cs_object:
+	cs_rollback(hdev, cs);
+	cs_put(cs);
+add_list_elements:
+	spin_lock(&ctx->pending_cb_lock);
+	list_for_each_entry_safe_reverse(pending_cb, tmp, &local_cb_list,
+								cb_node)
+		list_move(&pending_cb->cb_node, &ctx->pending_cb_list);
+	spin_unlock(&ctx->pending_cb_lock);
+free_pending_cb_token:
+	atomic_set(&ctx->thread_pending_cb_token, 1);
+
+	return rc;
+}
+
 static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
 				u64 *cs_seq)
 {
@@ -1003,7 +1370,7 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
 			rc = 0;
 		} else {
 			rc = cs_ioctl_default(hpriv, chunks, num_chunks,
-						cs_seq, false);
+								cs_seq, 0);
 		}
 
 		mutex_unlock(&hpriv->restore_phase_mutex);
@@ -1275,15 +1642,11 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 		}
 	}
 
-	/* increment refcnt for context */
-	hl_ctx_get(hdev, ctx);
-
-	rc = allocate_cs(hdev, ctx, cs_type, &cs);
+	rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs);
 	if (rc) {
 		if (cs_type == CS_TYPE_WAIT ||
 			cs_type == CS_TYPE_COLLECTIVE_WAIT)
 			hl_fence_put(sig_fence);
-		hl_ctx_put(ctx);
 		goto free_cs_chunk_array;
 	}
 
@@ -1346,7 +1709,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
 	enum hl_cs_type cs_type;
 	u64 cs_seq = ULONG_MAX;
 	void __user *chunks;
-	u32 num_chunks;
+	u32 num_chunks, flags;
 	int rc;
 
 	rc = hl_cs_sanity_checks(hpriv, args);
@@ -1357,10 +1720,20 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
 	if (rc)
 		goto out;
 
+	rc = hl_submit_pending_cb(hpriv);
+	if (rc)
+		goto out;
+
 	cs_type = hl_cs_get_cs_type(args->in.cs_flags &
 					~HL_CS_FLAGS_FORCE_RESTORE);
 	chunks = (void __user *) (uintptr_t) args->in.chunks_execute;
 	num_chunks = args->in.num_chunks_execute;
+	flags = args->in.cs_flags;
+
+	/* In case this is a staged CS, user should supply the CS sequence */
+	if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
+			!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
+		cs_seq = args->in.seq;
 
 	switch (cs_type) {
 	case CS_TYPE_SIGNAL:
@@ -1371,7 +1744,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
 		break;
 	default:
 		rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq,
-				args->in.cs_flags & HL_CS_FLAGS_TIMESTAMP);
+							args->in.cs_flags);
 		break;
 	}
 
diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c
index f65e6559149b..cda871afb8f4 100644
--- a/drivers/misc/habanalabs/common/context.c
+++ b/drivers/misc/habanalabs/common/context.c
@@ -12,9 +12,14 @@
 static void hl_ctx_fini(struct hl_ctx *ctx)
 {
 	struct hl_device *hdev = ctx->hdev;
-	u64 idle_mask = 0;
+	u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
 	int i;
 
+	/* Release all allocated pending CBs. Those CBs were never
+	 * scheduled, so it is safe to release them here
+	 */
+	hl_pending_cb_list_flush(ctx);
+
 	/*
 	 * If we arrived here, there are no jobs waiting for this context
 	 * on its queues so we can safely remove it.
@@ -50,12 +55,15 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
 
 		if ((!hdev->pldm) && (hdev->pdev) &&
 				(!hdev->asic_funcs->is_device_idle(hdev,
-							&idle_mask, NULL)))
+					idle_mask,
+					HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)))
 			dev_notice(hdev->dev,
-				"device not idle after user context is closed (0x%llx)\n",
-				idle_mask);
+					"device not idle after user context is closed (0x%llx, 0x%llx)\n",
+						idle_mask[0], idle_mask[1]);
 	} else {
 		dev_dbg(hdev->dev, "closing kernel context\n");
+		hdev->asic_funcs->ctx_fini(ctx);
+		hl_vm_ctx_fini(ctx);
 		hl_mmu_ctx_fini(ctx);
 	}
 }
@@ -140,8 +148,11 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
 	kref_init(&ctx->refcount);
 
 	ctx->cs_sequence = 1;
+	INIT_LIST_HEAD(&ctx->pending_cb_list);
+	spin_lock_init(&ctx->pending_cb_lock);
 	spin_lock_init(&ctx->cs_lock);
 	atomic_set(&ctx->thread_ctx_switch_token, 1);
+	atomic_set(&ctx->thread_pending_cb_token, 1);
 	ctx->thread_ctx_switch_wait_token = 0;
 	ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs,
 				sizeof(struct hl_fence *),
@@ -151,11 +162,18 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
 
 	if (is_kernel_ctx) {
 		ctx->asid = HL_KERNEL_ASID_ID; /* Kernel driver gets ASID 0 */
-		rc = hl_mmu_ctx_init(ctx);
+		rc = hl_vm_ctx_init(ctx);
 		if (rc) {
-			dev_err(hdev->dev, "Failed to init mmu ctx module\n");
+			dev_err(hdev->dev, "Failed to init mem ctx module\n");
+			rc = -ENOMEM;
 			goto err_free_cs_pending;
 		}
+
+		rc = hdev->asic_funcs->ctx_init(ctx);
+		if (rc) {
+			dev_err(hdev->dev, "ctx_init failed\n");
+			goto err_vm_ctx_fini;
+		}
 	} else {
 		ctx->asid = hl_asid_alloc(hdev);
 		if (!ctx->asid) {
@@ -194,7 +212,8 @@ err_cb_va_pool_fini:
 err_vm_ctx_fini:
 	hl_vm_ctx_fini(ctx);
 err_asid_free:
-	hl_asid_free(hdev, ctx->asid);
+	if (ctx->asid != HL_KERNEL_ASID_ID)
+		hl_asid_free(hdev, ctx->asid);
 err_free_cs_pending:
 	kfree(ctx->cs_pending);
 
diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c
index cef716643979..df847a6d19f4 100644
--- a/drivers/misc/habanalabs/common/debugfs.c
+++ b/drivers/misc/habanalabs/common/debugfs.c
@@ -310,8 +310,8 @@ static int mmu_show(struct seq_file *s, void *data)
 	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
 	struct hl_device *hdev = dev_entry->hdev;
 	struct hl_ctx *ctx;
-	struct hl_mmu_hop_info hops_info;
-	u64 virt_addr = dev_entry->mmu_addr;
+	struct hl_mmu_hop_info hops_info = {0};
+	u64 virt_addr = dev_entry->mmu_addr, phys_addr;
 	int i;
 
 	if (!hdev->mmu_enable)
@@ -333,8 +333,19 @@ static int mmu_show(struct seq_file *s, void *data)
 		return 0;
 	}
 
-	seq_printf(s, "asid: %u, virt_addr: 0x%llx\n",
-			dev_entry->mmu_asid, dev_entry->mmu_addr);
+	phys_addr = hops_info.hop_info[hops_info.used_hops - 1].hop_pte_val;
+
+	if (hops_info.scrambled_vaddr &&
+		(dev_entry->mmu_addr != hops_info.scrambled_vaddr))
+		seq_printf(s,
+			"asid: %u, virt_addr: 0x%llx, scrambled virt_addr: 0x%llx,\nphys_addr: 0x%llx, scrambled_phys_addr: 0x%llx\n",
+			dev_entry->mmu_asid, dev_entry->mmu_addr,
+			hops_info.scrambled_vaddr,
+			hops_info.unscrambled_paddr, phys_addr);
+	else
+		seq_printf(s,
+			"asid: %u, virt_addr: 0x%llx, phys_addr: 0x%llx\n",
+			dev_entry->mmu_asid, dev_entry->mmu_addr, phys_addr);
 
 	for (i = 0 ; i < hops_info.used_hops ; i++) {
 		seq_printf(s, "hop%d_addr: 0x%llx\n",
@@ -403,7 +414,7 @@ static int engines_show(struct seq_file *s, void *data)
 		return 0;
 	}
 
-	hdev->asic_funcs->is_device_idle(hdev, NULL, s);
+	hdev->asic_funcs->is_device_idle(hdev, NULL, 0, s);
 
 	return 0;
 }
@@ -865,6 +876,17 @@ static ssize_t hl_stop_on_err_write(struct file *f, const char __user *buf,
 	return count;
 }
 
+static ssize_t hl_security_violations_read(struct file *f, char __user *buf,
+					size_t count, loff_t *ppos)
+{
+	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+	struct hl_device *hdev = entry->hdev;
+
+	hdev->asic_funcs->ack_protection_bits_errors(hdev);
+
+	return 0;
+}
+
 static const struct file_operations hl_data32b_fops = {
 	.owner = THIS_MODULE,
 	.read = hl_data_read32,
@@ -922,6 +944,11 @@ static const struct file_operations hl_stop_on_err_fops = {
 	.write = hl_stop_on_err_write
 };
 
+static const struct file_operations hl_security_violations_fops = {
+	.owner = THIS_MODULE,
+	.read = hl_security_violations_read
+};
+
 static const struct hl_info_list hl_debugfs_list[] = {
 	{"command_buffers", command_buffers_show, NULL},
 	{"command_submission", command_submission_show, NULL},
@@ -1071,6 +1098,12 @@ void hl_debugfs_add_device(struct hl_device *hdev)
 				dev_entry,
 				&hl_stop_on_err_fops);
 
+	debugfs_create_file("dump_security_violations",
+				0644,
+				dev_entry->root,
+				dev_entry,
+				&hl_security_violations_fops);
+
 	for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
 
 		ent = debugfs_create_file(hl_debugfs_list[i].name,
diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 69d04eca767f..15fcb5c31c4b 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -142,6 +142,9 @@ static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
 	switch (vm_pgoff & HL_MMAP_TYPE_MASK) {
 	case HL_MMAP_TYPE_CB:
 		return hl_cb_mmap(hpriv, vma);
+
+	case HL_MMAP_TYPE_BLOCK:
+		return hl_hw_block_mmap(hpriv, vma);
 	}
 
 	return -EINVAL;
@@ -373,7 +376,6 @@ static int device_early_init(struct hl_device *hdev)
 
 	mutex_init(&hdev->send_cpu_message_lock);
 	mutex_init(&hdev->debug_lock);
-	mutex_init(&hdev->mmu_cache_lock);
 	INIT_LIST_HEAD(&hdev->cs_mirror_list);
 	spin_lock_init(&hdev->cs_mirror_lock);
 	INIT_LIST_HEAD(&hdev->fpriv_list);
@@ -414,7 +416,6 @@ static void device_early_fini(struct hl_device *hdev)
 {
 	int i;
 
-	mutex_destroy(&hdev->mmu_cache_lock);
 	mutex_destroy(&hdev->debug_lock);
 	mutex_destroy(&hdev->send_cpu_message_lock);
 
@@ -1158,12 +1159,20 @@ kill_processes:
 	atomic_set(&hdev->in_reset, 0);
 	hdev->needs_reset = false;
 
-	if (hard_reset)
+	dev_notice(hdev->dev, "Successfully finished resetting the device\n");
+
+	if (hard_reset) {
 		hdev->hard_reset_cnt++;
-	else
-		hdev->soft_reset_cnt++;
 
-	dev_warn(hdev->dev, "Successfully finished resetting the device\n");
+		/* After reset is done, we are ready to receive events from
+		 * the F/W. We can't do it before because we will ignore events
+		 * and if those events are fatal, we won't know about it and
+		 * the device will be operational although it shouldn't be
+		 */
+		hdev->asic_funcs->enable_events_from_fw(hdev);
+	} else {
+		hdev->soft_reset_cnt++;
+	}
 
 	return 0;
 
@@ -1314,11 +1323,16 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 
 	hdev->compute_ctx = NULL;
 
+	hl_debugfs_add_device(hdev);
+
+	/* debugfs nodes are created in hl_ctx_init so it must be called after
+	 * hl_debugfs_add_device.
+	 */
 	rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
 	if (rc) {
 		dev_err(hdev->dev, "failed to initialize kernel context\n");
 		kfree(hdev->kernel_ctx);
-		goto mmu_fini;
+		goto remove_device_from_debugfs;
 	}
 
 	rc = hl_cb_pool_init(hdev);
@@ -1327,8 +1341,6 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 		goto release_ctx;
 	}
 
-	hl_debugfs_add_device(hdev);
-
 	/*
 	 * From this point, in case of an error, add char devices and create
 	 * sysfs nodes as part of the error flow, to allow debugging.
@@ -1411,12 +1423,21 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 
 	hdev->init_done = true;
 
+	/* After initialization is done, we are ready to receive events from
+	 * the F/W. We can't do it before because we will ignore events and if
+	 * those events are fatal, we won't know about it and the device will
+	 * be operational although it shouldn't be
+	 */
+	hdev->asic_funcs->enable_events_from_fw(hdev);
+
 	return 0;
 
 release_ctx:
 	if (hl_ctx_put(hdev->kernel_ctx) != 1)
 		dev_err(hdev->dev,
 			"kernel ctx is still alive on initialization failure\n");
+remove_device_from_debugfs:
+	hl_debugfs_remove_device(hdev);
 mmu_fini:
 	hl_mmu_fini(hdev);
 eq_fini:
@@ -1482,7 +1503,8 @@ void hl_device_fini(struct hl_device *hdev)
 		usleep_range(50, 200);
 		rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
 		if (ktime_compare(ktime_get(), timeout) > 0) {
-			WARN(1, "Failed to remove device because reset function did not finish\n");
+			dev_crit(hdev->dev,
+				"Failed to remove device because reset function did not finish\n");
 			return;
 		}
 	}
@@ -1515,8 +1537,6 @@ void hl_device_fini(struct hl_device *hdev)
 
 	device_late_fini(hdev);
 
-	hl_debugfs_remove_device(hdev);
-
 	/*
 	 * Halt the engines and disable interrupts so we won't get any more
 	 * completions from H/W and we won't have any accesses from the
@@ -1548,6 +1568,8 @@ void hl_device_fini(struct hl_device *hdev)
 	if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1))
 		dev_err(hdev->dev, "kernel ctx is still alive\n");
 
+	hl_debugfs_remove_device(hdev);
+
 	hl_vm_fini(hdev);
 
 	hl_mmu_fini(hdev);
diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index c9a12980218a..09706c571e95 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -90,9 +90,10 @@ int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode)
 int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
 				u16 len, u32 timeout, u64 *result)
 {
+	struct hl_hw_queue *queue = &hdev->kernel_queues[hw_queue_id];
 	struct cpucp_packet *pkt;
 	dma_addr_t pkt_dma_addr;
-	u32 tmp;
+	u32 tmp, expected_ack_val;
 	int rc = 0;
 
 	pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len,
@@ -115,14 +116,23 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
 		goto out;
 	}
 
+	/* set fence to a non valid value */
+	pkt->fence = UINT_MAX;
+
 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, len, pkt_dma_addr);
 	if (rc) {
 		dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc);
 		goto out;
 	}
 
+	if (hdev->asic_prop.fw_app_security_map &
+			CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN)
+		expected_ack_val = queue->pi;
+	else
+		expected_ack_val = CPUCP_PACKET_FENCE_VAL;
+
 	rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp,
-				(tmp == CPUCP_PACKET_FENCE_VAL), 1000,
+				(tmp == expected_ack_val), 1000,
 				timeout, true);
 
 	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
@@ -279,8 +289,74 @@ int hl_fw_send_heartbeat(struct hl_device *hdev)
 	return rc;
 }
 
+static int fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg,
+		u32 cpu_security_boot_status_reg)
+{
+	u32 err_val, security_val;
+
+	/* Some of the firmware status codes are deprecated in newer f/w
+	 * versions. In those versions, the errors are reported
+	 * in different registers. Therefore, we need to check those
+	 * registers and print the exact errors. Moreover, there
+	 * may be multiple errors, so we need to report on each error
+	 * separately. Some of the error codes might indicate a state
+	 * that is not an error per-se, but it is an error in production
+	 * environment
+	 */
+	err_val = RREG32(boot_err0_reg);
+	if (!(err_val & CPU_BOOT_ERR0_ENABLED))
+		return 0;
+
+	if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL)
+		dev_err(hdev->dev,
+			"Device boot error - DRAM initialization failed\n");
+	if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED)
+		dev_err(hdev->dev, "Device boot error - FIT image corrupted\n");
+	if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL)
+		dev_err(hdev->dev,
+			"Device boot error - Thermal Sensor initialization failed\n");
+	if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED)
+		dev_warn(hdev->dev,
+			"Device boot warning - Skipped DRAM initialization\n");
+
+	if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED) {
+		if (hdev->bmc_enable)
+			dev_warn(hdev->dev,
+				"Device boot error - Skipped waiting for BMC\n");
+		else
+			err_val &= ~CPU_BOOT_ERR0_BMC_WAIT_SKIPPED;
+	}
+
+	if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY)
+		dev_err(hdev->dev,
+			"Device boot error - Serdes data from BMC not available\n");
+	if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL)
+		dev_err(hdev->dev,
+			"Device boot error - NIC F/W initialization failed\n");
+	if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY)
+		dev_warn(hdev->dev,
+			"Device boot warning - security not ready\n");
+	if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL)
+		dev_err(hdev->dev, "Device boot error - security failure\n");
+	if (err_val & CPU_BOOT_ERR0_EFUSE_FAIL)
+		dev_err(hdev->dev, "Device boot error - eFuse failure\n");
+	if (err_val & CPU_BOOT_ERR0_PLL_FAIL)
+		dev_err(hdev->dev, "Device boot error - PLL failure\n");
+
+	security_val = RREG32(cpu_security_boot_status_reg);
+	if (security_val & CPU_BOOT_DEV_STS0_ENABLED)
+		dev_dbg(hdev->dev, "Device security status %#x\n",
+				security_val);
+
+	if (err_val & ~CPU_BOOT_ERR0_ENABLED)
+		return -EIO;
+
+	return 0;
+}
+
 int hl_fw_cpucp_info_get(struct hl_device *hdev,
-			u32 cpu_security_boot_status_reg)
+			u32 cpu_security_boot_status_reg,
+			u32 boot_err0_reg)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	struct cpucp_packet pkt = {};
@@ -314,6 +390,12 @@ int hl_fw_cpucp_info_get(struct hl_device *hdev,
 		goto out;
 	}
 
+	rc = fw_read_errors(hdev, boot_err0_reg, cpu_security_boot_status_reg);
+	if (rc) {
+		dev_err(hdev->dev, "Errors in device boot\n");
+		goto out;
+	}
+
 	memcpy(&prop->cpucp_info, cpucp_info_cpu_addr,
 			sizeof(prop->cpucp_info));
 
@@ -483,58 +565,6 @@ int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index,
 	return rc;
 }
 
-static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg,
-		u32 cpu_security_boot_status_reg)
-{
-	u32 err_val, security_val;
-
-	/* Some of the firmware status codes are deprecated in newer f/w
-	 * versions. In those versions, the errors are reported
-	 * in different registers. Therefore, we need to check those
-	 * registers and print the exact errors. Moreover, there
-	 * may be multiple errors, so we need to report on each error
-	 * separately. Some of the error codes might indicate a state
-	 * that is not an error per-se, but it is an error in production
-	 * environment
-	 */
-	err_val = RREG32(boot_err0_reg);
-	if (!(err_val & CPU_BOOT_ERR0_ENABLED))
-		return;
-
-	if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL)
-		dev_err(hdev->dev,
-			"Device boot error - DRAM initialization failed\n");
-	if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED)
-		dev_err(hdev->dev, "Device boot error - FIT image corrupted\n");
-	if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL)
-		dev_err(hdev->dev,
-			"Device boot error - Thermal Sensor initialization failed\n");
-	if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED)
-		dev_warn(hdev->dev,
-			"Device boot warning - Skipped DRAM initialization\n");
-	if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED)
-		dev_warn(hdev->dev,
-			"Device boot error - Skipped waiting for BMC\n");
-	if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY)
-		dev_err(hdev->dev,
-			"Device boot error - Serdes data from BMC not available\n");
-	if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL)
-		dev_err(hdev->dev,
-			"Device boot error - NIC F/W initialization failed\n");
-	if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY)
-		dev_warn(hdev->dev,
-			"Device boot warning - security not ready\n");
-	if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL)
-		dev_err(hdev->dev, "Device boot error - security failure\n");
-	if (err_val & CPU_BOOT_ERR0_EFUSE_FAIL)
-		dev_err(hdev->dev, "Device boot error - eFuse failure\n");
-
-	security_val = RREG32(cpu_security_boot_status_reg);
-	if (security_val & CPU_BOOT_DEV_STS0_ENABLED)
-		dev_dbg(hdev->dev, "Device security status %#x\n",
-				security_val);
-}
-
 static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
 {
 	/* Some of the status codes below are deprecated in newer f/w
@@ -659,6 +689,9 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
 		prop->fw_security_disabled = true;
 	}
 
+	dev_dbg(hdev->dev, "Firmware preboot security status %#x\n",
+			security_status);
+
 	dev_dbg(hdev->dev, "Firmware preboot hard-reset is %s\n",
 			prop->hard_reset_done_by_fw ? "enabled" : "disabled");
 
@@ -753,6 +786,10 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
 		if (prop->fw_boot_cpu_security_map &
 				CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
 			prop->hard_reset_done_by_fw = true;
+
+		dev_dbg(hdev->dev,
+			"Firmware boot CPU security status %#x\n",
+			prop->fw_boot_cpu_security_map);
 	}
 
 	dev_dbg(hdev->dev, "Firmware boot CPU hard-reset is %s\n",
@@ -826,6 +863,10 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
 		goto out;
 	}
 
+	rc = fw_read_errors(hdev, boot_err0_reg, cpu_security_boot_status_reg);
+	if (rc)
+		return rc;
+
 	/* Clear reset status since we need to read again from app */
 	prop->hard_reset_done_by_fw = false;
 
@@ -837,6 +878,10 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
 		if (prop->fw_app_security_map &
 				CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
 			prop->hard_reset_done_by_fw = true;
+
+		dev_dbg(hdev->dev,
+			"Firmware application CPU security status %#x\n",
+			prop->fw_app_security_map);
 	}
 
 	dev_dbg(hdev->dev, "Firmware application CPU hard-reset is %s\n",
@@ -844,6 +889,8 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
 
 	dev_info(hdev->dev, "Successfully loaded firmware to device\n");
 
+	return 0;
+
 out:
 	fw_read_errors(hdev, boot_err0_reg, cpu_security_boot_status_reg);
 
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 41af347157e0..d933878b24d1 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -28,17 +28,18 @@
 #define HL_NAME				"habanalabs"
 
 /* Use upper bits of mmap offset to store habana driver specific information.
- * bits[63:62] - Encode mmap type
+ * bits[63:61] - Encode mmap type
  * bits[45:0]  - mmap offset value
  *
  * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
  *  defines are w.r.t to PAGE_SIZE
  */
-#define HL_MMAP_TYPE_SHIFT		(62 - PAGE_SHIFT)
-#define HL_MMAP_TYPE_MASK		(0x3ull << HL_MMAP_TYPE_SHIFT)
+#define HL_MMAP_TYPE_SHIFT		(61 - PAGE_SHIFT)
+#define HL_MMAP_TYPE_MASK		(0x7ull << HL_MMAP_TYPE_SHIFT)
+#define HL_MMAP_TYPE_BLOCK		(0x4ull << HL_MMAP_TYPE_SHIFT)
 #define HL_MMAP_TYPE_CB			(0x2ull << HL_MMAP_TYPE_SHIFT)
 
-#define HL_MMAP_OFFSET_VALUE_MASK	(0x3FFFFFFFFFFFull >> PAGE_SHIFT)
+#define HL_MMAP_OFFSET_VALUE_MASK	(0x1FFFFFFFFFFFull >> PAGE_SHIFT)
 #define HL_MMAP_OFFSET_VALUE_GET(off)	(off & HL_MMAP_OFFSET_VALUE_MASK)
 
 #define HL_PENDING_RESET_PER_SEC	10
@@ -408,6 +409,9 @@ struct hl_mmu_properties {
  * @sync_stream_first_mon: first monitor available for sync stream use
  * @first_available_user_sob: first sob available for the user
  * @first_available_user_mon: first monitor available for the user
+ * @first_available_user_msix_interrupt: first available msix interrupt
+ *                                       reserved for the user
+ * @first_available_cq: first available CQ for the user.
  * @tpc_enabled_mask: which TPCs are enabled.
  * @completion_queues_count: number of completion queues.
  * @fw_security_disabled: true if security measures are disabled in firmware,
@@ -416,6 +420,7 @@ struct hl_mmu_properties {
  *                            from BOOT_DEV_STS0
  * @dram_supports_virtual_memory: is there an MMU towards the DRAM
  * @hard_reset_done_by_fw: true if firmware is handling hard reset flow
+ * @num_functional_hbms: number of functional HBMs in each DCORE.
  */
 struct asic_fixed_properties {
 	struct hw_queue_properties	*hw_queues_props;
@@ -468,18 +473,22 @@ struct asic_fixed_properties {
 	u16				sync_stream_first_mon;
 	u16				first_available_user_sob[HL_MAX_DCORES];
 	u16				first_available_user_mon[HL_MAX_DCORES];
+	u16				first_available_user_msix_interrupt;
+	u16				first_available_cq[HL_MAX_DCORES];
 	u8				tpc_enabled_mask;
 	u8				completion_queues_count;
 	u8				fw_security_disabled;
 	u8				fw_security_status_valid;
 	u8				dram_supports_virtual_memory;
 	u8				hard_reset_done_by_fw;
+	u8				num_functional_hbms;
 };
 
 /**
  * struct hl_fence - software synchronization primitive
  * @completion: fence is implemented using completion
  * @refcount: refcount for this fence
+ * @cs_sequence: sequence of the corresponding command submission
  * @error: mark this fence with error
  * @timestamp: timestamp upon completion
  *
@@ -487,6 +496,7 @@ struct asic_fixed_properties {
 struct hl_fence {
 	struct completion	completion;
 	struct kref		refcount;
+	u64			cs_sequence;
 	int			error;
 	ktime_t			timestamp;
 };
@@ -846,6 +856,19 @@ enum div_select_defs {
  * @collective_wait_init_cs: Generate collective master/slave packets
  *                           and place them in the relevant cs jobs
  * @collective_wait_create_jobs: allocate collective wait cs jobs
+ * @scramble_addr: Routine to scramble the address prior to mapping it
+ *                 in the MMU.
+ * @descramble_addr: Routine to de-scramble the address prior to
+ *                   showing it to users.
+ * @ack_protection_bits_errors: ack and dump all security violations
+ * @get_hw_block_id: retrieve a HW block id to be used by the user to mmap it.
+ *                   also returns the size of the block if caller supplies
+ *                   a valid pointer for it
+ * @hw_block_mmap: mmap a HW block with a given id.
+ * @enable_events_from_fw: send interrupt to firmware to notify it that the
+ *                         driver is ready to receive asynchronous events. This
+ *                         function should be called during the first init and
+ *                         after every hard-reset of the device
  */
 struct hl_asic_funcs {
 	int (*early_init)(struct hl_device *hdev);
@@ -918,8 +941,8 @@ struct hl_asic_funcs {
 	void (*set_clock_gating)(struct hl_device *hdev);
 	void (*disable_clock_gating)(struct hl_device *hdev);
 	int (*debug_coresight)(struct hl_device *hdev, void *data);
-	bool (*is_device_idle)(struct hl_device *hdev, u64 *mask,
-				struct seq_file *s);
+	bool (*is_device_idle)(struct hl_device *hdev, u64 *mask_arr,
+					u8 mask_len, struct seq_file *s);
 	int (*soft_reset_late_init)(struct hl_device *hdev);
 	void (*hw_queues_lock)(struct hl_device *hdev);
 	void (*hw_queues_unlock)(struct hl_device *hdev);
@@ -955,6 +978,14 @@ struct hl_asic_funcs {
 	int (*collective_wait_create_jobs)(struct hl_device *hdev,
 			struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
 			u32 collective_engine_id);
+	u64 (*scramble_addr)(struct hl_device *hdev, u64 addr);
+	u64 (*descramble_addr)(struct hl_device *hdev, u64 addr);
+	void (*ack_protection_bits_errors)(struct hl_device *hdev);
+	int (*get_hw_block_id)(struct hl_device *hdev, u64 block_addr,
+				u32 *block_size, u32 *block_id);
+	int (*hw_block_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
+			u32 block_id, u32 block_size);
+	void (*enable_events_from_fw)(struct hl_device *hdev);
 };
 
 
@@ -1012,6 +1043,20 @@ struct hl_cs_counters_atomic {
 };
 
 /**
+ * struct hl_pending_cb - pending command buffer structure
+ * @cb_node: cb node in pending cb list
+ * @cb: command buffer to send in next submission
+ * @cb_size: command buffer size
+ * @hw_queue_id: destination queue id
+ */
+struct hl_pending_cb {
+	struct list_head	cb_node;
+	struct hl_cb		*cb;
+	u32			cb_size;
+	u32			hw_queue_id;
+};
+
+/**
  * struct hl_ctx - user/kernel context.
  * @mem_hash: holds mapping from virtual address to virtual memory area
  *		descriptor (hl_vm_phys_pg_list or hl_userptr).
@@ -1026,6 +1071,8 @@ struct hl_cs_counters_atomic {
  * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the
  *            MMU hash or walking the PGT requires talking this lock.
  * @debugfs_list: node in debugfs list of contexts.
+ * @pending_cb_list: list of pending command buffers waiting to be sent upon
+ *                   next user command submission context.
  * @cs_counters: context command submission counters.
  * @cb_va_pool: device VA pool for command buffers which are mapped to the
  *              device's MMU.
@@ -1034,11 +1081,17 @@ struct hl_cs_counters_atomic {
  *			index to cs_pending array.
  * @dram_default_hops: array that holds all hops addresses needed for default
  *                     DRAM mapping.
+ * @pending_cb_lock: spinlock to protect pending cb list
  * @cs_lock: spinlock to protect cs_sequence.
  * @dram_phys_mem: amount of used physical DRAM memory by this context.
  * @thread_ctx_switch_token: token to prevent multiple threads of the same
  *				context	from running the context switch phase.
  *				Only a single thread should run it.
+ * @thread_pending_cb_token: token to prevent multiple threads from processing
+ *				the pending CB list. Only a single thread should
+ *				process the list since it is protected by a
+ *				spinlock and we don't want to halt the entire
+ *				command submission sequence.
  * @thread_ctx_switch_wait_token: token to prevent the threads that didn't run
  *				the context switch phase from moving to their
  *				execution phase before the context switch phase
@@ -1057,13 +1110,16 @@ struct hl_ctx {
 	struct mutex			mem_hash_lock;
 	struct mutex			mmu_lock;
 	struct list_head		debugfs_list;
+	struct list_head		pending_cb_list;
 	struct hl_cs_counters_atomic	cs_counters;
 	struct gen_pool			*cb_va_pool;
 	u64				cs_sequence;
 	u64				*dram_default_hops;
+	spinlock_t			pending_cb_lock;
 	spinlock_t			cs_lock;
 	atomic64_t			dram_phys_mem;
 	atomic_t			thread_ctx_switch_token;
+	atomic_t			thread_pending_cb_token;
 	u32				thread_ctx_switch_wait_token;
 	u32				asid;
 	u32				handle;
@@ -1124,8 +1180,11 @@ struct hl_userptr {
  * @finish_work: workqueue object to run when CS is completed by H/W.
  * @work_tdr: delayed work node for TDR.
  * @mirror_node : node in device mirror list of command submissions.
+ * @staged_cs_node: node in the staged cs list.
  * @debugfs_list: node in debugfs list of command submissions.
  * @sequence: the sequence number of this CS.
+ * @staged_sequence: the sequence of the staged submission this CS is part of,
+ *                   relevant only if staged_cs is set.
  * @type: CS_TYPE_*.
  * @submitted: true if CS was submitted to H/W.
  * @completed: true if CS was completed by device.
@@ -1133,7 +1192,11 @@ struct hl_userptr {
  * @tdr_active: true if TDR was activated for this CS (to prevent
  *		double TDR activation).
  * @aborted: true if CS was aborted due to some device error.
- * @timestamp: true if a timestmap must be captured upon completion
+ * @timestamp: true if a timestamp must be captured upon completion.
+ * @staged_last: true if this is the last staged CS and needs completion.
+ * @staged_first: true if this is the first staged CS and we need to receive
+ *                timeout for this CS.
+ * @staged_cs: true if this CS is part of a staged submission.
  */
 struct hl_cs {
 	u16			*jobs_in_queue_cnt;
@@ -1146,8 +1209,10 @@ struct hl_cs {
 	struct work_struct	finish_work;
 	struct delayed_work	work_tdr;
 	struct list_head	mirror_node;
+	struct list_head	staged_cs_node;
 	struct list_head	debugfs_list;
 	u64			sequence;
+	u64			staged_sequence;
 	enum hl_cs_type		type;
 	u8			submitted;
 	u8			completed;
@@ -1155,6 +1220,9 @@ struct hl_cs {
 	u8			tdr_active;
 	u8			aborted;
 	u8			timestamp;
+	u8			staged_last;
+	u8			staged_first;
+	u8			staged_cs;
 };
 
 /**
@@ -1225,6 +1293,7 @@ struct hl_cs_job {
  *                    MSG_PROT packets. Relevant only for GAUDI as GOYA doesn't
  *                    have streams so the engine can't be busy by another
  *                    stream.
+ * @completion: true if we need completion for this CS.
  */
 struct hl_cs_parser {
 	struct hl_cb		*user_cb;
@@ -1239,6 +1308,7 @@ struct hl_cs_parser {
 	u8			job_id;
 	u8			is_kernel_allocated_cb;
 	u8			contains_dma_pkt;
+	u8			completion;
 };
 
 /*
@@ -1688,12 +1758,20 @@ struct hl_mmu_per_hop_info {
  * struct hl_mmu_hop_info - A structure describing the TLB hops and their
  * hop-entries that were created in order to translate a virtual address to a
  * physical one.
+ * @scrambled_vaddr: The value of the virtual address after scrambling. This
+ *                   address replaces the original virtual-address when mapped
+ *                   in the MMU tables.
+ * @unscrambled_paddr: The un-scrambled physical address.
  * @hop_info: Array holding the per-hop information used for the translation.
  * @used_hops: The number of hops used for the translation.
+ * @range_type: virtual address range type.
  */
 struct hl_mmu_hop_info {
+	u64 scrambled_vaddr;
+	u64 unscrambled_paddr;
 	struct hl_mmu_per_hop_info hop_info[MMU_ARCH_5_HOPS];
 	u32 used_hops;
+	enum hl_va_range_type range_type;
 };
 
 /**
@@ -1766,7 +1844,6 @@ struct hl_mmu_funcs {
  * @asic_funcs: ASIC specific functions.
  * @asic_specific: ASIC specific information to use only from ASIC files.
  * @vm: virtual memory manager for MMU.
- * @mmu_cache_lock: protects MMU cache invalidation as it can serve one context.
  * @hwmon_dev: H/W monitor device.
  * @pm_mng_profile: current power management profile.
  * @hl_chip_info: ASIC's sensors information.
@@ -1844,6 +1921,7 @@ struct hl_mmu_funcs {
  *                          user processes
  * @device_fini_pending: true if device_fini was called and might be
  *                       waiting for the reset thread to finish
+ * @supports_staged_submission: true if staged submissions are supported
  */
 struct hl_device {
 	struct pci_dev			*pdev;
@@ -1881,7 +1959,6 @@ struct hl_device {
 	const struct hl_asic_funcs	*asic_funcs;
 	void				*asic_specific;
 	struct hl_vm			vm;
-	struct mutex			mmu_cache_lock;
 	struct device			*hwmon_dev;
 	enum hl_pm_mng_profile		pm_mng_profile;
 	struct hwmon_chip_info		*hl_chip_info;
@@ -1950,6 +2027,7 @@ struct hl_device {
 	u8				needs_reset;
 	u8				process_kill_trial_cnt;
 	u8				device_fini_pending;
+	u8				supports_staged_submission;
 
 	/* Parameters for bring-up */
 	u64				nic_ports_mask;
@@ -2067,7 +2145,7 @@ int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
 int hl_hw_queue_schedule_cs(struct hl_cs *cs);
 u32 hl_hw_queue_add_ptr(u32 ptr, u16 val);
 void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id);
-void hl_int_hw_queue_update_ci(struct hl_cs *cs);
+void hl_hw_queue_update_ci(struct hl_cs *cs);
 void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset);
 
 #define hl_queue_inc_ptr(p)		hl_hw_queue_add_ptr(p, 1)
@@ -2123,6 +2201,7 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
 			bool map_cb, u64 *handle);
 int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle);
 int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
+int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
 struct hl_cb *hl_cb_get(struct hl_device *hdev,	struct hl_cb_mgr *mgr,
 			u32 handle);
 void hl_cb_put(struct hl_cb *cb);
@@ -2136,6 +2215,7 @@ int hl_cb_va_pool_init(struct hl_ctx *ctx);
 void hl_cb_va_pool_fini(struct hl_ctx *ctx);
 
 void hl_cs_rollback_all(struct hl_device *hdev);
+void hl_pending_cb_list_flush(struct hl_ctx *ctx);
 struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
 		enum hl_queue_type queue_type, bool is_kernel_allocated_cb);
 void hl_sob_reset_error(struct kref *ref);
@@ -2143,6 +2223,10 @@ int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask);
 void hl_fence_put(struct hl_fence *fence);
 void hl_fence_get(struct hl_fence *fence);
 void cs_get(struct hl_cs *cs);
+bool cs_needs_completion(struct hl_cs *cs);
+bool cs_needs_timeout(struct hl_cs *cs);
+bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs);
+struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq);
 
 void goya_set_asic_funcs(struct hl_device *hdev);
 void gaudi_set_asic_funcs(struct hl_device *hdev);
@@ -2184,6 +2268,8 @@ void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu);
 int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr);
 int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
 			struct hl_mmu_hop_info *hops);
+u64 hl_mmu_scramble_addr(struct hl_device *hdev, u64 addr);
+u64 hl_mmu_descramble_addr(struct hl_device *hdev, u64 addr);
 bool hl_is_dram_va(struct hl_device *hdev, u64 virt_addr);
 
 int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
@@ -2201,7 +2287,8 @@ void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
 					void *vaddr);
 int hl_fw_send_heartbeat(struct hl_device *hdev);
 int hl_fw_cpucp_info_get(struct hl_device *hdev,
-			u32 cpu_security_boot_status_reg);
+			u32 cpu_security_boot_status_reg,
+			u32 boot_err0_reg);
 int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size);
 int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
 		struct hl_info_pci_counters *counters);
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index d25892d61ec9..03af61cecd37 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -57,12 +57,23 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
 
 	hw_ip.device_id = hdev->asic_funcs->get_pci_id(hdev);
 	hw_ip.sram_base_address = prop->sram_user_base_address;
-	hw_ip.dram_base_address = prop->dram_user_base_address;
+	hw_ip.dram_base_address =
+			hdev->mmu_enable && prop->dram_supports_virtual_memory ?
+			prop->dmmu.start_addr : prop->dram_user_base_address;
 	hw_ip.tpc_enabled_mask = prop->tpc_enabled_mask;
 	hw_ip.sram_size = prop->sram_size - sram_kmd_size;
-	hw_ip.dram_size = prop->dram_size - dram_kmd_size;
+
+	if (hdev->mmu_enable)
+		hw_ip.dram_size =
+			DIV_ROUND_DOWN_ULL(prop->dram_size - dram_kmd_size,
+						prop->dram_page_size) *
+							prop->dram_page_size;
+	else
+		hw_ip.dram_size = prop->dram_size - dram_kmd_size;
+
 	if (hw_ip.dram_size > PAGE_SIZE)
 		hw_ip.dram_enabled = 1;
+	hw_ip.dram_page_size = prop->dram_page_size;
 	hw_ip.num_of_events = prop->num_of_events;
 
 	memcpy(hw_ip.cpucp_version, prop->cpucp_info.cpucp_version,
@@ -79,6 +90,8 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
 	hw_ip.psoc_pci_pll_od = prop->psoc_pci_pll_od;
 	hw_ip.psoc_pci_pll_div_factor = prop->psoc_pci_pll_div_factor;
 
+	hw_ip.first_available_interrupt_id =
+			prop->first_available_user_msix_interrupt;
 	return copy_to_user(out, &hw_ip,
 		min((size_t)size, sizeof(hw_ip))) ? -EFAULT : 0;
 }
@@ -132,9 +145,10 @@ static int hw_idle(struct hl_device *hdev, struct hl_info_args *args)
 		return -EINVAL;
 
 	hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev,
-					&hw_idle.busy_engines_mask_ext, NULL);
+					hw_idle.busy_engines_mask_ext,
+					HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL);
 	hw_idle.busy_engines_mask =
-			lower_32_bits(hw_idle.busy_engines_mask_ext);
+			lower_32_bits(hw_idle.busy_engines_mask_ext[0]);
 
 	return copy_to_user(out, &hw_idle,
 		min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0;
@@ -383,7 +397,8 @@ static int sync_manager_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
 			prop->first_available_user_sob[args->dcore_id];
 	sm_info.first_available_monitor =
 			prop->first_available_user_mon[args->dcore_id];
-
+	sm_info.first_available_cq =
+			prop->first_available_cq[args->dcore_id];
 
 	return copy_to_user(out, &sm_info, min_t(size_t, (size_t) max_size,
 			sizeof(sm_info))) ? -EFAULT : 0;
diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c
index 76217258780a..0f335182267f 100644
--- a/drivers/misc/habanalabs/common/hw_queue.c
+++ b/drivers/misc/habanalabs/common/hw_queue.c
@@ -38,7 +38,7 @@ static inline int queue_free_slots(struct hl_hw_queue *q, u32 queue_len)
 		return (abs(delta) - queue_len);
 }
 
-void hl_int_hw_queue_update_ci(struct hl_cs *cs)
+void hl_hw_queue_update_ci(struct hl_cs *cs)
 {
 	struct hl_device *hdev = cs->ctx->hdev;
 	struct hl_hw_queue *q;
@@ -53,8 +53,13 @@ void hl_int_hw_queue_update_ci(struct hl_cs *cs)
 	if (!hdev->asic_prop.max_queues || q->queue_type == QUEUE_TYPE_HW)
 		return;
 
+	/* We must increment CI for every queue that will never get a
+	 * completion. There are 2 scenarios in which this can happen:
+	 * 1. All queues of a non completion CS will never get a completion.
+	 * 2. Internal queues never get a completion.
+	 */
 	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++, q++) {
-		if (q->queue_type == QUEUE_TYPE_INT)
+		if (!cs_needs_completion(cs) || q->queue_type == QUEUE_TYPE_INT)
 			atomic_add(cs->jobs_in_queue_cnt[i], &q->ci);
 	}
 }
@@ -292,6 +297,10 @@ static void ext_queue_schedule_job(struct hl_cs_job *job)
 	len = job->job_cb_size;
 	ptr = cb->bus_address;
 
+	/* Skip completion flow in case this is a non completion CS */
+	if (!cs_needs_completion(job->cs))
+		goto submit_bd;
+
 	cq_pkt.data = cpu_to_le32(
 			((q->pi << CQ_ENTRY_SHADOW_INDEX_SHIFT)
 				& CQ_ENTRY_SHADOW_INDEX_MASK) |
@@ -318,6 +327,7 @@ static void ext_queue_schedule_job(struct hl_cs_job *job)
 
 	cq->pi = hl_cq_inc_ptr(cq->pi);
 
+submit_bd:
 	ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
 }
 
@@ -525,6 +535,7 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
 	struct hl_cs_job *job, *tmp;
 	struct hl_hw_queue *q;
 	int rc = 0, i, cq_cnt;
+	bool first_entry;
 	u32 max_queues;
 
 	cntr = &hdev->aggregated_cs_counters;
@@ -548,7 +559,9 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
 			switch (q->queue_type) {
 			case QUEUE_TYPE_EXT:
 				rc = ext_queue_sanity_checks(hdev, q,
-						cs->jobs_in_queue_cnt[i], true);
+						cs->jobs_in_queue_cnt[i],
+						cs_needs_completion(cs) ?
+								true : false);
 				break;
 			case QUEUE_TYPE_INT:
 				rc = int_queue_sanity_checks(hdev, q,
@@ -583,12 +596,38 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
 		hdev->asic_funcs->collective_wait_init_cs(cs);
 
 	spin_lock(&hdev->cs_mirror_lock);
+
+	/* Verify staged CS exists and add to the staged list */
+	if (cs->staged_cs && !cs->staged_first) {
+		struct hl_cs *staged_cs;
+
+		staged_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
+		if (!staged_cs) {
+			dev_err(hdev->dev,
+				"Cannot find staged submission sequence %llu",
+				cs->staged_sequence);
+			rc = -EINVAL;
+			goto unlock_cs_mirror;
+		}
+
+		if (is_staged_cs_last_exists(hdev, staged_cs)) {
+			dev_err(hdev->dev,
+				"Staged submission sequence %llu already submitted",
+				cs->staged_sequence);
+			rc = -EINVAL;
+			goto unlock_cs_mirror;
+		}
+
+		list_add_tail(&cs->staged_cs_node, &staged_cs->staged_cs_node);
+	}
+
 	list_add_tail(&cs->mirror_node, &hdev->cs_mirror_list);
 
 	/* Queue TDR if the CS is the first entry and if timeout is wanted */
+	first_entry = list_first_entry(&hdev->cs_mirror_list,
+					struct hl_cs, mirror_node) == cs;
 	if ((hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) &&
-			(list_first_entry(&hdev->cs_mirror_list,
-					struct hl_cs, mirror_node) == cs)) {
+				first_entry && cs_needs_timeout(cs)) {
 		cs->tdr_active = true;
 		schedule_delayed_work(&cs->work_tdr, hdev->timeout_jiffies);
 
@@ -623,6 +662,8 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
 
 	goto out;
 
+unlock_cs_mirror:
+	spin_unlock(&hdev->cs_mirror_lock);
 unroll_cq_resv:
 	q = &hdev->kernel_queues[0];
 	for (i = 0 ; (i < max_queues) && (cq_cnt > 0) ; i++, q++) {
diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index 245c0159159f..1f5910517b0e 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -14,6 +14,9 @@
 
 #define HL_MMU_DEBUG	0
 
+/* use small pages for supporting non-pow2 (32M/40M/48M) DRAM phys page sizes */
+#define DRAM_POOL_PAGE_SIZE SZ_8M
+
 /*
  * The va ranges in context object contain a list with the available chunks of
  * device virtual memory.
@@ -38,15 +41,14 @@
  */
 
 /*
- * alloc_device_memory - allocate device memory
- *
- * @ctx                 : current context
- * @args                : host parameters containing the requested size
- * @ret_handle          : result handle
+ * alloc_device_memory() - allocate device memory.
+ * @ctx: pointer to the context structure.
+ * @args: host parameters containing the requested size.
+ * @ret_handle: result handle.
  *
  * This function does the following:
- * - Allocate the requested size rounded up to 'dram_page_size' pages
- * - Return unique handle
+ * - Allocate the requested size rounded up to 'dram_page_size' pages.
+ * - Return unique handle for later map/unmap/free.
  */
 static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
 				u32 *ret_handle)
@@ -55,15 +57,14 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
 	struct hl_vm *vm = &hdev->vm;
 	struct hl_vm_phys_pg_pack *phys_pg_pack;
 	u64 paddr = 0, total_size, num_pgs, i;
-	u32 num_curr_pgs, page_size, page_shift;
+	u32 num_curr_pgs, page_size;
 	int handle, rc;
 	bool contiguous;
 
 	num_curr_pgs = 0;
 	page_size = hdev->asic_prop.dram_page_size;
-	page_shift = __ffs(page_size);
-	num_pgs = (args->alloc.mem_size + (page_size - 1)) >> page_shift;
-	total_size = num_pgs << page_shift;
+	num_pgs = DIV_ROUND_UP_ULL(args->alloc.mem_size, page_size);
+	total_size = num_pgs * page_size;
 
 	if (!total_size) {
 		dev_err(hdev->dev, "Cannot allocate 0 bytes\n");
@@ -182,17 +183,17 @@ pages_pack_err:
 	return rc;
 }
 
-/*
- * dma_map_host_va - DMA mapping of the given host virtual address.
- * @hdev: habanalabs device structure
- * @addr: the host virtual address of the memory area
- * @size: the size of the memory area
- * @p_userptr: pointer to result userptr structure
+/**
+ * dma_map_host_va() - DMA mapping of the given host virtual address.
+ * @hdev: habanalabs device structure.
+ * @addr: the host virtual address of the memory area.
+ * @size: the size of the memory area.
+ * @p_userptr: pointer to result userptr structure.
  *
  * This function does the following:
- * - Allocate userptr structure
- * - Pin the given host memory using the userptr structure
- * - Perform DMA mapping to have the DMA addresses of the pages
+ * - Allocate userptr structure.
+ * - Pin the given host memory using the userptr structure.
+ * - Perform DMA mapping to have the DMA addresses of the pages.
  */
 static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size,
 				struct hl_userptr **p_userptr)
@@ -236,14 +237,14 @@ userptr_err:
 	return rc;
 }
 
-/*
- * dma_unmap_host_va - DMA unmapping of the given host virtual address.
- * @hdev: habanalabs device structure
- * @userptr: userptr to free
+/**
+ * dma_unmap_host_va() - DMA unmapping of the given host virtual address.
+ * @hdev: habanalabs device structure.
+ * @userptr: userptr to free.
  *
  * This function does the following:
- * - Unpins the physical pages
- * - Frees the userptr structure
+ * - Unpins the physical pages.
+ * - Frees the userptr structure.
  */
 static void dma_unmap_host_va(struct hl_device *hdev,
 				struct hl_userptr *userptr)
@@ -252,14 +253,13 @@ static void dma_unmap_host_va(struct hl_device *hdev,
 	kfree(userptr);
 }
 
-/*
- * dram_pg_pool_do_release - free DRAM pages pool
- *
- * @ref                 : pointer to reference object
+/**
+ * dram_pg_pool_do_release() - free DRAM pages pool.
+ * @ref: pointer to reference object.
  *
  * This function does the following:
- * - Frees the idr structure of physical pages handles
- * - Frees the generic pool of DRAM physical pages
+ * - Frees the idr structure of physical pages handles.
+ * - Frees the generic pool of DRAM physical pages.
  */
 static void dram_pg_pool_do_release(struct kref *ref)
 {
@@ -274,15 +274,15 @@ static void dram_pg_pool_do_release(struct kref *ref)
 	gen_pool_destroy(vm->dram_pg_pool);
 }
 
-/*
- * free_phys_pg_pack - free physical page pack
- * @hdev: habanalabs device structure
- * @phys_pg_pack: physical page pack to free
+/**
+ * free_phys_pg_pack() - free physical page pack.
+ * @hdev: habanalabs device structure.
+ * @phys_pg_pack: physical page pack to free.
  *
  * This function does the following:
  * - For DRAM memory only, iterate over the pack and free each physical block
- *   structure by returning it to the general pool
- * - Free the hl_vm_phys_pg_pack structure
+ *   structure by returning it to the general pool.
+ * - Free the hl_vm_phys_pg_pack structure.
  */
 static void free_phys_pg_pack(struct hl_device *hdev,
 				struct hl_vm_phys_pg_pack *phys_pg_pack)
@@ -313,20 +313,20 @@ static void free_phys_pg_pack(struct hl_device *hdev,
 	kfree(phys_pg_pack);
 }
 
-/*
- * free_device_memory - free device memory
- *
- * @ctx                  : current context
- * @handle              : handle of the memory chunk to free
+/**
+ * free_device_memory() - free device memory.
+ * @ctx: pointer to the context structure.
+ * @args: host parameters containing the handle of the memory to free.
  *
  * This function does the following:
- * - Free the device memory related to the given handle
+ * - Free the device memory related to the given handle.
  */
-static int free_device_memory(struct hl_ctx *ctx, u32 handle)
+static int free_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args)
 {
 	struct hl_device *hdev = ctx->hdev;
 	struct hl_vm *vm = &hdev->vm;
 	struct hl_vm_phys_pg_pack *phys_pg_pack;
+	u32 handle = args->free.handle;
 
 	spin_lock(&vm->idr_lock);
 	phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
@@ -361,16 +361,15 @@ static int free_device_memory(struct hl_ctx *ctx, u32 handle)
 	return 0;
 }
 
-/*
- * clear_va_list_locked - free virtual addresses list
- *
- * @hdev                : habanalabs device structure
- * @va_list             : list of virtual addresses to free
+/**
+ * clear_va_list_locked() - free virtual addresses list.
+ * @hdev: habanalabs device structure.
+ * @va_list: list of virtual addresses to free.
  *
  * This function does the following:
- * - Iterate over the list and free each virtual addresses block
+ * - Iterate over the list and free each virtual addresses block.
  *
- * This function should be called only when va_list lock is taken
+ * This function should be called only when va_list lock is taken.
  */
 static void clear_va_list_locked(struct hl_device *hdev,
 		struct list_head *va_list)
@@ -383,16 +382,15 @@ static void clear_va_list_locked(struct hl_device *hdev,
 	}
 }
 
-/*
- * print_va_list_locked    - print virtual addresses list
- *
- * @hdev                : habanalabs device structure
- * @va_list             : list of virtual addresses to print
+/**
+ * print_va_list_locked() - print virtual addresses list.
+ * @hdev: habanalabs device structure.
+ * @va_list: list of virtual addresses to print.
  *
  * This function does the following:
- * - Iterate over the list and print each virtual addresses block
+ * - Iterate over the list and print each virtual addresses block.
  *
- * This function should be called only when va_list lock is taken
+ * This function should be called only when va_list lock is taken.
  */
 static void print_va_list_locked(struct hl_device *hdev,
 		struct list_head *va_list)
@@ -409,18 +407,17 @@ static void print_va_list_locked(struct hl_device *hdev,
 #endif
 }
 
-/*
- * merge_va_blocks_locked - merge a virtual block if possible
- *
- * @hdev                : pointer to the habanalabs device structure
- * @va_list             : pointer to the virtual addresses block list
- * @va_block            : virtual block to merge with adjacent blocks
+/**
+ * merge_va_blocks_locked() - merge a virtual block if possible.
+ * @hdev: pointer to the habanalabs device structure.
+ * @va_list: pointer to the virtual addresses block list.
+ * @va_block: virtual block to merge with adjacent blocks.
  *
  * This function does the following:
  * - Merge the given blocks with the adjacent blocks if their virtual ranges
- *   create a contiguous virtual range
+ *   create a contiguous virtual range.
  *
- * This Function should be called only when va_list lock is taken
+ * This function should be called only when va_list lock is taken.
  */
 static void merge_va_blocks_locked(struct hl_device *hdev,
 		struct list_head *va_list, struct hl_vm_va_block *va_block)
@@ -445,19 +442,18 @@ static void merge_va_blocks_locked(struct hl_device *hdev,
 	}
 }
 
-/*
- * add_va_block_locked - add a virtual block to the virtual addresses list
- *
- * @hdev                : pointer to the habanalabs device structure
- * @va_list             : pointer to the virtual addresses block list
- * @start               : start virtual address
- * @end                 : end virtual address
+/**
+ * add_va_block_locked() - add a virtual block to the virtual addresses list.
+ * @hdev: pointer to the habanalabs device structure.
+ * @va_list: pointer to the virtual addresses block list.
+ * @start: start virtual address.
+ * @end: end virtual address.
  *
  * This function does the following:
- * - Add the given block to the virtual blocks list and merge with other
- * blocks if a contiguous virtual block can be created
+ * - Add the given block to the virtual blocks list and merge with other blocks
+ *   if a contiguous virtual block can be created.
  *
- * This Function should be called only when va_list lock is taken
+ * This function should be called only when va_list lock is taken.
  */
 static int add_va_block_locked(struct hl_device *hdev,
 		struct list_head *va_list, u64 start, u64 end)
@@ -501,16 +497,15 @@ static int add_va_block_locked(struct hl_device *hdev,
 	return 0;
 }
 
-/*
- * add_va_block - wrapper for add_va_block_locked
- *
- * @hdev                : pointer to the habanalabs device structure
- * @va_list             : pointer to the virtual addresses block list
- * @start               : start virtual address
- * @end                 : end virtual address
+/**
+ * add_va_block() - wrapper for add_va_block_locked.
+ * @hdev: pointer to the habanalabs device structure.
+ * @va_range: pointer to the virtual addresses range.
+ * @start: start virtual address.
+ * @end: end virtual address.
  *
  * This function does the following:
- * - Takes the list lock and calls add_va_block_locked
+ * - Takes the list lock and calls add_va_block_locked.
  */
 static inline int add_va_block(struct hl_device *hdev,
 		struct hl_va_range *va_range, u64 start, u64 end)
@@ -524,8 +519,9 @@ static inline int add_va_block(struct hl_device *hdev,
 	return rc;
 }
 
-/*
+/**
  * get_va_block() - get a virtual block for the given size and alignment.
+ *
  * @hdev: pointer to the habanalabs device structure.
  * @va_range: pointer to the virtual addresses range.
  * @size: requested block size.
@@ -534,33 +530,51 @@ static inline int add_va_block(struct hl_device *hdev,
  *
  * This function does the following:
  * - Iterate on the virtual block list to find a suitable virtual block for the
- *   given size and alignment.
+ *   given size, hint address and alignment.
  * - Reserve the requested block and update the list.
  * - Return the start address of the virtual block.
  */
-static u64 get_va_block(struct hl_device *hdev, struct hl_va_range *va_range,
-			u64 size, u64 hint_addr, u32 va_block_align)
+static u64 get_va_block(struct hl_device *hdev,
+				struct hl_va_range *va_range,
+				u64 size, u64 hint_addr, u32 va_block_align)
 {
 	struct hl_vm_va_block *va_block, *new_va_block = NULL;
-	u64 valid_start, valid_size, prev_start, prev_end, align_mask,
-		res_valid_start = 0, res_valid_size = 0;
+	u64 tmp_hint_addr, valid_start, valid_size, prev_start, prev_end,
+		align_mask, reserved_valid_start = 0, reserved_valid_size = 0;
 	bool add_prev = false;
+	bool is_align_pow_2  = is_power_of_2(va_range->page_size);
+
+	if (is_align_pow_2)
+		align_mask = ~((u64)va_block_align - 1);
+	else
+		/*
+		 * with non-power-of-2 range we work only with page granularity
+		 * and the start address is page aligned,
+		 * so no need for alignment checking.
+		 */
+		size = DIV_ROUND_UP_ULL(size, va_range->page_size) *
+							va_range->page_size;
 
-	align_mask = ~((u64)va_block_align - 1);
+	tmp_hint_addr = hint_addr;
 
-	/* check if hint_addr is aligned */
-	if (hint_addr & (va_block_align - 1))
+	/* Check if we need to ignore hint address */
+	if ((is_align_pow_2 && (hint_addr & (va_block_align - 1))) ||
+			(!is_align_pow_2 &&
+				do_div(tmp_hint_addr, va_range->page_size))) {
+		dev_info(hdev->dev, "Hint address 0x%llx will be ignored\n",
+					hint_addr);
 		hint_addr = 0;
+	}
 
 	mutex_lock(&va_range->lock);
 
 	print_va_list_locked(hdev, &va_range->list);
 
 	list_for_each_entry(va_block, &va_range->list, node) {
-		/* calc the first possible aligned addr */
+		/* Calc the first possible aligned addr */
 		valid_start = va_block->start;
 
-		if (valid_start & (va_block_align - 1)) {
+		if (is_align_pow_2 && (valid_start & (va_block_align - 1))) {
 			valid_start &= align_mask;
 			valid_start += va_block_align;
 			if (valid_start > va_block->end)
@@ -568,35 +582,41 @@ static u64 get_va_block(struct hl_device *hdev, struct hl_va_range *va_range,
 		}
 
 		valid_size = va_block->end - valid_start;
+		if (valid_size < size)
+			continue;
 
-		if (valid_size >= size &&
-			(!new_va_block || valid_size < res_valid_size)) {
+		/* Pick the minimal length block which has the required size */
+		if (!new_va_block || (valid_size < reserved_valid_size)) {
 			new_va_block = va_block;
-			res_valid_start = valid_start;
-			res_valid_size = valid_size;
+			reserved_valid_start = valid_start;
+			reserved_valid_size = valid_size;
 		}
 
 		if (hint_addr && hint_addr >= valid_start &&
-				((hint_addr + size) <= va_block->end)) {
+					(hint_addr + size) <= va_block->end) {
 			new_va_block = va_block;
-			res_valid_start = hint_addr;
-			res_valid_size = valid_size;
+			reserved_valid_start = hint_addr;
+			reserved_valid_size = valid_size;
 			break;
 		}
 	}
 
 	if (!new_va_block) {
 		dev_err(hdev->dev, "no available va block for size %llu\n",
-				size);
+								size);
 		goto out;
 	}
 
-	if (res_valid_start > new_va_block->start) {
+	/*
+	 * Check if there is some leftover range due to reserving the new
+	 * va block, then return it to the main virtual addresses list.
+	 */
+	if (reserved_valid_start > new_va_block->start) {
 		prev_start = new_va_block->start;
-		prev_end = res_valid_start - 1;
+		prev_end = reserved_valid_start - 1;
 
-		new_va_block->start = res_valid_start;
-		new_va_block->size = res_valid_size;
+		new_va_block->start = reserved_valid_start;
+		new_va_block->size = reserved_valid_size;
 
 		add_prev = true;
 	}
@@ -617,7 +637,7 @@ static u64 get_va_block(struct hl_device *hdev, struct hl_va_range *va_range,
 out:
 	mutex_unlock(&va_range->lock);
 
-	return res_valid_start;
+	return reserved_valid_start;
 }
 
 /*
@@ -644,9 +664,9 @@ u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
 
 /**
  * hl_get_va_range_type() - get va_range type for the given address and size.
- * @address: The start address of the area we want to validate.
- * @size: The size in bytes of the area we want to validate.
- * @type: returned va_range type
+ * @address: the start address of the area we want to validate.
+ * @size: the size in bytes of the area we want to validate.
+ * @type: returned va_range type.
  *
  * Return: true if the area is inside a valid range, false otherwise.
  */
@@ -667,16 +687,15 @@ static int hl_get_va_range_type(struct hl_ctx *ctx, u64 address, u64 size,
 	return -EINVAL;
 }
 
-/*
- * hl_unreserve_va_block - wrapper for add_va_block for unreserving a va block
- *
+/**
+ * hl_unreserve_va_block() - wrapper for add_va_block to unreserve a va block.
  * @hdev: pointer to the habanalabs device structure
- * @ctx: current context
- * @start: start virtual address
- * @end: end virtual address
+ * @ctx: pointer to the context structure.
+ * @start_addr: start virtual address.
+ * @size: size of the virtual address block.
  *
  * This function does the following:
- * - Takes the list lock and calls add_va_block_locked
+ * - Takes the list lock and calls add_va_block_locked.
  */
 int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
 		u64 start_addr, u64 size)
@@ -701,11 +720,10 @@ int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
 	return rc;
 }
 
-/*
- * get_sg_info - get number of pages and the DMA address from SG list
- *
- * @sg                 : the SG list
- * @dma_addr           : pointer to DMA address to return
+/**
+ * get_sg_info() - get number of pages and the DMA address from SG list.
+ * @sg: the SG list.
+ * @dma_addr: pointer to DMA address to return.
  *
  * Calculate the number of consecutive pages described by the SG list. Take the
  * offset of the address in the first page, add to it the length and round it up
@@ -719,17 +737,17 @@ static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
 			(PAGE_SIZE - 1)) >> PAGE_SHIFT;
 }
 
-/*
- * init_phys_pg_pack_from_userptr - initialize physical page pack from host
- *                                  memory
- * @ctx: current context
- * @userptr: userptr to initialize from
- * @pphys_pg_pack: result pointer
+/**
+ * init_phys_pg_pack_from_userptr() - initialize physical page pack from host
+ *                                    memory
+ * @ctx: pointer to the context structure.
+ * @userptr: userptr to initialize from.
+ * @pphys_pg_pack: result pointer.
  *
  * This function does the following:
- * - Pin the physical pages related to the given virtual block
+ * - Pin the physical pages related to the given virtual block.
  * - Create a physical page pack from the physical pages related to the given
- *   virtual block
+ *   virtual block.
  */
 static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
 				struct hl_userptr *userptr,
@@ -821,16 +839,16 @@ page_pack_arr_mem_err:
 	return rc;
 }
 
-/*
- * map_phys_pg_pack - maps the physical page pack.
- * @ctx: current context
- * @vaddr: start address of the virtual area to map from
- * @phys_pg_pack: the pack of physical pages to map to
+/**
+ * map_phys_pg_pack() - maps the physical page pack.
+ * @ctx: pointer to the context structure.
+ * @vaddr: start address of the virtual area to map from.
+ * @phys_pg_pack: the pack of physical pages to map to.
  *
  * This function does the following:
- * - Maps each chunk of virtual memory to matching physical chunk
- * - Stores number of successful mappings in the given argument
- * - Returns 0 on success, error code otherwise
+ * - Maps each chunk of virtual memory to matching physical chunk.
+ * - Stores number of successful mappings in the given argument.
+ * - Returns 0 on success, error code otherwise.
  */
 static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
 				struct hl_vm_phys_pg_pack *phys_pg_pack)
@@ -875,11 +893,11 @@ err:
 	return rc;
 }
 
-/*
- * unmap_phys_pg_pack - unmaps the physical page pack
- * @ctx: current context
- * @vaddr: start address of the virtual area to unmap
- * @phys_pg_pack: the pack of physical pages to unmap
+/**
+ * unmap_phys_pg_pack() - unmaps the physical page pack.
+ * @ctx: pointer to the context structure.
+ * @vaddr: start address of the virtual area to unmap.
+ * @phys_pg_pack: the pack of physical pages to unmap.
  */
 static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
 				struct hl_vm_phys_pg_pack *phys_pg_pack)
@@ -913,7 +931,7 @@ static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
 }
 
 static int get_paddr_from_handle(struct hl_ctx *ctx, struct hl_mem_in *args,
-				u64 *paddr)
+					u64 *paddr)
 {
 	struct hl_device *hdev = ctx->hdev;
 	struct hl_vm *vm = &hdev->vm;
@@ -936,19 +954,18 @@ static int get_paddr_from_handle(struct hl_ctx *ctx, struct hl_mem_in *args,
 	return 0;
 }
 
-/*
- * map_device_va - map the given memory
- *
- * @ctx	         : current context
- * @args         : host parameters with handle/host virtual address
- * @device_addr	 : pointer to result device virtual address
+/**
+ * map_device_va() - map the given memory.
+ * @ctx: pointer to the context structure.
+ * @args: host parameters with handle/host virtual address.
+ * @device_addr: pointer to result device virtual address.
  *
  * This function does the following:
  * - If given a physical device memory handle, map to a device virtual block
- *   and return the start address of this block
+ *   and return the start address of this block.
  * - If given a host virtual address and size, find the related physical pages,
  *   map a device virtual block to this pages and return the start address of
- *   this block
+ *   this block.
  */
 static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 		u64 *device_addr)
@@ -1034,7 +1051,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 
 		hint_addr = args->map_device.hint_addr;
 
-		/* DRAM VA alignment is the same as the DRAM page size */
+		/* DRAM VA alignment is the same as the MMU page size */
 		va_range = ctx->va_range[HL_VA_RANGE_TYPE_DRAM];
 		va_block_align = hdev->asic_prop.dmmu.page_size;
 	}
@@ -1125,24 +1142,26 @@ init_page_pack_err:
 	return rc;
 }
 
-/*
- * unmap_device_va      - unmap the given device virtual address
- *
- * @ctx                 : current context
- * @vaddr               : device virtual address to unmap
- * @ctx_free            : true if in context free flow, false otherwise.
+/**
+ * unmap_device_va() - unmap the given device virtual address.
+ * @ctx: pointer to the context structure.
+ * @args: host parameters with device virtual address to unmap.
+ * @ctx_free: true if in context free flow, false otherwise.
  *
  * This function does the following:
- * - Unmap the physical pages related to the given virtual address
- * - return the device virtual block to the virtual block list
+ * - unmap the physical pages related to the given virtual address.
+ * - return the device virtual block to the virtual block list.
  */
-static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free)
+static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
+				bool ctx_free)
 {
 	struct hl_device *hdev = ctx->hdev;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
 	struct hl_vm_hash_node *hnode = NULL;
 	struct hl_userptr *userptr = NULL;
 	struct hl_va_range *va_range;
+	u64 vaddr = args->unmap.device_virt_addr;
 	enum vm_type_t *vm_type;
 	bool is_userptr;
 	int rc = 0;
@@ -1201,7 +1220,13 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free)
 		goto mapping_cnt_err;
 	}
 
-	vaddr &= ~(((u64) phys_pg_pack->page_size) - 1);
+	if (!is_userptr && !is_power_of_2(phys_pg_pack->page_size))
+		vaddr = prop->dram_base_address +
+			DIV_ROUND_DOWN_ULL(vaddr - prop->dram_base_address,
+						phys_pg_pack->page_size) *
+							phys_pg_pack->page_size;
+	else
+		vaddr &= ~(((u64) phys_pg_pack->page_size) - 1);
 
 	mutex_lock(&ctx->mmu_lock);
 
@@ -1264,12 +1289,90 @@ vm_type_err:
 	return rc;
 }
 
+static int map_block(struct hl_device *hdev, u64 address, u64 *handle,
+			u32 *size)
+{
+	u32 block_id = 0;
+	int rc;
+
+	rc = hdev->asic_funcs->get_hw_block_id(hdev, address, size, &block_id);
+
+	*handle = block_id | HL_MMAP_TYPE_BLOCK;
+	*handle <<= PAGE_SHIFT;
+
+	return rc;
+}
+
+static void hw_block_vm_close(struct vm_area_struct *vma)
+{
+	struct hl_ctx *ctx = (struct hl_ctx *) vma->vm_private_data;
+
+	hl_ctx_put(ctx);
+	vma->vm_private_data = NULL;
+}
+
+static const struct vm_operations_struct hw_block_vm_ops = {
+	.close = hw_block_vm_close
+};
+
+/**
+ * hl_hw_block_mmap() - mmap a hw block to user.
+ * @hpriv: pointer to the private data of the fd
+ * @vma: pointer to vm_area_struct of the process
+ *
+ * Driver increments context reference for every HW block mapped in order
+ * to prevent user from closing FD without unmapping first
+ */
+int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
+{
+	struct hl_device *hdev = hpriv->hdev;
+	u32 block_id, block_size;
+	int rc;
+
+	/* We use the page offset to hold the block id and thus we need to clear
+	 * it before doing the mmap itself
+	 */
+	block_id = vma->vm_pgoff;
+	vma->vm_pgoff = 0;
+
+	/* Driver only allows mapping of a complete HW block */
+	block_size = vma->vm_end - vma->vm_start;
+
+#ifdef _HAS_TYPE_ARG_IN_ACCESS_OK
+	if (!access_ok(VERIFY_WRITE,
+		(void __user *) (uintptr_t) vma->vm_start, block_size)) {
+#else
+	if (!access_ok((void __user *) (uintptr_t) vma->vm_start, block_size)) {
+#endif
+		dev_err(hdev->dev,
+			"user pointer is invalid - 0x%lx\n",
+			vma->vm_start);
+
+		return -EINVAL;
+	}
+
+	vma->vm_ops = &hw_block_vm_ops;
+	vma->vm_private_data = hpriv->ctx;
+
+	hl_ctx_get(hdev, hpriv->ctx);
+
+	rc = hdev->asic_funcs->hw_block_mmap(hdev, vma, block_id, block_size);
+	if (rc) {
+		hl_ctx_put(hpriv->ctx);
+		return rc;
+	}
+
+	vma->vm_pgoff = block_id;
+
+	return 0;
+}
+
 static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
 {
 	struct hl_device *hdev = hpriv->hdev;
 	struct hl_ctx *ctx = hpriv->ctx;
-	u64 device_addr = 0;
-	u32 handle = 0;
+	u64 block_handle, device_addr = 0;
+	u32 handle = 0, block_size;
 	int rc;
 
 	switch (args->in.op) {
@@ -1292,7 +1395,7 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
 		break;
 
 	case HL_MEM_OP_FREE:
-		rc = free_device_memory(ctx, args->in.free.handle);
+		rc = free_device_memory(ctx, &args->in);
 		break;
 
 	case HL_MEM_OP_MAP:
@@ -1301,7 +1404,7 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
 			rc = 0;
 		} else {
 			rc = get_paddr_from_handle(ctx, &args->in,
-					&device_addr);
+							&device_addr);
 		}
 
 		memset(args, 0, sizeof(*args));
@@ -1312,6 +1415,13 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
 		rc = 0;
 		break;
 
+	case HL_MEM_OP_MAP_BLOCK:
+		rc = map_block(hdev, args->in.map_block.block_addr,
+				&block_handle, &block_size);
+		args->out.block_handle = block_handle;
+		args->out.block_size = block_size;
+		break;
+
 	default:
 		dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
 		rc = -ENOTTY;
@@ -1328,8 +1438,8 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
 	union hl_mem_args *args = data;
 	struct hl_device *hdev = hpriv->hdev;
 	struct hl_ctx *ctx = hpriv->ctx;
-	u64 device_addr = 0;
-	u32 handle = 0;
+	u64 block_handle, device_addr = 0;
+	u32 handle = 0, block_size;
 	int rc;
 
 	if (!hl_device_operational(hdev, &status)) {
@@ -1400,7 +1510,7 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
 			goto out;
 		}
 
-		rc = free_device_memory(ctx, args->in.free.handle);
+		rc = free_device_memory(ctx, &args->in);
 		break;
 
 	case HL_MEM_OP_MAP:
@@ -1411,8 +1521,14 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
 		break;
 
 	case HL_MEM_OP_UNMAP:
-		rc = unmap_device_va(ctx, args->in.unmap.device_virt_addr,
-					false);
+		rc = unmap_device_va(ctx, &args->in, false);
+		break;
+
+	case HL_MEM_OP_MAP_BLOCK:
+		rc = map_block(hdev, args->in.map_block.block_addr,
+				&block_handle, &block_size);
+		args->out.block_handle = block_handle;
+		args->out.block_size = block_size;
 		break;
 
 	default:
@@ -1473,16 +1589,16 @@ destroy_pages:
 	return rc;
 }
 
-/*
- * hl_pin_host_memory - pins a chunk of host memory.
- * @hdev: pointer to the habanalabs device structure
- * @addr: the host virtual address of the memory area
- * @size: the size of the memory area
- * @userptr: pointer to hl_userptr structure
+/**
+ * hl_pin_host_memory() - pins a chunk of host memory.
+ * @hdev: pointer to the habanalabs device structure.
+ * @addr: the host virtual address of the memory area.
+ * @size: the size of the memory area.
+ * @userptr: pointer to hl_userptr structure.
  *
  * This function does the following:
- * - Pins the physical pages
- * - Create an SG list from those pages
+ * - Pins the physical pages.
+ * - Create an SG list from those pages.
  */
 int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
 					struct hl_userptr *userptr)
@@ -1571,11 +1687,10 @@ void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
 	kfree(userptr->sgt);
 }
 
-/*
- * hl_userptr_delete_list - clear userptr list
- *
- * @hdev                : pointer to the habanalabs device structure
- * @userptr_list        : pointer to the list to clear
+/**
+ * hl_userptr_delete_list() - clear userptr list.
+ * @hdev: pointer to the habanalabs device structure.
+ * @userptr_list: pointer to the list to clear.
  *
  * This function does the following:
  * - Iterates over the list and unpins the host memory and frees the userptr
@@ -1594,12 +1709,11 @@ void hl_userptr_delete_list(struct hl_device *hdev,
 	INIT_LIST_HEAD(userptr_list);
 }
 
-/*
- * hl_userptr_is_pinned - returns whether the given userptr is pinned
- *
- * @hdev                : pointer to the habanalabs device structure
- * @userptr_list        : pointer to the list to clear
- * @userptr             : pointer to userptr to check
+/**
+ * hl_userptr_is_pinned() - returns whether the given userptr is pinned.
+ * @hdev: pointer to the habanalabs device structure.
+ * @userptr_list: pointer to the list to search.
+ * @userptr: pointer to userptr to check.
  *
  * This function does the following:
  * - Iterates over the list and checks if the given userptr is in it, means is
@@ -1617,12 +1731,12 @@ bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
 	return false;
 }
 
-/*
- * va_range_init - initialize virtual addresses range
- * @hdev: pointer to the habanalabs device structure
- * @va_range: pointer to the range to initialize
- * @start: range start address
- * @end: range end address
+/**
+ * va_range_init() - initialize virtual addresses range.
+ * @hdev: pointer to the habanalabs device structure.
+ * @va_range: pointer to the range to initialize.
+ * @start: range start address.
+ * @end: range end address.
  *
  * This function does the following:
  * - Initializes the virtual addresses list of the given range with the given
@@ -1635,15 +1749,21 @@ static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range,
 
 	INIT_LIST_HEAD(&va_range->list);
 
-	/* PAGE_SIZE alignment */
+	/*
+	 * PAGE_SIZE alignment
+	 * it is the caller's responsibility to align the addresses if the
+	 * page size is not a power of 2
+	 */
 
-	if (start & (PAGE_SIZE - 1)) {
-		start &= PAGE_MASK;
-		start += PAGE_SIZE;
-	}
+	if (is_power_of_2(page_size)) {
+		if (start & (PAGE_SIZE - 1)) {
+			start &= PAGE_MASK;
+			start += PAGE_SIZE;
+		}
 
-	if (end & (PAGE_SIZE - 1))
-		end &= PAGE_MASK;
+		if (end & (PAGE_SIZE - 1))
+			end &= PAGE_MASK;
+	}
 
 	if (start >= end) {
 		dev_err(hdev->dev, "too small vm range for va list\n");
@@ -1664,13 +1784,13 @@ static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range,
 	return 0;
 }
 
-/*
- * va_range_fini() - clear a virtual addresses range
- * @hdev: pointer to the habanalabs structure
- * va_range: pointer to virtual addresses range
+/**
+ * va_range_fini() - clear a virtual addresses range.
+ * @hdev: pointer to the habanalabs structure.
+ * @va_range: pointer to virtual addresses range.
  *
  * This function does the following:
- * - Frees the virtual addresses block list and its lock
+ * - Frees the virtual addresses block list and its lock.
  */
 static void va_range_fini(struct hl_device *hdev, struct hl_va_range *va_range)
 {
@@ -1682,22 +1802,22 @@ static void va_range_fini(struct hl_device *hdev, struct hl_va_range *va_range)
 	kfree(va_range);
 }
 
-/*
- * vm_ctx_init_with_ranges() - initialize virtual memory for context
- * @ctx: pointer to the habanalabs context structure
+/**
+ * vm_ctx_init_with_ranges() - initialize virtual memory for context.
+ * @ctx: pointer to the habanalabs context structure.
  * @host_range_start: host virtual addresses range start.
  * @host_range_end: host virtual addresses range end.
  * @host_huge_range_start: host virtual addresses range start for memory
- *                          allocated with huge pages.
+ *                         allocated with huge pages.
  * @host_huge_range_end: host virtual addresses range end for memory allocated
  *                        with huge pages.
  * @dram_range_start: dram virtual addresses range start.
  * @dram_range_end: dram virtual addresses range end.
  *
  * This function initializes the following:
- * - MMU for context
- * - Virtual address to area descriptor hashtable
- * - Virtual block list of available virtual memory
+ * - MMU for context.
+ * - Virtual address to area descriptor hashtable.
+ * - Virtual block list of available virtual memory.
  */
 static int vm_ctx_init_with_ranges(struct hl_ctx *ctx,
 					u64 host_range_start,
@@ -1818,7 +1938,8 @@ int hl_vm_ctx_init(struct hl_ctx *ctx)
 
 	dram_range_start = prop->dmmu.start_addr;
 	dram_range_end = prop->dmmu.end_addr;
-	dram_page_size = prop->dmmu.page_size;
+	dram_page_size = prop->dram_page_size ?
+				prop->dram_page_size : prop->dmmu.page_size;
 	host_range_start = prop->pmmu.start_addr;
 	host_range_end = prop->pmmu.end_addr;
 	host_page_size = prop->pmmu.page_size;
@@ -1832,15 +1953,14 @@ int hl_vm_ctx_init(struct hl_ctx *ctx)
 			dram_range_start, dram_range_end, dram_page_size);
 }
 
-/*
- * hl_vm_ctx_fini       - virtual memory teardown of context
- *
- * @ctx                 : pointer to the habanalabs context structure
+/**
+ * hl_vm_ctx_fini() - virtual memory teardown of context.
+ * @ctx: pointer to the habanalabs context structure.
  *
  * This function perform teardown the following:
- * - Virtual block list of available virtual memory
- * - Virtual address to area descriptor hashtable
- * - MMU for context
+ * - Virtual block list of available virtual memory.
+ * - Virtual address to area descriptor hashtable.
+ * - MMU for context.
  *
  * In addition this function does the following:
  * - Unmaps the existing hashtable nodes if the hashtable is not empty. The
@@ -1859,9 +1979,10 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
 	struct hl_vm_phys_pg_pack *phys_pg_list;
 	struct hl_vm_hash_node *hnode;
 	struct hlist_node *tmp_node;
+	struct hl_mem_in args;
 	int i;
 
-	if (!ctx->hdev->mmu_enable)
+	if (!hdev->mmu_enable)
 		return;
 
 	hl_debugfs_remove_ctx_mem_hash(hdev, ctx);
@@ -1878,13 +1999,18 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
 		dev_dbg(hdev->dev,
 			"hl_mem_hash_node of vaddr 0x%llx of asid %d is still alive\n",
 			hnode->vaddr, ctx->asid);
-		unmap_device_va(ctx, hnode->vaddr, true);
+		args.unmap.device_virt_addr = hnode->vaddr;
+		unmap_device_va(ctx, &args, true);
 	}
 
+	mutex_lock(&ctx->mmu_lock);
+
 	/* invalidate the cache once after the unmapping loop */
 	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
 	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_PHYS_PACK);
 
+	mutex_unlock(&ctx->mmu_lock);
+
 	spin_lock(&vm->idr_lock);
 	idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i)
 		if (phys_pg_list->asid == ctx->asid) {
@@ -1911,19 +2037,19 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
 	 * because the user notifies us on allocations. If the user is no more,
 	 * all DRAM is available
 	 */
-	if (!ctx->hdev->asic_prop.dram_supports_virtual_memory)
-		atomic64_set(&ctx->hdev->dram_used_mem, 0);
+	if (ctx->asid != HL_KERNEL_ASID_ID &&
+			!hdev->asic_prop.dram_supports_virtual_memory)
+		atomic64_set(&hdev->dram_used_mem, 0);
 }
 
-/*
- * hl_vm_init           - initialize virtual memory module
- *
- * @hdev                : pointer to the habanalabs device structure
+/**
+ * hl_vm_init() - initialize virtual memory module.
+ * @hdev: pointer to the habanalabs device structure.
  *
  * This function initializes the following:
- * - MMU module
- * - DRAM physical pages pool of 2MB
- * - Idr for device memory allocation handles
+ * - MMU module.
+ * - DRAM physical pages pool of 2MB.
+ * - Idr for device memory allocation handles.
  */
 int hl_vm_init(struct hl_device *hdev)
 {
@@ -1931,7 +2057,13 @@ int hl_vm_init(struct hl_device *hdev)
 	struct hl_vm *vm = &hdev->vm;
 	int rc;
 
-	vm->dram_pg_pool = gen_pool_create(__ffs(prop->dram_page_size), -1);
+	if (is_power_of_2(prop->dram_page_size))
+		vm->dram_pg_pool =
+			gen_pool_create(__ffs(prop->dram_page_size), -1);
+	else
+		vm->dram_pg_pool =
+			gen_pool_create(__ffs(DRAM_POOL_PAGE_SIZE), -1);
+
 	if (!vm->dram_pg_pool) {
 		dev_err(hdev->dev, "Failed to create dram page pool\n");
 		return -ENOMEM;
@@ -1964,15 +2096,14 @@ pool_add_err:
 	return rc;
 }
 
-/*
- * hl_vm_fini           - virtual memory module teardown
- *
- * @hdev                : pointer to the habanalabs device structure
+/**
+ * hl_vm_fini() - virtual memory module teardown.
+ * @hdev: pointer to the habanalabs device structure.
  *
  * This function perform teardown to the following:
- * - Idr for device memory allocation handles
- * - DRAM physical pages pool of 2MB
- * - MMU module
+ * - Idr for device memory allocation handles.
+ * - DRAM physical pages pool of 2MB.
+ * - MMU module.
  */
 void hl_vm_fini(struct hl_device *hdev)
 {
diff --git a/drivers/misc/habanalabs/common/mmu/Makefile b/drivers/misc/habanalabs/common/mmu/Makefile
new file mode 100644
index 000000000000..d852c3874658
--- /dev/null
+++ b/drivers/misc/habanalabs/common/mmu/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+HL_COMMON_MMU_FILES := common/mmu/mmu.o common/mmu/mmu_v1.o
diff --git a/drivers/misc/habanalabs/common/mmu.c b/drivers/misc/habanalabs/common/mmu/mmu.c
index 28a4638741d8..71703a32350f 100644
--- a/drivers/misc/habanalabs/common/mmu.c
+++ b/drivers/misc/habanalabs/common/mmu/mmu.c
@@ -7,7 +7,7 @@
 
 #include <linux/slab.h>
 
-#include "habanalabs.h"
+#include "../habanalabs.h"
 
 bool hl_is_dram_va(struct hl_device *hdev, u64 virt_addr)
 {
@@ -166,7 +166,6 @@ int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
 		mmu_prop = &prop->pmmu;
 
 	pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT;
-
 	/*
 	 * The H/W handles mapping of specific page sizes. Hence if the page
 	 * size is bigger, we break it to sub-pages and unmap them separately.
@@ -174,11 +173,21 @@ int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
 	if ((page_size % mmu_prop->page_size) == 0) {
 		real_page_size = mmu_prop->page_size;
 	} else {
-		dev_err(hdev->dev,
-			"page size of %u is not %uKB aligned, can't unmap\n",
-			page_size, mmu_prop->page_size >> 10);
+		/*
+		 * MMU page size may differ from DRAM page size.
+		 * In such case work with the DRAM page size and let the MMU
+		 * scrambling routine handle this mismatch when
+		 * calculating the address to remove from the MMU page table
+		 */
+		if (is_dram_addr && ((page_size % prop->dram_page_size) == 0)) {
+			real_page_size = prop->dram_page_size;
+		} else {
+			dev_err(hdev->dev,
+				"page size of %u is not %uKB aligned, can't unmap\n",
+				page_size, mmu_prop->page_size >> 10);
 
-		return -EFAULT;
+			return -EFAULT;
+		}
 	}
 
 	npages = page_size / real_page_size;
@@ -253,6 +262,17 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
 	 */
 	if ((page_size % mmu_prop->page_size) == 0) {
 		real_page_size = mmu_prop->page_size;
+	} else if (is_dram_addr && ((page_size % prop->dram_page_size) == 0) &&
+			(prop->dram_page_size < mmu_prop->page_size)) {
+		/*
+		 * MMU page size may differ from DRAM page size.
+		 * In such case work with the DRAM page size and let the MMU
+		 * scrambling routine handle this mismatch when calculating
+		 * the address to place in the MMU page table. (in that case
+		 * also make sure that the dram_page_size is smaller than the
+		 * mmu page size)
+		 */
+		real_page_size = prop->dram_page_size;
 	} else {
 		dev_err(hdev->dev,
 			"page size of %u is not %uKB aligned, can't map\n",
@@ -261,9 +281,21 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
 		return -EFAULT;
 	}
 
-	WARN_ONCE((phys_addr & (real_page_size - 1)),
-		"Mapping 0x%llx with page size of 0x%x is erroneous! Address must be divisible by page size",
-		phys_addr, real_page_size);
+	/*
+	 * Verify that the phys and virt addresses are aligned with the
+	 * MMU page size (in dram this means checking the address and MMU
+	 * after scrambling)
+	 */
+	if ((is_dram_addr &&
+			((hdev->asic_funcs->scramble_addr(hdev, phys_addr) &
+				(mmu_prop->page_size - 1)) ||
+			(hdev->asic_funcs->scramble_addr(hdev, virt_addr) &
+				(mmu_prop->page_size - 1)))) ||
+		(!is_dram_addr && ((phys_addr & (real_page_size - 1)) ||
+				(virt_addr & (real_page_size - 1)))))
+		dev_crit(hdev->dev,
+			"Mapping address 0x%llx with virtual address 0x%llx and page size of 0x%x is erroneous! Addresses must be divisible by page size",
+			phys_addr, virt_addr, real_page_size);
 
 	npages = page_size / real_page_size;
 	real_virt_addr = virt_addr;
@@ -444,19 +476,53 @@ void hl_mmu_swap_in(struct hl_ctx *ctx)
 		hdev->mmu_func[MMU_HR_PGT].swap_in(ctx);
 }
 
+static void hl_mmu_pa_page_with_offset(struct hl_ctx *ctx, u64 virt_addr,
+						struct hl_mmu_hop_info *hops,
+						u64 *phys_addr)
+{
+	struct hl_device *hdev = ctx->hdev;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	u64 offset_mask, addr_mask, hop_shift, tmp_phys_addr;
+	u32 hop0_shift_off;
+	void *p;
+
+	/* last hop holds the phys address and flags */
+	if (hops->unscrambled_paddr)
+		tmp_phys_addr = hops->unscrambled_paddr;
+	else
+		tmp_phys_addr = hops->hop_info[hops->used_hops - 1].hop_pte_val;
+
+	if (hops->range_type == HL_VA_RANGE_TYPE_HOST_HUGE)
+		p = &prop->pmmu_huge;
+	else if (hops->range_type == HL_VA_RANGE_TYPE_HOST)
+		p = &prop->pmmu;
+	else /* HL_VA_RANGE_TYPE_DRAM */
+		p = &prop->dmmu;
+
+	/*
+	 * find the correct hop shift field in hl_mmu_properties structure
+	 * in order to determine the right maks for the page offset.
+	 */
+	hop0_shift_off = offsetof(struct hl_mmu_properties, hop0_shift);
+	p = (char *)p + hop0_shift_off;
+	p = (char *)p + ((hops->used_hops - 1) * sizeof(u64));
+	hop_shift = *(u64 *)p;
+	offset_mask = (1ull << hop_shift) - 1;
+	addr_mask = ~(offset_mask);
+	*phys_addr = (tmp_phys_addr & addr_mask) |
+					(virt_addr & offset_mask);
+}
+
 int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr)
 {
 	struct hl_mmu_hop_info hops;
-	u64 tmp_addr;
 	int rc;
 
 	rc = hl_mmu_get_tlb_info(ctx, virt_addr, &hops);
 	if (rc)
 		return rc;
 
-	/* last hop holds the phys address and flags */
-	tmp_addr = hops.hop_info[hops.used_hops - 1].hop_pte_val;
-	*phys_addr = (tmp_addr & HOP_PHYS_ADDR_MASK) | (virt_addr & FLAGS_MASK);
+	hl_mmu_pa_page_with_offset(ctx, virt_addr, &hops,  phys_addr);
 
 	return 0;
 }
@@ -473,6 +539,8 @@ int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
 	if (!hdev->mmu_enable)
 		return -EOPNOTSUPP;
 
+	hops->scrambled_vaddr = virt_addr;      /* assume no scrambling */
+
 	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
 						prop->dmmu.start_addr,
 						prop->dmmu.end_addr);
@@ -491,6 +559,11 @@ int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
 
 	mutex_unlock(&ctx->mmu_lock);
 
+	/* add page offset to physical address */
+	if (hops->unscrambled_paddr)
+		hl_mmu_pa_page_with_offset(ctx, virt_addr, hops,
+					&hops->unscrambled_paddr);
+
 	return rc;
 }
 
@@ -512,3 +585,28 @@ int hl_mmu_if_set_funcs(struct hl_device *hdev)
 
 	return 0;
 }
+
+/**
+ * hl_mmu_scramble_addr() - The generic mmu address scrambling routine.
+ * @hdev: pointer to device data.
+ * @addr: The address to scramble.
+ *
+ * Return: The scrambled address.
+ */
+u64 hl_mmu_scramble_addr(struct hl_device *hdev, u64 addr)
+{
+	return addr;
+}
+
+/**
+ * hl_mmu_descramble_addr() - The generic mmu address descrambling
+ * routine.
+ * @hdev: pointer to device data.
+ * @addr: The address to descramble.
+ *
+ * Return: The un-scrambled address.
+ */
+u64 hl_mmu_descramble_addr(struct hl_device *hdev, u64 addr)
+{
+	return addr;
+}
diff --git a/drivers/misc/habanalabs/common/mmu_v1.c b/drivers/misc/habanalabs/common/mmu/mmu_v1.c
index 06d8a44dd5d4..c5e93ff32586 100644
--- a/drivers/misc/habanalabs/common/mmu_v1.c
+++ b/drivers/misc/habanalabs/common/mmu/mmu_v1.c
@@ -5,8 +5,8 @@
  * All Rights Reserved.
  */
 
-#include "habanalabs.h"
-#include "../include/hw_ip/mmu/mmu_general.h"
+#include "../habanalabs.h"
+#include "../../include/hw_ip/mmu/mmu_general.h"
 
 #include <linux/slab.h>
 
diff --git a/drivers/misc/habanalabs/common/pci/Makefile b/drivers/misc/habanalabs/common/pci/Makefile
new file mode 100644
index 000000000000..dc922a686683
--- /dev/null
+++ b/drivers/misc/habanalabs/common/pci/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+HL_COMMON_PCI_FILES := common/pci/pci.o
diff --git a/drivers/misc/habanalabs/common/pci.c b/drivers/misc/habanalabs/common/pci/pci.c
index b4725e6101f6..b799f9258fb0 100644
--- a/drivers/misc/habanalabs/common/pci.c
+++ b/drivers/misc/habanalabs/common/pci/pci.c
@@ -5,8 +5,8 @@
  * All Rights Reserved.
  */
 
-#include "habanalabs.h"
-#include "../include/hw_ip/pci/pci_general.h"
+#include "../habanalabs.h"
+#include "../../include/hw_ip/pci/pci_general.h"
 
 #include <linux/pci.h>
 
@@ -308,40 +308,6 @@ int hl_pci_set_outbound_region(struct hl_device *hdev,
 }
 
 /**
- * hl_pci_set_dma_mask() - Set DMA masks for the device.
- * @hdev: Pointer to hl_device structure.
- *
- * This function sets the DMA masks (regular and consistent) for a specified
- * value. If it doesn't succeed, it tries to set it to a fall-back value
- *
- * Return: 0 on success, non-zero for failure.
- */
-static int hl_pci_set_dma_mask(struct hl_device *hdev)
-{
-	struct pci_dev *pdev = hdev->pdev;
-	int rc;
-
-	/* set DMA mask */
-	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(hdev->dma_mask));
-	if (rc) {
-		dev_err(hdev->dev,
-			"Failed to set pci dma mask to %d bits, error %d\n",
-			hdev->dma_mask, rc);
-		return rc;
-	}
-
-	rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(hdev->dma_mask));
-	if (rc) {
-		dev_err(hdev->dev,
-			"Failed to set pci consistent dma mask to %d bits, error %d\n",
-			hdev->dma_mask, rc);
-		return rc;
-	}
-
-	return 0;
-}
-
-/**
  * hl_pci_init() - PCI initialization code.
  * @hdev: Pointer to hl_device structure.
  *
@@ -377,9 +343,14 @@ int hl_pci_init(struct hl_device *hdev)
 		goto unmap_pci_bars;
 	}
 
-	rc = hl_pci_set_dma_mask(hdev);
-	if (rc)
+	rc = dma_set_mask_and_coherent(&pdev->dev,
+					DMA_BIT_MASK(hdev->dma_mask));
+	if (rc) {
+		dev_err(hdev->dev,
+			"Failed to set dma mask to %d bits, error %d\n",
+			hdev->dma_mask, rc);
 		goto unmap_pci_bars;
+	}
 
 	return 0;
 
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index b328ddaa64ee..9152242778f5 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -225,6 +225,12 @@ gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
 	"MSG AXI LBW returned with error"
 };
 
+enum gaudi_sm_sei_cause {
+	GAUDI_SM_SEI_SO_OVERFLOW,
+	GAUDI_SM_SEI_LBW_4B_UNALIGNED,
+	GAUDI_SM_SEI_AXI_RESPONSE_ERR
+};
+
 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
@@ -354,6 +360,10 @@ static int gaudi_send_job_on_qman0(struct hl_device *hdev,
 					struct hl_cs_job *job);
 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
 					u32 size, u64 val);
+static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
+					u32 num_regs, u32 val);
+static int gaudi_schedule_register_memset(struct hl_device *hdev,
+		u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val);
 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
 				u32 tpc_id);
 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
@@ -517,6 +527,11 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
 			prop->sync_stream_first_mon +
 			(num_sync_stream_queues * HL_RSVD_MONS);
 
+	prop->first_available_user_msix_interrupt = USHRT_MAX;
+
+	for (i = 0 ; i < HL_MAX_DCORES ; i++)
+		prop->first_available_cq[i] = USHRT_MAX;
+
 	/* disable fw security for now, set it in a later stage */
 	prop->fw_security_disabled = true;
 	prop->fw_security_status_valid = false;
@@ -913,11 +928,17 @@ static void gaudi_sob_group_hw_reset(struct kref *ref)
 	struct gaudi_hw_sob_group *hw_sob_group =
 		container_of(ref, struct gaudi_hw_sob_group, kref);
 	struct hl_device *hdev = hw_sob_group->hdev;
-	int i;
+	u64 base_addr;
+	int rc;
 
-	for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
-		WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
-				(hw_sob_group->base_sob_id + i) * 4, 0);
+	base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
+			hw_sob_group->base_sob_id * 4;
+	rc = gaudi_schedule_register_memset(hdev, hw_sob_group->queue_id,
+			base_addr, NUMBER_OF_SOBS_IN_GRP, 0);
+	if (rc)
+		dev_err(hdev->dev,
+			"failed resetting sob group - sob base %u, count %u",
+			hw_sob_group->base_sob_id, NUMBER_OF_SOBS_IN_GRP);
 
 	kref_init(&hw_sob_group->kref);
 }
@@ -1008,6 +1029,8 @@ static void gaudi_collective_master_init_job(struct hl_device *hdev,
 		cprop->hw_sob_group[sob_group_offset].base_sob_id;
 	master_monitor = prop->collective_mstr_mon_id[0];
 
+	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
+
 	dev_dbg(hdev->dev,
 		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
 		master_sob_base, cprop->mstr_sob_mask[0],
@@ -1248,7 +1271,7 @@ static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
 	u32 queue_id, collective_queue, num_jobs;
 	u32 stream, nic_queue, nic_idx = 0;
 	bool skip;
-	int i, rc;
+	int i, rc = 0;
 
 	/* Verify wait queue id is configured as master */
 	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
@@ -1359,8 +1382,6 @@ static int gaudi_late_init(struct hl_device *hdev)
 		return rc;
 	}
 
-	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);
-
 	rc = gaudi_fetch_psoc_frequency(hdev);
 	if (rc) {
 		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
@@ -1607,6 +1628,7 @@ static int gaudi_sw_init(struct hl_device *hdev)
 
 	hdev->supports_sync_stream = true;
 	hdev->supports_coresight = true;
+	hdev->supports_staged_submission = true;
 
 	return 0;
 
@@ -3438,6 +3460,12 @@ static void gaudi_set_clock_gating(struct hl_device *hdev)
 		enable = !!(hdev->clock_gating_mask &
 				(BIT_ULL(gaudi_dma_assignment[i])));
 
+		/* GC sends work to DMA engine through Upper CP in DMA5 so
+		 * clock gating must never be enabled for that DMA
+		 */
+		if (i == GAUDI_HBM_DMA_4)
+			enable = 0;
+
 		qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
 		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
 				enable ? QMAN_CGM1_PWR_GATE_EN : 0);
@@ -3704,6 +3732,7 @@ static int gaudi_init_cpu(struct hl_device *hdev)
 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
 {
 	struct gaudi_device *gaudi = hdev->asic_specific;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	struct hl_eq *eq;
 	u32 status;
 	struct hl_hw_queue *cpu_pq =
@@ -3760,6 +3789,10 @@ static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
 		return -EIO;
 	}
 
+	/* update FW application security bits */
+	if (prop->fw_security_status_valid)
+		prop->fw_app_security_map = RREG32(mmCPU_BOOT_DEV_STS0);
+
 	gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
 	return 0;
 }
@@ -4417,9 +4450,12 @@ static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
 	/* ring the doorbell */
 	WREG32(db_reg_offset, db_value);
 
-	if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ)
+	if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
+		/* make sure device CPU will read latest data from host */
+		mb();
 		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
 				GAUDI_EVENT_PI_UPDATE);
+	}
 }
 
 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
@@ -4518,7 +4554,6 @@ static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	struct gaudi_device *gaudi = hdev->asic_specific;
-	u64 idle_mask = 0;
 	int rc = 0;
 	u64 val = 0;
 
@@ -4531,8 +4566,8 @@ static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
 				hdev,
 				mmDMA0_CORE_STS0/* dummy */,
 				val/* dummy */,
-				(hdev->asic_funcs->is_device_idle(hdev,
-						&idle_mask, NULL)),
+				(hdev->asic_funcs->is_device_idle(hdev, NULL,
+						0, NULL)),
 						1000,
 						HBM_SCRUBBING_TIMEOUT_US);
 		if (rc) {
@@ -5060,7 +5095,8 @@ static int gaudi_validate_cb(struct hl_device *hdev,
 	 * 1. A packet that will act as a completion packet
 	 * 2. A packet that will generate MSI-X interrupt
 	 */
-	parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
+	if (parser->completion)
+		parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
 
 	return rc;
 }
@@ -5287,8 +5323,11 @@ static int gaudi_parse_cb_mmu(struct hl_device *hdev,
 	 * 1. A packet that will act as a completion packet
 	 * 2. A packet that will generate MSI interrupt
 	 */
-	parser->patched_cb_size = parser->user_cb_size +
-			sizeof(struct packet_msg_prot) * 2;
+	if (parser->completion)
+		parser->patched_cb_size = parser->user_cb_size +
+				sizeof(struct packet_msg_prot) * 2;
+	else
+		parser->patched_cb_size = parser->user_cb_size;
 
 	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
 				parser->patched_cb_size, false, false,
@@ -5304,10 +5343,10 @@ static int gaudi_parse_cb_mmu(struct hl_device *hdev,
 	patched_cb_handle >>= PAGE_SHIFT;
 	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
 				(u32) patched_cb_handle);
-	/* hl_cb_get should never fail here so use kernel WARN */
-	WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
-			(u32) patched_cb_handle);
+	/* hl_cb_get should never fail */
 	if (!parser->patched_cb) {
+		dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
+			(u32) patched_cb_handle);
 		rc = -EFAULT;
 		goto out;
 	}
@@ -5376,10 +5415,10 @@ static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
 	patched_cb_handle >>= PAGE_SHIFT;
 	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
 				(u32) patched_cb_handle);
-	/* hl_cb_get should never fail here so use kernel WARN */
-	WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
-			(u32) patched_cb_handle);
+	/* hl_cb_get should never fail here */
 	if (!parser->patched_cb) {
+		dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
+				(u32) patched_cb_handle);
 		rc = -EFAULT;
 		goto out;
 	}
@@ -5579,31 +5618,206 @@ release_cb:
 	return rc;
 }
 
-static void gaudi_restore_sm_registers(struct hl_device *hdev)
+/* Write @val to @num_regs consecutive 32-bit registers via a QMAN0 job */
+static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
+					u32 num_regs, u32 val)
+{
+	struct packet_msg_long *pkt;
+	struct hl_cs_job *job;
+	u32 cb_size, ctl;
+	struct hl_cb *cb;
+	int i, rc;
+
+	/* one MSG_LONG per register, plus the size of one MSG_PROT packet */
+	cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
+	if (cb_size > SZ_2M) {
+		dev_err(hdev->dev, "CB size must not exceed %dMB\n", SZ_2M >> 20);
+		return -ENOMEM;
+	}
+
+	cb = hl_cb_kernel_create(hdev, cb_size, false);
+	if (!cb)
+		return -EFAULT;
+
+	pkt = cb->kernel_address;
+
+	ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
+	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
+	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
+	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
+	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
+
+	for (i = 0; i < num_regs ; i++, pkt++) {
+		pkt->ctl = cpu_to_le32(ctl);
+		pkt->value = cpu_to_le32(val);
+		pkt->addr = cpu_to_le64(reg_base + (i * 4));
+	}
+
+	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
+	if (!job) {
+		dev_err(hdev->dev, "Failed to allocate a new job\n");
+		rc = -ENOMEM;
+		goto release_cb;
+	}
+
+	job->id = 0;
+	job->user_cb = cb;
+	atomic_inc(&job->user_cb->cs_cnt);
+	job->user_cb_size = cb_size;
+	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
+	job->patched_cb = job->user_cb;
+	job->job_cb_size = cb_size;
+
+	hl_debugfs_add_job(hdev, job);
+	rc = gaudi_send_job_on_qman0(hdev, job);
+	hl_debugfs_remove_job(hdev, job);
+	kfree(job);
+	atomic_dec(&cb->cs_cnt);
+
+release_cb:
+	hl_cb_put(cb);
+	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
+
+	return rc;
+}
+
+static int gaudi_schedule_register_memset(struct hl_device *hdev,
+		u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val)
 {
+	struct hl_ctx *ctx = hdev->compute_ctx;
+	struct hl_pending_cb *pending_cb;
+	struct packet_msg_long *pkt;
+	u32 cb_size, ctl;
+	struct hl_cb *cb;
 	int i;
 
-	for (i = 0 ; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4) {
-		WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
-		WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
-		WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
+	/* With no compute context, or one whose refcount already hit zero,
+	 * there is no pending-CB list to queue on: write the registers now
+	 */
+	if (!ctx || kref_read(&ctx->refcount) == 0)
+		return gaudi_memset_registers(hdev, reg_base, num_regs, val);
+
+	cb_size = (sizeof(*pkt) * num_regs) +
+			sizeof(struct packet_msg_prot) * 2;
+
+	if (cb_size > SZ_2M) {
+		dev_err(hdev->dev, "CB size must not exceed %dMB\n", SZ_2M >> 20);
+		return -ENOMEM;
+	}
+
+	pending_cb = kzalloc(sizeof(*pending_cb), GFP_KERNEL);
+	if (!pending_cb)
+		return -ENOMEM;
+
+	cb = hl_cb_kernel_create(hdev, cb_size, false);
+	if (!cb) {
+		kfree(pending_cb);
+		return -EFAULT;
 	}
 
-	for (i = 0 ; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4) {
-		WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
-		WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
-		WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
+	pkt = cb->kernel_address;
+
+	ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
+	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
+	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
+	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
+	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
+
+	for (i = 0; i < num_regs ; i++, pkt++) {
+		pkt->ctl = cpu_to_le32(ctl);
+		pkt->value = cpu_to_le32(val);
+		pkt->addr = cpu_to_le64(reg_base + (i * 4));
 	}
 
-	i = GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4;
+	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
 
-	for (; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4)
-		WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
+	pending_cb->cb = cb;
+	pending_cb->cb_size = cb_size;
+	/* The queue ID MUST be an external queue ID. Otherwise, we will
+	 * have undefined behavior
+	 */
+	pending_cb->hw_queue_id = hw_queue_id;
 
-	i = GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4;
+	spin_lock(&ctx->pending_cb_lock);
+	list_add_tail(&pending_cb->cb_node, &ctx->pending_cb_list);
+	spin_unlock(&ctx->pending_cb_lock);
 
-	for (; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4)
-		WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
+	return 0;
+}
+
+/* Zero the SOB and MON_STATUS registers of the four sync manager blocks */
+static int gaudi_restore_sm_registers(struct hl_device *hdev)
+{
+	u64 base_addr;
+	u32 num_regs;
+	int rc;
+
+	/* E_N block: clear all SOBs */
+	base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
+	rc = gaudi_memset_registers(hdev, base_addr, NUM_OF_SOB_IN_BLOCK, 0);
+	if (rc) {
+		dev_err(hdev->dev, "failed resetting SM registers\n");
+		return rc;
+	}
+
+	/* E_S block: clear all SOBs */
+	base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
+	rc = gaudi_memset_registers(hdev, base_addr, NUM_OF_SOB_IN_BLOCK, 0);
+	if (rc) {
+		dev_err(hdev->dev, "failed resetting SM registers\n");
+		return rc;
+	}
+
+	/* W_N block: clear all SOBs */
+	base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
+	rc = gaudi_memset_registers(hdev, base_addr, NUM_OF_SOB_IN_BLOCK, 0);
+	if (rc) {
+		dev_err(hdev->dev, "failed resetting SM registers\n");
+		return rc;
+	}
+
+	/* E_N block: clear all monitor status registers */
+	base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
+	rc = gaudi_memset_registers(hdev, base_addr, NUM_OF_MONITORS_IN_BLOCK, 0);
+	if (rc) {
+		dev_err(hdev->dev, "failed resetting SM registers\n");
+		return rc;
+	}
+
+	/* E_S block: clear all monitor status registers */
+	base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
+	rc = gaudi_memset_registers(hdev, base_addr, NUM_OF_MONITORS_IN_BLOCK, 0);
+	if (rc) {
+		dev_err(hdev->dev, "failed resetting SM registers\n");
+		return rc;
+	}
+
+	/* W_N block: clear all monitor status registers */
+	base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
+	rc = gaudi_memset_registers(hdev, base_addr, NUM_OF_MONITORS_IN_BLOCK, 0);
+	if (rc) {
+		dev_err(hdev->dev, "failed resetting SM registers\n");
+		return rc;
+	}
+
+	/* W_S block: leave the entries below GAUDI_FIRST_AVAILABLE_* untouched */
+	base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
+			(GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
+	num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
+	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
+	if (rc) {
+		dev_err(hdev->dev, "failed resetting SM registers\n");
+		return rc;
+	}
+
+	base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
+			(GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
+	num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
+	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
+	if (rc)
+		dev_err(hdev->dev, "failed resetting SM registers\n");
+
+	return rc;
 }
 
 static void gaudi_restore_dma_registers(struct hl_device *hdev)
@@ -5660,18 +5874,23 @@ static void gaudi_restore_qm_registers(struct hl_device *hdev)
 	}
 }
 
-static void gaudi_restore_user_registers(struct hl_device *hdev)
+static int gaudi_restore_user_registers(struct hl_device *hdev)
 {
-	gaudi_restore_sm_registers(hdev);
+	int rc;
+
+	rc = gaudi_restore_sm_registers(hdev);
+	if (rc)
+		return rc;
+
 	gaudi_restore_dma_registers(hdev);
 	gaudi_restore_qm_registers(hdev);
+
+	return 0;
 }
 
 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
 {
-	gaudi_restore_user_registers(hdev);
-
-	return 0;
+	return gaudi_restore_user_registers(hdev);
 }
 
 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
@@ -5730,8 +5949,6 @@ static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
 		}
 		if (hbm_bar_addr == U64_MAX)
 			rc = -EIO;
-	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
-		*val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
 	} else {
 		rc = -EFAULT;
 	}
@@ -5777,8 +5994,6 @@ static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
 		}
 		if (hbm_bar_addr == U64_MAX)
 			rc = -EIO;
-	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
-		*(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
 	} else {
 		rc = -EFAULT;
 	}
@@ -5828,8 +6043,6 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
 		}
 		if (hbm_bar_addr == U64_MAX)
 			rc = -EIO;
-	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
-		*val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
 	} else {
 		rc = -EFAULT;
 	}
@@ -5878,8 +6091,6 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
 		}
 		if (hbm_bar_addr == U64_MAX)
 			rc = -EIO;
-	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
-		*(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
 	} else {
 		rc = -EFAULT;
 	}
@@ -5924,7 +6135,7 @@ static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
 		return;
 
 	if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
-		WARN(1, "asid %u is too big\n", asid);
+		dev_crit(hdev->dev, "asid %u is too big\n", asid);
 		return;
 	}
 
@@ -6227,7 +6438,7 @@ static int gaudi_send_job_on_qman0(struct hl_device *hdev,
 	else
 		timeout = HL_DEVICE_TIMEOUT_USEC;
 
-	if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
+	if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
 		dev_err_ratelimited(hdev->dev,
 			"Can't send driver job on QMAN0 because the device is not idle\n");
 		return -EBUSY;
@@ -6658,6 +6869,34 @@ static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
 	}
 }
 
+/* Print the cause and the logged value of an SM SEI event */
+static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
+		struct hl_eq_sm_sei_data *sei_data)
+{
+	u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
+	u32 sei_log = le32_to_cpu(sei_data->sei_log);
+
+	switch (sei_data->sei_cause) {
+	case SM_SEI_SO_OVERFLOW:
+		dev_err(hdev->dev, "SM %u SEI Error: SO %u overflow/underflow\n",
+			index, sei_log);
+		break;
+	case SM_SEI_LBW_4B_UNALIGNED:
+		dev_err(hdev->dev,
+			"SM %u SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x\n",
+			index, sei_log);
+		break;
+	case SM_SEI_AXI_RESPONSE_ERR:
+		dev_err(hdev->dev, "SM %u SEI Error: AXI ID %u response error\n",
+			index, sei_log);
+		break;
+	default:
+		/* report the unrecognized cause itself, not the log word */
+		dev_err(hdev->dev, "Unknown SM SEI cause %u\n", sei_data->sei_cause);
+		break;
+	}
+}
+
 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
 		struct hl_eq_ecc_data *ecc_data)
 {
@@ -6874,7 +7113,9 @@ static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
 	u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
 	int err = 0;
 
-	if (!hdev->asic_prop.fw_security_disabled) {
+	if (hdev->asic_prop.fw_security_status_valid &&
+			(hdev->asic_prop.fw_app_security_map &
+				CPU_BOOT_DEV_STS0_HBM_ECC_EN)) {
 		if (!hbm_ecc_data) {
 			dev_err(hdev->dev, "No FW ECC data");
 			return 0;
@@ -6896,14 +7137,24 @@ static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
 
 		dev_err(hdev->dev,
-			"HBM%d pc%d ECC: TYPE=%d, WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
-			device, ch, type, wr_par, rd_par, ca_par, serr, derr);
+			"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
+			device, ch, wr_par, rd_par, ca_par, serr, derr);
+		dev_err(hdev->dev,
+			"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
+			device, ch, hbm_ecc_data->first_addr, type,
+			hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
+			hbm_ecc_data->dec_cnt);
 
 		err = 1;
 
 		return 0;
 	}
 
+	if (!hdev->asic_prop.fw_security_disabled) {
+		dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
+		return 0;
+	}
+
 	base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
 	for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
 		val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
@@ -7153,6 +7404,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
 		gaudi_hbm_read_interrupts(hdev,
 				gaudi_hbm_event_to_dev(event_type),
 				&eq_entry->hbm_ecc_data);
+		hl_fw_unmask_irq(hdev, event_type);
 		break;
 
 	case GAUDI_EVENT_TPC0_DEC:
@@ -7281,6 +7533,13 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
 		hl_fw_unmask_irq(hdev, event_type);
 		break;
 
+	case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
+		gaudi_print_irq_info(hdev, event_type, false);
+		gaudi_print_sm_sei_info(hdev, event_type,
+					&eq_entry->sm_sei_data);
+		hl_fw_unmask_irq(hdev, event_type);
+		break;
+
 	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
 		gaudi_print_clk_change_info(hdev, event_type);
 		hl_fw_unmask_irq(hdev, event_type);
@@ -7330,8 +7589,6 @@ static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
 	else
 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
 
-	mutex_lock(&hdev->mmu_cache_lock);
-
 	/* L0 & L1 invalidation */
 	WREG32(mmSTLB_INV_PS, 3);
 	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
@@ -7347,8 +7604,6 @@ static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
 
 	WREG32(mmSTLB_INV_SET, 0);
 
-	mutex_unlock(&hdev->mmu_cache_lock);
-
 	if (rc) {
 		dev_err_ratelimited(hdev->dev,
 					"MMU cache invalidation timeout\n");
@@ -7371,8 +7626,6 @@ static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
 		hdev->hard_reset_pending)
 		return 0;
 
-	mutex_lock(&hdev->mmu_cache_lock);
-
 	if (hdev->pldm)
 		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
 	else
@@ -7400,8 +7653,6 @@ static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
 		1000,
 		timeout_usec);
 
-	mutex_unlock(&hdev->mmu_cache_lock);
-
 	if (rc) {
 		dev_err_ratelimited(hdev->dev,
 					"MMU cache invalidation timeout\n");
@@ -7463,7 +7714,7 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev)
 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
 		return 0;
 
-	rc = hl_fw_cpucp_info_get(hdev, mmCPU_BOOT_DEV_STS0);
+	rc = hl_fw_cpucp_info_get(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0);
 	if (rc)
 		return rc;
 
@@ -7483,13 +7734,14 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev)
 	return 0;
 }
 
-static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask,
-					struct seq_file *s)
+static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
+					u8 mask_len, struct seq_file *s)
 {
 	struct gaudi_device *gaudi = hdev->asic_specific;
 	const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
 	const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
 	const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
+	unsigned long *mask = (unsigned long *)mask_arr;
 	u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
 	bool is_idle = true, is_eng_idle, is_slave;
 	u64 offset;
@@ -7515,9 +7767,8 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask,
 				IS_DMA_IDLE(dma_core_sts0);
 		is_idle &= is_eng_idle;
 
-		if (mask)
-			*mask |= ((u64) !is_eng_idle) <<
-					(GAUDI_ENGINE_ID_DMA_0 + dma_id);
+		if (mask && !is_eng_idle)
+			set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
 		if (s)
 			seq_printf(s, fmt, dma_id,
 				is_eng_idle ? "Y" : "N", qm_glbl_sts0,
@@ -7538,9 +7789,8 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask,
 				IS_TPC_IDLE(tpc_cfg_sts);
 		is_idle &= is_eng_idle;
 
-		if (mask)
-			*mask |= ((u64) !is_eng_idle) <<
-						(GAUDI_ENGINE_ID_TPC_0 + i);
+		if (mask && !is_eng_idle)
+			set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
 		if (s)
 			seq_printf(s, fmt, i,
 				is_eng_idle ? "Y" : "N",
@@ -7567,9 +7817,8 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask,
 
 		is_idle &= is_eng_idle;
 
-		if (mask)
-			*mask |= ((u64) !is_eng_idle) <<
-						(GAUDI_ENGINE_ID_MME_0 + i);
+		if (mask && !is_eng_idle)
+			set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
 		if (s) {
 			if (!is_slave)
 				seq_printf(s, fmt, i,
@@ -7595,9 +7844,8 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask,
 			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
 			is_idle &= is_eng_idle;
 
-			if (mask)
-				*mask |= ((u64) !is_eng_idle) <<
-						(GAUDI_ENGINE_ID_NIC_0 + port);
+			if (mask && !is_eng_idle)
+				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
 			if (s)
 				seq_printf(s, nic_fmt, port,
 						is_eng_idle ? "Y" : "N",
@@ -7611,9 +7859,8 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask,
 			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
 			is_idle &= is_eng_idle;
 
-			if (mask)
-				*mask |= ((u64) !is_eng_idle) <<
-						(GAUDI_ENGINE_ID_NIC_0 + port);
+			if (mask && !is_eng_idle)
+				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
 			if (s)
 				seq_printf(s, nic_fmt, port,
 						is_eng_idle ? "Y" : "N",
@@ -7876,18 +8123,16 @@ static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
 
 static int gaudi_ctx_init(struct hl_ctx *ctx)
 {
+	if (ctx->asid == HL_KERNEL_ASID_ID)
+		return 0;	/* kernel ASID: skip MMU prepare and CB pool init */
+
 	gaudi_mmu_prepare(ctx->hdev, ctx->asid);
 	return gaudi_internal_cb_pool_init(ctx->hdev, ctx);
 }
 
 static void gaudi_ctx_fini(struct hl_ctx *ctx)
 {
-	struct hl_device *hdev = ctx->hdev;
-
-	/* Gaudi will NEVER support more then a single compute context.
-	 * Therefore, don't clear anything unless it is the compute context
-	 */
-	if (hdev->compute_ctx != ctx)
+	if (ctx->asid == HL_KERNEL_ASID_ID)
 		return;
 
 	gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
@@ -7928,10 +8173,10 @@ static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
-	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
-	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, eb);
-	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
-	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
+	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
+	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
+	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
+	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
 
 	pkt->value = cpu_to_le32(value);
 	pkt->ctl = cpu_to_le32(ctl);
@@ -7948,10 +8193,10 @@ static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
 
 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
-	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
-	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
-	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
-	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 0); /* last pkt MB */
+	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
+	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
+	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
+	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
 
 	pkt->value = cpu_to_le32(value);
 	pkt->ctl = cpu_to_le32(ctl);
@@ -7997,10 +8242,10 @@ static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
-	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
-	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
-	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
-	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
+	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
+	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
+	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
+	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
 
 	pkt->value = cpu_to_le32(value);
 	pkt->ctl = cpu_to_le32(ctl);
@@ -8018,10 +8263,10 @@ static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
 	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
 	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
 
-	ctl = FIELD_PREP(GAUDI_PKT_FENCE_CTL_OPCODE_MASK, PACKET_FENCE);
-	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
-	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
-	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
+	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
+	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
+	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
+	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
 
 	pkt->cfg = cpu_to_le32(cfg);
 	pkt->ctl = cpu_to_le32(ctl);
@@ -8217,12 +8462,16 @@ static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
 {
 	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
+	int rc;
 
 	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
 		hw_sob->sob_id);
 
-	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4,
-		0);
+	/* zero this SOB's register (a single 32-bit entry, hence num_regs 1) */
+	rc = gaudi_schedule_register_memset(hdev, hw_sob->q_idx, CFG_BASE +
+		mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4, 1, 0);
+	if (rc)
+		dev_err(hdev->dev, "failed resetting sob %u\n", hw_sob->sob_id);
 
 	kref_init(&hw_sob->kref);
 }
@@ -8246,6 +8495,24 @@ static u64 gaudi_get_device_time(struct hl_device *hdev)
 	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
 }
 
+static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
+				u32 *block_size, u32 *block_id)
+{
+	return -EPERM;	/* unconditional: no lookup is performed */
+}
+
+/* Unconditionally refuses the mapping request with -EPERM */
+static int gaudi_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
+				u32 block_id, u32 block_size)
+{
+	return -EPERM;
+}
+
+static void gaudi_enable_events_from_fw(struct hl_device *hdev)
+{
+	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);
+}
+
 static const struct hl_asic_funcs gaudi_funcs = {
 	.early_init = gaudi_early_init,
 	.early_fini = gaudi_early_fini,
@@ -8322,7 +8589,13 @@ static const struct hl_asic_funcs gaudi_funcs = {
 	.set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
 	.get_device_time = gaudi_get_device_time,
 	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
-	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs
+	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
+	.scramble_addr = hl_mmu_scramble_addr,
+	.descramble_addr = hl_mmu_descramble_addr,
+	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
+	.get_hw_block_id = gaudi_get_hw_block_id,
+	.hw_block_mmap = gaudi_block_mmap,
+	.enable_events_from_fw = gaudi_enable_events_from_fw
 };
 
 /**
diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h
index a7ab2d7e57d4..50bb4ad570fd 100644
--- a/drivers/misc/habanalabs/gaudi/gaudiP.h
+++ b/drivers/misc/habanalabs/gaudi/gaudiP.h
@@ -251,11 +251,13 @@ enum gaudi_nic_mask {
  * @hdev: habanalabs device structure.
  * @kref: refcount of this SOB group. group will reset once refcount is zero.
  * @base_sob_id: base sob id of this SOB group.
+ * @queue_id: id of the queue that waits on this sob group
  */
 struct gaudi_hw_sob_group {
 	struct hl_device	*hdev;
 	struct kref		kref;
 	u32			base_sob_id;
+	u32			queue_id;
 };
 
 #define NUM_SOB_GROUPS (HL_RSVD_SOBS * QMAN_STREAMS)
@@ -333,6 +335,7 @@ struct gaudi_device {
 };
 
 void gaudi_init_security(struct hl_device *hdev);
+void gaudi_ack_protection_bits_errors(struct hl_device *hdev);
 void gaudi_add_device_attr(struct hl_device *hdev,
 			struct attribute_group *dev_attr_grp);
 void gaudi_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq);
diff --git a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c
index 88a09d42e111..6e56fa1c6c69 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c
@@ -634,9 +634,21 @@ static int gaudi_config_etr(struct hl_device *hdev,
 		WREG32(mmPSOC_ETR_BUFWM, 0x3FFC);
 		WREG32(mmPSOC_ETR_RSZ, input->buffer_size);
 		WREG32(mmPSOC_ETR_MODE, input->sink_mode);
-		/* Workaround for H3 #HW-2075 bug: use small data chunks */
-		WREG32(mmPSOC_ETR_AXICTL, (is_host ? 0 : 0x700) |
-					PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT);
+		if (hdev->asic_prop.fw_security_disabled) {
+			/* make ETR not privileged */
+			val = FIELD_PREP(
+					PSOC_ETR_AXICTL_PROTCTRLBIT0_MASK, 0);
+			/* make ETR non-secured (inverted logic) */
+			val |= FIELD_PREP(
+					PSOC_ETR_AXICTL_PROTCTRLBIT1_MASK, 1);
+			/*
+			 * Workaround for H3 #HW-2075 bug: use small data
+			 * chunks
+			 */
+			val |= FIELD_PREP(PSOC_ETR_AXICTL_WRBURSTLEN_MASK,
+							is_host ? 0 : 7);
+			WREG32(mmPSOC_ETR_AXICTL, val);
+		}
 		WREG32(mmPSOC_ETR_DBALO,
 				lower_32_bits(input->buffer_address));
 		WREG32(mmPSOC_ETR_DBAHI,
diff --git a/drivers/misc/habanalabs/gaudi/gaudi_security.c b/drivers/misc/habanalabs/gaudi/gaudi_security.c
index e10181692d0b..7085f45814ae 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi_security.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi_security.c
@@ -13052,3 +13052,8 @@ void gaudi_init_security(struct hl_device *hdev)
 
 	gaudi_init_protection_bits(hdev);
 }
+
+void gaudi_ack_protection_bits_errors(struct hl_device *hdev)
+{
+	/* NOTE(review): empty body - presumably a placeholder hook; confirm */
+}
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 63679a747d2c..ed566c52ccaa 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -455,6 +455,11 @@ int goya_get_fixed_properties(struct hl_device *hdev)
 
 	prop->max_pending_cs = GOYA_MAX_PENDING_CS;
 
+	prop->first_available_user_msix_interrupt = USHRT_MAX;
+
+	for (i = 0 ; i < HL_MAX_DCORES ; i++)
+		prop->first_available_cq[i] = USHRT_MAX;
+
 	/* disable fw security for now, set it in a later stage */
 	prop->fw_security_disabled = true;
 	prop->fw_security_status_valid = false;
@@ -792,9 +797,6 @@ int goya_late_init(struct hl_device *hdev)
 		return rc;
 	}
 
-	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
-			GOYA_ASYNC_EVENT_ID_INTS_REGISTER);
-
 	return 0;
 }
 
@@ -1186,6 +1188,7 @@ static int goya_stop_external_queues(struct hl_device *hdev)
 int goya_init_cpu_queues(struct hl_device *hdev)
 {
 	struct goya_device *goya = hdev->asic_specific;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	struct hl_eq *eq;
 	u32 status;
 	struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ];
@@ -1238,6 +1241,10 @@ int goya_init_cpu_queues(struct hl_device *hdev)
 		return -EIO;
 	}
 
+	/* update FW application security bits */
+	if (prop->fw_security_status_valid)
+		prop->fw_app_security_map = RREG32(mmCPU_BOOT_DEV_STS0);
+
 	goya->hw_cap_initialized |= HW_CAP_CPU_Q;
 	return 0;
 }
@@ -2804,9 +2811,12 @@ void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
 	/* ring the doorbell */
 	WREG32(db_reg_offset, db_value);
 
-	if (hw_queue_id == GOYA_QUEUE_ID_CPU_PQ)
+	if (hw_queue_id == GOYA_QUEUE_ID_CPU_PQ) {
+		/* make sure device CPU will read latest data from host */
+		mb();
 		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
 				GOYA_ASYNC_EVENT_ID_PI_UPDATE);
+	}
 }
 
 void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
@@ -2914,7 +2924,7 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
 	else
 		timeout = HL_DEVICE_TIMEOUT_USEC;
 
-	if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
+	if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
 		dev_err_ratelimited(hdev->dev,
 			"Can't send driver job on QMAN0 because the device is not idle\n");
 		return -EBUSY;
@@ -3876,10 +3886,10 @@ static int goya_parse_cb_mmu(struct hl_device *hdev,
 	patched_cb_handle >>= PAGE_SHIFT;
 	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
 				(u32) patched_cb_handle);
-	/* hl_cb_get should never fail here so use kernel WARN */
-	WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
-			(u32) patched_cb_handle);
+	/* hl_cb_get should never fail here */
 	if (!parser->patched_cb) {
+		dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
+			(u32) patched_cb_handle);
 		rc = -EFAULT;
 		goto out;
 	}
@@ -3948,10 +3958,10 @@ static int goya_parse_cb_no_mmu(struct hl_device *hdev,
 	patched_cb_handle >>= PAGE_SHIFT;
 	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
 				(u32) patched_cb_handle);
-	/* hl_cb_get should never fail here so use kernel WARN */
-	WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
-			(u32) patched_cb_handle);
+	/* hl_cb_get should never fail here */
 	if (!parser->patched_cb) {
+		dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
+			(u32) patched_cb_handle);
 		rc = -EFAULT;
 		goto out;
 	}
@@ -4122,9 +4132,6 @@ static int goya_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
 		if (ddr_bar_addr == U64_MAX)
 			rc = -EIO;
 
-	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
-		*val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
-
 	} else {
 		rc = -EFAULT;
 	}
@@ -4178,9 +4185,6 @@ static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
 		if (ddr_bar_addr == U64_MAX)
 			rc = -EIO;
 
-	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
-		*(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
-
 	} else {
 		rc = -EFAULT;
 	}
@@ -4223,9 +4227,6 @@ static int goya_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
 		if (ddr_bar_addr == U64_MAX)
 			rc = -EIO;
 
-	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
-		*val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
-
 	} else {
 		rc = -EFAULT;
 	}
@@ -4266,9 +4267,6 @@ static int goya_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
 		if (ddr_bar_addr == U64_MAX)
 			rc = -EIO;
 
-	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
-		*(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
-
 	} else {
 		rc = -EFAULT;
 	}
@@ -4877,8 +4875,6 @@ int goya_context_switch(struct hl_device *hdev, u32 asid)
 
 	WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);
 
-	goya_mmu_prepare(hdev, asid);
-
 	goya_clear_sm_regs(hdev);
 
 	return 0;
@@ -5044,7 +5040,7 @@ static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
 		return;
 
 	if (asid & ~MME_QM_GLBL_SECURE_PROPS_ASID_MASK) {
-		WARN(1, "asid %u is too big\n", asid);
+		dev_crit(hdev->dev, "asid %u is too big\n", asid);
 		return;
 	}
 
@@ -5073,8 +5069,6 @@ static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
 	else
 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
 
-	mutex_lock(&hdev->mmu_cache_lock);
-
 	/* L0 & L1 invalidation */
 	WREG32(mmSTLB_INV_ALL_START, 1);
 
@@ -5086,8 +5080,6 @@ static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
 		1000,
 		timeout_usec);
 
-	mutex_unlock(&hdev->mmu_cache_lock);
-
 	if (rc) {
 		dev_err_ratelimited(hdev->dev,
 					"MMU cache invalidation timeout\n");
@@ -5117,8 +5109,6 @@ static int goya_mmu_invalidate_cache_range(struct hl_device *hdev,
 	else
 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
 
-	mutex_lock(&hdev->mmu_cache_lock);
-
 	/*
 	 * TODO: currently invalidate entire L0 & L1 as in regular hard
 	 * invalidation. Need to apply invalidation of specific cache lines with
@@ -5141,8 +5131,6 @@ static int goya_mmu_invalidate_cache_range(struct hl_device *hdev,
 		1000,
 		timeout_usec);
 
-	mutex_unlock(&hdev->mmu_cache_lock);
-
 	if (rc) {
 		dev_err_ratelimited(hdev->dev,
 					"MMU cache invalidation timeout\n");
@@ -5172,7 +5160,7 @@ int goya_cpucp_info_get(struct hl_device *hdev)
 	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
 		return 0;
 
-	rc = hl_fw_cpucp_info_get(hdev, mmCPU_BOOT_DEV_STS0);
+	rc = hl_fw_cpucp_info_get(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0);
 	if (rc)
 		return rc;
 
@@ -5207,11 +5195,12 @@ static void goya_disable_clock_gating(struct hl_device *hdev)
 	/* clock gating not supported in Goya */
 }
 
-static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask,
-				struct seq_file *s)
+static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
+					u8 mask_len, struct seq_file *s)
 {
 	const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n";
 	const char *dma_fmt = "%-5d%-9s%#-14x%#x\n";
+	unsigned long *mask = (unsigned long *)mask_arr;
 	u32 qm_glbl_sts0, cmdq_glbl_sts0, dma_core_sts0, tpc_cfg_sts,
 		mme_arch_sts;
 	bool is_idle = true, is_eng_idle;
@@ -5231,9 +5220,8 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask,
 				IS_DMA_IDLE(dma_core_sts0);
 		is_idle &= is_eng_idle;
 
-		if (mask)
-			*mask |= ((u64) !is_eng_idle) <<
-						(GOYA_ENGINE_ID_DMA_0 + i);
+		if (mask && !is_eng_idle)
+			set_bit(GOYA_ENGINE_ID_DMA_0 + i, mask);
 		if (s)
 			seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N",
 					qm_glbl_sts0, dma_core_sts0);
@@ -5255,9 +5243,8 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask,
 				IS_TPC_IDLE(tpc_cfg_sts);
 		is_idle &= is_eng_idle;
 
-		if (mask)
-			*mask |= ((u64) !is_eng_idle) <<
-						(GOYA_ENGINE_ID_TPC_0 + i);
+		if (mask && !is_eng_idle)
+			set_bit(GOYA_ENGINE_ID_TPC_0 + i, mask);
 		if (s)
 			seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N",
 				qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts);
@@ -5276,8 +5263,8 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask,
 			IS_MME_IDLE(mme_arch_sts);
 	is_idle &= is_eng_idle;
 
-	if (mask)
-		*mask |= ((u64) !is_eng_idle) << GOYA_ENGINE_ID_MME_0;
+	if (mask && !is_eng_idle)
+		set_bit(GOYA_ENGINE_ID_MME_0, mask);
 	if (s) {
 		seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
 				cmdq_glbl_sts0, mme_arch_sts);
@@ -5321,6 +5308,9 @@ static int goya_get_eeprom_data(struct hl_device *hdev, void *data,
 
 static int goya_ctx_init(struct hl_ctx *ctx)
 {
+	if (ctx->asid != HL_KERNEL_ASID_ID)
+		goya_mmu_prepare(ctx->hdev, ctx->asid);
+
 	return 0;
 }
 
@@ -5399,6 +5389,24 @@ static void goya_ctx_fini(struct hl_ctx *ctx)
 
 }
 
+static int goya_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
+			u32 *block_size, u32 *block_id)
+{
+	return -EPERM;
+}
+
+static int goya_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
+				u32 block_id, u32 block_size)
+{
+	return -EPERM;
+}
+
+static void goya_enable_events_from_fw(struct hl_device *hdev)
+{
+	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
+			GOYA_ASYNC_EVENT_ID_INTS_REGISTER);
+}
+
 static const struct hl_asic_funcs goya_funcs = {
 	.early_init = goya_early_init,
 	.early_fini = goya_early_fini,
@@ -5475,7 +5483,13 @@ static const struct hl_asic_funcs goya_funcs = {
 	.set_dma_mask_from_fw = goya_set_dma_mask_from_fw,
 	.get_device_time = goya_get_device_time,
 	.collective_wait_init_cs = goya_collective_wait_init_cs,
-	.collective_wait_create_jobs = goya_collective_wait_create_jobs
+	.collective_wait_create_jobs = goya_collective_wait_create_jobs,
+	.scramble_addr = hl_mmu_scramble_addr,
+	.descramble_addr = hl_mmu_descramble_addr,
+	.ack_protection_bits_errors = goya_ack_protection_bits_errors,
+	.get_hw_block_id = goya_get_hw_block_id,
+	.hw_block_mmap = goya_block_mmap,
+	.enable_events_from_fw = goya_enable_events_from_fw
 };
 
 /*
diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h
index 8b3408211af6..23fe099ed218 100644
--- a/drivers/misc/habanalabs/goya/goyaP.h
+++ b/drivers/misc/habanalabs/goya/goyaP.h
@@ -173,6 +173,7 @@ void goya_init_mme_qmans(struct hl_device *hdev);
 void goya_init_tpc_qmans(struct hl_device *hdev);
 int goya_init_cpu_queues(struct hl_device *hdev);
 void goya_init_security(struct hl_device *hdev);
+void goya_ack_protection_bits_errors(struct hl_device *hdev);
 int goya_late_init(struct hl_device *hdev);
 void goya_late_fini(struct hl_device *hdev);
 
diff --git a/drivers/misc/habanalabs/goya/goya_coresight.c b/drivers/misc/habanalabs/goya/goya_coresight.c
index 6fa03933b438..6b7445cca580 100644
--- a/drivers/misc/habanalabs/goya/goya_coresight.c
+++ b/drivers/misc/habanalabs/goya/goya_coresight.c
@@ -434,8 +434,15 @@ static int goya_config_etr(struct hl_device *hdev,
 		WREG32(mmPSOC_ETR_BUFWM, 0x3FFC);
 		WREG32(mmPSOC_ETR_RSZ, input->buffer_size);
 		WREG32(mmPSOC_ETR_MODE, input->sink_mode);
-		WREG32(mmPSOC_ETR_AXICTL,
-				0x700 | PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT);
+		if (hdev->asic_prop.fw_security_disabled) {
+			/* make ETR not privileged */
+			val = FIELD_PREP(PSOC_ETR_AXICTL_PROTCTRLBIT0_MASK, 0);
+			/* make ETR non-secured (inverted logic) */
+			val |= FIELD_PREP(PSOC_ETR_AXICTL_PROTCTRLBIT1_MASK, 1);
+			/* burst size 8 */
+			val |= FIELD_PREP(PSOC_ETR_AXICTL_WRBURSTLEN_MASK, 7);
+			WREG32(mmPSOC_ETR_AXICTL, val);
+		}
 		WREG32(mmPSOC_ETR_DBALO,
 				lower_32_bits(input->buffer_address));
 		WREG32(mmPSOC_ETR_DBAHI,
diff --git a/drivers/misc/habanalabs/goya/goya_security.c b/drivers/misc/habanalabs/goya/goya_security.c
index 14701836f92b..14c3bae3ccdc 100644
--- a/drivers/misc/habanalabs/goya/goya_security.c
+++ b/drivers/misc/habanalabs/goya/goya_security.c
@@ -3120,3 +3120,8 @@ void goya_init_security(struct hl_device *hdev)
 
 	goya_init_protection_bits(hdev);
 }
+
+void goya_ack_protection_bits_errors(struct hl_device *hdev)
+{
+
+}
diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h
index 00bd9b392f93..b77c1c16c32c 100644
--- a/drivers/misc/habanalabs/include/common/cpucp_if.h
+++ b/drivers/misc/habanalabs/include/common/cpucp_if.h
@@ -58,11 +58,25 @@ struct hl_eq_ecc_data {
 	__u8 pad[7];
 };
 
+enum hl_sm_sei_cause {
+	SM_SEI_SO_OVERFLOW,
+	SM_SEI_LBW_4B_UNALIGNED,
+	SM_SEI_AXI_RESPONSE_ERR
+};
+
+struct hl_eq_sm_sei_data {
+	__le32 sei_log;
+	/* enum hl_sm_sei_cause */
+	__u8 sei_cause;
+	__u8 pad[3];
+};
+
 struct hl_eq_entry {
 	struct hl_eq_header hdr;
 	union {
 		struct hl_eq_ecc_data ecc_data;
 		struct hl_eq_hbm_ecc_data hbm_ecc_data;
+		struct hl_eq_sm_sei_data sm_sei_data;
 		__le64 data[7];
 	};
 };
diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h
index b637dfd69f6e..e87f5a98e193 100644
--- a/drivers/misc/habanalabs/include/common/hl_boot_if.h
+++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h
@@ -70,6 +70,9 @@
  *					checksum. Trying to program image again
  *					might solve this.
  *
+ * CPU_BOOT_ERR0_PLL_FAIL		PLL settings failed, meaning that one
+ *					of the PLLs remains in REF_CLK
+ *
  * CPU_BOOT_ERR0_ENABLED		Error registers enabled.
  *					This is a main indication that the
  *					running FW populates the error
@@ -88,6 +91,7 @@
 #define CPU_BOOT_ERR0_EFUSE_FAIL		(1 << 9)
 #define CPU_BOOT_ERR0_PRI_IMG_VER_FAIL		(1 << 10)
 #define CPU_BOOT_ERR0_SEC_IMG_VER_FAIL		(1 << 11)
+#define CPU_BOOT_ERR0_PLL_FAIL			(1 << 12)
 #define CPU_BOOT_ERR0_ENABLED			(1 << 31)
 
 /*
@@ -150,10 +154,22 @@
  * CPU_BOOT_DEV_STS0_PLL_INFO_EN	FW retrieval of PLL info is enabled.
  *					Initialized in: linux
  *
+ * CPU_BOOT_DEV_STS0_SP_SRAM_EN		SP SRAM is initialized and available
+ *					for use.
+ *					Initialized in: preboot
+ *
  * CPU_BOOT_DEV_STS0_CLK_GATE_EN	Clock Gating enabled.
  *					FW initialized Clock Gating.
  *					Initialized in: preboot
  *
+ * CPU_BOOT_DEV_STS0_HBM_ECC_EN		HBM ECC handling Enabled.
+ *					FW handles HBM ECC indications.
+ *					Initialized in: linux
+ *
+ * CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN	Packets ack value used in the armcpd
+ *					is set to the PI counter.
+ *					Initialized in: linux
+ *
  * CPU_BOOT_DEV_STS0_ENABLED		Device status register enabled.
  *					This is a main indication that the
  *					running FW populates the device status
@@ -175,7 +191,10 @@
 #define CPU_BOOT_DEV_STS0_DRAM_SCR_EN			(1 << 9)
 #define CPU_BOOT_DEV_STS0_FW_HARD_RST_EN		(1 << 10)
 #define CPU_BOOT_DEV_STS0_PLL_INFO_EN			(1 << 11)
+#define CPU_BOOT_DEV_STS0_SP_SRAM_EN			(1 << 12)
 #define CPU_BOOT_DEV_STS0_CLK_GATE_EN			(1 << 13)
+#define CPU_BOOT_DEV_STS0_HBM_ECC_EN			(1 << 14)
+#define CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN			(1 << 15)
 #define CPU_BOOT_DEV_STS0_ENABLED			(1 << 31)
 
 enum cpu_boot_status {
diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h b/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h
index 9ccba8437ec9..49335e8334b4 100644
--- a/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h
+++ b/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h
@@ -212,6 +212,10 @@ enum gaudi_async_event_id {
 	GAUDI_EVENT_NIC_SEI_2 = 266,
 	GAUDI_EVENT_NIC_SEI_3 = 267,
 	GAUDI_EVENT_NIC_SEI_4 = 268,
+	GAUDI_EVENT_DMA_IF_SEI_0 = 277,
+	GAUDI_EVENT_DMA_IF_SEI_1 = 278,
+	GAUDI_EVENT_DMA_IF_SEI_2 = 279,
+	GAUDI_EVENT_DMA_IF_SEI_3 = 280,
 	GAUDI_EVENT_PCIE_FLR = 290,
 	GAUDI_EVENT_TPC0_BMON_SPMU = 300,
 	GAUDI_EVENT_TPC0_KRN_ERR = 301,
diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_masks.h b/drivers/misc/habanalabs/include/gaudi/gaudi_masks.h
index b9b90d079e23..b53aeda9a982 100644
--- a/drivers/misc/habanalabs/include/gaudi/gaudi_masks.h
+++ b/drivers/misc/habanalabs/include/gaudi/gaudi_masks.h
@@ -388,7 +388,10 @@ enum axi_id {
 #define RAZWI_INITIATOR_ID_X_Y_TPC6		RAZWI_INITIATOR_ID_X_Y(7, 6)
 #define RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5	RAZWI_INITIATOR_ID_X_Y(8, 6)
 
-#define PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT                           1
+#define PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT	1
+#define PSOC_ETR_AXICTL_PROTCTRLBIT0_MASK	0x1
+#define PSOC_ETR_AXICTL_PROTCTRLBIT1_MASK	0x2
+#define PSOC_ETR_AXICTL_WRBURSTLEN_MASK		0xF00
 
 /* STLB_CACHE_INV */
 #define STLB_CACHE_INV_PRODUCER_INDEX_SHIFT                          0
diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_packets.h b/drivers/misc/habanalabs/include/gaudi/gaudi_packets.h
index f30f2c0458d7..6e097ace2e96 100644
--- a/drivers/misc/habanalabs/include/gaudi/gaudi_packets.h
+++ b/drivers/misc/habanalabs/include/gaudi/gaudi_packets.h
@@ -78,6 +78,9 @@ struct packet_wreg_bulk {
 	__le64 values[0]; /* data starts here */
 };
 
+#define GAUDI_PKT_LONG_CTL_OP_SHIFT		20
+#define GAUDI_PKT_LONG_CTL_OP_MASK		0x00300000
+
 struct packet_msg_long {
 	__le32 value;
 	__le32 ctl;
@@ -111,18 +114,6 @@ struct packet_msg_long {
 #define GAUDI_PKT_SHORT_CTL_BASE_SHIFT		22
 #define GAUDI_PKT_SHORT_CTL_BASE_MASK		0x00C00000
 
-#define GAUDI_PKT_SHORT_CTL_OPCODE_SHIFT	24
-#define GAUDI_PKT_SHORT_CTL_OPCODE_MASK		0x1F000000
-
-#define GAUDI_PKT_SHORT_CTL_EB_SHIFT		29
-#define GAUDI_PKT_SHORT_CTL_EB_MASK		0x20000000
-
-#define GAUDI_PKT_SHORT_CTL_RB_SHIFT		30
-#define GAUDI_PKT_SHORT_CTL_RB_MASK		0x40000000
-
-#define GAUDI_PKT_SHORT_CTL_MB_SHIFT		31
-#define GAUDI_PKT_SHORT_CTL_MB_MASK		0x80000000
-
 struct packet_msg_short {
 	__le32 value;
 	__le32 ctl;
@@ -146,18 +137,6 @@ struct packet_msg_prot {
 #define GAUDI_PKT_FENCE_CTL_PRED_SHIFT		0
 #define GAUDI_PKT_FENCE_CTL_PRED_MASK		0x0000001F
 
-#define GAUDI_PKT_FENCE_CTL_OPCODE_SHIFT	24
-#define GAUDI_PKT_FENCE_CTL_OPCODE_MASK		0x1F000000
-
-#define GAUDI_PKT_FENCE_CTL_EB_SHIFT		29
-#define GAUDI_PKT_FENCE_CTL_EB_MASK		0x20000000
-
-#define GAUDI_PKT_FENCE_CTL_RB_SHIFT		30
-#define GAUDI_PKT_FENCE_CTL_RB_MASK		0x40000000
-
-#define GAUDI_PKT_FENCE_CTL_MB_SHIFT		31
-#define GAUDI_PKT_FENCE_CTL_MB_MASK		0x80000000
-
 struct packet_fence {
 	__le32 cfg;
 	__le32 ctl;
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h
index 067489bd048e..9ff3cb245580 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h
@@ -259,6 +259,9 @@
 #define DMA_QM_3_GLBL_CFG1_DMA_STOP_SHIFT DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT
 #define DMA_QM_4_GLBL_CFG1_DMA_STOP_SHIFT DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT
 
-#define PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT                           1
+#define PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT	1
+#define PSOC_ETR_AXICTL_PROTCTRLBIT0_MASK	0x1
+#define PSOC_ETR_AXICTL_PROTCTRLBIT1_MASK	0x2
+#define PSOC_ETR_AXICTL_WRBURSTLEN_MASK		0xF00
 
 #endif /* ASIC_REG_GOYA_MASKS_H_ */
diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c
index 2907db260fba..935acc6bbf3c 100644
--- a/drivers/misc/mei/bus.c
+++ b/drivers/misc/mei/bus.c
@@ -44,7 +44,8 @@ ssize_t __mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length, u8 vtag,
 	bus = cl->dev;
 
 	mutex_lock(&bus->device_lock);
-	if (bus->dev_state != MEI_DEV_ENABLED) {
+	if (bus->dev_state != MEI_DEV_ENABLED &&
+	    bus->dev_state != MEI_DEV_POWERING_DOWN) {
 		rets = -ENODEV;
 		goto out;
 	}
@@ -60,6 +61,13 @@ ssize_t __mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length, u8 vtag,
 		goto out;
 	}
 
+	if (vtag) {
+		/* Check if vtag is supported by client */
+		rets = mei_cl_vt_support_check(cl);
+		if (rets)
+			goto out;
+	}
+
 	if (length > mei_cl_mtu(cl)) {
 		rets = -EFBIG;
 		goto out;
@@ -128,7 +136,8 @@ ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length, u8 *vtag,
 	bus = cl->dev;
 
 	mutex_lock(&bus->device_lock);
-	if (bus->dev_state != MEI_DEV_ENABLED) {
+	if (bus->dev_state != MEI_DEV_ENABLED &&
+	    bus->dev_state != MEI_DEV_POWERING_DOWN) {
 		rets = -ENODEV;
 		goto out;
 	}
@@ -878,22 +887,17 @@ static int mei_cl_device_probe(struct device *dev)
 static int mei_cl_device_remove(struct device *dev)
 {
 	struct mei_cl_device *cldev = to_mei_cl_device(dev);
-	struct mei_cl_driver *cldrv;
-	int ret = 0;
+	struct mei_cl_driver *cldrv = to_mei_cl_driver(dev->driver);
 
-	if (!cldev || !dev->driver)
-		return 0;
-
-	cldrv = to_mei_cl_driver(dev->driver);
 	if (cldrv->remove)
-		ret = cldrv->remove(cldev);
+		cldrv->remove(cldev);
 
 	mei_cldev_unregister_callbacks(cldev);
 
 	mei_cl_bus_module_put(cldev);
 	module_put(THIS_MODULE);
 
-	return ret;
+	return 0;
 }
 
 static ssize_t name_show(struct device *dev, struct device_attribute *a,
diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c
index a56d41321f32..4378a9b25848 100644
--- a/drivers/misc/mei/client.c
+++ b/drivers/misc/mei/client.c
@@ -9,6 +9,7 @@
 #include <linux/delay.h>
 #include <linux/slab.h>
 #include <linux/pm_runtime.h>
+#include <linux/dma-mapping.h>
 
 #include <linux/mei.h>
 
@@ -990,7 +991,8 @@ int mei_cl_disconnect(struct mei_cl *cl)
 		return 0;
 	}
 
-	if (dev->dev_state == MEI_DEV_POWER_DOWN) {
+	if (dev->dev_state == MEI_DEV_POWERING_DOWN ||
+	    dev->dev_state == MEI_DEV_POWER_DOWN) {
 		cl_dbg(dev, cl, "Device is powering down, don't bother with disconnection\n");
 		mei_cl_set_disconnected(cl);
 		return 0;
@@ -1737,7 +1739,7 @@ static inline u8 mei_ext_hdr_set_vtag(struct mei_ext_hdr *ext, u8 vtag)
  *
  * @cb: message callback structure
  *
- * Return: a pointer to initialized header
+ * Return: a pointer to initialized header or ERR_PTR on failure
  */
 static struct mei_msg_hdr *mei_msg_hdr_init(const struct mei_cl_cb *cb)
 {
@@ -2113,6 +2115,8 @@ void mei_cl_complete(struct mei_cl *cl, struct mei_cl_cb *cb)
 	case MEI_FOP_DISCONNECT:
 	case MEI_FOP_NOTIFY_STOP:
 	case MEI_FOP_NOTIFY_START:
+	case MEI_FOP_DMA_MAP:
+	case MEI_FOP_DMA_UNMAP:
 		if (waitqueue_active(&cl->wait))
 			wake_up(&cl->wait);
 
@@ -2139,3 +2143,286 @@ void mei_cl_all_disconnect(struct mei_device *dev)
 	list_for_each_entry(cl, &dev->file_list, link)
 		mei_cl_set_disconnected(cl);
 }
+
+static struct mei_cl *mei_cl_dma_map_find(struct mei_device *dev, u8 buffer_id)
+{
+	struct mei_cl *cl;
+
+	list_for_each_entry(cl, &dev->file_list, link)
+		if (cl->dma.buffer_id == buffer_id)
+			return cl;
+	return NULL;
+}
+
+/**
+ * mei_cl_irq_dma_map - send client dma map request in irq_thread context
+ *
+ * @cl: client
+ * @cb: callback block.
+ * @cmpl_list: complete list.
+ *
+ * Return: 0 on success and error otherwise.
+ */
+int mei_cl_irq_dma_map(struct mei_cl *cl, struct mei_cl_cb *cb,
+		       struct list_head *cmpl_list)
+{
+	struct mei_device *dev = cl->dev;
+	u32 msg_slots;
+	int slots;
+	int ret;
+
+	msg_slots = mei_hbm2slots(sizeof(struct hbm_client_dma_map_request));
+	slots = mei_hbuf_empty_slots(dev);
+	if (slots < 0)
+		return -EOVERFLOW;
+
+	if ((u32)slots < msg_slots)
+		return -EMSGSIZE;
+
+	ret = mei_hbm_cl_dma_map_req(dev, cl);
+	if (ret) {
+		cl->status = ret;
+		list_move_tail(&cb->list, cmpl_list);
+		return ret;
+	}
+
+	list_move_tail(&cb->list, &dev->ctrl_rd_list);
+	return 0;
+}
+
+/**
+ * mei_cl_irq_dma_unmap - send client dma unmap request in irq_thread context
+ *
+ * @cl: client
+ * @cb: callback block.
+ * @cmpl_list: complete list.
+ *
+ * Return: 0 on success and error otherwise.
+ */
+int mei_cl_irq_dma_unmap(struct mei_cl *cl, struct mei_cl_cb *cb,
+			 struct list_head *cmpl_list)
+{
+	struct mei_device *dev = cl->dev;
+	u32 msg_slots;
+	int slots;
+	int ret;
+
+	msg_slots = mei_hbm2slots(sizeof(struct hbm_client_dma_unmap_request));
+	slots = mei_hbuf_empty_slots(dev);
+	if (slots < 0)
+		return -EOVERFLOW;
+
+	if ((u32)slots < msg_slots)
+		return -EMSGSIZE;
+
+	ret = mei_hbm_cl_dma_unmap_req(dev, cl);
+	if (ret) {
+		cl->status = ret;
+		list_move_tail(&cb->list, cmpl_list);
+		return ret;
+	}
+
+	list_move_tail(&cb->list, &dev->ctrl_rd_list);
+	return 0;
+}
+
+static int mei_cl_dma_alloc(struct mei_cl *cl, u8 buf_id, size_t size)
+{
+	cl->dma.vaddr = dmam_alloc_coherent(cl->dev->dev, size,
+					    &cl->dma.daddr, GFP_KERNEL);
+	if (!cl->dma.vaddr)
+		return -ENOMEM;
+
+	cl->dma.buffer_id = buf_id;
+	cl->dma.size = size;
+
+	return 0;
+}
+
+static void mei_cl_dma_free(struct mei_cl *cl)
+{
+	cl->dma.buffer_id = 0;
+	dmam_free_coherent(cl->dev->dev,
+			   cl->dma.size, cl->dma.vaddr, cl->dma.daddr);
+	cl->dma.size = 0;
+	cl->dma.vaddr = NULL;
+	cl->dma.daddr = 0;
+}
+
+/**
+ * mei_cl_dma_alloc_and_map - send client dma map request
+ *
+ * @cl: host client
+ * @fp: pointer to file structure
+ * @buffer_id: id of the mapped buffer
+ * @size: size of the buffer
+ *
+ * Locking: called under "dev->device_lock" lock
+ *
+ * Return:
+ * * -ENODEV
+ * * -EINVAL
+ * * -EOPNOTSUPP
+ * * -EPROTO
+ * * -ENOMEM
+ */
+int mei_cl_dma_alloc_and_map(struct mei_cl *cl, const struct file *fp,
+			     u8 buffer_id, size_t size)
+{
+	struct mei_device *dev;
+	struct mei_cl_cb *cb;
+	int rets;
+
+	if (WARN_ON(!cl || !cl->dev))
+		return -ENODEV;
+
+	dev = cl->dev;
+
+	if (!dev->hbm_f_cd_supported) {
+		cl_dbg(dev, cl, "client dma is not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (buffer_id == 0)
+		return -EINVAL;
+
+	if (!mei_cl_is_connected(cl))
+		return -ENODEV;
+
+	if (cl->dma_mapped)
+		return -EPROTO;
+
+	if (mei_cl_dma_map_find(dev, buffer_id)) {
+		cl_dbg(dev, cl, "client dma with id %d is already allocated\n",
+		       cl->dma.buffer_id);
+		return -EPROTO;
+	}
+
+	rets = pm_runtime_get(dev->dev);
+	if (rets < 0 && rets != -EINPROGRESS) {
+		pm_runtime_put_noidle(dev->dev);
+		cl_err(dev, cl, "rpm: get failed %d\n", rets);
+		return rets;
+	}
+
+	rets = mei_cl_dma_alloc(cl, buffer_id, size);
+	if (rets) {
+		pm_runtime_put_noidle(dev->dev);
+		return rets;
+	}
+
+	cb = mei_cl_enqueue_ctrl_wr_cb(cl, 0, MEI_FOP_DMA_MAP, fp);
+	if (!cb) {
+		rets = -ENOMEM;
+		goto out;
+	}
+
+	if (mei_hbuf_acquire(dev)) {
+		if (mei_hbm_cl_dma_map_req(dev, cl)) {
+			rets = -ENODEV;
+			goto out;
+		}
+		list_move_tail(&cb->list, &dev->ctrl_rd_list);
+	}
+
+	mutex_unlock(&dev->device_lock);
+	wait_event_timeout(cl->wait,
+			   cl->dma_mapped ||
+			   cl->status ||
+			   !mei_cl_is_connected(cl),
+			   mei_secs_to_jiffies(MEI_CL_CONNECT_TIMEOUT));
+	mutex_lock(&dev->device_lock);
+
+	if (!cl->dma_mapped && !cl->status)
+		cl->status = -EFAULT;
+
+	rets = cl->status;
+
+out:
+	if (rets)
+		mei_cl_dma_free(cl);
+
+	cl_dbg(dev, cl, "rpm: autosuspend\n");
+	pm_runtime_mark_last_busy(dev->dev);
+	pm_runtime_put_autosuspend(dev->dev);
+
+	mei_io_cb_free(cb);
+	return rets;
+}
+
+/**
+ * mei_cl_dma_unmap - send client dma unmap request
+ *
+ * @cl: host client
+ * @fp: pointer to file structure
+ *
+ * Locking: called under "dev->device_lock" lock
+ *
+ * Return: 0 on success and error otherwise.
+ */
+int mei_cl_dma_unmap(struct mei_cl *cl, const struct file *fp)
+{
+	struct mei_device *dev;
+	struct mei_cl_cb *cb;
+	int rets;
+
+	if (WARN_ON(!cl || !cl->dev))
+		return -ENODEV;
+
+	dev = cl->dev;
+
+	if (!dev->hbm_f_cd_supported) {
+		cl_dbg(dev, cl, "client dma is not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (!mei_cl_is_connected(cl))
+		return -ENODEV;
+
+	if (!cl->dma_mapped)
+		return -EPROTO;
+
+	rets = pm_runtime_get(dev->dev);
+	if (rets < 0 && rets != -EINPROGRESS) {
+		pm_runtime_put_noidle(dev->dev);
+		cl_err(dev, cl, "rpm: get failed %d\n", rets);
+		return rets;
+	}
+
+	cb = mei_cl_enqueue_ctrl_wr_cb(cl, 0, MEI_FOP_DMA_UNMAP, fp);
+	if (!cb) {
+		rets = -ENOMEM;
+		goto out;
+	}
+
+	if (mei_hbuf_acquire(dev)) {
+		if (mei_hbm_cl_dma_unmap_req(dev, cl)) {
+			rets = -ENODEV;
+			goto out;
+		}
+		list_move_tail(&cb->list, &dev->ctrl_rd_list);
+	}
+
+	mutex_unlock(&dev->device_lock);
+	wait_event_timeout(cl->wait,
+			   !cl->dma_mapped ||
+			   cl->status ||
+			   !mei_cl_is_connected(cl),
+			   mei_secs_to_jiffies(MEI_CL_CONNECT_TIMEOUT));
+	mutex_lock(&dev->device_lock);
+
+	if (cl->dma_mapped && !cl->status)
+		cl->status = -EFAULT;
+
+	rets = cl->status;
+
+	if (!rets)
+		mei_cl_dma_free(cl);
+out:
+	cl_dbg(dev, cl, "rpm: autosuspend\n");
+	pm_runtime_mark_last_busy(dev->dev);
+	pm_runtime_put_autosuspend(dev->dev);
+
+	mei_io_cb_free(cb);
+	return rets;
+}
diff --git a/drivers/misc/mei/client.h b/drivers/misc/mei/client.h
index 9e08a9843bba..b12cdcde9436 100644
--- a/drivers/misc/mei/client.h
+++ b/drivers/misc/mei/client.h
@@ -265,6 +265,14 @@ void mei_cl_notify(struct mei_cl *cl);
 
 void mei_cl_all_disconnect(struct mei_device *dev);
 
+int mei_cl_irq_dma_map(struct mei_cl *cl, struct mei_cl_cb *cb,
+		       struct list_head *cmpl_list);
+int mei_cl_irq_dma_unmap(struct mei_cl *cl, struct mei_cl_cb *cb,
+			 struct list_head *cmpl_list);
+int mei_cl_dma_alloc_and_map(struct mei_cl *cl, const struct file *fp,
+			     u8 buffer_id, size_t size);
+int mei_cl_dma_unmap(struct mei_cl *cl, const struct file *fp);
+
 #define MEI_CL_FMT "cl:host=%02d me=%02d "
 #define MEI_CL_PRM(cl) (cl)->host_client_id, mei_cl_me_id(cl)
 
diff --git a/drivers/misc/mei/debugfs.c b/drivers/misc/mei/debugfs.c
index 3ab1a431d810..1ce61e9e24fc 100644
--- a/drivers/misc/mei/debugfs.c
+++ b/drivers/misc/mei/debugfs.c
@@ -106,6 +106,7 @@ static int mei_dbgfs_devstate_show(struct seq_file *m, void *unused)
 		seq_printf(m, "\tDR: %01d\n", dev->hbm_f_dr_supported);
 		seq_printf(m, "\tVT: %01d\n", dev->hbm_f_vt_supported);
 		seq_printf(m, "\tCAP: %01d\n", dev->hbm_f_cap_supported);
+		seq_printf(m, "\tCD: %01d\n", dev->hbm_f_cd_supported);
 	}
 
 	seq_printf(m, "pg:  %s, %s\n",
diff --git a/drivers/misc/mei/hbm.c b/drivers/misc/mei/hbm.c
index 686e8b6a4c55..d0277c7fed10 100644
--- a/drivers/misc/mei/hbm.c
+++ b/drivers/misc/mei/hbm.c
@@ -339,7 +339,9 @@ static int mei_hbm_capabilities_req(struct mei_device *dev)
 	memset(&req, 0, sizeof(req));
 	req.hbm_cmd = MEI_HBM_CAPABILITIES_REQ_CMD;
 	if (dev->hbm_f_vt_supported)
-		req.capability_requested[0] = HBM_CAP_VT;
+		req.capability_requested[0] |= HBM_CAP_VT;
+	if (dev->hbm_f_cd_supported)
+		req.capability_requested[0] |= HBM_CAP_CD;
 
 	ret = mei_hbm_write_message(dev, &mei_hdr, &req);
 	if (ret) {
@@ -593,6 +595,117 @@ static void mei_hbm_cl_notify(struct mei_device *dev,
 }
 
 /**
+ * mei_hbm_cl_dma_map_req - send client dma map request
+ *
+ * @dev: the device structure
+ * @cl: mei host client
+ *
+ * Return: 0 on success and -EIO on write failure
+ */
+int mei_hbm_cl_dma_map_req(struct mei_device *dev, struct mei_cl *cl)
+{
+	struct mei_msg_hdr mei_hdr;
+	struct hbm_client_dma_map_request req;
+	int ret;
+
+	mei_hbm_hdr(&mei_hdr, sizeof(req));
+
+	memset(&req, 0, sizeof(req));
+
+	req.hbm_cmd = MEI_HBM_CLIENT_DMA_MAP_REQ_CMD;
+	req.client_buffer_id = cl->dma.buffer_id;
+	req.address_lsb = lower_32_bits(cl->dma.daddr);
+	req.address_msb = upper_32_bits(cl->dma.daddr);
+	req.size = cl->dma.size;
+
+	ret = mei_hbm_write_message(dev, &mei_hdr, &req);
+	if (ret)
+		dev_err(dev->dev, "dma map request failed: ret = %d\n", ret);
+
+	return ret;
+}
+
+/**
+ * mei_hbm_cl_dma_unmap_req - send client dma unmap request
+ *
+ * @dev: the device structure
+ * @cl: mei host client
+ *
+ * Return: 0 on success and -EIO on write failure
+ */
+int mei_hbm_cl_dma_unmap_req(struct mei_device *dev, struct mei_cl *cl)
+{
+	struct mei_msg_hdr mei_hdr;
+	struct hbm_client_dma_unmap_request req;
+	int ret;
+
+	mei_hbm_hdr(&mei_hdr, sizeof(req));
+
+	memset(&req, 0, sizeof(req));
+
+	req.hbm_cmd = MEI_HBM_CLIENT_DMA_UNMAP_REQ_CMD;
+	req.client_buffer_id = cl->dma.buffer_id;
+
+	ret = mei_hbm_write_message(dev, &mei_hdr, &req);
+	if (ret)
+		dev_err(dev->dev, "dma unmap request failed: ret = %d\n", ret);
+
+	return ret;
+}
+
+static void mei_hbm_cl_dma_map_res(struct mei_device *dev,
+				   struct hbm_client_dma_response *res)
+{
+	struct mei_cl *cl;
+	struct mei_cl_cb *cb, *next;
+
+	cl = NULL;
+	list_for_each_entry_safe(cb, next, &dev->ctrl_rd_list, list) {
+		if (cb->fop_type != MEI_FOP_DMA_MAP)
+			continue;
+		if (!cb->cl->dma.buffer_id || cb->cl->dma_mapped)
+			continue;
+
+		cl = cb->cl;
+		break;
+	}
+	if (!cl)
+		return;
+
+	dev_dbg(dev->dev, "cl dma map result = %d\n", res->status);
+	cl->status = res->status;
+	if (!cl->status)
+		cl->dma_mapped = 1;
+	wake_up(&cl->wait);
+}
+
+static void mei_hbm_cl_dma_unmap_res(struct mei_device *dev,
+				     struct hbm_client_dma_response *res)
+{
+	struct mei_cl *cl;
+	struct mei_cl_cb *cb, *next;
+
+	cl = NULL;
+	list_for_each_entry_safe(cb, next, &dev->ctrl_rd_list, list) {
+		if (cb->fop_type != MEI_FOP_DMA_UNMAP)
+			continue;
+		if (!cb->cl->dma.buffer_id || !cb->cl->dma_mapped)
+			continue;
+
+		cl = cb->cl;
+		break;
+	}
+	if (!cl)
+		return;
+
+	dev_dbg(dev->dev, "cl dma unmap result = %d\n", res->status);
+	cl->status = res->status;
+	if (!cl->status)
+		cl->dma_mapped = 0;
+	wake_up(&cl->wait);
+}
+
+/**
  * mei_hbm_prop_req - request property for a single client
  *
  * @dev: the device structure
@@ -1085,6 +1198,13 @@ static void mei_hbm_config_features(struct mei_device *dev)
 	    (dev->version.major_version == HBM_MAJOR_VERSION_CAP &&
 	     dev->version.minor_version >= HBM_MINOR_VERSION_CAP))
 		dev->hbm_f_cap_supported = 1;
+
+	/* Client DMA Support */
+	dev->hbm_f_cd_supported = 0;
+	if (dev->version.major_version > HBM_MAJOR_VERSION_CD ||
+	    (dev->version.major_version == HBM_MAJOR_VERSION_CD &&
+	     dev->version.minor_version >= HBM_MINOR_VERSION_CD))
+		dev->hbm_f_cd_supported = 1;
 }
 
 /**
@@ -1124,6 +1244,7 @@ int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr)
 	struct mei_hbm_cl_cmd *cl_cmd;
 	struct hbm_client_connect_request *disconnect_req;
 	struct hbm_flow_control *fctrl;
+	struct hbm_client_dma_response *client_dma_res;
 
 	/* read the message to our buffer */
 	BUG_ON(hdr->length >= sizeof(dev->rd_msg_buf));
@@ -1177,6 +1298,10 @@ int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr)
 
 		if (dev->dev_state != MEI_DEV_INIT_CLIENTS ||
 		    dev->hbm_state != MEI_HBM_STARTING) {
+			if (dev->dev_state == MEI_DEV_POWER_DOWN) {
+				dev_dbg(dev->dev, "hbm: start: on shutdown, ignoring\n");
+				return 0;
+			}
 			dev_err(dev->dev, "hbm: start: state mismatch, [%d, %d]\n",
 				dev->dev_state, dev->hbm_state);
 			return -EPROTO;
@@ -1215,7 +1340,12 @@ int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr)
 
 		dev->init_clients_timer = 0;
 
-		if (dev->hbm_state != MEI_HBM_CAP_SETUP) {
+		if (dev->dev_state != MEI_DEV_INIT_CLIENTS ||
+		    dev->hbm_state != MEI_HBM_CAP_SETUP) {
+			if (dev->dev_state == MEI_DEV_POWER_DOWN) {
+				dev_dbg(dev->dev, "hbm: capabilities response: on shutdown, ignoring\n");
+				return 0;
+			}
 			dev_err(dev->dev, "hbm: capabilities response: state mismatch, [%d, %d]\n",
 				dev->dev_state, dev->hbm_state);
 			return -EPROTO;
@@ -1224,6 +1354,8 @@ int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr)
 		capability_res = (struct hbm_capability_response *)mei_msg;
 		if (!(capability_res->capability_granted[0] & HBM_CAP_VT))
 			dev->hbm_f_vt_supported = 0;
+		if (!(capability_res->capability_granted[0] & HBM_CAP_CD))
+			dev->hbm_f_cd_supported = 0;
 
 		if (dev->hbm_f_dr_supported) {
 			if (mei_dmam_ring_alloc(dev))
@@ -1247,7 +1379,12 @@ int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr)
 
 		dev->init_clients_timer = 0;
 
-		if (dev->hbm_state != MEI_HBM_DR_SETUP) {
+		if (dev->dev_state != MEI_DEV_INIT_CLIENTS ||
+		    dev->hbm_state != MEI_HBM_DR_SETUP) {
+			if (dev->dev_state == MEI_DEV_POWER_DOWN) {
+				dev_dbg(dev->dev, "hbm: dma setup response: on shutdown, ignoring\n");
+				return 0;
+			}
 			dev_err(dev->dev, "hbm: dma setup response: state mismatch, [%d, %d]\n",
 				dev->dev_state, dev->hbm_state);
 			return -EPROTO;
@@ -1311,6 +1448,10 @@ int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr)
 
 		if (dev->dev_state != MEI_DEV_INIT_CLIENTS ||
 		    dev->hbm_state != MEI_HBM_CLIENT_PROPERTIES) {
+			if (dev->dev_state == MEI_DEV_POWER_DOWN) {
+				dev_dbg(dev->dev, "hbm: properties response: on shutdown, ignoring\n");
+				return 0;
+			}
 			dev_err(dev->dev, "hbm: properties response: state mismatch, [%d, %d]\n",
 				dev->dev_state, dev->hbm_state);
 			return -EPROTO;
@@ -1349,6 +1490,10 @@ int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr)
 
 		if (dev->dev_state != MEI_DEV_INIT_CLIENTS ||
 		    dev->hbm_state != MEI_HBM_ENUM_CLIENTS) {
+			if (dev->dev_state == MEI_DEV_POWER_DOWN) {
+				dev_dbg(dev->dev, "hbm: enumeration response: on shutdown, ignoring\n");
+				return 0;
+			}
 			dev_err(dev->dev, "hbm: enumeration response: state mismatch, [%d, %d]\n",
 				dev->dev_state, dev->hbm_state);
 			return -EPROTO;
@@ -1373,7 +1518,7 @@ int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr)
 			return -EPROTO;
 		}
 
-		dev->dev_state = MEI_DEV_POWER_DOWN;
+		mei_set_devstate(dev, MEI_DEV_POWER_DOWN);
 		dev_info(dev->dev, "hbm: stop response: resetting.\n");
 		/* force the reset */
 		return -EPROTO;
@@ -1426,6 +1571,18 @@ int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr)
 		mei_hbm_cl_notify(dev, cl_cmd);
 		break;
 
+	case MEI_HBM_CLIENT_DMA_MAP_RES_CMD:
+		dev_dbg(dev->dev, "hbm: client dma map response: message received.\n");
+		client_dma_res = (struct hbm_client_dma_response *)mei_msg;
+		mei_hbm_cl_dma_map_res(dev, client_dma_res);
+		break;
+
+	case MEI_HBM_CLIENT_DMA_UNMAP_RES_CMD:
+		dev_dbg(dev->dev, "hbm: client dma unmap response: message received.\n");
+		client_dma_res = (struct hbm_client_dma_response *)mei_msg;
+		mei_hbm_cl_dma_unmap_res(dev, client_dma_res);
+		break;
+
 	default:
 		WARN(1, "hbm: wrong command %d\n", mei_msg->hbm_cmd);
 		return -EPROTO;
diff --git a/drivers/misc/mei/hbm.h b/drivers/misc/mei/hbm.h
index 4d95e38e4ddf..cd5b08ca34b6 100644
--- a/drivers/misc/mei/hbm.h
+++ b/drivers/misc/mei/hbm.h
@@ -10,6 +10,7 @@
 struct mei_device;
 struct mei_msg_hdr;
 struct mei_cl;
+struct mei_dma_data;
 
 /**
  * enum mei_hbm_state - host bus message protocol state
@@ -51,6 +52,7 @@ int mei_hbm_pg(struct mei_device *dev, u8 pg_cmd);
 void mei_hbm_pg_resume(struct mei_device *dev);
 int mei_hbm_cl_notify_req(struct mei_device *dev,
 			  struct mei_cl *cl, u8 request);
-
+int mei_hbm_cl_dma_map_req(struct mei_device *dev, struct mei_cl *cl);
+int mei_hbm_cl_dma_unmap_req(struct mei_device *dev, struct mei_cl *cl);
 #endif /* _MEI_HBM_H_ */
 
diff --git a/drivers/misc/mei/hdcp/mei_hdcp.c b/drivers/misc/mei/hdcp/mei_hdcp.c
index 3506a3534294..ec2a4fce8581 100644
--- a/drivers/misc/mei/hdcp/mei_hdcp.c
+++ b/drivers/misc/mei/hdcp/mei_hdcp.c
@@ -844,16 +844,19 @@ enable_err_exit:
 	return ret;
 }
 
-static int mei_hdcp_remove(struct mei_cl_device *cldev)
+static void mei_hdcp_remove(struct mei_cl_device *cldev)
 {
 	struct i915_hdcp_comp_master *comp_master =
 						mei_cldev_get_drvdata(cldev);
+	int ret;
 
 	component_master_del(&cldev->dev, &mei_component_master_ops);
 	kfree(comp_master);
 	mei_cldev_set_drvdata(cldev, NULL);
 
-	return mei_cldev_disable(cldev);
+	ret = mei_cldev_disable(cldev);
+	if (ret)
+		dev_warn(&cldev->dev, "mei_cldev_disable() failed\n");
 }
 
 #define MEI_UUID_HDCP GUID_INIT(0xB638AB7E, 0x94E2, 0x4EA2, 0xA5, \
diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h
index 9cf8d8f60cfe..14be76d4c2e6 100644
--- a/drivers/misc/mei/hw-me-regs.h
+++ b/drivers/misc/mei/hw-me-regs.h
@@ -101,6 +101,11 @@
 #define MEI_DEV_ID_MCC        0x4B70  /* Mule Creek Canyon (EHL) */
 #define MEI_DEV_ID_MCC_4      0x4B75  /* Mule Creek Canyon 4 (EHL) */
 
+#define MEI_DEV_ID_EBG        0x1BE0  /* Emmitsburg WS */
+
+#define MEI_DEV_ID_ADP_S      0x7AE8  /* Alder Lake Point S */
+#define MEI_DEV_ID_ADP_LP     0x7A60  /* Alder Lake Point LP */
+
 /*
  * MEI HW Section
  */
diff --git a/drivers/misc/mei/hw.h b/drivers/misc/mei/hw.h
index df2fb9520dd8..b10606550613 100644
--- a/drivers/misc/mei/hw.h
+++ b/drivers/misc/mei/hw.h
@@ -88,6 +88,12 @@
 #define HBM_MINOR_VERSION_CAP              2
 #define HBM_MAJOR_VERSION_CAP              2
 
+/*
+ * MEI version with client DMA support
+ */
+#define HBM_MINOR_VERSION_CD               2
+#define HBM_MAJOR_VERSION_CD               2
+
 /* Host bus message command opcode */
 #define MEI_HBM_CMD_OP_MSK                  0x7f
 /* Host bus message command RESPONSE */
@@ -136,6 +142,12 @@
 #define MEI_HBM_CAPABILITIES_REQ_CMD        0x13
 #define MEI_HBM_CAPABILITIES_RES_CMD        0x93
 
+#define MEI_HBM_CLIENT_DMA_MAP_REQ_CMD      0x14
+#define MEI_HBM_CLIENT_DMA_MAP_RES_CMD      0x94
+
+#define MEI_HBM_CLIENT_DMA_UNMAP_REQ_CMD    0x15
+#define MEI_HBM_CLIENT_DMA_UNMAP_RES_CMD    0x95
+
 /*
  * MEI Stop Reason
  * used by hbm_host_stop_request.reason
@@ -648,6 +660,8 @@ struct hbm_dma_ring_ctrl {
 
 /* virtual tag supported */
 #define HBM_CAP_VT BIT(0)
+/* client dma supported */
+#define HBM_CAP_CD BIT(2)
 
 /**
  * struct hbm_capability_request - capability request from host to fw
@@ -671,4 +685,51 @@ struct hbm_capability_response {
 	u8 capability_granted[3];
 } __packed;
 
+/**
+ * struct hbm_client_dma_map_request - client dma map request from host to fw
+ *
+ * @hbm_cmd: bus message command header
+ * @client_buffer_id: client buffer id
+ * @reserved: reserved
+ * @address_lsb: DMA address LSB
+ * @address_msb: DMA address MSB
+ * @size: DMA size
+ */
+struct hbm_client_dma_map_request {
+	u8 hbm_cmd;
+	u8 client_buffer_id;
+	u8 reserved[2];
+	u32 address_lsb;
+	u32 address_msb;
+	u32 size;
+} __packed;
+
+/**
+ * struct hbm_client_dma_unmap_request
+ *    client dma unmap request from the host to the firmware
+ *
+ * @hbm_cmd: bus message command header
+ * @status: unmap status
+ * @client_buffer_id: client buffer id
+ * @reserved: reserved
+ */
+struct hbm_client_dma_unmap_request {
+	u8 hbm_cmd;
+	u8 status;
+	u8 client_buffer_id;
+	u8 reserved;
+} __packed;
+
+/**
+ * struct hbm_client_dma_response
+ *   client dma map/unmap response from the firmware to the host
+ *
+ * @hbm_cmd: bus message command header
+ * @status: command status
+ */
+struct hbm_client_dma_response {
+	u8 hbm_cmd;
+	u8 status;
+} __packed;
+
 #endif
diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c
index bcee77768b91..5c8cb679b997 100644
--- a/drivers/misc/mei/init.c
+++ b/drivers/misc/mei/init.c
@@ -303,9 +303,12 @@ void mei_stop(struct mei_device *dev)
 	dev_dbg(dev->dev, "stopping the device.\n");
 
 	mutex_lock(&dev->device_lock);
-	mei_set_devstate(dev, MEI_DEV_POWER_DOWN);
+	mei_set_devstate(dev, MEI_DEV_POWERING_DOWN);
 	mutex_unlock(&dev->device_lock);
 	mei_cl_bus_remove_devices(dev);
+	mutex_lock(&dev->device_lock);
+	mei_set_devstate(dev, MEI_DEV_POWER_DOWN);
+	mutex_unlock(&dev->device_lock);
 
 	mei_cancel_work(dev);
 
diff --git a/drivers/misc/mei/interrupt.c b/drivers/misc/mei/interrupt.c
index 326955b04fda..a98f6b895af7 100644
--- a/drivers/misc/mei/interrupt.c
+++ b/drivers/misc/mei/interrupt.c
@@ -295,12 +295,17 @@ static inline bool hdr_is_fixed(struct mei_msg_hdr *mei_hdr)
 static inline int hdr_is_valid(u32 msg_hdr)
 {
 	struct mei_msg_hdr *mei_hdr;
+	u32 expected_len = 0;
 
 	mei_hdr = (struct mei_msg_hdr *)&msg_hdr;
 	if (!msg_hdr || mei_hdr->reserved)
 		return -EBADMSG;
 
-	if (mei_hdr->dma_ring && mei_hdr->length != MEI_SLOT_SIZE)
+	if (mei_hdr->dma_ring)
+		expected_len += MEI_SLOT_SIZE;
+	if (mei_hdr->extended)
+		expected_len += MEI_SLOT_SIZE;
+	if (mei_hdr->length < expected_len)
 		return -EBADMSG;
 
 	return 0;
@@ -324,6 +329,8 @@ int mei_irq_read_handler(struct mei_device *dev,
 	struct mei_cl *cl;
 	int ret;
 	u32 ext_meta_hdr_u32;
+	u32 hdr_size_left;
+	u32 hdr_size_ext;
 	int i;
 	int ext_hdr_end;
 
@@ -353,6 +360,7 @@ int mei_irq_read_handler(struct mei_device *dev,
 	}
 
 	ext_hdr_end = 1;
+	hdr_size_left = mei_hdr->length;
 
 	if (mei_hdr->extended) {
 		if (!dev->rd_msg_hdr[1]) {
@@ -363,8 +371,21 @@ int mei_irq_read_handler(struct mei_device *dev,
 			dev_dbg(dev->dev, "extended header is %08x\n",
 				ext_meta_hdr_u32);
 		}
-		meta_hdr = ((struct mei_ext_meta_hdr *)
-				dev->rd_msg_hdr + 1);
+		meta_hdr = ((struct mei_ext_meta_hdr *)dev->rd_msg_hdr + 1);
+		if (check_add_overflow((u32)sizeof(*meta_hdr),
+				       mei_slots2data(meta_hdr->size),
+				       &hdr_size_ext)) {
+			dev_err(dev->dev, "extended message size too big %d\n",
+				meta_hdr->size);
+			return -EBADMSG;
+		}
+		if (hdr_size_left < hdr_size_ext) {
+			dev_err(dev->dev, "corrupted message header len %d\n",
+				mei_hdr->length);
+			return -EBADMSG;
+		}
+		hdr_size_left -= hdr_size_ext;
+
 		ext_hdr_end = meta_hdr->size + 2;
 		for (i = dev->rd_msg_hdr_count; i < ext_hdr_end; i++) {
 			dev->rd_msg_hdr[i] = mei_read_hdr(dev);
@@ -376,6 +397,12 @@ int mei_irq_read_handler(struct mei_device *dev,
 	}
 
 	if (mei_hdr->dma_ring) {
+		if (hdr_size_left != sizeof(dev->rd_msg_hdr[ext_hdr_end])) {
+			dev_err(dev->dev, "corrupted message header len %d\n",
+				mei_hdr->length);
+			return -EBADMSG;
+		}
+
 		dev->rd_msg_hdr[ext_hdr_end] = mei_read_hdr(dev);
 		dev->rd_msg_hdr_count++;
 		(*slots)--;
@@ -520,6 +547,16 @@ int mei_irq_write_handler(struct mei_device *dev, struct list_head *cmpl_list)
 			if (ret)
 				return ret;
 			break;
+		case MEI_FOP_DMA_MAP:
+			ret = mei_cl_irq_dma_map(cl, cb, cmpl_list);
+			if (ret)
+				return ret;
+			break;
+		case MEI_FOP_DMA_UNMAP:
+			ret = mei_cl_irq_dma_unmap(cl, cb, cmpl_list);
+			if (ret)
+				return ret;
+			break;
 		default:
 			BUG();
 		}
diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c
index 9f6682033ed7..28937b6e7e0c 100644
--- a/drivers/misc/mei/main.c
+++ b/drivers/misc/mei/main.c
@@ -1026,7 +1026,7 @@ static ssize_t tx_queue_limit_show(struct device *device,
 	size = dev->tx_queue_limit;
 	mutex_unlock(&dev->device_lock);
 
-	return snprintf(buf, PAGE_SIZE, "%u\n", size);
+	return sysfs_emit(buf, "%u\n", size);
 }
 
 static ssize_t tx_queue_limit_store(struct device *device,
diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h
index 8c395bfdf6f3..b7b6ef344e80 100644
--- a/drivers/misc/mei/mei_dev.h
+++ b/drivers/misc/mei/mei_dev.h
@@ -57,6 +57,7 @@ enum mei_dev_state {
 	MEI_DEV_ENABLED,
 	MEI_DEV_RESETTING,
 	MEI_DEV_DISABLED,
+	MEI_DEV_POWERING_DOWN,
 	MEI_DEV_POWER_DOWN,
 	MEI_DEV_POWER_UP
 };
@@ -78,6 +79,8 @@ enum mei_file_transaction_states {
  * @MEI_FOP_DISCONNECT_RSP: disconnect response
  * @MEI_FOP_NOTIFY_START:   start notification
  * @MEI_FOP_NOTIFY_STOP:    stop notification
+ * @MEI_FOP_DMA_MAP:   request client dma map
+ * @MEI_FOP_DMA_UNMAP: request client dma unmap
  */
 enum mei_cb_file_ops {
 	MEI_FOP_READ = 0,
@@ -87,6 +90,8 @@ enum mei_cb_file_ops {
 	MEI_FOP_DISCONNECT_RSP,
 	MEI_FOP_NOTIFY_START,
 	MEI_FOP_NOTIFY_STOP,
+	MEI_FOP_DMA_MAP,
+	MEI_FOP_DMA_UNMAP,
 };
 
 /**
@@ -112,6 +117,13 @@ struct mei_msg_data {
 	unsigned char *data;
 };
 
+struct mei_dma_data {
+	u8 buffer_id;
+	void *vaddr;
+	dma_addr_t daddr;
+	size_t size;
+};
+
 /**
  * struct mei_dma_dscr - dma address descriptor
  *
@@ -235,6 +247,8 @@ struct mei_cl_vtag {
  * @rd_pending: pending read credits
  * @rd_completed_lock: protects rd_completed queue
  * @rd_completed: completed read
+ * @dma: dma settings
+ * @dma_mapped: dma buffer is currently mapped.
  *
  * @cldev: device on the mei client bus
  */
@@ -262,6 +276,8 @@ struct mei_cl {
 	struct list_head rd_pending;
 	spinlock_t rd_completed_lock; /* protects rd_completed queue */
 	struct list_head rd_completed;
+	struct mei_dma_data dma;
+	u8 dma_mapped;
 
 	struct mei_cl_device *cldev;
 };
@@ -450,6 +466,7 @@ struct mei_fw_version {
  * @hbm_f_dr_supported  : hbm feature dma ring supported
  * @hbm_f_vt_supported  : hbm feature vtag supported
  * @hbm_f_cap_supported : hbm feature capabilities message supported
+ * @hbm_f_cd_supported  : hbm feature client dma supported
  *
  * @fw_ver : FW versions
  *
@@ -537,6 +554,7 @@ struct mei_device {
 	unsigned int hbm_f_dr_supported:1;
 	unsigned int hbm_f_vt_supported:1;
 	unsigned int hbm_f_cap_supported:1;
+	unsigned int hbm_f_cd_supported:1;
 
 	struct mei_fw_version fw_ver[MEI_MAX_FW_VER_BLOCKS];
 
diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c
index 1de9ef7a272b..a7e179626b63 100644
--- a/drivers/misc/mei/pci-me.c
+++ b/drivers/misc/mei/pci-me.c
@@ -107,6 +107,11 @@ static const struct pci_device_id mei_me_pci_tbl[] = {
 
 	{MEI_PCI_DEVICE(MEI_DEV_ID_CDF, MEI_ME_PCH8_CFG)},
 
+	{MEI_PCI_DEVICE(MEI_DEV_ID_EBG, MEI_ME_PCH15_SPS_CFG)},
+
+	{MEI_PCI_DEVICE(MEI_DEV_ID_ADP_S, MEI_ME_PCH15_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_ADP_LP, MEI_ME_PCH15_CFG)},
+
 	/* required last entry */
 	{0, }
 };
diff --git a/drivers/misc/pti.c b/drivers/misc/pti.c
deleted file mode 100644
index 7236ae527b19..000000000000
--- a/drivers/misc/pti.c
+++ /dev/null
@@ -1,978 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *  pti.c - PTI driver for cJTAG data extration
- *
- *  Copyright (C) Intel 2010
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * The PTI (Parallel Trace Interface) driver directs trace data routed from
- * various parts in the system out through the Intel Penwell PTI port and
- * out of the mobile device for analysis with a debugging tool
- * (Lauterbach, Fido). This is part of a solution for the MIPI P1149.7,
- * compact JTAG, standard.
- */
-
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <linux/console.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/tty.h>
-#include <linux/tty_driver.h>
-#include <linux/pci.h>
-#include <linux/mutex.h>
-#include <linux/miscdevice.h>
-#include <linux/intel-pti.h>
-#include <linux/slab.h>
-#include <linux/uaccess.h>
-
-#define DRIVERNAME		"pti"
-#define PCINAME			"pciPTI"
-#define TTYNAME			"ttyPTI"
-#define CHARNAME		"pti"
-#define PTITTY_MINOR_START	0
-#define PTITTY_MINOR_NUM	2
-#define MAX_APP_IDS		16   /* 128 channel ids / u8 bit size */
-#define MAX_OS_IDS		16   /* 128 channel ids / u8 bit size */
-#define MAX_MODEM_IDS		16   /* 128 channel ids / u8 bit size */
-#define MODEM_BASE_ID		71   /* modem master ID address    */
-#define CONTROL_ID		72   /* control master ID address  */
-#define CONSOLE_ID		73   /* console master ID address  */
-#define OS_BASE_ID		74   /* base OS master ID address  */
-#define APP_BASE_ID		80   /* base App master ID address */
-#define CONTROL_FRAME_LEN	32   /* PTI control frame maximum size */
-#define USER_COPY_SIZE		8192 /* 8Kb buffer for user space copy */
-#define APERTURE_14		0x3800000 /* offset to first OS write addr */
-#define APERTURE_LEN		0x400000  /* address length */
-
-struct pti_tty {
-	struct pti_masterchannel *mc;
-};
-
-struct pti_dev {
-	struct tty_port port[PTITTY_MINOR_NUM];
-	unsigned long pti_addr;
-	unsigned long aperture_base;
-	void __iomem *pti_ioaddr;
-	u8 ia_app[MAX_APP_IDS];
-	u8 ia_os[MAX_OS_IDS];
-	u8 ia_modem[MAX_MODEM_IDS];
-};
-
-/*
- * This protects access to ia_app, ia_os, and ia_modem,
- * which keeps track of channels allocated in
- * an aperture write id.
- */
-static DEFINE_MUTEX(alloclock);
-
-static const struct pci_device_id pci_ids[] = {
-		{PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x82B)},
-		{0}
-};
-
-static struct tty_driver *pti_tty_driver;
-static struct pti_dev *drv_data;
-
-static unsigned int pti_console_channel;
-static unsigned int pti_control_channel;
-
-/**
- *  pti_write_to_aperture()- The private write function to PTI HW.
- *
- *  @mc: The 'aperture'. It's part of a write address that holds
- *       a master and channel ID.
- *  @buf: Data being written to the HW that will ultimately be seen
- *        in a debugging tool (Fido, Lauterbach).
- *  @len: Size of buffer.
- *
- *  Since each aperture is specified by a unique
- *  master/channel ID, no two processes will be writing
- *  to the same aperture at the same time so no lock is required. The
- *  PTI-Output agent will send these out in the order that they arrived, and
- *  thus, it will intermix these messages. The debug tool can then later
- *  regroup the appropriate message segments together reconstituting each
- *  message.
- */
-static void pti_write_to_aperture(struct pti_masterchannel *mc,
-				  u8 *buf,
-				  int len)
-{
-	int dwordcnt;
-	int final;
-	int i;
-	u32 ptiword;
-	u32 __iomem *aperture;
-	u8 *p = buf;
-
-	/*
-	 * calculate the aperture offset from the base using the master and
-	 * channel id's.
-	 */
-	aperture = drv_data->pti_ioaddr + (mc->master << 15)
-		+ (mc->channel << 8);
-
-	dwordcnt = len >> 2;
-	final = len - (dwordcnt << 2);	    /* final = trailing bytes    */
-	if (final == 0 && dwordcnt != 0) {  /* always need a final dword */
-		final += 4;
-		dwordcnt--;
-	}
-
-	for (i = 0; i < dwordcnt; i++) {
-		ptiword = be32_to_cpu(*(u32 *)p);
-		p += 4;
-		iowrite32(ptiword, aperture);
-	}
-
-	aperture += PTI_LASTDWORD_DTS;	/* adding DTS signals that is EOM */
-
-	ptiword = 0;
-	for (i = 0; i < final; i++)
-		ptiword |= *p++ << (24-(8*i));
-
-	iowrite32(ptiword, aperture);
-	return;
-}
-
-/**
- *  pti_control_frame_built_and_sent()- control frame build and send function.
- *
- *  @mc:          The master / channel structure on which the function
- *                built a control frame.
- *  @thread_name: The thread name associated with the master / channel or
- *                'NULL' if using the 'current' global variable.
- *
- *  To be able to post process the PTI contents on host side, a control frame
- *  is added before sending any PTI content. So the host side knows on
- *  each PTI frame the name of the thread using a dedicated master / channel.
- *  The thread name is retrieved from 'current' global variable if 'thread_name'
- *  is 'NULL', else it is retrieved from 'thread_name' parameter.
- *  This function builds this frame and sends it to a master ID CONTROL_ID.
- *  The overhead is only 32 bytes since the driver only writes to HW
- *  in 32 byte chunks.
- */
-static void pti_control_frame_built_and_sent(struct pti_masterchannel *mc,
-					     const char *thread_name)
-{
-	/*
-	 * Since we access the comm member in current's task_struct, we only
-	 * need to be as large as what 'comm' in that structure is.
-	 */
-	char comm[TASK_COMM_LEN];
-	struct pti_masterchannel mccontrol = {.master = CONTROL_ID,
-					      .channel = 0};
-	const char *thread_name_p;
-	const char *control_format = "%3d %3d %s";
-	u8 control_frame[CONTROL_FRAME_LEN];
-
-	if (!thread_name) {
-		if (!in_interrupt())
-			get_task_comm(comm, current);
-		else
-			strncpy(comm, "Interrupt", TASK_COMM_LEN);
-
-		/* Absolutely ensure our buffer is zero terminated. */
-		comm[TASK_COMM_LEN-1] = 0;
-		thread_name_p = comm;
-	} else {
-		thread_name_p = thread_name;
-	}
-
-	mccontrol.channel = pti_control_channel;
-	pti_control_channel = (pti_control_channel + 1) & 0x7f;
-
-	snprintf(control_frame, CONTROL_FRAME_LEN, control_format, mc->master,
-		mc->channel, thread_name_p);
-	pti_write_to_aperture(&mccontrol, control_frame, strlen(control_frame));
-}
-
-/**
- *  pti_write_full_frame_to_aperture()- high level function to
- *					write to PTI.
- *
- *  @mc:  The 'aperture'. It's part of a write address that holds
- *        a master and channel ID.
- *  @buf: Data being written to the HW that will ultimately be seen
- *        in a debugging tool (Fido, Lauterbach).
- *  @len: Size of buffer.
- *
- *  All threads sending data (either console, user space application, ...)
- *  are calling the high level function to write to PTI meaning that it is
- *  possible to add a control frame before sending the content.
- */
-static void pti_write_full_frame_to_aperture(struct pti_masterchannel *mc,
-						const unsigned char *buf,
-						int len)
-{
-	pti_control_frame_built_and_sent(mc, NULL);
-	pti_write_to_aperture(mc, (u8 *)buf, len);
-}
-
-/**
- * get_id()- Allocate a master and channel ID.
- *
- * @id_array:    an array of bits representing what channel
- *               id's are allocated for writing.
- * @max_ids:     The max amount of available write IDs to use.
- * @base_id:     The starting SW channel ID, based on the Intel
- *               PTI arch.
- * @thread_name: The thread name associated with the master / channel or
- *               'NULL' if using the 'current' global variable.
- *
- * Returns:
- *	pti_masterchannel struct with master, channel ID address
- *	0 for error
- *
- * Each bit in the arrays ia_app and ia_os correspond to a master and
- * channel id. The bit is one if the id is taken and 0 if free. For
- * every master there are 128 channel id's.
- */
-static struct pti_masterchannel *get_id(u8 *id_array,
-					int max_ids,
-					int base_id,
-					const char *thread_name)
-{
-	struct pti_masterchannel *mc;
-	int i, j, mask;
-
-	mc = kmalloc(sizeof(struct pti_masterchannel), GFP_KERNEL);
-	if (mc == NULL)
-		return NULL;
-
-	/* look for a byte with a free bit */
-	for (i = 0; i < max_ids; i++)
-		if (id_array[i] != 0xff)
-			break;
-	if (i == max_ids) {
-		kfree(mc);
-		return NULL;
-	}
-	/* find the bit in the 128 possible channel opportunities */
-	mask = 0x80;
-	for (j = 0; j < 8; j++) {
-		if ((id_array[i] & mask) == 0)
-			break;
-		mask >>= 1;
-	}
-
-	/* grab it */
-	id_array[i] |= mask;
-	mc->master  = base_id;
-	mc->channel = ((i & 0xf)<<3) + j;
-	/* write new master Id / channel Id allocation to channel control */
-	pti_control_frame_built_and_sent(mc, thread_name);
-	return mc;
-}
-
-/*
- * The following three functions:
- * pti_request_mastercahannel(), mipi_release_masterchannel()
- * and pti_writedata() are an API for other kernel drivers to
- * access PTI.
- */
-
-/**
- * pti_request_masterchannel()- Kernel API function used to allocate
- *				a master, channel ID address
- *				to write to PTI HW.
- *
- * @type:        0- request Application  master, channel aperture ID
- *                  write address.
- *               1- request OS master, channel aperture ID write
- *                  address.
- *               2- request Modem master, channel aperture ID
- *                  write address.
- *               Other values, error.
- * @thread_name: The thread name associated with the master / channel or
- *               'NULL' if using the 'current' global variable.
- *
- * Returns:
- *	pti_masterchannel struct
- *	0 for error
- */
-struct pti_masterchannel *pti_request_masterchannel(u8 type,
-						    const char *thread_name)
-{
-	struct pti_masterchannel *mc;
-
-	mutex_lock(&alloclock);
-
-	switch (type) {
-
-	case 0:
-		mc = get_id(drv_data->ia_app, MAX_APP_IDS,
-			    APP_BASE_ID, thread_name);
-		break;
-
-	case 1:
-		mc = get_id(drv_data->ia_os, MAX_OS_IDS,
-			    OS_BASE_ID, thread_name);
-		break;
-
-	case 2:
-		mc = get_id(drv_data->ia_modem, MAX_MODEM_IDS,
-			    MODEM_BASE_ID, thread_name);
-		break;
-	default:
-		mc = NULL;
-	}
-
-	mutex_unlock(&alloclock);
-	return mc;
-}
-EXPORT_SYMBOL_GPL(pti_request_masterchannel);
-
-/**
- * pti_release_masterchannel()- Kernel API function used to release
- *				a master, channel ID address
- *				used to write to PTI HW.
- *
- * @mc: master, channel apeture ID address to be released.  This
- *      will de-allocate the structure via kfree().
- */
-void pti_release_masterchannel(struct pti_masterchannel *mc)
-{
-	u8 master, channel, i;
-
-	mutex_lock(&alloclock);
-
-	if (mc) {
-		master = mc->master;
-		channel = mc->channel;
-
-		if (master == APP_BASE_ID) {
-			i = channel >> 3;
-			drv_data->ia_app[i] &=  ~(0x80>>(channel & 0x7));
-		} else if (master == OS_BASE_ID) {
-			i = channel >> 3;
-			drv_data->ia_os[i] &= ~(0x80>>(channel & 0x7));
-		} else {
-			i = channel >> 3;
-			drv_data->ia_modem[i] &= ~(0x80>>(channel & 0x7));
-		}
-
-		kfree(mc);
-	}
-
-	mutex_unlock(&alloclock);
-}
-EXPORT_SYMBOL_GPL(pti_release_masterchannel);
-
-/**
- * pti_writedata()- Kernel API function used to write trace
- *                  debugging data to PTI HW.
- *
- * @mc:    Master, channel aperture ID address to write to.
- *         Null value will return with no write occurring.
- * @buf:   Trace debuging data to write to the PTI HW.
- *         Null value will return with no write occurring.
- * @count: Size of buf. Value of 0 or a negative number will
- *         return with no write occuring.
- */
-void pti_writedata(struct pti_masterchannel *mc, u8 *buf, int count)
-{
-	/*
-	 * since this function is exported, this is treated like an
-	 * API function, thus, all parameters should
-	 * be checked for validity.
-	 */
-	if ((mc != NULL) && (buf != NULL) && (count > 0))
-		pti_write_to_aperture(mc, buf, count);
-	return;
-}
-EXPORT_SYMBOL_GPL(pti_writedata);
-
-/*
- * for the tty_driver_*() basic function descriptions, see tty_driver.h.
- * Specific header comments made for PTI-related specifics.
- */
-
-/**
- * pti_tty_driver_open()- Open an Application master, channel aperture
- * ID to the PTI device via tty device.
- *
- * @tty: tty interface.
- * @filp: filp interface pased to tty_port_open() call.
- *
- * Returns:
- *	int, 0 for success
- *	otherwise, fail value
- *
- * The main purpose of using the tty device interface is for
- * each tty port to have a unique PTI write aperture.  In an
- * example use case, ttyPTI0 gets syslogd and an APP aperture
- * ID and ttyPTI1 is where the n_tracesink ldisc hooks to route
- * modem messages into PTI.  Modem trace data does not have to
- * go to ttyPTI1, but ttyPTI0 and ttyPTI1 do need to be distinct
- * master IDs.  These messages go through the PTI HW and out of
- * the handheld platform and to the Fido/Lauterbach device.
- */
-static int pti_tty_driver_open(struct tty_struct *tty, struct file *filp)
-{
-	/*
-	 * we actually want to allocate a new channel per open, per
-	 * system arch.  HW gives more than plenty channels for a single
-	 * system task to have its own channel to write trace data. This
-	 * also removes a locking requirement for the actual write
-	 * procedure.
-	 */
-	return tty_port_open(tty->port, tty, filp);
-}
-
-/**
- * pti_tty_driver_close()- close tty device and release Application
- * master, channel aperture ID to the PTI device via tty device.
- *
- * @tty: tty interface.
- * @filp: filp interface pased to tty_port_close() call.
- *
- * The main purpose of using the tty device interface is to route
- * syslog daemon messages to the PTI HW and out of the handheld platform
- * and to the Fido/Lauterbach device.
- */
-static void pti_tty_driver_close(struct tty_struct *tty, struct file *filp)
-{
-	tty_port_close(tty->port, tty, filp);
-}
-
-/**
- * pti_tty_install()- Used to set up specific master-channels
- *		      to tty ports for organizational purposes when
- *		      tracing viewed from debuging tools.
- *
- * @driver: tty driver information.
- * @tty: tty struct containing pti information.
- *
- * Returns:
- *	0 for success
- *	otherwise, error
- */
-static int pti_tty_install(struct tty_driver *driver, struct tty_struct *tty)
-{
-	int idx = tty->index;
-	struct pti_tty *pti_tty_data;
-	int ret = tty_standard_install(driver, tty);
-
-	if (ret == 0) {
-		pti_tty_data = kmalloc(sizeof(struct pti_tty), GFP_KERNEL);
-		if (pti_tty_data == NULL)
-			return -ENOMEM;
-
-		if (idx == PTITTY_MINOR_START)
-			pti_tty_data->mc = pti_request_masterchannel(0, NULL);
-		else
-			pti_tty_data->mc = pti_request_masterchannel(2, NULL);
-
-		if (pti_tty_data->mc == NULL) {
-			kfree(pti_tty_data);
-			return -ENXIO;
-		}
-		tty->driver_data = pti_tty_data;
-	}
-
-	return ret;
-}
-
-/**
- * pti_tty_cleanup()- Used to de-allocate master-channel resources
- *		      tied to tty's of this driver.
- *
- * @tty: tty struct containing pti information.
- */
-static void pti_tty_cleanup(struct tty_struct *tty)
-{
-	struct pti_tty *pti_tty_data = tty->driver_data;
-	if (pti_tty_data == NULL)
-		return;
-	pti_release_masterchannel(pti_tty_data->mc);
-	kfree(pti_tty_data);
-	tty->driver_data = NULL;
-}
-
-/**
- * pti_tty_driver_write()-  Write trace debugging data through the char
- * interface to the PTI HW.  Part of the misc device implementation.
- *
- * @tty: tty struct containing pti information.
- * @buf: trace data to be written.
- * @len:  # of byte to write.
- *
- * Returns:
- *	int, # of bytes written
- *	otherwise, error
- */
-static int pti_tty_driver_write(struct tty_struct *tty,
-	const unsigned char *buf, int len)
-{
-	struct pti_tty *pti_tty_data = tty->driver_data;
-	if ((pti_tty_data != NULL) && (pti_tty_data->mc != NULL)) {
-		pti_write_to_aperture(pti_tty_data->mc, (u8 *)buf, len);
-		return len;
-	}
-	/*
-	 * we can't write to the pti hardware if the private driver_data
-	 * and the mc address is not there.
-	 */
-	else
-		return -EFAULT;
-}
-
-/**
- * pti_tty_write_room()- Always returns 2048.
- *
- * @tty: contains tty info of the pti driver.
- */
-static int pti_tty_write_room(struct tty_struct *tty)
-{
-	return 2048;
-}
-
-/**
- * pti_char_open()- Open an Application master, channel aperture
- * ID to the PTI device. Part of the misc device implementation.
- *
- * @inode: not used.
- * @filp:  Output- will have a masterchannel struct set containing
- *                 the allocated application PTI aperture write address.
- *
- * Returns:
- *	int, 0 for success
- *	otherwise, a fail value
- */
-static int pti_char_open(struct inode *inode, struct file *filp)
-{
-	struct pti_masterchannel *mc;
-
-	/*
-	 * We really do want to fail immediately if
-	 * pti_request_masterchannel() fails,
-	 * before assigning the value to filp->private_data.
-	 * Slightly easier to debug if this driver needs debugging.
-	 */
-	mc = pti_request_masterchannel(0, NULL);
-	if (mc == NULL)
-		return -ENOMEM;
-	filp->private_data = mc;
-	return 0;
-}
-
-/**
- * pti_char_release()-  Close a char channel to the PTI device. Part
- * of the misc device implementation.
- *
- * @inode: Not used in this implementaiton.
- * @filp:  Contains private_data that contains the master, channel
- *         ID to be released by the PTI device.
- *
- * Returns:
- *	always 0
- */
-static int pti_char_release(struct inode *inode, struct file *filp)
-{
-	pti_release_masterchannel(filp->private_data);
-	filp->private_data = NULL;
-	return 0;
-}
-
-/**
- * pti_char_write()-  Write trace debugging data through the char
- * interface to the PTI HW.  Part of the misc device implementation.
- *
- * @filp:  Contains private data which is used to obtain
- *         master, channel write ID.
- * @data:  trace data to be written.
- * @len:   # of byte to write.
- * @ppose: Not used in this function implementation.
- *
- * Returns:
- *	int, # of bytes written
- *	otherwise, error value
- *
- * Notes: From side discussions with Alan Cox and experimenting
- * with PTI debug HW like Nokia's Fido box and Lauterbach
- * devices, 8192 byte write buffer used by USER_COPY_SIZE was
- * deemed an appropriate size for this type of usage with
- * debugging HW.
- */
-static ssize_t pti_char_write(struct file *filp, const char __user *data,
-			      size_t len, loff_t *ppose)
-{
-	struct pti_masterchannel *mc;
-	void *kbuf;
-	const char __user *tmp;
-	size_t size = USER_COPY_SIZE;
-	size_t n = 0;
-
-	tmp = data;
-	mc = filp->private_data;
-
-	kbuf = kmalloc(size, GFP_KERNEL);
-	if (kbuf == NULL)  {
-		pr_err("%s(%d): buf allocation failed\n",
-			__func__, __LINE__);
-		return -ENOMEM;
-	}
-
-	do {
-		if (len - n > USER_COPY_SIZE)
-			size = USER_COPY_SIZE;
-		else
-			size = len - n;
-
-		if (copy_from_user(kbuf, tmp, size)) {
-			kfree(kbuf);
-			return n ? n : -EFAULT;
-		}
-
-		pti_write_to_aperture(mc, kbuf, size);
-		n  += size;
-		tmp += size;
-
-	} while (len > n);
-
-	kfree(kbuf);
-	return len;
-}
-
-static const struct tty_operations pti_tty_driver_ops = {
-	.open		= pti_tty_driver_open,
-	.close		= pti_tty_driver_close,
-	.write		= pti_tty_driver_write,
-	.write_room	= pti_tty_write_room,
-	.install	= pti_tty_install,
-	.cleanup	= pti_tty_cleanup
-};
-
-static const struct file_operations pti_char_driver_ops = {
-	.owner		= THIS_MODULE,
-	.write		= pti_char_write,
-	.open		= pti_char_open,
-	.release	= pti_char_release,
-};
-
-static struct miscdevice pti_char_driver = {
-	.minor		= MISC_DYNAMIC_MINOR,
-	.name		= CHARNAME,
-	.fops		= &pti_char_driver_ops
-};
-
-/**
- * pti_console_write()-  Write to the console that has been acquired.
- *
- * @c:   Not used in this implementaiton.
- * @buf: Data to be written.
- * @len: Length of buf.
- */
-static void pti_console_write(struct console *c, const char *buf, unsigned len)
-{
-	static struct pti_masterchannel mc = {.master  = CONSOLE_ID,
-					      .channel = 0};
-
-	mc.channel = pti_console_channel;
-	pti_console_channel = (pti_console_channel + 1) & 0x7f;
-
-	pti_write_full_frame_to_aperture(&mc, buf, len);
-}
-
-/**
- * pti_console_device()-  Return the driver tty structure and set the
- *			  associated index implementation.
- *
- * @c:     Console device of the driver.
- * @index: index associated with c.
- *
- * Returns:
- *	always value of pti_tty_driver structure when this function
- *	is called.
- */
-static struct tty_driver *pti_console_device(struct console *c, int *index)
-{
-	*index = c->index;
-	return pti_tty_driver;
-}
-
-/**
- * pti_console_setup()-  Initialize console variables used by the driver.
- *
- * @c:     Not used.
- * @opts:  Not used.
- *
- * Returns:
- *	always 0.
- */
-static int pti_console_setup(struct console *c, char *opts)
-{
-	pti_console_channel = 0;
-	pti_control_channel = 0;
-	return 0;
-}
-
-/*
- * pti_console struct, used to capture OS printk()'s and shift
- * out to the PTI device for debugging.  This cannot be
- * enabled upon boot because of the possibility of eating
- * any serial console printk's (race condition discovered).
- * The console should be enabled upon when the tty port is
- * used for the first time.  Since the primary purpose for
- * the tty port is to hook up syslog to it, the tty port
- * will be open for a really long time.
- */
-static struct console pti_console = {
-	.name		= TTYNAME,
-	.write		= pti_console_write,
-	.device		= pti_console_device,
-	.setup		= pti_console_setup,
-	.flags		= CON_PRINTBUFFER,
-	.index		= 0,
-};
-
-/**
- * pti_port_activate()- Used to start/initialize any items upon
- * first opening of tty_port().
- *
- * @port: The tty port number of the PTI device.
- * @tty:  The tty struct associated with this device.
- *
- * Returns:
- *	always returns 0
- *
- * Notes: The primary purpose of the PTI tty port 0 is to hook
- * the syslog daemon to it; thus this port will be open for a
- * very long time.
- */
-static int pti_port_activate(struct tty_port *port, struct tty_struct *tty)
-{
-	if (port->tty->index == PTITTY_MINOR_START)
-		console_start(&pti_console);
-	return 0;
-}
-
-/**
- * pti_port_shutdown()- Used to stop/shutdown any items upon the
- * last tty port close.
- *
- * @port: The tty port number of the PTI device.
- *
- * Notes: The primary purpose of the PTI tty port 0 is to hook
- * the syslog daemon to it; thus this port will be open for a
- * very long time.
- */
-static void pti_port_shutdown(struct tty_port *port)
-{
-	if (port->tty->index == PTITTY_MINOR_START)
-		console_stop(&pti_console);
-}
-
-static const struct tty_port_operations tty_port_ops = {
-	.activate = pti_port_activate,
-	.shutdown = pti_port_shutdown,
-};
-
-/*
- * Note the _probe() call sets everything up and ties the char and tty
- * to successfully detecting the PTI device on the pci bus.
- */
-
-/**
- * pti_pci_probe()- Used to detect pti on the pci bus and set
- *		    things up in the driver.
- *
- * @pdev: pci_dev struct values for pti.
- * @ent:  pci_device_id struct for pti driver.
- *
- * Returns:
- *	0 for success
- *	otherwise, error
- */
-static int pti_pci_probe(struct pci_dev *pdev,
-		const struct pci_device_id *ent)
-{
-	unsigned int a;
-	int retval;
-	int pci_bar = 1;
-
-	dev_dbg(&pdev->dev, "%s %s(%d): PTI PCI ID %04x:%04x\n", __FILE__,
-			__func__, __LINE__, pdev->vendor, pdev->device);
-
-	retval = misc_register(&pti_char_driver);
-	if (retval) {
-		pr_err("%s(%d): CHAR registration failed of pti driver\n",
-			__func__, __LINE__);
-		pr_err("%s(%d): Error value returned: %d\n",
-			__func__, __LINE__, retval);
-		goto err;
-	}
-
-	retval = pci_enable_device(pdev);
-	if (retval != 0) {
-		dev_err(&pdev->dev,
-			"%s: pci_enable_device() returned error %d\n",
-			__func__, retval);
-		goto err_unreg_misc;
-	}
-
-	drv_data = kzalloc(sizeof(*drv_data), GFP_KERNEL);
-	if (drv_data == NULL) {
-		retval = -ENOMEM;
-		dev_err(&pdev->dev,
-			"%s(%d): kmalloc() returned NULL memory.\n",
-			__func__, __LINE__);
-		goto err_disable_pci;
-	}
-	drv_data->pti_addr = pci_resource_start(pdev, pci_bar);
-
-	retval = pci_request_region(pdev, pci_bar, dev_name(&pdev->dev));
-	if (retval != 0) {
-		dev_err(&pdev->dev,
-			"%s(%d): pci_request_region() returned error %d\n",
-			__func__, __LINE__, retval);
-		goto err_free_dd;
-	}
-	drv_data->aperture_base = drv_data->pti_addr+APERTURE_14;
-	drv_data->pti_ioaddr =
-		ioremap((u32)drv_data->aperture_base,
-		APERTURE_LEN);
-	if (!drv_data->pti_ioaddr) {
-		retval = -ENOMEM;
-		goto err_rel_reg;
-	}
-
-	pci_set_drvdata(pdev, drv_data);
-
-	for (a = 0; a < PTITTY_MINOR_NUM; a++) {
-		struct tty_port *port = &drv_data->port[a];
-		tty_port_init(port);
-		port->ops = &tty_port_ops;
-
-		tty_port_register_device(port, pti_tty_driver, a, &pdev->dev);
-	}
-
-	register_console(&pti_console);
-
-	return 0;
-err_rel_reg:
-	pci_release_region(pdev, pci_bar);
-err_free_dd:
-	kfree(drv_data);
-err_disable_pci:
-	pci_disable_device(pdev);
-err_unreg_misc:
-	misc_deregister(&pti_char_driver);
-err:
-	return retval;
-}
-
-/**
- * pti_pci_remove()- Driver exit method to remove PTI from
- *		   PCI bus.
- * @pdev: variable containing pci info of PTI.
- */
-static void pti_pci_remove(struct pci_dev *pdev)
-{
-	struct pti_dev *drv_data = pci_get_drvdata(pdev);
-	unsigned int a;
-
-	unregister_console(&pti_console);
-
-	for (a = 0; a < PTITTY_MINOR_NUM; a++) {
-		tty_unregister_device(pti_tty_driver, a);
-		tty_port_destroy(&drv_data->port[a]);
-	}
-
-	iounmap(drv_data->pti_ioaddr);
-	kfree(drv_data);
-	pci_release_region(pdev, 1);
-	pci_disable_device(pdev);
-
-	misc_deregister(&pti_char_driver);
-}
-
-static struct pci_driver pti_pci_driver = {
-	.name		= PCINAME,
-	.id_table	= pci_ids,
-	.probe		= pti_pci_probe,
-	.remove		= pti_pci_remove,
-};
-
-/**
- * pti_init()- Overall entry/init call to the pti driver.
- *             It starts the registration process with the kernel.
- *
- * Returns:
- *	int __init, 0 for success
- *	otherwise value is an error
- *
- */
-static int __init pti_init(void)
-{
-	int retval;
-
-	/* First register module as tty device */
-
-	pti_tty_driver = alloc_tty_driver(PTITTY_MINOR_NUM);
-	if (pti_tty_driver == NULL) {
-		pr_err("%s(%d): Memory allocation failed for ptiTTY driver\n",
-			__func__, __LINE__);
-		return -ENOMEM;
-	}
-
-	pti_tty_driver->driver_name		= DRIVERNAME;
-	pti_tty_driver->name			= TTYNAME;
-	pti_tty_driver->major			= 0;
-	pti_tty_driver->minor_start		= PTITTY_MINOR_START;
-	pti_tty_driver->type			= TTY_DRIVER_TYPE_SYSTEM;
-	pti_tty_driver->subtype			= SYSTEM_TYPE_SYSCONS;
-	pti_tty_driver->flags			= TTY_DRIVER_REAL_RAW |
-						  TTY_DRIVER_DYNAMIC_DEV;
-	pti_tty_driver->init_termios		= tty_std_termios;
-
-	tty_set_operations(pti_tty_driver, &pti_tty_driver_ops);
-
-	retval = tty_register_driver(pti_tty_driver);
-	if (retval) {
-		pr_err("%s(%d): TTY registration failed of pti driver\n",
-			__func__, __LINE__);
-		pr_err("%s(%d): Error value returned: %d\n",
-			__func__, __LINE__, retval);
-
-		goto put_tty;
-	}
-
-	retval = pci_register_driver(&pti_pci_driver);
-	if (retval) {
-		pr_err("%s(%d): PCI registration failed of pti driver\n",
-			__func__, __LINE__);
-		pr_err("%s(%d): Error value returned: %d\n",
-			__func__, __LINE__, retval);
-		goto unreg_tty;
-	}
-
-	return 0;
-unreg_tty:
-	tty_unregister_driver(pti_tty_driver);
-put_tty:
-	put_tty_driver(pti_tty_driver);
-	pti_tty_driver = NULL;
-	return retval;
-}
-
-/**
- * pti_exit()- Unregisters this module as a tty and pci driver.
- */
-static void __exit pti_exit(void)
-{
-	tty_unregister_driver(pti_tty_driver);
-	pci_unregister_driver(&pti_pci_driver);
-	put_tty_driver(pti_tty_driver);
-}
-
-module_init(pti_init);
-module_exit(pti_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Ken Mills, Jay Freyensee");
-MODULE_DESCRIPTION("PTI Driver");
-
diff --git a/drivers/misc/pvpanic.c b/drivers/misc/pvpanic.c
index 41cab297d66e..9f350e05ef68 100644
--- a/drivers/misc/pvpanic.c
+++ b/drivers/misc/pvpanic.c
@@ -19,6 +19,47 @@
 #include <uapi/misc/pvpanic.h>
 
 static void __iomem *base;
+static unsigned int capability = PVPANIC_PANICKED | PVPANIC_CRASH_LOADED;
+static unsigned int events;
+
+static ssize_t capability_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%x\n", capability);
+}
+static DEVICE_ATTR_RO(capability);
+
+static ssize_t events_show(struct device *dev,  struct device_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%x\n", events);
+}
+
+static ssize_t events_store(struct device *dev,  struct device_attribute *attr,
+			    const char *buf, size_t count)
+{
+	unsigned int tmp;
+	int err;
+
+	err = kstrtouint(buf, 16, &tmp);
+	if (err)
+		return err;
+
+	if ((tmp & capability) != tmp)
+		return -EINVAL;
+
+	events = tmp;
+
+	return count;
+
+}
+static DEVICE_ATTR_RW(events);
+
+static struct attribute *pvpanic_dev_attrs[] = {
+	&dev_attr_capability.attr,
+	&dev_attr_events.attr,
+	NULL
+};
+ATTRIBUTE_GROUPS(pvpanic_dev);
 
 MODULE_AUTHOR("Hu Tao <hutao@cn.fujitsu.com>");
 MODULE_DESCRIPTION("pvpanic device driver");
@@ -27,7 +68,8 @@ MODULE_LICENSE("GPL");
 static void
 pvpanic_send_event(unsigned int event)
 {
-	iowrite8(event, base);
+	if (event & capability & events)
+		iowrite8(event, base);
 }
 
 static int
@@ -73,8 +115,13 @@ static int pvpanic_mmio_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
-	atomic_notifier_chain_register(&panic_notifier_list,
-				       &pvpanic_panic_nb);
+	/* initialize capability by RDPT */
+	capability &= ioread8(base);
+	events = capability;
+
+	if (capability)
+		atomic_notifier_chain_register(&panic_notifier_list,
+					       &pvpanic_panic_nb);
 
 	return 0;
 }
@@ -82,8 +129,9 @@ static int pvpanic_mmio_probe(struct platform_device *pdev)
 static int pvpanic_mmio_remove(struct platform_device *pdev)
 {
 
-	atomic_notifier_chain_unregister(&panic_notifier_list,
-					 &pvpanic_panic_nb);
+	if (capability)
+		atomic_notifier_chain_unregister(&panic_notifier_list,
+						 &pvpanic_panic_nb);
 
 	return 0;
 }
@@ -104,6 +152,7 @@ static struct platform_driver pvpanic_mmio_driver = {
 		.name = "pvpanic-mmio",
 		.of_match_table = pvpanic_mmio_match,
 		.acpi_match_table = pvpanic_device_ids,
+		.dev_groups = pvpanic_dev_groups,
 	},
 	.probe = pvpanic_mmio_probe,
 	.remove = pvpanic_mmio_remove,
diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c
index 23837d0d6f4a..2508f83bdc3f 100644
--- a/drivers/misc/sgi-xp/xpnet.c
+++ b/drivers/misc/sgi-xp/xpnet.c
@@ -208,7 +208,7 @@ xpnet_receive(short partid, int channel, struct xpnet_message *msg)
 	} else {
 		dst = (void *)((u64)skb->data & ~(L1_CACHE_BYTES - 1));
 		dev_dbg(xpnet, "transferring buffer to the skb->data area;\n\t"
-			"xp_remote_memcpy(0x%p, 0x%p, %hu)\n", dst,
+			"xp_remote_memcpy(0x%p, 0x%p, %u)\n", dst,
 					  (void *)msg->buf_pa, msg->size);
 
 		ret = xp_remote_memcpy(xp_pa(dst), msg->buf_pa, msg->size);
@@ -218,7 +218,7 @@ xpnet_receive(short partid, int channel, struct xpnet_message *msg)
 			 * !!! appears in_use and we can't just call
 			 * !!! dev_kfree_skb.
 			 */
-			dev_err(xpnet, "xp_remote_memcpy(0x%p, 0x%p, 0x%hx) "
+			dev_err(xpnet, "xp_remote_memcpy(0x%p, 0x%p, 0x%x) "
 				"returned error=0x%x\n", dst,
 				(void *)msg->buf_pa, msg->size, ret);
 
diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c
index c49065887e8f..880c33ab9f47 100644
--- a/drivers/misc/vmw_vmci/vmci_queue_pair.c
+++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c
@@ -237,7 +237,9 @@ static struct qp_list qp_guest_endpoints = {
 #define QPE_NUM_PAGES(_QPE) ((u32) \
 			     (DIV_ROUND_UP(_QPE.produce_size, PAGE_SIZE) + \
 			      DIV_ROUND_UP(_QPE.consume_size, PAGE_SIZE) + 2))
-
+#define QP_SIZES_ARE_VALID(_prod_qsize, _cons_qsize) \
+	((_prod_qsize) + (_cons_qsize) >= max(_prod_qsize, _cons_qsize) && \
+	 (_prod_qsize) + (_cons_qsize) <= VMCI_MAX_GUEST_QP_MEMORY)
 
 /*
  * Frees kernel VA space for a given queue and its queue header, and
@@ -528,7 +530,7 @@ static struct vmci_queue *qp_host_alloc_queue(u64 size)
 	u64 num_pages;
 	const size_t queue_size = sizeof(*queue) + sizeof(*(queue->kernel_if));
 
-	if (size > SIZE_MAX - PAGE_SIZE)
+	if (size > min_t(size_t, VMCI_MAX_GUEST_QP_MEMORY, SIZE_MAX - PAGE_SIZE))
 		return NULL;
 	num_pages = DIV_ROUND_UP(size, PAGE_SIZE) + 1;
 	if (num_pages > (SIZE_MAX - queue_size) /
@@ -537,6 +539,9 @@ static struct vmci_queue *qp_host_alloc_queue(u64 size)
 
 	queue_page_size = num_pages * sizeof(*queue->kernel_if->u.h.page);
 
+	if (queue_size + queue_page_size > KMALLOC_MAX_SIZE)
+		return NULL;
+
 	queue = kzalloc(queue_size + queue_page_size, GFP_KERNEL);
 	if (queue) {
 		queue->q_header = NULL;
@@ -630,7 +635,7 @@ static void qp_release_pages(struct page **pages,
 
 	for (i = 0; i < num_pages; i++) {
 		if (dirty)
-			set_page_dirty(pages[i]);
+			set_page_dirty_lock(pages[i]);
 
 		put_page(pages[i]);
 		pages[i] = NULL;
@@ -1207,7 +1212,7 @@ static int qp_alloc_guest_work(struct vmci_handle *handle,
 	} else {
 		result = qp_alloc_hypercall(queue_pair_entry);
 		if (result < VMCI_SUCCESS) {
-			pr_warn("qp_alloc_hypercall result = %d\n", result);
+			pr_devel("qp_alloc_hypercall result = %d\n", result);
 			goto error;
 		}
 	}
@@ -1929,6 +1934,9 @@ int vmci_qp_broker_alloc(struct vmci_handle handle,
 			 struct vmci_qp_page_store *page_store,
 			 struct vmci_ctx *context)
 {
+	if (!QP_SIZES_ARE_VALID(produce_size, consume_size))
+		return VMCI_ERROR_NO_RESOURCES;
+
 	return qp_broker_alloc(handle, peer, flags, priv_flags,
 			       produce_size, consume_size,
 			       page_store, context, NULL, NULL, NULL, NULL);
@@ -2685,8 +2693,7 @@ int vmci_qpair_alloc(struct vmci_qp **qpair,
 	 * used by the device is NO_RESOURCES, so use that here too.
 	 */
 
-	if (produce_qsize + consume_qsize < max(produce_qsize, consume_qsize) ||
-	    produce_qsize + consume_qsize > VMCI_MAX_GUEST_QP_MEMORY)
+	if (!QP_SIZES_ARE_VALID(produce_qsize, consume_qsize))
 		return VMCI_ERROR_NO_RESOURCES;
 
 	retval = vmci_route(&src, &dst, false, &route);
diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.h b/drivers/misc/vmw_vmci/vmci_queue_pair.h
index 00017fc29a52..c4e6e924d9be 100644
--- a/drivers/misc/vmw_vmci/vmci_queue_pair.h
+++ b/drivers/misc/vmw_vmci/vmci_queue_pair.h
@@ -104,7 +104,7 @@ struct vmci_qp_dtch_info {
 struct vmci_qp_page_store {
 	/* Reference to pages backing the queue pair. */
 	u64 pages;
-	/* Length of pageList/virtual addres range (in pages). */
+	/* Length of pageList/virtual address range (in pages). */
 	u32 len;
 };
 
diff --git a/drivers/most/core.c b/drivers/most/core.c
index 353ab277cbc6..e4412c7d25b0 100644
--- a/drivers/most/core.c
+++ b/drivers/most/core.c
@@ -379,7 +379,7 @@ static struct attribute *channel_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group channel_attr_group = {
+static const struct attribute_group channel_attr_group = {
 	.attrs = channel_attrs,
 	.is_visible = channel_attr_is_visible,
 };
@@ -436,7 +436,7 @@ static struct attribute *interface_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group interface_attr_group = {
+static const struct attribute_group interface_attr_group = {
 	.attrs = interface_attrs,
 };
 
@@ -718,7 +718,7 @@ static struct attribute *mc_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group mc_attr_group = {
+static const struct attribute_group mc_attr_group = {
 	.attrs = mc_attrs,
 };
 
diff --git a/drivers/nfc/microread/mei.c b/drivers/nfc/microread/mei.c
index 5dad8847a9b3..8fa7771085eb 100644
--- a/drivers/nfc/microread/mei.c
+++ b/drivers/nfc/microread/mei.c
@@ -44,15 +44,13 @@ static int microread_mei_probe(struct mei_cl_device *cldev,
 	return 0;
 }
 
-static int microread_mei_remove(struct mei_cl_device *cldev)
+static void microread_mei_remove(struct mei_cl_device *cldev)
 {
 	struct nfc_mei_phy *phy = mei_cldev_get_drvdata(cldev);
 
 	microread_remove(phy->hdev);
 
 	nfc_mei_phy_free(phy);
-
-	return 0;
 }
 
 static struct mei_cl_device_id microread_mei_tbl[] = {
diff --git a/drivers/nfc/pn544/mei.c b/drivers/nfc/pn544/mei.c
index 579bc599f545..5c10aac085a4 100644
--- a/drivers/nfc/pn544/mei.c
+++ b/drivers/nfc/pn544/mei.c
@@ -42,7 +42,7 @@ static int pn544_mei_probe(struct mei_cl_device *cldev,
 	return 0;
 }
 
-static int pn544_mei_remove(struct mei_cl_device *cldev)
+static void pn544_mei_remove(struct mei_cl_device *cldev)
 {
 	struct nfc_mei_phy *phy = mei_cldev_get_drvdata(cldev);
 
@@ -51,8 +51,6 @@ static int pn544_mei_remove(struct mei_cl_device *cldev)
 	pn544_hci_remove(phy->hdev);
 
 	nfc_mei_phy_free(phy);
-
-	return 0;
 }
 
 static struct mei_cl_device_id pn544_mei_tbl[] = {
diff --git a/drivers/nvmem/Kconfig b/drivers/nvmem/Kconfig
index 954d3b4a52ab..75d2594c16e1 100644
--- a/drivers/nvmem/Kconfig
+++ b/drivers/nvmem/Kconfig
@@ -270,4 +270,12 @@ config SPRD_EFUSE
 	  This driver can also be built as a module. If so, the module
 	  will be called nvmem-sprd-efuse.
 
+config NVMEM_RMEM
+	tristate "Reserved Memory Based Driver Support"
+	help
+	  This driver maps reserved memory into an nvmem device. It might be
+	  useful to expose information left by firmware in memory.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called nvmem-rmem.
 endif
diff --git a/drivers/nvmem/Makefile b/drivers/nvmem/Makefile
index a7c377218341..5376b8e0dae5 100644
--- a/drivers/nvmem/Makefile
+++ b/drivers/nvmem/Makefile
@@ -55,3 +55,5 @@ obj-$(CONFIG_NVMEM_ZYNQMP)	+= nvmem_zynqmp_nvmem.o
 nvmem_zynqmp_nvmem-y		:= zynqmp_nvmem.o
 obj-$(CONFIG_SPRD_EFUSE)	+= nvmem_sprd_efuse.o
 nvmem_sprd_efuse-y		:= sprd-efuse.o
+obj-$(CONFIG_NVMEM_RMEM) 	+= nvmem-rmem.o
+nvmem-rmem-y			:= rmem.o
diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
index 177f5bf27c6d..a5ab1e0c74cf 100644
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -682,7 +682,9 @@ static int nvmem_add_cells_from_of(struct nvmem_device *nvmem)
 
 	for_each_child_of_node(parent, child) {
 		addr = of_get_property(child, "reg", &len);
-		if (!addr || (len < 2 * sizeof(u32))) {
+		if (!addr)
+			continue;
+		if (len < 2 * sizeof(u32)) {
 			dev_err(dev, "nvmem: invalid reg on %pOF\n", child);
 			return -EINVAL;
 		}
@@ -713,6 +715,7 @@ static int nvmem_add_cells_from_of(struct nvmem_device *nvmem)
 				cell->name, nvmem->stride);
 			/* Cells already added will be freed later. */
 			kfree_const(cell->name);
+			of_node_put(cell->np);
 			kfree(cell);
 			return -EINVAL;
 		}
diff --git a/drivers/nvmem/imx-iim.c b/drivers/nvmem/imx-iim.c
index 701704b87dc9..c86339a7f583 100644
--- a/drivers/nvmem/imx-iim.c
+++ b/drivers/nvmem/imx-iim.c
@@ -96,7 +96,6 @@ MODULE_DEVICE_TABLE(of, imx_iim_dt_ids);
 
 static int imx_iim_probe(struct platform_device *pdev)
 {
-	const struct of_device_id *of_id;
 	struct device *dev = &pdev->dev;
 	struct iim_priv *iim;
 	struct nvmem_device *nvmem;
@@ -111,11 +110,7 @@ static int imx_iim_probe(struct platform_device *pdev)
 	if (IS_ERR(iim->base))
 		return PTR_ERR(iim->base);
 
-	of_id = of_match_device(imx_iim_dt_ids, dev);
-	if (!of_id)
-		return -ENODEV;
-
-	drvdata = of_id->data;
+	drvdata = of_device_get_match_data(&pdev->dev);
 
 	iim->clk = devm_clk_get(dev, NULL);
 	if (IS_ERR(iim->clk))
diff --git a/drivers/nvmem/qcom-spmi-sdam.c b/drivers/nvmem/qcom-spmi-sdam.c
index a72704cd0468..f6e9f96933ca 100644
--- a/drivers/nvmem/qcom-spmi-sdam.c
+++ b/drivers/nvmem/qcom-spmi-sdam.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (c) 2017, 2020 The Linux Foundation. All rights reserved.
+ * Copyright (c) 2017, 2020-2021, The Linux Foundation. All rights reserved.
  */
 
 #include <linux/device.h>
@@ -18,7 +18,6 @@
 #define SDAM_PBS_TRIG_CLR		0xE6
 
 struct sdam_chip {
-	struct platform_device		*pdev;
 	struct regmap			*regmap;
 	struct nvmem_config		sdam_config;
 	unsigned int			base;
@@ -65,7 +64,7 @@ static int sdam_read(void *priv, unsigned int offset, void *val,
 				size_t bytes)
 {
 	struct sdam_chip *sdam = priv;
-	struct device *dev = &sdam->pdev->dev;
+	struct device *dev = sdam->sdam_config.dev;
 	int rc;
 
 	if (!sdam_is_valid(sdam, offset, bytes)) {
@@ -86,7 +85,7 @@ static int sdam_write(void *priv, unsigned int offset, void *val,
 				size_t bytes)
 {
 	struct sdam_chip *sdam = priv;
-	struct device *dev = &sdam->pdev->dev;
+	struct device *dev = sdam->sdam_config.dev;
 	int rc;
 
 	if (!sdam_is_valid(sdam, offset, bytes)) {
diff --git a/drivers/nvmem/rmem.c b/drivers/nvmem/rmem.c
new file mode 100644
index 000000000000..b11c3c974b3d
--- /dev/null
+++ b/drivers/nvmem/rmem.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2020 Nicolas Saenz Julienne <nsaenzjulienne@suse.de>
+ */
+
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/nvmem-provider.h>
+#include <linux/of_reserved_mem.h>
+#include <linux/platform_device.h>
+
+struct rmem {
+	struct device *dev;
+	struct nvmem_device *nvmem;
+	struct reserved_mem *mem;
+
+	phys_addr_t size;
+};
+
+/* nvmem reg_read hook: read @bytes at @offset of the region into @val. */
+static int rmem_read(void *context, unsigned int offset,
+		     void *val, size_t bytes)
+{
+	struct rmem *priv = context;
+	size_t available = priv->mem->size;
+	loff_t off = offset;
+	void *addr;
+	int count;
+
+	/*
+	 * Only map the reserved memory at this point to avoid potential rogue
+	 * kernel threads inadvertently modifying it. Based on the current
+	 * use-cases for this driver, the performance hit isn't a concern.
+	 * Nor is it likely to be, given the nature of the subsystem. Most nvmem
+	 * devices operate over slow buses to begin with.
+	 *
+	 * An alternative would be setting the memory as RO, set_memory_ro(),
+	 * but as of Dec 2020 this isn't possible on arm64.
+	 */
+	addr = memremap(priv->mem->base, available, MEMREMAP_WB);
+	if (!addr) {
+		/* memremap() reports failure with NULL, not an ERR_PTR() */
+		dev_err(priv->dev, "Failed to remap memory region\n");
+		return -ENOMEM;
+	}
+
+	count = memory_read_from_buffer(val, bytes, &off, addr, available);
+	memunmap(addr);
+
+	return count;
+}
+
+static int rmem_probe(struct platform_device *pdev)
+{
+	struct nvmem_config config = { };
+	struct device *dev = &pdev->dev;
+	struct reserved_mem *mem;
+	struct rmem *priv;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+	priv->dev = dev;
+
+	mem = of_reserved_mem_lookup(dev->of_node);
+	if (!mem) {
+		dev_err(dev, "Failed to lookup reserved memory\n");
+		return -EINVAL;
+	}
+	priv->mem = mem;
+
+	config.dev = dev;
+	config.priv = priv;
+	config.name = "rmem";
+	config.size = mem->size;
+	config.reg_read = rmem_read;
+
+	return PTR_ERR_OR_ZERO(devm_nvmem_register(dev, &config));
+}
+
+static const struct of_device_id rmem_match[] = {
+	{ .compatible = "nvmem-rmem", },
+	{ /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, rmem_match);
+
+static struct platform_driver rmem_driver = {
+	.probe = rmem_probe,
+	.driver = {
+		.name = "rmem",
+		.of_match_table = rmem_match,
+	},
+};
+module_platform_driver(rmem_driver);
+
+MODULE_AUTHOR("Nicolas Saenz Julienne <nsaenzjulienne@suse.de>");
+MODULE_DESCRIPTION("Reserved Memory Based nvmem Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 020bf860c72c..0da86209ddaa 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -511,6 +511,7 @@ static const struct of_device_id reserved_mem_matches[] = {
 	{ .compatible = "qcom,rmtfs-mem" },
 	{ .compatible = "qcom,cmd-db" },
 	{ .compatible = "ramoops" },
+	{ .compatible = "nvmem-rmem" },
 	{}
 };
 
diff --git a/drivers/phy/Kconfig b/drivers/phy/Kconfig
index 00dabe5fab8a..68d9c2f6a5ca 100644
--- a/drivers/phy/Kconfig
+++ b/drivers/phy/Kconfig
@@ -52,6 +52,7 @@ config PHY_XGENE
 config USB_LGM_PHY
 	tristate "INTEL Lightning Mountain USB PHY Driver"
 	depends on USB_SUPPORT
+	depends on X86 || COMPILE_TEST
 	select USB_PHY
 	select REGULATOR
 	select REGULATOR_FIXED_VOLTAGE
diff --git a/drivers/phy/broadcom/Kconfig b/drivers/phy/broadcom/Kconfig
index a1f1a9c90d0d..09256339bd04 100644
--- a/drivers/phy/broadcom/Kconfig
+++ b/drivers/phy/broadcom/Kconfig
@@ -91,10 +91,11 @@ config PHY_BRCM_SATA
 
 config PHY_BRCM_USB
 	tristate "Broadcom STB USB PHY driver"
-	depends on ARCH_BRCMSTB || COMPILE_TEST
+	depends on ARCH_BCM4908 || ARCH_BRCMSTB || COMPILE_TEST
 	depends on OF
 	select GENERIC_PHY
 	select SOC_BRCMSTB
+	default ARCH_BCM4908
 	default ARCH_BRCMSTB
 	help
 	  Enable this to support the Broadcom STB USB PHY.
diff --git a/drivers/phy/broadcom/phy-brcm-sata.c b/drivers/phy/broadcom/phy-brcm-sata.c
index 3ecf41359591..769c707d9b71 100644
--- a/drivers/phy/broadcom/phy-brcm-sata.c
+++ b/drivers/phy/broadcom/phy-brcm-sata.c
@@ -651,7 +651,7 @@ static int brcm_dsl_sata_init(struct brcm_sata_port *port)
 			break;
 		msleep(20);
 		try--;
-	};
+	}
 
 	if (!try) {
 		/* PLL did not lock; give up */
diff --git a/drivers/phy/broadcom/phy-brcm-usb.c b/drivers/phy/broadcom/phy-brcm-usb.c
index 99fbc7e4138b..116fb23aebd9 100644
--- a/drivers/phy/broadcom/phy-brcm-usb.c
+++ b/drivers/phy/broadcom/phy-brcm-usb.c
@@ -11,6 +11,7 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/of_device.h>
 #include <linux/phy/phy.h>
 #include <linux/platform_device.h>
 #include <linux/interrupt.h>
@@ -34,7 +35,7 @@ struct value_to_name_map {
 };
 
 struct match_chip_info {
-	void *init_func;
+	void (*init_func)(struct brcm_usb_init_params *params);
 	u8 required_regs[BRCM_REGS_MAX + 1];
 	u8 optional_reg;
 };
@@ -286,6 +287,10 @@ static const struct match_chip_info chip_info_7445 = {
 
 static const struct of_device_id brcm_usb_dt_ids[] = {
 	{
+		.compatible = "brcm,bcm4908-usb-phy",
+		.data = &chip_info_7445,
+	},
+	{
 		.compatible = "brcm,bcm7216-usb-phy",
 		.data = &chip_info_7216,
 	},
@@ -427,8 +432,6 @@ static int brcm_usb_phy_probe(struct platform_device *pdev)
 	struct device_node *dn = pdev->dev.of_node;
 	int err;
 	const char *mode;
-	const struct of_device_id *match;
-	void (*dvr_init)(struct brcm_usb_init_params *params);
 	const struct match_chip_info *info;
 	struct regmap *rmap;
 	int x;
@@ -441,10 +444,11 @@ static int brcm_usb_phy_probe(struct platform_device *pdev)
 	priv->ini.family_id = brcmstb_get_family_id();
 	priv->ini.product_id = brcmstb_get_product_id();
 
-	match = of_match_node(brcm_usb_dt_ids, dev->of_node);
-	info = match->data;
-	dvr_init = info->init_func;
-	(*dvr_init)(&priv->ini);
+	info = of_device_get_match_data(&pdev->dev);
+	if (!info)
+		return -ENOENT;
+
+	info->init_func(&priv->ini);
 
 	dev_dbg(dev, "Best mapping table is for %s\n",
 		priv->ini.family_name);
diff --git a/drivers/phy/cadence/phy-cadence-torrent.c b/drivers/phy/cadence/phy-cadence-torrent.c
index f310e15d94cb..591a15834b48 100644
--- a/drivers/phy/cadence/phy-cadence-torrent.c
+++ b/drivers/phy/cadence/phy-cadence-torrent.c
@@ -2298,6 +2298,7 @@ static int cdns_torrent_phy_probe(struct platform_device *pdev)
 
 	if (total_num_lanes > MAX_NUM_LANES) {
 		dev_err(dev, "Invalid lane configuration\n");
+		ret = -EINVAL;
 		goto put_lnk_rst;
 	}
 
diff --git a/drivers/phy/ingenic/phy-ingenic-usb.c b/drivers/phy/ingenic/phy-ingenic-usb.c
index 4d1587d82286..ea127b177f46 100644
--- a/drivers/phy/ingenic/phy-ingenic-usb.c
+++ b/drivers/phy/ingenic/phy-ingenic-usb.c
@@ -82,18 +82,7 @@
 #define USBPCR1_PORT_RST			BIT(21)
 #define USBPCR1_WORD_IF_16BIT		BIT(19)
 
-enum ingenic_usb_phy_version {
-	ID_JZ4770,
-	ID_JZ4775,
-	ID_JZ4780,
-	ID_X1000,
-	ID_X1830,
-	ID_X2000,
-};
-
 struct ingenic_soc_info {
-	enum ingenic_usb_phy_version version;
-
 	void (*usb_phy_init)(struct phy *phy);
 };
 
@@ -300,38 +289,26 @@ static void x2000_usb_phy_init(struct phy *phy)
 }
 
 static const struct ingenic_soc_info jz4770_soc_info = {
-	.version = ID_JZ4770,
-
 	.usb_phy_init = jz4770_usb_phy_init,
 };
 
 static const struct ingenic_soc_info jz4775_soc_info = {
-	.version = ID_JZ4775,
-
 	.usb_phy_init = jz4775_usb_phy_init,
 };
 
 static const struct ingenic_soc_info jz4780_soc_info = {
-	.version = ID_JZ4780,
-
 	.usb_phy_init = jz4780_usb_phy_init,
 };
 
 static const struct ingenic_soc_info x1000_soc_info = {
-	.version = ID_X1000,
-
 	.usb_phy_init = x1000_usb_phy_init,
 };
 
 static const struct ingenic_soc_info x1830_soc_info = {
-	.version = ID_X1830,
-
 	.usb_phy_init = x1830_usb_phy_init,
 };
 
 static const struct ingenic_soc_info x2000_soc_info = {
-	.version = ID_X2000,
-
 	.usb_phy_init = x2000_usb_phy_init,
 };
 
diff --git a/drivers/phy/lantiq/phy-lantiq-rcu-usb2.c b/drivers/phy/lantiq/phy-lantiq-rcu-usb2.c
index a7d126192cf1..29d246ea24b4 100644
--- a/drivers/phy/lantiq/phy-lantiq-rcu-usb2.c
+++ b/drivers/phy/lantiq/phy-lantiq-rcu-usb2.c
@@ -124,8 +124,16 @@ static int ltq_rcu_usb2_phy_power_on(struct phy *phy)
 	reset_control_deassert(priv->phy_reset);
 
 	ret = clk_prepare_enable(priv->phy_gate_clk);
-	if (ret)
+	if (ret) {
 		dev_err(dev, "failed to enable PHY gate\n");
+		return ret;
+	}
+
+	/*
+	 * at least the xrx200 usb2 phy requires some extra time to be
+	 * operational after enabling the clock
+	 */
+	usleep_range(100, 200);
 
 	return ret;
 }
diff --git a/drivers/phy/mediatek/phy-mtk-hdmi.c b/drivers/phy/mediatek/phy-mtk-hdmi.c
index 45be8aa724f3..8313bd517e4c 100644
--- a/drivers/phy/mediatek/phy-mtk-hdmi.c
+++ b/drivers/phy/mediatek/phy-mtk-hdmi.c
@@ -201,6 +201,7 @@ static const struct of_device_id mtk_hdmi_phy_match[] = {
 	},
 	{},
 };
+MODULE_DEVICE_TABLE(of, mtk_hdmi_phy_match);
 
 static struct platform_driver mtk_hdmi_phy_driver = {
 	.probe = mtk_hdmi_phy_probe,
diff --git a/drivers/phy/mediatek/phy-mtk-mipi-dsi.c b/drivers/phy/mediatek/phy-mtk-mipi-dsi.c
index 18c481251f04..c51114d8e437 100644
--- a/drivers/phy/mediatek/phy-mtk-mipi-dsi.c
+++ b/drivers/phy/mediatek/phy-mtk-mipi-dsi.c
@@ -233,8 +233,9 @@ static const struct of_device_id mtk_mipi_tx_match[] = {
 	  .data = &mt8183_mipitx_data },
 	{ },
 };
+MODULE_DEVICE_TABLE(of, mtk_mipi_tx_match);
 
-struct platform_driver mtk_mipi_tx_driver = {
+static struct platform_driver mtk_mipi_tx_driver = {
 	.probe = mtk_mipi_tx_probe,
 	.remove = mtk_mipi_tx_remove,
 	.driver = {
diff --git a/drivers/phy/motorola/phy-cpcap-usb.c b/drivers/phy/motorola/phy-cpcap-usb.c
index 4728e2bff662..6ee478bc5211 100644
--- a/drivers/phy/motorola/phy-cpcap-usb.c
+++ b/drivers/phy/motorola/phy-cpcap-usb.c
@@ -143,7 +143,7 @@ static bool cpcap_usb_vbus_valid(struct cpcap_phy_ddata *ddata)
 
 	error = iio_read_channel_processed(ddata->vbus, &value);
 	if (error >= 0)
-		return value > 3900 ? true : false;
+		return value > 3900;
 
 	dev_err(ddata->dev, "error reading VBUS: %i\n", error);
 
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp.c b/drivers/phy/qualcomm/phy-qcom-qmp.c
index 0939a9e9d448..9cdebe7f26cb 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp.c
@@ -212,6 +212,15 @@ static const unsigned int qmp_v4_usb3_uniphy_regs_layout[QPHY_LAYOUT_SIZE] = {
 	[QPHY_PCS_LFPS_RXTERM_IRQ_CLEAR]  = 0x614,
 };
 
+static const unsigned int sm8350_usb3_uniphy_regs_layout[QPHY_LAYOUT_SIZE] = {
+	[QPHY_SW_RESET]			= 0x00,
+	[QPHY_START_CTRL]		= 0x44,
+	[QPHY_PCS_STATUS]		= 0x14,
+	[QPHY_PCS_POWER_DOWN_CONTROL]	= 0x40,
+	[QPHY_PCS_AUTONOMOUS_MODE_CTRL]	= 0x1008,	/* PCS_USB3 region (0x1000 on UNI) */
+	[QPHY_PCS_LFPS_RXTERM_IRQ_CLEAR]  = 0x1014,	/* PCS_USB3 region (0x1000 on UNI) */
+};
+
 static const unsigned int sdm845_ufsphy_regs_layout[QPHY_LAYOUT_SIZE] = {
 	[QPHY_START_CTRL]		= 0x00,
 	[QPHY_PCS_READY_STATUS]		= 0x160,
@@ -1974,6 +1983,291 @@ static const struct qmp_phy_init_tbl sm8250_qmp_gen3x2_pcie_pcs_misc_tbl[] = {
 	QMP_PHY_INIT_CFG(QPHY_V4_PCS_PCIE_POWER_STATE_CONFIG4, 0x07),
 };
 
+static const struct qmp_phy_init_tbl sdx55_usb3_uniphy_tx_tbl[] = {
+	QMP_PHY_INIT_CFG(QSERDES_V4_TX_RCV_DETECT_LVL_2, 0x12),
+	QMP_PHY_INIT_CFG(QSERDES_V4_TX_LANE_MODE_1, 0xd5),
+	QMP_PHY_INIT_CFG(QSERDES_V4_TX_LANE_MODE_2, 0x80),
+	QMP_PHY_INIT_CFG(QSERDES_V4_TX_PI_QEC_CTRL, 0x20),
+	QMP_PHY_INIT_CFG(QSERDES_V4_TX_RES_CODE_LANE_OFFSET_TX, 0x08),
+};
+
+static const struct qmp_phy_init_tbl sdx55_usb3_uniphy_rx_tbl[] = {
+	QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_MODE_00_HIGH4, 0x26),
+	QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_MODE_00_HIGH3, 0x7f),
+	QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_MODE_00_HIGH2, 0xbf),
+	QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_MODE_00_HIGH, 0x7f),
+	QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_MODE_00_LOW, 0x7f),
+	QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_MODE_01_HIGH4, 0xb4),
+	QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_MODE_01_HIGH3, 0x7b),
+	QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_MODE_01_HIGH2, 0x5c),
+	QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_MODE_01_HIGH, 0xdc),
+	QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_MODE_01_LOW, 0xdc),
+	QMP_PHY_INIT_CFG(QSERDES_V4_RX_UCDR_PI_CONTROLS, 0x99),
+	QMP_PHY_INIT_CFG(QSERDES_V4_RX_UCDR_SB2_THRESH1, 0x48),
+	QMP_PHY_INIT_CFG(QSERDES_V4_RX_UCDR_SB2_THRESH2, 0x08),
+	QMP_PHY_INIT_CFG(QSERDES_V4_RX_UCDR_SB2_GAIN1, 0x00),
+	QMP_PHY_INIT_CFG(QSERDES_V4_RX_UCDR_SB2_GAIN2, 0x04),
+	QMP_PHY_INIT_CFG(QSERDES_V4_RX_UCDR_FASTLOCK_FO_GAIN, 0x2f),
+	QMP_PHY_INIT_CFG(QSERDES_V4_RX_UCDR_FASTLOCK_COUNT_LOW, 0xff),
+	QMP_PHY_INIT_CFG(QSERDES_V4_RX_UCDR_FASTLOCK_COUNT_HIGH, 0x0f),
+	QMP_PHY_INIT_CFG(QSERDES_V4_RX_UCDR_SO_SATURATION_AND_ENABLE, 0x7f),
+	QMP_PHY_INIT_CFG(QSERDES_V4_RX_UCDR_FO_GAIN, 0x09),
+	QMP_PHY_INIT_CFG(QSERDES_V4_RX_VGA_CAL_CNTRL1, 0x54),
+	QMP_PHY_INIT_CFG(QSERDES_V4_RX_VGA_CAL_CNTRL2, 0x0c),
+	QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_EQU_ADAPTOR_CNTRL2, 0x0f),
+	QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_EQU_ADAPTOR_CNTRL3, 0x4a),
+	QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_EQU_ADAPTOR_CNTRL4, 0x0a),
+	QMP_PHY_INIT_CFG(QSERDES_V4_RX_DFE_EN_TIMER, 0x04),
+	QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_EQ_OFFSET_ADAPTOR_CNTRL1, 0x47),
+	QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_OFFSET_ADAPTOR_CNTRL2, 0x80),
+	QMP_PHY_INIT_CFG(QSERDES_V4_RX_SIGDET_CNTRL, 0x04),
+	QMP_PHY_INIT_CFG(QSERDES_V4_RX_SIGDET_DEGLITCH_CNTRL, 0x0e),
+	QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_IDAC_TSETTLE_HIGH, 0x00),
+	QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_IDAC_TSETTLE_LOW, 0xc0),
+	QMP_PHY_INIT_CFG(QSERDES_V4_RX_DFE_CTLE_POST_CAL_OFFSET, 0x38),
+	QMP_PHY_INIT_CFG(QSERDES_V4_RX_UCDR_SO_GAIN, 0x05),
+	QMP_PHY_INIT_CFG(QSERDES_V4_RX_DCC_CTRL1, 0x0c),
+	QMP_PHY_INIT_CFG(QSERDES_V4_RX_GM_CAL, 0x1f),
+};
+
+static const struct qmp_phy_init_tbl sm8350_ufsphy_serdes_tbl[] = {
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_SYSCLK_EN_SEL, 0xd9),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_HSCLK_SEL, 0x11),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_HSCLK_HS_SWITCH_SEL, 0x00),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP_EN, 0x42),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_VCO_TUNE_MAP, 0x02),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_IVCO, 0x0f),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_VCO_TUNE_INITVAL2, 0x00),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIN_VCOCAL_HSCLK_SEL, 0x11),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_DEC_START_MODE0, 0x82),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_CP_CTRL_MODE0, 0x14),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_RCTRL_MODE0, 0x18),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_CCTRL_MODE0, 0x18),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP1_MODE0, 0xff),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP2_MODE0, 0x19),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE1_MODE0, 0xac),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE2_MODE0, 0x1e),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_DEC_START_MODE1, 0x98),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_CP_CTRL_MODE1, 0x14),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_RCTRL_MODE1, 0x18),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_CCTRL_MODE1, 0x18),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP1_MODE1, 0x65),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP2_MODE1, 0x1e),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE1_MODE1, 0xdd),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE2_MODE1, 0x23),
+
+	/* Rate B: second VCO_TUNE_MAP entry, after the 0x02 entry above */
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_VCO_TUNE_MAP, 0x06),
+};
+
+static const struct qmp_phy_init_tbl sm8350_ufsphy_tx_tbl[] = {
+	QMP_PHY_INIT_CFG(QSERDES_V5_TX_PWM_GEAR_1_DIVIDER_BAND0_1, 0x06),
+	QMP_PHY_INIT_CFG(QSERDES_V5_TX_PWM_GEAR_2_DIVIDER_BAND0_1, 0x03),
+	QMP_PHY_INIT_CFG(QSERDES_V5_TX_PWM_GEAR_3_DIVIDER_BAND0_1, 0x01),
+	QMP_PHY_INIT_CFG(QSERDES_V5_TX_PWM_GEAR_4_DIVIDER_BAND0_1, 0x00),
+	QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_1, 0xf5),
+	QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_3, 0x3f),
+	QMP_PHY_INIT_CFG(QSERDES_V5_TX_RES_CODE_LANE_OFFSET_TX, 0x09),
+	QMP_PHY_INIT_CFG(QSERDES_V5_TX_RES_CODE_LANE_OFFSET_RX, 0x09),
+	QMP_PHY_INIT_CFG(QSERDES_V5_TX_TRAN_DRVR_EMP_EN, 0x0c),
+};
+
+static const struct qmp_phy_init_tbl sm8350_ufsphy_rx_tbl[] = {
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_SIGDET_LVL, 0x24),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_SIGDET_CNTRL, 0x0f),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_SIGDET_DEGLITCH_CNTRL, 0x1e),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_BAND, 0x18),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_FASTLOCK_FO_GAIN, 0x0a),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SO_SATURATION_AND_ENABLE, 0x5a),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_PI_CONTROLS, 0xf1),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_FASTLOCK_COUNT_LOW, 0x80),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_PI_CTRL2, 0x80),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_FO_GAIN, 0x0e),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SO_GAIN, 0x04),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_TERM_BW, 0x1b),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_EQU_ADAPTOR_CNTRL1, 0x04),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_EQU_ADAPTOR_CNTRL2, 0x06),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_EQU_ADAPTOR_CNTRL3, 0x04),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_EQU_ADAPTOR_CNTRL4, 0x1a),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_EQ_OFFSET_ADAPTOR_CNTRL1, 0x17),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_OFFSET_ADAPTOR_CNTRL2, 0x00),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_IDAC_MEASURE_TIME, 0x10),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_IDAC_TSETTLE_LOW, 0xc0),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_IDAC_TSETTLE_HIGH, 0x00),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_LOW, 0x6d),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH, 0x6d),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH2, 0xed),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH3, 0x3b),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH4, 0x3c),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_LOW, 0xe0),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH, 0xc8),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH2, 0xc8),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH3, 0x3b),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH4, 0xb7),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_10_LOW, 0xe0),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_10_HIGH, 0xc8),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_10_HIGH2, 0xc8),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_10_HIGH3, 0x3b),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_10_HIGH4, 0xb7),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_DCC_CTRL1, 0x0c),
+};
+
+static const struct qmp_phy_init_tbl sm8350_ufsphy_pcs_tbl[] = {
+	QMP_PHY_INIT_CFG(QPHY_V5_PCS_UFS_RX_SIGDET_CTRL2, 0x6d),
+	QMP_PHY_INIT_CFG(QPHY_V5_PCS_UFS_TX_LARGE_AMP_DRV_LVL, 0x0a),
+	QMP_PHY_INIT_CFG(QPHY_V5_PCS_UFS_TX_SMALL_AMP_DRV_LVL, 0x02),
+	QMP_PHY_INIT_CFG(QPHY_V5_PCS_UFS_TX_MID_TERM_CTRL1, 0x43),
+	QMP_PHY_INIT_CFG(QPHY_V5_PCS_UFS_DEBUG_BUS_CLKSEL, 0x1f),
+	QMP_PHY_INIT_CFG(QPHY_V5_PCS_UFS_RX_MIN_HIBERN8_TIME, 0xff),
+	QMP_PHY_INIT_CFG(QPHY_V5_PCS_UFS_PLL_CNTL, 0x03),
+	QMP_PHY_INIT_CFG(QPHY_V5_PCS_UFS_TIMER_20US_CORECLK_STEPS_MSB, 0x16),
+	QMP_PHY_INIT_CFG(QPHY_V5_PCS_UFS_TIMER_20US_CORECLK_STEPS_LSB, 0xd8),
+	QMP_PHY_INIT_CFG(QPHY_V5_PCS_UFS_TX_PWM_GEAR_BAND, 0xaa),
+	QMP_PHY_INIT_CFG(QPHY_V5_PCS_UFS_TX_HS_GEAR_BAND, 0x06),
+	QMP_PHY_INIT_CFG(QPHY_V5_PCS_UFS_TX_HSGEAR_CAPABILITY, 0x03),
+	QMP_PHY_INIT_CFG(QPHY_V5_PCS_UFS_RX_HSGEAR_CAPABILITY, 0x03),
+	QMP_PHY_INIT_CFG(QPHY_V5_PCS_UFS_RX_SIGDET_CTRL1, 0x0e),
+	QMP_PHY_INIT_CFG(QPHY_V5_PCS_UFS_MULTI_LANE_CTRL1, 0x02),
+};
+
+static const struct qmp_phy_init_tbl sm8350_usb3_tx_tbl[] = {
+	QMP_PHY_INIT_CFG(QSERDES_V5_TX_RES_CODE_LANE_TX, 0x00),
+	QMP_PHY_INIT_CFG(QSERDES_V5_TX_RES_CODE_LANE_RX, 0x00),
+	QMP_PHY_INIT_CFG(QSERDES_V5_TX_RES_CODE_LANE_OFFSET_TX, 0x16),
+	QMP_PHY_INIT_CFG(QSERDES_V5_TX_RES_CODE_LANE_OFFSET_RX, 0x0e),
+	QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_1, 0x35),
+	QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_3, 0x3f),
+	QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_4, 0x7f),
+	QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_5, 0x3f),
+	QMP_PHY_INIT_CFG(QSERDES_V5_TX_RCV_DETECT_LVL_2, 0x12),
+	QMP_PHY_INIT_CFG(QSERDES_V5_TX_PI_QEC_CTRL, 0x21),
+};
+
+static const struct qmp_phy_init_tbl sm8350_usb3_rx_tbl[] = {
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_FO_GAIN, 0x0a),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SO_GAIN, 0x05),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_FASTLOCK_FO_GAIN, 0x2f),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SO_SATURATION_AND_ENABLE, 0x7f),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_FASTLOCK_COUNT_LOW, 0xff),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_FASTLOCK_COUNT_HIGH, 0x0f),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_PI_CONTROLS, 0x99),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SB2_THRESH1, 0x08),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SB2_THRESH2, 0x08),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SB2_GAIN1, 0x00),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SB2_GAIN2, 0x04),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_VGA_CAL_CNTRL1, 0x54),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_VGA_CAL_CNTRL2, 0x0f),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_EQU_ADAPTOR_CNTRL2, 0x0f),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_EQU_ADAPTOR_CNTRL3, 0x4a),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_EQU_ADAPTOR_CNTRL4, 0x0a),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_IDAC_TSETTLE_LOW, 0xc0),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_IDAC_TSETTLE_HIGH, 0x00),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_EQ_OFFSET_ADAPTOR_CNTRL1, 0x47),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_SIGDET_CNTRL, 0x04),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_SIGDET_DEGLITCH_CNTRL, 0x0e),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_LOW, 0xbb),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH, 0x7b),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH2, 0xbb),
+	QMP_PHY_INIT_CFG_LANE(QSERDES_V5_RX_RX_MODE_00_HIGH3, 0x3d, 1),
+	QMP_PHY_INIT_CFG_LANE(QSERDES_V5_RX_RX_MODE_00_HIGH3, 0x3c, 2),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH4, 0xdb),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_LOW, 0x64),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH, 0x24),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH2, 0xd2),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH3, 0x13),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH4, 0xa9),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_DFE_EN_TIMER, 0x04),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_DFE_CTLE_POST_CAL_OFFSET, 0x38),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_AUX_DATA_TCOARSE_TFINE, 0xa0),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_DCC_CTRL1, 0x0c),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_GM_CAL, 0x00),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_VTH_CODE, 0x10),
+};
+
+static const struct qmp_phy_init_tbl sm8350_usb3_pcs_tbl[] = {
+	QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_RCVR_DTCT_DLY_U3_L, 0x40),
+	QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_RCVR_DTCT_DLY_U3_H, 0x00),
+	QMP_PHY_INIT_CFG(QPHY_V4_PCS_RCVR_DTCT_DLY_P1U2_L, 0xe7),
+	QMP_PHY_INIT_CFG(QPHY_V4_PCS_RCVR_DTCT_DLY_P1U2_H, 0x03),
+	QMP_PHY_INIT_CFG(QPHY_V4_PCS_LOCK_DETECT_CONFIG1, 0xd0),
+	QMP_PHY_INIT_CFG(QPHY_V4_PCS_LOCK_DETECT_CONFIG2, 0x07),
+	QMP_PHY_INIT_CFG(QPHY_V4_PCS_LOCK_DETECT_CONFIG3, 0x20),
+	QMP_PHY_INIT_CFG(QPHY_V4_PCS_LOCK_DETECT_CONFIG6, 0x13),
+	QMP_PHY_INIT_CFG(QPHY_V4_PCS_REFGEN_REQ_CONFIG1, 0x21),
+	QMP_PHY_INIT_CFG(QPHY_V4_PCS_RX_SIGDET_LVL, 0xaa),
+	QMP_PHY_INIT_CFG(QPHY_V4_PCS_CDR_RESET_TIME, 0x0a),
+	QMP_PHY_INIT_CFG(QPHY_V4_PCS_ALIGN_DETECT_CONFIG1, 0x88),
+	QMP_PHY_INIT_CFG(QPHY_V4_PCS_ALIGN_DETECT_CONFIG2, 0x13),
+	QMP_PHY_INIT_CFG(QPHY_V4_PCS_PCS_TX_RX_CONFIG, 0x0c),
+	QMP_PHY_INIT_CFG(QPHY_V4_PCS_EQ_CONFIG1, 0x4b),
+	QMP_PHY_INIT_CFG(QPHY_V4_PCS_EQ_CONFIG5, 0x10),
+	QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_LFPS_DET_HIGH_COUNT_VAL, 0xf8),
+	QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_RXEQTRAINING_DFE_TIME_S2, 0x07),
+};
+
+static const struct qmp_phy_init_tbl sm8350_usb3_uniphy_tx_tbl[] = {
+	QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_1, 0xa5),
+	QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_2, 0x82),
+	QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_3, 0x3f),
+	QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_4, 0x3f),
+	QMP_PHY_INIT_CFG(QSERDES_V5_TX_PI_QEC_CTRL, 0x21),
+	QMP_PHY_INIT_CFG(QSERDES_V5_TX_RES_CODE_LANE_OFFSET_TX, 0x10),
+	QMP_PHY_INIT_CFG(QSERDES_V5_TX_RES_CODE_LANE_OFFSET_RX, 0x0e),
+};
+
+static const struct qmp_phy_init_tbl sm8350_usb3_uniphy_rx_tbl[] = {
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH4, 0xdc),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH3, 0xbd),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH2, 0xff),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH, 0x7f),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_LOW, 0xff),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH4, 0xa9),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH3, 0x7b),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH2, 0xe4),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH, 0x24),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_LOW, 0x64),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_PI_CONTROLS, 0x99),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SB2_THRESH1, 0x08),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SB2_THRESH2, 0x08),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SB2_GAIN1, 0x00),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SB2_GAIN2, 0x04),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_FASTLOCK_FO_GAIN, 0x2f),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_FASTLOCK_COUNT_LOW, 0xff),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_FASTLOCK_COUNT_HIGH, 0x0f),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_FO_GAIN, 0x0a),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_VGA_CAL_CNTRL1, 0x54),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_VGA_CAL_CNTRL2, 0x0f),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_EQU_ADAPTOR_CNTRL2, 0x0f),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_EQU_ADAPTOR_CNTRL4, 0x0a),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_EQ_OFFSET_ADAPTOR_CNTRL1, 0x47),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_OFFSET_ADAPTOR_CNTRL2, 0x80),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_SIGDET_CNTRL, 0x04),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_SIGDET_DEGLITCH_CNTRL, 0x0e),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_DFE_CTLE_POST_CAL_OFFSET, 0x38),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SO_GAIN, 0x05),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_GM_CAL, 0x00),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_SIGDET_ENABLES, 0x00),
+};
+
+static const struct qmp_phy_init_tbl sm8350_usb3_uniphy_pcs_tbl[] = {
+	QMP_PHY_INIT_CFG(QPHY_V4_PCS_LOCK_DETECT_CONFIG1, 0xd0),
+	QMP_PHY_INIT_CFG(QPHY_V4_PCS_LOCK_DETECT_CONFIG2, 0x07),
+	QMP_PHY_INIT_CFG(QPHY_V4_PCS_LOCK_DETECT_CONFIG3, 0x20),
+	QMP_PHY_INIT_CFG(QPHY_V4_PCS_LOCK_DETECT_CONFIG6, 0x13),
+	QMP_PHY_INIT_CFG(QPHY_V4_PCS_RCVR_DTCT_DLY_P1U2_L, 0xe7),
+	QMP_PHY_INIT_CFG(QPHY_V4_PCS_RCVR_DTCT_DLY_P1U2_H, 0x03),
+	QMP_PHY_INIT_CFG(QPHY_V4_PCS_RX_SIGDET_LVL, 0xaa),
+	QMP_PHY_INIT_CFG(QPHY_V4_PCS_PCS_TX_RX_CONFIG, 0x0c),
+	QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_UNI_RXEQTRAINING_DFE_TIME_S2, 0x07),
+	QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_UNI_LFPS_DET_HIGH_COUNT_VAL, 0xf8),
+	QMP_PHY_INIT_CFG(QPHY_V4_PCS_CDR_RESET_TIME, 0x0a),
+	QMP_PHY_INIT_CFG(QPHY_V4_PCS_ALIGN_DETECT_CONFIG1, 0x88),
+	QMP_PHY_INIT_CFG(QPHY_V4_PCS_ALIGN_DETECT_CONFIG2, 0x13),
+	QMP_PHY_INIT_CFG(QPHY_V4_PCS_EQ_CONFIG1, 0x4b),
+	QMP_PHY_INIT_CFG(QPHY_V4_PCS_EQ_CONFIG5, 0x10),
+	QMP_PHY_INIT_CFG(QPHY_V4_PCS_REFGEN_REQ_CONFIG1, 0x21),
+};
+
 /* struct qmp_phy_cfg - per-PHY initialization config */
 struct qmp_phy_cfg {
 	/* phy-type - PCIE/UFS/USB */
@@ -2183,6 +2477,11 @@ static const char * const sdm845_ufs_phy_clk_l[] = {
 	"ref", "ref_aux",
 };
 
+/* usb3 phy on sdx55 doesn't have com_aux clock */
+static const char * const qmp_v4_sdx55_usbphy_clk_l[] = {
+	"aux", "cfg_ahb", "ref"
+};
+
 /* list of resets */
 static const char * const msm8996_pciephy_reset_l[] = {
 	"phy", "common", "cfg",
@@ -2824,6 +3123,117 @@ static const struct qmp_phy_cfg sm8250_usb3_uniphy_cfg = {
 	.pwrdn_delay_max	= POWER_DOWN_DELAY_US_MAX,
 };
 
+static const struct qmp_phy_cfg sdx55_usb3_uniphy_cfg = {
+	.type			= PHY_TYPE_USB3,
+	.nlanes			= 1,
+
+	.serdes_tbl		= sm8150_usb3_uniphy_serdes_tbl,
+	.serdes_tbl_num		= ARRAY_SIZE(sm8150_usb3_uniphy_serdes_tbl),
+	.tx_tbl			= sdx55_usb3_uniphy_tx_tbl,
+	.tx_tbl_num		= ARRAY_SIZE(sdx55_usb3_uniphy_tx_tbl),
+	.rx_tbl			= sdx55_usb3_uniphy_rx_tbl,
+	.rx_tbl_num		= ARRAY_SIZE(sdx55_usb3_uniphy_rx_tbl),
+	.pcs_tbl		= sm8250_usb3_uniphy_pcs_tbl,
+	.pcs_tbl_num		= ARRAY_SIZE(sm8250_usb3_uniphy_pcs_tbl),
+	.clk_list		= qmp_v4_sdx55_usbphy_clk_l,
+	.num_clks		= ARRAY_SIZE(qmp_v4_sdx55_usbphy_clk_l),
+	.reset_list		= msm8996_usb3phy_reset_l,
+	.num_resets		= ARRAY_SIZE(msm8996_usb3phy_reset_l),
+	.vreg_list		= qmp_phy_vreg_l,
+	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
+	.regs			= qmp_v4_usb3_uniphy_regs_layout,
+
+	.start_ctrl		= SERDES_START | PCS_START,
+	.pwrdn_ctrl		= SW_PWRDN,
+
+	.has_pwrdn_delay	= true,
+	.pwrdn_delay_min	= POWER_DOWN_DELAY_US_MIN,
+	.pwrdn_delay_max	= POWER_DOWN_DELAY_US_MAX,
+};
+
+static const struct qmp_phy_cfg sm8350_ufsphy_cfg = {
+	.type			= PHY_TYPE_UFS,
+	.nlanes			= 2,
+
+	.serdes_tbl		= sm8350_ufsphy_serdes_tbl,
+	.serdes_tbl_num		= ARRAY_SIZE(sm8350_ufsphy_serdes_tbl),
+	.tx_tbl			= sm8350_ufsphy_tx_tbl,
+	.tx_tbl_num		= ARRAY_SIZE(sm8350_ufsphy_tx_tbl),
+	.rx_tbl			= sm8350_ufsphy_rx_tbl,
+	.rx_tbl_num		= ARRAY_SIZE(sm8350_ufsphy_rx_tbl),
+	.pcs_tbl		= sm8350_ufsphy_pcs_tbl,
+	.pcs_tbl_num		= ARRAY_SIZE(sm8350_ufsphy_pcs_tbl),
+	.clk_list		= sdm845_ufs_phy_clk_l,
+	.num_clks		= ARRAY_SIZE(sdm845_ufs_phy_clk_l),
+	.vreg_list		= qmp_phy_vreg_l,
+	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
+	.regs			= sm8150_ufsphy_regs_layout,
+
+	.start_ctrl		= SERDES_START,
+	.pwrdn_ctrl		= SW_PWRDN,
+
+	.is_dual_lane_phy	= true,
+};
+
+static const struct qmp_phy_cfg sm8350_usb3phy_cfg = {
+	.type			= PHY_TYPE_USB3,
+	.nlanes			= 1,
+
+	.serdes_tbl		= sm8150_usb3_serdes_tbl,
+	.serdes_tbl_num		= ARRAY_SIZE(sm8150_usb3_serdes_tbl),
+	.tx_tbl			= sm8350_usb3_tx_tbl,
+	.tx_tbl_num		= ARRAY_SIZE(sm8350_usb3_tx_tbl),
+	.rx_tbl			= sm8350_usb3_rx_tbl,
+	.rx_tbl_num		= ARRAY_SIZE(sm8350_usb3_rx_tbl),
+	.pcs_tbl		= sm8350_usb3_pcs_tbl,
+	.pcs_tbl_num		= ARRAY_SIZE(sm8350_usb3_pcs_tbl),
+	.clk_list		= qmp_v4_sm8250_usbphy_clk_l,
+	.num_clks		= ARRAY_SIZE(qmp_v4_sm8250_usbphy_clk_l),
+	.reset_list		= msm8996_usb3phy_reset_l,
+	.num_resets		= ARRAY_SIZE(msm8996_usb3phy_reset_l),
+	.vreg_list		= qmp_phy_vreg_l,
+	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
+	.regs			= qmp_v4_usb3phy_regs_layout,
+
+	.start_ctrl		= SERDES_START | PCS_START,
+	.pwrdn_ctrl		= SW_PWRDN,
+
+	.has_pwrdn_delay	= true,
+	.pwrdn_delay_min	= POWER_DOWN_DELAY_US_MIN,
+	.pwrdn_delay_max	= POWER_DOWN_DELAY_US_MAX,
+
+	.has_phy_dp_com_ctrl	= true,
+	.is_dual_lane_phy	= true,
+};
+
+static const struct qmp_phy_cfg sm8350_usb3_uniphy_cfg = {
+	.type			= PHY_TYPE_USB3,
+	.nlanes			= 1,
+
+	.serdes_tbl		= sm8150_usb3_uniphy_serdes_tbl,
+	.serdes_tbl_num		= ARRAY_SIZE(sm8150_usb3_uniphy_serdes_tbl),
+	.tx_tbl			= sm8350_usb3_uniphy_tx_tbl,
+	.tx_tbl_num		= ARRAY_SIZE(sm8350_usb3_uniphy_tx_tbl),
+	.rx_tbl			= sm8350_usb3_uniphy_rx_tbl,
+	.rx_tbl_num		= ARRAY_SIZE(sm8350_usb3_uniphy_rx_tbl),
+	.pcs_tbl		= sm8350_usb3_uniphy_pcs_tbl,
+	.pcs_tbl_num		= ARRAY_SIZE(sm8350_usb3_uniphy_pcs_tbl),
+	.clk_list		= qmp_v4_phy_clk_l,
+	.num_clks		= ARRAY_SIZE(qmp_v4_phy_clk_l),
+	.reset_list		= msm8996_usb3phy_reset_l,
+	.num_resets		= ARRAY_SIZE(msm8996_usb3phy_reset_l),
+	.vreg_list		= qmp_phy_vreg_l,
+	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
+	.regs			= sm8350_usb3_uniphy_regs_layout,
+
+	.start_ctrl		= SERDES_START | PCS_START,
+	.pwrdn_ctrl		= SW_PWRDN,
+
+	.has_pwrdn_delay	= true,
+	.pwrdn_delay_min	= POWER_DOWN_DELAY_US_MIN,
+	.pwrdn_delay_max	= POWER_DOWN_DELAY_US_MAX,
+};
+
 static void qcom_qmp_phy_configure_lane(void __iomem *base,
 					const unsigned int *regs,
 					const struct qmp_phy_init_tbl tbl[],
@@ -3135,7 +3545,7 @@ static int qcom_qmp_phy_configure_dp_phy(struct qmp_phy *qphy)
 static int qcom_qmp_dp_phy_calibrate(struct phy *phy)
 {
 	struct qmp_phy *qphy = phy_get_drvdata(phy);
-	const u8 cfg1_settings[] = { 0x13, 0x23, 0x1d };
+	static const u8 cfg1_settings[] = { 0x13, 0x23, 0x1d };
 	u8 val;
 
 	qphy->dp_aux_cfg++;
@@ -4129,6 +4539,12 @@ static const struct of_device_id qcom_qmp_phy_of_match_table[] = {
 		.compatible = "qcom,sc7180-qmp-usb3-dp-phy",
 		/* It's a combo phy */
 	}, {
+		.compatible = "qcom,sc8180x-qmp-ufs-phy",
+		.data = &sm8150_ufsphy_cfg,
+	}, {
+		.compatible = "qcom,sc8180x-qmp-usb3-phy",
+		.data = &sm8150_usb3phy_cfg,
+	}, {
 		.compatible = "qcom,sdm845-qhp-pcie-phy",
 		.data = &sdm845_qhp_pciephy_cfg,
 	}, {
@@ -4171,8 +4587,20 @@ static const struct of_device_id qcom_qmp_phy_of_match_table[] = {
 		.compatible = "qcom,sm8250-qmp-gen3x2-pcie-phy",
 		.data = &sm8250_qmp_gen3x2_pciephy_cfg,
 	}, {
+		.compatible = "qcom,sm8350-qmp-ufs-phy",
+		.data = &sm8350_ufsphy_cfg,
+	}, {
 		.compatible = "qcom,sm8250-qmp-modem-pcie-phy",
 		.data = &sm8250_qmp_gen3x2_pciephy_cfg,
+	}, {
+		.compatible = "qcom,sdx55-qmp-usb3-uni-phy",
+		.data = &sdx55_usb3_uniphy_cfg,
+	}, {
+		.compatible = "qcom,sm8350-qmp-usb3-phy",
+		.data = &sm8350_usb3phy_cfg,
+	}, {
+		.compatible = "qcom,sm8350-qmp-usb3-uni-phy",
+		.data = &sm8350_usb3_uniphy_cfg,
 	},
 	{ },
 };
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp.h b/drivers/phy/qualcomm/phy-qcom-qmp.h
index db92a461dd2e..71ce3aa174ae 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp.h
+++ b/drivers/phy/qualcomm/phy-qcom-qmp.h
@@ -824,4 +824,151 @@
 #define QPHY_V4_PCS_PCIE_PRESET_P10_PRE			0xbc
 #define QPHY_V4_PCS_PCIE_PRESET_P10_POST		0xe0
 
+/* Only for QMP V5 PHY - QSERDES COM registers */
+#define QSERDES_V5_COM_PLL_IVCO				0x058
+#define QSERDES_V5_COM_CP_CTRL_MODE0			0x074
+#define QSERDES_V5_COM_CP_CTRL_MODE1			0x078
+#define QSERDES_V5_COM_PLL_RCTRL_MODE0			0x07c
+#define QSERDES_V5_COM_PLL_RCTRL_MODE1			0x080
+#define QSERDES_V5_COM_PLL_CCTRL_MODE0			0x084
+#define QSERDES_V5_COM_PLL_CCTRL_MODE1			0x088
+#define QSERDES_V5_COM_SYSCLK_EN_SEL			0x094
+#define QSERDES_V5_COM_LOCK_CMP_EN			0x0a4
+#define QSERDES_V5_COM_LOCK_CMP1_MODE0			0x0ac
+#define QSERDES_V5_COM_LOCK_CMP2_MODE0			0x0b0
+#define QSERDES_V5_COM_LOCK_CMP1_MODE1			0x0b4
+#define QSERDES_V5_COM_LOCK_CMP2_MODE1			0x0b8
+#define QSERDES_V5_COM_DEC_START_MODE0			0x0bc
+#define QSERDES_V5_COM_DEC_START_MODE1			0x0c4
+#define QSERDES_V5_COM_VCO_TUNE_MAP			0x10c
+#define QSERDES_V5_COM_VCO_TUNE_INITVAL2		0x124
+#define QSERDES_V5_COM_HSCLK_SEL			0x158
+#define QSERDES_V5_COM_HSCLK_HS_SWITCH_SEL		0x15c
+#define QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE1_MODE0	0x1ac
+#define QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE2_MODE0	0x1b0
+#define QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE1_MODE1	0x1b4
+#define QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE2_MODE1	0x1b8
+#define QSERDES_V5_COM_BIN_VCOCAL_HSCLK_SEL		0x1bc
+
+/* Only for QMP V5 PHY - TX registers */
+#define QSERDES_V5_TX_RES_CODE_LANE_TX			0x34
+#define QSERDES_V5_TX_RES_CODE_LANE_RX			0x38
+#define QSERDES_V5_TX_RES_CODE_LANE_OFFSET_TX		0x3c
+#define QSERDES_V5_TX_RES_CODE_LANE_OFFSET_RX		0x40
+#define QSERDES_V5_TX_LANE_MODE_1			0x84
+#define QSERDES_V5_TX_LANE_MODE_2			0x88
+#define QSERDES_V5_TX_LANE_MODE_3			0x8c
+#define QSERDES_V5_TX_LANE_MODE_4			0x90
+#define QSERDES_V5_TX_LANE_MODE_5			0x94
+#define QSERDES_V5_TX_RCV_DETECT_LVL_2			0xa4
+#define QSERDES_V5_TX_TRAN_DRVR_EMP_EN			0xc0
+#define QSERDES_V5_TX_PI_QEC_CTRL			0xe4
+#define QSERDES_V5_TX_PWM_GEAR_1_DIVIDER_BAND0_1	0x178
+#define QSERDES_V5_TX_PWM_GEAR_2_DIVIDER_BAND0_1	0x17c
+#define QSERDES_V5_TX_PWM_GEAR_3_DIVIDER_BAND0_1	0x180
+#define QSERDES_V5_TX_PWM_GEAR_4_DIVIDER_BAND0_1	0x184
+
+/* Only for QMP V5 PHY - RX registers */
+#define QSERDES_V5_RX_UCDR_FO_GAIN			0x008
+#define QSERDES_V5_RX_UCDR_SO_GAIN			0x014
+#define QSERDES_V5_RX_UCDR_FASTLOCK_FO_GAIN		0x030
+#define QSERDES_V5_RX_UCDR_SO_SATURATION_AND_ENABLE	0x034
+#define QSERDES_V5_RX_UCDR_FASTLOCK_COUNT_LOW		0x03c
+#define QSERDES_V5_RX_UCDR_FASTLOCK_COUNT_HIGH		0x040
+#define QSERDES_V5_RX_UCDR_PI_CONTROLS			0x044
+#define QSERDES_V5_RX_UCDR_PI_CTRL2			0x048
+#define QSERDES_V5_RX_UCDR_SB2_THRESH1			0x04c
+#define QSERDES_V5_RX_UCDR_SB2_THRESH2			0x050
+#define QSERDES_V5_RX_UCDR_SB2_GAIN1			0x054
+#define QSERDES_V5_RX_UCDR_SB2_GAIN2			0x058
+#define QSERDES_V5_RX_AUX_DATA_TCOARSE_TFINE		0x060
+#define QSERDES_V5_RX_RCLK_AUXDATA_SEL			0x064
+#define QSERDES_V5_RX_AC_JTAG_ENABLE			0x068
+#define QSERDES_V5_RX_AC_JTAG_MODE			0x078
+#define QSERDES_V5_RX_RX_TERM_BW			0x080
+#define QSERDES_V5_RX_VGA_CAL_CNTRL1			0x0d4
+#define QSERDES_V5_RX_VGA_CAL_CNTRL2			0x0d8
+#define QSERDES_V5_RX_GM_CAL				0x0dc
+#define QSERDES_V5_RX_RX_EQU_ADAPTOR_CNTRL1		0x0e8
+#define QSERDES_V5_RX_RX_EQU_ADAPTOR_CNTRL2		0x0ec
+#define QSERDES_V5_RX_RX_EQU_ADAPTOR_CNTRL3		0x0f0
+#define QSERDES_V5_RX_RX_EQU_ADAPTOR_CNTRL4		0x0f4
+#define QSERDES_V5_RX_RX_IDAC_TSETTLE_LOW		0x0f8
+#define QSERDES_V5_RX_RX_IDAC_TSETTLE_HIGH		0x0fc
+#define QSERDES_V5_RX_RX_IDAC_MEASURE_TIME		0x100
+#define QSERDES_V5_RX_RX_EQ_OFFSET_ADAPTOR_CNTRL1	0x110
+#define QSERDES_V5_RX_RX_OFFSET_ADAPTOR_CNTRL2		0x114
+#define QSERDES_V5_RX_SIGDET_ENABLES			0x118
+#define QSERDES_V5_RX_SIGDET_CNTRL			0x11c
+#define QSERDES_V5_RX_SIGDET_LVL			0x120
+#define QSERDES_V5_RX_SIGDET_DEGLITCH_CNTRL		0x124
+#define QSERDES_V5_RX_RX_BAND				0x128
+#define QSERDES_V5_RX_RX_MODE_00_LOW			0x15c
+#define QSERDES_V5_RX_RX_MODE_00_HIGH			0x160
+#define QSERDES_V5_RX_RX_MODE_00_HIGH2			0x164
+#define QSERDES_V5_RX_RX_MODE_00_HIGH3			0x168
+#define QSERDES_V5_RX_RX_MODE_00_HIGH4			0x16c
+#define QSERDES_V5_RX_RX_MODE_01_LOW			0x170
+#define QSERDES_V5_RX_RX_MODE_01_HIGH			0x174
+#define QSERDES_V5_RX_RX_MODE_01_HIGH2			0x178
+#define QSERDES_V5_RX_RX_MODE_01_HIGH3			0x17c
+#define QSERDES_V5_RX_RX_MODE_01_HIGH4			0x180
+#define QSERDES_V5_RX_RX_MODE_10_LOW			0x184
+#define QSERDES_V5_RX_RX_MODE_10_HIGH			0x188
+#define QSERDES_V5_RX_RX_MODE_10_HIGH2			0x18c
+#define QSERDES_V5_RX_RX_MODE_10_HIGH3			0x190
+#define QSERDES_V5_RX_RX_MODE_10_HIGH4			0x194
+#define QSERDES_V5_RX_DFE_EN_TIMER			0x1a0
+#define QSERDES_V5_RX_DFE_CTLE_POST_CAL_OFFSET		0x1a4
+#define QSERDES_V5_RX_DCC_CTRL1				0x1a8
+#define QSERDES_V5_RX_VTH_CODE				0x1b0
+
+/* Only for QMP V5 PHY - UFS PCS registers */
+#define QPHY_V5_PCS_UFS_TIMER_20US_CORECLK_STEPS_MSB	0x00c
+#define QPHY_V5_PCS_UFS_TIMER_20US_CORECLK_STEPS_LSB	0x010
+#define QPHY_V5_PCS_UFS_PLL_CNTL			0x02c
+#define QPHY_V5_PCS_UFS_TX_LARGE_AMP_DRV_LVL		0x030
+#define QPHY_V5_PCS_UFS_TX_SMALL_AMP_DRV_LVL		0x038
+#define QPHY_V5_PCS_UFS_TX_HSGEAR_CAPABILITY		0x074
+#define QPHY_V5_PCS_UFS_RX_HSGEAR_CAPABILITY		0x0b4
+#define QPHY_V5_PCS_UFS_DEBUG_BUS_CLKSEL		0x124
+#define QPHY_V5_PCS_UFS_RX_MIN_HIBERN8_TIME		0x150
+#define QPHY_V5_PCS_UFS_RX_SIGDET_CTRL1			0x154
+#define QPHY_V5_PCS_UFS_RX_SIGDET_CTRL2			0x158
+#define QPHY_V5_PCS_UFS_TX_PWM_GEAR_BAND		0x160
+#define QPHY_V5_PCS_UFS_TX_HS_GEAR_BAND			0x168
+#define QPHY_V5_PCS_UFS_TX_MID_TERM_CTRL1		0x1d8
+#define QPHY_V5_PCS_UFS_MULTI_LANE_CTRL1		0x1e0
+
+/* Only for QMP V5 PHY - USB3 PCS registers have different offsets than V4 */
+#define QPHY_V5_PCS_USB3_POWER_STATE_CONFIG1		0x300
+#define QPHY_V5_PCS_USB3_AUTONOMOUS_MODE_STATUS		0x304
+#define QPHY_V5_PCS_USB3_AUTONOMOUS_MODE_CTRL		0x308
+#define QPHY_V5_PCS_USB3_AUTONOMOUS_MODE_CTRL2		0x30c
+#define QPHY_V5_PCS_USB3_LFPS_RXTERM_IRQ_SOURCE_STATUS	0x310
+#define QPHY_V5_PCS_USB3_LFPS_RXTERM_IRQ_CLEAR		0x314
+#define QPHY_V5_PCS_USB3_LFPS_DET_HIGH_COUNT_VAL	0x318
+#define QPHY_V5_PCS_USB3_LFPS_TX_ECSTART		0x31c
+#define QPHY_V5_PCS_USB3_LFPS_PER_TIMER_VAL		0x320
+#define QPHY_V5_PCS_USB3_LFPS_TX_END_CNT_U3_START	0x324
+#define QPHY_V5_PCS_USB3_LFPS_CONFIG1			0x328
+#define QPHY_V5_PCS_USB3_RXEQTRAINING_LOCK_TIME		0x32c
+#define QPHY_V5_PCS_USB3_RXEQTRAINING_WAIT_TIME		0x330
+#define QPHY_V5_PCS_USB3_RXEQTRAINING_CTLE_TIME		0x334
+#define QPHY_V5_PCS_USB3_RXEQTRAINING_WAIT_TIME_S2	0x338
+#define QPHY_V5_PCS_USB3_RXEQTRAINING_DFE_TIME_S2	0x33c
+#define QPHY_V5_PCS_USB3_RCVR_DTCT_DLY_U3_L		0x340
+#define QPHY_V5_PCS_USB3_RCVR_DTCT_DLY_U3_H		0x344
+#define QPHY_V5_PCS_USB3_ARCVR_DTCT_EN_PERIOD		0x348
+#define QPHY_V5_PCS_USB3_ARCVR_DTCT_CM_DLY		0x34c
+#define QPHY_V5_PCS_USB3_TXONESZEROS_RUN_LENGTH		0x350
+#define QPHY_V5_PCS_USB3_ALFPS_DEGLITCH_VAL		0x354
+#define QPHY_V5_PCS_USB3_SIGDET_STARTUP_TIMER_VAL	0x358
+#define QPHY_V5_PCS_USB3_TEST_CONTROL			0x35c
+#define QPHY_V5_PCS_USB3_RXTERMINATION_DLY_SEL		0x360
+
+/* Only for QMP V5 PHY - UNI has 0x1000 offset for PCS_USB3 regs */
+#define QPHY_V5_PCS_USB3_UNI_LFPS_DET_HIGH_COUNT_VAL	0x1018
+#define QPHY_V5_PCS_USB3_UNI_RXEQTRAINING_DFE_TIME_S2	0x103c
+
 #endif
diff --git a/drivers/phy/qualcomm/phy-qcom-qusb2.c b/drivers/phy/qualcomm/phy-qcom-qusb2.c
index 109792203baf..8f1bf7e2186b 100644
--- a/drivers/phy/qualcomm/phy-qcom-qusb2.c
+++ b/drivers/phy/qualcomm/phy-qcom-qusb2.c
@@ -22,6 +22,7 @@
 
 #include <dt-bindings/phy/phy-qcom-qusb2.h>
 
+#define QUSB2PHY_PLL			0x0
 #define QUSB2PHY_PLL_TEST		0x04
 #define CLK_REF_SEL			BIT(7)
 
@@ -135,6 +136,35 @@ enum qusb2phy_reg_layout {
 	QUSB2PHY_INTR_CTRL,
 };
 
+static const struct qusb2_phy_init_tbl ipq6018_init_tbl[] = {
+	QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL, 0x14),
+	QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TUNE1, 0xF8),
+	QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TUNE2, 0xB3),
+	QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TUNE3, 0x83),
+	QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TUNE4, 0xC0),
+	QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_TUNE, 0x30),
+	QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_USER_CTL1, 0x79),
+	QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_USER_CTL2, 0x21),
+	QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TUNE5, 0x00),
+	QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_PWR_CTRL, 0x00),
+	QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TEST2, 0x14),
+	QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_TEST, 0x80),
+	QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_AUTOPGM_CTL1, 0x9F),
+};
+
+static const unsigned int ipq6018_regs_layout[] = {
+	[QUSB2PHY_PLL_STATUS]              = 0x38,
+	[QUSB2PHY_PORT_TUNE1]              = 0x80,
+	[QUSB2PHY_PORT_TUNE2]              = 0x84,
+	[QUSB2PHY_PORT_TUNE3]              = 0x88,
+	[QUSB2PHY_PORT_TUNE4]              = 0x8C,
+	[QUSB2PHY_PORT_TUNE5]              = 0x90,
+	[QUSB2PHY_PORT_TEST1]              = 0x98,
+	[QUSB2PHY_PORT_TEST2]              = 0x9C,
+	[QUSB2PHY_PORT_POWERDOWN]          = 0xB4,
+	[QUSB2PHY_INTR_CTRL]               = 0xBC,
+};
+
 static const unsigned int msm8996_regs_layout[] = {
 	[QUSB2PHY_PLL_STATUS]		= 0x38,
 	[QUSB2PHY_PORT_TUNE1]		= 0x80,
@@ -245,6 +275,9 @@ struct qusb2_phy_cfg {
 
 	/* true if PHY has PLL_CORE_INPUT_OVERRIDE register to reset PLL */
 	bool has_pll_override;
+
+	/* true if PHY default clk scheme is single-ended */
+	bool se_clk_scheme_default;
 };
 
 static const struct qusb2_phy_cfg msm8996_phy_cfg = {
@@ -253,6 +286,7 @@ static const struct qusb2_phy_cfg msm8996_phy_cfg = {
 	.regs		= msm8996_regs_layout,
 
 	.has_pll_test	= true,
+	.se_clk_scheme_default = true,
 	.disable_ctrl	= (CLAMP_N_EN | FREEZIO_N | POWER_DOWN),
 	.mask_core_ready = PLL_LOCKED,
 	.autoresume_en	 = BIT(3),
@@ -266,10 +300,22 @@ static const struct qusb2_phy_cfg msm8998_phy_cfg = {
 	.disable_ctrl   = POWER_DOWN,
 	.mask_core_ready = CORE_READY_STATUS,
 	.has_pll_override = true,
+	.se_clk_scheme_default = true,
 	.autoresume_en   = BIT(0),
 	.update_tune1_with_efuse = true,
 };
 
+static const struct qusb2_phy_cfg ipq6018_phy_cfg = {
+	.tbl            = ipq6018_init_tbl,
+	.tbl_num        = ARRAY_SIZE(ipq6018_init_tbl),
+	.regs           = ipq6018_regs_layout,
+
+	.disable_ctrl   = POWER_DOWN,
+	.mask_core_ready = PLL_LOCKED,
+	/* autoresume not used */
+	.autoresume_en   = BIT(0),
+};
+
 static const struct qusb2_phy_cfg qusb2_v2_phy_cfg = {
 	.tbl		= qusb2_v2_init_tbl,
 	.tbl_num	= ARRAY_SIZE(qusb2_v2_init_tbl),
@@ -279,10 +325,23 @@ static const struct qusb2_phy_cfg qusb2_v2_phy_cfg = {
 			   POWER_DOWN),
 	.mask_core_ready = CORE_READY_STATUS,
 	.has_pll_override = true,
+	.se_clk_scheme_default = true,
 	.autoresume_en	  = BIT(0),
 	.update_tune1_with_efuse = true,
 };
 
+static const struct qusb2_phy_cfg sdm660_phy_cfg = {
+	.tbl		= msm8996_init_tbl,
+	.tbl_num	= ARRAY_SIZE(msm8996_init_tbl),
+	.regs		= msm8996_regs_layout,
+
+	.has_pll_test	= true,
+	.se_clk_scheme_default = false,
+	.disable_ctrl	= (CLAMP_N_EN | FREEZIO_N | POWER_DOWN),
+	.mask_core_ready = PLL_LOCKED,
+	.autoresume_en	 = BIT(3),
+};
+
 static const char * const qusb2_phy_vreg_names[] = {
 	"vdda-pll", "vdda-phy-dpdm",
 };
@@ -701,8 +760,13 @@ static int qusb2_phy_init(struct phy *phy)
 	/* Required to get phy pll lock successfully */
 	usleep_range(150, 160);
 
-	/* Default is single-ended clock on msm8996 */
-	qphy->has_se_clk_scheme = true;
+	/*
+	 * Not all SoCs have a readable TCSR_PHY_CLK_SCHEME register in
+	 * the TCSR, so start from the default clock scheme hardcoded in
+	 * the configuration; it is used as-is when there is none.
+	 */
+	qphy->has_se_clk_scheme = cfg->se_clk_scheme_default;
+
 	/*
 	 * read TCSR_PHY_CLK_SCHEME register to check if single-ended
 	 * clock scheme is selected. If yes, then disable differential
@@ -810,6 +874,9 @@ static const struct phy_ops qusb2_phy_gen_ops = {
 
 static const struct of_device_id qusb2_phy_of_match_table[] = {
 	{
+		.compatible	= "qcom,ipq6018-qusb2-phy",
+		.data		= &ipq6018_phy_cfg,
+	}, {
 		.compatible	= "qcom,ipq8074-qusb2-phy",
 		.data		= &msm8996_phy_cfg,
 	}, {
@@ -819,6 +886,9 @@ static const struct of_device_id qusb2_phy_of_match_table[] = {
 		.compatible	= "qcom,msm8998-qusb2-phy",
 		.data		= &msm8998_phy_cfg,
 	}, {
+		.compatible	= "qcom,sdm660-qusb2-phy",
+		.data		= &sdm660_phy_cfg,
+	}, {
 		/*
 		 * Deprecated. Only here to support legacy device
 		 * trees that didn't include "qcom,qusb2-v2-phy"
diff --git a/drivers/phy/qualcomm/phy-qcom-usb-hs-28nm.c b/drivers/phy/qualcomm/phy-qcom-usb-hs-28nm.c
index a52a9bf13b75..8807e59a1162 100644
--- a/drivers/phy/qualcomm/phy-qcom-usb-hs-28nm.c
+++ b/drivers/phy/qualcomm/phy-qcom-usb-hs-28nm.c
@@ -401,13 +401,26 @@ static const struct hsphy_init_seq init_seq_femtophy[] = {
 	HSPHY_INIT_CFG(0x90, 0x60, 0),
 };
 
+static const struct hsphy_init_seq init_seq_mdm9607[] = {
+	HSPHY_INIT_CFG(0x80, 0x44, 0),
+	HSPHY_INIT_CFG(0x81, 0x38, 0),
+	HSPHY_INIT_CFG(0x82, 0x24, 0),
+	HSPHY_INIT_CFG(0x83, 0x13, 0),
+};
+
 static const struct hsphy_data hsphy_data_femtophy = {
 	.init_seq = init_seq_femtophy,
 	.init_seq_num = ARRAY_SIZE(init_seq_femtophy),
 };
 
+static const struct hsphy_data hsphy_data_mdm9607 = {
+	.init_seq = init_seq_mdm9607,
+	.init_seq_num = ARRAY_SIZE(init_seq_mdm9607),
+};
+
 static const struct of_device_id qcom_snps_hsphy_match[] = {
 	{ .compatible = "qcom,usb-hs-28nm-femtophy", .data = &hsphy_data_femtophy, },
+	{ .compatible = "qcom,usb-hs-28nm-mdm9607", .data = &hsphy_data_mdm9607, },
 	{ },
 };
 MODULE_DEVICE_TABLE(of, qcom_snps_hsphy_match);
diff --git a/drivers/phy/rockchip/phy-rockchip-emmc.c b/drivers/phy/rockchip/phy-rockchip-emmc.c
index 1e424f263e7a..20023f6eb994 100644
--- a/drivers/phy/rockchip/phy-rockchip-emmc.c
+++ b/drivers/phy/rockchip/phy-rockchip-emmc.c
@@ -248,15 +248,17 @@ static int rockchip_emmc_phy_init(struct phy *phy)
 	 * - SDHCI driver to get the PHY
 	 * - SDHCI driver to init the PHY
 	 *
-	 * The clock is optional, so upon any error we just set to NULL.
+	 * The clock is optional, so it is fetched with clk_get_optional();
+	 * only an error-pointer return (IS_ERR) gets error processing.
 	 *
 	 * NOTE: we don't do anything special for EPROBE_DEFER here.  Given the
 	 * above expected use case, EPROBE_DEFER isn't sensible to expect, so
 	 * it's just like any other error.
 	 */
-	rk_phy->emmcclk = clk_get(&phy->dev, "emmcclk");
+	rk_phy->emmcclk = clk_get_optional(&phy->dev, "emmcclk");
 	if (IS_ERR(rk_phy->emmcclk)) {
-		dev_dbg(&phy->dev, "Error getting emmcclk: %d\n", ret);
+		ret = PTR_ERR(rk_phy->emmcclk);
+		dev_err(&phy->dev, "Error getting emmcclk: %d\n", ret);
 		rk_phy->emmcclk = NULL;
 	}
 
@@ -380,10 +382,10 @@ static int rockchip_emmc_phy_probe(struct platform_device *pdev)
 	if (!of_property_read_u32(dev->of_node, "drive-impedance-ohm", &val))
 		rk_phy->drive_impedance = convert_drive_impedance_ohm(pdev, val);
 
-	if (of_property_read_bool(dev->of_node, "enable-strobe-pulldown"))
+	if (of_property_read_bool(dev->of_node, "rockchip,enable-strobe-pulldown"))
 		rk_phy->enable_strobe_pulldown = PHYCTRL_REN_STRB_ENABLE;
 
-	if (!of_property_read_u32(dev->of_node, "output-tapdelay-select", &val)) {
+	if (!of_property_read_u32(dev->of_node, "rockchip,output-tapdelay-select", &val)) {
 		if (val <= PHYCTRL_OTAPDLYSEL_MAXVALUE)
 			rk_phy->output_tapdelay_select = val;
 		else
diff --git a/drivers/phy/st/phy-stm32-usbphyc.c b/drivers/phy/st/phy-stm32-usbphyc.c
index a54317e96c41..d08fbb180e43 100644
--- a/drivers/phy/st/phy-stm32-usbphyc.c
+++ b/drivers/phy/st/phy-stm32-usbphyc.c
@@ -8,7 +8,7 @@
 #include <linux/bitfield.h>
 #include <linux/clk.h>
 #include <linux/delay.h>
-#include <linux/io.h>
+#include <linux/iopoll.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of_platform.h>
@@ -17,6 +17,7 @@
 
 #define STM32_USBPHYC_PLL	0x0
 #define STM32_USBPHYC_MISC	0x8
+#define STM32_USBPHYC_MONITOR(X) (0x108 + ((X) * 0x100))
 #define STM32_USBPHYC_VERSION	0x3F4
 
 /* STM32_USBPHYC_PLL bit fields */
@@ -32,19 +33,16 @@
 /* STM32_USBPHYC_MISC bit fields */
 #define SWITHOST		BIT(0)
 
+/* STM32_USBPHYC_MONITOR bit fields */
+#define STM32_USBPHYC_MON_OUT	GENMASK(3, 0)
+#define STM32_USBPHYC_MON_SEL	GENMASK(8, 4)
+#define STM32_USBPHYC_MON_SEL_LOCKP 0x1F
+#define STM32_USBPHYC_MON_OUT_LOCKP BIT(3)
+
 /* STM32_USBPHYC_VERSION bit fields */
 #define MINREV			GENMASK(3, 0)
 #define MAJREV			GENMASK(7, 4)
 
-static const char * const supplies_names[] = {
-	"vdda1v1",	/* 1V1 */
-	"vdda1v8",	/* 1V8 */
-};
-
-#define NUM_SUPPLIES		ARRAY_SIZE(supplies_names)
-
-#define PLL_LOCK_TIME_US	100
-#define PLL_PWR_DOWN_TIME_US	5
 #define PLL_FVCO_MHZ		2880
 #define PLL_INFF_MIN_RATE_HZ	19200000
 #define PLL_INFF_MAX_RATE_HZ	38400000
@@ -58,7 +56,6 @@ struct pll_params {
 struct stm32_usbphyc_phy {
 	struct phy *phy;
 	struct stm32_usbphyc *usbphyc;
-	struct regulator_bulk_data supplies[NUM_SUPPLIES];
 	u32 index;
 	bool active;
 };
@@ -70,6 +67,9 @@ struct stm32_usbphyc {
 	struct reset_control *rst;
 	struct stm32_usbphyc_phy **phys;
 	int nphys;
+	struct regulator *vdda1v1;
+	struct regulator *vdda1v8;
+	atomic_t n_pll_cons;
 	int switch_setup;
 };
 
@@ -83,6 +83,41 @@ static inline void stm32_usbphyc_clr_bits(void __iomem *reg, u32 bits)
 	writel_relaxed(readl_relaxed(reg) & ~bits, reg);
 }
 
+/*
+ * Power up both PHY supplies, vdda1v1 first and then vdda1v8.
+ * If vdda1v8 fails, vdda1v1 is switched off again before returning.
+ */
+static int stm32_usbphyc_regulators_enable(struct stm32_usbphyc *usbphyc)
+{
+	int err = regulator_enable(usbphyc->vdda1v1);
+
+	if (err)
+		return err;
+
+	err = regulator_enable(usbphyc->vdda1v8);
+	if (!err)
+		return 0;
+
+	regulator_disable(usbphyc->vdda1v1);
+
+	return err;
+}
+
+/*
+ * Switch the supplies off in the opposite order of enabling: vdda1v8,
+ * then vdda1v1. The first failing regulator_disable() ends the sequence
+ * and its error code is returned.
+ */
+static int stm32_usbphyc_regulators_disable(struct stm32_usbphyc *usbphyc)
+{
+	int err = regulator_disable(usbphyc->vdda1v8);
+
+	if (err)
+		return err;
+
+	return regulator_disable(usbphyc->vdda1v1);
+}
+
 static void stm32_usbphyc_get_pll_params(u32 clk_rate,
 					 struct pll_params *pll_params)
 {
@@ -142,83 +177,106 @@ static int stm32_usbphyc_pll_init(struct stm32_usbphyc *usbphyc)
 	return 0;
 }
 
-static bool stm32_usbphyc_has_one_phy_active(struct stm32_usbphyc *usbphyc)
+static int __stm32_usbphyc_pll_disable(struct stm32_usbphyc *usbphyc)
 {
-	int i;
+	void __iomem *pll_reg = usbphyc->base + STM32_USBPHYC_PLL;
+	u32 pllen;
+
+	stm32_usbphyc_clr_bits(pll_reg, PLLEN);
 
-	for (i = 0; i < usbphyc->nphys; i++)
-		if (usbphyc->phys[i]->active)
-			return true;
+	/* Wait for minimum width of powerdown pulse (ENABLE = Low) */
+	if (readl_relaxed_poll_timeout(pll_reg, pllen, !(pllen & PLLEN), 5, 50))
+		dev_err(usbphyc->dev, "PLL not reset\n");
 
-	return false;
+	return stm32_usbphyc_regulators_disable(usbphyc);
+}
+
+static int stm32_usbphyc_pll_disable(struct stm32_usbphyc *usbphyc)
+{
+	/* Check if a phy port is still active or clk48 in use */
+	if (atomic_dec_return(&usbphyc->n_pll_cons) > 0)
+		return 0;
+
+	return __stm32_usbphyc_pll_disable(usbphyc);
 }
 
 static int stm32_usbphyc_pll_enable(struct stm32_usbphyc *usbphyc)
 {
 	void __iomem *pll_reg = usbphyc->base + STM32_USBPHYC_PLL;
-	bool pllen = (readl_relaxed(pll_reg) & PLLEN);
+	bool pllen = readl_relaxed(pll_reg) & PLLEN;
 	int ret;
 
-	/* Check if one phy port has already configured the pll */
-	if (pllen && stm32_usbphyc_has_one_phy_active(usbphyc))
+	/*
+	 * Check if a phy port or clk48 prepare has configured the pll
+	 * and ensure the PLL is enabled
+	 */
+	if (atomic_inc_return(&usbphyc->n_pll_cons) > 1 && pllen)
 		return 0;
 
 	if (pllen) {
-		stm32_usbphyc_clr_bits(pll_reg, PLLEN);
-		/* Wait for minimum width of powerdown pulse (ENABLE = Low) */
-		udelay(PLL_PWR_DOWN_TIME_US);
+		/*
+		 * PLL shouldn't be enabled without known consumer: disable
+		 * it first; on failure drop the n_pll_cons count taken above
+		 */
+		dev_warn(usbphyc->dev, "PLL enabled without known consumers\n");
+
+		ret = __stm32_usbphyc_pll_disable(usbphyc);
+		if (ret)
+			goto dec_n_pll_cons;
 	}
 
+	ret = stm32_usbphyc_regulators_enable(usbphyc);
+	if (ret)
+		goto dec_n_pll_cons;
+
 	ret = stm32_usbphyc_pll_init(usbphyc);
 	if (ret)
-		return ret;
+		goto reg_disable;
 
 	stm32_usbphyc_set_bits(pll_reg, PLLEN);
 
-	/* Wait for maximum lock time */
-	udelay(PLL_LOCK_TIME_US);
-
-	if (!(readl_relaxed(pll_reg) & PLLEN)) {
-		dev_err(usbphyc->dev, "PLLEN not set\n");
-		return -EIO;
-	}
-
 	return 0;
-}
-
-static int stm32_usbphyc_pll_disable(struct stm32_usbphyc *usbphyc)
-{
-	void __iomem *pll_reg = usbphyc->base + STM32_USBPHYC_PLL;
 
-	/* Check if other phy port active */
-	if (stm32_usbphyc_has_one_phy_active(usbphyc))
-		return 0;
-
-	stm32_usbphyc_clr_bits(pll_reg, PLLEN);
-	/* Wait for minimum width of powerdown pulse (ENABLE = Low) */
-	udelay(PLL_PWR_DOWN_TIME_US);
+reg_disable:
+	stm32_usbphyc_regulators_disable(usbphyc);
 
-	if (readl_relaxed(pll_reg) & PLLEN) {
-		dev_err(usbphyc->dev, "PLL not reset\n");
-		return -EIO;
-	}
+dec_n_pll_cons:
+	atomic_dec(&usbphyc->n_pll_cons);
 
-	return 0;
+	return ret;
 }
 
 static int stm32_usbphyc_phy_init(struct phy *phy)
 {
 	struct stm32_usbphyc_phy *usbphyc_phy = phy_get_drvdata(phy);
 	struct stm32_usbphyc *usbphyc = usbphyc_phy->usbphyc;
+	u32 reg_mon = STM32_USBPHYC_MONITOR(usbphyc_phy->index);
+	u32 monsel = FIELD_PREP(STM32_USBPHYC_MON_SEL,
+				STM32_USBPHYC_MON_SEL_LOCKP);
+	u32 monout;
 	int ret;
 
 	ret = stm32_usbphyc_pll_enable(usbphyc);
 	if (ret)
 		return ret;
 
+	/* Check that PLL Lock input to PHY is High */
+	writel_relaxed(monsel, usbphyc->base + reg_mon);
+	ret = readl_relaxed_poll_timeout(usbphyc->base + reg_mon, monout,
+					 (monout & STM32_USBPHYC_MON_OUT_LOCKP),
+					 100, 1000);
+	if (ret) {
+		dev_err(usbphyc->dev, "PLL Lock input to PHY is Low (val=%x)\n",
+			(u32)(monout & STM32_USBPHYC_MON_OUT));
+		/* Undo pll_enable, but report the poll error, not its result */
+		stm32_usbphyc_pll_disable(usbphyc);
+
+		return ret;
+	}
+
 	usbphyc_phy->active = true;
 
 	return 0;
 }
 
 static int stm32_usbphyc_phy_exit(struct phy *phy)
@@ -231,25 +289,9 @@ static int stm32_usbphyc_phy_exit(struct phy *phy)
 	return stm32_usbphyc_pll_disable(usbphyc);
 }
 
-static int stm32_usbphyc_phy_power_on(struct phy *phy)
-{
-	struct stm32_usbphyc_phy *usbphyc_phy = phy_get_drvdata(phy);
-
-	return regulator_bulk_enable(NUM_SUPPLIES, usbphyc_phy->supplies);
-}
-
-static int stm32_usbphyc_phy_power_off(struct phy *phy)
-{
-	struct stm32_usbphyc_phy *usbphyc_phy = phy_get_drvdata(phy);
-
-	return regulator_bulk_disable(NUM_SUPPLIES, usbphyc_phy->supplies);
-}
-
 static const struct phy_ops stm32_usbphyc_phy_ops = {
 	.init = stm32_usbphyc_phy_init,
 	.exit = stm32_usbphyc_phy_exit,
-	.power_on = stm32_usbphyc_phy_power_on,
-	.power_off = stm32_usbphyc_phy_power_off,
 	.owner = THIS_MODULE,
 };
 
@@ -312,7 +354,7 @@ static int stm32_usbphyc_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	struct device_node *child, *np = dev->of_node;
 	struct phy_provider *phy_provider;
-	u32 version;
+	u32 pllen, version;
 	int ret, port = 0;
 
 	usbphyc = devm_kzalloc(dev, sizeof(*usbphyc), GFP_KERNEL);
@@ -344,6 +386,19 @@ static int stm32_usbphyc_probe(struct platform_device *pdev)
 		ret = PTR_ERR(usbphyc->rst);
 		if (ret == -EPROBE_DEFER)
 			goto clk_disable;
+
+		stm32_usbphyc_clr_bits(usbphyc->base + STM32_USBPHYC_PLL, PLLEN);
+	}
+
+	/*
+	 * Wait for minimum width of powerdown pulse (ENABLE = Low):
+	 * we have to ensure the PLL is disabled before phys initialization.
+	 */
+	if (readl_relaxed_poll_timeout(usbphyc->base + STM32_USBPHYC_PLL,
+				       pllen, !(pllen & PLLEN), 5, 50)) {
+		dev_warn(usbphyc->dev, "PLL not reset\n");
+		ret = -EPROBE_DEFER;
+		goto clk_disable;
 	}
 
 	usbphyc->switch_setup = -EINVAL;
@@ -355,11 +410,26 @@ static int stm32_usbphyc_probe(struct platform_device *pdev)
 		goto clk_disable;
 	}
 
+	usbphyc->vdda1v1 = devm_regulator_get(dev, "vdda1v1");
+	if (IS_ERR(usbphyc->vdda1v1)) {
+		ret = PTR_ERR(usbphyc->vdda1v1);
+		if (ret != -EPROBE_DEFER)
+			dev_err(dev, "failed to get vdda1v1 supply: %d\n", ret);
+		goto clk_disable;
+	}
+
+	usbphyc->vdda1v8 = devm_regulator_get(dev, "vdda1v8");
+	if (IS_ERR(usbphyc->vdda1v8)) {
+		ret = PTR_ERR(usbphyc->vdda1v8);
+		if (ret != -EPROBE_DEFER)
+			dev_err(dev, "failed to get vdda1v8 supply: %d\n", ret);
+		goto clk_disable;
+	}
+
 	for_each_child_of_node(np, child) {
 		struct stm32_usbphyc_phy *usbphyc_phy;
 		struct phy *phy;
 		u32 index;
-		int i;
 
 		phy = devm_phy_create(dev, child, &stm32_usbphyc_phy_ops);
 		if (IS_ERR(phy)) {
@@ -377,18 +447,6 @@ static int stm32_usbphyc_probe(struct platform_device *pdev)
 			goto put_child;
 		}
 
-		for (i = 0; i < NUM_SUPPLIES; i++)
-			usbphyc_phy->supplies[i].supply = supplies_names[i];
-
-		ret = devm_regulator_bulk_get(&phy->dev, NUM_SUPPLIES,
-					      usbphyc_phy->supplies);
-		if (ret) {
-			if (ret != -EPROBE_DEFER)
-				dev_err(&phy->dev,
-					"failed to get regulators: %d\n", ret);
-			goto put_child;
-		}
-
 		ret = of_property_read_u32(child, "reg", &index);
 		if (ret || index > usbphyc->nphys) {
 			dev_err(&phy->dev, "invalid reg property: %d\n", ret);
@@ -432,6 +490,12 @@ clk_disable:
 static int stm32_usbphyc_remove(struct platform_device *pdev)
 {
 	struct stm32_usbphyc *usbphyc = dev_get_drvdata(&pdev->dev);
+	int port;
+
+	/* Ensure PHYs are not active, to allow PLL disabling */
+	for (port = 0; port < usbphyc->nphys; port++)
+		if (usbphyc->phys[port]->active)
+			stm32_usbphyc_phy_exit(usbphyc->phys[port]->phy);
 
 	clk_disable_unprepare(usbphyc->clk);
 
diff --git a/drivers/phy/xilinx/phy-zynqmp.c b/drivers/phy/xilinx/phy-zynqmp.c
index 2b0f921b6ee3..2b65f84a5f89 100644
--- a/drivers/phy/xilinx/phy-zynqmp.c
+++ b/drivers/phy/xilinx/phy-zynqmp.c
@@ -874,13 +874,10 @@ static int xpsgtr_get_ref_clocks(struct xpsgtr_dev *gtr_dev)
 
 		snprintf(name, sizeof(name), "ref%u", refclk);
 		clk = devm_clk_get_optional(gtr_dev->dev, name);
-		if (IS_ERR(clk)) {
-			if (PTR_ERR(clk) != -EPROBE_DEFER)
-				dev_err(gtr_dev->dev,
-					"Failed to get reference clock %u: %ld\n",
-					refclk, PTR_ERR(clk));
-			return PTR_ERR(clk);
-		}
+		if (IS_ERR(clk))
+			return dev_err_probe(gtr_dev->dev, PTR_ERR(clk),
+					     "Failed to get reference clock %u\n",
+					     refclk);
 
 		if (!clk)
 			continue;
diff --git a/drivers/platform/goldfish/goldfish_pipe.c b/drivers/platform/goldfish/goldfish_pipe.c
index 1ab207ec9c94..b67539f9848c 100644
--- a/drivers/platform/goldfish/goldfish_pipe.c
+++ b/drivers/platform/goldfish/goldfish_pipe.c
@@ -212,9 +212,6 @@ struct goldfish_pipe_dev {
 	int version;
 	unsigned char __iomem *base;
 
-	/* an irq tasklet to run goldfish_interrupt_task */
-	struct tasklet_struct irq_tasklet;
-
 	struct miscdevice miscdev;
 };
 
@@ -577,10 +574,10 @@ static struct goldfish_pipe *signalled_pipes_pop_front(
 	return pipe;
 }
 
-static void goldfish_interrupt_task(unsigned long dev_addr)
+static irqreturn_t goldfish_interrupt_task(int irq, void *dev_addr)
 {
 	/* Iterate over the signalled pipes and wake them one by one */
-	struct goldfish_pipe_dev *dev = (struct goldfish_pipe_dev *)dev_addr;
+	struct goldfish_pipe_dev *dev = dev_addr;
 	struct goldfish_pipe *pipe;
 	int wakes;
 
@@ -599,13 +596,14 @@ static void goldfish_interrupt_task(unsigned long dev_addr)
 		 */
 		wake_up_interruptible(&pipe->wake_queue);
 	}
+	return IRQ_HANDLED;
 }
 
 static void goldfish_pipe_device_deinit(struct platform_device *pdev,
 					struct goldfish_pipe_dev *dev);
 
 /*
- * The general idea of the interrupt handling:
+ * The general idea of the (threaded) interrupt handling:
  *
  *  1. device raises an interrupt if there's at least one signalled pipe
  *  2. IRQ handler reads the signalled pipes and their count from the device
@@ -614,8 +612,8 @@ static void goldfish_pipe_device_deinit(struct platform_device *pdev,
  *      otherwise it leaves it raised, so IRQ handler will be called
  *      again for the next chunk
  *  4. IRQ handler adds all returned pipes to the device's signalled pipes list
- *  5. IRQ handler launches a tasklet to process the signalled pipes from the
- *      list in a separate context
+ *  5. IRQ handler defers processing of the signalled pipes on the list to
+ *      the IRQ thread (goldfish_interrupt_task), a separate context
  */
 static irqreturn_t goldfish_pipe_interrupt(int irq, void *dev_id)
 {
@@ -645,8 +643,7 @@ static irqreturn_t goldfish_pipe_interrupt(int irq, void *dev_id)
 
 	spin_unlock_irqrestore(&dev->lock, flags);
 
-	tasklet_schedule(&dev->irq_tasklet);
-	return IRQ_HANDLED;
+	return IRQ_WAKE_THREAD;
 }
 
 static int get_free_pipe_id_locked(struct goldfish_pipe_dev *dev)
@@ -811,12 +808,10 @@ static int goldfish_pipe_device_init(struct platform_device *pdev,
 {
 	int err;
 
-	tasklet_init(&dev->irq_tasklet, &goldfish_interrupt_task,
-		     (unsigned long)dev);
-
-	err = devm_request_irq(&pdev->dev, dev->irq,
-			       goldfish_pipe_interrupt,
-			       IRQF_SHARED, "goldfish_pipe", dev);
+	err = devm_request_threaded_irq(&pdev->dev, dev->irq,
+					goldfish_pipe_interrupt,
+					goldfish_interrupt_task,
+					IRQF_SHARED, "goldfish_pipe", dev);
 	if (err) {
 		dev_err(&pdev->dev, "unable to allocate IRQ for v2\n");
 		return err;
@@ -874,7 +869,6 @@ static void goldfish_pipe_device_deinit(struct platform_device *pdev,
 					struct goldfish_pipe_dev *dev)
 {
 	misc_deregister(&dev->miscdev);
-	tasklet_kill(&dev->irq_tasklet);
 	kfree(dev->pipes);
 	free_page((unsigned long)dev->buffers);
 }
diff --git a/drivers/soundwire/bus.c b/drivers/soundwire/bus.c
index d1e8c3a54976..46885429928a 100644
--- a/drivers/soundwire/bus.c
+++ b/drivers/soundwire/bus.c
@@ -267,8 +267,10 @@ static int sdw_transfer_unlocked(struct sdw_bus *bus, struct sdw_msg *msg)
 
 	ret = do_transfer(bus, msg);
 	if (ret != 0 && ret != -ENODATA)
-		dev_err(bus->dev, "trf on Slave %d failed:%d\n",
-			msg->dev_num, ret);
+		dev_err(bus->dev, "trf on Slave %d failed:%d %s addr %x count %d\n",
+			msg->dev_num, ret,
+			(msg->flags & SDW_MSG_FLAG_WRITE) ? "write" : "read",
+			msg->addr, msg->len);
 
 	if (msg->page)
 		sdw_reset_page(bus, msg->dev_num);
@@ -405,10 +407,11 @@ sdw_nwrite_no_pm(struct sdw_slave *slave, u32 addr, size_t count, u8 *val)
 	return sdw_transfer(slave->bus, &msg);
 }
 
-static int sdw_write_no_pm(struct sdw_slave *slave, u32 addr, u8 value)
+int sdw_write_no_pm(struct sdw_slave *slave, u32 addr, u8 value)
 {
 	return sdw_nwrite_no_pm(slave, addr, 1, &value);
 }
+EXPORT_SYMBOL(sdw_write_no_pm);
 
 static int
 sdw_bread_no_pm(struct sdw_bus *bus, u16 dev_num, u32 addr)
@@ -476,8 +479,7 @@ int sdw_bwrite_no_pm_unlocked(struct sdw_bus *bus, u16 dev_num, u32 addr, u8 val
 }
 EXPORT_SYMBOL(sdw_bwrite_no_pm_unlocked);
 
-static int
-sdw_read_no_pm(struct sdw_slave *slave, u32 addr)
+int sdw_read_no_pm(struct sdw_slave *slave, u32 addr)
 {
 	u8 buf;
 	int ret;
@@ -488,6 +490,19 @@ sdw_read_no_pm(struct sdw_slave *slave, u32 addr)
 	else
 		return buf;
 }
+EXPORT_SYMBOL(sdw_read_no_pm);
+
+/* Read-modify-write of one Slave register (no PM): clear @mask, OR in @val */
+static int sdw_update_no_pm(struct sdw_slave *slave, u32 addr, u8 mask, u8 val)
+{
+	int cur = sdw_read_no_pm(slave, addr);
+
+	/* A negative value is an error code from the read: pass it through */
+	if (cur < 0)
+		return cur;
+
+	return sdw_write_no_pm(slave, addr, (cur & ~mask) | val);
+}
 
 /**
  * sdw_nread() - Read "n" contiguous SDW Slave registers
@@ -500,16 +515,16 @@ int sdw_nread(struct sdw_slave *slave, u32 addr, size_t count, u8 *val)
 {
 	int ret;
 
-	ret = pm_runtime_get_sync(slave->bus->dev);
+	ret = pm_runtime_get_sync(&slave->dev);
 	if (ret < 0 && ret != -EACCES) {
-		pm_runtime_put_noidle(slave->bus->dev);
+		pm_runtime_put_noidle(&slave->dev);
 		return ret;
 	}
 
 	ret = sdw_nread_no_pm(slave, addr, count, val);
 
-	pm_runtime_mark_last_busy(slave->bus->dev);
-	pm_runtime_put(slave->bus->dev);
+	pm_runtime_mark_last_busy(&slave->dev);
+	pm_runtime_put(&slave->dev);
 
 	return ret;
 }
@@ -526,16 +541,16 @@ int sdw_nwrite(struct sdw_slave *slave, u32 addr, size_t count, u8 *val)
 {
 	int ret;
 
-	ret = pm_runtime_get_sync(slave->bus->dev);
+	ret = pm_runtime_get_sync(&slave->dev);
 	if (ret < 0 && ret != -EACCES) {
-		pm_runtime_put_noidle(slave->bus->dev);
+		pm_runtime_put_noidle(&slave->dev);
 		return ret;
 	}
 
 	ret = sdw_nwrite_no_pm(slave, addr, count, val);
 
-	pm_runtime_mark_last_busy(slave->bus->dev);
-	pm_runtime_put(slave->bus->dev);
+	pm_runtime_mark_last_busy(&slave->dev);
+	pm_runtime_put(&slave->dev);
 
 	return ret;
 }
@@ -623,6 +638,7 @@ err:
 
 static int sdw_assign_device_num(struct sdw_slave *slave)
 {
+	struct sdw_bus *bus = slave->bus;
 	int ret, dev_num;
 	bool new_device = false;
 
@@ -633,7 +649,7 @@ static int sdw_assign_device_num(struct sdw_slave *slave)
 			dev_num = sdw_get_device_num(slave);
 			mutex_unlock(&slave->bus->bus_lock);
 			if (dev_num < 0) {
-				dev_err(slave->bus->dev, "Get dev_num failed: %d\n",
+				dev_err(bus->dev, "Get dev_num failed: %d\n",
 					dev_num);
 				return dev_num;
 			}
@@ -646,7 +662,7 @@ static int sdw_assign_device_num(struct sdw_slave *slave)
 	}
 
 	if (!new_device)
-		dev_dbg(slave->bus->dev,
+		dev_dbg(bus->dev,
 			"Slave already registered, reusing dev_num:%d\n",
 			slave->dev_num);
 
@@ -656,7 +672,7 @@ static int sdw_assign_device_num(struct sdw_slave *slave)
 
 	ret = sdw_write_no_pm(slave, SDW_SCP_DEVNUMBER, dev_num);
 	if (ret < 0) {
-		dev_err(&slave->dev, "Program device_num %d failed: %d\n",
+		dev_err(bus->dev, "Program device_num %d failed: %d\n",
 			dev_num, ret);
 		return ret;
 	}
@@ -679,9 +695,8 @@ void sdw_extract_slave_id(struct sdw_bus *bus,
 	id->class_id = SDW_CLASS_ID(addr);
 
 	dev_dbg(bus->dev,
-		"SDW Slave class_id %x, part_id %x, mfg_id %x, unique_id %x, version %x\n",
-				id->class_id, id->part_id, id->mfg_id,
-				id->unique_id, id->sdw_version);
+		"SDW Slave class_id 0x%02x, mfg_id 0x%04x, part_id 0x%04x, unique_id 0x%x, version 0x%x\n",
+		id->class_id, id->mfg_id, id->part_id, id->unique_id, id->sdw_version);
 }
 
 static int sdw_program_device_num(struct sdw_bus *bus)
@@ -735,7 +750,7 @@ static int sdw_program_device_num(struct sdw_bus *bus)
 				 */
 				ret = sdw_assign_device_num(slave);
 				if (ret) {
-					dev_err(slave->bus->dev,
+					dev_err(bus->dev,
 						"Assign dev_num failed:%d\n",
 						ret);
 					return ret;
@@ -775,15 +790,17 @@ static int sdw_program_device_num(struct sdw_bus *bus)
 static void sdw_modify_slave_status(struct sdw_slave *slave,
 				    enum sdw_slave_status status)
 {
-	mutex_lock(&slave->bus->bus_lock);
+	struct sdw_bus *bus = slave->bus;
 
-	dev_vdbg(&slave->dev,
+	mutex_lock(&bus->bus_lock);
+
+	dev_vdbg(bus->dev,
 		 "%s: changing status slave %d status %d new status %d\n",
 		 __func__, slave->dev_num, slave->status, status);
 
 	if (status == SDW_SLAVE_UNATTACHED) {
 		dev_dbg(&slave->dev,
-			"%s: initializing completion for Slave %d\n",
+			"%s: initializing enumeration and init completion for Slave %d\n",
 			__func__, slave->dev_num);
 
 		init_completion(&slave->enumeration_complete);
@@ -792,13 +809,13 @@ static void sdw_modify_slave_status(struct sdw_slave *slave,
 	} else if ((status == SDW_SLAVE_ATTACHED) &&
 		   (slave->status == SDW_SLAVE_UNATTACHED)) {
 		dev_dbg(&slave->dev,
-			"%s: signaling completion for Slave %d\n",
+			"%s: signaling enumeration completion for Slave %d\n",
 			__func__, slave->dev_num);
 
 		complete(&slave->enumeration_complete);
 	}
 	slave->status = status;
-	mutex_unlock(&slave->bus->bus_lock);
+	mutex_unlock(&bus->bus_lock);
 }
 
 static enum sdw_clk_stop_mode sdw_get_clk_stop_mode(struct sdw_slave *slave)
@@ -950,17 +967,17 @@ int sdw_bus_prep_clk_stop(struct sdw_bus *bus)
 			simple_clk_stop = false;
 	}
 
-	if (is_slave && !simple_clk_stop) {
+	/* Skip remaining clock stop preparation if no Slave is attached */
+	if (!is_slave)
+		return ret;
+
+	if (!simple_clk_stop) {
 		ret = sdw_bus_wait_for_clk_prep_deprep(bus,
 						       SDW_BROADCAST_DEV_NUM);
 		if (ret < 0)
 			return ret;
 	}
 
-	/* Don't need to inform slaves if there is no slave attached */
-	if (!is_slave)
-		return ret;
-
 	/* Inform slaves that prep is done */
 	list_for_each_entry(slave, &bus->slaves, node) {
 		if (!slave->dev_num)
@@ -1074,16 +1091,13 @@ int sdw_bus_exit_clk_stop(struct sdw_bus *bus)
 				 "clk stop deprep failed:%d", ret);
 	}
 
-	if (is_slave && !simple_clk_stop)
-		sdw_bus_wait_for_clk_prep_deprep(bus, SDW_BROADCAST_DEV_NUM);
-
-	/*
-	 * Don't need to call slave callback function if there is no slave
-	 * attached
-	 */
+	/* Skip remaining clock stop de-preparation if no Slave is attached */
 	if (!is_slave)
 		return 0;
 
+	if (!simple_clk_stop)
+		sdw_bus_wait_for_clk_prep_deprep(bus, SDW_BROADCAST_DEV_NUM);
+
 	list_for_each_entry(slave, &bus->slaves, node) {
 		if (!slave->dev_num)
 			continue;
@@ -1127,7 +1141,7 @@ int sdw_configure_dpn_intr(struct sdw_slave *slave,
 
 	ret = sdw_update(slave, addr, (mask | SDW_DPN_INT_PORT_READY), val);
 	if (ret < 0)
-		dev_err(slave->bus->dev,
+		dev_err(&slave->dev,
 			"SDW_DPN_INTMASK write failed:%d\n", val);
 
 	return ret;
@@ -1210,7 +1224,7 @@ static int sdw_slave_set_frequency(struct sdw_slave *slave)
 	}
 	scale_index++;
 
-	ret = sdw_write(slave, SDW_SCP_BUS_CLOCK_BASE, base);
+	ret = sdw_write_no_pm(slave, SDW_SCP_BUS_CLOCK_BASE, base);
 	if (ret < 0) {
 		dev_err(&slave->dev,
 			"SDW_SCP_BUS_CLOCK_BASE write failed:%d\n", ret);
@@ -1218,13 +1232,13 @@ static int sdw_slave_set_frequency(struct sdw_slave *slave)
 	}
 
 	/* initialize scale for both banks */
-	ret = sdw_write(slave, SDW_SCP_BUSCLOCK_SCALE_B0, scale_index);
+	ret = sdw_write_no_pm(slave, SDW_SCP_BUSCLOCK_SCALE_B0, scale_index);
 	if (ret < 0) {
 		dev_err(&slave->dev,
 			"SDW_SCP_BUSCLOCK_SCALE_B0 write failed:%d\n", ret);
 		return ret;
 	}
-	ret = sdw_write(slave, SDW_SCP_BUSCLOCK_SCALE_B1, scale_index);
+	ret = sdw_write_no_pm(slave, SDW_SCP_BUSCLOCK_SCALE_B1, scale_index);
 	if (ret < 0)
 		dev_err(&slave->dev,
 			"SDW_SCP_BUSCLOCK_SCALE_B1 write failed:%d\n", ret);
@@ -1256,9 +1270,9 @@ static int sdw_initialize_slave(struct sdw_slave *slave)
 	val = slave->prop.scp_int1_mask;
 
 	/* Enable SCP interrupts */
-	ret = sdw_update(slave, SDW_SCP_INTMASK1, val, val);
+	ret = sdw_update_no_pm(slave, SDW_SCP_INTMASK1, val, val);
 	if (ret < 0) {
-		dev_err(slave->bus->dev,
+		dev_err(&slave->dev,
 			"SDW_SCP_INTMASK1 write failed:%d\n", ret);
 		return ret;
 	}
@@ -1271,9 +1285,9 @@ static int sdw_initialize_slave(struct sdw_slave *slave)
 	val = prop->dp0_prop->imp_def_interrupts;
 	val |= SDW_DP0_INT_PORT_READY | SDW_DP0_INT_BRA_FAILURE;
 
-	ret = sdw_update(slave, SDW_DP0_INTMASK, val, val);
+	ret = sdw_update_no_pm(slave, SDW_DP0_INTMASK, val, val);
 	if (ret < 0)
-		dev_err(slave->bus->dev,
+		dev_err(&slave->dev,
 			"SDW_DP0_INTMASK read failed:%d\n", ret);
 	return ret;
 }
@@ -1283,9 +1297,9 @@ static int sdw_handle_dp0_interrupt(struct sdw_slave *slave, u8 *slave_status)
 	u8 clear, impl_int_mask;
 	int status, status2, ret, count = 0;
 
-	status = sdw_read(slave, SDW_DP0_INT);
+	status = sdw_read_no_pm(slave, SDW_DP0_INT);
 	if (status < 0) {
-		dev_err(slave->bus->dev,
+		dev_err(&slave->dev,
 			"SDW_DP0_INT read failed:%d\n", status);
 		return status;
 	}
@@ -1322,17 +1336,17 @@ static int sdw_handle_dp0_interrupt(struct sdw_slave *slave, u8 *slave_status)
 		}
 
 		/* clear the interrupts but don't touch reserved and SDCA_CASCADE fields */
-		ret = sdw_write(slave, SDW_DP0_INT, clear);
+		ret = sdw_write_no_pm(slave, SDW_DP0_INT, clear);
 		if (ret < 0) {
-			dev_err(slave->bus->dev,
+			dev_err(&slave->dev,
 				"SDW_DP0_INT write failed:%d\n", ret);
 			return ret;
 		}
 
 		/* Read DP0 interrupt again */
-		status2 = sdw_read(slave, SDW_DP0_INT);
+		status2 = sdw_read_no_pm(slave, SDW_DP0_INT);
 		if (status2 < 0) {
-			dev_err(slave->bus->dev,
+			dev_err(&slave->dev,
 				"SDW_DP0_INT read failed:%d\n", status2);
 			return status2;
 		}
@@ -1345,7 +1359,7 @@ static int sdw_handle_dp0_interrupt(struct sdw_slave *slave, u8 *slave_status)
 	} while ((status & SDW_DP0_INTERRUPTS) && (count < SDW_READ_INTR_CLEAR_RETRY));
 
 	if (count == SDW_READ_INTR_CLEAR_RETRY)
-		dev_warn(slave->bus->dev, "Reached MAX_RETRY on DP0 read\n");
+		dev_warn(&slave->dev, "Reached MAX_RETRY on DP0 read\n");
 
 	return ret;
 }
@@ -1361,9 +1375,9 @@ static int sdw_handle_port_interrupt(struct sdw_slave *slave,
 		return sdw_handle_dp0_interrupt(slave, slave_status);
 
 	addr = SDW_DPN_INT(port);
-	status = sdw_read(slave, addr);
+	status = sdw_read_no_pm(slave, addr);
 	if (status < 0) {
-		dev_err(slave->bus->dev,
+		dev_err(&slave->dev,
 			"SDW_DPN_INT read failed:%d\n", status);
 
 		return status;
@@ -1395,17 +1409,17 @@ static int sdw_handle_port_interrupt(struct sdw_slave *slave,
 		}
 
 		/* clear the interrupt but don't touch reserved fields */
-		ret = sdw_write(slave, addr, clear);
+		ret = sdw_write_no_pm(slave, addr, clear);
 		if (ret < 0) {
-			dev_err(slave->bus->dev,
+			dev_err(&slave->dev,
 				"SDW_DPN_INT write failed:%d\n", ret);
 			return ret;
 		}
 
 		/* Read DPN interrupt again */
-		status2 = sdw_read(slave, addr);
+		status2 = sdw_read_no_pm(slave, addr);
 		if (status2 < 0) {
-			dev_err(slave->bus->dev,
+			dev_err(&slave->dev,
 				"SDW_DPN_INT read failed:%d\n", status2);
 			return status2;
 		}
@@ -1418,7 +1432,7 @@ static int sdw_handle_port_interrupt(struct sdw_slave *slave,
 	} while ((status & SDW_DPN_INTERRUPTS) && (count < SDW_READ_INTR_CLEAR_RETRY));
 
 	if (count == SDW_READ_INTR_CLEAR_RETRY)
-		dev_warn(slave->bus->dev, "Reached MAX_RETRY on port read");
+		dev_warn(&slave->dev, "Reached MAX_RETRY on port read");
 
 	return ret;
 }
@@ -1440,30 +1454,30 @@ static int sdw_handle_slave_alerts(struct sdw_slave *slave)
 	ret = pm_runtime_get_sync(&slave->dev);
 	if (ret < 0 && ret != -EACCES) {
 		dev_err(&slave->dev, "Failed to resume device: %d\n", ret);
-		pm_runtime_put_noidle(slave->bus->dev);
+		pm_runtime_put_noidle(&slave->dev);
 		return ret;
 	}
 
 	/* Read Intstat 1, Intstat 2 and Intstat 3 registers */
-	ret = sdw_read(slave, SDW_SCP_INT1);
+	ret = sdw_read_no_pm(slave, SDW_SCP_INT1);
 	if (ret < 0) {
-		dev_err(slave->bus->dev,
+		dev_err(&slave->dev,
 			"SDW_SCP_INT1 read failed:%d\n", ret);
 		goto io_err;
 	}
 	buf = ret;
 
-	ret = sdw_nread(slave, SDW_SCP_INTSTAT2, 2, buf2);
+	ret = sdw_nread_no_pm(slave, SDW_SCP_INTSTAT2, 2, buf2);
 	if (ret < 0) {
-		dev_err(slave->bus->dev,
+		dev_err(&slave->dev,
 			"SDW_SCP_INT2/3 read failed:%d\n", ret);
 		goto io_err;
 	}
 
 	if (slave->prop.is_sdca) {
-		ret = sdw_read(slave, SDW_DP0_INT);
+		ret = sdw_read_no_pm(slave, SDW_DP0_INT);
 		if (ret < 0) {
-			dev_err(slave->bus->dev,
+			dev_err(&slave->dev,
 				"SDW_DP0_INT read failed:%d\n", ret);
 			goto io_err;
 		}
@@ -1558,9 +1572,9 @@ static int sdw_handle_slave_alerts(struct sdw_slave *slave)
 		}
 
 		/* Ack interrupt */
-		ret = sdw_write(slave, SDW_SCP_INT1, clear);
+		ret = sdw_write_no_pm(slave, SDW_SCP_INT1, clear);
 		if (ret < 0) {
-			dev_err(slave->bus->dev,
+			dev_err(&slave->dev,
 				"SDW_SCP_INT1 write failed:%d\n", ret);
 			goto io_err;
 		}
@@ -1572,25 +1586,25 @@ static int sdw_handle_slave_alerts(struct sdw_slave *slave)
 		 * Read status again to ensure no new interrupts arrived
 		 * while servicing interrupts.
 		 */
-		ret = sdw_read(slave, SDW_SCP_INT1);
+		ret = sdw_read_no_pm(slave, SDW_SCP_INT1);
 		if (ret < 0) {
-			dev_err(slave->bus->dev,
+			dev_err(&slave->dev,
 				"SDW_SCP_INT1 read failed:%d\n", ret);
 			goto io_err;
 		}
 		_buf = ret;
 
-		ret = sdw_nread(slave, SDW_SCP_INTSTAT2, 2, _buf2);
+		ret = sdw_nread_no_pm(slave, SDW_SCP_INTSTAT2, 2, _buf2);
 		if (ret < 0) {
-			dev_err(slave->bus->dev,
+			dev_err(&slave->dev,
 				"SDW_SCP_INT2/3 read failed:%d\n", ret);
 			goto io_err;
 		}
 
 		if (slave->prop.is_sdca) {
-			ret = sdw_read(slave, SDW_DP0_INT);
+			ret = sdw_read_no_pm(slave, SDW_DP0_INT);
 			if (ret < 0) {
-				dev_err(slave->bus->dev,
+				dev_err(&slave->dev,
 					"SDW_DP0_INT read failed:%d\n", ret);
 				goto io_err;
 			}
@@ -1616,7 +1630,7 @@ static int sdw_handle_slave_alerts(struct sdw_slave *slave)
 	} while (stat != 0 && count < SDW_READ_INTR_CLEAR_RETRY);
 
 	if (count == SDW_READ_INTR_CLEAR_RETRY)
-		dev_warn(slave->bus->dev, "Reached MAX_RETRY on alert read\n");
+		dev_warn(&slave->dev, "Reached MAX_RETRY on alert read\n");
 
 io_err:
 	pm_runtime_mark_last_busy(&slave->dev);
@@ -1722,7 +1736,7 @@ int sdw_handle_slave_status(struct sdw_bus *bus,
 		case SDW_SLAVE_ALERT:
 			ret = sdw_handle_slave_alerts(slave);
 			if (ret)
-				dev_err(bus->dev,
+				dev_err(&slave->dev,
 					"Slave %d alert handling failed: %d\n",
 					i, ret);
 			break;
@@ -1741,24 +1755,29 @@ int sdw_handle_slave_status(struct sdw_bus *bus,
 
 			ret = sdw_initialize_slave(slave);
 			if (ret)
-				dev_err(bus->dev,
+				dev_err(&slave->dev,
 					"Slave %d initialization failed: %d\n",
 					i, ret);
 
 			break;
 
 		default:
-			dev_err(bus->dev, "Invalid slave %d status:%d\n",
+			dev_err(&slave->dev, "Invalid slave %d status:%d\n",
 				i, status[i]);
 			break;
 		}
 
 		ret = sdw_update_slave_status(slave, status[i]);
 		if (ret)
-			dev_err(slave->bus->dev,
+			dev_err(&slave->dev,
 				"Update Slave status failed:%d\n", ret);
-		if (attached_initializing)
+		if (attached_initializing) {
+			dev_dbg(&slave->dev,
+				"%s: signaling initialization completion for Slave %d\n",
+				__func__, slave->dev_num);
+
 			complete(&slave->initialization_complete);
+		}
 	}
 
 	return ret;
diff --git a/drivers/soundwire/cadence_master.c b/drivers/soundwire/cadence_master.c
index 9fa55164354a..d05442e646a3 100644
--- a/drivers/soundwire/cadence_master.c
+++ b/drivers/soundwire/cadence_master.c
@@ -188,7 +188,7 @@ MODULE_PARM_DESC(cdns_mcp_int_mask, "Cadence MCP IntMask");
 #define CDNS_PDI_CONFIG_PORT			GENMASK(4, 0)
 
 /* Driver defaults */
-#define CDNS_TX_TIMEOUT				2000
+#define CDNS_TX_TIMEOUT				500
 
 #define CDNS_SCP_RX_FIFOLEVEL			0x2
 
@@ -483,11 +483,11 @@ cdns_fill_msg_resp(struct sdw_cdns *cdns,
 	for (i = 0; i < count; i++) {
 		if (!(cdns->response_buf[i] & CDNS_MCP_RESP_ACK)) {
 			no_ack = 1;
-			dev_dbg_ratelimited(cdns->dev, "Msg Ack not received\n");
-			if (cdns->response_buf[i] & CDNS_MCP_RESP_NACK) {
-				nack = 1;
-				dev_err_ratelimited(cdns->dev, "Msg NACK received\n");
-			}
+			dev_vdbg(cdns->dev, "Msg Ack not received, cmd %d\n", i);
+		}
+		if (cdns->response_buf[i] & CDNS_MCP_RESP_NACK) {
+			nack = 1;
+			dev_err_ratelimited(cdns->dev, "Msg NACK received, cmd %d\n", i);
 		}
 	}
 
@@ -734,21 +734,18 @@ static void cdns_read_response(struct sdw_cdns *cdns)
 }
 
 static int cdns_update_slave_status(struct sdw_cdns *cdns,
-				    u32 slave0, u32 slave1)
+				    u64 slave_intstat)
 {
 	enum sdw_slave_status status[SDW_MAX_DEVICES + 1];
 	bool is_slave = false;
-	u64 slave;
 	u32 mask;
 	int i, set_status;
 
-	/* combine the two status */
-	slave = ((u64)slave1 << 32) | slave0;
 	memset(status, 0, sizeof(status));
 
 	for (i = 0; i <= SDW_MAX_DEVICES; i++) {
-		mask = (slave >> (i * CDNS_MCP_SLAVE_STATUS_NUM)) &
-				CDNS_MCP_SLAVE_STATUS_BITS;
+		mask = (slave_intstat >> (i * CDNS_MCP_SLAVE_STATUS_NUM)) &
+			CDNS_MCP_SLAVE_STATUS_BITS;
 		if (!mask)
 			continue;
 
@@ -918,13 +915,17 @@ static void cdns_update_slave_status_work(struct work_struct *work)
 	struct sdw_cdns *cdns =
 		container_of(work, struct sdw_cdns, work);
 	u32 slave0, slave1;
-
-	dev_dbg_ratelimited(cdns->dev, "Slave status change\n");
+	u64 slave_intstat;
 
 	slave0 = cdns_readl(cdns, CDNS_MCP_SLAVE_INTSTAT0);
 	slave1 = cdns_readl(cdns, CDNS_MCP_SLAVE_INTSTAT1);
 
-	cdns_update_slave_status(cdns, slave0, slave1);
+	/* combine the two status */
+	slave_intstat = ((u64)slave1 << 32) | slave0;
+
+	dev_dbg_ratelimited(cdns->dev, "Slave status change: 0x%llx\n", slave_intstat);
+
+	cdns_update_slave_status(cdns, slave_intstat);
 	cdns_writel(cdns, CDNS_MCP_SLAVE_INTSTAT0, slave0);
 	cdns_writel(cdns, CDNS_MCP_SLAVE_INTSTAT1, slave1);
 
diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c
index 66adb258a425..a2d5cdaa9998 100644
--- a/drivers/soundwire/intel.c
+++ b/drivers/soundwire/intel.c
@@ -967,7 +967,7 @@ static int intel_hw_params(struct snd_pcm_substream *substream,
 	}
 
 	/* Port configuration */
-	pconfig = kcalloc(1, sizeof(*pconfig), GFP_KERNEL);
+	pconfig = kzalloc(sizeof(*pconfig), GFP_KERNEL);
 	if (!pconfig) {
 		ret =  -ENOMEM;
 		goto error;
@@ -1673,10 +1673,12 @@ static int __maybe_unused intel_suspend_runtime(struct device *dev)
 
 	} else if (clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET ||
 		   !clock_stop_quirks) {
+		bool wake_enable = true;
+
 		ret = sdw_cdns_clock_stop(cdns, true);
 		if (ret < 0) {
 			dev_err(dev, "cannot enable clock stop on suspend\n");
-			return ret;
+			wake_enable = false;
 		}
 
 		ret = sdw_cdns_enable_interrupt(cdns, false);
@@ -1691,7 +1693,7 @@ static int __maybe_unused intel_suspend_runtime(struct device *dev)
 			return ret;
 		}
 
-		intel_shim_wake(sdw, true);
+		intel_shim_wake(sdw, wake_enable);
 	} else {
 		dev_err(dev, "%s clock_stop_quirks %x unsupported\n",
 			__func__, clock_stop_quirks);
diff --git a/drivers/soundwire/intel_init.c b/drivers/soundwire/intel_init.c
index cabdadb09a1b..bc8520eb385e 100644
--- a/drivers/soundwire/intel_init.c
+++ b/drivers/soundwire/intel_init.c
@@ -405,11 +405,12 @@ int sdw_intel_acpi_scan(acpi_handle *parent_handle,
 {
 	acpi_status status;
 
+	info->handle = NULL;
 	status = acpi_walk_namespace(ACPI_TYPE_DEVICE,
 				     parent_handle, 1,
 				     sdw_intel_acpi_cb,
 				     NULL, info, NULL);
-	if (ACPI_FAILURE(status))
+	if (ACPI_FAILURE(status) || info->handle == NULL)
 		return -ENODEV;
 
 	return sdw_intel_scan_controller(info);
diff --git a/drivers/soundwire/slave.c b/drivers/soundwire/slave.c
index a08f4081c1c4..180f38bd003b 100644
--- a/drivers/soundwire/slave.c
+++ b/drivers/soundwire/slave.c
@@ -163,15 +163,13 @@ int sdw_acpi_find_slaves(struct sdw_bus *bus)
 
 			if (id.unique_id != id2.unique_id) {
 				dev_dbg(bus->dev,
-					"Valid unique IDs %x %x for Slave mfg %x part %d\n",
-					id.unique_id, id2.unique_id,
-					id.mfg_id, id.part_id);
+					"Valid unique IDs 0x%x 0x%x for Slave mfg_id 0x%04x, part_id 0x%04x\n",
+					id.unique_id, id2.unique_id, id.mfg_id, id.part_id);
 				ignore_unique_id = false;
 			} else {
 				dev_err(bus->dev,
-					"Invalid unique IDs %x %x for Slave mfg %x part %d\n",
-					id.unique_id, id2.unique_id,
-					id.mfg_id, id.part_id);
+					"Invalid unique IDs 0x%x 0x%x for Slave mfg_id 0x%04x, part_id 0x%04x\n",
+					id.unique_id, id2.unique_id, id.mfg_id, id.part_id);
 				return -ENODEV;
 			}
 		}
diff --git a/drivers/soundwire/sysfs_slave.c b/drivers/soundwire/sysfs_slave.c
index b48b6617a396..3210359cd944 100644
--- a/drivers/soundwire/sysfs_slave.c
+++ b/drivers/soundwire/sysfs_slave.c
@@ -130,7 +130,7 @@ static struct attribute *slave_dev_attrs[] = {
  * we don't use ATTRIBUTES_GROUP here since we want to add a subdirectory
  * for device-level properties
  */
-static struct attribute_group sdw_slave_dev_attr_group = {
+static const struct attribute_group sdw_slave_dev_attr_group = {
 	.attrs	= slave_dev_attrs,
 	.name = "dev-properties",
 };
diff --git a/drivers/spmi/spmi-pmic-arb.c b/drivers/spmi/spmi-pmic-arb.c
index de844b412110..bbbd311eda03 100644
--- a/drivers/spmi/spmi-pmic-arb.c
+++ b/drivers/spmi/spmi-pmic-arb.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (c) 2012-2015, 2017, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2012-2015, 2017, 2021, The Linux Foundation. All rights reserved.
  */
 #include <linux/bitmap.h>
 #include <linux/delay.h>
@@ -505,8 +505,7 @@ static void cleanup_irq(struct spmi_pmic_arb *pmic_arb, u16 apid, int id)
 static void periph_interrupt(struct spmi_pmic_arb *pmic_arb, u16 apid)
 {
 	unsigned int irq;
-	u32 status;
-	int id;
+	u32 status, id;
 	u8 sid = (pmic_arb->apid_data[apid].ppid >> 8) & 0xF;
 	u8 per = pmic_arb->apid_data[apid].ppid & 0xFF;
 
diff --git a/drivers/staging/vme/devices/vme_user.c b/drivers/staging/vme/devices/vme_user.c
index fd0ea4dbcb91..35d7260e2271 100644
--- a/drivers/staging/vme/devices/vme_user.c
+++ b/drivers/staging/vme/devices/vme_user.c
@@ -689,7 +689,7 @@ err_dev:
 	return err;
 }
 
-static int vme_user_remove(struct vme_dev *dev)
+static void vme_user_remove(struct vme_dev *dev)
 {
 	int i;
 
@@ -717,8 +717,6 @@ static int vme_user_remove(struct vme_dev *dev)
 
 	/* Unregister the major and minor device numbers */
 	unregister_chrdev_region(MKDEV(VME_MAJOR, 0), VME_DEVS);
-
-	return 0;
 }
 
 static struct vme_driver vme_user_driver = {
diff --git a/drivers/tty/Makefile b/drivers/tty/Makefile
index b3ccae932660..730de6bf048b 100644
--- a/drivers/tty/Makefile
+++ b/drivers/tty/Makefile
@@ -9,8 +9,6 @@ obj-$(CONFIG_AUDIT)		+= tty_audit.o
 obj-$(CONFIG_MAGIC_SYSRQ)	+= sysrq.o
 obj-$(CONFIG_N_HDLC)		+= n_hdlc.o
 obj-$(CONFIG_N_GSM)		+= n_gsm.o
-obj-$(CONFIG_TRACE_ROUTER)	+= n_tracerouter.o
-obj-$(CONFIG_TRACE_SINK)	+= n_tracesink.o
 obj-$(CONFIG_R3964)		+= n_r3964.o
 
 obj-y				+= vt/
diff --git a/drivers/tty/n_tracerouter.c b/drivers/tty/n_tracerouter.c
deleted file mode 100644
index 3490ed51b1a3..000000000000
--- a/drivers/tty/n_tracerouter.c
+++ /dev/null
@@ -1,235 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- *  n_tracerouter.c - Trace data router through tty space
- *
- *  Copyright (C) Intel 2011
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * This trace router uses the Linux line discipline framework to route
- * trace data coming from a HW Modem to a PTI (Parallel Trace Module) port.
- * The solution is not specific to a HW modem and this line disciple can
- * be used to route any stream of data in kernel space.
- * This is part of a solution for the P1149.7, compact JTAG, standard.
- */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/ioctl.h>
-#include <linux/tty.h>
-#include <linux/tty_ldisc.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/mutex.h>
-#include <linux/slab.h>
-#include <linux/bug.h>
-#include "n_tracesink.h"
-
-/*
- * Other ldisc drivers use 65536 which basically means,
- * 'I can always accept 64k' and flow control is off.
- * This number is deemed appropriate for this driver.
- */
-#define RECEIVE_ROOM	65536
-#define DRIVERNAME	"n_tracerouter"
-
-/*
- * struct to hold private configuration data for this ldisc.
- * opencalled is used to hold if this ldisc has been opened.
- * kref_tty holds the tty reference the ldisc sits on top of.
- */
-struct tracerouter_data {
-	u8 opencalled;
-	struct tty_struct *kref_tty;
-};
-static struct tracerouter_data *tr_data;
-
-/* lock for when tty reference is being used */
-static DEFINE_MUTEX(routelock);
-
-/**
- * n_tracerouter_open() - Called when a tty is opened by a SW entity.
- * @tty: terminal device to the ldisc.
- *
- * Return:
- *      0 for success.
- *
- * Caveats: This should only be opened one time per SW entity.
- */
-static int n_tracerouter_open(struct tty_struct *tty)
-{
-	int retval = -EEXIST;
-
-	mutex_lock(&routelock);
-	if (tr_data->opencalled == 0) {
-
-		tr_data->kref_tty = tty_kref_get(tty);
-		if (tr_data->kref_tty == NULL) {
-			retval = -EFAULT;
-		} else {
-			tr_data->opencalled = 1;
-			tty->disc_data      = tr_data;
-			tty->receive_room   = RECEIVE_ROOM;
-			tty_driver_flush_buffer(tty);
-			retval = 0;
-		}
-	}
-	mutex_unlock(&routelock);
-	return retval;
-}
-
-/**
- * n_tracerouter_close() - close connection
- * @tty: terminal device to the ldisc.
- *
- * Called when a software entity wants to close a connection.
- */
-static void n_tracerouter_close(struct tty_struct *tty)
-{
-	struct tracerouter_data *tptr = tty->disc_data;
-
-	mutex_lock(&routelock);
-	WARN_ON(tptr->kref_tty != tr_data->kref_tty);
-	tty_driver_flush_buffer(tty);
-	tty_kref_put(tr_data->kref_tty);
-	tr_data->kref_tty = NULL;
-	tr_data->opencalled = 0;
-	tty->disc_data = NULL;
-	mutex_unlock(&routelock);
-}
-
-/**
- * n_tracerouter_read() - read request from user space
- * @tty:  terminal device passed into the ldisc.
- * @file: pointer to open file object.
- * @buf:  pointer to the data buffer that gets eventually returned.
- * @nr:   number of bytes of the data buffer that is returned.
- *
- * function that allows read() functionality in userspace. By default if this
- * is not implemented it returns -EIO. This module is functioning like a
- * router via n_tracerouter_receivebuf(), and there is no real requirement
- * to implement this function. However, an error return value other than
- * -EIO should be used just to show that there was an intent not to have
- * this function implemented.  Return value based on read() man pages.
- *
- * Return:
- *	 -EINVAL
- */
-static ssize_t n_tracerouter_read(struct tty_struct *tty, struct file *file,
-				  unsigned char *buf, size_t nr,
-				  void **cookie, unsigned long offset)
-{
-	return -EINVAL;
-}
-
-/**
- * n_tracerouter_write() - Function that allows write() in userspace.
- * @tty:  terminal device passed into the ldisc.
- * @file: pointer to open file object.
- * @buf:  pointer to the data buffer that gets eventually returned.
- * @nr:   number of bytes of the data buffer that is returned.
- *
- * By default if this is not implemented, it returns -EIO.
- * This should not be implemented, ever, because
- * 1. this driver is functioning like a router via
- *    n_tracerouter_receivebuf()
- * 2. No writes to HW will ever go through this line discpline driver.
- * However, an error return value other than -EIO should be used
- * just to show that there was an intent not to have this function
- * implemented.  Return value based on write() man pages.
- *
- * Return:
- *	-EINVAL
- */
-static ssize_t n_tracerouter_write(struct tty_struct *tty, struct file *file,
-				   const unsigned char *buf, size_t nr) {
-	return -EINVAL;
-}
-
-/**
- * n_tracerouter_receivebuf() - Routing function for driver.
- * @tty: terminal device passed into the ldisc.  It's assumed
- *       tty will never be NULL.
- * @cp:  buffer, block of characters to be eventually read by
- *       someone, somewhere (user read() call or some kernel function).
- * @fp:  flag buffer.
- * @count: number of characters (aka, bytes) in cp.
- *
- * This function takes the input buffer, cp, and passes it to
- * an external API function for processing.
- */
-static void n_tracerouter_receivebuf(struct tty_struct *tty,
-					const unsigned char *cp,
-					char *fp, int count)
-{
-	mutex_lock(&routelock);
-	n_tracesink_datadrain((u8 *) cp, count);
-	mutex_unlock(&routelock);
-}
-
-/*
- * Flush buffer is not impelemented as the ldisc has no internal buffering
- * so the tty_driver_flush_buffer() is sufficient for this driver's needs.
- */
-
-static struct tty_ldisc_ops tty_ptirouter_ldisc = {
-	.owner		= THIS_MODULE,
-	.magic		= TTY_LDISC_MAGIC,
-	.name		= DRIVERNAME,
-	.open		= n_tracerouter_open,
-	.close		= n_tracerouter_close,
-	.read		= n_tracerouter_read,
-	.write		= n_tracerouter_write,
-	.receive_buf	= n_tracerouter_receivebuf
-};
-
-/**
- * n_tracerouter_init -	module initialisation
- *
- * Registers this module as a line discipline driver.
- *
- * Return:
- *	0 for success, any other value error.
- */
-static int __init n_tracerouter_init(void)
-{
-	int retval;
-
-	tr_data = kzalloc(sizeof(struct tracerouter_data), GFP_KERNEL);
-	if (tr_data == NULL)
-		return -ENOMEM;
-
-
-	/* Note N_TRACEROUTER is defined in linux/tty.h */
-	retval = tty_register_ldisc(N_TRACEROUTER, &tty_ptirouter_ldisc);
-	if (retval < 0) {
-		pr_err("%s: Registration failed: %d\n", __func__, retval);
-		kfree(tr_data);
-	}
-	return retval;
-}
-
-/**
- * n_tracerouter_exit -	module unload
- *
- * Removes this module as a line discipline driver.
- */
-static void __exit n_tracerouter_exit(void)
-{
-	int retval = tty_unregister_ldisc(N_TRACEROUTER);
-
-	if (retval < 0)
-		pr_err("%s: Unregistration failed: %d\n", __func__,  retval);
-	else
-		kfree(tr_data);
-}
-
-module_init(n_tracerouter_init);
-module_exit(n_tracerouter_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jay Freyensee");
-MODULE_ALIAS_LDISC(N_TRACEROUTER);
-MODULE_DESCRIPTION("Trace router ldisc driver");
diff --git a/drivers/tty/n_tracesink.c b/drivers/tty/n_tracesink.c
deleted file mode 100644
index 1d9931041fd8..000000000000
--- a/drivers/tty/n_tracesink.c
+++ /dev/null
@@ -1,230 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- *  n_tracesink.c - Trace data router and sink path through tty space.
- *
- *  Copyright (C) Intel 2011
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * The trace sink uses the Linux line discipline framework to receive
- * trace data coming from the PTI source line discipline driver
- * to a user-desired tty port, like USB.
- * This is to provide a way to extract modem trace data on
- * devices that do not have a PTI HW module, or just need modem
- * trace data to come out of a different HW output port.
- * This is part of a solution for the P1149.7, compact JTAG, standard.
- */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/ioctl.h>
-#include <linux/tty.h>
-#include <linux/tty_ldisc.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/bug.h>
-#include "n_tracesink.h"
-
-/*
- * Other ldisc drivers use 65536 which basically means,
- * 'I can always accept 64k' and flow control is off.
- * This number is deemed appropriate for this driver.
- */
-#define RECEIVE_ROOM	65536
-#define DRIVERNAME	"n_tracesink"
-
-/*
- * there is a quirk with this ldisc is he can write data
- * to a tty from anyone calling his kernel API, which
- * meets customer requirements in the drivers/misc/pti.c
- * project.  So he needs to know when he can and cannot write when
- * the API is called. In theory, the API can be called
- * after an init() but before a successful open() which
- * would crash the system if tty is not checked.
- */
-static struct tty_struct *this_tty;
-static DEFINE_MUTEX(writelock);
-
-/**
- * n_tracesink_open() - Called when a tty is opened by a SW entity.
- * @tty: terminal device to the ldisc.
- *
- * Return:
- *      0 for success,
- *      -EFAULT = couldn't get a tty kref n_tracesink will sit
- *       on top of
- *      -EEXIST = open() called successfully once and it cannot
- *      be called again.
- *
- * Caveats: open() should only be successful the first time a
- * SW entity calls it.
- */
-static int n_tracesink_open(struct tty_struct *tty)
-{
-	int retval = -EEXIST;
-
-	mutex_lock(&writelock);
-	if (this_tty == NULL) {
-		this_tty = tty_kref_get(tty);
-		if (this_tty == NULL) {
-			retval = -EFAULT;
-		} else {
-			tty->disc_data = this_tty;
-			tty_driver_flush_buffer(tty);
-			retval = 0;
-		}
-	}
-	mutex_unlock(&writelock);
-
-	return retval;
-}
-
-/**
- * n_tracesink_close() - close connection
- * @tty: terminal device to the ldisc.
- *
- * Called when a software entity wants to close a connection.
- */
-static void n_tracesink_close(struct tty_struct *tty)
-{
-	mutex_lock(&writelock);
-	tty_driver_flush_buffer(tty);
-	tty_kref_put(this_tty);
-	this_tty = NULL;
-	tty->disc_data = NULL;
-	mutex_unlock(&writelock);
-}
-
-/**
- * n_tracesink_read() - read request from user space
- * @tty:  terminal device passed into the ldisc.
- * @file: pointer to open file object.
- * @buf:  pointer to the data buffer that gets eventually returned.
- * @nr:   number of bytes of the data buffer that is returned.
- *
- * function that allows read() functionality in userspace. By default if this
- * is not implemented it returns -EIO. This module is functioning like a
- * router via n_tracesink_receivebuf(), and there is no real requirement
- * to implement this function. However, an error return value other than
- * -EIO should be used just to show that there was an intent not to have
- * this function implemented.  Return value based on read() man pages.
- *
- * Return:
- *	 -EINVAL
- */
-static ssize_t n_tracesink_read(struct tty_struct *tty, struct file *file,
-				unsigned char *buf, size_t nr,
-				void **cookie, unsigned long offset)
-{
-	return -EINVAL;
-}
-
-/**
- * n_tracesink_write() - Function that allows write() in userspace.
- * @tty:  terminal device passed into the ldisc.
- * @file: pointer to open file object.
- * @buf:  pointer to the data buffer that gets eventually returned.
- * @nr:   number of bytes of the data buffer that is returned.
- *
- * By default if this is not implemented, it returns -EIO.
- * This should not be implemented, ever, because
- * 1. this driver is functioning like a router via
- *    n_tracesink_receivebuf()
- * 2. No writes to HW will ever go through this line discpline driver.
- * However, an error return value other than -EIO should be used
- * just to show that there was an intent not to have this function
- * implemented.  Return value based on write() man pages.
- *
- * Return:
- *	-EINVAL
- */
-static ssize_t n_tracesink_write(struct tty_struct *tty, struct file *file,
-				 const unsigned char *buf, size_t nr) {
-	return -EINVAL;
-}
-
-/**
- * n_tracesink_datadrain() - Kernel API function used to route
- *			     trace debugging data to user-defined
- *			     port like USB.
- *
- * @buf:   Trace debuging data buffer to write to tty target
- *         port. Null value will return with no write occurring.
- * @count: Size of buf. Value of 0 or a negative number will
- *         return with no write occuring.
- *
- * Caveat: If this line discipline does not set the tty it sits
- * on top of via an open() call, this API function will not
- * call the tty's write() call because it will have no pointer
- * to call the write().
- */
-void n_tracesink_datadrain(u8 *buf, int count)
-{
-	mutex_lock(&writelock);
-
-	if ((buf != NULL) && (count > 0) && (this_tty != NULL))
-		this_tty->ops->write(this_tty, buf, count);
-
-	mutex_unlock(&writelock);
-}
-EXPORT_SYMBOL_GPL(n_tracesink_datadrain);
-
-/*
- * Flush buffer is not impelemented as the ldisc has no internal buffering
- * so the tty_driver_flush_buffer() is sufficient for this driver's needs.
- */
-
-/*
- * tty_ldisc function operations for this driver.
- */
-static struct tty_ldisc_ops tty_n_tracesink = {
-	.owner		= THIS_MODULE,
-	.magic		= TTY_LDISC_MAGIC,
-	.name		= DRIVERNAME,
-	.open		= n_tracesink_open,
-	.close		= n_tracesink_close,
-	.read		= n_tracesink_read,
-	.write		= n_tracesink_write
-};
-
-/**
- * n_tracesink_init-	module initialisation
- *
- * Registers this module as a line discipline driver.
- *
- * Return:
- *	0 for success, any other value error.
- */
-static int __init n_tracesink_init(void)
-{
-	/* Note N_TRACESINK is defined in linux/tty.h */
-	int retval = tty_register_ldisc(N_TRACESINK, &tty_n_tracesink);
-
-	if (retval < 0)
-		pr_err("%s: Registration failed: %d\n", __func__, retval);
-
-	return retval;
-}
-
-/**
- * n_tracesink_exit -	module unload
- *
- * Removes this module as a line discipline driver.
- */
-static void __exit n_tracesink_exit(void)
-{
-	int retval = tty_unregister_ldisc(N_TRACESINK);
-
-	if (retval < 0)
-		pr_err("%s: Unregistration failed: %d\n", __func__,  retval);
-}
-
-module_init(n_tracesink_init);
-module_exit(n_tracesink_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jay Freyensee");
-MODULE_ALIAS_LDISC(N_TRACESINK);
-MODULE_DESCRIPTION("Trace sink ldisc driver");
diff --git a/drivers/tty/n_tracesink.h b/drivers/tty/n_tracesink.h
deleted file mode 100644
index 7031d515a700..000000000000
--- a/drivers/tty/n_tracesink.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *  n_tracesink.h - Kernel driver API to route trace data in kernel space.
- *
- *  Copyright (C) Intel 2011
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * The PTI (Parallel Trace Interface) driver directs trace data routed from
- * various parts in the system out through the Intel Penwell PTI port and
- * out of the mobile device for analysis with a debugging tool
- * (Lauterbach, Fido). This is part of a solution for the MIPI P1149.7,
- * compact JTAG, standard.
- *
- * This header file is used by n_tracerouter to be able to send the
- * data of it's tty port to the tty port this module sits.  This
- * mechanism can also be used independent of the PTI module.
- *
- */
-
-#ifndef N_TRACESINK_H_
-#define N_TRACESINK_H_
-
-void n_tracesink_datadrain(u8 *buf, int count);
-
-#endif
diff --git a/drivers/uio/uio_pci_generic.c b/drivers/uio/uio_pci_generic.c
index b8e44d16279f..c7d681fef198 100644
--- a/drivers/uio/uio_pci_generic.c
+++ b/drivers/uio/uio_pci_generic.c
@@ -92,7 +92,7 @@ static int probe(struct pci_dev *pdev,
 	gdev->info.version = DRIVER_VERSION;
 	gdev->info.release = release;
 	gdev->pdev = pdev;
-	if (pdev->irq) {
+	if (pdev->irq && (pdev->irq != IRQ_NOTCONNECTED)) {
 		gdev->info.irq = pdev->irq;
 		gdev->info.irq_flags = IRQF_SHARED;
 		gdev->info.handler = irqhandler;
diff --git a/drivers/virt/Kconfig b/drivers/virt/Kconfig
index 80c5f9c16ec1..8061e8ef449f 100644
--- a/drivers/virt/Kconfig
+++ b/drivers/virt/Kconfig
@@ -34,4 +34,6 @@ config FSL_HV_MANAGER
 source "drivers/virt/vboxguest/Kconfig"
 
 source "drivers/virt/nitro_enclaves/Kconfig"
+
+source "drivers/virt/acrn/Kconfig"
 endif
diff --git a/drivers/virt/Makefile b/drivers/virt/Makefile
index f28425ce4b39..3e272ea60cd9 100644
--- a/drivers/virt/Makefile
+++ b/drivers/virt/Makefile
@@ -7,3 +7,4 @@ obj-$(CONFIG_FSL_HV_MANAGER)	+= fsl_hypervisor.o
 obj-y				+= vboxguest/
 
 obj-$(CONFIG_NITRO_ENCLAVES)	+= nitro_enclaves/
+obj-$(CONFIG_ACRN_HSM)		+= acrn/
diff --git a/drivers/virt/acrn/Kconfig b/drivers/virt/acrn/Kconfig
new file mode 100644
index 000000000000..3e1a61c9d8d8
--- /dev/null
+++ b/drivers/virt/acrn/Kconfig
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+config ACRN_HSM
+	tristate "ACRN Hypervisor Service Module"
+	depends on ACRN_GUEST
+	select EVENTFD
+	help
+	  ACRN Hypervisor Service Module (HSM) is a kernel module which
+	  communicates with ACRN userspace through ioctls and talks to
+	  the ACRN Hypervisor through hypercalls. HSM will only run in
+	  a privileged management VM, called Service VM, to manage User
+	  VMs and do I/O emulation. Not required for simply running
+	  under ACRN as a User VM.
+
+	  To compile as a module, choose M, the module will be called
+	  acrn. If unsure, say N.
diff --git a/drivers/virt/acrn/Makefile b/drivers/virt/acrn/Makefile
new file mode 100644
index 000000000000..08ce641dcfa1
--- /dev/null
+++ b/drivers/virt/acrn/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_ACRN_HSM)	:= acrn.o
+acrn-y := hsm.o vm.o mm.o ioreq.o ioeventfd.o irqfd.o
diff --git a/drivers/virt/acrn/acrn_drv.h b/drivers/virt/acrn/acrn_drv.h
new file mode 100644
index 000000000000..1be54efa666c
--- /dev/null
+++ b/drivers/virt/acrn/acrn_drv.h
@@ -0,0 +1,227 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ACRN_HSM_DRV_H
+#define __ACRN_HSM_DRV_H
+
+#include <linux/acrn.h>
+#include <linux/dev_printk.h>
+#include <linux/miscdevice.h>
+#include <linux/types.h>
+
+#include "hypercall.h"
+
+extern struct miscdevice acrn_dev;
+
+/* Size, in bytes, of &acrn_ioreq_client.name */
+#define ACRN_NAME_LEN		16
+/* Number of entries in &acrn_vm.regions_mapping */
+#define ACRN_MEM_MAPPING_MAX	256
+
+/* Values of &vm_memory_region_op.type */
+#define ACRN_MEM_REGION_ADD	0
+#define ACRN_MEM_REGION_DEL	2
+
+struct acrn_vm;
+struct acrn_ioreq_client;
+
+/**
+ * struct vm_memory_region_op - Hypervisor memory operation
+ * @type:		Operation type (ACRN_MEM_REGION_ADD or
+ *			ACRN_MEM_REGION_DEL)
+ * @attr:		Memory attribute (ACRN_MEM_TYPE_* | ACRN_MEM_ACCESS_*)
+ * @user_vm_pa:		Physical address of User VM to be mapped.
+ * @service_vm_pa:	Physical address of Service VM to be mapped.
+ * @size:		Size of this region.
+ *
+ * Structure containing needed information that is provided to ACRN Hypervisor
+ * to manage the EPT mappings of a single memory region of the User VM. Several
+ * &struct vm_memory_region_op can be batched to ACRN Hypervisor, see &struct
+ * vm_memory_region_batch.
+ */
+struct vm_memory_region_op {
+	u32	type;
+	u32	attr;
+	u64	user_vm_pa;
+	u64	service_vm_pa;
+	u64	size;
+};
+
+/**
+ * struct vm_memory_region_batch - A batch of vm_memory_region_op.
+ * @vmid:		A User VM ID.
+ * @reserved:		Reserved.
+ * @regions_num:	The number of vm_memory_region_op in the array at
+ *			@regions_gpa.
+ * @regions_gpa:	Physical address of a vm_memory_region_op array.
+ *
+ * HC_VM_SET_MEMORY_REGIONS uses this structure to manage EPT mappings of
+ * multiple memory regions of a User VM. A &struct vm_memory_region_batch
+ * contains multiple &struct vm_memory_region_op for batch processing in the
+ * ACRN Hypervisor.
+ */
+struct vm_memory_region_batch {
+	u16	vmid;
+	u16	reserved[3];
+	u32	regions_num;
+	u64	regions_gpa;
+};
+
+/**
+ * struct vm_memory_mapping - Memory map between a User VM and the Service VM
+ * @pages:		Pages in Service VM kernel.
+ * @npages:		Number of pages.
+ * @service_vm_va:	Virtual address in Service VM kernel.
+ * @user_vm_pa:		Physical address in User VM.
+ * @size:		Size of this memory region.
+ *
+ * HSM maintains memory mappings between a User VM GPA and the Service VM
+ * kernel VA for accelerating the User VM GPA translation. Each VM holds up to
+ * ACRN_MEM_MAPPING_MAX of these in &acrn_vm.regions_mapping.
+ */
+struct vm_memory_mapping {
+	struct page	**pages;
+	int		npages;
+	void		*service_vm_va;
+	u64		user_vm_pa;
+	size_t		size;
+};
+
+/**
+ * struct acrn_ioreq_buffer - Data for setting the ioreq buffer of User VM
+ * @ioreq_buf:	The GPA of the I/O request shared buffer of a VM
+ *
+ * The parameter for the HC_SET_IOREQ_BUFFER hypercall used to set up
+ * the shared I/O request buffer between Service VM and ACRN hypervisor.
+ */
+struct acrn_ioreq_buffer {
+	u64	ioreq_buf;
+};
+
+/**
+ * struct acrn_ioreq_range - An I/O range handled by an I/O request client
+ * @list:	List node; presumably linked on &acrn_ioreq_client.range_list
+ *		(the list manipulation is not in this header)
+ * @type:	Type of the I/O range; ioeventfd.c registers ranges with
+ *		ACRN_IOREQ_TYPE_MMIO or ACRN_IOREQ_TYPE_PORTIO
+ * @start:	First address of the range
+ * @end:	Last address of the range; ioeventfd.c passes
+ *		addr + length - 1, i.e. the bound is inclusive
+ */
+struct acrn_ioreq_range {
+	struct list_head	list;
+	u32			type;
+	u64			start;
+	u64			end;
+};
+
+/* Bit number in &acrn_ioreq_client.flags, tested with test_bit() in ioreq.c */
+#define ACRN_IOREQ_CLIENT_DESTROYING	0U
+/*
+ * Handler invoked for an I/O request routed to a client. It receives the
+ * client and the request and returns an int status.
+ */
+typedef	int (*ioreq_handler_t)(struct acrn_ioreq_client *client,
+			       struct acrn_io_request *req);
+/**
+ * struct acrn_ioreq_client - Structure of I/O client.
+ * @name:	Client name
+ * @vm:		The VM that the client belongs to
+ * @list:	List node for this acrn_ioreq_client
+ * @is_default:	If this client is the default one
+ * @flags:	Flags (ACRN_IOREQ_CLIENT_*)
+ * @range_list:	I/O ranges
+ * @range_lock:	Lock to protect range_list
+ * @ioreqs_map:	The pending I/O requests bitmap.
+ * @handler:	I/O requests handler of this client
+ * @thread:	The thread which executes the handler
+ * @wq:		The wait queue for the handler thread parking
+ * @priv:	Data for the thread
+ */
+struct acrn_ioreq_client {
+	char			name[ACRN_NAME_LEN];
+	struct acrn_vm		*vm;
+	struct list_head	list;
+	bool			is_default;
+	unsigned long		flags;
+	struct list_head	range_list;
+	rwlock_t		range_lock;
+	DECLARE_BITMAP(ioreqs_map, ACRN_IO_REQUEST_MAX);
+	ioreq_handler_t		handler;
+	struct task_struct	*thread;
+	wait_queue_head_t	wq;
+	void			*priv;
+};
+
+/*
+ * Value of &acrn_vm.vmid while no VM has been created for the object yet;
+ * acrn_dev_open() in hsm.c initializes new objects with it.
+ */
+#define ACRN_INVALID_VMID (0xffffU)
+
+/* Flags of &acrn_vm.flags; presumably bit numbers - TODO confirm in vm.c */
+#define ACRN_VM_FLAG_DESTROYED		0U
+#define ACRN_VM_FLAG_CLEARING_IOREQ	1U
+/* Global list of all VMs (see &acrn_vm.list) and its lock */
+extern struct list_head acrn_vm_list;
+extern rwlock_t acrn_vm_list_lock;
+/**
+ * struct acrn_vm - Properties of ACRN User VM.
+ * @list:			Entry within global list of all VMs.
+ * @vmid:			User VM ID.
+ * @vcpu_num:			Number of virtual CPUs in the VM.
+ * @flags:			Flags (ACRN_VM_FLAG_*) of the VM. This is VM
+ *				flag management in HSM which is different
+ *				from the &acrn_vm_creation.vm_flag.
+ * @regions_mapping_lock:	Lock to protect &acrn_vm.regions_mapping and
+ *				&acrn_vm.regions_mapping_count.
+ * @regions_mapping:		Memory mappings of this VM.
+ * @regions_mapping_count:	Number of memory mapping of this VM.
+ * @ioreq_clients_lock:		Lock to protect ioreq_clients and default_client
+ * @ioreq_clients:		The I/O request clients list of this VM
+ * @default_client:		The default I/O request client
+ * @ioreq_buf:			I/O request shared buffer
+ * @ioreq_page:			The page of the I/O request shared buffer
+ * @pci_conf_addr:		Address of a PCI configuration access emulation
+ * @monitor_page:		Page of interrupt statistics of User VM
+ * @ioeventfds_lock:		Lock to protect ioeventfds list
+ * @ioeventfds:			List to link all hsm_ioeventfd
+ * @ioeventfd_client:		I/O client for ioeventfds of the VM
+ * @irqfds_lock:		Lock to protect irqfds list
+ * @irqfds:			List to link all hsm_irqfd
+ * @irqfd_wq:			Workqueue for irqfd async shutdown
+ */
+struct acrn_vm {
+	struct list_head		list;
+	u16				vmid;
+	int				vcpu_num;
+	unsigned long			flags;
+	struct mutex			regions_mapping_lock;
+	struct vm_memory_mapping	regions_mapping[ACRN_MEM_MAPPING_MAX];
+	int				regions_mapping_count;
+	spinlock_t			ioreq_clients_lock;
+	struct list_head		ioreq_clients;
+	struct acrn_ioreq_client	*default_client;
+	struct acrn_io_request_buffer	*ioreq_buf;
+	struct page			*ioreq_page;
+	u32				pci_conf_addr;
+	struct page			*monitor_page;
+	struct mutex			ioeventfds_lock;
+	struct list_head		ioeventfds;
+	struct acrn_ioreq_client	*ioeventfd_client;
+	struct mutex			irqfds_lock;
+	struct list_head		irqfds;
+	struct workqueue_struct		*irqfd_wq;
+};
+
+/* VM creation/destruction and User VM memory mapping */
+struct acrn_vm *acrn_vm_create(struct acrn_vm *vm,
+			       struct acrn_vm_creation *vm_param);
+int acrn_vm_destroy(struct acrn_vm *vm);
+int acrn_mm_region_add(struct acrn_vm *vm, u64 user_gpa, u64 service_gpa,
+		       u64 size, u32 mem_type, u32 mem_access_right);
+int acrn_mm_region_del(struct acrn_vm *vm, u64 user_gpa, u64 size);
+int acrn_vm_memseg_map(struct acrn_vm *vm, struct acrn_vm_memmap *memmap);
+int acrn_vm_memseg_unmap(struct acrn_vm *vm, struct acrn_vm_memmap *memmap);
+int acrn_vm_ram_map(struct acrn_vm *vm, struct acrn_vm_memmap *memmap);
+void acrn_vm_all_ram_unmap(struct acrn_vm *vm);
+
+/* I/O request handling and I/O request clients */
+int acrn_ioreq_init(struct acrn_vm *vm, u64 buf_vma);
+void acrn_ioreq_deinit(struct acrn_vm *vm);
+int acrn_ioreq_intr_setup(void);
+void acrn_ioreq_intr_remove(void);
+void acrn_ioreq_request_clear(struct acrn_vm *vm);
+int acrn_ioreq_client_wait(struct acrn_ioreq_client *client);
+int acrn_ioreq_request_default_complete(struct acrn_vm *vm, u16 vcpu);
+struct acrn_ioreq_client *acrn_ioreq_client_create(struct acrn_vm *vm,
+						   ioreq_handler_t handler,
+						   void *data, bool is_default,
+						   const char *name);
+void acrn_ioreq_client_destroy(struct acrn_ioreq_client *client);
+int acrn_ioreq_range_add(struct acrn_ioreq_client *client,
+			 u32 type, u64 start, u64 end);
+void acrn_ioreq_range_del(struct acrn_ioreq_client *client,
+			  u32 type, u64 start, u64 end);
+
+/* MSI injection into a User VM */
+int acrn_msi_inject(struct acrn_vm *vm, u64 msi_addr, u64 msi_data);
+
+/* ioeventfd support */
+int acrn_ioeventfd_init(struct acrn_vm *vm);
+int acrn_ioeventfd_config(struct acrn_vm *vm, struct acrn_ioeventfd *args);
+void acrn_ioeventfd_deinit(struct acrn_vm *vm);
+
+/* irqfd support */
+int acrn_irqfd_init(struct acrn_vm *vm);
+int acrn_irqfd_config(struct acrn_vm *vm, struct acrn_irqfd *args);
+void acrn_irqfd_deinit(struct acrn_vm *vm);
+
+#endif /* __ACRN_HSM_DRV_H */
diff --git a/drivers/virt/acrn/hsm.c b/drivers/virt/acrn/hsm.c
new file mode 100644
index 000000000000..1f6b7c54a1a4
--- /dev/null
+++ b/drivers/virt/acrn/hsm.c
@@ -0,0 +1,470 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ACRN Hypervisor Service Module (HSM)
+ *
+ * Copyright (C) 2020 Intel Corporation. All rights reserved.
+ *
+ * Authors:
+ *	Fengwei Yin <fengwei.yin@intel.com>
+ *	Yakui Zhao <yakui.zhao@intel.com>
+ */
+
+#include <linux/cpu.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include <asm/acrn.h>
+#include <asm/hypervisor.h>
+
+#include "acrn_drv.h"
+
+/*
+ * Opening /dev/acrn_hsm allocates a zeroed 'struct acrn_vm' and attaches it
+ * to the file as private data. Every ioctl on the file descriptor operates on
+ * that object, and releasing the file descriptor destroys it.
+ *
+ * Return: 0 on success, -ENOMEM if the object cannot be allocated.
+ */
+static int acrn_dev_open(struct inode *inode, struct file *filp)
+{
+	struct acrn_vm *new_vm = kzalloc(sizeof(*new_vm), GFP_KERNEL);
+
+	if (new_vm == NULL)
+		return -ENOMEM;
+
+	/*
+	 * Not backed by a VM yet: acrn_dev_ioctl() only accepts
+	 * ACRN_IOCTL_CREATE_VM while the ID is ACRN_INVALID_VMID.
+	 */
+	new_vm->vmid = ACRN_INVALID_VMID;
+	filp->private_data = new_vm;
+
+	return 0;
+}
+
+/*
+ * Fetch P-state/C-state information from the hypervisor and copy it to
+ * userspace.
+ * @cmd:	PM command; its PMCMD_TYPE_MASK bits select the reply size
+ * @uptr:	Userspace buffer that receives the reply
+ *
+ * The reply is staged in a kmalloc()ed buffer whose physical address is
+ * handed to hcall_get_cpu_state(). Unknown command types are ignored.
+ *
+ * Return: the non-negative hypercall result on success (0 for an unknown
+ * command type), -ENOMEM, -EFAULT, or the negative hypercall result.
+ */
+static int pmcmd_ioctl(u64 cmd, void __user *uptr)
+{
+	size_t reply_len;
+	void *reply;
+	int ret;
+
+	switch (cmd & PMCMD_TYPE_MASK) {
+	case ACRN_PMCMD_GET_PX_CNT:
+	case ACRN_PMCMD_GET_CX_CNT:
+		reply_len = sizeof(u64);
+		break;
+	case ACRN_PMCMD_GET_PX_DATA:
+		reply_len = sizeof(struct acrn_pstate_data);
+		break;
+	case ACRN_PMCMD_GET_CX_DATA:
+		reply_len = sizeof(struct acrn_cstate_data);
+		break;
+	default:
+		return 0;
+	}
+
+	reply = kmalloc(reply_len, GFP_KERNEL);
+	if (!reply)
+		return -ENOMEM;
+
+	ret = hcall_get_cpu_state(cmd, virt_to_phys(reply));
+	/* Only hand data to userspace when the hypercall succeeded. */
+	if (ret >= 0 && copy_to_user(uptr, reply, reply_len))
+		ret = -EFAULT;
+
+	kfree(reply);
+	return ret;
+}
+
+/*
+ * Return true when every reserved field of @cpu_regs is zero, i.e. the
+ * structure is acceptable for ACRN_IOCTL_SET_VCPU_REGS.
+ */
+static bool vcpu_regs_reserved_is_zero(const struct acrn_vcpu_regs *cpu_regs)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(cpu_regs->reserved); i++)
+		if (cpu_regs->reserved[i])
+			return false;
+
+	for (i = 0; i < ARRAY_SIZE(cpu_regs->vcpu_regs.reserved_32); i++)
+		if (cpu_regs->vcpu_regs.reserved_32[i])
+			return false;
+
+	for (i = 0; i < ARRAY_SIZE(cpu_regs->vcpu_regs.reserved_64); i++)
+		if (cpu_regs->vcpu_regs.reserved_64[i])
+			return false;
+
+	for (i = 0; i < ARRAY_SIZE(cpu_regs->vcpu_regs.gdt.reserved); i++)
+		if (cpu_regs->vcpu_regs.gdt.reserved[i] |
+		    cpu_regs->vcpu_regs.idt.reserved[i])
+			return false;
+
+	return true;
+}
+
+/*
+ * ioctl entry point of /dev/acrn_hsm. The target VM is the 'struct acrn_vm'
+ * stored in filp->private_data by acrn_dev_open(). Until a VM has been
+ * created, only ACRN_IOCTL_CREATE_VM is accepted.
+ *
+ * Buffers duplicated from userspace with memdup_user() are owned by this
+ * function and must be freed on every exit path, including the early
+ * -EINVAL returns taken when reserved fields are not zero.
+ *
+ * HSM relies on hypercall layer of the ACRN hypervisor to do the
+ * sanity check against the input parameters.
+ *
+ * Return: 0 or a command specific non-negative value on success, a negative
+ * errno on failure, -ENOTTY for an unknown command.
+ */
+static long acrn_dev_ioctl(struct file *filp, unsigned int cmd,
+			   unsigned long ioctl_param)
+{
+	struct acrn_vm *vm = filp->private_data;
+	struct acrn_vm_creation *vm_param;
+	struct acrn_vcpu_regs *cpu_regs;
+	struct acrn_ioreq_notify notify;
+	struct acrn_ptdev_irq *irq_info;
+	struct acrn_ioeventfd ioeventfd;
+	struct acrn_vm_memmap memmap;
+	struct acrn_msi_entry *msi;
+	struct acrn_pcidev *pcidev;
+	struct acrn_irqfd irqfd;
+	struct page *page;
+	u64 cstate_cmd;
+	int ret = 0;
+
+	if (vm->vmid == ACRN_INVALID_VMID && cmd != ACRN_IOCTL_CREATE_VM) {
+		dev_dbg(acrn_dev.this_device,
+			"ioctl 0x%x: Invalid VM state!\n", cmd);
+		return -EINVAL;
+	}
+
+	switch (cmd) {
+	case ACRN_IOCTL_CREATE_VM:
+		vm_param = memdup_user((void __user *)ioctl_param,
+				       sizeof(struct acrn_vm_creation));
+		if (IS_ERR(vm_param))
+			return PTR_ERR(vm_param);
+
+		/* Do not leak the duplicated parameters on bad input. */
+		if ((vm_param->reserved0 | vm_param->reserved1) != 0) {
+			kfree(vm_param);
+			return -EINVAL;
+		}
+
+		vm = acrn_vm_create(vm, vm_param);
+		if (!vm) {
+			ret = -EINVAL;
+			kfree(vm_param);
+			break;
+		}
+
+		if (copy_to_user((void __user *)ioctl_param, vm_param,
+				 sizeof(struct acrn_vm_creation))) {
+			acrn_vm_destroy(vm);
+			ret = -EFAULT;
+		}
+
+		kfree(vm_param);
+		break;
+	case ACRN_IOCTL_START_VM:
+		ret = hcall_start_vm(vm->vmid);
+		if (ret < 0)
+			dev_dbg(acrn_dev.this_device,
+				"Failed to start VM %u!\n", vm->vmid);
+		break;
+	case ACRN_IOCTL_PAUSE_VM:
+		ret = hcall_pause_vm(vm->vmid);
+		if (ret < 0)
+			dev_dbg(acrn_dev.this_device,
+				"Failed to pause VM %u!\n", vm->vmid);
+		break;
+	case ACRN_IOCTL_RESET_VM:
+		ret = hcall_reset_vm(vm->vmid);
+		if (ret < 0)
+			dev_dbg(acrn_dev.this_device,
+				"Failed to restart VM %u!\n", vm->vmid);
+		break;
+	case ACRN_IOCTL_DESTROY_VM:
+		ret = acrn_vm_destroy(vm);
+		break;
+	case ACRN_IOCTL_SET_VCPU_REGS:
+		cpu_regs = memdup_user((void __user *)ioctl_param,
+				       sizeof(struct acrn_vcpu_regs));
+		if (IS_ERR(cpu_regs))
+			return PTR_ERR(cpu_regs);
+
+		/* Do not leak the duplicated registers on bad input. */
+		if (!vcpu_regs_reserved_is_zero(cpu_regs)) {
+			kfree(cpu_regs);
+			return -EINVAL;
+		}
+
+		ret = hcall_set_vcpu_regs(vm->vmid, virt_to_phys(cpu_regs));
+		if (ret < 0)
+			dev_dbg(acrn_dev.this_device,
+				"Failed to set regs state of VM%u!\n",
+				vm->vmid);
+		kfree(cpu_regs);
+		break;
+	case ACRN_IOCTL_SET_MEMSEG:
+		if (copy_from_user(&memmap, (void __user *)ioctl_param,
+				   sizeof(memmap)))
+			return -EFAULT;
+
+		ret = acrn_vm_memseg_map(vm, &memmap);
+		break;
+	case ACRN_IOCTL_UNSET_MEMSEG:
+		if (copy_from_user(&memmap, (void __user *)ioctl_param,
+				   sizeof(memmap)))
+			return -EFAULT;
+
+		ret = acrn_vm_memseg_unmap(vm, &memmap);
+		break;
+	case ACRN_IOCTL_ASSIGN_PCIDEV:
+		pcidev = memdup_user((void __user *)ioctl_param,
+				     sizeof(struct acrn_pcidev));
+		if (IS_ERR(pcidev))
+			return PTR_ERR(pcidev);
+
+		ret = hcall_assign_pcidev(vm->vmid, virt_to_phys(pcidev));
+		if (ret < 0)
+			dev_dbg(acrn_dev.this_device,
+				"Failed to assign pci device!\n");
+		kfree(pcidev);
+		break;
+	case ACRN_IOCTL_DEASSIGN_PCIDEV:
+		pcidev = memdup_user((void __user *)ioctl_param,
+				     sizeof(struct acrn_pcidev));
+		if (IS_ERR(pcidev))
+			return PTR_ERR(pcidev);
+
+		ret = hcall_deassign_pcidev(vm->vmid, virt_to_phys(pcidev));
+		if (ret < 0)
+			dev_dbg(acrn_dev.this_device,
+				"Failed to deassign pci device!\n");
+		kfree(pcidev);
+		break;
+	case ACRN_IOCTL_SET_PTDEV_INTR:
+		irq_info = memdup_user((void __user *)ioctl_param,
+				       sizeof(struct acrn_ptdev_irq));
+		if (IS_ERR(irq_info))
+			return PTR_ERR(irq_info);
+
+		ret = hcall_set_ptdev_intr(vm->vmid, virt_to_phys(irq_info));
+		if (ret < 0)
+			dev_dbg(acrn_dev.this_device,
+				"Failed to configure intr for ptdev!\n");
+		kfree(irq_info);
+		break;
+	case ACRN_IOCTL_RESET_PTDEV_INTR:
+		irq_info = memdup_user((void __user *)ioctl_param,
+				       sizeof(struct acrn_ptdev_irq));
+		if (IS_ERR(irq_info))
+			return PTR_ERR(irq_info);
+
+		ret = hcall_reset_ptdev_intr(vm->vmid, virt_to_phys(irq_info));
+		if (ret < 0)
+			dev_dbg(acrn_dev.this_device,
+				"Failed to reset intr for ptdev!\n");
+		kfree(irq_info);
+		break;
+	case ACRN_IOCTL_SET_IRQLINE:
+		ret = hcall_set_irqline(vm->vmid, ioctl_param);
+		if (ret < 0)
+			dev_dbg(acrn_dev.this_device,
+				"Failed to set interrupt line!\n");
+		break;
+	case ACRN_IOCTL_INJECT_MSI:
+		msi = memdup_user((void __user *)ioctl_param,
+				  sizeof(struct acrn_msi_entry));
+		if (IS_ERR(msi))
+			return PTR_ERR(msi);
+
+		ret = hcall_inject_msi(vm->vmid, virt_to_phys(msi));
+		if (ret < 0)
+			dev_dbg(acrn_dev.this_device,
+				"Failed to inject MSI!\n");
+		kfree(msi);
+		break;
+	case ACRN_IOCTL_VM_INTR_MONITOR:
+		ret = pin_user_pages_fast(ioctl_param, 1,
+					  FOLL_WRITE | FOLL_LONGTERM, &page);
+		if (unlikely(ret != 1)) {
+			dev_dbg(acrn_dev.this_device,
+				"Failed to pin intr hdr buffer!\n");
+			return -EFAULT;
+		}
+
+		ret = hcall_vm_intr_monitor(vm->vmid, page_to_phys(page));
+		if (ret < 0) {
+			unpin_user_page(page);
+			dev_dbg(acrn_dev.this_device,
+				"Failed to monitor intr data!\n");
+			return ret;
+		}
+		/* The new page replaces any previously pinned monitor page. */
+		if (vm->monitor_page)
+			unpin_user_page(vm->monitor_page);
+		vm->monitor_page = page;
+		break;
+	case ACRN_IOCTL_CREATE_IOREQ_CLIENT:
+		if (vm->default_client)
+			return -EEXIST;
+		if (!acrn_ioreq_client_create(vm, NULL, NULL, true, "acrndm"))
+			ret = -EINVAL;
+		break;
+	case ACRN_IOCTL_DESTROY_IOREQ_CLIENT:
+		if (vm->default_client)
+			acrn_ioreq_client_destroy(vm->default_client);
+		break;
+	case ACRN_IOCTL_ATTACH_IOREQ_CLIENT:
+		if (vm->default_client)
+			ret = acrn_ioreq_client_wait(vm->default_client);
+		else
+			ret = -ENODEV;
+		break;
+	case ACRN_IOCTL_NOTIFY_REQUEST_FINISH:
+		if (copy_from_user(&notify, (void __user *)ioctl_param,
+				   sizeof(struct acrn_ioreq_notify)))
+			return -EFAULT;
+
+		if (notify.reserved != 0)
+			return -EINVAL;
+
+		ret = acrn_ioreq_request_default_complete(vm, notify.vcpu);
+		break;
+	case ACRN_IOCTL_CLEAR_VM_IOREQ:
+		acrn_ioreq_request_clear(vm);
+		break;
+	case ACRN_IOCTL_PM_GET_CPU_STATE:
+		if (copy_from_user(&cstate_cmd, (void __user *)ioctl_param,
+				   sizeof(cstate_cmd)))
+			return -EFAULT;
+
+		/* The reply is written back over the command word. */
+		ret = pmcmd_ioctl(cstate_cmd, (void __user *)ioctl_param);
+		break;
+	case ACRN_IOCTL_IOEVENTFD:
+		if (copy_from_user(&ioeventfd, (void __user *)ioctl_param,
+				   sizeof(ioeventfd)))
+			return -EFAULT;
+
+		if (ioeventfd.reserved != 0)
+			return -EINVAL;
+
+		ret = acrn_ioeventfd_config(vm, &ioeventfd);
+		break;
+	case ACRN_IOCTL_IRQFD:
+		if (copy_from_user(&irqfd, (void __user *)ioctl_param,
+				   sizeof(irqfd)))
+			return -EFAULT;
+		ret = acrn_irqfd_config(vm, &irqfd);
+		break;
+	default:
+		dev_dbg(acrn_dev.this_device, "Unknown IOCTL 0x%x!\n", cmd);
+		ret = -ENOTTY;
+	}
+
+	return ret;
+}
+
+/*
+ * Release of the /dev/acrn_hsm file descriptor: destroy the VM attached to
+ * the file and free the 'struct acrn_vm' allocated by acrn_dev_open().
+ * Always returns 0; the result of acrn_vm_destroy() is not propagated.
+ */
+static int acrn_dev_release(struct inode *inode, struct file *filp)
+{
+	struct acrn_vm *closing_vm = filp->private_data;
+
+	acrn_vm_destroy(closing_vm);
+	kfree(closing_vm);
+
+	return 0;
+}
+
+/*
+ * Store handler of the write-only "remove_cpu" device attribute. The written
+ * string is parsed as a CPU number; that CPU is taken offline if it is online
+ * and the hypervisor is then asked, through hcall_sos_remove_cpu(), to remove
+ * the CPU identified by its local APIC ID.
+ *
+ * Return: @count on success, -EINVAL on unparsable or rejected input, or the
+ * negative hypercall result on failure.
+ */
+static ssize_t remove_cpu_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	u64 cpu, lapicid;
+	int ret;
+
+	if (kstrtoull(buf, 0, &cpu) < 0)
+		return -EINVAL;
+
+	/* CPU 0, out-of-range numbers and non-hotpluggable CPUs are rejected */
+	if (cpu >= num_possible_cpus() || cpu == 0 || !cpu_is_hotpluggable(cpu))
+		return -EINVAL;
+
+	/* NOTE(review): the result of remove_cpu() is not checked */
+	if (cpu_online(cpu))
+		remove_cpu(cpu);
+
+	lapicid = cpu_data(cpu).apicid;
+	/* NOTE(review): cpu and lapicid are u64 but printed with %lld */
+	dev_dbg(dev, "Try to remove cpu %lld with lapicid %lld\n", cpu, lapicid);
+	ret = hcall_sos_remove_cpu(lapicid);
+	if (ret < 0) {
+		dev_err(dev, "Failed to remove cpu %lld!\n", cpu);
+		goto fail_remove;
+	}
+
+	return count;
+
+fail_remove:
+	/*
+	 * Bring the CPU back online after a failed removal.
+	 * NOTE(review): this also runs when the CPU was already offline
+	 * before the write - confirm that is intended.
+	 */
+	add_cpu(cpu);
+	return ret;
+}
+static DEVICE_ATTR_WO(remove_cpu);
+
+/* sysfs attributes of the misc device: only the write-only "remove_cpu" */
+static struct attribute *acrn_attrs[] = {
+	&dev_attr_remove_cpu.attr,
+	NULL
+};
+
+static struct attribute_group acrn_attr_group = {
+	.attrs = acrn_attrs,
+};
+
+static const struct attribute_group *acrn_attr_groups[] = {
+	&acrn_attr_group,
+	NULL
+};
+
+/* File operations of /dev/acrn_hsm: open, release and ioctl only */
+static const struct file_operations acrn_fops = {
+	.owner		= THIS_MODULE,
+	.open		= acrn_dev_open,
+	.release	= acrn_dev_release,
+	.unlocked_ioctl = acrn_dev_ioctl,
+};
+
+/*
+ * The HSM misc device, named "acrn_hsm" with a dynamically assigned minor.
+ * It is not static because other HSM files use acrn_dev.this_device for
+ * their dev_dbg()/dev_err() messages.
+ */
+struct miscdevice acrn_dev = {
+	.minor	= MISC_DYNAMIC_MINOR,
+	.name	= "acrn_hsm",
+	.fops	= &acrn_fops,
+	.groups	= acrn_attr_groups,
+};
+
+/*
+ * Module init: refuse to load unless running under the ACRN hypervisor
+ * (-ENODEV) as a VM with the privileged-VM feature (-EPERM), then register
+ * the misc device and set up the I/O request interrupt handling. The misc
+ * device is deregistered again if the latter fails.
+ */
+static int __init hsm_init(void)
+{
+	int err;
+
+	if (x86_hyper_type != X86_HYPER_ACRN)
+		return -ENODEV;
+
+	if ((cpuid_eax(ACRN_CPUID_FEATURES) & ACRN_FEATURE_PRIVILEGED_VM) == 0)
+		return -EPERM;
+
+	err = misc_register(&acrn_dev);
+	if (err) {
+		pr_err("Create misc dev failed!\n");
+		goto out;
+	}
+
+	err = acrn_ioreq_intr_setup();
+	if (err) {
+		pr_err("Setup I/O request handler failed!\n");
+		misc_deregister(&acrn_dev);
+	}
+
+out:
+	return err;
+}
+
+/* Module exit: undo hsm_init() in reverse order. */
+static void __exit hsm_exit(void)
+{
+	acrn_ioreq_intr_remove();
+	misc_deregister(&acrn_dev);
+}
+module_init(hsm_init);
+module_exit(hsm_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ACRN Hypervisor Service Module (HSM)");
diff --git a/drivers/virt/acrn/hypercall.h b/drivers/virt/acrn/hypercall.h
new file mode 100644
index 000000000000..0cfad05bd1a9
--- /dev/null
+++ b/drivers/virt/acrn/hypercall.h
@@ -0,0 +1,254 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ACRN HSM: hypercalls of ACRN Hypervisor
+ */
+#ifndef __ACRN_HSM_HYPERCALL_H
+#define __ACRN_HSM_HYPERCALL_H
+#include <asm/acrn.h>
+
+/*
+ * Hypercall IDs of the ACRN Hypervisor
+ *
+ * _HC_ID(x, y) shifts x left by 24 bits and ORs in y. Every ID below uses
+ * HC_ID (0x80) as x and a per-group base plus an offset as y.
+ */
+#define _HC_ID(x, y) (((x) << 24) | (y))
+
+#define HC_ID 0x80UL
+
+/* General hypercalls */
+#define HC_ID_GEN_BASE			0x0UL
+#define HC_SOS_REMOVE_CPU		_HC_ID(HC_ID, HC_ID_GEN_BASE + 0x01)
+
+/* VM management hypercalls */
+#define HC_ID_VM_BASE			0x10UL
+#define HC_CREATE_VM			_HC_ID(HC_ID, HC_ID_VM_BASE + 0x00)
+#define HC_DESTROY_VM			_HC_ID(HC_ID, HC_ID_VM_BASE + 0x01)
+#define HC_START_VM			_HC_ID(HC_ID, HC_ID_VM_BASE + 0x02)
+#define HC_PAUSE_VM			_HC_ID(HC_ID, HC_ID_VM_BASE + 0x03)
+#define HC_RESET_VM			_HC_ID(HC_ID, HC_ID_VM_BASE + 0x05)
+#define HC_SET_VCPU_REGS		_HC_ID(HC_ID, HC_ID_VM_BASE + 0x06)
+
+/* Interrupt hypercalls */
+#define HC_ID_IRQ_BASE			0x20UL
+#define HC_INJECT_MSI			_HC_ID(HC_ID, HC_ID_IRQ_BASE + 0x03)
+#define HC_VM_INTR_MONITOR		_HC_ID(HC_ID, HC_ID_IRQ_BASE + 0x04)
+#define HC_SET_IRQLINE			_HC_ID(HC_ID, HC_ID_IRQ_BASE + 0x05)
+
+/* I/O request hypercalls */
+#define HC_ID_IOREQ_BASE		0x30UL
+#define HC_SET_IOREQ_BUFFER		_HC_ID(HC_ID, HC_ID_IOREQ_BASE + 0x00)
+#define HC_NOTIFY_REQUEST_FINISH	_HC_ID(HC_ID, HC_ID_IOREQ_BASE + 0x01)
+
+/* Memory hypercalls */
+#define HC_ID_MEM_BASE			0x40UL
+#define HC_VM_SET_MEMORY_REGIONS	_HC_ID(HC_ID, HC_ID_MEM_BASE + 0x02)
+
+/* PCI device assignment and passthrough interrupt hypercalls */
+#define HC_ID_PCI_BASE			0x50UL
+#define HC_SET_PTDEV_INTR		_HC_ID(HC_ID, HC_ID_PCI_BASE + 0x03)
+#define HC_RESET_PTDEV_INTR		_HC_ID(HC_ID, HC_ID_PCI_BASE + 0x04)
+#define HC_ASSIGN_PCIDEV		_HC_ID(HC_ID, HC_ID_PCI_BASE + 0x05)
+#define HC_DEASSIGN_PCIDEV		_HC_ID(HC_ID, HC_ID_PCI_BASE + 0x06)
+
+/* Power management hypercalls */
+#define HC_ID_PM_BASE			0x80UL
+#define HC_PM_GET_CPU_STATE		_HC_ID(HC_ID, HC_ID_PM_BASE + 0x00)
+
+/**
+ * hcall_sos_remove_cpu() - Remove a vCPU of Service VM
+ * @cpu: The vCPU to be removed; remove_cpu_store() in hsm.c passes the local
+ *	 APIC ID of the CPU here
+ *
+ * Issues the HC_SOS_REMOVE_CPU hypercall.
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static inline long hcall_sos_remove_cpu(u64 cpu)
+{
+	return acrn_hypercall1(HC_SOS_REMOVE_CPU, cpu);
+}
+
+/**
+ * hcall_create_vm() - Create a User VM
+ * @vminfo:	Service VM GPA of info of User VM creation
+ *
+ * Issues the HC_CREATE_VM hypercall.
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static inline long hcall_create_vm(u64 vminfo)
+{
+	return acrn_hypercall1(HC_CREATE_VM, vminfo);
+}
+
+/**
+ * hcall_start_vm() - Start a User VM
+ * @vmid:	User VM ID
+ *
+ * Issues the HC_START_VM hypercall.
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static inline long hcall_start_vm(u64 vmid)
+{
+	return acrn_hypercall1(HC_START_VM, vmid);
+}
+
+/**
+ * hcall_pause_vm() - Pause a User VM
+ * @vmid:	User VM ID
+ *
+ * Issues the HC_PAUSE_VM hypercall.
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static inline long hcall_pause_vm(u64 vmid)
+{
+	return acrn_hypercall1(HC_PAUSE_VM, vmid);
+}
+
+/**
+ * hcall_destroy_vm() - Destroy a User VM
+ * @vmid:	User VM ID
+ *
+ * Issues the HC_DESTROY_VM hypercall.
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static inline long hcall_destroy_vm(u64 vmid)
+{
+	return acrn_hypercall1(HC_DESTROY_VM, vmid);
+}
+
+/**
+ * hcall_reset_vm() - Reset a User VM
+ * @vmid:	User VM ID
+ *
+ * Issues the HC_RESET_VM hypercall.
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static inline long hcall_reset_vm(u64 vmid)
+{
+	return acrn_hypercall1(HC_RESET_VM, vmid);
+}
+
+/**
+ * hcall_set_vcpu_regs() - Set up registers of virtual CPU of a User VM
+ * @vmid:	User VM ID
+ * @regs_state:	Service VM GPA of registers state
+ *
+ * Issues the HC_SET_VCPU_REGS hypercall.
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static inline long hcall_set_vcpu_regs(u64 vmid, u64 regs_state)
+{
+	return acrn_hypercall2(HC_SET_VCPU_REGS, vmid, regs_state);
+}
+
+/**
+ * hcall_inject_msi() - Deliver a MSI interrupt to a User VM
+ * @vmid:	User VM ID
+ * @msi:	Service VM GPA of MSI message
+ *
+ * Issues the HC_INJECT_MSI hypercall.
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static inline long hcall_inject_msi(u64 vmid, u64 msi)
+{
+	return acrn_hypercall2(HC_INJECT_MSI, vmid, msi);
+}
+
+/**
+ * hcall_vm_intr_monitor() - Set a shared page for User VM interrupt statistics
+ * @vmid:	User VM ID
+ * @addr:	Service VM GPA of the shared page
+ *
+ * Issues the HC_VM_INTR_MONITOR hypercall.
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static inline long hcall_vm_intr_monitor(u64 vmid, u64 addr)
+{
+	return acrn_hypercall2(HC_VM_INTR_MONITOR, vmid, addr);
+}
+
+/**
+ * hcall_set_irqline() - Set or clear an interrupt line
+ * @vmid:	User VM ID
+ * @op:		Service VM GPA of interrupt line operations
+ *		NOTE(review): acrn_dev_ioctl() in hsm.c forwards the raw ioctl
+ *		argument here without copying it into a kernel buffer, so
+ *		@op may be the operation value itself rather than a GPA -
+ *		confirm against the hypervisor interface.
+ *
+ * Issues the HC_SET_IRQLINE hypercall.
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static inline long hcall_set_irqline(u64 vmid, u64 op)
+{
+	return acrn_hypercall2(HC_SET_IRQLINE, vmid, op);
+}
+
+/**
+ * hcall_set_ioreq_buffer() - Set up the shared buffer for I/O Requests.
+ * @vmid:	User VM ID
+ * @buffer:	Service VM GPA of the shared buffer
+ *
+ * Issues the HC_SET_IOREQ_BUFFER hypercall.
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static inline long hcall_set_ioreq_buffer(u64 vmid, u64 buffer)
+{
+	return acrn_hypercall2(HC_SET_IOREQ_BUFFER, vmid, buffer);
+}
+
+/**
+ * hcall_notify_req_finish() - Notify ACRN Hypervisor of I/O request completion.
+ * @vmid:	User VM ID
+ * @vcpu:	The vCPU which initiated the I/O request
+ *
+ * Issues the HC_NOTIFY_REQUEST_FINISH hypercall.
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static inline long hcall_notify_req_finish(u64 vmid, u64 vcpu)
+{
+	return acrn_hypercall2(HC_NOTIFY_REQUEST_FINISH, vmid, vcpu);
+}
+
+/**
+ * hcall_set_memory_regions() - Inform the hypervisor to set up EPT mappings
+ * @regions_pa:	Service VM GPA of &struct vm_memory_region_batch
+ *
+ * Issues the HC_VM_SET_MEMORY_REGIONS hypercall. The target VM is not a
+ * separate argument; the batch structure carries its own vmid field.
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static inline long hcall_set_memory_regions(u64 regions_pa)
+{
+	return acrn_hypercall1(HC_VM_SET_MEMORY_REGIONS, regions_pa);
+}
+
+/**
+ * hcall_assign_pcidev() - Assign a PCI device to a User VM
+ * @vmid:	User VM ID
+ * @addr:	Service VM GPA of the &struct acrn_pcidev
+ *
+ * Issues the HC_ASSIGN_PCIDEV hypercall.
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static inline long hcall_assign_pcidev(u64 vmid, u64 addr)
+{
+	return acrn_hypercall2(HC_ASSIGN_PCIDEV, vmid, addr);
+}
+
+/**
+ * hcall_deassign_pcidev() - De-assign a PCI device from a User VM
+ * @vmid:	User VM ID
+ * @addr:	Service VM GPA of the &struct acrn_pcidev
+ *
+ * Issues the HC_DEASSIGN_PCIDEV hypercall.
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static inline long hcall_deassign_pcidev(u64 vmid, u64 addr)
+{
+	return acrn_hypercall2(HC_DEASSIGN_PCIDEV, vmid, addr);
+}
+
+/**
+ * hcall_set_ptdev_intr() - Configure an interrupt for an assigned PCI device.
+ * @vmid:	User VM ID
+ * @irq:	Service VM GPA of the &struct acrn_ptdev_irq
+ *
+ * Issues the HC_SET_PTDEV_INTR hypercall.
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static inline long hcall_set_ptdev_intr(u64 vmid, u64 irq)
+{
+	return acrn_hypercall2(HC_SET_PTDEV_INTR, vmid, irq);
+}
+
+/**
+ * hcall_reset_ptdev_intr() - Reset an interrupt for an assigned PCI device.
+ * @vmid:	User VM ID
+ * @irq:	Service VM GPA of the &struct acrn_ptdev_irq
+ *
+ * Issues the HC_RESET_PTDEV_INTR hypercall.
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static inline long hcall_reset_ptdev_intr(u64 vmid, u64 irq)
+{
+	return acrn_hypercall2(HC_RESET_PTDEV_INTR, vmid, irq);
+}
+
+/**
+ * hcall_get_cpu_state() - Get P-states and C-states info from the hypervisor
+ * @cmd:	PM command; pmcmd_ioctl() in hsm.c passes the command word
+ *		read from userspace, whose PMCMD_TYPE_MASK bits select the
+ *		kind of data requested
+ * @state:	Service VM GPA of buffer of P-states and C-states
+ *
+ * Issues the HC_PM_GET_CPU_STATE hypercall.
+ *
+ * Return: <0 on failure; pmcmd_ioctl() treats any other value as success
+ */
+static inline long hcall_get_cpu_state(u64 cmd, u64 state)
+{
+	return acrn_hypercall2(HC_PM_GET_CPU_STATE, cmd, state);
+}
+
+#endif /* __ACRN_HSM_HYPERCALL_H */
diff --git a/drivers/virt/acrn/ioeventfd.c b/drivers/virt/acrn/ioeventfd.c
new file mode 100644
index 000000000000..ac4037e9f947
--- /dev/null
+++ b/drivers/virt/acrn/ioeventfd.c
@@ -0,0 +1,273 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ACRN HSM eventfd - use eventfd objects to signal expected I/O requests
+ *
+ * Copyright (C) 2020 Intel Corporation. All rights reserved.
+ *
+ * Authors:
+ *	Shuo Liu <shuo.a.liu@intel.com>
+ *	Yakui Zhao <yakui.zhao@intel.com>
+ */
+
+#include <linux/eventfd.h>
+#include <linux/slab.h>
+
+#include "acrn_drv.h"
+
+/**
+ * struct hsm_ioeventfd - Properties of HSM ioeventfd
+ * @list:	Entry within &acrn_vm.ioeventfds of ioeventfds of a VM
+ * @eventfd:	Eventfd of the HSM ioeventfd
+ * @addr:	Address of I/O range
+ * @data:	Data for matching; only meaningful when @wildcard is false
+ * @length:	Length of I/O range
+ * @type:	Type of I/O range (ACRN_IOREQ_TYPE_MMIO/ACRN_IOREQ_TYPE_PORTIO)
+ * @wildcard:	True when no data matching was requested
+ *		(ACRN_IOEVENTFD_FLAG_DATAMATCH not set), so that any written
+ *		value matches; false when the written value must equal @data
+ */
+struct hsm_ioeventfd {
+	struct list_head	list;
+	struct eventfd_ctx	*eventfd;
+	u64			addr;
+	u64			data;
+	int			length;
+	int			type;
+	bool			wildcard;
+};
+
+/*
+ * Translate ioeventfd flags into an I/O request type: port I/O when
+ * ACRN_IOEVENTFD_FLAG_PIO is set, MMIO otherwise.
+ */
+static inline int ioreq_type_from_flags(int flags)
+{
+	if (flags & ACRN_IOEVENTFD_FLAG_PIO)
+		return ACRN_IOREQ_TYPE_PORTIO;
+
+	return ACRN_IOREQ_TYPE_MMIO;
+}
+
+/*
+ * Tear down one HSM ioeventfd: unlink it from the VM's list, drop the
+ * eventfd reference it holds and free it. The caller must hold
+ * &acrn_vm.ioeventfds_lock.
+ */
+static void acrn_ioeventfd_shutdown(struct acrn_vm *vm, struct hsm_ioeventfd *p)
+{
+	lockdep_assert_held(&vm->ioeventfds_lock);
+
+	list_del(&p->list);
+	eventfd_ctx_put(p->eventfd);
+	kfree(p);
+}
+
+/*
+ * Check whether @ioeventfd collides with an ioeventfd already registered on
+ * @vm: same eventfd, same address, same type and overlapping data matching.
+ * The caller must hold &acrn_vm.ioeventfds_lock.
+ */
+static bool hsm_ioeventfd_is_conflict(struct acrn_vm *vm,
+				      struct hsm_ioeventfd *ioeventfd)
+{
+	struct hsm_ioeventfd *cur;
+
+	lockdep_assert_held(&vm->ioeventfds_lock);
+
+	list_for_each_entry(cur, &vm->ioeventfds, list) {
+		if (cur->eventfd != ioeventfd->eventfd)
+			continue;
+		if (cur->addr != ioeventfd->addr ||
+		    cur->type != ioeventfd->type)
+			continue;
+
+		/* Either one is wildcard, the data matching will be skipped. */
+		if (cur->wildcard || ioeventfd->wildcard ||
+		    cur->data == ioeventfd->data)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Assign an eventfd to a VM and create a HSM ioeventfd associated with the
+ * eventfd. The properties of the HSM ioeventfd are built from a &struct
+ * acrn_ioeventfd.
+ *
+ * Return: 0 on success, -EINVAL for an overflowing range or an unsupported
+ * access width, -ENOMEM, -EEXIST if a conflicting ioeventfd is already
+ * registered, or the error from eventfd_ctx_fdget() or
+ * acrn_ioreq_range_add().
+ */
+static int acrn_ioeventfd_assign(struct acrn_vm *vm,
+				 struct acrn_ioeventfd *args)
+{
+	struct eventfd_ctx *eventfd;
+	struct hsm_ioeventfd *p;
+	int ret;
+
+	/* Check for range overflow */
+	if (args->addr + args->len < args->addr)
+		return -EINVAL;
+
+	/*
+	 * Currently, acrn_ioeventfd is used to support vhost. 1,2,4,8 width
+	 * accesses can cover vhost's requirements.
+	 */
+	if (!(args->len == 1 || args->len == 2 ||
+	      args->len == 4 || args->len == 8))
+		return -EINVAL;
+
+	/* Takes a reference that is dropped on every failure path below */
+	eventfd = eventfd_ctx_fdget(args->fd);
+	if (IS_ERR(eventfd))
+		return PTR_ERR(eventfd);
+
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (!p) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	INIT_LIST_HEAD(&p->list);
+	p->addr = args->addr;
+	p->length = args->len;
+	p->eventfd = eventfd;
+	p->type = ioreq_type_from_flags(args->flags);
+
+	/*
+	 * ACRN_IOEVENTFD_FLAG_DATAMATCH flag is set in virtio 1.0 support, the
+	 * writing of notification register of each virtqueue may trigger the
+	 * notification. There is no data matching requirement.
+	 *
+	 * With the flag set, only writes of args->data match; without it the
+	 * ioeventfd is a wildcard and any written value matches.
+	 */
+	if (args->flags & ACRN_IOEVENTFD_FLAG_DATAMATCH)
+		p->data = args->data;
+	else
+		p->wildcard = true;
+
+	mutex_lock(&vm->ioeventfds_lock);
+
+	if (hsm_ioeventfd_is_conflict(vm, p)) {
+		ret = -EEXIST;
+		goto unlock_fail;
+	}
+
+	/* register the I/O range into ioreq client (end is inclusive) */
+	ret = acrn_ioreq_range_add(vm->ioeventfd_client, p->type,
+				   p->addr, p->addr + p->length - 1);
+	if (ret < 0)
+		goto unlock_fail;
+
+	/* From here on the list owns p and its eventfd reference */
+	list_add_tail(&p->list, &vm->ioeventfds);
+	mutex_unlock(&vm->ioeventfds_lock);
+
+	return 0;
+
+unlock_fail:
+	mutex_unlock(&vm->ioeventfds_lock);
+	kfree(p);
+fail:
+	eventfd_ctx_put(eventfd);
+	return ret;
+}
+
+/*
+ * Remove the first HSM ioeventfd of @vm that is bound to the eventfd named
+ * by args->fd: unregister its I/O range from the ioreq client and shut it
+ * down. Finding no such ioeventfd is not an error.
+ *
+ * Return: 0, or the error from eventfd_ctx_fdget().
+ */
+static int acrn_ioeventfd_deassign(struct acrn_vm *vm,
+				   struct acrn_ioeventfd *args)
+{
+	struct eventfd_ctx *ctx;
+	struct hsm_ioeventfd *entry;
+
+	ctx = eventfd_ctx_fdget(args->fd);
+	if (IS_ERR(ctx))
+		return PTR_ERR(ctx);
+
+	mutex_lock(&vm->ioeventfds_lock);
+	list_for_each_entry(entry, &vm->ioeventfds, list) {
+		if (entry->eventfd == ctx) {
+			acrn_ioreq_range_del(vm->ioeventfd_client, entry->type,
+					     entry->addr,
+					     entry->addr + entry->length - 1);
+			acrn_ioeventfd_shutdown(vm, entry);
+			/* entry is freed; stop walking the list */
+			break;
+		}
+	}
+	mutex_unlock(&vm->ioeventfds_lock);
+
+	/* Drop the lookup reference taken above */
+	eventfd_ctx_put(ctx);
+	return 0;
+}
+
+/*
+ * Find the first ioeventfd of @vm matching an access of @len bytes of @type
+ * at @addr: type and address must be equal, the registered length must be at
+ * least @len, and @data must match unless the ioeventfd is a wildcard. The
+ * caller must hold &acrn_vm.ioeventfds_lock.
+ *
+ * Return: the matching ioeventfd, or NULL if there is none.
+ */
+static struct hsm_ioeventfd *hsm_ioeventfd_match(struct acrn_vm *vm, u64 addr,
+						 u64 data, int len, int type)
+{
+	struct hsm_ioeventfd *cur;
+
+	lockdep_assert_held(&vm->ioeventfds_lock);
+
+	list_for_each_entry(cur, &vm->ioeventfds, list) {
+		if (cur->type != type || cur->addr != addr)
+			continue;
+		if (cur->length < len)
+			continue;
+		if (cur->wildcard || cur->data == data)
+			return cur;
+	}
+
+	return NULL;
+}
+
+/*
+ * I/O request handler of the ioeventfd client: signal the eventfd of the
+ * ioeventfd matching a write access. Read accesses complete with value 0.
+ * Always returns 0.
+ */
+static int acrn_ioeventfd_handler(struct acrn_ioreq_client *client,
+				  struct acrn_io_request *req)
+{
+	struct acrn_vm *vm = client->vm;
+	struct hsm_ioeventfd *hit;
+	u64 addr, val;
+	int size;
+
+	if (req->type == ACRN_IOREQ_TYPE_MMIO) {
+		/*
+		 * I/O requests are dispatched by range check only, so a
+		 * acrn_ioreq_client need process both READ and WRITE accesses
+		 * of same range. READ accesses are safe to be ignored here
+		 * because virtio PCI devices write the notify registers for
+		 * notification.
+		 */
+		if (req->reqs.mmio_request.direction == ACRN_IOREQ_DIR_READ) {
+			/* reading does nothing and return 0 */
+			req->reqs.mmio_request.value = 0;
+			return 0;
+		}
+
+		addr = req->reqs.mmio_request.address;
+		size = req->reqs.mmio_request.size;
+		val = req->reqs.mmio_request.value;
+	} else {
+		if (req->reqs.pio_request.direction == ACRN_IOREQ_DIR_READ) {
+			/* reading does nothing and return 0 */
+			req->reqs.pio_request.value = 0;
+			return 0;
+		}
+
+		addr = req->reqs.pio_request.address;
+		size = req->reqs.pio_request.size;
+		val = req->reqs.pio_request.value;
+	}
+
+	/* Look up and signal under the lock so the entry cannot go away */
+	mutex_lock(&vm->ioeventfds_lock);
+	hit = hsm_ioeventfd_match(vm, addr, val, size, req->type);
+	if (hit)
+		eventfd_signal(hit->eventfd, 1);
+	mutex_unlock(&vm->ioeventfds_lock);
+
+	return 0;
+}
+
+/*
+ * Configure an ioeventfd of @vm: de-assign when ACRN_IOEVENTFD_FLAG_DEASSIGN
+ * is set in args->flags, assign otherwise. Returns the result of the
+ * selected operation.
+ */
+int acrn_ioeventfd_config(struct acrn_vm *vm, struct acrn_ioeventfd *args)
+{
+	if (args->flags & ACRN_IOEVENTFD_FLAG_DEASSIGN)
+		return acrn_ioeventfd_deassign(vm, args);
+
+	return acrn_ioeventfd_assign(vm, args);
+}
+
+/*
+ * Set up ioeventfd support of @vm: initialize the ioeventfd list and its
+ * lock, and create the non-default I/O request client "ioeventfd-<vmid>"
+ * whose handler is acrn_ioeventfd_handler().
+ *
+ * Return: 0 on success, -EINVAL if the client cannot be created.
+ */
+int acrn_ioeventfd_init(struct acrn_vm *vm)
+{
+	char client_name[ACRN_NAME_LEN];
+
+	mutex_init(&vm->ioeventfds_lock);
+	INIT_LIST_HEAD(&vm->ioeventfds);
+
+	snprintf(client_name, sizeof(client_name), "ioeventfd-%u", vm->vmid);
+	vm->ioeventfd_client = acrn_ioreq_client_create(vm,
+							acrn_ioeventfd_handler,
+							NULL, false,
+							client_name);
+	if (vm->ioeventfd_client) {
+		dev_dbg(acrn_dev.this_device, "VM %u ioeventfd init.\n",
+			vm->vmid);
+		return 0;
+	}
+
+	dev_err(acrn_dev.this_device, "Failed to create ioeventfd ioreq client!\n");
+	return -EINVAL;
+}
+
+/*
+ * Tear down ioeventfd support of @vm: destroy the ioeventfd I/O request
+ * client first, then shut down every ioeventfd still on the VM's list.
+ */
+void acrn_ioeventfd_deinit(struct acrn_vm *vm)
+{
+	struct hsm_ioeventfd *p, *next;
+
+	dev_dbg(acrn_dev.this_device, "VM %u ioeventfd deinit.\n", vm->vmid);
+	acrn_ioreq_client_destroy(vm->ioeventfd_client);
+	mutex_lock(&vm->ioeventfds_lock);
+	/* _safe variant: acrn_ioeventfd_shutdown() unlinks and frees p */
+	list_for_each_entry_safe(p, next, &vm->ioeventfds, list)
+		acrn_ioeventfd_shutdown(vm, p);
+	mutex_unlock(&vm->ioeventfds_lock);
+}
diff --git a/drivers/virt/acrn/ioreq.c b/drivers/virt/acrn/ioreq.c
new file mode 100644
index 000000000000..80b2e3f0e276
--- /dev/null
+++ b/drivers/virt/acrn/ioreq.c
@@ -0,0 +1,657 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ACRN_HSM: Handle I/O requests
+ *
+ * Copyright (C) 2020 Intel Corporation. All rights reserved.
+ *
+ * Authors:
+ *	Jason Chen CJ <jason.cj.chen@intel.com>
+ *	Fengwei Yin <fengwei.yin@intel.com>
+ */
+
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kthread.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+#include <asm/acrn.h>
+
+#include "acrn_drv.h"
+
+/* Forward declarations: used by acrn_ioreq_client_destroy() before their definitions */
+static void ioreq_pause(void);
+static void ioreq_resume(void);
+
+/*
+ * ioreq_work runs ioreq_dispatcher() on ioreq_wq; it is queued by
+ * ioreq_intr_handler() and ioreq_resume().
+ */
+static void ioreq_dispatcher(struct work_struct *work);
+static struct workqueue_struct *ioreq_wq;
+static DECLARE_WORK(ioreq_work, ioreq_dispatcher);
+
+/* Return true if at least one bit is set in the client's ioreqs_map */
+static inline bool has_pending_request(struct acrn_ioreq_client *client)
+{
+	return !bitmap_empty(client->ioreqs_map, ACRN_IO_REQUEST_MAX);
+}
+
+/* Return true once ACRN_IOREQ_CLIENT_DESTROYING has been set in the client's flags */
+static inline bool is_destroying(struct acrn_ioreq_client *client)
+{
+	return test_bit(ACRN_IOREQ_CLIENT_DESTROYING, &client->flags);
+}
+
+/*
+ * Mark @acrn_req as ACRN_IOREQ_STATE_COMPLETE and, unless the request asked
+ * for completion polling, notify the hypervisor that the request of @vcpu in
+ * @vm has finished.
+ *
+ * Returns 0 in polling mode, otherwise the result of
+ * hcall_notify_req_finish().
+ */
+static int ioreq_complete_request(struct acrn_vm *vm, u16 vcpu,
+				  struct acrn_io_request *acrn_req)
+{
+	bool polling_mode;
+	int ret = 0;
+
+	/* Sample completion_polling before the request is marked complete */
+	polling_mode = acrn_req->completion_polling;
+	/* Store-release makes sure the writes are done before completion */
+	smp_store_release(&acrn_req->processed, ACRN_IOREQ_STATE_COMPLETE);
+
+	/*
+	 * To fulfill the requirement of real-time in several industry
+	 * scenarios, like automotive, ACRN can run under the partition mode,
+	 * in which User VMs and Service VM are bound to dedicated CPU cores.
+	 * Polling mode of handling the I/O request is introduced to achieve a
+	 * faster I/O request handling. In polling mode, the hypervisor polls
+	 * I/O request's completion. Once an I/O request is marked as
+	 * ACRN_IOREQ_STATE_COMPLETE, hypervisor resumes from the polling point
+	 * to continue the I/O request flow. Thus, the completion notification
+	 * from HSM of I/O request is not needed.  Please note,
+	 * completion_polling needs to be read before the I/O request being
+	 * marked as ACRN_IOREQ_STATE_COMPLETE to avoid racing with the
+	 * hypervisor.
+	 */
+	if (!polling_mode) {
+		ret = hcall_notify_req_finish(vm->vmid, vcpu);
+		if (ret < 0)
+			dev_err(acrn_dev.this_device,
+				"Notify I/O request finished failed!\n");
+	}
+
+	return ret;
+}
+
+/*
+ * Complete the I/O request of @vcpu on behalf of @client: clear the vCPU's
+ * bit in the client's ioreqs_map and mark the request completed. When
+ * @acrn_req is NULL, the request is taken from the vCPU's slot in the VM's
+ * I/O request buffer.
+ *
+ * Returns -EINVAL if @vcpu is out of range, otherwise the result of
+ * ioreq_complete_request().
+ */
+static int acrn_ioreq_complete_request(struct acrn_ioreq_client *client,
+				       u16 vcpu,
+				       struct acrn_io_request *acrn_req)
+{
+	struct acrn_vm *vm = client->vm;
+
+	if (vcpu >= vm->vcpu_num)
+		return -EINVAL;
+
+	clear_bit(vcpu, client->ioreqs_map);
+	if (!acrn_req)
+		acrn_req = (struct acrn_io_request *)vm->ioreq_buf + vcpu;
+
+	return ioreq_complete_request(vm, vcpu, acrn_req);
+}
+
+/*
+ * Complete the I/O request of @vcpu that is owned by the default client of
+ * @vm. The request itself is looked up in the VM's I/O request buffer.
+ *
+ * Returns 0 if the VM has no default client, otherwise the result of
+ * acrn_ioreq_complete_request().
+ */
+int acrn_ioreq_request_default_complete(struct acrn_vm *vm, u16 vcpu)
+{
+	int ret = 0;
+
+	/* The lock keeps vm->default_client stable while it is used */
+	spin_lock_bh(&vm->ioreq_clients_lock);
+	if (vm->default_client)
+		ret = acrn_ioreq_complete_request(vm->default_client,
+						  vcpu, NULL);
+	spin_unlock_bh(&vm->ioreq_clients_lock);
+
+	return ret;
+}
+
+/**
+ * acrn_ioreq_range_add() - Register an I/O range with an ioreq client
+ * @client:	The ioreq client that will monitor the range
+ * @type:	Type (ACRN_IOREQ_TYPE_MMIO or ACRN_IOREQ_TYPE_PORTIO)
+ * @start:	First address of the range
+ * @end:	Last address of the range, must not be below @start
+ *
+ * Return: 0 on success, -EINVAL for an inverted range, -ENOMEM if the range
+ * descriptor cannot be allocated
+ */
+int acrn_ioreq_range_add(struct acrn_ioreq_client *client,
+			 u32 type, u64 start, u64 end)
+{
+	struct acrn_ioreq_range *new_range;
+
+	if (start > end) {
+		dev_err(acrn_dev.this_device,
+			"Invalid IO range [0x%llx,0x%llx]\n", start, end);
+		return -EINVAL;
+	}
+
+	new_range = kzalloc(sizeof(*new_range), GFP_KERNEL);
+	if (!new_range)
+		return -ENOMEM;
+
+	new_range->start = start;
+	new_range->end = end;
+	new_range->type = type;
+
+	/* Publish the fully initialized range on the client's list */
+	write_lock_bh(&client->range_lock);
+	list_add(&new_range->list, &client->range_list);
+	write_unlock_bh(&client->range_lock);
+
+	return 0;
+}
+
+/**
+ * acrn_ioreq_range_del() - Remove an I/O range from an ioreq client
+ * @client:	The ioreq client
+ * @type:	Type (ACRN_IOREQ_TYPE_MMIO or ACRN_IOREQ_TYPE_PORTIO)
+ * @start:	First address of the range
+ * @end:	Last address of the range
+ *
+ * Only the first range whose type, start and end all match is removed and
+ * freed. Nothing happens if no such range exists.
+ */
+void acrn_ioreq_range_del(struct acrn_ioreq_client *client,
+			  u32 type, u64 start, u64 end)
+{
+	struct acrn_ioreq_range *cur;
+
+	write_lock_bh(&client->range_lock);
+	list_for_each_entry(cur, &client->range_list, list) {
+		if (cur->type != type ||
+		    cur->start != start ||
+		    cur->end != end)
+			continue;
+
+		/* Leaving the loop right away keeps the non-safe iterator valid */
+		list_del(&cur->list);
+		kfree(cur);
+		break;
+	}
+	write_unlock_bh(&client->range_lock);
+}
+
+/*
+ * ioreq_task() is the execution entity of handler thread of an I/O client.
+ * The handler callback of the I/O client is called within the handler thread.
+ *
+ * @data is the struct acrn_ioreq_client the thread serves. The thread runs
+ * until kthread_should_stop() becomes true and then returns 0.
+ */
+static int ioreq_task(void *data)
+{
+	struct acrn_ioreq_client *client = data;
+	struct acrn_io_request *req;
+	unsigned long *ioreqs_map;
+	int vcpu, ret;
+
+	/*
+	 * Lockless access to ioreqs_map is safe, because
+	 * 1) set_bit() and clear_bit() are atomic operations.
+	 * 2) I/O requests arrives serialized. The access flow of ioreqs_map is:
+	 *	set_bit() - in ioreq_work handler
+	 *	Handler callback handles corresponding I/O request
+	 *	clear_bit() - in handler thread (include ACRN userspace)
+	 *	Mark corresponding I/O request completed
+	 *	Loop again if a new I/O request occurs
+	 */
+	ioreqs_map = client->ioreqs_map;
+	while (!kthread_should_stop()) {
+		/* Sleep until a request is pending or the thread is told to stop */
+		acrn_ioreq_client_wait(client);
+		while (has_pending_request(client)) {
+			/* The bit index is the vCPU id and also the slot index */
+			vcpu = find_first_bit(ioreqs_map, client->vm->vcpu_num);
+			req = client->vm->ioreq_buf->req_slot + vcpu;
+			ret = client->handler(client, req);
+			if (ret < 0) {
+				dev_err(acrn_dev.this_device,
+					"IO handle failure: %d\n", ret);
+				/*
+				 * The request is not completed here, so its
+				 * bit stays set in ioreqs_map.
+				 */
+				break;
+			}
+			/* Clears the bit and marks the request completed */
+			acrn_ioreq_complete_request(client, vcpu, req);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * For the non-default I/O clients, give them chance to complete the current
+ * I/O requests if there are any. For the default I/O client, it is safe to
+ * clear all pending I/O requests because the clearing request is from ACRN
+ * userspace.
+ */
+void acrn_ioreq_request_clear(struct acrn_vm *vm)
+{
+	struct acrn_ioreq_client *client;
+	bool has_pending;
+	unsigned long vcpu;
+	int retry = 10;
+
+	/*
+	 * IO requests of this VM will be completed directly in
+	 * acrn_ioreq_dispatch if ACRN_VM_FLAG_CLEARING_IOREQ flag is set.
+	 */
+	set_bit(ACRN_VM_FLAG_CLEARING_IOREQ, &vm->flags);
+
+	/*
+	 * acrn_ioreq_request_clear is only called in VM reset case. Simply
+	 * wait 100ms in total for the IO requests' completion.
+	 */
+	do {
+		/*
+		 * Start every pass from scratch: if the client list is empty
+		 * by now, the loop body never runs and a stale "true" from the
+		 * previous pass must not survive.
+		 */
+		has_pending = false;
+
+		spin_lock_bh(&vm->ioreq_clients_lock);
+		list_for_each_entry(client, &vm->ioreq_clients, list) {
+			if (!has_pending_request(client))
+				continue;
+
+			has_pending = true;
+			/*
+			 * This is the last attempt. Report the client while
+			 * the lock is held: clients are unlinked under this
+			 * lock before they are freed, so @client must not be
+			 * touched once the lock has been dropped.
+			 */
+			if (retry == 1)
+				dev_warn(acrn_dev.this_device,
+					 "%s cannot flush pending request!\n",
+					 client->name);
+			break;
+		}
+		spin_unlock_bh(&vm->ioreq_clients_lock);
+
+		if (has_pending)
+			schedule_timeout_interruptible(HZ / 100);
+	} while (has_pending && --retry > 0);
+
+	/* Clear all ioreqs belonging to the default client */
+	spin_lock_bh(&vm->ioreq_clients_lock);
+	client = vm->default_client;
+	if (client) {
+		vcpu = find_next_bit(client->ioreqs_map,
+				     ACRN_IO_REQUEST_MAX, 0);
+		while (vcpu < ACRN_IO_REQUEST_MAX) {
+			acrn_ioreq_complete_request(client, vcpu, NULL);
+			vcpu = find_next_bit(client->ioreqs_map,
+					     ACRN_IO_REQUEST_MAX, vcpu + 1);
+		}
+	}
+	spin_unlock_bh(&vm->ioreq_clients_lock);
+
+	/* Clear ACRN_VM_FLAG_CLEARING_IOREQ flag after the clearing */
+	clear_bit(ACRN_VM_FLAG_CLEARING_IOREQ, &vm->flags);
+}
+
+/*
+ * Block until @client has a pending I/O request or is being torn down.
+ *
+ * Returns -ENODEV when the default client is being destroyed, 0 in every
+ * other case.
+ */
+int acrn_ioreq_client_wait(struct acrn_ioreq_client *client)
+{
+	if (!client->is_default) {
+		/* Kernel clients are woken by kthread_stop() as well */
+		wait_event_interruptible(client->wq,
+					 has_pending_request(client) ||
+					 kthread_should_stop());
+		return 0;
+	}
+
+	/*
+	 * In the default client, a user space thread waits on the
+	 * waitqueue. The is_destroying() check is used to notify user
+	 * space the client is going to be destroyed.
+	 */
+	wait_event_interruptible(client->wq,
+				 has_pending_request(client) ||
+				 is_destroying(client));
+
+	return is_destroying(client) ? -ENODEV : 0;
+}
+
+/* True for a port I/O request that targets port 0xcf8 */
+static bool is_cfg_addr(struct acrn_io_request *req)
+{
+	if (req->type != ACRN_IOREQ_TYPE_PORTIO)
+		return false;
+
+	return req->reqs.pio_request.address == 0xcf8;
+}
+
+/* True for a port I/O request that targets one of the ports 0xcfc..0xcff */
+static bool is_cfg_data(struct acrn_io_request *req)
+{
+	if (req->type != ACRN_IOREQ_TYPE_PORTIO)
+		return false;
+
+	return req->reqs.pio_request.address >= 0xcfc &&
+	       req->reqs.pio_request.address < (0xcfc + 4);
+}
+
+/* Mask of the low register bits taken directly from the config address */
+#define PCI_LOWREG_MASK  0xFC
+/* Mask of the high register bits, applied to the config address >> 16 */
+#define PCI_HIGHREG_MASK 0xF00
+/* Highest function number; also used as the mask of the function field */
+#define PCI_FUNCMAX	7
+/* Highest slot number; also used as the mask of the device field */
+#define PCI_SLOTMAX	31
+/* Highest bus number; also used as the mask of the bus field */
+#define PCI_BUSMAX	255
+/* Bit of the config address that must be set for data port accesses to be forwarded */
+#define CONF1_ENABLE	0x80000000UL
+/*
+ * A PCI configuration space access through the 0xCF8/0xCFC ports normally
+ * consists of two steps:
+ *   1) write the address into port 0xCF8
+ *   2) read or write the data through port 0xCFC
+ * This function folds such a pair of port I/O requests into a single
+ * ACRN_IOREQ_TYPE_PCICFG request which is then processed further.
+ *
+ * Returns true if the request has been completed here, false if the caller
+ * still has to dispatch it (possibly rewritten to ACRN_IOREQ_TYPE_PCICFG).
+ */
+static bool handle_cf8cfc(struct acrn_vm *vm,
+			  struct acrn_io_request *req, u16 vcpu)
+{
+	int cfg_addr, data_off, reg;
+
+	if (is_cfg_addr(req)) {
+		/* Address port: latch or report the per-VM config address */
+		WARN_ON(req->reqs.pio_request.size != 4);
+		if (req->reqs.pio_request.direction == ACRN_IOREQ_DIR_WRITE)
+			vm->pci_conf_addr = req->reqs.pio_request.value;
+		else
+			req->reqs.pio_request.value = vm->pci_conf_addr;
+		goto complete;
+	}
+
+	if (!is_cfg_data(req))
+		return false;
+
+	if (!(vm->pci_conf_addr & CONF1_ENABLE)) {
+		/* Data port without the enable bit: reads return all ones */
+		if (req->reqs.pio_request.direction ==
+				ACRN_IOREQ_DIR_READ)
+			req->reqs.pio_request.value = 0xffffffff;
+		goto complete;
+	}
+
+	/* Read the port offset before the request is rewritten */
+	data_off = req->reqs.pio_request.address - 0xcfc;
+
+	req->type = ACRN_IOREQ_TYPE_PCICFG;
+	cfg_addr = vm->pci_conf_addr;
+	req->reqs.pci_request.bus = (cfg_addr >> 16) & PCI_BUSMAX;
+	req->reqs.pci_request.dev = (cfg_addr >> 11) & PCI_SLOTMAX;
+	req->reqs.pci_request.func = (cfg_addr >> 8) & PCI_FUNCMAX;
+	reg = (cfg_addr & PCI_LOWREG_MASK) +
+	      ((cfg_addr >> 16) & PCI_HIGHREG_MASK);
+	req->reqs.pci_request.reg = reg + data_off;
+
+	return false;
+
+complete:
+	ioreq_complete_request(vm, vcpu, req);
+	return true;
+}
+
+/*
+ * Check whether the MMIO or port I/O access described by @req has the same
+ * type as @range and lies completely inside [range->start, range->end].
+ * Requests of any other type never match.
+ */
+static bool in_range(struct acrn_ioreq_range *range,
+		     struct acrn_io_request *req)
+{
+	if (range->type != req->type)
+		return false;
+
+	switch (req->type) {
+	case ACRN_IOREQ_TYPE_MMIO:
+		return req->reqs.mmio_request.address >= range->start &&
+		       (req->reqs.mmio_request.address +
+			req->reqs.mmio_request.size - 1) <= range->end;
+	case ACRN_IOREQ_TYPE_PORTIO:
+		return req->reqs.pio_request.address >= range->start &&
+		       (req->reqs.pio_request.address +
+			req->reqs.pio_request.size - 1) <= range->end;
+	default:
+		return false;
+	}
+}
+
+/*
+ * Find the client that should handle @req: the first client on
+ * vm->ioreq_clients with a range covering the request, or the VM's default
+ * client (which may be NULL) if there is none.
+ *
+ * The caller must hold vm->ioreq_clients_lock.
+ */
+static struct acrn_ioreq_client *find_ioreq_client(struct acrn_vm *vm,
+						   struct acrn_io_request *req)
+{
+	struct acrn_ioreq_client *client;
+	struct acrn_ioreq_range *range;
+	bool matched = false;
+
+	lockdep_assert_held(&vm->ioreq_clients_lock);
+
+	list_for_each_entry(client, &vm->ioreq_clients, list) {
+		read_lock_bh(&client->range_lock);
+		list_for_each_entry(range, &client->range_list, list) {
+			matched = in_range(range, req);
+			if (matched)
+				break;
+		}
+		read_unlock_bh(&client->range_lock);
+
+		if (matched)
+			return client;
+	}
+
+	return vm->default_client;
+}
+
+/**
+ * acrn_ioreq_client_create() - Create an ioreq client
+ * @vm:		The VM that this client belongs to
+ * @handler:	The ioreq_handler of ioreq client acrn_hsm will create a kernel
+ *		thread and call the handler to handle I/O requests.
+ * @priv:	Private data for the handler
+ * @is_default:	If it is the default client
+ * @name:	The name of ioreq client
+ *
+ * A client without a handler is only accepted if it is the default client.
+ * The default client is stored in vm->default_client, every other client is
+ * linked on vm->ioreq_clients.
+ *
+ * Return: acrn_ioreq_client pointer on success, NULL on error
+ */
+struct acrn_ioreq_client *acrn_ioreq_client_create(struct acrn_vm *vm,
+						   ioreq_handler_t handler,
+						   void *priv, bool is_default,
+						   const char *name)
+{
+	struct acrn_ioreq_client *client;
+
+	if (!handler && !is_default) {
+		dev_dbg(acrn_dev.this_device,
+			"Cannot create non-default client w/o handler!\n");
+		return NULL;
+	}
+	client = kzalloc(sizeof(*client), GFP_KERNEL);
+	if (!client)
+		return NULL;
+
+	client->handler = handler;
+	client->vm = vm;
+	client->priv = priv;
+	client->is_default = is_default;
+	/*
+	 * The client was zero-allocated and the last byte of name is never
+	 * written, so the copy is always NUL-terminated.
+	 */
+	if (name)
+		strncpy(client->name, name, sizeof(client->name) - 1);
+	rwlock_init(&client->range_lock);
+	INIT_LIST_HEAD(&client->range_list);
+	init_waitqueue_head(&client->wq);
+
+	/* Clients with a handler get a kernel thread running ioreq_task() */
+	if (client->handler) {
+		client->thread = kthread_run(ioreq_task, client, "VM%u-%s",
+					     client->vm->vmid, client->name);
+		if (IS_ERR(client->thread)) {
+			kfree(client);
+			return NULL;
+		}
+	}
+
+	/* Make the client visible to the dispatcher */
+	spin_lock_bh(&vm->ioreq_clients_lock);
+	if (is_default)
+		vm->default_client = client;
+	else
+		list_add(&client->list, &vm->ioreq_clients);
+	spin_unlock_bh(&vm->ioreq_clients_lock);
+
+	dev_dbg(acrn_dev.this_device, "Created ioreq client %s.\n", name);
+	return client;
+}
+
+/**
+ * acrn_ioreq_client_destroy() - Destroy an ioreq client
+ * @client:	The ioreq client, must not be NULL
+ *
+ * Request dispatching is paused for the duration of the teardown. The client
+ * is unlinked from its VM, all of its I/O ranges are freed and finally the
+ * client itself is freed.
+ */
+void acrn_ioreq_client_destroy(struct acrn_ioreq_client *client)
+{
+	struct acrn_ioreq_range *range, *next;
+	struct acrn_vm *vm = client->vm;
+
+	dev_dbg(acrn_dev.this_device,
+		"Destroy ioreq client %s.\n", client->name);
+	/* Stop the dispatcher before the client goes away */
+	ioreq_pause();
+	set_bit(ACRN_IOREQ_CLIENT_DESTROYING, &client->flags);
+	/*
+	 * The default client has a waiter that checks is_destroying(); the
+	 * other clients have a kernel thread that has to be stopped.
+	 */
+	if (client->is_default)
+		wake_up_interruptible(&client->wq);
+	else
+		kthread_stop(client->thread);
+
+	/* Unlink the client so that it can no longer be found */
+	spin_lock_bh(&vm->ioreq_clients_lock);
+	if (client->is_default)
+		vm->default_client = NULL;
+	else
+		list_del(&client->list);
+	spin_unlock_bh(&vm->ioreq_clients_lock);
+
+	/* Free every I/O range that is still registered */
+	write_lock_bh(&client->range_lock);
+	list_for_each_entry_safe(range, next, &client->range_list, list) {
+		list_del(&range->list);
+		kfree(range);
+	}
+	write_unlock_bh(&client->range_lock);
+	kfree(client);
+
+	ioreq_resume();
+}
+
+/*
+ * Walk the I/O request slots of all vCPUs of @vm and hand every pending
+ * request to the client responsible for it.
+ *
+ * Returns 0 on success, -EINVAL if a pending request has no client at all
+ * (no matching range and no default client); the walk stops at that request.
+ */
+static int acrn_ioreq_dispatch(struct acrn_vm *vm)
+{
+	struct acrn_ioreq_client *client;
+	struct acrn_io_request *req;
+	int i;
+
+	for (i = 0; i < vm->vcpu_num; i++) {
+		req = vm->ioreq_buf->req_slot + i;
+
+		/* barrier the read of processed of acrn_io_request */
+		if (smp_load_acquire(&req->processed) ==
+				     ACRN_IOREQ_STATE_PENDING) {
+			/* Complete the IO request directly in clearing stage */
+			if (test_bit(ACRN_VM_FLAG_CLEARING_IOREQ, &vm->flags)) {
+				ioreq_complete_request(vm, i, req);
+				continue;
+			}
+			/* Config address/data port accesses may be completed here */
+			if (handle_cf8cfc(vm, req, i))
+				continue;
+
+			spin_lock_bh(&vm->ioreq_clients_lock);
+			client = find_ioreq_client(vm, req);
+			if (!client) {
+				dev_err(acrn_dev.this_device,
+					"Failed to find ioreq client!\n");
+				spin_unlock_bh(&vm->ioreq_clients_lock);
+				return -EINVAL;
+			}
+			/* Record whether a kernel client or the default client handles it */
+			if (!client->is_default)
+				req->kernel_handled = 1;
+			else
+				req->kernel_handled = 0;
+			/*
+			 * Add barrier() to make sure the writes are done
+			 * before setting ACRN_IOREQ_STATE_PROCESSING
+			 */
+			smp_store_release(&req->processed,
+					  ACRN_IOREQ_STATE_PROCESSING);
+			/* Bit i marks the request of vCPU i as pending for the client */
+			set_bit(i, client->ioreqs_map);
+			wake_up_interruptible(&client->wq);
+			spin_unlock_bh(&vm->ioreq_clients_lock);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Work function of ioreq_work: dispatch the pending I/O requests of the VMs
+ * on acrn_vm_list. The return value of acrn_ioreq_dispatch() is ignored.
+ */
+static void ioreq_dispatcher(struct work_struct *work)
+{
+	struct acrn_vm *vm;
+
+	read_lock(&acrn_vm_list_lock);
+	list_for_each_entry(vm, &acrn_vm_list, list) {
+		/*
+		 * NOTE(review): this stops the walk at the first VM without
+		 * an I/O request buffer, so VMs later in the list are not
+		 * dispatched in this run - confirm "continue" is not meant.
+		 */
+		if (!vm->ioreq_buf)
+			break;
+		acrn_ioreq_dispatch(vm);
+	}
+	read_unlock(&acrn_vm_list_lock);
+}
+
+/*
+ * Callback registered through acrn_setup_intr_handler(): defer the actual
+ * dispatching to ioreq_work on ioreq_wq.
+ */
+static void ioreq_intr_handler(void)
+{
+	queue_work(ioreq_wq, &ioreq_work);
+}
+
+/* Stop request dispatching: remove the handler, then drain ioreq_wq */
+static void ioreq_pause(void)
+{
+	/* Flush and unarm the handler to ensure no I/O requests pending */
+	acrn_remove_intr_handler();
+	drain_workqueue(ioreq_wq);
+}
+
+/* Counterpart of ioreq_pause(): re-install the handler and run one dispatch */
+static void ioreq_resume(void)
+{
+	/* Schedule after enabling in case other clients miss interrupt */
+	acrn_setup_intr_handler(ioreq_intr_handler);
+	queue_work(ioreq_wq, &ioreq_work);
+}
+
+/*
+ * Install ioreq_intr_handler() and allocate the workqueue that runs the
+ * dispatcher.
+ *
+ * Returns 0 on success; -ENOMEM if the workqueue cannot be allocated, in
+ * which case the handler is removed again.
+ */
+int acrn_ioreq_intr_setup(void)
+{
+	/*
+	 * NOTE(review): the handler is installed before ioreq_wq exists and
+	 * it queues work on ioreq_wq - confirm that it cannot be invoked
+	 * before the allocation below has finished.
+	 */
+	acrn_setup_intr_handler(ioreq_intr_handler);
+	ioreq_wq = alloc_workqueue("ioreq_wq",
+				   WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
+	if (!ioreq_wq) {
+		dev_err(acrn_dev.this_device, "Failed to alloc workqueue!\n");
+		acrn_remove_intr_handler();
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+/* Undo acrn_ioreq_intr_setup(): destroy the workqueue and remove the handler */
+void acrn_ioreq_intr_remove(void)
+{
+	/*
+	 * NOTE(review): the workqueue is destroyed while the handler that
+	 * queues work on it is still installed - confirm that the handler
+	 * cannot run in between.
+	 */
+	if (ioreq_wq)
+		destroy_workqueue(ioreq_wq);
+	acrn_remove_intr_handler();
+}
+
+/*
+ * Pin the user page at @buf_vma as the I/O request buffer of @vm and pass
+ * its physical address to the hypervisor via hcall_set_ioreq_buffer().
+ *
+ * Return: 0 on success, -EEXIST if the VM already has a buffer, -ENOMEM if
+ * the hypercall parameter cannot be allocated, -EFAULT if the page cannot be
+ * pinned, or the negative result of the hypercall.
+ */
+int acrn_ioreq_init(struct acrn_vm *vm, u64 buf_vma)
+{
+	struct acrn_ioreq_buffer *set_buffer;
+	struct page *page;
+	int ret;
+
+	if (vm->ioreq_buf)
+		return -EEXIST;
+
+	/* Parameter block of the hypercall, only needed until it returns */
+	set_buffer = kzalloc(sizeof(*set_buffer), GFP_KERNEL);
+	if (!set_buffer)
+		return -ENOMEM;
+
+	ret = pin_user_pages_fast(buf_vma, 1,
+				  FOLL_WRITE | FOLL_LONGTERM, &page);
+	if (unlikely(ret != 1) || !page) {
+		dev_err(acrn_dev.this_device, "Failed to pin ioreq page!\n");
+		ret = -EFAULT;
+		goto free_buf;
+	}
+
+	vm->ioreq_buf = page_address(page);
+	vm->ioreq_page = page;
+	set_buffer->ioreq_buf = page_to_phys(page);
+	ret = hcall_set_ioreq_buffer(vm->vmid, virt_to_phys(set_buffer));
+	if (ret < 0) {
+		dev_err(acrn_dev.this_device, "Failed to init ioreq buffer!\n");
+		/*
+		 * Drop both references to the page before unpinning it, so
+		 * that no stale page pointer is left behind in the VM.
+		 */
+		vm->ioreq_buf = NULL;
+		vm->ioreq_page = NULL;
+		unpin_user_page(page);
+		goto free_buf;
+	}
+
+	dev_dbg(acrn_dev.this_device,
+		"Init ioreq buffer %pK!\n", vm->ioreq_buf);
+	ret = 0;
+free_buf:
+	kfree(set_buffer);
+	return ret;
+}
+
+/*
+ * Destroy all ioreq clients of @vm, including the default client, and
+ * release the pinned I/O request page.
+ */
+void acrn_ioreq_deinit(struct acrn_vm *vm)
+{
+	struct acrn_ioreq_client *client, *next;
+
+	dev_dbg(acrn_dev.this_device,
+		"Deinit ioreq buffer %pK!\n", vm->ioreq_buf);
+	/* Destroy all clients belonging to this VM */
+	list_for_each_entry_safe(client, next, &vm->ioreq_clients, list)
+		acrn_ioreq_client_destroy(client);
+	/* The default client is not on the list and is handled separately */
+	if (vm->default_client)
+		acrn_ioreq_client_destroy(vm->default_client);
+
+	if (vm->ioreq_buf && vm->ioreq_page) {
+		unpin_user_page(vm->ioreq_page);
+		vm->ioreq_buf = NULL;
+	}
+}
diff --git a/drivers/virt/acrn/irqfd.c b/drivers/virt/acrn/irqfd.c
new file mode 100644
index 000000000000..a8766d528e29
--- /dev/null
+++ b/drivers/virt/acrn/irqfd.c
@@ -0,0 +1,235 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ACRN HSM irqfd: use eventfd objects to inject virtual interrupts
+ *
+ * Copyright (C) 2020 Intel Corporation. All rights reserved.
+ *
+ * Authors:
+ *	Shuo Liu <shuo.a.liu@intel.com>
+ *	Yakui Zhao <yakui.zhao@intel.com>
+ */
+
+#include <linux/eventfd.h>
+#include <linux/file.h>
+#include <linux/poll.h>
+#include <linux/slab.h>
+
+#include "acrn_drv.h"
+
+/* NOTE(review): neither object is referenced anywhere else in this file */
+static LIST_HEAD(acrn_irqfd_clients);
+static DEFINE_MUTEX(acrn_irqfds_mutex);
+
+/**
+ * struct hsm_irqfd - Properties of HSM irqfd
+ * @vm:		Associated VM pointer
+ * @wait:	Wait-queue entry added to the eventfd's wait queue; its
+ *		callback is hsm_irqfd_wakeup()
+ * @shutdown:	Async shutdown work, runs hsm_irqfd_shutdown_work()
+ * @eventfd:	Associated eventfd
+ * @list:	Entry within &acrn_vm.irqfds of irqfds of a VM
+ * @pt:		Structure for select/poll on the associated eventfd
+ * @msi:	MSI address and data injected when the eventfd is signaled
+ */
+struct hsm_irqfd {
+	struct acrn_vm		*vm;
+	wait_queue_entry_t	wait;
+	struct work_struct	shutdown;
+	struct eventfd_ctx	*eventfd;
+	struct list_head	list;
+	poll_table		pt;
+	struct acrn_msi_entry	msi;
+};
+
+/* Inject the MSI recorded in @irqfd into the VM the irqfd belongs to */
+static void acrn_irqfd_inject(struct hsm_irqfd *irqfd)
+{
+	acrn_msi_inject(irqfd->vm, irqfd->msi.msi_addr, irqfd->msi.msi_data);
+}
+
+/*
+ * Release an irqfd: unlink it from the VM, detach it from the eventfd's wait
+ * queue, drop the eventfd reference and free it. @irqfd must not be used
+ * afterwards.
+ *
+ * The caller must hold vm->irqfds_lock.
+ */
+static void hsm_irqfd_shutdown(struct hsm_irqfd *irqfd)
+{
+	u64 cnt;
+
+	lockdep_assert_held(&irqfd->vm->irqfds_lock);
+
+	/* unlink from the VM's irqfd list, then remove from wait queue */
+	list_del_init(&irqfd->list);
+	eventfd_ctx_remove_wait_queue(irqfd->eventfd, &irqfd->wait, &cnt);
+	eventfd_ctx_put(irqfd->eventfd);
+	kfree(irqfd);
+}
+
+/*
+ * Deferred shutdown of an irqfd, queued from hsm_irqfd_wakeup(). The irqfd is
+ * only shut down if it is still linked on a list.
+ */
+static void hsm_irqfd_shutdown_work(struct work_struct *work)
+{
+	struct hsm_irqfd *irqfd = container_of(work, struct hsm_irqfd,
+					       shutdown);
+	/* Cached up front: the irqfd is freed by hsm_irqfd_shutdown() */
+	struct acrn_vm *vm = irqfd->vm;
+
+	mutex_lock(&vm->irqfds_lock);
+	if (!list_empty(&irqfd->list))
+		hsm_irqfd_shutdown(irqfd);
+	mutex_unlock(&vm->irqfds_lock);
+}
+
+/*
+ * Wait-queue callback of an irqfd, invoked with the poll events of the
+ * eventfd passed in @key.
+ *
+ * Called with wqh->lock held and interrupts disabled.
+ *
+ * Always returns 0.
+ */
+static int hsm_irqfd_wakeup(wait_queue_entry_t *wait, unsigned int mode,
+			    int sync, void *key)
+{
+	__poll_t poll_bits = key_to_poll(key);
+	struct hsm_irqfd *irqfd;
+	struct acrn_vm *vm;
+
+	irqfd = container_of(wait, struct hsm_irqfd, wait);
+	vm = irqfd->vm;
+	if (poll_bits & EPOLLIN)
+		/* An event has been signaled, inject an interrupt */
+		acrn_irqfd_inject(irqfd);
+
+	if (poll_bits & EPOLLHUP)
+		/*
+		 * The shutdown takes the irqfds_lock mutex and removes the
+		 * entry from this wait queue, neither of which can be done
+		 * from this callback, so defer it to the workqueue.
+		 */
+		queue_work(vm->irqfd_wq, &irqfd->shutdown);
+
+	return 0;
+}
+
+/*
+ * poll_table callback installed by acrn_irqfd_assign(): hook the irqfd's wait
+ * entry into the wait queue @wqh handed over by the polled file.
+ */
+static void hsm_irqfd_poll_func(struct file *file, wait_queue_head_t *wqh,
+				poll_table *pt)
+{
+	struct hsm_irqfd *irqfd;
+
+	irqfd = container_of(pt, struct hsm_irqfd, pt);
+	add_wait_queue(wqh, &irqfd->wait);
+}
+
+/*
+ * Assign an eventfd to a VM and create a HSM irqfd associated with the
+ * eventfd. The properties of the HSM irqfd are built from a &struct
+ * acrn_irqfd.
+ *
+ * Return: 0 on success, -ENOMEM if the irqfd cannot be allocated, -EBADF if
+ * args->fd is not an open file, -EBUSY if the eventfd is already assigned to
+ * this VM, or the error of eventfd_ctx_fileget().
+ */
+static int acrn_irqfd_assign(struct acrn_vm *vm, struct acrn_irqfd *args)
+{
+	struct eventfd_ctx *eventfd = NULL;
+	struct hsm_irqfd *irqfd, *tmp;
+	__poll_t events;
+	struct fd f;
+	int ret = 0;
+
+	irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
+	if (!irqfd)
+		return -ENOMEM;
+
+	irqfd->vm = vm;
+	memcpy(&irqfd->msi, &args->msi, sizeof(args->msi));
+	INIT_LIST_HEAD(&irqfd->list);
+	INIT_WORK(&irqfd->shutdown, hsm_irqfd_shutdown_work);
+
+	f = fdget(args->fd);
+	if (!f.file) {
+		ret = -EBADF;
+		goto out;
+	}
+
+	eventfd = eventfd_ctx_fileget(f.file);
+	if (IS_ERR(eventfd)) {
+		ret = PTR_ERR(eventfd);
+		goto fail;
+	}
+
+	irqfd->eventfd = eventfd;
+
+	/*
+	 * Install custom wake-up handling to be notified whenever underlying
+	 * eventfd is signaled.
+	 */
+	init_waitqueue_func_entry(&irqfd->wait, hsm_irqfd_wakeup);
+	init_poll_funcptr(&irqfd->pt, hsm_irqfd_poll_func);
+
+	/* An eventfd may be assigned to a VM only once */
+	mutex_lock(&vm->irqfds_lock);
+	list_for_each_entry(tmp, &vm->irqfds, list) {
+		if (irqfd->eventfd != tmp->eventfd)
+			continue;
+		ret = -EBUSY;
+		mutex_unlock(&vm->irqfds_lock);
+		goto fail;
+	}
+	list_add_tail(&irqfd->list, &vm->irqfds);
+	mutex_unlock(&vm->irqfds_lock);
+
+	/*
+	 * Check the pending event in this stage. Go through vfs_poll()
+	 * rather than calling ->poll directly, so that a file without a poll
+	 * method is handled by the VFS instead of being dereferenced here.
+	 */
+	events = vfs_poll(f.file, &irqfd->pt);
+
+	if (events & EPOLLIN)
+		acrn_irqfd_inject(irqfd);
+
+	fdput(f);
+	return 0;
+fail:
+	if (eventfd && !IS_ERR(eventfd))
+		eventfd_ctx_put(eventfd);
+
+	fdput(f);
+out:
+	kfree(irqfd);
+	return ret;
+}
+
+/*
+ * Remove the irqfd of @vm that is bound to the eventfd args->fd.
+ *
+ * Returns the error of eventfd_ctx_fdget() if the fd cannot be resolved to
+ * an eventfd, 0 otherwise - also when no irqfd uses that eventfd.
+ */
+static int acrn_irqfd_deassign(struct acrn_vm *vm,
+			       struct acrn_irqfd *args)
+{
+	struct eventfd_ctx *ctx = eventfd_ctx_fdget(args->fd);
+	struct hsm_irqfd *cur, *next;
+
+	if (IS_ERR(ctx))
+		return PTR_ERR(ctx);
+
+	mutex_lock(&vm->irqfds_lock);
+	list_for_each_entry_safe(cur, next, &vm->irqfds, list) {
+		if (cur->eventfd != ctx)
+			continue;
+
+		hsm_irqfd_shutdown(cur);
+		break;
+	}
+	mutex_unlock(&vm->irqfds_lock);
+
+	/* Drop the reference taken by the lookup above */
+	eventfd_ctx_put(ctx);
+
+	return 0;
+}
+
+/*
+ * Configure an irqfd of a VM: deassign it when ACRN_IRQFD_FLAG_DEASSIGN is
+ * set in args->flags, assign it otherwise. The result of the selected
+ * operation is returned unchanged.
+ */
+int acrn_irqfd_config(struct acrn_vm *vm, struct acrn_irqfd *args)
+{
+	if (!(args->flags & ACRN_IRQFD_FLAG_DEASSIGN))
+		return acrn_irqfd_assign(vm, args);
+
+	return acrn_irqfd_deassign(vm, args);
+}
+
+/*
+ * Initialize the irqfd bookkeeping of a VM and allocate its shutdown
+ * workqueue, named "acrn_irqfd-<vmid>".
+ *
+ * Return: 0 on success, -ENOMEM if the workqueue cannot be allocated.
+ */
+int acrn_irqfd_init(struct acrn_vm *vm)
+{
+	struct workqueue_struct *wq;
+
+	mutex_init(&vm->irqfds_lock);
+	INIT_LIST_HEAD(&vm->irqfds);
+
+	wq = alloc_workqueue("acrn_irqfd-%u", 0, 0, vm->vmid);
+	vm->irqfd_wq = wq;
+	if (!wq)
+		return -ENOMEM;
+
+	dev_dbg(acrn_dev.this_device, "VM %u irqfd init.\n", vm->vmid);
+	return 0;
+}
+
+/*
+ * Tear down the irqfd state of a VM: destroy the shutdown workqueue, then
+ * shut down every irqfd still linked on vm->irqfds.
+ */
+void acrn_irqfd_deinit(struct acrn_vm *vm)
+{
+	struct hsm_irqfd *irqfd, *next;
+
+	dev_dbg(acrn_dev.this_device, "VM %u irqfd deinit.\n", vm->vmid);
+	/*
+	 * acrn_irqfd_init() leaves irqfd_wq NULL when the allocation fails;
+	 * destroy_workqueue() must not be called with a NULL workqueue.
+	 */
+	if (vm->irqfd_wq)
+		destroy_workqueue(vm->irqfd_wq);
+	mutex_lock(&vm->irqfds_lock);
+	list_for_each_entry_safe(irqfd, next, &vm->irqfds, list)
+		hsm_irqfd_shutdown(irqfd);
+	mutex_unlock(&vm->irqfds_lock);
+}
diff --git a/drivers/virt/acrn/mm.c b/drivers/virt/acrn/mm.c
new file mode 100644
index 000000000000..c4f2e15c8a2b
--- /dev/null
+++ b/drivers/virt/acrn/mm.c
@@ -0,0 +1,306 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ACRN: Memory mapping management
+ *
+ * Copyright (C) 2020 Intel Corporation. All rights reserved.
+ *
+ * Authors:
+ *	Fei Li <lei1.li@intel.com>
+ *	Shuo Liu <shuo.a.liu@intel.com>
+ */
+
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+#include "acrn_drv.h"
+
+/*
+ * Submit a single memory region operation for @vm to the hypervisor. The
+ * operation is wrapped into a one-entry vm_memory_region_batch.
+ *
+ * Returns -ENOMEM if the batch cannot be allocated, otherwise the result of
+ * hcall_set_memory_regions().
+ */
+static int modify_region(struct acrn_vm *vm, struct vm_memory_region_op *region)
+{
+	struct vm_memory_region_batch *batch;
+	int ret;
+
+	batch = kzalloc(sizeof(*batch), GFP_KERNEL);
+	if (!batch)
+		return -ENOMEM;
+
+	batch->regions_gpa = virt_to_phys(region);
+	batch->regions_num = 1;
+	batch->vmid = vm->vmid;
+
+	ret = hcall_set_memory_regions(virt_to_phys(batch));
+	if (ret < 0)
+		dev_dbg(acrn_dev.this_device,
+			"Failed to set memory region for VM[%u]!\n", vm->vmid);
+
+	kfree(batch);
+	return ret;
+}
+
+/**
+ * acrn_mm_region_add() - Set up the EPT mapping of a memory region.
+ * @vm:			User VM.
+ * @user_gpa:		A GPA of User VM.
+ * @service_gpa:	A GPA of Service VM.
+ * @size:		Size of the region.
+ * @mem_type:		Combination of ACRN_MEM_TYPE_*.
+ * @mem_access_right:	Combination of ACRN_MEM_ACCESS_*.
+ *
+ * Bits of @mem_type and @mem_access_right outside of their respective masks
+ * are ignored.
+ *
+ * Return: 0 on success, <0 on error.
+ */
+int acrn_mm_region_add(struct acrn_vm *vm, u64 user_gpa, u64 service_gpa,
+		       u64 size, u32 mem_type, u32 mem_access_right)
+{
+	struct vm_memory_region_op *op;
+	int ret;
+
+	op = kzalloc(sizeof(*op), GFP_KERNEL);
+	if (!op)
+		return -ENOMEM;
+
+	op->type = ACRN_MEM_REGION_ADD;
+	op->user_vm_pa = user_gpa;
+	op->service_vm_pa = service_gpa;
+	op->size = size;
+	op->attr = (mem_type & ACRN_MEM_TYPE_MASK) |
+		   (mem_access_right & ACRN_MEM_ACCESS_RIGHT_MASK);
+
+	ret = modify_region(vm, op);
+
+	dev_dbg(acrn_dev.this_device,
+		"%s: user-GPA[%pK] service-GPA[%pK] size[0x%llx].\n",
+		__func__, (void *)user_gpa, (void *)service_gpa, size);
+
+	kfree(op);
+	return ret;
+}
+
+/**
+ * acrn_mm_region_del() - Del the EPT mapping of a memory region.
+ * @vm:		User VM.
+ * @user_gpa:	A GPA of the User VM.
+ * @size:	Size of the region.
+ *
+ * Return: 0 on success, <0 for error.
+ */
+int acrn_mm_region_del(struct acrn_vm *vm, u64 user_gpa, u64 size)
+{
+	struct vm_memory_region_op *op;
+	int ret;
+
+	op = kzalloc(sizeof(*op), GFP_KERNEL);
+	if (!op)
+		return -ENOMEM;
+
+	/* A delete carries neither a Service VM address nor attributes */
+	op->type = ACRN_MEM_REGION_DEL;
+	op->user_vm_pa = user_gpa;
+	op->size = size;
+	op->service_vm_pa = 0UL;
+	op->attr = 0U;
+
+	ret = modify_region(vm, op);
+
+	dev_dbg(acrn_dev.this_device, "%s: user-GPA[%pK] size[0x%llx].\n",
+		__func__, (void *)user_gpa, size);
+
+	kfree(op);
+	return ret;
+}
+
+/*
+ * Map a memory segment into a User VM. ACRN_MEMMAP_RAM segments are handed
+ * to acrn_vm_ram_map(), ACRN_MEMMAP_MMIO segments are added as uncached
+ * regions with the access rights given in memmap->attr.
+ *
+ * Return: 0 on success, -EINVAL for an unknown segment type, otherwise the
+ * error of the mapping operation.
+ */
+int acrn_vm_memseg_map(struct acrn_vm *vm, struct acrn_vm_memmap *memmap)
+{
+	int ret;
+
+	if (memmap->type == ACRN_MEMMAP_RAM)
+		return acrn_vm_ram_map(vm, memmap);
+
+	if (memmap->type == ACRN_MEMMAP_MMIO) {
+		ret = acrn_mm_region_add(vm, memmap->user_vm_pa,
+					 memmap->service_vm_pa, memmap->len,
+					 ACRN_MEM_TYPE_UC, memmap->attr);
+		if (ret < 0)
+			dev_dbg(acrn_dev.this_device,
+				"Add memory region failed, VM[%u]!\n",
+				vm->vmid);
+		return ret;
+	}
+
+	dev_dbg(acrn_dev.this_device,
+		"Invalid memmap type: %u\n", memmap->type);
+	return -EINVAL;
+}
+
+/*
+ * Unmap a memory segment from a User VM. Only ACRN_MEMMAP_MMIO segments can
+ * be unmapped here.
+ *
+ * Return: 0 on success, -EINVAL for any other segment type, otherwise the
+ * error of acrn_mm_region_del().
+ */
+int acrn_vm_memseg_unmap(struct acrn_vm *vm, struct acrn_vm_memmap *memmap)
+{
+	int ret;
+
+	if (memmap->type == ACRN_MEMMAP_MMIO) {
+		ret = acrn_mm_region_del(vm, memmap->user_vm_pa, memmap->len);
+		if (ret < 0)
+			dev_dbg(acrn_dev.this_device,
+				"Del memory region failed, VM[%u]!\n",
+				vm->vmid);
+		return ret;
+	}
+
+	dev_dbg(acrn_dev.this_device,
+		"Invalid memmap type: %u\n", memmap->type);
+	return -EINVAL;
+}
+
+/**
+ * acrn_vm_ram_map() - Create a RAM EPT mapping of User VM.
+ * @vm:		The User VM pointer
+ * @memmap:	Info of the EPT mapping
+ *
+ * The user pages backing the region are pinned, mapped into the kernel and
+ * recorded in vm->regions_mapping. The hypervisor is then given one region
+ * per page or compound page. Everything is undone if a step fails.
+ *
+ * Return: 0 on success, <0 for error.
+ */
+int acrn_vm_ram_map(struct acrn_vm *vm, struct acrn_vm_memmap *memmap)
+{
+	struct vm_memory_region_batch *regions_info;
+	int nr_pages, i = 0, order, nr_regions = 0;
+	struct vm_memory_mapping *region_mapping;
+	struct vm_memory_region_op *vm_region;
+	struct page **pages = NULL, *page;
+	void *remap_vaddr;
+	int ret, pinned;
+	u64 user_vm_pa;
+
+	if (!vm || !memmap)
+		return -EINVAL;
+
+	/* Get the page number of the map region */
+	nr_pages = memmap->len >> PAGE_SHIFT;
+	pages = vzalloc(nr_pages * sizeof(struct page *));
+	if (!pages)
+		return -ENOMEM;
+
+	/* Lock the pages of user memory map region */
+	pinned = pin_user_pages_fast(memmap->vma_base,
+				     nr_pages, FOLL_WRITE | FOLL_LONGTERM,
+				     pages);
+	if (pinned < 0) {
+		/* Nothing was pinned, only the page array has to be freed */
+		ret = pinned;
+		goto free_pages;
+	} else if (pinned != nr_pages) {
+		/* Partially pinned: release the pages that were pinned */
+		ret = -EFAULT;
+		goto put_pages;
+	}
+
+	/* Create a kernel map for the map region */
+	remap_vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
+	if (!remap_vaddr) {
+		ret = -ENOMEM;
+		goto put_pages;
+	}
+
+	/* Record Service VM va <-> User VM pa mapping */
+	mutex_lock(&vm->regions_mapping_lock);
+	/* Only the address is taken here; the slot is used after the check */
+	region_mapping = &vm->regions_mapping[vm->regions_mapping_count];
+	if (vm->regions_mapping_count < ACRN_MEM_MAPPING_MAX) {
+		region_mapping->pages = pages;
+		region_mapping->npages = nr_pages;
+		region_mapping->size = memmap->len;
+		region_mapping->service_vm_va = remap_vaddr;
+		region_mapping->user_vm_pa = memmap->user_vm_pa;
+		vm->regions_mapping_count++;
+	} else {
+		dev_warn(acrn_dev.this_device,
+			"Run out of memory mapping slots!\n");
+		ret = -ENOMEM;
+		mutex_unlock(&vm->regions_mapping_lock);
+		goto unmap_no_count;
+	}
+	mutex_unlock(&vm->regions_mapping_lock);
+
+	/* Calculate count of vm_memory_region_op */
+	while (i < nr_pages) {
+		page = pages[i];
+		VM_BUG_ON_PAGE(PageTail(page), page);
+		/* A compound page is covered by a single region */
+		order = compound_order(page);
+		nr_regions++;
+		i += 1 << order;
+	}
+
+	/* Prepare the vm_memory_region_batch */
+	regions_info = kzalloc(sizeof(*regions_info) +
+			       sizeof(*vm_region) * nr_regions,
+			       GFP_KERNEL);
+	if (!regions_info) {
+		ret = -ENOMEM;
+		goto unmap_kernel_map;
+	}
+
+	/* Fill each vm_memory_region_op */
+	/* The region ops are laid out right behind the batch header */
+	vm_region = (struct vm_memory_region_op *)(regions_info + 1);
+	regions_info->vmid = vm->vmid;
+	regions_info->regions_num = nr_regions;
+	regions_info->regions_gpa = virt_to_phys(vm_region);
+	user_vm_pa = memmap->user_vm_pa;
+	i = 0;
+	while (i < nr_pages) {
+		u32 region_size;
+
+		page = pages[i];
+		VM_BUG_ON_PAGE(PageTail(page), page);
+		order = compound_order(page);
+		region_size = PAGE_SIZE << order;
+		vm_region->type = ACRN_MEM_REGION_ADD;
+		vm_region->user_vm_pa = user_vm_pa;
+		vm_region->service_vm_pa = page_to_phys(page);
+		vm_region->size = region_size;
+		vm_region->attr = (ACRN_MEM_TYPE_WB & ACRN_MEM_TYPE_MASK) |
+				  (memmap->attr & ACRN_MEM_ACCESS_RIGHT_MASK);
+
+		/* Regions are contiguous in the User VM physical space */
+		vm_region++;
+		user_vm_pa += region_size;
+		i += 1 << order;
+	}
+
+	/* Inform the ACRN Hypervisor to set up EPT mappings */
+	ret = hcall_set_memory_regions(virt_to_phys(regions_info));
+	if (ret < 0) {
+		dev_dbg(acrn_dev.this_device,
+			"Failed to set regions, VM[%u]!\n", vm->vmid);
+		goto unset_region;
+	}
+	kfree(regions_info);
+
+	dev_dbg(acrn_dev.this_device,
+		"%s: VM[%u] service-GVA[%pK] user-GPA[%pK] size[0x%llx]\n",
+		__func__, vm->vmid,
+		remap_vaddr, (void *)memmap->user_vm_pa, memmap->len);
+	return ret;
+
+	/* Error unwinding, in reverse order of the setup above */
+unset_region:
+	kfree(regions_info);
+unmap_kernel_map:
+	/*
+	 * NOTE(review): this gives back the last slot, which presumes that no
+	 * other mapping was recorded since the lock was dropped above -
+	 * confirm against the callers.
+	 */
+	mutex_lock(&vm->regions_mapping_lock);
+	vm->regions_mapping_count--;
+	mutex_unlock(&vm->regions_mapping_lock);
+unmap_no_count:
+	vunmap(remap_vaddr);
+put_pages:
+	for (i = 0; i < pinned; i++)
+		unpin_user_page(pages[i]);
+free_pages:
+	vfree(pages);
+	return ret;
+}
+
+/**
+ * acrn_vm_all_ram_unmap() - Destroy a RAM EPT mapping of User VM.
+ * @vm:	The User VM
+ *
+ * Every mapping recorded in vm->regions_mapping is released: the kernel
+ * mapping is removed, the user pages are unpinned and the page array is
+ * freed. Afterwards the VM has no recorded mappings.
+ */
+void acrn_vm_all_ram_unmap(struct acrn_vm *vm)
+{
+	struct vm_memory_mapping *region_mapping;
+	int i, j;
+
+	mutex_lock(&vm->regions_mapping_lock);
+	for (i = 0; i < vm->regions_mapping_count; i++) {
+		region_mapping = &vm->regions_mapping[i];
+		vunmap(region_mapping->service_vm_va);
+		for (j = 0; j < region_mapping->npages; j++)
+			unpin_user_page(region_mapping->pages[j]);
+		vfree(region_mapping->pages);
+	}
+	/*
+	 * All slots are stale now. Reset the count so that a further call
+	 * does not release them a second time and so that later mappings
+	 * start again at the first slot.
+	 */
+	vm->regions_mapping_count = 0;
+	mutex_unlock(&vm->regions_mapping_lock);
+}
diff --git a/drivers/virt/acrn/vm.c b/drivers/virt/acrn/vm.c
new file mode 100644
index 000000000000..7804a2492ad7
--- /dev/null
+++ b/drivers/virt/acrn/vm.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ACRN_HSM: Virtual Machine management
+ *
+ * Copyright (C) 2020 Intel Corporation. All rights reserved.
+ *
+ * Authors:
+ *	Jason Chen CJ <jason.cj.chen@intel.com>
+ *	Yakui Zhao <yakui.zhao@intel.com>
+ */
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+#include "acrn_drv.h"
+
+/* List of VMs */
+LIST_HEAD(acrn_vm_list);
+/*
+ * acrn_vm_list is read in a worker thread which dispatches I/O requests and
+ * is written in the VM creation ioctl. Use the rwlock mechanism to protect it.
+ */
+DEFINE_RWLOCK(acrn_vm_list_lock);
+
+/**
+ * acrn_vm_create() - Create a User VM and add it to acrn_vm_list
+ * @vm:		Caller-provided VM structure to initialise
+ * @vm_param:	Creation parameters; passed to hcall_create_vm() by physical
+ *		address, and the VM ID and vCPU count are read back from it
+ *
+ * Return: @vm on success; NULL if hcall_create_vm() fails, if the resulting
+ * VM ID is ACRN_INVALID_VMID, or if acrn_ioreq_init() fails (in which case
+ * the VM is destroyed again via hcall_destroy_vm()).
+ */
+struct acrn_vm *acrn_vm_create(struct acrn_vm *vm,
+			       struct acrn_vm_creation *vm_param)
+{
+	int err;
+
+	err = hcall_create_vm(virt_to_phys(vm_param));
+	if (err < 0 || vm_param->vmid == ACRN_INVALID_VMID) {
+		dev_err(acrn_dev.this_device,
+			"Failed to create VM! Error: %d\n", err);
+		return NULL;
+	}
+
+	/* Record the identity of the new VM */
+	vm->vmid = vm_param->vmid;
+	vm->vcpu_num = vm_param->vcpu_num;
+
+	/* Set up the locks and the client list before anything can use them */
+	spin_lock_init(&vm->ioreq_clients_lock);
+	INIT_LIST_HEAD(&vm->ioreq_clients);
+	mutex_init(&vm->regions_mapping_lock);
+
+	err = acrn_ioreq_init(vm, vm_param->ioreq_buf);
+	if (err < 0) {
+		/* Undo the creation and mark @vm as unused */
+		hcall_destroy_vm(vm_param->vmid);
+		vm->vmid = ACRN_INVALID_VMID;
+		return NULL;
+	}
+
+	/* Publish the VM on the global list */
+	write_lock_bh(&acrn_vm_list_lock);
+	list_add(&vm->list, &acrn_vm_list);
+	write_unlock_bh(&acrn_vm_list_lock);
+
+	acrn_ioeventfd_init(vm);
+	acrn_irqfd_init(vm);
+
+	dev_dbg(acrn_dev.this_device, "VM %u created.\n", vm->vmid);
+	return vm;
+}
+
+/**
+ * acrn_vm_destroy() - Destroy a User VM and release its driver-side resources
+ * @vm:	The User VM
+ *
+ * hcall_destroy_vm() is issued before any driver-side state is released. If
+ * it fails, ACRN_VM_FLAG_DESTROYED is cleared so the call can be retried; as
+ * nothing has been torn down at that point, a retry cannot release the same
+ * resources twice.
+ *
+ * Return: 0 on success, or if the VM has no valid VM ID or is already marked
+ * destroyed; <0 if hcall_destroy_vm() fails
+ */
+int acrn_vm_destroy(struct acrn_vm *vm)
+{
+	int ret;
+
+	/* The flag makes sure only one caller performs the teardown */
+	if (vm->vmid == ACRN_INVALID_VMID ||
+	    test_and_set_bit(ACRN_VM_FLAG_DESTROYED, &vm->flags))
+		return 0;
+
+	/*
+	 * Do the hypercall first: on failure the flag is cleared again and
+	 * the VM must still be fully intact for a later attempt.
+	 */
+	ret = hcall_destroy_vm(vm->vmid);
+	if (ret < 0) {
+		dev_err(acrn_dev.this_device,
+			"Failed to destroy VM %u\n", vm->vmid);
+		clear_bit(ACRN_VM_FLAG_DESTROYED, &vm->flags);
+		return ret;
+	}
+
+	/* Remove from global VM list */
+	write_lock_bh(&acrn_vm_list_lock);
+	list_del_init(&vm->list);
+	write_unlock_bh(&acrn_vm_list_lock);
+
+	acrn_ioeventfd_deinit(vm);
+	acrn_irqfd_deinit(vm);
+	acrn_ioreq_deinit(vm);
+
+	if (vm->monitor_page) {
+		put_page(vm->monitor_page);
+		vm->monitor_page = NULL;
+	}
+
+	acrn_vm_all_ram_unmap(vm);
+
+	dev_dbg(acrn_dev.this_device, "VM %u destroyed.\n", vm->vmid);
+	vm->vmid = ACRN_INVALID_VMID;
+	return 0;
+}
+
+/**
+ * acrn_msi_inject() - Inject a MSI interrupt into a User VM
+ * @vm:		User VM
+ * @msi_addr:	The MSI address
+ * @msi_data:	The MSI data
+ *
+ * Return: -ENOMEM if the MSI entry cannot be allocated, otherwise the value
+ * returned by hcall_inject_msi() (<0 on error)
+ */
+int acrn_msi_inject(struct acrn_vm *vm, u64 msi_addr, u64 msi_data)
+{
+	struct acrn_msi_entry *entry;
+	int err;
+
+	/* Callers might be in interrupt context, hence GFP_ATOMIC */
+	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+	if (!entry)
+		return -ENOMEM;
+
+	/*
+	 * msi_addr: addr[19:12] with dest vcpu id
+	 * msi_data: data[7:0] with vector
+	 */
+	entry->msi_data = msi_data;
+	entry->msi_addr = msi_addr;
+
+	/* The entry is handed over by physical address */
+	err = hcall_inject_msi(vm->vmid, virt_to_phys(entry));
+	if (err < 0)
+		dev_err(acrn_dev.this_device,
+			"Failed to inject MSI to VM %u!\n", vm->vmid);
+
+	kfree(entry);
+	return err;
+}
diff --git a/drivers/virt/vboxguest/vboxguest_utils.c b/drivers/virt/vboxguest/vboxguest_utils.c
index ea05af41ec69..8d195e3f8301 100644
--- a/drivers/virt/vboxguest/vboxguest_utils.c
+++ b/drivers/virt/vboxguest/vboxguest_utils.c
@@ -468,7 +468,7 @@ static int hgcm_cancel_call(struct vbg_dev *gdev, struct vmmdev_hgcm_call *call)
  *               Cancellation fun.
  */
 static int vbg_hgcm_do_call(struct vbg_dev *gdev, struct vmmdev_hgcm_call *call,
-			    u32 timeout_ms, bool *leak_it)
+			    u32 timeout_ms, bool interruptible, bool *leak_it)
 {
 	int rc, cancel_rc, ret;
 	long timeout;
@@ -495,10 +495,15 @@ static int vbg_hgcm_do_call(struct vbg_dev *gdev, struct vmmdev_hgcm_call *call,
 	else
 		timeout = msecs_to_jiffies(timeout_ms);
 
-	timeout = wait_event_interruptible_timeout(
-					gdev->hgcm_wq,
-					hgcm_req_done(gdev, &call->header),
-					timeout);
+	if (interruptible) {
+		timeout = wait_event_interruptible_timeout(gdev->hgcm_wq,
+							   hgcm_req_done(gdev, &call->header),
+							   timeout);
+	} else {
+		timeout = wait_event_timeout(gdev->hgcm_wq,
+					     hgcm_req_done(gdev, &call->header),
+					     timeout);
+	}
 
 	/* timeout > 0 means hgcm_req_done has returned true, so success */
 	if (timeout > 0)
@@ -631,7 +636,8 @@ int vbg_hgcm_call(struct vbg_dev *gdev, u32 requestor, u32 client_id,
 	hgcm_call_init_call(call, client_id, function, parms, parm_count,
 			    bounce_bufs);
 
-	ret = vbg_hgcm_do_call(gdev, call, timeout_ms, &leak_it);
+	ret = vbg_hgcm_do_call(gdev, call, timeout_ms,
+			       requestor & VMMDEV_REQUESTOR_USERMODE, &leak_it);
 	if (ret == 0) {
 		*vbox_status = call->header.result;
 		ret = hgcm_call_copy_back_result(call, parms, parm_count,
diff --git a/drivers/vme/vme.c b/drivers/vme/vme.c
index 54d7963c1078..1b15afea28ee 100644
--- a/drivers/vme/vme.c
+++ b/drivers/vme/vme.c
@@ -1997,9 +1997,9 @@ static int vme_bus_remove(struct device *dev)
 
 	driver = dev->platform_data;
 	if (driver->remove)
-		return driver->remove(vdev);
+		driver->remove(vdev);
 
-	return -ENODEV;
+	return 0;
 }
 
 struct bus_type vme_bus_type = {
diff --git a/drivers/w1/masters/ds2490.c b/drivers/w1/masters/ds2490.c
index e17c8f70dcd0..cd8821580f71 100644
--- a/drivers/w1/masters/ds2490.c
+++ b/drivers/w1/masters/ds2490.c
@@ -688,12 +688,22 @@ static void ds9490r_search(void *data, struct w1_master *master,
 	 * packet size.
 	 */
 	const size_t bufsize = 2 * 64;
-	u64 *buf;
+	u64 *buf, *found_ids;
 
 	buf = kmalloc(bufsize, GFP_KERNEL);
 	if (!buf)
 		return;
 
+	/*
+	 * We are holding the bus mutex during the scan, but adding devices via the
+	 * callback needs the bus to be unlocked. So we queue up found ids here.
+	 */
+	found_ids = kmalloc_array(master->max_slave_count, sizeof(u64), GFP_KERNEL);
+	if (!found_ids) {
+		kfree(buf);
+		return;
+	}
+
 	mutex_lock(&master->bus_mutex);
 
 	/* address to start searching at */
@@ -729,13 +739,13 @@ static void ds9490r_search(void *data, struct w1_master *master,
 			if (err < 0)
 				break;
 			for (i = 0; i < err/8; ++i) {
-				++found;
-				if (found <= search_limit)
-					callback(master, buf[i]);
+				found_ids[found++] = buf[i];
 				/* can't know if there will be a discrepancy
 				 * value after until the next id */
-				if (found == search_limit)
+				if (found == search_limit) {
 					master->search_id = buf[i];
+					break;
+				}
 			}
 		}
 
@@ -759,9 +769,14 @@ static void ds9490r_search(void *data, struct w1_master *master,
 			master->max_slave_count);
 		set_bit(W1_WARN_MAX_COUNT, &master->flags);
 	}
+
 search_out:
 	mutex_unlock(&master->bus_mutex);
 	kfree(buf);
+
+	for (i = 0; i < found; i++) /* run callback for all queued up IDs */
+		callback(master, found_ids[i]);
+	kfree(found_ids);
 }
 
 #if 0
diff --git a/drivers/w1/slaves/w1_therm.c b/drivers/w1/slaves/w1_therm.c
index 3712b1e6dc71..976eea28f268 100644
--- a/drivers/w1/slaves/w1_therm.c
+++ b/drivers/w1/slaves/w1_therm.c
@@ -667,28 +667,24 @@ static inline int w1_DS18B20_get_resolution(struct w1_slave *sl)
  */
 static inline int w1_DS18B20_convert_temp(u8 rom[9])
 {
-	int t;
-	u32 bv;
+	u16 bv;
+	s16 t;
+
+	/* Signed 16-bit value to unsigned, cpu order */
+	bv = le16_to_cpup((__le16 *)rom);
 
 	/* Config register bit R2 = 1 - GX20MH01 in 13 or 14 bit resolution mode */
 	if (rom[4] & 0x80) {
-		/* Signed 16-bit value to unsigned, cpu order */
-		bv = le16_to_cpup((__le16 *)rom);
-
 		/* Insert two temperature bits from config register */
 		/* Avoid arithmetic shift of signed value */
 		bv = (bv << 2) | (rom[4] & 3);
-
-		t = (int) sign_extend32(bv, 17); /* Degrees, lowest bit is 2^-6 */
-		return (t*1000)/64;  /* Millidegrees */
+		t = (s16) bv;	/* Degrees, lowest bit is 2^-6 */
+		return (int)t * 1000 / 64;	/* Sign-extend to int; millidegrees */
 	}
-
-	t = (int)le16_to_cpup((__le16 *)rom);
-	return t*1000/16;
+	t = (s16)bv;	/* Degrees, lowest bit is 2^-4 */
+	return (int)t * 1000 / 16;	/* Sign-extend to int; millidegrees */
 }
 
-
-
 /**
  * w1_DS18S20_convert_temp() - temperature computation for DS18S20
  * @rom: data read from device RAM (8 data bytes + 1 CRC byte)
diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c
index 15a2ee32f116..f2ae2e563dc5 100644
--- a/drivers/w1/w1.c
+++ b/drivers/w1/w1.c
@@ -25,6 +25,8 @@
 #include "w1_netlink.h"
 
 #define W1_FAMILY_DEFAULT	0
+#define W1_FAMILY_DS28E04       0x1C /* for crc quirk */
+
 
 static int w1_timeout = 10;
 module_param_named(timeout, w1_timeout, int, 0);
@@ -913,11 +915,44 @@ void w1_reconnect_slaves(struct w1_family *f, int attach)
 	mutex_unlock(&w1_mlock);
 }
 
+/*
+ * Check the CRC byte of a 64-bit 1-wire address.
+ *
+ * The CRC is computed with w1_calc_crc8() over the first seven bytes of the
+ * little-endian form of @rn and compared with the crc field of the address.
+ *
+ * Returns 1 if the CRC matches (directly, or through the DS28E04 quirk
+ * described below), 0 otherwise.
+ */
+static int w1_addr_crc_is_valid(struct w1_master *dev, u64 rn)
+{
+	struct w1_reg_num *reg = (struct w1_reg_num *)&rn;
+	u64 wire = cpu_to_le64(rn);
+	u8 crc = w1_calc_crc8((u8 *)&wire, 7);
+
+	if (crc == reg->crc)
+		return 1;
+
+	/* quirk:
+	 *   DS28E04 (1w eeprom) has strapping pins to change
+	 *   address, but will not update the crc. So normal rules
+	 *   for consistent w1 addresses are violated. We test
+	 *   with the 7 LSBs of the address forced high.
+	 *
+	 *   (char*)&wire = { family, addr_lsb, ..., addr_msb, crc }.
+	 */
+	if (reg->family == W1_FAMILY_DS28E04) {
+		u64 patched = wire;
+
+		((u8 *)&patched)[1] |= 0x7f;
+		crc = w1_calc_crc8((u8 *)&patched, 7);
+
+		/* Logged whenever the quirk is tried, whether or not it helps */
+		dev_info(&dev->dev, "DS28E04 crc workaround on %02x.%012llx.%02x\n",
+			reg->family, (unsigned long long)reg->id, reg->crc);
+
+		if (crc == reg->crc)
+			return 1;
+	}
+
+	dev_dbg(&dev->dev, "w1 addr crc mismatch: %02x.%012llx.%02x != 0x%02x.\n",
+		reg->family, (unsigned long long)reg->id, reg->crc, crc);
+	return 0;
+}
+
 void w1_slave_found(struct w1_master *dev, u64 rn)
 {
 	struct w1_slave *sl;
 	struct w1_reg_num *tmp;
-	u64 rn_le = cpu_to_le64(rn);
 
 	atomic_inc(&dev->refcnt);
 
@@ -927,7 +962,7 @@ void w1_slave_found(struct w1_master *dev, u64 rn)
 	if (sl) {
 		set_bit(W1_SLAVE_ACTIVE, &sl->flags);
 	} else {
-		if (rn && tmp->crc == w1_calc_crc8((u8 *)&rn_le, 7))
+		if (rn && w1_addr_crc_is_valid(dev, rn))
 			w1_attach_slave_device(dev, tmp);
 	}
 
diff --git a/drivers/watchdog/mei_wdt.c b/drivers/watchdog/mei_wdt.c
index c5967d8b4256..e023d7d90d66 100644
--- a/drivers/watchdog/mei_wdt.c
+++ b/drivers/watchdog/mei_wdt.c
@@ -620,7 +620,7 @@ err_out:
 	return ret;
 }
 
-static int mei_wdt_remove(struct mei_cl_device *cldev)
+static void mei_wdt_remove(struct mei_cl_device *cldev)
 {
 	struct mei_wdt *wdt = mei_cldev_get_drvdata(cldev);
 
@@ -637,8 +637,6 @@ static int mei_wdt_remove(struct mei_cl_device *cldev)
 	dbgfs_unregister(wdt);
 
 	kfree(wdt);
-
-	return 0;
 }
 
 #define MEI_UUID_WD UUID_LE(0x05B79A6F, 0x4628, 0x4D7F, \
author	Linus Torvalds <torvalds@linux-foundation.org>	2021-02-24 10:25:37 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2021-02-24 10:25:37 -0800
commit	e229b429bb4af24d9828758c0c851bb6a4169400 (patch)
tree	95e49922f6c68b5f81cbf7a39349cfad42c5a0f1 /drivers
parent	7ac1161c2789be25d0d206e831b051f43028866e (diff)
parent	d19db80a366576d3ffadf2508ed876b4c1faf959 (diff)