From 46f7ac3d7892e808c9ba01c39da6bb85cda26ecd Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Wed, 3 Aug 2022 15:58:57 +0100
Subject: spi: bitbang: Fix lsb-first Rx

Shifting the recieved bit by "bits" inserts it at the top of the
*currently remaining* Tx data, so we end up accumulating the whole
transfer into bit 0 of the output word. Oops.

For the algorithm to work as intended, we need to remember where the
top of the *original* word was, and shift Rx to there.

Fixes: 1847e3046c52 ("spi: gpio: Implement LSB First bitbang support")
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Link: https://lore.kernel.org/r/28324d8622da80461cce35a82859b003d6f6c4b0.1659538737.git.robin.murphy@arm.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-bitbang-txrx.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'drivers/spi')

diff --git a/drivers/spi/spi-bitbang-txrx.h b/drivers/spi/spi-bitbang-txrx.h
index 267342dfa738..2dcbe166df63 100644
--- a/drivers/spi/spi-bitbang-txrx.h
+++ b/drivers/spi/spi-bitbang-txrx.h
@@ -116,6 +116,7 @@ bitbang_txrx_le_cpha0(struct spi_device *spi,
 {
 	/* if (cpol == 0) this is SPI_MODE_0; else this is SPI_MODE_2 */
 
+	u8 rxbit = bits - 1;
 	u32 oldbit = !(word & 1);
 	/* clock starts at inactive polarity */
 	for (; likely(bits); bits--) {
@@ -135,7 +136,7 @@ bitbang_txrx_le_cpha0(struct spi_device *spi,
 		/* sample LSB (from slave) on leading edge */
 		word >>= 1;
 		if ((flags & SPI_MASTER_NO_RX) == 0)
-			word |= getmiso(spi) << (bits - 1);
+			word |= getmiso(spi) << rxbit;
 		setsck(spi, cpol);
 	}
 	return word;
@@ -148,6 +149,7 @@ bitbang_txrx_le_cpha1(struct spi_device *spi,
 {
 	/* if (cpol == 0) this is SPI_MODE_1; else this is SPI_MODE_3 */
 
+	u8 rxbit = bits - 1;
 	u32 oldbit = !(word & 1);
 	/* clock starts at inactive polarity */
 	for (; likely(bits); bits--) {
@@ -168,7 +170,7 @@ bitbang_txrx_le_cpha1(struct spi_device *spi,
 		/* sample LSB (from slave) on trailing edge */
 		word >>= 1;
 		if ((flags & SPI_MASTER_NO_RX) == 0)
-			word |= getmiso(spi) << (bits - 1);
+			word |= getmiso(spi) << rxbit;
 	}
 	return word;
 }
-- 
cgit 


From 9ee5b6d53b8c99d13a47227e3b7052a1365556c9 Mon Sep 17 00:00:00 2001
From: Niravkumar L Rabara <niravkumar.l.rabara@intel.com>
Date: Sat, 13 Aug 2022 12:26:16 +0800
Subject: spi: cadence-quadspi: Disable irqs during indirect reads

On architecture where reading the SRAM is slower than the pace at
controller fills it, with interrupt enabled while reading from
SRAM FIFO causes unwanted interrupt storm to CPU.

The inner "bytes to read" loop never exits and waits for the completion
so it is enough to only enable the watermark interrupt when we
are out of bytes to read, which only happens when we start the
transfer (waiting for the FIFO to fill up initially) if the SRAM
is slow.

So only using read watermark interrupt, as the current implementation
doesn't utilize the SRAM full and indirect complete read interrupt.
And disable all the read interrupts while reading from SRAM.

Signed-off-by: Niravkumar L Rabara <niravkumar.l.rabara@intel.com>
Link: https://lore.kernel.org/r/20220813042616.1372110-1-niravkumar.l.rabara@intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-cadence-quadspi.c | 38 ++++++++++++++++++++++++++++++++++----
 1 file changed, 34 insertions(+), 4 deletions(-)

(limited to 'drivers/spi')

diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c
index 72b1a5a2298c..e12ab5b43f34 100644
--- a/drivers/spi/spi-cadence-quadspi.c
+++ b/drivers/spi/spi-cadence-quadspi.c
@@ -39,6 +39,7 @@
 #define CQSPI_DISABLE_DAC_MODE		BIT(1)
 #define CQSPI_SUPPORT_EXTERNAL_DMA	BIT(2)
 #define CQSPI_NO_SUPPORT_WR_COMPLETION	BIT(3)
+#define CQSPI_SLOW_SRAM		BIT(4)
 
 /* Capabilities */
 #define CQSPI_SUPPORTS_OCTAL		BIT(0)
@@ -87,6 +88,7 @@ struct cqspi_st {
 	bool			use_dma_read;
 	u32			pd_dev_id;
 	bool			wr_completion;
+	bool			slow_sram;
 };
 
 struct cqspi_driver_platdata {
@@ -333,7 +335,10 @@ static irqreturn_t cqspi_irq_handler(int this_irq, void *dev)
 		}
 	}
 
-	irq_status &= CQSPI_IRQ_MASK_RD | CQSPI_IRQ_MASK_WR;
+	else if (!cqspi->slow_sram)
+		irq_status &= CQSPI_IRQ_MASK_RD | CQSPI_IRQ_MASK_WR;
+	else
+		irq_status &= CQSPI_REG_IRQ_WATERMARK | CQSPI_IRQ_MASK_WR;
 
 	if (irq_status)
 		complete(&cqspi->transfer_complete);
@@ -673,7 +678,18 @@ static int cqspi_indirect_read_execute(struct cqspi_flash_pdata *f_pdata,
 	/* Clear all interrupts. */
 	writel(CQSPI_IRQ_STATUS_MASK, reg_base + CQSPI_REG_IRQSTATUS);
 
-	writel(CQSPI_IRQ_MASK_RD, reg_base + CQSPI_REG_IRQMASK);
+	/*
+	 * On SoCFPGA platform reading the SRAM is slow due to
+	 * hardware limitation and causing read interrupt storm to CPU,
+	 * so enabling only watermark interrupt to disable all read
+	 * interrupts later as we want to run "bytes to read" loop with
+	 * all the read interrupts disabled for max performance.
+	 */
+
+	if (!cqspi->slow_sram)
+		writel(CQSPI_IRQ_MASK_RD, reg_base + CQSPI_REG_IRQMASK);
+	else
+		writel(CQSPI_REG_IRQ_WATERMARK, reg_base + CQSPI_REG_IRQMASK);
 
 	reinit_completion(&cqspi->transfer_complete);
 	writel(CQSPI_REG_INDIRECTRD_START_MASK,
@@ -684,6 +700,13 @@ static int cqspi_indirect_read_execute(struct cqspi_flash_pdata *f_pdata,
 						 msecs_to_jiffies(CQSPI_READ_TIMEOUT_MS)))
 			ret = -ETIMEDOUT;
 
+		/*
+		 * Disable all read interrupts until
+		 * we are out of "bytes to read"
+		 */
+		if (cqspi->slow_sram)
+			writel(0x0, reg_base + CQSPI_REG_IRQMASK);
+
 		bytes_to_read = cqspi_get_rd_sram_level(cqspi);
 
 		if (ret && bytes_to_read == 0) {
@@ -715,8 +738,11 @@ static int cqspi_indirect_read_execute(struct cqspi_flash_pdata *f_pdata,
 			bytes_to_read = cqspi_get_rd_sram_level(cqspi);
 		}
 
-		if (remaining > 0)
+		if (remaining > 0) {
 			reinit_completion(&cqspi->transfer_complete);
+			if (cqspi->slow_sram)
+				writel(CQSPI_REG_IRQ_WATERMARK, reg_base + CQSPI_REG_IRQMASK);
+		}
 	}
 
 	/* Check indirect done status */
@@ -1667,6 +1693,8 @@ static int cqspi_probe(struct platform_device *pdev)
 			cqspi->use_dma_read = true;
 		if (ddata->quirks & CQSPI_NO_SUPPORT_WR_COMPLETION)
 			cqspi->wr_completion = false;
+		if (ddata->quirks & CQSPI_SLOW_SRAM)
+			cqspi->slow_sram = true;
 
 		if (of_device_is_compatible(pdev->dev.of_node,
 					    "xlnx,versal-ospi-1.0"))
@@ -1779,7 +1807,9 @@ static const struct cqspi_driver_platdata intel_lgm_qspi = {
 };
 
 static const struct cqspi_driver_platdata socfpga_qspi = {
-	.quirks = CQSPI_DISABLE_DAC_MODE | CQSPI_NO_SUPPORT_WR_COMPLETION,
+	.quirks = CQSPI_DISABLE_DAC_MODE
+			| CQSPI_NO_SUPPORT_WR_COMPLETION
+			| CQSPI_SLOW_SRAM,
 };
 
 static const struct cqspi_driver_platdata versal_ospi = {
-- 
cgit 


From b30f7c8eb0780e1479a9882526e838664271f4c9 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Thu, 1 Sep 2022 13:07:32 +0100
Subject: spi: mux: Fix mux interaction with fast path optimisations

The spi-mux driver is rather too clever and attempts to resubmit any
message that is submitted to it to the parent controller with some
adjusted callbacks.  This does not play at all nicely with the fast
path which now sets flags on the message indicating that it's being
handled through the fast path, we see async messages flagged as being on
the fast path.  Ideally the spi-mux code would duplicate the message but
that's rather invasive and a bit fragile in that it relies on the mux
knowing which fields in the message to copy.  Instead teach the core
that there are controllers which can't cope with the fast path and have
the mux flag itself as being such a controller, ensuring that messages
going via the mux don't get partially handled via the fast path.

This will reduce the performance of any spi-mux connected device since
we'll now always use the thread for both the actual controller and the
mux controller instead of just the actual controller but given that we
were always hitting the slow path anyway it's hopefully not too much of
an additional cost and it allows us to keep the fast path.

Fixes: ae7d2346dc89 ("spi: Don't use the message queue if possible in spi_sync")
Reported-by: Casper Andersson <casper.casan@gmail.com>
Tested-by: Casper Andersson <casper.casan@gmail.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20220901120732.49245-1-broonie@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-mux.c | 1 +
 drivers/spi/spi.c     | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers/spi')

diff --git a/drivers/spi/spi-mux.c b/drivers/spi/spi-mux.c
index f5d32ec4634e..0709e987bd5a 100644
--- a/drivers/spi/spi-mux.c
+++ b/drivers/spi/spi-mux.c
@@ -161,6 +161,7 @@ static int spi_mux_probe(struct spi_device *spi)
 	ctlr->num_chipselect = mux_control_states(priv->mux);
 	ctlr->bus_num = -1;
 	ctlr->dev.of_node = spi->dev.of_node;
+	ctlr->must_async = true;
 
 	ret = devm_spi_register_controller(&spi->dev, ctlr);
 	if (ret)
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 8f97a3eacdea..5cbe090f7676 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -4033,7 +4033,7 @@ static int __spi_sync(struct spi_device *spi, struct spi_message *message)
 	 * guard against reentrancy from a different context. The io_mutex
 	 * will catch those cases.
 	 */
-	if (READ_ONCE(ctlr->queue_empty)) {
+	if (READ_ONCE(ctlr->queue_empty) && !ctlr->must_async) {
 		message->actual_length = 0;
 		message->status = -EINPROGRESS;
 
-- 
cgit 


From 9c9c9da7aa108e6bf952c18289527a5234e4fc59 Mon Sep 17 00:00:00 2001
From: David Jander <david@protonic.nl>
Date: Thu, 1 Sep 2022 14:36:30 +0200
Subject: spi: spi: Fix queue hang if previous transfer failed

The queue worker always needs to be kicked one final time after a transfer
is done in order to transition to idle (ctlr->busy = false).

Commit 69fa95905d40 ("spi: Ensure the io_mutex is held until
spi_finalize_current_message()") moved this code into
__spi_pump_messages(), but it was executed only if the transfer was
successful. This condition check causes ctlr-busy to stay true in case of
a failed transfer.
This in turn causes that no new work is ever scheduled to the work queue.

Fixes: 69fa95905d40 ("spi: Ensure the io_mutex is held until spi_finalize_current_message()")
Reported-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
Signed-off-by: David Jander <david@protonic.nl>
Link: https://lore.kernel.org/r/20220901123630.1098433-1-david@protonic.nl
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'drivers/spi')

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 5cbe090f7676..f7852c2c186c 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -1727,8 +1727,7 @@ static void __spi_pump_messages(struct spi_controller *ctlr, bool in_kthread)
 	spin_unlock_irqrestore(&ctlr->queue_lock, flags);
 
 	ret = __spi_pump_transfer_message(ctlr, msg, was_busy);
-	if (!ret)
-		kthread_queue_work(ctlr->kworker, &ctlr->pump_messages);
+	kthread_queue_work(ctlr->kworker, &ctlr->pump_messages);
 
 	ctlr->cur_msg = NULL;
 	ctlr->fallback = false;
-- 
cgit