summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS10
-rw-r--r--block/bio.c43
-rw-r--r--block/blk-cgroup.c9
-rw-r--r--block/blk-iolatency.c1
-rw-r--r--block/blk-mq.c8
-rw-r--r--block/blk-mq.h2
-rw-r--r--block/blk-sysfs.c12
-rw-r--r--drivers/auxdisplay/Kconfig38
-rw-r--r--drivers/auxdisplay/Makefile2
-rw-r--r--drivers/auxdisplay/charlcd.c55
-rw-r--r--drivers/auxdisplay/hd44780.c4
-rw-r--r--drivers/auxdisplay/panel.c4
-rw-r--r--drivers/block/loop.c2
-rw-r--r--drivers/block/paride/pcd.c6
-rw-r--r--drivers/block/paride/pf.c16
-rw-r--r--drivers/block/rbd.c28
-rw-r--r--drivers/scsi/hisi_sas/hisi_sas_main.c6
-rw-r--r--drivers/scsi/ibmvscsi/ibmvscsi.c23
-rw-r--r--drivers/scsi/qla2xxx/qla_init.c7
-rw-r--r--drivers/scsi/qla2xxx/qla_os.c2
-rw-r--r--drivers/scsi/scsi_lib.c15
-rw-r--r--drivers/scsi/scsi_transport_iscsi.c2
-rw-r--r--fs/block_dev.c12
-rw-r--r--fs/io_uring.c439
-rw-r--r--fs/iomap.c12
-rw-r--r--include/linux/blk-mq.h3
-rw-r--r--include/linux/blk_types.h1
-rw-r--r--include/linux/blkdev.h3
-rw-r--r--include/linux/ceph/libceph.h2
-rw-r--r--include/linux/sbitmap.h2
-rw-r--r--include/linux/uio.h24
-rw-r--r--include/misc/charlcd.h1
-rw-r--r--net/ceph/ceph_common.c18
-rw-r--r--net/ceph/mon_client.c9
34 files changed, 479 insertions, 342 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index e17ebf70b548..3e5a5d263f29 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8096,6 +8096,16 @@ F: include/linux/iommu.h
F: include/linux/of_iommu.h
F: include/linux/iova.h
+IO_URING
+M: Jens Axboe <axboe@kernel.dk>
+L: linux-block@vger.kernel.org
+L: linux-fsdevel@vger.kernel.org
+T: git git://git.kernel.dk/linux-block
+T: git git://git.kernel.dk/liburing
+S: Maintained
+F: fs/io_uring.c
+F: include/uapi/linux/io_uring.h
+
IP MASQUERADING
M: Juanjo Ciarlante <jjciarla@raiz.uncu.edu.ar>
S: Maintained
diff --git a/block/bio.c b/block/bio.c
index 71a78d9fb8b7..b64cedc7f87c 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -849,20 +849,14 @@ static int __bio_iov_bvec_add_pages(struct bio *bio, struct iov_iter *iter)
size = bio_add_page(bio, bv->bv_page, len,
bv->bv_offset + iter->iov_offset);
if (size == len) {
- struct page *page;
- int i;
+ if (!bio_flagged(bio, BIO_NO_PAGE_REF)) {
+ struct page *page;
+ int i;
+
+ mp_bvec_for_each_page(page, bv, i)
+ get_page(page);
+ }
- /*
- * For the normal O_DIRECT case, we could skip grabbing this
- * reference and then not have to put them again when IO
- * completes. But this breaks some in-kernel users, like
- * splicing to/from a loop device, where we release the pipe
- * pages unconditionally. If we can fix that case, we can
- * get rid of the get here and the need to call
- * bio_release_pages() at IO completion time.
- */
- mp_bvec_for_each_page(page, bv, i)
- get_page(page);
iov_iter_advance(iter, size);
return 0;
}
@@ -925,10 +919,12 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
* This takes either an iterator pointing to user memory, or one pointing to
* kernel pages (BVEC iterator). If we're adding user pages, we pin them and
* map them into the kernel. On IO completion, the caller should put those
- * pages. For now, when adding kernel pages, we still grab a reference to the
- * page. This isn't strictly needed for the common case, but some call paths
- * end up releasing pages from eg a pipe and we can't easily control these.
- * See comment in __bio_iov_bvec_add_pages().
+ * pages. If we're adding kernel pages, and the caller told us it's safe to
+ * do so, we just have to add the pages to the bio directly. We don't grab an
+ * extra reference to those pages (the user should already have that), and we
+ * don't put the page on IO completion. The caller needs to check if the bio is
+ * flagged BIO_NO_PAGE_REF on IO completion. If it isn't, then pages should be
+ * released.
*
* The function tries, but does not guarantee, to pin as many pages as
* fit into the bio, or are requested in *iter, whatever is smaller. If
@@ -940,6 +936,13 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
const bool is_bvec = iov_iter_is_bvec(iter);
unsigned short orig_vcnt = bio->bi_vcnt;
+ /*
+ * If this is a BVEC iter, then the pages are kernel pages. Don't
+ * release them on IO completion, if the caller asked us to.
+ */
+ if (is_bvec && iov_iter_bvec_no_ref(iter))
+ bio_set_flag(bio, BIO_NO_PAGE_REF);
+
do {
int ret;
@@ -1696,7 +1699,8 @@ static void bio_dirty_fn(struct work_struct *work)
next = bio->bi_private;
bio_set_pages_dirty(bio);
- bio_release_pages(bio);
+ if (!bio_flagged(bio, BIO_NO_PAGE_REF))
+ bio_release_pages(bio);
bio_put(bio);
}
}
@@ -1713,7 +1717,8 @@ void bio_check_pages_dirty(struct bio *bio)
goto defer;
}
- bio_release_pages(bio);
+ if (!bio_flagged(bio, BIO_NO_PAGE_REF))
+ bio_release_pages(bio);
bio_put(bio);
return;
defer:
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 77f37ef8ef06..617a2b3f7582 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1736,8 +1736,8 @@ out:
/**
* blkcg_schedule_throttle - this task needs to check for throttling
- * @q - the request queue IO was submitted on
- * @use_memdelay - do we charge this to memory delay for PSI
+ * @q: the request queue IO was submitted on
+ * @use_memdelay: do we charge this to memory delay for PSI
*
* This is called by the IO controller when we know there's delay accumulated
* for the blkg for this task. We do not pass the blkg because there are places
@@ -1769,8 +1769,9 @@ void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay)
/**
* blkcg_add_delay - add delay to this blkg
- * @now - the current time in nanoseconds
- * @delta - how many nanoseconds of delay to add
+ * @blkg: blkg of interest
+ * @now: the current time in nanoseconds
+ * @delta: how many nanoseconds of delay to add
*
* Charge @delta to the blkg's current delay accumulation. This is used to
* throttle tasks if an IO controller thinks we need more throttling.
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index 2620baa1f699..507212d75ee2 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -75,6 +75,7 @@
#include <linux/blk-mq.h>
#include "blk-rq-qos.h"
#include "blk-stat.h"
+#include "blk.h"
#define DEFAULT_SCALE_COOKIE 1000000U
diff --git a/block/blk-mq.c b/block/blk-mq.c
index a9c181603cbd..70b210a308c4 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -782,7 +782,6 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
if (kick_requeue_list)
blk_mq_kick_requeue_list(q);
}
-EXPORT_SYMBOL(blk_mq_add_to_requeue_list);
void blk_mq_kick_requeue_list(struct request_queue *q)
{
@@ -1093,8 +1092,7 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
bool ret;
if (!(hctx->flags & BLK_MQ_F_TAG_SHARED)) {
- if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
- set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
+ blk_mq_sched_mark_restart_hctx(hctx);
/*
* It's possible that a tag was freed in the window between the
@@ -2857,7 +2855,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
/*
* Default to classic polling
*/
- q->poll_nsec = -1;
+ q->poll_nsec = BLK_MQ_POLL_CLASSIC;
blk_mq_init_cpu_queues(q, set->nr_hw_queues);
blk_mq_add_queue_tag_set(set, q);
@@ -3392,7 +3390,7 @@ static bool blk_mq_poll_hybrid(struct request_queue *q,
{
struct request *rq;
- if (q->poll_nsec == -1)
+ if (q->poll_nsec == BLK_MQ_POLL_CLASSIC)
return false;
if (!blk_qc_t_is_internal(cookie))
diff --git a/block/blk-mq.h b/block/blk-mq.h
index c11353a3749d..0ed8e5a8729f 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -41,6 +41,8 @@ void blk_mq_free_queue(struct request_queue *q);
int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
void blk_mq_wake_waiters(struct request_queue *q);
bool blk_mq_dispatch_rq_list(struct request_queue *, struct list_head *, bool);
+void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
+ bool kick_requeue_list);
void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
bool blk_mq_get_driver_tag(struct request *rq);
struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 59685918167e..422327089e0f 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -360,8 +360,8 @@ static ssize_t queue_poll_delay_show(struct request_queue *q, char *page)
{
int val;
- if (q->poll_nsec == -1)
- val = -1;
+ if (q->poll_nsec == BLK_MQ_POLL_CLASSIC)
+ val = BLK_MQ_POLL_CLASSIC;
else
val = q->poll_nsec / 1000;
@@ -380,10 +380,12 @@ static ssize_t queue_poll_delay_store(struct request_queue *q, const char *page,
if (err < 0)
return err;
- if (val == -1)
- q->poll_nsec = -1;
- else
+ if (val == BLK_MQ_POLL_CLASSIC)
+ q->poll_nsec = BLK_MQ_POLL_CLASSIC;
+ else if (val >= 0)
q->poll_nsec = val * 1000;
+ else
+ return -EINVAL;
return count;
}
diff --git a/drivers/auxdisplay/Kconfig b/drivers/auxdisplay/Kconfig
index 57410f9c5d44..c52c738e554a 100644
--- a/drivers/auxdisplay/Kconfig
+++ b/drivers/auxdisplay/Kconfig
@@ -164,9 +164,7 @@ config ARM_CHARLCD
line and the Linux version on the second line, but that's
still useful.
-endif # AUXDISPLAY
-
-menuconfig PANEL
+menuconfig PARPORT_PANEL
tristate "Parallel port LCD/Keypad Panel support"
depends on PARPORT
select CHARLCD
@@ -178,7 +176,7 @@ menuconfig PANEL
compiled as a module, or linked into the kernel and started at boot.
If you don't understand what all this is about, say N.
-if PANEL
+if PARPORT_PANEL
config PANEL_PARPORT
int "Default parallel port number (0=LPT1)"
@@ -419,8 +417,11 @@ config PANEL_LCD_PIN_BL
Default for the 'BL' pin in custom profile is '0' (uncontrolled).
+endif # PARPORT_PANEL
+
config PANEL_CHANGE_MESSAGE
bool "Change LCD initialization message ?"
+ depends on CHARLCD
default "n"
---help---
This allows you to replace the boot message indicating the kernel version
@@ -444,7 +445,34 @@ config PANEL_BOOT_MESSAGE
An empty message will only clear the display at driver init time. Any other
printf()-formatted message is valid with newline and escape codes.
-endif # PANEL
+choice
+ prompt "Backlight initial state"
+ default CHARLCD_BL_FLASH
+
+ config CHARLCD_BL_OFF
+ bool "Off"
+ help
+ Backlight is initially turned off
+
+ config CHARLCD_BL_ON
+ bool "On"
+ help
+ Backlight is initially turned on
+
+ config CHARLCD_BL_FLASH
+ bool "Flash"
+ help
+ Backlight is flashed briefly on init
+
+endchoice
+
+endif # AUXDISPLAY
+
+config PANEL
+ tristate "Parallel port LCD/Keypad Panel support (OLD OPTION)"
+ depends on PARPORT
+ select AUXDISPLAY
+ select PARPORT_PANEL
config CHARLCD
tristate "Character LCD core support" if COMPILE_TEST
diff --git a/drivers/auxdisplay/Makefile b/drivers/auxdisplay/Makefile
index 7ac6776ca3f6..cf54b5efb07e 100644
--- a/drivers/auxdisplay/Makefile
+++ b/drivers/auxdisplay/Makefile
@@ -10,4 +10,4 @@ obj-$(CONFIG_CFAG12864B) += cfag12864b.o cfag12864bfb.o
obj-$(CONFIG_IMG_ASCII_LCD) += img-ascii-lcd.o
obj-$(CONFIG_HD44780) += hd44780.o
obj-$(CONFIG_HT16K33) += ht16k33.o
-obj-$(CONFIG_PANEL) += panel.o
+obj-$(CONFIG_PARPORT_PANEL) += panel.o
diff --git a/drivers/auxdisplay/charlcd.c b/drivers/auxdisplay/charlcd.c
index 60e0b772673f..92745efefb54 100644
--- a/drivers/auxdisplay/charlcd.c
+++ b/drivers/auxdisplay/charlcd.c
@@ -91,7 +91,7 @@ struct charlcd_priv {
unsigned long long drvdata[0];
};
-#define to_priv(p) container_of(p, struct charlcd_priv, lcd)
+#define charlcd_to_priv(p) container_of(p, struct charlcd_priv, lcd)
/* Device single-open policy control */
static atomic_t charlcd_available = ATOMIC_INIT(1);
@@ -105,7 +105,7 @@ static void long_sleep(int ms)
/* turn the backlight on or off */
static void charlcd_backlight(struct charlcd *lcd, int on)
{
- struct charlcd_priv *priv = to_priv(lcd);
+ struct charlcd_priv *priv = charlcd_to_priv(lcd);
if (!lcd->ops->backlight)
return;
@@ -134,7 +134,7 @@ static void charlcd_bl_off(struct work_struct *work)
/* turn the backlight on for a little while */
void charlcd_poke(struct charlcd *lcd)
{
- struct charlcd_priv *priv = to_priv(lcd);
+ struct charlcd_priv *priv = charlcd_to_priv(lcd);
if (!lcd->ops->backlight)
return;
@@ -152,7 +152,7 @@ EXPORT_SYMBOL_GPL(charlcd_poke);
static void charlcd_gotoxy(struct charlcd *lcd)
{
- struct charlcd_priv *priv = to_priv(lcd);
+ struct charlcd_priv *priv = charlcd_to_priv(lcd);
unsigned int addr;
/*
@@ -170,7 +170,7 @@ static void charlcd_gotoxy(struct charlcd *lcd)
static void charlcd_home(struct charlcd *lcd)
{
- struct charlcd_priv *priv = to_priv(lcd);
+ struct charlcd_priv *priv = charlcd_to_priv(lcd);
priv->addr.x = 0;
priv->addr.y = 0;
@@ -179,7 +179,7 @@ static void charlcd_home(struct charlcd *lcd)
static void charlcd_print(struct charlcd *lcd, char c)
{
- struct charlcd_priv *priv = to_priv(lcd);
+ struct charlcd_priv *priv = charlcd_to_priv(lcd);
if (priv->addr.x < lcd->bwidth) {
if (lcd->char_conv)
@@ -211,7 +211,7 @@ static void charlcd_clear_fast(struct charlcd *lcd)
/* clears the display and resets X/Y */
static void charlcd_clear_display(struct charlcd *lcd)
{
- struct charlcd_priv *priv = to_priv(lcd);
+ struct charlcd_priv *priv = charlcd_to_priv(lcd);
lcd->ops->write_cmd(lcd, LCD_CMD_DISPLAY_CLEAR);
priv->addr.x = 0;
@@ -223,7 +223,7 @@ static void charlcd_clear_display(struct charlcd *lcd)
static int charlcd_init_display(struct charlcd *lcd)
{
void (*write_cmd_raw)(struct charlcd *lcd, int cmd);
- struct charlcd_priv *priv = to_priv(lcd);
+ struct charlcd_priv *priv = charlcd_to_priv(lcd);
u8 init;
if (lcd->ifwidth != 4 && lcd->ifwidth != 8)
@@ -369,7 +369,7 @@ static bool parse_xy(const char *s, unsigned long *x, unsigned long *y)
static inline int handle_lcd_special_code(struct charlcd *lcd)
{
- struct charlcd_priv *priv = to_priv(lcd);
+ struct charlcd_priv *priv = charlcd_to_priv(lcd);
/* LCD special codes */
@@ -580,7 +580,7 @@ static inline int handle_lcd_special_code(struct charlcd *lcd)
static void charlcd_write_char(struct charlcd *lcd, char c)
{
- struct charlcd_priv *priv = to_priv(lcd);
+ struct charlcd_priv *priv = charlcd_to_priv(lcd);
/* first, we'll test if we're in escape mode */
if ((c != '\n') && priv->esc_seq.len >= 0) {
@@ -705,7 +705,7 @@ static ssize_t charlcd_write(struct file *file, const char __user *buf,
static int charlcd_open(struct inode *inode, struct file *file)
{
- struct charlcd_priv *priv = to_priv(the_charlcd);
+ struct charlcd_priv *priv = charlcd_to_priv(the_charlcd);
int ret;
ret = -EBUSY;
@@ -763,10 +763,24 @@ static void charlcd_puts(struct charlcd *lcd, const char *s)
}
}
+#ifdef CONFIG_PANEL_BOOT_MESSAGE
+#define LCD_INIT_TEXT CONFIG_PANEL_BOOT_MESSAGE
+#else
+#define LCD_INIT_TEXT "Linux-" UTS_RELEASE "\n"
+#endif
+
+#ifdef CONFIG_CHARLCD_BL_ON
+#define LCD_INIT_BL "\x1b[L+"
+#elif defined(CONFIG_CHARLCD_BL_FLASH)
+#define LCD_INIT_BL "\x1b[L*"
+#else
+#define LCD_INIT_BL "\x1b[L-"
+#endif
+
/* initialize the LCD driver */
static int charlcd_init(struct charlcd *lcd)
{
- struct charlcd_priv *priv = to_priv(lcd);
+ struct charlcd_priv *priv = charlcd_to_priv(lcd);
int ret;
if (lcd->ops->backlight) {
@@ -784,13 +798,8 @@ static int charlcd_init(struct charlcd *lcd)
return ret;
/* display a short message */
-#ifdef CONFIG_PANEL_CHANGE_MESSAGE
-#ifdef CONFIG_PANEL_BOOT_MESSAGE
- charlcd_puts(lcd, "\x1b[Lc\x1b[Lb\x1b[L*" CONFIG_PANEL_BOOT_MESSAGE);
-#endif
-#else
- charlcd_puts(lcd, "\x1b[Lc\x1b[Lb\x1b[L*Linux-" UTS_RELEASE "\n");
-#endif
+ charlcd_puts(lcd, "\x1b[Lc\x1b[Lb" LCD_INIT_BL LCD_INIT_TEXT);
+
/* clear the display on the next device opening */
priv->must_clear = true;
charlcd_home(lcd);
@@ -818,6 +827,12 @@ struct charlcd *charlcd_alloc(unsigned int drvdata_size)
}
EXPORT_SYMBOL_GPL(charlcd_alloc);
+void charlcd_free(struct charlcd *lcd)
+{
+ kfree(charlcd_to_priv(lcd));
+}
+EXPORT_SYMBOL_GPL(charlcd_free);
+
static int panel_notify_sys(struct notifier_block *this, unsigned long code,
void *unused)
{
@@ -866,7 +881,7 @@ EXPORT_SYMBOL_GPL(charlcd_register);
int charlcd_unregister(struct charlcd *lcd)
{
- struct charlcd_priv *priv = to_priv(lcd);
+ struct charlcd_priv *priv = charlcd_to_priv(lcd);
unregister_reboot_notifier(&panel_notifier);
charlcd_puts(lcd, "\x0cLCD driver unloaded.\x1b[Lc\x1b[Lb\x1b[L-");
diff --git a/drivers/auxdisplay/hd44780.c b/drivers/auxdisplay/hd44780.c
index 9ad93ea42fdc..ab15b64707ad 100644
--- a/drivers/auxdisplay/hd44780.c
+++ b/drivers/auxdisplay/hd44780.c
@@ -271,7 +271,7 @@ static int hd44780_probe(struct platform_device *pdev)
return 0;
fail:
- kfree(lcd);
+ charlcd_free(lcd);
return ret;
}
@@ -280,6 +280,8 @@ static int hd44780_remove(struct platform_device *pdev)
struct charlcd *lcd = platform_get_drvdata(pdev);
charlcd_unregister(lcd);
+
+ charlcd_free(lcd);
return 0;
}
diff --git a/drivers/auxdisplay/panel.c b/drivers/auxdisplay/panel.c
index 21b9b2f2470a..e06de63497cf 100644
--- a/drivers/auxdisplay/panel.c
+++ b/drivers/auxdisplay/panel.c
@@ -1620,7 +1620,7 @@ err_lcd_unreg:
if (lcd.enabled)
charlcd_unregister(lcd.charlcd);
err_unreg_device:
- kfree(lcd.charlcd);
+ charlcd_free(lcd.charlcd);
lcd.charlcd = NULL;
parport_unregister_device(pprt);
pprt = NULL;
@@ -1647,7 +1647,7 @@ static void panel_detach(struct parport *port)
if (lcd.enabled) {
charlcd_unregister(lcd.charlcd);
lcd.initialized = false;
- kfree(lcd.charlcd);
+ charlcd_free(lcd.charlcd);
lcd.charlcd = NULL;
}
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 1e6edd568214..bf1c61cab8eb 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -656,7 +656,7 @@ static int loop_validate_file(struct file *file, struct block_device *bdev)
return -EBADF;
l = f->f_mapping->host->i_bdev->bd_disk->private_data;
- if (l->lo_state == Lo_unbound) {
+ if (l->lo_state != Lo_bound) {
return -EINVAL;
}
f = l->lo_backing_file;
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 96670eefaeb2..377a694dc228 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -749,8 +749,12 @@ static int pcd_detect(void)
return 0;
printk("%s: No CD-ROM drive found\n", name);
- for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++)
+ for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) {
+ blk_cleanup_queue(cd->disk->queue);
+ cd->disk->queue = NULL;
+ blk_mq_free_tag_set(&cd->tag_set);
put_disk(cd->disk);
+ }
pi_unregister_driver(par_drv);
return -1;
}
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index e92e7a8eeeb2..103b617cdc31 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -761,8 +761,12 @@ static int pf_detect(void)
return 0;
printk("%s: No ATAPI disk detected\n", name);
- for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++)
+ for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) {
+ blk_cleanup_queue(pf->disk->queue);
+ pf->disk->queue = NULL;
+ blk_mq_free_tag_set(&pf->tag_set);
put_disk(pf->disk);
+ }
pi_unregister_driver(par_drv);
return -1;
}
@@ -1047,13 +1051,15 @@ static void __exit pf_exit(void)
int unit;
unregister_blkdev(major, name);
for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) {
- if (!pf->present)
- continue;
- del_gendisk(pf->disk);
+ if (pf->present)
+ del_gendisk(pf->disk);
+
blk_cleanup_queue(pf->disk->queue);
blk_mq_free_tag_set(&pf->tag_set);
put_disk(pf->disk);
- pi_release(pf->pi);
+
+ if (pf->present)
+ pi_release(pf->pi);
}
}
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 4ba967d65cf9..2210c1b9491b 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -833,7 +833,7 @@ static int parse_rbd_opts_token(char *c, void *private)
pctx->opts->queue_depth = intval;
break;
case Opt_alloc_size:
- if (intval < 1) {
+ if (intval < SECTOR_SIZE) {
pr_err("alloc_size out of range\n");
return -EINVAL;
}
@@ -924,23 +924,6 @@ static void rbd_put_client(struct rbd_client *rbdc)
kref_put(&rbdc->kref, rbd_client_release);
}
-static int wait_for_latest_osdmap(struct ceph_client *client)
-{
- u64 newest_epoch;
- int ret;
-
- ret = ceph_monc_get_version(&client->monc, "osdmap", &newest_epoch);
- if (ret)
- return ret;
-
- if (client->osdc.osdmap->epoch >= newest_epoch)
- return 0;
-
- ceph_osdc_maybe_request_map(&client->osdc);
- return ceph_monc_wait_osdmap(&client->monc, newest_epoch,
- client->options->mount_timeout);
-}
-
/*
* Get a ceph client with specific addr and configuration, if one does
* not exist create it. Either way, ceph_opts is consumed by this
@@ -960,7 +943,8 @@ static struct rbd_client *rbd_get_client(struct ceph_options *ceph_opts)
* Using an existing client. Make sure ->pg_pools is up to
* date before we look up the pool id in do_rbd_add().
*/
- ret = wait_for_latest_osdmap(rbdc->client);
+ ret = ceph_wait_for_latest_osdmap(rbdc->client,
+ rbdc->client->options->mount_timeout);
if (ret) {
rbd_warn(NULL, "failed to get latest osdmap: %d", ret);
rbd_put_client(rbdc);
@@ -4203,12 +4187,12 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
q->limits.max_sectors = queue_max_hw_sectors(q);
blk_queue_max_segments(q, USHRT_MAX);
blk_queue_max_segment_size(q, UINT_MAX);
- blk_queue_io_min(q, objset_bytes);
- blk_queue_io_opt(q, objset_bytes);
+ blk_queue_io_min(q, rbd_dev->opts->alloc_size);
+ blk_queue_io_opt(q, rbd_dev->opts->alloc_size);
if (rbd_dev->opts->trim) {
blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
- q->limits.discard_granularity = objset_bytes;
+ q->limits.discard_granularity = rbd_dev->opts->alloc_size;
blk_queue_max_discard_sectors(q, objset_bytes >> SECTOR_SHIFT);
blk_queue_max_write_zeroes_sectors(q, objset_bytes >> SECTOR_SHIFT);
}
diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
index 3c3cf89f713f..14bac4966c87 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_main.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
@@ -1801,6 +1801,12 @@ static int hisi_sas_I_T_nexus_reset(struct domain_device *device)
}
hisi_sas_dereg_device(hisi_hba, device);
+ if (dev_is_sata(device)) {
+ rc = hisi_sas_softreset_ata_disk(device);
+ if (rc)
+ return TMF_RESP_FUNC_FAILED;
+ }
+
rc = hisi_sas_debug_I_T_nexus_reset(device);
if ((rc == TMF_RESP_FUNC_COMPLETE) || (rc == -ENODEV))
diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c
index 1135e74646e2..8cec5230fe31 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
@@ -96,6 +96,7 @@ static int client_reserve = 1;
static char partition_name[96] = "UNKNOWN";
static unsigned int partition_number = -1;
static LIST_HEAD(ibmvscsi_head);
+static DEFINE_SPINLOCK(ibmvscsi_driver_lock);
static struct scsi_transport_template *ibmvscsi_transport_template;
@@ -2270,7 +2271,9 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id)
}
dev_set_drvdata(&vdev->dev, hostdata);
+ spin_lock(&ibmvscsi_driver_lock);
list_add_tail(&hostdata->host_list, &ibmvscsi_head);
+ spin_unlock(&ibmvscsi_driver_lock);
return 0;
add_srp_port_failed:
@@ -2292,15 +2295,27 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id)
static int ibmvscsi_remove(struct vio_dev *vdev)
{
struct ibmvscsi_host_data *hostdata = dev_get_drvdata(&vdev->dev);
- list_del(&hostdata->host_list);
- unmap_persist_bufs(hostdata);
+ unsigned long flags;
+
+ srp_remove_host(hostdata->host);
+ scsi_remove_host(hostdata->host);
+
+ purge_requests(hostdata, DID_ERROR);
+
+ spin_lock_irqsave(hostdata->host->host_lock, flags);
release_event_pool(&hostdata->pool, hostdata);
+ spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+
ibmvscsi_release_crq_queue(&hostdata->queue, hostdata,
max_events);
kthread_stop(hostdata->work_thread);
- srp_remove_host(hostdata->host);
- scsi_remove_host(hostdata->host);
+ unmap_persist_bufs(hostdata);
+
+ spin_lock(&ibmvscsi_driver_lock);
+ list_del(&hostdata->host_list);
+ spin_unlock(&ibmvscsi_driver_lock);
+
scsi_host_put(hostdata->host);
return 0;
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 420045155ba0..0c700b140ce7 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -4991,6 +4991,13 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha)
if ((domain & 0xf0) == 0xf0)
continue;
+ /* Bypass if not same domain and area of adapter. */
+ if (area && domain && ((area != vha->d_id.b.area) ||
+ (domain != vha->d_id.b.domain)) &&
+ (ha->current_topology == ISP_CFG_NL))
+ continue;
+
+
/* Bypass invalid local loop ID. */
if (loop_id > LAST_LOCAL_LOOP_ID)
continue;
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 677f82fdf56f..91f576d743fe 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -1517,7 +1517,7 @@ __qla2xxx_eh_generic_reset(char *name, enum nexus_wait_type type,
goto eh_reset_failed;
}
err = 2;
- if (do_reset(fcport, cmd->device->lun, blk_mq_rq_cpu(cmd->request) + 1)
+ if (do_reset(fcport, cmd->device->lun, 1)
!= QLA_SUCCESS) {
ql_log(ql_log_warn, vha, 0x800c,
"do_reset failed for cmd=%p.\n", cmd);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 20189675677a..601b9f1de267 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -585,10 +585,17 @@ static bool scsi_end_request(struct request *req, blk_status_t error,
if (!blk_rq_is_scsi(req)) {
WARN_ON_ONCE(!(cmd->flags & SCMD_INITIALIZED));
cmd->flags &= ~SCMD_INITIALIZED;
- destroy_rcu_head(&cmd->rcu);
}
/*
+ * Calling rcu_barrier() is not necessary here because the
+ * SCSI error handler guarantees that the function called by
+ * call_rcu() has been called before scsi_end_request() is
+ * called.
+ */
+ destroy_rcu_head(&cmd->rcu);
+
+ /*
* In the MQ case the command gets freed by __blk_mq_end_request,
* so we have to do all cleanup that depends on it earlier.
*
@@ -2541,8 +2548,10 @@ void scsi_device_resume(struct scsi_device *sdev)
* device deleted during suspend)
*/
mutex_lock(&sdev->state_mutex);
- sdev->quiesced_by = NULL;
- blk_clear_pm_only(sdev->request_queue);
+ if (sdev->quiesced_by) {
+ sdev->quiesced_by = NULL;
+ blk_clear_pm_only(sdev->request_queue);
+ }
if (sdev->sdev_state == SDEV_QUIESCE)
scsi_device_set_state(sdev, SDEV_RUNNING);
mutex_unlock(&sdev->state_mutex);
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 0508831d6fb9..0a82e93566dc 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -2200,6 +2200,8 @@ void iscsi_remove_session(struct iscsi_cls_session *session)
scsi_target_unblock(&session->dev, SDEV_TRANSPORT_OFFLINE);
/* flush running scans then delete devices */
flush_work(&session->scan_work);
+ /* flush running unbind operations */
+ flush_work(&session->unbind_work);
__iscsi_unbind_session(&session->unbind_work);
/* hw iscsi may not have removed all connections from session */
diff --git a/fs/block_dev.c b/fs/block_dev.c
index e9faa52bb489..78d3257435c0 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -336,12 +336,14 @@ static void blkdev_bio_end_io(struct bio *bio)
if (should_dirty) {
bio_check_pages_dirty(bio);
} else {
- struct bio_vec *bvec;
- int i;
- struct bvec_iter_all iter_all;
+ if (!bio_flagged(bio, BIO_NO_PAGE_REF)) {
+ struct bvec_iter_all iter_all;
+ struct bio_vec *bvec;
+ int i;
- bio_for_each_segment_all(bvec, bio, i, iter_all)
- put_page(bvec->bv_page);
+ bio_for_each_segment_all(bvec, bio, i, iter_all)
+ put_page(bvec->bv_page);
+ }
bio_put(bio);
}
}
diff --git a/fs/io_uring.c b/fs/io_uring.c
index c88088d92613..6aaa30580a2b 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -189,17 +189,28 @@ struct sqe_submit {
bool needs_fixed_file;
};
+/*
+ * First field must be the file pointer in all the
+ * iocb unions! See also 'struct kiocb' in <linux/fs.h>
+ */
struct io_poll_iocb {
struct file *file;
struct wait_queue_head *head;
__poll_t events;
- bool woken;
+ bool done;
bool canceled;
struct wait_queue_entry wait;
};
+/*
+ * NOTE! Each of the iocb union members has the file pointer
+ * as the first entry in their struct definition. So you can
+ * access the file pointer through any of the sub-structs,
+ * or directly as just 'ki_filp' in this struct.
+ */
struct io_kiocb {
union {
+ struct file *file;
struct kiocb rw;
struct io_poll_iocb poll;
};
@@ -214,6 +225,7 @@ struct io_kiocb {
#define REQ_F_IOPOLL_COMPLETED 2 /* polled IO has completed */
#define REQ_F_FIXED_FILE 4 /* ctx owns file */
#define REQ_F_SEQ_PREV 8 /* sequential with previous */
+#define REQ_F_PREPPED 16 /* prep already done */
u64 user_data;
u64 error;
@@ -355,20 +367,25 @@ static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
}
}
-static void io_cqring_add_event(struct io_ring_ctx *ctx, u64 ki_user_data,
+static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
+{
+ if (waitqueue_active(&ctx->wait))
+ wake_up(&ctx->wait);
+ if (waitqueue_active(&ctx->sqo_wait))
+ wake_up(&ctx->sqo_wait);
+}
+
+static void io_cqring_add_event(struct io_ring_ctx *ctx, u64 user_data,
long res, unsigned ev_flags)
{
unsigned long flags;
spin_lock_irqsave(&ctx->completion_lock, flags);
- io_cqring_fill_event(ctx, ki_user_data, res, ev_flags);
+ io_cqring_fill_event(ctx, user_data, res, ev_flags);
io_commit_cqring(ctx);
spin_unlock_irqrestore(&ctx->completion_lock, flags);
- if (waitqueue_active(&ctx->wait))
- wake_up(&ctx->wait);
- if (waitqueue_active(&ctx->sqo_wait))
- wake_up(&ctx->sqo_wait);
+ io_cqring_ev_posted(ctx);
}
static void io_ring_drop_ctx_refs(struct io_ring_ctx *ctx, unsigned refs)
@@ -382,13 +399,14 @@ static void io_ring_drop_ctx_refs(struct io_ring_ctx *ctx, unsigned refs)
static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
struct io_submit_state *state)
{
+ gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
struct io_kiocb *req;
if (!percpu_ref_tryget(&ctx->refs))
return NULL;
if (!state) {
- req = kmem_cache_alloc(req_cachep, __GFP_NOWARN);
+ req = kmem_cache_alloc(req_cachep, gfp);
if (unlikely(!req))
goto out;
} else if (!state->free_reqs) {
@@ -396,10 +414,18 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
int ret;
sz = min_t(size_t, state->ios_left, ARRAY_SIZE(state->reqs));
- ret = kmem_cache_alloc_bulk(req_cachep, __GFP_NOWARN, sz,
- state->reqs);
- if (unlikely(ret <= 0))
- goto out;
+ ret = kmem_cache_alloc_bulk(req_cachep, gfp, sz, state->reqs);
+
+ /*
+ * Bulk alloc is all-or-nothing. If we fail to get a batch,
+ * retry single alloc to be on the safe side.
+ */
+ if (unlikely(ret <= 0)) {
+ state->reqs[0] = kmem_cache_alloc(req_cachep, gfp);
+ if (!state->reqs[0])
+ goto out;
+ ret = 1;
+ }
state->free_reqs = ret - 1;
state->cur_req = 1;
req = state->reqs[0];
@@ -411,7 +437,8 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
req->ctx = ctx;
req->flags = 0;
- refcount_set(&req->refs, 0);
+ /* one is dropped after submission, the other at completion */
+ refcount_set(&req->refs, 2);
return req;
out:
io_ring_drop_ctx_refs(ctx, 1);
@@ -429,10 +456,16 @@ static void io_free_req_many(struct io_ring_ctx *ctx, void **reqs, int *nr)
static void io_free_req(struct io_kiocb *req)
{
- if (!refcount_read(&req->refs) || refcount_dec_and_test(&req->refs)) {
- io_ring_drop_ctx_refs(req->ctx, 1);
- kmem_cache_free(req_cachep, req);
- }
+ if (req->file && !(req->flags & REQ_F_FIXED_FILE))
+ fput(req->file);
+ io_ring_drop_ctx_refs(req->ctx, 1);
+ kmem_cache_free(req_cachep, req);
+}
+
+static void io_put_req(struct io_kiocb *req)
+{
+ if (refcount_dec_and_test(&req->refs))
+ io_free_req(req);
}
/*
@@ -442,44 +475,34 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
struct list_head *done)
{
void *reqs[IO_IOPOLL_BATCH];
- int file_count, to_free;
- struct file *file = NULL;
struct io_kiocb *req;
+ int to_free;
- file_count = to_free = 0;
+ to_free = 0;
while (!list_empty(done)) {
req = list_first_entry(done, struct io_kiocb, list);
list_del(&req->list);
io_cqring_fill_event(ctx, req->user_data, req->error, 0);
-
- reqs[to_free++] = req;
(*nr_events)++;
- /*
- * Batched puts of the same file, to avoid dirtying the
- * file usage count multiple times, if avoidable.
- */
- if (!(req->flags & REQ_F_FIXED_FILE)) {
- if (!file) {
- file = req->rw.ki_filp;
- file_count = 1;
- } else if (file == req->rw.ki_filp) {
- file_count++;
+ if (refcount_dec_and_test(&req->refs)) {
+ /* If we're not using fixed files, we have to pair the
+ * completion part with the file put. Use regular
+ * completions for those, only batch free for fixed
+ * file.
+ */
+ if (req->flags & REQ_F_FIXED_FILE) {
+ reqs[to_free++] = req;
+ if (to_free == ARRAY_SIZE(reqs))
+ io_free_req_many(ctx, reqs, &to_free);
} else {
- fput_many(file, file_count);
- file = req->rw.ki_filp;
- file_count = 1;
+ io_free_req(req);
}
}
-
- if (to_free == ARRAY_SIZE(reqs))
- io_free_req_many(ctx, reqs, &to_free);
}
- io_commit_cqring(ctx);
- if (file)
- fput_many(file, file_count);
+ io_commit_cqring(ctx);
io_free_req_many(ctx, reqs, &to_free);
}
@@ -602,21 +625,14 @@ static void kiocb_end_write(struct kiocb *kiocb)
}
}
-static void io_fput(struct io_kiocb *req)
-{
- if (!(req->flags & REQ_F_FIXED_FILE))
- fput(req->rw.ki_filp);
-}
-
static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
{
struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
kiocb_end_write(kiocb);
- io_fput(req);
io_cqring_add_event(req->ctx, req->user_data, res, 0);
- io_free_req(req);
+ io_put_req(req);
}
static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
@@ -731,31 +747,18 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
const struct io_uring_sqe *sqe = s->sqe;
struct io_ring_ctx *ctx = req->ctx;
struct kiocb *kiocb = &req->rw;
- unsigned ioprio, flags;
- int fd, ret;
+ unsigned ioprio;
+ int ret;
+ if (!req->file)
+ return -EBADF;
/* For -EAGAIN retry, everything is already prepped */
- if (kiocb->ki_filp)
+ if (req->flags & REQ_F_PREPPED)
return 0;
- flags = READ_ONCE(sqe->flags);
- fd = READ_ONCE(sqe->fd);
+ if (force_nonblock && !io_file_supports_async(req->file))
+ force_nonblock = false;
- if (flags & IOSQE_FIXED_FILE) {
- if (unlikely(!ctx->user_files ||
- (unsigned) fd >= ctx->nr_user_files))
- return -EBADF;
- kiocb->ki_filp = ctx->user_files[fd];
- req->flags |= REQ_F_FIXED_FILE;
- } else {
- if (s->needs_fixed_file)
- return -EBADF;
- kiocb->ki_filp = io_file_get(state, fd);
- if (unlikely(!kiocb->ki_filp))
- return -EBADF;
- if (force_nonblock && !io_file_supports_async(kiocb->ki_filp))
- force_nonblock = false;
- }
kiocb->ki_pos = READ_ONCE(sqe->off);
kiocb->ki_flags = iocb_flags(kiocb->ki_filp);
kiocb->ki_hint = ki_hint_validate(file_write_hint(kiocb->ki_filp));
@@ -764,7 +767,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
if (ioprio) {
ret = ioprio_check_cap(ioprio);
if (ret)
- goto out_fput;
+ return ret;
kiocb->ki_ioprio = ioprio;
} else
@@ -772,38 +775,26 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags));
if (unlikely(ret))
- goto out_fput;
+ return ret;
if (force_nonblock) {
kiocb->ki_flags |= IOCB_NOWAIT;
req->flags |= REQ_F_FORCE_NONBLOCK;
}
if (ctx->flags & IORING_SETUP_IOPOLL) {
- ret = -EOPNOTSUPP;
if (!(kiocb->ki_flags & IOCB_DIRECT) ||
!kiocb->ki_filp->f_op->iopoll)
- goto out_fput;
+ return -EOPNOTSUPP;
req->error = 0;
kiocb->ki_flags |= IOCB_HIPRI;
kiocb->ki_complete = io_complete_rw_iopoll;
} else {
- if (kiocb->ki_flags & IOCB_HIPRI) {
- ret = -EINVAL;
- goto out_fput;
- }
+ if (kiocb->ki_flags & IOCB_HIPRI)
+ return -EINVAL;
kiocb->ki_complete = io_complete_rw;
}
+ req->flags |= REQ_F_PREPPED;
return 0;
-out_fput:
- if (!(flags & IOSQE_FIXED_FILE)) {
- /*
- * in case of error, we didn't use this file reference. drop it.
- */
- if (state)
- state->used_refs--;
- io_file_put(state, kiocb->ki_filp);
- }
- return ret;
}
static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
@@ -864,6 +855,9 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw,
iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len);
if (offset)
iov_iter_advance(iter, offset);
+
+ /* don't drop a reference to these pages */
+ iter->type |= ITER_BVEC_FLAG_NO_REF;
return 0;
}
@@ -887,7 +881,7 @@ static int io_import_iovec(struct io_ring_ctx *ctx, int rw,
opcode = READ_ONCE(sqe->opcode);
if (opcode == IORING_OP_READ_FIXED ||
opcode == IORING_OP_WRITE_FIXED) {
- ssize_t ret = io_import_fixed(ctx, rw, sqe, iter);
+ int ret = io_import_fixed(ctx, rw, sqe, iter);
*iovec = NULL;
return ret;
}
@@ -945,31 +939,29 @@ static void io_async_list_note(int rw, struct io_kiocb *req, size_t len)
async_list->io_end = io_end;
}
-static ssize_t io_read(struct io_kiocb *req, const struct sqe_submit *s,
- bool force_nonblock, struct io_submit_state *state)
+static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
+ bool force_nonblock, struct io_submit_state *state)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct kiocb *kiocb = &req->rw;
struct iov_iter iter;
struct file *file;
size_t iov_count;
- ssize_t ret;
+ int ret;
ret = io_prep_rw(req, s, force_nonblock, state);
if (ret)
return ret;
file = kiocb->ki_filp;
- ret = -EBADF;
if (unlikely(!(file->f_mode & FMODE_READ)))
- goto out_fput;
- ret = -EINVAL;
+ return -EBADF;
if (unlikely(!file->f_op->read_iter))
- goto out_fput;
+ return -EINVAL;
ret = io_import_iovec(req->ctx, READ, s, &iovec, &iter);
if (ret)
- goto out_fput;
+ return ret;
iov_count = iov_iter_count(&iter);
ret = rw_verify_area(READ, file, &kiocb->ki_pos, iov_count);
@@ -991,38 +983,32 @@ static ssize_t io_read(struct io_kiocb *req, const struct sqe_submit *s,
}
}
kfree(iovec);
-out_fput:
- /* Hold on to the file for -EAGAIN */
- if (unlikely(ret && ret != -EAGAIN))
- io_fput(req);
return ret;
}
-static ssize_t io_write(struct io_kiocb *req, const struct sqe_submit *s,
- bool force_nonblock, struct io_submit_state *state)
+static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
+ bool force_nonblock, struct io_submit_state *state)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct kiocb *kiocb = &req->rw;
struct iov_iter iter;
struct file *file;
size_t iov_count;
- ssize_t ret;
+ int ret;
ret = io_prep_rw(req, s, force_nonblock, state);
if (ret)
return ret;
- ret = -EBADF;
file = kiocb->ki_filp;
if (unlikely(!(file->f_mode & FMODE_WRITE)))
- goto out_fput;
- ret = -EINVAL;
+ return -EBADF;
if (unlikely(!file->f_op->write_iter))
- goto out_fput;
+ return -EINVAL;
ret = io_import_iovec(req->ctx, WRITE, s, &iovec, &iter);
if (ret)
- goto out_fput;
+ return ret;
iov_count = iov_iter_count(&iter);
@@ -1054,10 +1040,6 @@ static ssize_t io_write(struct io_kiocb *req, const struct sqe_submit *s,
}
out_free:
kfree(iovec);
-out_fput:
- /* Hold on to the file for -EAGAIN */
- if (unlikely(ret && ret != -EAGAIN))
- io_fput(req);
return ret;
}
@@ -1072,29 +1054,19 @@ static int io_nop(struct io_kiocb *req, u64 user_data)
if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- /*
- * Twilight zone - it's possible that someone issued an opcode that
- * has a file attached, then got -EAGAIN on submission, and changed
- * the sqe before we retried it from async context. Avoid dropping
- * a file reference for this malicious case, and flag the error.
- */
- if (req->rw.ki_filp) {
- err = -EBADF;
- io_fput(req);
- }
io_cqring_add_event(ctx, user_data, err, 0);
- io_free_req(req);
+ io_put_req(req);
return 0;
}
static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_ring_ctx *ctx = req->ctx;
- unsigned flags;
- int fd;
- /* Prep already done */
- if (req->rw.ki_filp)
+ if (!req->file)
+ return -EBADF;
+ /* Prep already done (EAGAIN retry) */
+ if (req->flags & REQ_F_PREPPED)
return 0;
if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
@@ -1102,20 +1074,7 @@ static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index))
return -EINVAL;
- fd = READ_ONCE(sqe->fd);
- flags = READ_ONCE(sqe->flags);
-
- if (flags & IOSQE_FIXED_FILE) {
- if (unlikely(!ctx->user_files || fd >= ctx->nr_user_files))
- return -EBADF;
- req->rw.ki_filp = ctx->user_files[fd];
- req->flags |= REQ_F_FIXED_FILE;
- } else {
- req->rw.ki_filp = fget(fd);
- if (unlikely(!req->rw.ki_filp))
- return -EBADF;
- }
-
+ req->flags |= REQ_F_PREPPED;
return 0;
}
@@ -1144,9 +1103,8 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe,
end > 0 ? end : LLONG_MAX,
fsync_flags & IORING_FSYNC_DATASYNC);
- io_fput(req);
io_cqring_add_event(req->ctx, sqe->user_data, ret, 0);
- io_free_req(req);
+ io_put_req(req);
return 0;
}
@@ -1204,15 +1162,16 @@ static int io_poll_remove(struct io_kiocb *req, const struct io_uring_sqe *sqe)
spin_unlock_irq(&ctx->completion_lock);
io_cqring_add_event(req->ctx, sqe->user_data, ret, 0);
- io_free_req(req);
+ io_put_req(req);
return 0;
}
-static void io_poll_complete(struct io_kiocb *req, __poll_t mask)
+static void io_poll_complete(struct io_ring_ctx *ctx, struct io_kiocb *req,
+ __poll_t mask)
{
- io_cqring_add_event(req->ctx, req->user_data, mangle_poll(mask), 0);
- io_fput(req);
- io_free_req(req);
+ req->poll.done = true;
+ io_cqring_fill_event(ctx, req->user_data, mangle_poll(mask), 0);
+ io_commit_cqring(ctx);
}
static void io_poll_complete_work(struct work_struct *work)
@@ -1240,9 +1199,11 @@ static void io_poll_complete_work(struct work_struct *work)
return;
}
list_del_init(&req->list);
+ io_poll_complete(ctx, req, mask);
spin_unlock_irq(&ctx->completion_lock);
- io_poll_complete(req, mask);
+ io_cqring_ev_posted(ctx);
+ io_put_req(req);
}
static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
@@ -1253,29 +1214,25 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
struct io_kiocb *req = container_of(poll, struct io_kiocb, poll);
struct io_ring_ctx *ctx = req->ctx;
__poll_t mask = key_to_poll(key);
-
- poll->woken = true;
+ unsigned long flags;
/* for instances that support it check for an event match first: */
- if (mask) {
- unsigned long flags;
+ if (mask && !(mask & poll->events))
+ return 0;
- if (!(mask & poll->events))
- return 0;
+ list_del_init(&poll->wait.entry);
- /* try to complete the iocb inline if we can: */
- if (spin_trylock_irqsave(&ctx->completion_lock, flags)) {
- list_del(&req->list);
- spin_unlock_irqrestore(&ctx->completion_lock, flags);
+ if (mask && spin_trylock_irqsave(&ctx->completion_lock, flags)) {
+ list_del(&req->list);
+ io_poll_complete(ctx, req, mask);
+ spin_unlock_irqrestore(&ctx->completion_lock, flags);
- list_del_init(&poll->wait.entry);
- io_poll_complete(req, mask);
- return 1;
- }
+ io_cqring_ev_posted(ctx);
+ io_put_req(req);
+ } else {
+ queue_work(ctx->sqo_wq, &req->work);
}
- list_del_init(&poll->wait.entry);
- queue_work(ctx->sqo_wq, &req->work);
return 1;
}
@@ -1305,36 +1262,23 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
struct io_poll_iocb *poll = &req->poll;
struct io_ring_ctx *ctx = req->ctx;
struct io_poll_table ipt;
- unsigned flags;
+ bool cancel = false;
__poll_t mask;
u16 events;
- int fd;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
if (sqe->addr || sqe->ioprio || sqe->off || sqe->len || sqe->buf_index)
return -EINVAL;
+ if (!poll->file)
+ return -EBADF;
INIT_WORK(&req->work, io_poll_complete_work);
events = READ_ONCE(sqe->poll_events);
poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP;
- flags = READ_ONCE(sqe->flags);
- fd = READ_ONCE(sqe->fd);
-
- if (flags & IOSQE_FIXED_FILE) {
- if (unlikely(!ctx->user_files || fd >= ctx->nr_user_files))
- return -EBADF;
- poll->file = ctx->user_files[fd];
- req->flags |= REQ_F_FIXED_FILE;
- } else {
- poll->file = fget(fd);
- }
- if (unlikely(!poll->file))
- return -EBADF;
-
poll->head = NULL;
- poll->woken = false;
+ poll->done = false;
poll->canceled = false;
ipt.pt._qproc = io_poll_queue_proc;
@@ -1346,56 +1290,44 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
INIT_LIST_HEAD(&poll->wait.entry);
init_waitqueue_func_entry(&poll->wait, io_poll_wake);
- /* one for removal from waitqueue, one for this function */
- refcount_set(&req->refs, 2);
-
mask = vfs_poll(poll->file, &ipt.pt) & poll->events;
- if (unlikely(!poll->head)) {
- /* we did not manage to set up a waitqueue, done */
- goto out;
- }
spin_lock_irq(&ctx->completion_lock);
- spin_lock(&poll->head->lock);
- if (poll->woken) {
- /* wake_up context handles the rest */
- mask = 0;
+ if (likely(poll->head)) {
+ spin_lock(&poll->head->lock);
+ if (unlikely(list_empty(&poll->wait.entry))) {
+ if (ipt.error)
+ cancel = true;
+ ipt.error = 0;
+ mask = 0;
+ }
+ if (mask || ipt.error)
+ list_del_init(&poll->wait.entry);
+ else if (cancel)
+ WRITE_ONCE(poll->canceled, true);
+ else if (!poll->done) /* actually waiting for an event */
+ list_add_tail(&req->list, &ctx->cancel_list);
+ spin_unlock(&poll->head->lock);
+ }
+ if (mask) { /* no async, we'd stolen it */
+ req->error = mangle_poll(mask);
ipt.error = 0;
- } else if (mask || ipt.error) {
- /* if we get an error or a mask we are done */
- WARN_ON_ONCE(list_empty(&poll->wait.entry));
- list_del_init(&poll->wait.entry);
- } else {
- /* actually waiting for an event */
- list_add_tail(&req->list, &ctx->cancel_list);
+ io_poll_complete(ctx, req, mask);
}
- spin_unlock(&poll->head->lock);
spin_unlock_irq(&ctx->completion_lock);
-out:
- if (unlikely(ipt.error)) {
- if (!(flags & IOSQE_FIXED_FILE))
- fput(poll->file);
- /*
- * Drop one of our refs to this req, __io_submit_sqe() will
- * drop the other one since we're returning an error.
- */
- io_free_req(req);
- return ipt.error;
+ if (mask) {
+ io_cqring_ev_posted(ctx);
+ io_put_req(req);
}
-
- if (mask)
- io_poll_complete(req, mask);
- io_free_req(req);
- return 0;
+ return ipt.error;
}
static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
const struct sqe_submit *s, bool force_nonblock,
struct io_submit_state *state)
{
- ssize_t ret;
- int opcode;
+ int ret, opcode;
if (unlikely(s->index >= ctx->sq_entries))
return -EINVAL;
@@ -1524,10 +1456,13 @@ restart:
break;
cond_resched();
} while (1);
+
+ /* drop submission reference */
+ io_put_req(req);
}
if (ret) {
io_cqring_add_event(ctx, sqe->user_data, ret, 0);
- io_free_req(req);
+ io_put_req(req);
}
/* async context always use a copy of the sqe */
@@ -1614,11 +1549,55 @@ static bool io_add_to_prev_work(struct async_list *list, struct io_kiocb *req)
return ret;
}
+static bool io_op_needs_file(const struct io_uring_sqe *sqe)
+{
+ int op = READ_ONCE(sqe->opcode);
+
+ switch (op) {
+ case IORING_OP_NOP:
+ case IORING_OP_POLL_REMOVE:
+ return false;
+ default:
+ return true;
+ }
+}
+
+static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s,
+ struct io_submit_state *state, struct io_kiocb *req)
+{
+ unsigned flags;
+ int fd;
+
+ flags = READ_ONCE(s->sqe->flags);
+ fd = READ_ONCE(s->sqe->fd);
+
+ if (!io_op_needs_file(s->sqe)) {
+ req->file = NULL;
+ return 0;
+ }
+
+ if (flags & IOSQE_FIXED_FILE) {
+ if (unlikely(!ctx->user_files ||
+ (unsigned) fd >= ctx->nr_user_files))
+ return -EBADF;
+ req->file = ctx->user_files[fd];
+ req->flags |= REQ_F_FIXED_FILE;
+ } else {
+ if (s->needs_fixed_file)
+ return -EBADF;
+ req->file = io_file_get(state, fd);
+ if (unlikely(!req->file))
+ return -EBADF;
+ }
+
+ return 0;
+}
+
static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
struct io_submit_state *state)
{
struct io_kiocb *req;
- ssize_t ret;
+ int ret;
/* enforce forwards compatibility on users */
if (unlikely(s->sqe->flags & ~IOSQE_FIXED_FILE))
@@ -1628,7 +1607,9 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
if (unlikely(!req))
return -EAGAIN;
- req->rw.ki_filp = NULL;
+ ret = io_req_set_file(ctx, s, state, req);
+ if (unlikely(ret))
+ goto out;
ret = __io_submit_sqe(ctx, req, s, true, state);
if (ret == -EAGAIN) {
@@ -1649,11 +1630,23 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
INIT_WORK(&req->work, io_sq_wq_submit_work);
queue_work(ctx->sqo_wq, &req->work);
}
- ret = 0;
+
+ /*
+ * Queued up for async execution, worker will release
+ * submit reference when the iocb is actually
+ * submitted.
+ */
+ return 0;
}
}
+
+out:
+ /* drop submission reference */
+ io_put_req(req);
+
+ /* and drop final reference, if we failed */
if (ret)
- io_free_req(req);
+ io_put_req(req);
return ret;
}
diff --git a/fs/iomap.c b/fs/iomap.c
index 97cb9d486a7d..abdd18e404f8 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1589,12 +1589,14 @@ static void iomap_dio_bio_end_io(struct bio *bio)
if (should_dirty) {
bio_check_pages_dirty(bio);
} else {
- struct bio_vec *bvec;
- int i;
- struct bvec_iter_all iter_all;
+ if (!bio_flagged(bio, BIO_NO_PAGE_REF)) {
+ struct bvec_iter_all iter_all;
+ struct bio_vec *bvec;
+ int i;
- bio_for_each_segment_all(bvec, bio, i, iter_all)
- put_page(bvec->bv_page);
+ bio_for_each_segment_all(bvec, bio, i, iter_all)
+ put_page(bvec->bv_page);
+ }
bio_put(bio);
}
}
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index b0c814bcc7e3..cb2aa7ecafff 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -57,7 +57,6 @@ struct blk_mq_hw_ctx {
unsigned int queue_num;
atomic_t nr_active;
- unsigned int nr_expired;
struct hlist_node cpuhp_dead;
struct kobject kobj;
@@ -300,8 +299,6 @@ void blk_mq_end_request(struct request *rq, blk_status_t error);
void __blk_mq_end_request(struct request *rq, blk_status_t error);
void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list);
-void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
- bool kick_requeue_list);
void blk_mq_kick_requeue_list(struct request_queue *q);
void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
bool blk_mq_complete_request(struct request *rq);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index d66bf5f32610..791fee35df88 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -215,6 +215,7 @@ struct bio {
/*
* bio flags
*/
+#define BIO_NO_PAGE_REF 0 /* don't put release vec pages */
#define BIO_SEG_VALID 1 /* bi_phys_segments valid */
#define BIO_CLONED 2 /* doesn't own data */
#define BIO_BOUNCED 3 /* bio is a bounce bio */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 0de92b29f589..5c58a3b2bf00 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -50,6 +50,9 @@ struct blk_stat_callback;
/* Must be consistent with blk_mq_poll_stats_bkt() */
#define BLK_MQ_POLL_STATS_BKTS 16
+/* Doing classic polling */
+#define BLK_MQ_POLL_CLASSIC -1
+
/*
* Maximum number of blkcg policies allowed to be registered concurrently.
* Defined here to simplify include dependency.
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index a420c07904bc..337d5049ff93 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -294,6 +294,8 @@ extern void ceph_destroy_client(struct ceph_client *client);
extern int __ceph_open_session(struct ceph_client *client,
unsigned long started);
extern int ceph_open_session(struct ceph_client *client);
+int ceph_wait_for_latest_osdmap(struct ceph_client *client,
+ unsigned long timeout);
/* pagevec.c */
extern void ceph_release_page_vector(struct page **pages, int num_pages);
diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index 14d558146aea..20f3e3f029b9 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -330,7 +330,7 @@ static inline void sbitmap_clear_bit(struct sbitmap *sb, unsigned int bitnr)
/*
* This one is special, since it doesn't actually clear the bit, rather it
* sets the corresponding bit in the ->cleared mask instead. Paired with
- * the caller doing sbitmap_batch_clear() if a given index is full, which
+ * the caller doing sbitmap_deferred_clear() if a given index is full, which
* will clear the previously freed entries in the corresponding ->word.
*/
static inline void sbitmap_deferred_clear_bit(struct sbitmap *sb, unsigned int bitnr)
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 87477e1640f9..f184af1999a8 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -23,14 +23,23 @@ struct kvec {
};
enum iter_type {
- ITER_IOVEC = 0,
- ITER_KVEC = 2,
- ITER_BVEC = 4,
- ITER_PIPE = 8,
- ITER_DISCARD = 16,
+ /* set if ITER_BVEC doesn't hold a bv_page ref */
+ ITER_BVEC_FLAG_NO_REF = 2,
+
+ /* iter types */
+ ITER_IOVEC = 4,
+ ITER_KVEC = 8,
+ ITER_BVEC = 16,
+ ITER_PIPE = 32,
+ ITER_DISCARD = 64,
};
struct iov_iter {
+ /*
+ * Bit 0 is the read/write bit, set if we're writing.
+ * Bit 1 is the BVEC_FLAG_NO_REF bit, set if type is a bvec and
+ * the caller isn't expecting to drop a page reference when done.
+ */
unsigned int type;
size_t iov_offset;
size_t count;
@@ -84,6 +93,11 @@ static inline unsigned char iov_iter_rw(const struct iov_iter *i)
return i->type & (READ | WRITE);
}
+static inline bool iov_iter_bvec_no_ref(const struct iov_iter *i)
+{
+ return (i->type & ITER_BVEC_FLAG_NO_REF) != 0;
+}
+
/*
* Total number of bytes covered by an iovec.
*
diff --git a/include/misc/charlcd.h b/include/misc/charlcd.h
index 23f61850f363..1832402324ce 100644
--- a/include/misc/charlcd.h
+++ b/include/misc/charlcd.h
@@ -35,6 +35,7 @@ struct charlcd_ops {
};
struct charlcd *charlcd_alloc(unsigned int drvdata_size);
+void charlcd_free(struct charlcd *lcd);
int charlcd_register(struct charlcd *lcd);
int charlcd_unregister(struct charlcd *lcd);
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 9cab80207ced..79eac465ec65 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -738,7 +738,6 @@ int __ceph_open_session(struct ceph_client *client, unsigned long started)
}
EXPORT_SYMBOL(__ceph_open_session);
-
int ceph_open_session(struct ceph_client *client)
{
int ret;
@@ -754,6 +753,23 @@ int ceph_open_session(struct ceph_client *client)
}
EXPORT_SYMBOL(ceph_open_session);
+int ceph_wait_for_latest_osdmap(struct ceph_client *client,
+ unsigned long timeout)
+{
+ u64 newest_epoch;
+ int ret;
+
+ ret = ceph_monc_get_version(&client->monc, "osdmap", &newest_epoch);
+ if (ret)
+ return ret;
+
+ if (client->osdc.osdmap->epoch >= newest_epoch)
+ return 0;
+
+ ceph_osdc_maybe_request_map(&client->osdc);
+ return ceph_monc_wait_osdmap(&client->monc, newest_epoch, timeout);
+}
+EXPORT_SYMBOL(ceph_wait_for_latest_osdmap);
static int __init init_ceph_lib(void)
{
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 18deb3d889c4..a53e4fbb6319 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -922,6 +922,15 @@ int ceph_monc_blacklist_add(struct ceph_mon_client *monc,
mutex_unlock(&monc->mutex);
ret = wait_generic_request(req);
+ if (!ret)
+ /*
+ * Make sure we have the osdmap that includes the blacklist
+ * entry. This is needed to ensure that the OSDs pick up the
+ * new blacklist before processing any future requests from
+ * this client.
+ */
+ ret = ceph_wait_for_latest_osdmap(monc->client, 0);
+
out:
put_generic_request(req);
return ret;