diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-08-15 08:20:36 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-08-15 08:20:36 -0700 |
commit | ee94b00c1a648530333d9734200be7a45e6e00cd (patch) | |
tree | a2e6d0e199673f48ddf62a9f7dadab90f829ba22 /drivers | |
parent | 4ad976b0e8ea3247c607cd37abb09440806f898d (diff) | |
parent | 8f5845e0743bf3512b71b3cb8afe06c192d6acc4 (diff) |
Pull block fixes from Jens Axboe:
- Fix for unprivileged daemons in ublk
- Speedup ublk release by removing unnecessary quiesce
- Fix for blk-wbt, where a regression caused it to not be possible to
enable at runtime
- blk-wbt cleanups
- Kill the page pool from drbd
- Remove redundant __GFP_NOWARN uses in a few spots
- Fix for a kobject double initialization issues
* tag 'block-6.17-20250815' of git://git.kernel.dk/linux:
block: restore default wbt enablement
Docs: admin-guide: Correct spelling mistake
blk-wbt: doc: Update the doc of the wbt_lat_usec interface
blk-wbt: Eliminate ambiguity in the comments of struct rq_wb
blk-wbt: Optimize wbt_done() for non-throttled writes
block: fix kobject double initialization in add_disk
blk-cgroup: remove redundant __GFP_NOWARN
block, bfq: remove redundant __GFP_NOWARN
ublk: check for unprivileged daemon on each I/O fetch
ublk: don't quiesce in ublk_ch_release
drbd: Remove the open-coded page pool
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/block/drbd/drbd_int.h | 39 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_main.c | 59 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_receiver.c | 264 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_worker.c | 56 | ||||
-rw-r--r-- | drivers/block/ublk_drv.c | 28 |
5 files changed, 83 insertions, 363 deletions
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index e21492981f7d..f6d6276974ee 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -380,6 +380,9 @@ enum { /* this is/was a write request */ __EE_WRITE, + /* hand back using mempool_free(e, drbd_buffer_page_pool) */ + __EE_RELEASE_TO_MEMPOOL, + /* this is/was a write same request */ __EE_WRITE_SAME, @@ -402,6 +405,7 @@ enum { #define EE_IN_INTERVAL_TREE (1<<__EE_IN_INTERVAL_TREE) #define EE_SUBMITTED (1<<__EE_SUBMITTED) #define EE_WRITE (1<<__EE_WRITE) +#define EE_RELEASE_TO_MEMPOOL (1<<__EE_RELEASE_TO_MEMPOOL) #define EE_WRITE_SAME (1<<__EE_WRITE_SAME) #define EE_APPLICATION (1<<__EE_APPLICATION) #define EE_RS_THIN_REQ (1<<__EE_RS_THIN_REQ) @@ -858,7 +862,6 @@ struct drbd_device { struct list_head sync_ee; /* IO in progress (P_RS_DATA_REPLY gets written to disk) */ struct list_head done_ee; /* need to send P_WRITE_ACK */ struct list_head read_ee; /* [RS]P_DATA_REQUEST being read */ - struct list_head net_ee; /* zero-copy network send in progress */ struct list_head resync_reads; atomic_t pp_in_use; /* allocated from page pool */ @@ -1329,24 +1332,6 @@ extern struct kmem_cache *drbd_al_ext_cache; /* activity log extents */ extern mempool_t drbd_request_mempool; extern mempool_t drbd_ee_mempool; -/* drbd's page pool, used to buffer data received from the peer, - * or data requested by the peer. - * - * This does not have an emergency reserve. - * - * When allocating from this pool, it first takes pages from the pool. - * Only if the pool is depleted will try to allocate from the system. - * - * The assumption is that pages taken from this pool will be processed, - * and given back, "quickly", and then can be recycled, so we can avoid - * frequent calls to alloc_page(), and still will be able to make progress even - * under memory pressure. - */ -extern struct page *drbd_pp_pool; -extern spinlock_t drbd_pp_lock; -extern int drbd_pp_vacant; -extern wait_queue_head_t drbd_pp_wait; - /* We also need a standard (emergency-reserve backed) page pool * for meta data IO (activity log, bitmap). * We can keep it global, as long as it is used as "N pages at a time". @@ -1354,6 +1339,7 @@ extern wait_queue_head_t drbd_pp_wait; */ #define DRBD_MIN_POOL_PAGES 128 extern mempool_t drbd_md_io_page_pool; +extern mempool_t drbd_buffer_page_pool; /* We also need to make sure we get a bio * when we need it for housekeeping purposes */ @@ -1488,10 +1474,7 @@ extern struct drbd_peer_request *drbd_alloc_peer_req(struct drbd_peer_device *, sector_t, unsigned int, unsigned int, gfp_t) __must_hold(local); -extern void __drbd_free_peer_req(struct drbd_device *, struct drbd_peer_request *, - int); -#define drbd_free_peer_req(m,e) __drbd_free_peer_req(m, e, 0) -#define drbd_free_net_peer_req(m,e) __drbd_free_peer_req(m, e, 1) +extern void drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *req); extern struct page *drbd_alloc_pages(struct drbd_peer_device *, unsigned int, bool); extern void _drbd_clear_done_ee(struct drbd_device *device, struct list_head *to_be_freed); extern int drbd_connected(struct drbd_peer_device *); @@ -1610,16 +1593,6 @@ static inline struct page *page_chain_next(struct page *page) for (; page && ({ n = page_chain_next(page); 1; }); page = n) -static inline int drbd_peer_req_has_active_page(struct drbd_peer_request *peer_req) -{ - struct page *page = peer_req->pages; - page_chain_for_each(page) { - if (page_count(page) > 1) - return 1; - } - return 0; -} - static inline union drbd_state drbd_read_state(struct drbd_device *device) { struct drbd_resource *resource = device->resource; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 52724b79be30..c73376886e7a 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -114,20 +114,10 @@ struct kmem_cache *drbd_al_ext_cache; /* activity log extents */ mempool_t drbd_request_mempool; mempool_t drbd_ee_mempool; mempool_t drbd_md_io_page_pool; +mempool_t drbd_buffer_page_pool; struct bio_set drbd_md_io_bio_set; struct bio_set drbd_io_bio_set; -/* I do not use a standard mempool, because: - 1) I want to hand out the pre-allocated objects first. - 2) I want to be able to interrupt sleeping allocation with a signal. - Note: This is a single linked list, the next pointer is the private - member of struct page. - */ -struct page *drbd_pp_pool; -DEFINE_SPINLOCK(drbd_pp_lock); -int drbd_pp_vacant; -wait_queue_head_t drbd_pp_wait; - DEFINE_RATELIMIT_STATE(drbd_ratelimit_state, 5 * HZ, 5); static const struct block_device_operations drbd_ops = { @@ -1611,6 +1601,7 @@ static int _drbd_send_zc_bio(struct drbd_peer_device *peer_device, struct bio *b static int _drbd_send_zc_ee(struct drbd_peer_device *peer_device, struct drbd_peer_request *peer_req) { + bool use_sendpage = !(peer_req->flags & EE_RELEASE_TO_MEMPOOL); struct page *page = peer_req->pages; unsigned len = peer_req->i.size; int err; @@ -1619,8 +1610,13 @@ static int _drbd_send_zc_ee(struct drbd_peer_device *peer_device, page_chain_for_each(page) { unsigned l = min_t(unsigned, len, PAGE_SIZE); - err = _drbd_send_page(peer_device, page, 0, l, - page_chain_next(page) ? MSG_MORE : 0); + if (likely(use_sendpage)) + err = _drbd_send_page(peer_device, page, 0, l, + page_chain_next(page) ? MSG_MORE : 0); + else + err = _drbd_no_send_page(peer_device, page, 0, l, + page_chain_next(page) ? MSG_MORE : 0); + if (err) return err; len -= l; @@ -1962,7 +1958,6 @@ void drbd_init_set_defaults(struct drbd_device *device) INIT_LIST_HEAD(&device->sync_ee); INIT_LIST_HEAD(&device->done_ee); INIT_LIST_HEAD(&device->read_ee); - INIT_LIST_HEAD(&device->net_ee); INIT_LIST_HEAD(&device->resync_reads); INIT_LIST_HEAD(&device->resync_work.list); INIT_LIST_HEAD(&device->unplug_work.list); @@ -2043,7 +2038,6 @@ void drbd_device_cleanup(struct drbd_device *device) D_ASSERT(device, list_empty(&device->sync_ee)); D_ASSERT(device, list_empty(&device->done_ee)); D_ASSERT(device, list_empty(&device->read_ee)); - D_ASSERT(device, list_empty(&device->net_ee)); D_ASSERT(device, list_empty(&device->resync_reads)); D_ASSERT(device, list_empty(&first_peer_device(device)->connection->sender_work.q)); D_ASSERT(device, list_empty(&device->resync_work.list)); @@ -2055,19 +2049,11 @@ void drbd_device_cleanup(struct drbd_device *device) static void drbd_destroy_mempools(void) { - struct page *page; - - while (drbd_pp_pool) { - page = drbd_pp_pool; - drbd_pp_pool = (struct page *)page_private(page); - __free_page(page); - drbd_pp_vacant--; - } - /* D_ASSERT(device, atomic_read(&drbd_pp_vacant)==0); */ bioset_exit(&drbd_io_bio_set); bioset_exit(&drbd_md_io_bio_set); + mempool_exit(&drbd_buffer_page_pool); mempool_exit(&drbd_md_io_page_pool); mempool_exit(&drbd_ee_mempool); mempool_exit(&drbd_request_mempool); @@ -2086,9 +2072,8 @@ static void drbd_destroy_mempools(void) static int drbd_create_mempools(void) { - struct page *page; const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * drbd_minor_count; - int i, ret; + int ret; /* caches */ drbd_request_cache = kmem_cache_create( @@ -2125,6 +2110,10 @@ static int drbd_create_mempools(void) if (ret) goto Enomem; + ret = mempool_init_page_pool(&drbd_buffer_page_pool, number, 0); + if (ret) + goto Enomem; + ret = mempool_init_slab_pool(&drbd_request_mempool, number, drbd_request_cache); if (ret) @@ -2134,15 +2123,6 @@ static int drbd_create_mempools(void) if (ret) goto Enomem; - for (i = 0; i < number; i++) { - page = alloc_page(GFP_HIGHUSER); - if (!page) - goto Enomem; - set_page_private(page, (unsigned long)drbd_pp_pool); - drbd_pp_pool = page; - } - drbd_pp_vacant = number; - return 0; Enomem: @@ -2169,10 +2149,6 @@ static void drbd_release_all_peer_reqs(struct drbd_device *device) rr = drbd_free_peer_reqs(device, &device->done_ee); if (rr) drbd_err(device, "%d EEs in done list found!\n", rr); - - rr = drbd_free_peer_reqs(device, &device->net_ee); - if (rr) - drbd_err(device, "%d EEs in net list found!\n", rr); } /* caution. no locking. */ @@ -2863,11 +2839,6 @@ static int __init drbd_init(void) return err; } - /* - * allocate all necessary structs - */ - init_waitqueue_head(&drbd_pp_wait); - drbd_proc = NULL; /* play safe for drbd_cleanup */ idr_init(&drbd_devices); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 975024cf03c5..caaf2781136d 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -33,6 +33,7 @@ #include <linux/string.h> #include <linux/scatterlist.h> #include <linux/part_stat.h> +#include <linux/mempool.h> #include "drbd_int.h" #include "drbd_protocol.h" #include "drbd_req.h" @@ -63,182 +64,31 @@ static int e_end_block(struct drbd_work *, int); #define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN) -/* - * some helper functions to deal with single linked page lists, - * page->private being our "next" pointer. - */ - -/* If at least n pages are linked at head, get n pages off. - * Otherwise, don't modify head, and return NULL. - * Locking is the responsibility of the caller. - */ -static struct page *page_chain_del(struct page **head, int n) -{ - struct page *page; - struct page *tmp; - - BUG_ON(!n); - BUG_ON(!head); - - page = *head; - - if (!page) - return NULL; - - while (page) { - tmp = page_chain_next(page); - if (--n == 0) - break; /* found sufficient pages */ - if (tmp == NULL) - /* insufficient pages, don't use any of them. */ - return NULL; - page = tmp; - } - - /* add end of list marker for the returned list */ - set_page_private(page, 0); - /* actual return value, and adjustment of head */ - page = *head; - *head = tmp; - return page; -} - -/* may be used outside of locks to find the tail of a (usually short) - * "private" page chain, before adding it back to a global chain head - * with page_chain_add() under a spinlock. */ -static struct page *page_chain_tail(struct page *page, int *len) -{ - struct page *tmp; - int i = 1; - while ((tmp = page_chain_next(page))) { - ++i; - page = tmp; - } - if (len) - *len = i; - return page; -} - -static int page_chain_free(struct page *page) -{ - struct page *tmp; - int i = 0; - page_chain_for_each_safe(page, tmp) { - put_page(page); - ++i; - } - return i; -} - -static void page_chain_add(struct page **head, - struct page *chain_first, struct page *chain_last) -{ -#if 1 - struct page *tmp; - tmp = page_chain_tail(chain_first, NULL); - BUG_ON(tmp != chain_last); -#endif - - /* add chain to head */ - set_page_private(chain_last, (unsigned long)*head); - *head = chain_first; -} - -static struct page *__drbd_alloc_pages(struct drbd_device *device, - unsigned int number) +static struct page *__drbd_alloc_pages(unsigned int number) { struct page *page = NULL; struct page *tmp = NULL; unsigned int i = 0; - /* Yes, testing drbd_pp_vacant outside the lock is racy. - * So what. It saves a spin_lock. */ - if (drbd_pp_vacant >= number) { - spin_lock(&drbd_pp_lock); - page = page_chain_del(&drbd_pp_pool, number); - if (page) - drbd_pp_vacant -= number; - spin_unlock(&drbd_pp_lock); - if (page) - return page; - } - /* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD * "criss-cross" setup, that might cause write-out on some other DRBD, * which in turn might block on the other node at this very place. */ for (i = 0; i < number; i++) { - tmp = alloc_page(GFP_TRY); + tmp = mempool_alloc(&drbd_buffer_page_pool, GFP_TRY); if (!tmp) - break; + goto fail; set_page_private(tmp, (unsigned long)page); page = tmp; } - - if (i == number) - return page; - - /* Not enough pages immediately available this time. - * No need to jump around here, drbd_alloc_pages will retry this - * function "soon". */ - if (page) { - tmp = page_chain_tail(page, NULL); - spin_lock(&drbd_pp_lock); - page_chain_add(&drbd_pp_pool, page, tmp); - drbd_pp_vacant += i; - spin_unlock(&drbd_pp_lock); + return page; +fail: + page_chain_for_each_safe(page, tmp) { + set_page_private(page, 0); + mempool_free(page, &drbd_buffer_page_pool); } return NULL; } -static void reclaim_finished_net_peer_reqs(struct drbd_device *device, - struct list_head *to_be_freed) -{ - struct drbd_peer_request *peer_req, *tmp; - - /* The EEs are always appended to the end of the list. Since - they are sent in order over the wire, they have to finish - in order. As soon as we see the first not finished we can - stop to examine the list... */ - - list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) { - if (drbd_peer_req_has_active_page(peer_req)) - break; - list_move(&peer_req->w.list, to_be_freed); - } -} - -static void drbd_reclaim_net_peer_reqs(struct drbd_device *device) -{ - LIST_HEAD(reclaimed); - struct drbd_peer_request *peer_req, *t; - - spin_lock_irq(&device->resource->req_lock); - reclaim_finished_net_peer_reqs(device, &reclaimed); - spin_unlock_irq(&device->resource->req_lock); - list_for_each_entry_safe(peer_req, t, &reclaimed, w.list) - drbd_free_net_peer_req(device, peer_req); -} - -static void conn_reclaim_net_peer_reqs(struct drbd_connection *connection) -{ - struct drbd_peer_device *peer_device; - int vnr; - - rcu_read_lock(); - idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { - struct drbd_device *device = peer_device->device; - if (!atomic_read(&device->pp_in_use_by_net)) - continue; - - kref_get(&device->kref); - rcu_read_unlock(); - drbd_reclaim_net_peer_reqs(device); - kref_put(&device->kref, drbd_destroy_device); - rcu_read_lock(); - } - rcu_read_unlock(); -} - /** * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled) * @peer_device: DRBD device. @@ -263,9 +113,8 @@ struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int bool retry) { struct drbd_device *device = peer_device->device; - struct page *page = NULL; + struct page *page; struct net_conf *nc; - DEFINE_WAIT(wait); unsigned int mxb; rcu_read_lock(); @@ -273,37 +122,9 @@ struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int mxb = nc ? nc->max_buffers : 1000000; rcu_read_unlock(); - if (atomic_read(&device->pp_in_use) < mxb) - page = __drbd_alloc_pages(device, number); - - /* Try to keep the fast path fast, but occasionally we need - * to reclaim the pages we lended to the network stack. */ - if (page && atomic_read(&device->pp_in_use_by_net) > 512) - drbd_reclaim_net_peer_reqs(device); - - while (page == NULL) { - prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE); - - drbd_reclaim_net_peer_reqs(device); - - if (atomic_read(&device->pp_in_use) < mxb) { - page = __drbd_alloc_pages(device, number); - if (page) - break; - } - - if (!retry) - break; - - if (signal_pending(current)) { - drbd_warn(device, "drbd_alloc_pages interrupted!\n"); - break; - } - - if (schedule_timeout(HZ/10) == 0) - mxb = UINT_MAX; - } - finish_wait(&drbd_pp_wait, &wait); + if (atomic_read(&device->pp_in_use) >= mxb) + schedule_timeout_interruptible(HZ / 10); + page = __drbd_alloc_pages(number); if (page) atomic_add(number, &device->pp_in_use); @@ -314,29 +135,25 @@ struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int * Is also used from inside an other spin_lock_irq(&resource->req_lock); * Either links the page chain back to the global pool, * or returns all pages to the system. */ -static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net) +static void drbd_free_pages(struct drbd_device *device, struct page *page) { - atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use; - int i; + struct page *tmp; + int i = 0; if (page == NULL) return; - if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * drbd_minor_count) - i = page_chain_free(page); - else { - struct page *tmp; - tmp = page_chain_tail(page, &i); - spin_lock(&drbd_pp_lock); - page_chain_add(&drbd_pp_pool, page, tmp); - drbd_pp_vacant += i; - spin_unlock(&drbd_pp_lock); - } - i = atomic_sub_return(i, a); + page_chain_for_each_safe(page, tmp) { + set_page_private(page, 0); + if (page_count(page) == 1) + mempool_free(page, &drbd_buffer_page_pool); + else + put_page(page); + i++; + } + i = atomic_sub_return(i, &device->pp_in_use); if (i < 0) - drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n", - is_net ? "pp_in_use_by_net" : "pp_in_use", i); - wake_up(&drbd_pp_wait); + drbd_warn(device, "ASSERTION FAILED: pp_in_use: %d < 0\n", i); } /* @@ -380,6 +197,8 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto gfpflags_allow_blocking(gfp_mask)); if (!page) goto fail; + if (!mempool_is_saturated(&drbd_buffer_page_pool)) + peer_req->flags |= EE_RELEASE_TO_MEMPOOL; } memset(peer_req, 0, sizeof(*peer_req)); @@ -403,13 +222,12 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto return NULL; } -void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req, - int is_net) +void drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req) { might_sleep(); if (peer_req->flags & EE_HAS_DIGEST) kfree(peer_req->digest); - drbd_free_pages(device, peer_req->pages, is_net); + drbd_free_pages(device, peer_req->pages); D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0); D_ASSERT(device, drbd_interval_empty(&peer_req->i)); if (!expect(device, !(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) { @@ -424,14 +242,13 @@ int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list) LIST_HEAD(work_list); struct drbd_peer_request *peer_req, *t; int count = 0; - int is_net = list == &device->net_ee; spin_lock_irq(&device->resource->req_lock); list_splice_init(list, &work_list); spin_unlock_irq(&device->resource->req_lock); list_for_each_entry_safe(peer_req, t, &work_list, w.list) { - __drbd_free_peer_req(device, peer_req, is_net); + drbd_free_peer_req(device, peer_req); count++; } return count; @@ -443,18 +260,13 @@ int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list) static int drbd_finish_peer_reqs(struct drbd_device *device) { LIST_HEAD(work_list); - LIST_HEAD(reclaimed); struct drbd_peer_request *peer_req, *t; int err = 0; spin_lock_irq(&device->resource->req_lock); - reclaim_finished_net_peer_reqs(device, &reclaimed); list_splice_init(&device->done_ee, &work_list); spin_unlock_irq(&device->resource->req_lock); - list_for_each_entry_safe(peer_req, t, &reclaimed, w.list) - drbd_free_net_peer_req(device, peer_req); - /* possible callbacks here: * e_end_block, and e_end_resync_block, e_send_superseded. * all ignore the last argument. @@ -1975,7 +1787,7 @@ static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size) data_size -= len; } kunmap(page); - drbd_free_pages(peer_device->device, page, 0); + drbd_free_pages(peer_device->device, page); return err; } @@ -5224,16 +5036,6 @@ static int drbd_disconnected(struct drbd_peer_device *peer_device) put_ldev(device); } - /* tcp_close and release of sendpage pages can be deferred. I don't - * want to use SO_LINGER, because apparently it can be deferred for - * more than 20 seconds (longest time I checked). - * - * Actually we don't care for exactly when the network stack does its - * put_page(), but release our reference on these pages right here. - */ - i = drbd_free_peer_reqs(device, &device->net_ee); - if (i) - drbd_info(device, "net_ee not empty, killed %u entries\n", i); i = atomic_read(&device->pp_in_use_by_net); if (i) drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i); @@ -5980,8 +5782,6 @@ int drbd_ack_receiver(struct drbd_thread *thi) while (get_t_state(thi) == RUNNING) { drbd_thread_current_set_cpu(thi); - conn_reclaim_net_peer_reqs(connection); - if (test_and_clear_bit(SEND_PING, &connection->flags)) { if (drbd_send_ping(connection)) { drbd_err(connection, "drbd_send_ping has failed\n"); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index a6ea737b3b71..dea3e79d044f 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1030,22 +1030,6 @@ out: return 1; } -/* helper */ -static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_request *peer_req) -{ - if (drbd_peer_req_has_active_page(peer_req)) { - /* This might happen if sendpage() has not finished */ - int i = PFN_UP(peer_req->i.size); - atomic_add(i, &device->pp_in_use_by_net); - atomic_sub(i, &device->pp_in_use); - spin_lock_irq(&device->resource->req_lock); - list_add_tail(&peer_req->w.list, &device->net_ee); - spin_unlock_irq(&device->resource->req_lock); - wake_up(&drbd_pp_wait); - } else - drbd_free_peer_req(device, peer_req); -} - /** * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST * @w: work object. @@ -1059,9 +1043,8 @@ int w_e_end_data_req(struct drbd_work *w, int cancel) int err; if (unlikely(cancel)) { - drbd_free_peer_req(device, peer_req); - dec_unacked(device); - return 0; + err = 0; + goto out; } if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { @@ -1074,12 +1057,12 @@ int w_e_end_data_req(struct drbd_work *w, int cancel) err = drbd_send_ack(peer_device, P_NEG_DREPLY, peer_req); } - dec_unacked(device); - - move_to_net_ee_or_free(device, peer_req); - if (unlikely(err)) drbd_err(device, "drbd_send_block() failed\n"); +out: + dec_unacked(device); + drbd_free_peer_req(device, peer_req); + return err; } @@ -1120,9 +1103,8 @@ int w_e_end_rsdata_req(struct drbd_work *w, int cancel) int err; if (unlikely(cancel)) { - drbd_free_peer_req(device, peer_req); - dec_unacked(device); - return 0; + err = 0; + goto out; } if (get_ldev_if_state(device, D_FAILED)) { @@ -1155,13 +1137,12 @@ int w_e_end_rsdata_req(struct drbd_work *w, int cancel) /* update resync data with failure */ drbd_rs_failed_io(peer_device, peer_req->i.sector, peer_req->i.size); } - - dec_unacked(device); - - move_to_net_ee_or_free(device, peer_req); - if (unlikely(err)) drbd_err(device, "drbd_send_block() failed\n"); +out: + dec_unacked(device); + drbd_free_peer_req(device, peer_req); + return err; } @@ -1176,9 +1157,8 @@ int w_e_end_csum_rs_req(struct drbd_work *w, int cancel) int err, eq = 0; if (unlikely(cancel)) { - drbd_free_peer_req(device, peer_req); - dec_unacked(device); - return 0; + err = 0; + goto out; } if (get_ldev(device)) { @@ -1220,12 +1200,12 @@ int w_e_end_csum_rs_req(struct drbd_work *w, int cancel) if (drbd_ratelimit()) drbd_err(device, "Sending NegDReply. I guess it gets messy.\n"); } - - dec_unacked(device); - move_to_net_ee_or_free(device, peer_req); - if (unlikely(err)) drbd_err(device, "drbd_send_block/ack() failed\n"); +out: + dec_unacked(device); + drbd_free_peer_req(device, peer_req); + return err; } diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 6561d2a561fa..99abd67b708b 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -235,7 +235,7 @@ struct ublk_device { struct completion completion; unsigned int nr_queues_ready; - unsigned int nr_privileged_daemon; + bool unprivileged_daemons; struct mutex cancel_mutex; bool canceling; pid_t ublksrv_tgid; @@ -1389,7 +1389,7 @@ static blk_status_t ublk_prep_req(struct ublk_queue *ubq, struct request *rq, { blk_status_t res; - if (unlikely(ubq->fail_io)) + if (unlikely(READ_ONCE(ubq->fail_io))) return BLK_STS_TARGET; /* With recovery feature enabled, force_abort is set in @@ -1401,7 +1401,8 @@ static blk_status_t ublk_prep_req(struct ublk_queue *ubq, struct request *rq, * Note: force_abort is guaranteed to be seen because it is set * before request queue is unqiuesced. */ - if (ublk_nosrv_should_queue_io(ubq) && unlikely(ubq->force_abort)) + if (ublk_nosrv_should_queue_io(ubq) && + unlikely(READ_ONCE(ubq->force_abort))) return BLK_STS_IOERR; if (check_cancel && unlikely(ubq->canceling)) @@ -1550,7 +1551,7 @@ static void ublk_reset_ch_dev(struct ublk_device *ub) /* set to NULL, otherwise new tasks cannot mmap io_cmd_buf */ ub->mm = NULL; ub->nr_queues_ready = 0; - ub->nr_privileged_daemon = 0; + ub->unprivileged_daemons = false; ub->ublksrv_tgid = -1; } @@ -1644,7 +1645,6 @@ static int ublk_ch_release(struct inode *inode, struct file *filp) * Transition the device to the nosrv state. What exactly this * means depends on the recovery flags */ - blk_mq_quiesce_queue(disk->queue); if (ublk_nosrv_should_stop_dev(ub)) { /* * Allow any pending/future I/O to pass through quickly @@ -1652,8 +1652,7 @@ static int ublk_ch_release(struct inode *inode, struct file *filp) * waits for all pending I/O to complete */ for (i = 0; i < ub->dev_info.nr_hw_queues; i++) - ublk_get_queue(ub, i)->force_abort = true; - blk_mq_unquiesce_queue(disk->queue); + WRITE_ONCE(ublk_get_queue(ub, i)->force_abort, true); ublk_stop_dev_unlocked(ub); } else { @@ -1663,9 +1662,8 @@ static int ublk_ch_release(struct inode *inode, struct file *filp) } else { ub->dev_info.state = UBLK_S_DEV_FAIL_IO; for (i = 0; i < ub->dev_info.nr_hw_queues; i++) - ublk_get_queue(ub, i)->fail_io = true; + WRITE_ONCE(ublk_get_queue(ub, i)->fail_io, true); } - blk_mq_unquiesce_queue(disk->queue); } unlock: mutex_unlock(&ub->mutex); @@ -1980,12 +1978,10 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq) __must_hold(&ub->mutex) { ubq->nr_io_ready++; - if (ublk_queue_ready(ubq)) { + if (ublk_queue_ready(ubq)) ub->nr_queues_ready++; - - if (capable(CAP_SYS_ADMIN)) - ub->nr_privileged_daemon++; - } + if (!ub->unprivileged_daemons && !capable(CAP_SYS_ADMIN)) + ub->unprivileged_daemons = true; if (ub->nr_queues_ready == ub->dev_info.nr_hw_queues) { /* now we are ready for handling ublk io request */ @@ -2880,8 +2876,8 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, ublk_apply_params(ub); - /* don't probe partitions if any one ubq daemon is un-trusted */ - if (ub->nr_privileged_daemon != ub->nr_queues_ready) + /* don't probe partitions if any daemon task is un-trusted */ + if (ub->unprivileged_daemons) set_bit(GD_SUPPRESS_PART_SCAN, &disk->state); ublk_get_device(ub); |