Diffstat (limited to 'drivers/block/drbd/drbd_receiver.c')
-rw-r--r--  drivers/block/drbd/drbd_receiver.c  604
1 file changed, 184 insertions, 420 deletions
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 1f740e42e457..3de919b6f0e1 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-License-Identifier: GPL-2.0-only
/*
drbd_receiver.c
@@ -27,13 +27,13 @@
#include <uapi/linux/sched/types.h>
#include <linux/sched/signal.h>
#include <linux/pkt_sched.h>
-#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
#include <linux/part_stat.h>
+#include <linux/mempool.h>
#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"
@@ -64,182 +64,31 @@ static int e_end_block(struct drbd_work *, int);
#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
-/*
- * some helper functions to deal with single linked page lists,
- * page->private being our "next" pointer.
- */
-
-/* If at least n pages are linked at head, get n pages off.
- * Otherwise, don't modify head, and return NULL.
- * Locking is the responsibility of the caller.
- */
-static struct page *page_chain_del(struct page **head, int n)
-{
- struct page *page;
- struct page *tmp;
-
- BUG_ON(!n);
- BUG_ON(!head);
-
- page = *head;
-
- if (!page)
- return NULL;
-
- while (page) {
- tmp = page_chain_next(page);
- if (--n == 0)
- break; /* found sufficient pages */
- if (tmp == NULL)
- /* insufficient pages, don't use any of them. */
- return NULL;
- page = tmp;
- }
-
- /* add end of list marker for the returned list */
- set_page_private(page, 0);
- /* actual return value, and adjustment of head */
- page = *head;
- *head = tmp;
- return page;
-}
-
-/* may be used outside of locks to find the tail of a (usually short)
- * "private" page chain, before adding it back to a global chain head
- * with page_chain_add() under a spinlock. */
-static struct page *page_chain_tail(struct page *page, int *len)
-{
- struct page *tmp;
- int i = 1;
- while ((tmp = page_chain_next(page))) {
- ++i;
- page = tmp;
- }
- if (len)
- *len = i;
- return page;
-}
-
-static int page_chain_free(struct page *page)
-{
- struct page *tmp;
- int i = 0;
- page_chain_for_each_safe(page, tmp) {
- put_page(page);
- ++i;
- }
- return i;
-}
-
-static void page_chain_add(struct page **head,
- struct page *chain_first, struct page *chain_last)
-{
-#if 1
- struct page *tmp;
- tmp = page_chain_tail(chain_first, NULL);
- BUG_ON(tmp != chain_last);
-#endif
-
- /* add chain to head */
- set_page_private(chain_last, (unsigned long)*head);
- *head = chain_first;
-}
-
-static struct page *__drbd_alloc_pages(struct drbd_device *device,
- unsigned int number)
+static struct page *__drbd_alloc_pages(unsigned int number)
{
struct page *page = NULL;
struct page *tmp = NULL;
unsigned int i = 0;
- /* Yes, testing drbd_pp_vacant outside the lock is racy.
- * So what. It saves a spin_lock. */
- if (drbd_pp_vacant >= number) {
- spin_lock(&drbd_pp_lock);
- page = page_chain_del(&drbd_pp_pool, number);
- if (page)
- drbd_pp_vacant -= number;
- spin_unlock(&drbd_pp_lock);
- if (page)
- return page;
- }
-
/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
* "criss-cross" setup, that might cause write-out on some other DRBD,
* which in turn might block on the other node at this very place. */
for (i = 0; i < number; i++) {
- tmp = alloc_page(GFP_TRY);
+ tmp = mempool_alloc(&drbd_buffer_page_pool, GFP_TRY);
if (!tmp)
- break;
+ goto fail;
set_page_private(tmp, (unsigned long)page);
page = tmp;
}
-
- if (i == number)
- return page;
-
- /* Not enough pages immediately available this time.
- * No need to jump around here, drbd_alloc_pages will retry this
- * function "soon". */
- if (page) {
- tmp = page_chain_tail(page, NULL);
- spin_lock(&drbd_pp_lock);
- page_chain_add(&drbd_pp_pool, page, tmp);
- drbd_pp_vacant += i;
- spin_unlock(&drbd_pp_lock);
+ return page;
+fail:
+ page_chain_for_each_safe(page, tmp) {
+ set_page_private(page, 0);
+ mempool_free(page, &drbd_buffer_page_pool);
}
return NULL;
}
-static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
- struct list_head *to_be_freed)
-{
- struct drbd_peer_request *peer_req, *tmp;
-
- /* The EEs are always appended to the end of the list. Since
- they are sent in order over the wire, they have to finish
- in order. As soon as we see the first not finished we can
- stop to examine the list... */
-
- list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) {
- if (drbd_peer_req_has_active_page(peer_req))
- break;
- list_move(&peer_req->w.list, to_be_freed);
- }
-}
-
-static void drbd_reclaim_net_peer_reqs(struct drbd_device *device)
-{
- LIST_HEAD(reclaimed);
- struct drbd_peer_request *peer_req, *t;
-
- spin_lock_irq(&device->resource->req_lock);
- reclaim_finished_net_peer_reqs(device, &reclaimed);
- spin_unlock_irq(&device->resource->req_lock);
- list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
- drbd_free_net_peer_req(device, peer_req);
-}
-
-static void conn_reclaim_net_peer_reqs(struct drbd_connection *connection)
-{
- struct drbd_peer_device *peer_device;
- int vnr;
-
- rcu_read_lock();
- idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
- struct drbd_device *device = peer_device->device;
- if (!atomic_read(&device->pp_in_use_by_net))
- continue;
-
- kref_get(&device->kref);
- rcu_read_unlock();
- drbd_reclaim_net_peer_reqs(device);
- kref_put(&device->kref, drbd_destroy_device);
- rcu_read_lock();
- }
- rcu_read_unlock();
-}
-
/**
* drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
* @peer_device: DRBD device.
@@ -264,9 +113,8 @@ struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int
bool retry)
{
struct drbd_device *device = peer_device->device;
- struct page *page = NULL;
+ struct page *page;
struct net_conf *nc;
- DEFINE_WAIT(wait);
unsigned int mxb;
rcu_read_lock();
@@ -274,37 +122,9 @@ struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int
mxb = nc ? nc->max_buffers : 1000000;
rcu_read_unlock();
- if (atomic_read(&device->pp_in_use) < mxb)
- page = __drbd_alloc_pages(device, number);
-
- /* Try to keep the fast path fast, but occasionally we need
- * to reclaim the pages we lended to the network stack. */
- if (page && atomic_read(&device->pp_in_use_by_net) > 512)
- drbd_reclaim_net_peer_reqs(device);
-
- while (page == NULL) {
- prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);
-
- drbd_reclaim_net_peer_reqs(device);
-
- if (atomic_read(&device->pp_in_use) < mxb) {
- page = __drbd_alloc_pages(device, number);
- if (page)
- break;
- }
-
- if (!retry)
- break;
-
- if (signal_pending(current)) {
- drbd_warn(device, "drbd_alloc_pages interrupted!\n");
- break;
- }
-
- if (schedule_timeout(HZ/10) == 0)
- mxb = UINT_MAX;
- }
- finish_wait(&drbd_pp_wait, &wait);
+ if (atomic_read(&device->pp_in_use) >= mxb)
+ schedule_timeout_interruptible(HZ / 10);
+ page = __drbd_alloc_pages(number);
if (page)
atomic_add(number, &device->pp_in_use);
@@ -315,29 +135,25 @@ struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int
* Is also used from inside an other spin_lock_irq(&resource->req_lock);
* Either links the page chain back to the global pool,
* or returns all pages to the system. */
-static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net)
+static void drbd_free_pages(struct drbd_device *device, struct page *page)
{
- atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use;
- int i;
+ struct page *tmp;
+ int i = 0;
if (page == NULL)
return;
- if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * drbd_minor_count)
- i = page_chain_free(page);
- else {
- struct page *tmp;
- tmp = page_chain_tail(page, &i);
- spin_lock(&drbd_pp_lock);
- page_chain_add(&drbd_pp_pool, page, tmp);
- drbd_pp_vacant += i;
- spin_unlock(&drbd_pp_lock);
- }
- i = atomic_sub_return(i, a);
+ page_chain_for_each_safe(page, tmp) {
+ set_page_private(page, 0);
+ if (page_count(page) == 1)
+ mempool_free(page, &drbd_buffer_page_pool);
+ else
+ put_page(page);
+ i++;
+ }
+ i = atomic_sub_return(i, &device->pp_in_use);
if (i < 0)
- drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n",
- is_net ? "pp_in_use_by_net" : "pp_in_use", i);
- wake_up(&drbd_pp_wait);
+ drbd_warn(device, "ASSERTION FAILED: pp_in_use: %d < 0\n", i);
}
/*
@@ -364,7 +180,7 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto
struct drbd_device *device = peer_device->device;
struct drbd_peer_request *peer_req;
struct page *page = NULL;
- unsigned nr_pages = (payload_size + PAGE_SIZE -1) >> PAGE_SHIFT;
+ unsigned int nr_pages = PFN_UP(payload_size);
if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
return NULL;
@@ -381,6 +197,8 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto
gfpflags_allow_blocking(gfp_mask));
if (!page)
goto fail;
+ if (!mempool_is_saturated(&drbd_buffer_page_pool))
+ peer_req->flags |= EE_RELEASE_TO_MEMPOOL;
}
memset(peer_req, 0, sizeof(*peer_req));
@@ -404,16 +222,15 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto
return NULL;
}
-void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
- int is_net)
+void drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req)
{
might_sleep();
if (peer_req->flags & EE_HAS_DIGEST)
kfree(peer_req->digest);
- drbd_free_pages(device, peer_req->pages, is_net);
+ drbd_free_pages(device, peer_req->pages);
D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
D_ASSERT(device, drbd_interval_empty(&peer_req->i));
- if (!expect(!(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) {
+ if (!expect(device, !(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) {
peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
drbd_al_complete_io(device, &peer_req->i);
}
@@ -425,14 +242,13 @@ int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
LIST_HEAD(work_list);
struct drbd_peer_request *peer_req, *t;
int count = 0;
- int is_net = list == &device->net_ee;
spin_lock_irq(&device->resource->req_lock);
list_splice_init(list, &work_list);
spin_unlock_irq(&device->resource->req_lock);
list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
- __drbd_free_peer_req(device, peer_req, is_net);
+ drbd_free_peer_req(device, peer_req);
count++;
}
return count;
@@ -444,18 +260,13 @@ int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
static int drbd_finish_peer_reqs(struct drbd_device *device)
{
LIST_HEAD(work_list);
- LIST_HEAD(reclaimed);
struct drbd_peer_request *peer_req, *t;
int err = 0;
spin_lock_irq(&device->resource->req_lock);
- reclaim_finished_net_peer_reqs(device, &reclaimed);
list_splice_init(&device->done_ee, &work_list);
spin_unlock_irq(&device->resource->req_lock);
- list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
- drbd_free_net_peer_req(device, peer_req);
-
/* possible callbacks here:
* e_end_block, and e_end_resync_block, e_send_superseded.
* all ignore the last argument.
@@ -507,7 +318,7 @@ static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flag
struct msghdr msg = {
.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
};
- iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, size);
+ iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, size);
return sock_recvmsg(sock, &msg, msg.msg_flags);
}
@@ -639,7 +450,7 @@ static struct socket *drbd_try_connect(struct drbd_connection *connection)
* a free one dynamically.
*/
what = "bind before connect";
- err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
+ err = sock->ops->bind(sock, (struct sockaddr_unsized *) &src_in6, my_addr_len);
if (err < 0)
goto out;
@@ -647,7 +458,7 @@ static struct socket *drbd_try_connect(struct drbd_connection *connection)
* stay C_WF_CONNECTION, don't go Disconnecting! */
disconnect_on_error = 0;
what = "connect";
- err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);
+ err = sock->ops->connect(sock, (struct sockaddr_unsized *) &peer_in6, peer_addr_len, 0);
out:
if (err < 0) {
@@ -726,7 +537,7 @@ static int prepare_listen_socket(struct drbd_connection *connection, struct acce
drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);
what = "bind before listen";
- err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
+ err = s_listen->ops->bind(s_listen, (struct sockaddr_unsized *)&my_addr, my_addr_len);
if (err < 0)
goto out;
@@ -781,7 +592,7 @@ static struct socket *drbd_wait_for_connect(struct drbd_connection *connection,
timeo = connect_int * HZ;
/* 28.5% random jitter */
- timeo += (prandom_u32() & 1) ? timeo / 7 : -timeo / 7;
+ timeo += get_random_u32_below(2) ? timeo / 7 : -timeo / 7;
err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
if (err <= 0)
@@ -1004,7 +815,7 @@ retry:
drbd_warn(connection, "Error receiving initial packet\n");
sock_release(s);
randomize:
- if (prandom_u32() & 1)
+ if (get_random_u32_below(2))
goto retry;
}
}
@@ -1030,6 +841,9 @@ randomize:
sock.socket->sk->sk_allocation = GFP_NOIO;
msock.socket->sk->sk_allocation = GFP_NOIO;
+ sock.socket->sk->sk_use_task_frag = false;
+ msock.socket->sk->sk_use_task_frag = false;
+
sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;
@@ -1279,16 +1093,16 @@ static void one_flush_endio(struct bio *bio)
static void submit_one_flush(struct drbd_device *device, struct issue_flush_context *ctx)
{
- struct bio *bio = bio_alloc(GFP_NOIO, 0);
+ struct bio *bio = bio_alloc(device->ldev->backing_bdev, 0,
+ REQ_OP_WRITE | REQ_PREFLUSH, GFP_NOIO);
struct one_flush_context *octx = kmalloc(sizeof(*octx), GFP_NOIO);
- if (!bio || !octx) {
- drbd_warn(device, "Could not allocate a bio, CANNOT ISSUE FLUSH\n");
+
+ if (!octx) {
+ drbd_warn(device, "Could not allocate an octx, CANNOT ISSUE FLUSH\n");
/* FIXME: what else can I do now? disconnecting or detaching
* really does not help to improve the state of the world, either.
*/
- kfree(octx);
- if (bio)
- bio_put(bio);
+ bio_put(bio);
ctx->error = -ENOMEM;
put_ldev(device);
@@ -1298,10 +1112,8 @@ static void submit_one_flush(struct drbd_device *device, struct issue_flush_cont
octx->device = device;
octx->ctx = ctx;
- bio_set_dev(bio, device->ldev->backing_bdev);
bio->bi_private = octx;
bio->bi_end_io = one_flush_endio;
- bio->bi_opf = REQ_OP_FLUSH | REQ_PREFLUSH;
device->flush_jif = jiffies;
set_bit(FLUSH_PENDING, &device->flags);
@@ -1513,7 +1325,6 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin
int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, unsigned int nr_sectors, int flags)
{
struct block_device *bdev = device->ldev->backing_bdev;
- struct request_queue *q = bdev_get_queue(bdev);
sector_t tmp, nr;
unsigned int max_discard_sectors, granularity;
int alignment;
@@ -1523,10 +1334,10 @@ int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, u
goto zero_out;
/* Zero-sector (unknown) and one-sector granularities are the same. */
- granularity = max(q->limits.discard_granularity >> 9, 1U);
+ granularity = max(bdev_discard_granularity(bdev) >> 9, 1U);
alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;
- max_discard_sectors = min(q->limits.max_discard_sectors, (1U << 22));
+ max_discard_sectors = min(bdev_max_discard_sectors(bdev), (1U << 22));
max_discard_sectors -= max_discard_sectors % granularity;
if (unlikely(!max_discard_sectors))
goto zero_out;
@@ -1550,7 +1361,8 @@ int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, u
start = tmp;
}
while (nr_sectors >= max_discard_sectors) {
- err |= blkdev_issue_discard(bdev, start, max_discard_sectors, GFP_NOIO, 0);
+ err |= blkdev_issue_discard(bdev, start, max_discard_sectors,
+ GFP_NOIO);
nr_sectors -= max_discard_sectors;
start += max_discard_sectors;
}
@@ -1562,7 +1374,7 @@ int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, u
nr = nr_sectors;
nr -= (unsigned int)nr % granularity;
if (nr) {
- err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO, 0);
+ err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO);
nr_sectors -= nr;
start += nr;
}
@@ -1577,11 +1389,10 @@ int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, u
static bool can_do_reliable_discards(struct drbd_device *device)
{
- struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
struct disk_conf *dc;
bool can_do;
- if (!blk_queue_discard(q))
+ if (!bdev_max_discard_sectors(device->ldev->backing_bdev))
return false;
rcu_read_lock();
@@ -1606,21 +1417,19 @@ static void drbd_issue_peer_discard_or_zero_out(struct drbd_device *device, stru
drbd_endio_write_sec_final(peer_req);
}
-static void drbd_issue_peer_wsame(struct drbd_device *device,
- struct drbd_peer_request *peer_req)
+static int peer_request_fault_type(struct drbd_peer_request *peer_req)
{
- struct block_device *bdev = device->ldev->backing_bdev;
- sector_t s = peer_req->i.sector;
- sector_t nr = peer_req->i.size >> 9;
- if (blkdev_issue_write_same(bdev, s, nr, GFP_NOIO, peer_req->pages))
- peer_req->flags |= EE_WAS_ERROR;
- drbd_endio_write_sec_final(peer_req);
+ if (peer_req_op(peer_req) == REQ_OP_READ) {
+ return peer_req->flags & EE_APPLICATION ?
+ DRBD_FAULT_DT_RD : DRBD_FAULT_RS_RD;
+ } else {
+ return peer_req->flags & EE_APPLICATION ?
+ DRBD_FAULT_DT_WR : DRBD_FAULT_RS_WR;
+ }
}
-
-/*
+/**
* drbd_submit_peer_request()
- * @device: DRBD device.
* @peer_req: peer request
*
* May spread the pages to multiple bios,
@@ -1634,19 +1443,16 @@ static void drbd_issue_peer_wsame(struct drbd_device *device,
* on certain Xen deployments.
*/
/* TODO allocate from our own bio_set. */
-int drbd_submit_peer_request(struct drbd_device *device,
- struct drbd_peer_request *peer_req,
- const unsigned op, const unsigned op_flags,
- const int fault_type)
+int drbd_submit_peer_request(struct drbd_peer_request *peer_req)
{
+ struct drbd_device *device = peer_req->peer_device->device;
struct bio *bios = NULL;
struct bio *bio;
struct page *page = peer_req->pages;
sector_t sector = peer_req->i.sector;
- unsigned data_size = peer_req->i.size;
- unsigned n_bios = 0;
- unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
- int err = -ENOMEM;
+ unsigned int data_size = peer_req->i.size;
+ unsigned int n_bios = 0;
+ unsigned int nr_pages = PFN_UP(data_size);
/* TRIM/DISCARD: for now, always use the helper function
* blkdev_issue_zeroout(..., discard=true).
@@ -1654,7 +1460,7 @@ int drbd_submit_peer_request(struct drbd_device *device,
* Correctness first, performance later. Next step is to code an
* asynchronous variant of the same.
*/
- if (peer_req->flags & (EE_TRIM|EE_WRITE_SAME|EE_ZEROOUT)) {
+ if (peer_req->flags & (EE_TRIM | EE_ZEROOUT)) {
/* wait for all pending IO completions, before we start
* zeroing things out. */
conn_wait_active_ee_empty(peer_req->peer_device->connection);
@@ -1671,10 +1477,7 @@ int drbd_submit_peer_request(struct drbd_device *device,
spin_unlock_irq(&device->resource->req_lock);
}
- if (peer_req->flags & (EE_TRIM|EE_ZEROOUT))
- drbd_issue_peer_discard_or_zero_out(device, peer_req);
- else /* EE_WRITE_SAME */
- drbd_issue_peer_wsame(device, peer_req);
+ drbd_issue_peer_discard_or_zero_out(device, peer_req);
return 0;
}
@@ -1687,15 +1490,20 @@ int drbd_submit_peer_request(struct drbd_device *device,
* generated bio, but a bio allocated on behalf of the peer.
*/
next_bio:
- bio = bio_alloc(GFP_NOIO, nr_pages);
- if (!bio) {
- drbd_err(device, "submit_ee: Allocation of a bio failed (nr_pages=%u)\n", nr_pages);
- goto fail;
+ /* _DISCARD, _WRITE_ZEROES handled above.
+ * REQ_OP_FLUSH (empty flush) not expected,
+ * should have been mapped to a "drbd protocol barrier".
+ * REQ_OP_SECURE_ERASE: I don't see how we could ever support that.
+ */
+ if (!(peer_req_op(peer_req) == REQ_OP_WRITE ||
+ peer_req_op(peer_req) == REQ_OP_READ)) {
+ drbd_err(device, "Invalid bio op received: 0x%x\n", peer_req->opf);
+ return -EINVAL;
}
+
+ bio = bio_alloc(device->ldev->backing_bdev, nr_pages, peer_req->opf, GFP_NOIO);
/* > peer_req->i.sector, unless this is the first bio */
bio->bi_iter.bi_sector = sector;
- bio_set_dev(bio, device->ldev->backing_bdev);
- bio_set_op_attrs(bio, op, op_flags);
bio->bi_private = peer_req;
bio->bi_end_io = drbd_peer_request_endio;
@@ -1723,17 +1531,9 @@ next_bio:
bios = bios->bi_next;
bio->bi_next = NULL;
- drbd_submit_bio_noacct(device, fault_type, bio);
+ drbd_submit_bio_noacct(device, peer_request_fault_type(peer_req), bio);
} while (bios);
return 0;
-
-fail:
- while (bios) {
- bio = bios;
- bios = bios->bi_next;
- bio_put(bio);
- }
- return err;
}
static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
@@ -1870,7 +1670,6 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
unsigned long *data;
struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;
struct p_trim *zeroes = (pi->cmd == P_ZEROES) ? pi->data : NULL;
- struct p_trim *wsame = (pi->cmd == P_WSAME) ? pi->data : NULL;
digest_size = 0;
if (!trim && peer_device->connection->peer_integrity_tfm) {
@@ -1885,36 +1684,24 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
data_size -= digest_size;
}
- /* assume request_size == data_size, but special case trim and wsame. */
+ /* assume request_size == data_size, but special case trim. */
ds = data_size;
if (trim) {
- if (!expect(data_size == 0))
+ if (!expect(peer_device, data_size == 0))
return NULL;
ds = be32_to_cpu(trim->size);
} else if (zeroes) {
- if (!expect(data_size == 0))
+ if (!expect(peer_device, data_size == 0))
return NULL;
ds = be32_to_cpu(zeroes->size);
- } else if (wsame) {
- if (data_size != queue_logical_block_size(device->rq_queue)) {
- drbd_err(peer_device, "data size (%u) != drbd logical block size (%u)\n",
- data_size, queue_logical_block_size(device->rq_queue));
- return NULL;
- }
- if (data_size != bdev_logical_block_size(device->ldev->backing_bdev)) {
- drbd_err(peer_device, "data size (%u) != backend logical block size (%u)\n",
- data_size, bdev_logical_block_size(device->ldev->backing_bdev));
- return NULL;
- }
- ds = be32_to_cpu(wsame->size);
}
- if (!expect(IS_ALIGNED(ds, 512)))
+ if (!expect(peer_device, IS_ALIGNED(ds, 512)))
return NULL;
- if (trim || wsame || zeroes) {
- if (!expect(ds <= (DRBD_MAX_BBIO_SECTORS << 9)))
+ if (trim || zeroes) {
+ if (!expect(peer_device, ds <= (DRBD_MAX_BBIO_SECTORS << 9)))
return NULL;
- } else if (!expect(ds <= DRBD_MAX_BIO_SIZE))
+ } else if (!expect(peer_device, ds <= DRBD_MAX_BIO_SIZE))
return NULL;
/* even though we trust out peer,
@@ -1943,21 +1730,19 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
peer_req->flags |= EE_ZEROOUT;
return peer_req;
}
- if (wsame)
- peer_req->flags |= EE_WRITE_SAME;
/* receive payload size bytes into page chain */
ds = data_size;
page = peer_req->pages;
page_chain_for_each(page) {
unsigned len = min_t(int, ds, PAGE_SIZE);
- data = kmap(page);
+ data = kmap_local_page(page);
err = drbd_recv_all_warn(peer_device->connection, data, len);
if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
drbd_err(device, "Fault injection: Corrupting data on receive\n");
data[0] = data[0] ^ (unsigned long)-1;
}
- kunmap(page);
+ kunmap_local(data);
if (err) {
drbd_free_peer_req(device, peer_req);
return NULL;
@@ -1992,7 +1777,7 @@ static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
page = drbd_alloc_pages(peer_device, 1, 1);
- data = kmap(page);
+ data = kmap_local_page(page);
while (data_size) {
unsigned int len = min_t(int, data_size, PAGE_SIZE);
@@ -2001,8 +1786,8 @@ static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
break;
data_size -= len;
}
- kunmap(page);
- drbd_free_pages(peer_device->device, page, 0);
+ kunmap_local(data);
+ drbd_free_pages(peer_device->device, page);
return err;
}
@@ -2033,10 +1818,10 @@ static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_req
D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);
bio_for_each_segment(bvec, bio, iter) {
- void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
+ void *mapped = bvec_kmap_local(&bvec);
expect = min_t(int, data_size, bvec.bv_len);
err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
- kunmap(bvec.bv_page);
+ kunmap_local(mapped);
if (err)
return err;
data_size -= expect;
@@ -2070,11 +1855,11 @@ static int e_end_resync_block(struct drbd_work *w, int unused)
D_ASSERT(device, drbd_interval_empty(&peer_req->i));
if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
- drbd_set_in_sync(device, sector, peer_req->i.size);
+ drbd_set_in_sync(peer_device, sector, peer_req->i.size);
err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
} else {
/* Record failure to sync */
- drbd_rs_failed_io(device, sector, peer_req->i.size);
+ drbd_rs_failed_io(peer_device, sector, peer_req->i.size);
err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
}
@@ -2093,13 +1878,14 @@ static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t secto
if (!peer_req)
goto fail;
- dec_rs_pending(device);
+ dec_rs_pending(peer_device);
inc_unacked(device);
/* corresponding dec_unacked() in e_end_resync_block()
* respective _drbd_clear_done_ee */
peer_req->w.cb = e_end_resync_block;
+ peer_req->opf = REQ_OP_WRITE;
peer_req->submit_jif = jiffies;
spin_lock_irq(&device->resource->req_lock);
@@ -2107,8 +1893,7 @@ static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t secto
spin_unlock_irq(&device->resource->req_lock);
atomic_add(pi->size >> 9, &device->rs_sect_ev);
- if (drbd_submit_peer_request(device, peer_req, REQ_OP_WRITE, 0,
- DRBD_FAULT_RS_WR) == 0)
+ if (drbd_submit_peer_request(peer_req) == 0)
return 0;
/* don't care for the reason here */
@@ -2162,12 +1947,9 @@ static int receive_DataReply(struct drbd_connection *connection, struct packet_i
if (unlikely(!req))
return -EIO;
- /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
- * special casing it there for the various failure cases.
- * still no race with drbd_fail_pending_reads */
err = recv_dless_read(peer_device, req, sector, pi->size);
if (!err)
- req_mod(req, DATA_RECEIVED);
+ req_mod(req, DATA_RECEIVED, peer_device);
/* else: nothing. handled from drbd_disconnect...
* I don't think we may complete this just yet
* in case we are "on-disconnect: freeze" */
@@ -2197,7 +1979,7 @@ static int receive_RSDataReply(struct drbd_connection *connection, struct packet
* or in drbd_peer_request_endio. */
err = recv_resync_read(peer_device, sector, pi);
} else {
- if (__ratelimit(&drbd_ratelimit_state))
+ if (drbd_ratelimit())
drbd_err(device, "Can not write resync data to local disk.\n");
err = drbd_drain_block(peer_device, pi->size);
@@ -2225,7 +2007,7 @@ static void restart_conflicting_writes(struct drbd_device *device,
continue;
/* as it is RQ_POSTPONED, this will cause it to
* be queued on the retry workqueue. */
- __req_mod(req, CONFLICT_RESOLVED, NULL);
+ __req_mod(req, CONFLICT_RESOLVED, NULL, NULL);
}
}
@@ -2249,7 +2031,7 @@ static int e_end_block(struct drbd_work *w, int cancel)
P_RS_WRITE_ACK : P_WRITE_ACK;
err = drbd_send_ack(peer_device, pcmd, peer_req);
if (pcmd == P_RS_WRITE_ACK)
- drbd_set_in_sync(device, sector, peer_req->i.size);
+ drbd_set_in_sync(peer_device, sector, peer_req->i.size);
} else {
err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
/* we expect it to be marked out of sync anyways...
@@ -2427,31 +2209,29 @@ static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, co
return ret;
}
-/* see also bio_flags_to_wire()
- * DRBD_REQ_*, because we need to semantically map the flags to data packet
- * flags and back. We may replicate to other kernel versions. */
-static unsigned long wire_flags_to_bio_flags(u32 dpf)
-{
- return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
- (dpf & DP_FUA ? REQ_FUA : 0) |
- (dpf & DP_FLUSH ? REQ_PREFLUSH : 0);
-}
-
-static unsigned long wire_flags_to_bio_op(u32 dpf)
+static enum req_op wire_flags_to_bio_op(u32 dpf)
{
if (dpf & DP_ZEROES)
return REQ_OP_WRITE_ZEROES;
if (dpf & DP_DISCARD)
return REQ_OP_DISCARD;
- if (dpf & DP_WSAME)
- return REQ_OP_WRITE_SAME;
else
return REQ_OP_WRITE;
}
+/* see also bio_flags_to_wire() */
+static blk_opf_t wire_flags_to_bio(struct drbd_connection *connection, u32 dpf)
+{
+ return wire_flags_to_bio_op(dpf) |
+ (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
+ (dpf & DP_FUA ? REQ_FUA : 0) |
+ (dpf & DP_FLUSH ? REQ_PREFLUSH : 0);
+}
+
static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
unsigned int size)
{
+ struct drbd_peer_device *peer_device = first_peer_device(device);
struct drbd_interval *i;
repeat:
@@ -2465,7 +2245,7 @@ static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
if (!(req->rq_state & RQ_POSTPONED))
continue;
req->rq_state &= ~RQ_POSTPONED;
- __req_mod(req, NEG_ACKED, &m);
+ __req_mod(req, NEG_ACKED, peer_device, &m);
spin_unlock_irq(&device->resource->req_lock);
if (m.bio)
complete_master_bio(device, &m);
@@ -2532,7 +2312,11 @@ static int handle_write_conflicts(struct drbd_device *device,
peer_req->w.cb = superseded ? e_send_superseded :
e_send_retry_write;
list_add_tail(&peer_req->w.list, &device->done_ee);
- queue_work(connection->ack_sender, &peer_req->peer_device->send_acks_work);
+ /* put is in drbd_send_acks_wf() */
+ kref_get(&device->kref);
+ if (!queue_work(connection->ack_sender,
+ &peer_req->peer_device->send_acks_work))
+ kref_put(&device->kref, drbd_destroy_device);
err = -ENOENT;
goto out;
@@ -2592,7 +2376,6 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
struct drbd_peer_request *peer_req;
struct p_data *p = pi->data;
u32 peer_seq = be32_to_cpu(p->seq_num);
- int op, op_flags;
u32 dp_flags;
int err, tp;
@@ -2631,11 +2414,10 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
peer_req->flags |= EE_APPLICATION;
dp_flags = be32_to_cpu(p->dp_flags);
- op = wire_flags_to_bio_op(dp_flags);
- op_flags = wire_flags_to_bio_flags(dp_flags);
+ peer_req->opf = wire_flags_to_bio(connection, dp_flags);
if (pi->cmd == P_TRIM) {
D_ASSERT(peer_device, peer_req->i.size > 0);
- D_ASSERT(peer_device, op == REQ_OP_DISCARD);
+ D_ASSERT(peer_device, peer_req_op(peer_req) == REQ_OP_DISCARD);
D_ASSERT(peer_device, peer_req->pages == NULL);
/* need to play safe: an older DRBD sender
* may mean zero-out while sending P_TRIM. */
@@ -2643,7 +2425,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
peer_req->flags |= EE_ZEROOUT;
} else if (pi->cmd == P_ZEROES) {
D_ASSERT(peer_device, peer_req->i.size > 0);
- D_ASSERT(peer_device, op == REQ_OP_WRITE_ZEROES);
+ D_ASSERT(peer_device, peer_req_op(peer_req) == REQ_OP_WRITE_ZEROES);
D_ASSERT(peer_device, peer_req->pages == NULL);
/* Do (not) pass down BLKDEV_ZERO_NOUNMAP? */
if (dp_flags & DP_DISCARD)
@@ -2711,11 +2493,11 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
update_peer_seq(peer_device, peer_seq);
spin_lock_irq(&device->resource->req_lock);
}
- /* TRIM and WRITE_SAME are processed synchronously,
+ /* TRIM is processed synchronously,
* we wait for all pending requests, respectively wait for
* active_ee to become empty in drbd_submit_peer_request();
* better not add ourselves here. */
- if ((peer_req->flags & (EE_TRIM|EE_WRITE_SAME|EE_ZEROOUT)) == 0)
+ if ((peer_req->flags & (EE_TRIM | EE_ZEROOUT)) == 0)
list_add_tail(&peer_req->w.list, &device->active_ee);
spin_unlock_irq(&device->resource->req_lock);
@@ -2724,14 +2506,13 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
if (device->state.pdsk < D_INCONSISTENT) {
/* In case we have the only disk of the cluster, */
- drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
+ drbd_set_out_of_sync(peer_device, peer_req->i.sector, peer_req->i.size);
peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
drbd_al_begin_io(device, &peer_req->i);
peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
}
- err = drbd_submit_peer_request(device, peer_req, op, op_flags,
- DRBD_FAULT_DT_WR);
+ err = drbd_submit_peer_request(peer_req);
if (!err)
return 0;
@@ -2764,9 +2545,10 @@ out_interrupted:
* The current sync rate used here uses only the most recent two step marks,
* to have a short time average so we can react faster.
*/
-bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
+bool drbd_rs_should_slow_down(struct drbd_peer_device *peer_device, sector_t sector,
bool throttle_if_app_is_waiting)
{
+ struct drbd_device *device = peer_device->device;
struct lc_element *tmp;
bool throttle = drbd_rs_c_min_rate_throttle(device);
@@ -2842,7 +2624,6 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
struct drbd_peer_request *peer_req;
struct digest_info *di = NULL;
int size, verb;
- unsigned int fault_type;
struct p_block_req *p = pi->data;
peer_device = conn_peer_device(connection, pi->vnr);
@@ -2879,13 +2660,13 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
break;
case P_OV_REPLY:
verb = 0;
- dec_rs_pending(device);
+ dec_rs_pending(peer_device);
drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
break;
default:
BUG();
}
- if (verb && __ratelimit(&drbd_ratelimit_state))
+ if (verb && drbd_ratelimit())
drbd_err(device, "Can not satisfy peer's read request, "
"no local data.\n");
@@ -2902,11 +2683,11 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
put_ldev(device);
return -ENOMEM;
}
+ peer_req->opf = REQ_OP_READ;
switch (pi->cmd) {
case P_DATA_REQUEST:
peer_req->w.cb = w_e_end_data_req;
- fault_type = DRBD_FAULT_DT_RD;
/* application IO, don't drbd_rs_begin_io */
peer_req->flags |= EE_APPLICATION;
goto submit;
@@ -2920,14 +2701,12 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
fallthrough;
case P_RS_DATA_REQUEST:
peer_req->w.cb = w_e_end_rsdata_req;
- fault_type = DRBD_FAULT_RS_RD;
/* used in the sector offset progress display */
device->bm_resync_fo = BM_SECT_TO_BIT(sector);
break;
case P_OV_REPLY:
case P_CSUM_RS_REQUEST:
- fault_type = DRBD_FAULT_RS_RD;
di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
if (!di)
goto out_free_e;
@@ -2952,7 +2731,7 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
/* track progress, we may need to throttle */
atomic_add(size >> 9, &device->rs_sect_in);
peer_req->w.cb = w_e_end_ov_reply;
- dec_rs_pending(device);
+ dec_rs_pending(peer_device);
/* drbd_rs_begin_io done when we sent this request,
* but accounting still needs to be done. */
goto submit_for_resync;
@@ -2976,7 +2755,6 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
(unsigned long long)sector);
}
peer_req->w.cb = w_e_end_ov_req;
- fault_type = DRBD_FAULT_RS_RD;
break;
default:
@@ -3016,7 +2794,7 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
update_receiver_timing_details(connection, drbd_rs_should_slow_down);
if (device->state.peer != R_PRIMARY
- && drbd_rs_should_slow_down(device, sector, false))
+ && drbd_rs_should_slow_down(peer_device, sector, false))
schedule_timeout_uninterruptible(HZ/10);
update_receiver_timing_details(connection, drbd_rs_begin_io);
if (drbd_rs_begin_io(device, sector))
@@ -3028,8 +2806,7 @@ submit_for_resync:
submit:
update_receiver_timing_details(connection, drbd_submit_peer_request);
inc_unacked(device);
- if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ, 0,
- fault_type) == 0)
+ if (drbd_submit_peer_request(peer_req) == 0)
return 0;
/* don't care for the reason here */
@@ -3266,10 +3043,11 @@ static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
-1096 requires proto 96
*/
-static int drbd_uuid_compare(struct drbd_device *const device, enum drbd_role const peer_role, int *rule_nr) __must_hold(local)
+static int drbd_uuid_compare(struct drbd_peer_device *const peer_device,
+ enum drbd_role const peer_role, int *rule_nr) __must_hold(local)
{
- struct drbd_peer_device *const peer_device = first_peer_device(device);
- struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
+ struct drbd_connection *const connection = peer_device->connection;
+ struct drbd_device *device = peer_device->device;
u64 self, peer;
int i, j;
@@ -3505,7 +3283,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
drbd_uuid_dump(device, "peer", device->p_uuid,
device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
- hg = drbd_uuid_compare(device, peer_role, &rule_nr);
+ hg = drbd_uuid_compare(peer_device, peer_role, &rule_nr);
spin_unlock_irq(&device->ldev->md.uuid_lock);
drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
@@ -3631,7 +3409,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
if (abs(hg) >= 2) {
drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
- BM_LOCKED_SET_ALLOWED))
+ BM_LOCKED_SET_ALLOWED, NULL))
return C_MASK;
}
@@ -3799,8 +3577,7 @@ static int receive_protocol(struct drbd_connection *connection, struct packet_in
drbd_info(connection, "peer data-integrity-alg: %s\n",
integrity_alg[0] ? integrity_alg : "(none)");
- synchronize_rcu();
- kfree(old_net_conf);
+ kvfree_rcu_mightsleep(old_net_conf);
return 0;
disconnect_rcu_unlock:
@@ -3921,7 +3698,8 @@ static int receive_SyncParam(struct drbd_connection *connection, struct packet_i
/* initialize verify_alg and csums_alg */
p = pi->data;
- memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
+ BUILD_BUG_ON(sizeof(p->algs) != 2 * SHARED_SECRET_MAX);
+ memset(&p->algs, 0, sizeof(p->algs));
err = drbd_recv_all(peer_device->connection, p, header_size);
if (err)
@@ -3950,7 +3728,6 @@ static int receive_SyncParam(struct drbd_connection *connection, struct packet_i
drbd_err(device, "verify-alg of wrong size, "
"peer wants %u, accepting only up to %u byte\n",
data_size, SHARED_SECRET_MAX);
- err = -EIO;
goto reconnect;
}
@@ -4168,8 +3945,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
mutex_unlock(&connection->resource->conf_update);
- synchronize_rcu();
- kfree(old_disk_conf);
+ kvfree_rcu_mightsleep(old_disk_conf);
drbd_info(device, "Peer sets u_size to %lu sectors (old: %lu)\n",
(unsigned long)p_usize, (unsigned long)my_usize);
@@ -4312,7 +4088,7 @@ static int receive_uuids(struct drbd_connection *connection, struct packet_info
drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n");
drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
"clear_n_write from receive_uuids",
- BM_LOCKED_TEST_ALLOWED);
+ BM_LOCKED_TEST_ALLOWED, NULL);
_drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
_drbd_uuid_set(device, UI_BITMAP, 0);
_drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
@@ -4490,7 +4266,7 @@ static int receive_state(struct drbd_connection *connection, struct packet_info
else if (os.conn >= C_SYNC_SOURCE &&
peer_state.conn == C_CONNECTED) {
if (drbd_bm_total_weight(device) <= device->rs_failed)
- drbd_resync_finished(device);
+ drbd_resync_finished(peer_device);
return 0;
}
}
@@ -4498,8 +4274,8 @@ static int receive_state(struct drbd_connection *connection, struct packet_info
/* explicit verify finished notification, stop sector reached. */
if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
- ov_out_of_sync_print(device);
- drbd_resync_finished(device);
+ ov_out_of_sync_print(peer_device);
+ drbd_resync_finished(peer_device);
return 0;
}
@@ -4808,11 +4584,11 @@ decode_bitmap_c(struct drbd_peer_device *peer_device,
return -EIO;
}
-void INFO_bm_xfer_stats(struct drbd_device *device,
+void INFO_bm_xfer_stats(struct drbd_peer_device *peer_device,
const char *direction, struct bm_xfer_ctx *c)
{
/* what would it take to transfer it "plaintext" */
- unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
+ unsigned int header_size = drbd_header_size(peer_device->connection);
unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
unsigned int plain =
header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
@@ -4836,7 +4612,7 @@ void INFO_bm_xfer_stats(struct drbd_device *device,
r = 1000;
r = 1000 - r;
- drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
+ drbd_info(peer_device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
"total %u; compression: %u.%u%%\n",
direction,
c->bytes[1], c->packets[1],
@@ -4914,12 +4690,12 @@ static int receive_bitmap(struct drbd_connection *connection, struct packet_info
goto out;
}
- INFO_bm_xfer_stats(device, "receive", &c);
+ INFO_bm_xfer_stats(peer_device, "receive", &c);
if (device->state.conn == C_WF_BITMAP_T) {
enum drbd_state_rv rv;
- err = drbd_send_bitmap(device);
+ err = drbd_send_bitmap(device, peer_device);
if (err)
goto out;
/* Omit CS_ORDERED with this state transition to avoid deadlocks. */
@@ -4977,7 +4753,7 @@ static int receive_out_of_sync(struct drbd_connection *connection, struct packet
drbd_conn_str(device->state.conn));
}
- drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
+ drbd_set_out_of_sync(peer_device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
return 0;
}
@@ -4998,11 +4774,10 @@ static int receive_rs_deallocated(struct drbd_connection *connection, struct pac
sector = be64_to_cpu(p->sector);
size = be32_to_cpu(p->blksize);
- dec_rs_pending(device);
+ dec_rs_pending(peer_device);
if (get_ldev(device)) {
struct drbd_peer_request *peer_req;
- const int op = REQ_OP_WRITE_ZEROES;
peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER, sector,
size, 0, GFP_NOIO);
@@ -5012,6 +4787,7 @@ static int receive_rs_deallocated(struct drbd_connection *connection, struct pac
}
peer_req->w.cb = e_end_resync_block;
+ peer_req->opf = REQ_OP_DISCARD;
peer_req->submit_jif = jiffies;
peer_req->flags |= EE_TRIM;
@@ -5020,7 +4796,7 @@ static int receive_rs_deallocated(struct drbd_connection *connection, struct pac
spin_unlock_irq(&device->resource->req_lock);
atomic_add(pi->size >> 9, &device->rs_sect_ev);
- err = drbd_submit_peer_request(device, peer_req, op, 0, DRBD_FAULT_RS_WR);
+ err = drbd_submit_peer_request(peer_req);
if (err) {
spin_lock_irq(&device->resource->req_lock);
@@ -5083,7 +4859,6 @@ static struct data_cmd drbd_cmd_handler[] = {
[P_TRIM] = { 0, sizeof(struct p_trim), receive_Data },
[P_ZEROES] = { 0, sizeof(struct p_trim), receive_Data },
[P_RS_DEALLOCATED] = { 0, sizeof(struct p_block_desc), receive_rs_deallocated },
- [P_WSAME] = { 1, sizeof(struct p_wsame), receive_Data },
};
static void drbdd(struct drbd_connection *connection)
@@ -5228,7 +5003,7 @@ static int drbd_disconnected(struct drbd_peer_device *peer_device)
atomic_set(&device->rs_pending_cnt, 0);
wake_up(&device->misc_wait);
- del_timer_sync(&device->resync_timer);
+ timer_delete_sync(&device->resync_timer);
resync_timer_fn(&device->resync_timer);
/* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
@@ -5257,20 +5032,10 @@ static int drbd_disconnected(struct drbd_peer_device *peer_device)
if (get_ldev(device)) {
drbd_bitmap_io(device, &drbd_bm_write_copy_pages,
- "write from disconnected", BM_LOCKED_CHANGE_ALLOWED);
+ "write from disconnected", BM_LOCKED_CHANGE_ALLOWED, NULL);
put_ldev(device);
}
- /* tcp_close and release of sendpage pages can be deferred. I don't
- * want to use SO_LINGER, because apparently it can be deferred for
- * more than 20 seconds (longest time I checked).
- *
- * Actually we don't care for exactly when the network stack does its
- * put_page(), but release our reference on these pages right here.
- */
- i = drbd_free_peer_reqs(device, &device->net_ee);
- if (i)
- drbd_info(device, "net_ee not empty, killed %u entries\n", i);
i = atomic_read(&device->pp_in_use_by_net);
if (i)
drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i);
@@ -5691,22 +5456,23 @@ static int got_IsInSync(struct drbd_connection *connection, struct packet_info *
if (get_ldev(device)) {
drbd_rs_complete_io(device, sector);
- drbd_set_in_sync(device, sector, blksize);
+ drbd_set_in_sync(peer_device, sector, blksize);
/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
put_ldev(device);
}
- dec_rs_pending(device);
+ dec_rs_pending(peer_device);
atomic_add(blksize >> 9, &device->rs_sect_in);
return 0;
}
static int
-validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector,
+validate_req_change_req_state(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
struct rb_root *root, const char *func,
enum drbd_req_event what, bool missing_ok)
{
+ struct drbd_device *device = peer_device->device;
struct drbd_request *req;
struct bio_and_error m;
@@ -5716,7 +5482,7 @@ validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t secto
spin_unlock_irq(&device->resource->req_lock);
return -EIO;
}
- __req_mod(req, what, &m);
+ __req_mod(req, what, peer_device, &m);
spin_unlock_irq(&device->resource->req_lock);
if (m.bio)
@@ -5741,8 +5507,8 @@ static int got_BlockAck(struct drbd_connection *connection, struct packet_info *
update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
if (p->block_id == ID_SYNCER) {
- drbd_set_in_sync(device, sector, blksize);
- dec_rs_pending(device);
+ drbd_set_in_sync(peer_device, sector, blksize);
+ dec_rs_pending(peer_device);
return 0;
}
switch (pi->cmd) {
@@ -5765,7 +5531,7 @@ static int got_BlockAck(struct drbd_connection *connection, struct packet_info *
BUG();
}
- return validate_req_change_req_state(device, p->block_id, sector,
+ return validate_req_change_req_state(peer_device, p->block_id, sector,
&device->write_requests, __func__,
what, false);
}
@@ -5787,12 +5553,12 @@ static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi
update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
if (p->block_id == ID_SYNCER) {
- dec_rs_pending(device);
- drbd_rs_failed_io(device, sector, size);
+ dec_rs_pending(peer_device);
+ drbd_rs_failed_io(peer_device, sector, size);
return 0;
}
- err = validate_req_change_req_state(device, p->block_id, sector,
+ err = validate_req_change_req_state(peer_device, p->block_id, sector,
&device->write_requests, __func__,
NEG_ACKED, true);
if (err) {
@@ -5801,7 +5567,7 @@ static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi
request is no longer in the collision hash. */
/* In Protocol B we might already have got a P_RECV_ACK
but then get a P_NEG_ACK afterwards. */
- drbd_set_out_of_sync(device, sector, size);
+ drbd_set_out_of_sync(peer_device, sector, size);
}
return 0;
}
@@ -5823,7 +5589,7 @@ static int got_NegDReply(struct drbd_connection *connection, struct packet_info
drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n",
(unsigned long long)sector, be32_to_cpu(p->blksize));
- return validate_req_change_req_state(device, p->block_id, sector,
+ return validate_req_change_req_state(peer_device, p->block_id, sector,
&device->read_requests, __func__,
NEG_ACKED, false);
}
@@ -5846,13 +5612,13 @@ static int got_NegRSDReply(struct drbd_connection *connection, struct packet_inf
update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
- dec_rs_pending(device);
+ dec_rs_pending(peer_device);
if (get_ldev_if_state(device, D_FAILED)) {
drbd_rs_complete_io(device, sector);
switch (pi->cmd) {
case P_NEG_RS_DREPLY:
- drbd_rs_failed_io(device, sector, size);
+ drbd_rs_failed_io(peer_device, sector, size);
break;
case P_RS_CANCEL:
break;
@@ -5909,21 +5675,21 @@ static int got_OVResult(struct drbd_connection *connection, struct packet_info *
update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
- drbd_ov_out_of_sync_found(device, sector, size);
+ drbd_ov_out_of_sync_found(peer_device, sector, size);
else
- ov_out_of_sync_print(device);
+ ov_out_of_sync_print(peer_device);
if (!get_ldev(device))
return 0;
drbd_rs_complete_io(device, sector);
- dec_rs_pending(device);
+ dec_rs_pending(peer_device);
--device->ov_left;
/* let's advance progress step marks only for every other megabyte */
if ((device->ov_left & 0x200) == 0x200)
- drbd_advance_rs_marks(device, device->ov_left);
+ drbd_advance_rs_marks(peer_device, device->ov_left);
if (device->ov_left == 0) {
dw = kmalloc(sizeof(*dw), GFP_NOIO);
@@ -5933,8 +5699,8 @@ static int got_OVResult(struct drbd_connection *connection, struct packet_info *
drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
} else {
drbd_err(device, "kmalloc(dw) failed.");
- ov_out_of_sync_print(device);
- drbd_resync_finished(device);
+ ov_out_of_sync_print(peer_device);
+ drbd_resync_finished(peer_device);
}
}
put_ldev(device);
@@ -6016,8 +5782,6 @@ int drbd_ack_receiver(struct drbd_thread *thi)
while (get_t_state(thi) == RUNNING) {
drbd_thread_current_set_cpu(thi);
- conn_reclaim_net_peer_reqs(connection);
-
if (test_and_clear_bit(SEND_PING, &connection->flags)) {
if (drbd_send_ping(connection)) {
drbd_err(connection, "drbd_send_ping has failed\n");