summaryrefslogtreecommitdiff
path: root/net/ceph/messenger.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ceph/messenger.c')
-rw-r--r--net/ceph/messenger.c106
1 files changed, 94 insertions, 12 deletions
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 5eb4898cccd4..70b25f4ecba6 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -252,7 +252,8 @@ int __init ceph_msgr_init(void)
* The number of active work items is limited by the number of
* connections, so leave @max_active at default.
*/
- ceph_msgr_wq = alloc_workqueue("ceph-msgr", WQ_MEM_RECLAIM, 0);
+ ceph_msgr_wq = alloc_workqueue("ceph-msgr",
+ WQ_MEM_RECLAIM | WQ_PERCPU, 0);
if (ceph_msgr_wq)
return 0;
@@ -459,8 +460,8 @@ int ceph_tcp_connect(struct ceph_connection *con)
set_sock_callbacks(sock, con);
con_sock_state_connecting(con);
- ret = sock->ops->connect(sock, (struct sockaddr *)&ss, sizeof(ss),
- O_NONBLOCK);
+ ret = kernel_connect(sock, (struct sockaddr_unsized *)&ss, sizeof(ss),
+ O_NONBLOCK);
if (ret == -EINPROGRESS) {
dout("connect %s EINPROGRESS sk_state = %u\n",
ceph_pr_addr(&con->peer_addr),
@@ -969,6 +970,62 @@ static bool ceph_msg_data_pagelist_advance(struct ceph_msg_data_cursor *cursor,
return true;
}
+static void ceph_msg_data_iter_cursor_init(struct ceph_msg_data_cursor *cursor,
+ size_t length)
+{
+ struct ceph_msg_data *data = cursor->data;
+
+ cursor->iov_iter = data->iter;
+ cursor->lastlen = 0;
+ iov_iter_truncate(&cursor->iov_iter, length);
+ cursor->resid = iov_iter_count(&cursor->iov_iter);
+}
+
+static struct page *ceph_msg_data_iter_next(struct ceph_msg_data_cursor *cursor,
+ size_t *page_offset, size_t *length)
+{
+ struct page *page;
+ ssize_t len;
+
+ if (cursor->lastlen)
+ iov_iter_revert(&cursor->iov_iter, cursor->lastlen);
+
+ len = iov_iter_get_pages2(&cursor->iov_iter, &page, PAGE_SIZE,
+ 1, page_offset);
+ BUG_ON(len < 0);
+
+ cursor->lastlen = len;
+
+ /*
+ * FIXME: The assumption is that the pages represented by the iov_iter
+ * are pinned, with the references held by the upper-level
+ * callers, or by virtue of being under writeback. Eventually,
+ * we'll get an iov_iter_get_pages2 variant that doesn't take
+ * page refs. Until then, just put the page ref.
+ */
+ VM_BUG_ON_PAGE(!PageWriteback(page) && page_count(page) < 2, page);
+ put_page(page);
+
+ *length = min_t(size_t, len, cursor->resid);
+ return page;
+}
+
+static bool ceph_msg_data_iter_advance(struct ceph_msg_data_cursor *cursor,
+ size_t bytes)
+{
+ BUG_ON(bytes > cursor->resid);
+ cursor->resid -= bytes;
+
+ if (bytes < cursor->lastlen) {
+ cursor->lastlen -= bytes;
+ } else {
+ iov_iter_advance(&cursor->iov_iter, bytes - cursor->lastlen);
+ cursor->lastlen = 0;
+ }
+
+ return cursor->resid;
+}
+
/*
* Message data is handled (sent or received) in pieces, where each
* piece resides on a single page. The network layer might not
@@ -996,6 +1053,9 @@ static void __ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor)
case CEPH_MSG_DATA_BVECS:
ceph_msg_data_bvecs_cursor_init(cursor, length);
break;
+ case CEPH_MSG_DATA_ITER:
+ ceph_msg_data_iter_cursor_init(cursor, length);
+ break;
case CEPH_MSG_DATA_NONE:
default:
/* BUG(); */
@@ -1013,6 +1073,7 @@ void ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor,
cursor->total_resid = length;
cursor->data = msg->data;
+ cursor->sr_resid = 0;
__ceph_msg_data_cursor_init(cursor);
}
@@ -1042,6 +1103,9 @@ struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor,
case CEPH_MSG_DATA_BVECS:
page = ceph_msg_data_bvecs_next(cursor, page_offset, length);
break;
+ case CEPH_MSG_DATA_ITER:
+ page = ceph_msg_data_iter_next(cursor, page_offset, length);
+ break;
case CEPH_MSG_DATA_NONE:
default:
page = NULL;
@@ -1080,6 +1144,9 @@ void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, size_t bytes)
case CEPH_MSG_DATA_BVECS:
new_piece = ceph_msg_data_bvecs_advance(cursor, bytes);
break;
+ case CEPH_MSG_DATA_ITER:
+ new_piece = ceph_msg_data_iter_advance(cursor, bytes);
+ break;
case CEPH_MSG_DATA_NONE:
default:
BUG();
@@ -1188,7 +1255,7 @@ static int ceph_dns_resolve_name(const char *name, size_t namelen,
colon_p = memchr(name, ':', namelen);
if (delim_p && colon_p)
- end = delim_p < colon_p ? delim_p : colon_p;
+ end = min(delim_p, colon_p);
else if (!delim_p && colon_p)
end = colon_p;
else {
@@ -1458,7 +1525,7 @@ static void con_fault_finish(struct ceph_connection *con)
* in case we faulted due to authentication, invalidate our
* current tickets so that we can get new ones.
*/
- if (con->v1.auth_retry) {
+ if (!ceph_msgr2(from_msgr(con->msgr)) && con->v1.auth_retry) {
dout("auth_retry %d, invalidating\n", con->v1.auth_retry);
if (con->ops->invalidate_authorizer)
con->ops->invalidate_authorizer(con);
@@ -1648,9 +1715,10 @@ static void clear_standby(struct ceph_connection *con)
{
/* come back from STANDBY? */
if (con->state == CEPH_CON_S_STANDBY) {
- dout("clear_standby %p and ++connect_seq\n", con);
+ dout("clear_standby %p\n", con);
con->state = CEPH_CON_S_PREOPEN;
- con->v1.connect_seq++;
+ if (!ceph_msgr2(from_msgr(con->msgr)))
+ con->v1.connect_seq++;
WARN_ON(ceph_con_flag_test(con, CEPH_CON_F_WRITE_PENDING));
WARN_ON(ceph_con_flag_test(con, CEPH_CON_F_KEEPALIVE_PENDING));
}
@@ -1726,9 +1794,9 @@ void ceph_msg_revoke(struct ceph_msg *msg)
WARN_ON(con->state != CEPH_CON_S_OPEN);
dout("%s con %p msg %p was sending\n", __func__, con, msg);
if (ceph_msgr2(from_msgr(con->msgr)))
- ceph_con_v2_revoke(con);
+ ceph_con_v2_revoke(con, msg);
else
- ceph_con_v1_revoke(con);
+ ceph_con_v1_revoke(con, msg);
ceph_msg_put(con->out_msg);
con->out_msg = NULL;
} else {
@@ -1879,6 +1947,18 @@ void ceph_msg_data_add_bvecs(struct ceph_msg *msg,
}
EXPORT_SYMBOL(ceph_msg_data_add_bvecs);
+void ceph_msg_data_add_iter(struct ceph_msg *msg,
+ struct iov_iter *iter)
+{
+ struct ceph_msg_data *data;
+
+ data = ceph_msg_data_add(msg);
+ data->type = CEPH_MSG_DATA_ITER;
+ data->iter = *iter;
+
+ msg->data_length += iov_iter_count(&data->iter);
+}
+
/*
* construct a new message with given type, size
* the new msg has a ref count of 1.
@@ -2031,11 +2111,13 @@ int ceph_con_in_msg_alloc(struct ceph_connection *con,
return ret;
}
-void ceph_con_get_out_msg(struct ceph_connection *con)
+struct ceph_msg *ceph_con_get_out_msg(struct ceph_connection *con)
{
struct ceph_msg *msg;
- BUG_ON(list_empty(&con->out_queue));
+ if (list_empty(&con->out_queue))
+ return NULL;
+
msg = list_first_entry(&con->out_queue, struct ceph_msg, list_head);
WARN_ON(msg->con != con);
@@ -2062,7 +2144,7 @@ void ceph_con_get_out_msg(struct ceph_connection *con)
* message or in case of a fault.
*/
WARN_ON(con->out_msg);
- con->out_msg = ceph_msg_get(msg);
+ return con->out_msg = ceph_msg_get(msg);
}
/*