summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/trace/events/rxrpc.h6
-rw-r--r--net/rxrpc/ar-internal.h5
-rw-r--r--net/rxrpc/call_event.c83
-rw-r--r--net/rxrpc/call_object.c6
-rw-r--r--net/rxrpc/output.c48
-rw-r--r--net/rxrpc/sendmsg.c83
-rw-r--r--net/rxrpc/txbuf.c10
7 files changed, 161 insertions, 80 deletions
diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index 8bd48358f757..c3043fbea0e6 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -183,6 +183,7 @@
EM(rxrpc_call_queue_requeue, "QUE requeue ") \
EM(rxrpc_call_queue_resend, "QUE resend ") \
EM(rxrpc_call_queue_timer, "QUE timer ") \
+ EM(rxrpc_call_queue_tx_data, "QUE tx-data ") \
EM(rxrpc_call_see_accept, "SEE accept ") \
EM(rxrpc_call_see_activate_client, "SEE act-clnt") \
EM(rxrpc_call_see_connect_failed, "SEE con-fail") \
@@ -738,6 +739,7 @@ TRACE_EVENT(rxrpc_txqueue,
__field(rxrpc_seq_t, acks_hard_ack )
__field(rxrpc_seq_t, tx_bottom )
__field(rxrpc_seq_t, tx_top )
+ __field(rxrpc_seq_t, tx_prepared )
__field(int, tx_winsize )
),
@@ -747,16 +749,18 @@ TRACE_EVENT(rxrpc_txqueue,
__entry->acks_hard_ack = call->acks_hard_ack;
__entry->tx_bottom = call->tx_bottom;
__entry->tx_top = call->tx_top;
+ __entry->tx_prepared = call->tx_prepared;
__entry->tx_winsize = call->tx_winsize;
),
- TP_printk("c=%08x %s f=%08x h=%08x n=%u/%u/%u",
+ TP_printk("c=%08x %s f=%08x h=%08x n=%u/%u/%u/%u",
__entry->call,
__print_symbolic(__entry->why, rxrpc_txqueue_traces),
__entry->tx_bottom,
__entry->acks_hard_ack,
__entry->tx_top - __entry->tx_bottom,
__entry->tx_top - __entry->acks_hard_ack,
+ __entry->tx_prepared - __entry->tx_bottom,
__entry->tx_winsize)
);
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 3bd6a5eb2fb7..6af7298af39b 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -646,9 +646,11 @@ struct rxrpc_call {
/* Transmitted data tracking. */
spinlock_t tx_lock; /* Transmit queue lock */
+ struct list_head tx_sendmsg; /* Sendmsg prepared packets */
struct list_head tx_buffer; /* Buffer of transmissible packets */
rxrpc_seq_t tx_bottom; /* First packet in buffer */
rxrpc_seq_t tx_transmitted; /* Highest packet transmitted */
+ rxrpc_seq_t tx_prepared; /* Highest Tx slot prepared. */
rxrpc_seq_t tx_top; /* Highest Tx slot allocated. */
u16 tx_backoff; /* Delay to insert due to Tx failure */
u8 tx_winsize; /* Maximum size of Tx window */
@@ -766,7 +768,7 @@ struct rxrpc_send_params {
*/
struct rxrpc_txbuf {
struct rcu_head rcu;
- struct list_head call_link; /* Link in call->tx_queue */
+ struct list_head call_link; /* Link in call->tx_sendmsg/tx_buffer */
struct list_head tx_link; /* Link in live Enc queue or Tx queue */
struct rxrpc_call *call; /* Call to which belongs */
ktime_t last_sent; /* Time at which last transmitted */
@@ -1067,6 +1069,7 @@ int rxrpc_send_abort_packet(struct rxrpc_call *);
int rxrpc_send_data_packet(struct rxrpc_call *, struct rxrpc_txbuf *);
void rxrpc_reject_packets(struct rxrpc_local *);
void rxrpc_send_keepalive(struct rxrpc_peer *);
+void rxrpc_transmit_one(struct rxrpc_call *call, struct rxrpc_txbuf *txb);
/*
* peer_event.c
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 3925b55e2064..c9f835292f7b 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -291,6 +291,72 @@ out:
_leave("");
}
+static bool rxrpc_tx_window_has_space(struct rxrpc_call *call)
+{
+ unsigned int winsize = min_t(unsigned int, call->tx_winsize,
+ call->cong_cwnd + call->cong_extra);
+ rxrpc_seq_t window = call->acks_hard_ack, wtop = window + winsize;
+ rxrpc_seq_t tx_top = call->tx_top;
+ int space;
+
+ space = wtop - tx_top;
+ return space > 0;
+}
+
+/*
+ * Decant some of the sendmsg prepared queue into the transmission buffer.
+ */
+static void rxrpc_decant_prepared_tx(struct rxrpc_call *call)
+{
+ struct rxrpc_txbuf *txb;
+
+ if (rxrpc_is_client_call(call) &&
+ !test_bit(RXRPC_CALL_EXPOSED, &call->flags))
+ rxrpc_expose_client_call(call);
+
+ while ((txb = list_first_entry_or_null(&call->tx_sendmsg,
+ struct rxrpc_txbuf, call_link))) {
+ spin_lock(&call->tx_lock);
+ list_del(&txb->call_link);
+ spin_unlock(&call->tx_lock);
+
+ call->tx_top = txb->seq;
+ list_add_tail(&txb->call_link, &call->tx_buffer);
+
+ rxrpc_transmit_one(call, txb);
+
+ // TODO: Drain the transmission buffers. Do this somewhere better
+ if (after(call->acks_hard_ack, call->tx_bottom + 16))
+ rxrpc_shrink_call_tx_buffer(call);
+
+ if (!rxrpc_tx_window_has_space(call))
+ break;
+ }
+}
+
+static void rxrpc_transmit_some_data(struct rxrpc_call *call)
+{
+ switch (call->state) {
+ case RXRPC_CALL_SERVER_ACK_REQUEST:
+ if (list_empty(&call->tx_sendmsg))
+ return;
+ fallthrough;
+
+ case RXRPC_CALL_SERVER_SEND_REPLY:
+ case RXRPC_CALL_SERVER_AWAIT_ACK:
+ case RXRPC_CALL_CLIENT_SEND_REQUEST:
+ case RXRPC_CALL_CLIENT_AWAIT_REPLY:
+ if (!rxrpc_tx_window_has_space(call))
+ return;
+ if (list_empty(&call->tx_sendmsg))
+ return;
+ rxrpc_decant_prepared_tx(call);
+ break;
+ default:
+ return;
+ }
+}
+
/*
* Handle retransmission and deferred ACK/abort generation.
*/
@@ -309,19 +375,22 @@ void rxrpc_process_call(struct work_struct *work)
call->debug_id, rxrpc_call_states[call->state], call->events);
recheck_state:
+ if (call->acks_hard_ack != call->tx_bottom)
+ rxrpc_shrink_call_tx_buffer(call);
+
/* Limit the number of times we do this before returning to the manager */
- iterations++;
- if (iterations > 5)
- goto requeue;
+ if (!rxrpc_tx_window_has_space(call) ||
+ list_empty(&call->tx_sendmsg)) {
+ iterations++;
+ if (iterations > 5)
+ goto requeue;
+ }
if (test_and_clear_bit(RXRPC_CALL_EV_ABORT, &call->events)) {
rxrpc_send_abort_packet(call);
goto recheck_state;
}
- if (READ_ONCE(call->acks_hard_ack) != call->tx_bottom)
- rxrpc_shrink_call_tx_buffer(call);
-
if (call->state == RXRPC_CALL_COMPLETE) {
del_timer_sync(&call->timer);
goto out;
@@ -387,6 +456,8 @@ recheck_state:
set_bit(RXRPC_CALL_EV_RESEND, &call->events);
}
+ rxrpc_transmit_some_data(call);
+
/* Process events */
if (test_and_clear_bit(RXRPC_CALL_EV_EXPIRED, &call->events)) {
if (test_bit(RXRPC_CALL_RX_HEARD, &call->flags) &&
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 2622d06bb0d6..96a7edd3a842 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -156,6 +156,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
INIT_LIST_HEAD(&call->recvmsg_link);
INIT_LIST_HEAD(&call->sock_link);
INIT_LIST_HEAD(&call->attend_link);
+ INIT_LIST_HEAD(&call->tx_sendmsg);
INIT_LIST_HEAD(&call->tx_buffer);
skb_queue_head_init(&call->recvmsg_queue);
skb_queue_head_init(&call->rx_oos_queue);
@@ -641,6 +642,11 @@ static void rxrpc_destroy_call(struct work_struct *work)
del_timer_sync(&call->timer);
rxrpc_cleanup_ring(call);
+ while ((txb = list_first_entry_or_null(&call->tx_sendmsg,
+ struct rxrpc_txbuf, call_link))) {
+ list_del(&txb->call_link);
+ rxrpc_put_txbuf(txb, rxrpc_txbuf_put_cleaned);
+ }
while ((txb = list_first_entry_or_null(&call->tx_buffer,
struct rxrpc_txbuf, call_link))) {
list_del(&txb->call_link);
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index e2ce7dadbb7a..c8147e50060b 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -465,6 +465,14 @@ dont_set_request_ack:
trace_rxrpc_tx_data(call, txb->seq, serial, txb->wire.flags,
test_bit(RXRPC_TXBUF_RESENT, &txb->flags), false);
+
+ /* Track what we've attempted to transmit at least once so that the
+ * retransmission algorithm doesn't try to resend what we haven't sent
+ * yet. However, this can race as we can receive an ACK before we get
+ * to this point. But, OTOH, we won't get an ACK mentioning this
+ * packet unless the far side received it (though it could have
+ * discarded it anyway and NAK'd it).
+ */
cmpxchg(&call->tx_transmitted, txb->seq - 1, txb->seq);
/* send the packet with the don't fragment bit set if we currently
@@ -712,3 +720,43 @@ void rxrpc_send_keepalive(struct rxrpc_peer *peer)
peer->last_tx_at = ktime_get_seconds();
_leave("");
}
+
+/*
+ * Schedule an instant Tx resend.
+ */
+static inline void rxrpc_instant_resend(struct rxrpc_call *call,
+ struct rxrpc_txbuf *txb)
+{
+ if (call->state < RXRPC_CALL_COMPLETE)
+ kdebug("resend");
+}
+
+/*
+ * Transmit one packet.
+ */
+void rxrpc_transmit_one(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
+{
+ int ret;
+
+ ret = rxrpc_send_data_packet(call, txb);
+ if (ret < 0) {
+ switch (ret) {
+ case -ENETUNREACH:
+ case -EHOSTUNREACH:
+ case -ECONNREFUSED:
+ rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
+ 0, ret);
+ break;
+ default:
+ _debug("need instant resend %d", ret);
+ rxrpc_instant_resend(call, txb);
+ }
+ } else {
+ unsigned long now = jiffies;
+ unsigned long resend_at = now + call->peer->rto_j;
+
+ WRITE_ONCE(call->resend_at, resend_at);
+ rxrpc_reduce_call_timer(call, resend_at, now,
+ rxrpc_timer_set_for_send);
+ }
+}
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 76b1e2e89c1e..11af37275d5b 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -22,30 +22,9 @@
*/
static bool rxrpc_check_tx_space(struct rxrpc_call *call, rxrpc_seq_t *_tx_win)
{
- unsigned int win_size;
- rxrpc_seq_t tx_win = smp_load_acquire(&call->acks_hard_ack);
-
- /* If we haven't transmitted anything for >1RTT, we should reset the
- * congestion management state.
- */
- if (ktime_before(ktime_add_us(call->tx_last_sent,
- call->peer->srtt_us >> 3),
- ktime_get_real())) {
- if (RXRPC_TX_SMSS > 2190)
- win_size = 2;
- else if (RXRPC_TX_SMSS > 1095)
- win_size = 3;
- else
- win_size = 4;
- win_size += call->cong_extra;
- } else {
- win_size = min_t(unsigned int, call->tx_winsize,
- call->cong_cwnd + call->cong_extra);
- }
-
if (_tx_win)
- *_tx_win = tx_win;
- return call->tx_top - tx_win < win_size;
+ *_tx_win = call->tx_bottom;
+ return call->tx_prepared - call->tx_bottom < 256;
}
/*
@@ -66,11 +45,6 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
if (signal_pending(current))
return sock_intr_errno(*timeo);
- if (READ_ONCE(call->acks_hard_ack) != call->tx_bottom) {
- rxrpc_shrink_call_tx_buffer(call);
- continue;
- }
-
trace_rxrpc_txqueue(call, rxrpc_txqueue_wait);
*timeo = schedule_timeout(*timeo);
}
@@ -107,11 +81,6 @@ static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx,
tx_win == tx_start && signal_pending(current))
return -EINTR;
- if (READ_ONCE(call->acks_hard_ack) != call->tx_bottom) {
- rxrpc_shrink_call_tx_buffer(call);
- continue;
- }
-
if (tx_win != tx_start) {
timeout = rtt;
tx_start = tx_win;
@@ -137,11 +106,6 @@ static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx,
if (call->state >= RXRPC_CALL_COMPLETE)
return call->error;
- if (READ_ONCE(call->acks_hard_ack) != call->tx_bottom) {
- rxrpc_shrink_call_tx_buffer(call);
- continue;
- }
-
trace_rxrpc_txqueue(call, rxrpc_txqueue_wait);
*timeo = schedule_timeout(*timeo);
}
@@ -207,29 +171,27 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
unsigned long now;
rxrpc_seq_t seq = txb->seq;
bool last = test_bit(RXRPC_TXBUF_LAST, &txb->flags);
- int ret;
rxrpc_inc_stat(call->rxnet, stat_tx_data);
- ASSERTCMP(seq, ==, call->tx_top + 1);
+ ASSERTCMP(txb->seq, ==, call->tx_prepared + 1);
/* We have to set the timestamp before queueing as the retransmit
* algorithm can see the packet as soon as we queue it.
*/
txb->last_sent = ktime_get_real();
- /* Add the packet to the call's output buffer */
- rxrpc_get_txbuf(txb, rxrpc_txbuf_get_buffer);
- spin_lock(&call->tx_lock);
- list_add_tail(&txb->call_link, &call->tx_buffer);
- call->tx_top = seq;
- spin_unlock(&call->tx_lock);
-
if (last)
trace_rxrpc_txqueue(call, rxrpc_txqueue_queue_last);
else
trace_rxrpc_txqueue(call, rxrpc_txqueue_queue);
+ /* Add the packet to the call's queue of sendmsg-prepared packets */
+ spin_lock(&call->tx_lock);
+ list_add_tail(&txb->call_link, &call->tx_sendmsg);
+ call->tx_prepared = seq;
+ spin_unlock(&call->tx_lock);
+
if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) {
_debug("________awaiting reply/ACK__________");
write_lock_bh(&call->state_lock);
@@ -258,30 +220,11 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
write_unlock_bh(&call->state_lock);
}
- if (seq == 1 && rxrpc_is_client_call(call))
- rxrpc_expose_client_call(call);
-
- ret = rxrpc_send_data_packet(call, txb);
- if (ret < 0) {
- switch (ret) {
- case -ENETUNREACH:
- case -EHOSTUNREACH:
- case -ECONNREFUSED:
- rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
- 0, ret);
- goto out;
- }
- } else {
- unsigned long now = jiffies;
- unsigned long resend_at = now + call->peer->rto_j;
- WRITE_ONCE(call->resend_at, resend_at);
- rxrpc_reduce_call_timer(call, resend_at, now,
- rxrpc_timer_set_for_send);
- }
-
-out:
- rxrpc_put_txbuf(txb, rxrpc_txbuf_put_trans);
+ /* Stick the packet on the crypto queue or the transmission queue as
+ * appropriate.
+ */
+ rxrpc_queue_call(call, rxrpc_call_queue_tx_data);
}
/*
diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c
index 90ff00c340cd..a5054389dfbb 100644
--- a/net/rxrpc/txbuf.c
+++ b/net/rxrpc/txbuf.c
@@ -34,7 +34,7 @@ struct rxrpc_txbuf *rxrpc_alloc_txbuf(struct rxrpc_call *call, u8 packet_type,
txb->offset = 0;
txb->flags = 0;
txb->ack_why = 0;
- txb->seq = call->tx_top + 1;
+ txb->seq = call->tx_prepared + 1;
txb->wire.epoch = htonl(call->conn->proto.epoch);
txb->wire.cid = htonl(call->cid);
txb->wire.callNumber = htonl(call->call_id);
@@ -107,6 +107,7 @@ void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *call)
{
struct rxrpc_txbuf *txb;
rxrpc_seq_t hard_ack = smp_load_acquire(&call->acks_hard_ack);
+ bool wake = false;
_enter("%x/%x/%x", call->tx_bottom, call->acks_hard_ack, call->tx_top);
@@ -123,7 +124,7 @@ void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *call)
if (txb->seq != call->tx_bottom + 1)
rxrpc_see_txbuf(txb, rxrpc_txbuf_see_out_of_step);
ASSERTCMP(txb->seq, ==, call->tx_bottom + 1);
- call->tx_bottom++;
+ smp_store_release(&call->tx_bottom, call->tx_bottom + 1);
list_del_rcu(&txb->call_link);
trace_rxrpc_txqueue(call, rxrpc_txqueue_dequeue);
@@ -131,7 +132,12 @@ void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *call)
spin_unlock(&call->tx_lock);
rxrpc_put_txbuf(txb, rxrpc_txbuf_put_rotated);
+ if (after(call->acks_hard_ack, call->tx_bottom + 128))
+ wake = true;
}
spin_unlock(&call->tx_lock);
+
+ if (wake)
+ wake_up(&call->waitq);
}