summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/linux/proc_fs.h2
-rw-r--r--include/linux/udp.h3
-rw-r--r--include/net/udp_tunnel.h4
-rw-r--r--include/trace/events/rxrpc.h361
-rw-r--r--net/core/skbuff.c1
-rw-r--r--net/ipv4/ip_sockglue.c1
-rw-r--r--net/ipv4/udp.c3
-rw-r--r--net/ipv6/datagram.c1
-rw-r--r--net/ipv6/udp.c4
-rw-r--r--net/rxrpc/Makefile1
-rw-r--r--net/rxrpc/af_rxrpc.c5
-rw-r--r--net/rxrpc/ar-internal.h224
-rw-r--r--net/rxrpc/call_accept.c8
-rw-r--r--net/rxrpc/call_event.c427
-rw-r--r--net/rxrpc/call_object.c63
-rw-r--r--net/rxrpc/conn_client.c3
-rw-r--r--net/rxrpc/conn_object.c4
-rw-r--r--net/rxrpc/input.c770
-rw-r--r--net/rxrpc/insecure.c16
-rw-r--r--net/rxrpc/local_object.c20
-rw-r--r--net/rxrpc/misc.c23
-rw-r--r--net/rxrpc/net_ns.c2
-rw-r--r--net/rxrpc/output.c390
-rw-r--r--net/rxrpc/peer_event.c282
-rw-r--r--net/rxrpc/peer_object.c7
-rw-r--r--net/rxrpc/proc.c110
-rw-r--r--net/rxrpc/protocol.h9
-rw-r--r--net/rxrpc/recvmsg.c268
-rw-r--r--net/rxrpc/rxkad.c245
-rw-r--r--net/rxrpc/sendmsg.c218
-rw-r--r--net/rxrpc/skbuff.c20
-rw-r--r--net/rxrpc/sysctl.c11
-rw-r--r--net/rxrpc/txbuf.c135
33 files changed, 1844 insertions, 1797 deletions
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 81d6e4ec2294..0260f5ea98fe 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -208,8 +208,10 @@ static inline void proc_remove(struct proc_dir_entry *de) {}
static inline int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) { return 0; }
#define proc_create_net_data(name, mode, parent, ops, state_size, data) ({NULL;})
+#define proc_create_net_data_write(name, mode, parent, ops, write, state_size, data) ({NULL;})
#define proc_create_net(name, mode, parent, state_size, ops) ({NULL;})
#define proc_create_net_single(name, mode, parent, show, data) ({NULL;})
+#define proc_create_net_single_write(name, mode, parent, show, write, data) ({NULL;})
static inline struct pid *tgid_pidfd_to_pid(const struct file *file)
{
diff --git a/include/linux/udp.h b/include/linux/udp.h
index 5cdba00a904a..dea57aa37df6 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -70,7 +70,8 @@ struct udp_sock {
* For encapsulation sockets.
*/
int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
- void (*encap_err_rcv)(struct sock *sk, struct sk_buff *skb, unsigned int udp_offset);
+ void (*encap_err_rcv)(struct sock *sk, struct sk_buff *skb, int err,
+ __be16 port, u32 info, u8 *payload);
int (*encap_err_lookup)(struct sock *sk, struct sk_buff *skb);
void (*encap_destroy)(struct sock *sk);
diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index 72394f441dad..0ca9b7a11baf 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -68,8 +68,8 @@ typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb);
typedef int (*udp_tunnel_encap_err_lookup_t)(struct sock *sk,
struct sk_buff *skb);
typedef void (*udp_tunnel_encap_err_rcv_t)(struct sock *sk,
- struct sk_buff *skb,
- unsigned int udp_offset);
+ struct sk_buff *skb, int err,
+ __be16 port, u32 info, u8 *payload);
typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk);
typedef struct sk_buff *(*udp_tunnel_gro_receive_t)(struct sock *sk,
struct list_head *head,
diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index d20bf4aa0204..b9886d1df825 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -17,7 +17,9 @@
* Declare tracing information enums and their string mappings for display.
*/
#define rxrpc_skb_traces \
+ EM(rxrpc_skb_ack, "ACK") \
EM(rxrpc_skb_cleaned, "CLN") \
+ EM(rxrpc_skb_cloned_jumbo, "CLJ") \
EM(rxrpc_skb_freed, "FRE") \
EM(rxrpc_skb_got, "GOT") \
EM(rxrpc_skb_lost, "*L*") \
@@ -34,7 +36,8 @@
EM(rxrpc_local_new, "NEW") \
EM(rxrpc_local_processing, "PRO") \
EM(rxrpc_local_put, "PUT") \
- E_(rxrpc_local_queued, "QUE")
+ EM(rxrpc_local_queued, "QUE") \
+ E_(rxrpc_local_tx_ack, "TAK")
#define rxrpc_peer_traces \
EM(rxrpc_peer_got, "GOT") \
@@ -73,6 +76,7 @@
EM(rxrpc_call_got, "GOT") \
EM(rxrpc_call_got_kernel, "Gke") \
EM(rxrpc_call_got_timer, "GTM") \
+ EM(rxrpc_call_got_tx, "Gtx") \
EM(rxrpc_call_got_userid, "Gus") \
EM(rxrpc_call_new_client, "NWc") \
EM(rxrpc_call_new_service, "NWs") \
@@ -81,20 +85,22 @@
EM(rxrpc_call_put_noqueue, "PnQ") \
EM(rxrpc_call_put_notimer, "PnT") \
EM(rxrpc_call_put_timer, "PTM") \
+ EM(rxrpc_call_put_tx, "Ptx") \
EM(rxrpc_call_put_userid, "Pus") \
EM(rxrpc_call_queued, "QUE") \
EM(rxrpc_call_queued_ref, "QUR") \
EM(rxrpc_call_release, "RLS") \
E_(rxrpc_call_seen, "SEE")
-#define rxrpc_transmit_traces \
- EM(rxrpc_transmit_await_reply, "AWR") \
- EM(rxrpc_transmit_end, "END") \
- EM(rxrpc_transmit_queue, "QUE") \
- EM(rxrpc_transmit_queue_last, "QLS") \
- EM(rxrpc_transmit_rotate, "ROT") \
- EM(rxrpc_transmit_rotate_last, "RLS") \
- E_(rxrpc_transmit_wait, "WAI")
+#define rxrpc_txqueue_traces \
+ EM(rxrpc_txqueue_await_reply, "AWR") \
+ EM(rxrpc_txqueue_dequeue, "DEQ") \
+ EM(rxrpc_txqueue_end, "END") \
+ EM(rxrpc_txqueue_queue, "QUE") \
+ EM(rxrpc_txqueue_queue_last, "QLS") \
+ EM(rxrpc_txqueue_rotate, "ROT") \
+ EM(rxrpc_txqueue_rotate_last, "RLS") \
+ E_(rxrpc_txqueue_wait, "WAI")
#define rxrpc_receive_traces \
EM(rxrpc_receive_end, "END") \
@@ -102,7 +108,12 @@
EM(rxrpc_receive_incoming, "INC") \
EM(rxrpc_receive_queue, "QUE") \
EM(rxrpc_receive_queue_last, "QLS") \
- E_(rxrpc_receive_rotate, "ROT")
+ EM(rxrpc_receive_queue_oos, "QUO") \
+ EM(rxrpc_receive_queue_oos_last, "QOL") \
+ EM(rxrpc_receive_oos, "OOS") \
+ EM(rxrpc_receive_oos_last, "OSL") \
+ EM(rxrpc_receive_rotate, "ROT") \
+ E_(rxrpc_receive_rotate_last, "RLS")
#define rxrpc_recvmsg_traces \
EM(rxrpc_recvmsg_cont, "CONT") \
@@ -133,7 +144,6 @@
#define rxrpc_timer_traces \
EM(rxrpc_timer_begin, "Begin ") \
- EM(rxrpc_timer_expired, "*EXPR*") \
EM(rxrpc_timer_exp_ack, "ExpAck") \
EM(rxrpc_timer_exp_hard, "ExpHrd") \
EM(rxrpc_timer_exp_idle, "ExpIdl") \
@@ -158,6 +168,7 @@
#define rxrpc_propose_ack_traces \
EM(rxrpc_propose_ack_client_tx_end, "ClTxEnd") \
EM(rxrpc_propose_ack_input_data, "DataIn ") \
+ EM(rxrpc_propose_ack_input_data_hole, "DataInH") \
EM(rxrpc_propose_ack_ping_for_check_life, "ChkLife") \
EM(rxrpc_propose_ack_ping_for_keepalive, "KeepAlv") \
EM(rxrpc_propose_ack_ping_for_lost_ack, "LostAck") \
@@ -170,11 +181,6 @@
EM(rxrpc_propose_ack_rotate_rx, "RxAck ") \
E_(rxrpc_propose_ack_terminal_ack, "ClTerm ")
-#define rxrpc_propose_ack_outcomes \
- EM(rxrpc_propose_ack_subsume, " Subsume") \
- EM(rxrpc_propose_ack_update, " Update") \
- E_(rxrpc_propose_ack_use, " New")
-
#define rxrpc_congest_modes \
EM(RXRPC_CALL_CONGEST_AVOIDANCE, "CongAvoid") \
EM(RXRPC_CALL_FAST_RETRANSMIT, "FastReTx ") \
@@ -187,6 +193,7 @@
EM(rxrpc_cong_new_low_nack, " NewLowN") \
EM(rxrpc_cong_no_change, " -") \
EM(rxrpc_cong_progress, " Progres") \
+ EM(rxrpc_cong_idle_reset, " IdleRes") \
EM(rxrpc_cong_retransmit_again, " ReTxAgn") \
EM(rxrpc_cong_rtt_window_end, " RttWinE") \
E_(rxrpc_cong_saw_nack, " SawNack")
@@ -242,6 +249,33 @@
EM(rxrpc_tx_point_version_keepalive, "VerKeepalive") \
E_(rxrpc_tx_point_version_reply, "VerReply")
+#define rxrpc_req_ack_traces \
+ EM(rxrpc_reqack_ack_lost, "ACK-LOST ") \
+ EM(rxrpc_reqack_already_on, "ALREADY-ON") \
+ EM(rxrpc_reqack_more_rtt, "MORE-RTT ") \
+ EM(rxrpc_reqack_no_srv_last, "NO-SRVLAST") \
+ EM(rxrpc_reqack_old_rtt, "OLD-RTT ") \
+ EM(rxrpc_reqack_retrans, "RETRANS ") \
+ EM(rxrpc_reqack_slow_start, "SLOW-START") \
+ E_(rxrpc_reqack_small_txwin, "SMALL-TXWN")
+/* ---- Must update size of stat_why_req_ack[] if more are added! */
+
+#define rxrpc_txbuf_traces \
+ EM(rxrpc_txbuf_alloc_ack, "ALLOC ACK ") \
+ EM(rxrpc_txbuf_alloc_data, "ALLOC DATA ") \
+ EM(rxrpc_txbuf_free, "FREE ") \
+ EM(rxrpc_txbuf_get_buffer, "GET BUFFER ") \
+ EM(rxrpc_txbuf_get_trans, "GET TRANS ") \
+ EM(rxrpc_txbuf_get_retrans, "GET RETRANS") \
+ EM(rxrpc_txbuf_put_ack_tx, "PUT ACK TX ") \
+ EM(rxrpc_txbuf_put_cleaned, "PUT CLEANED") \
+ EM(rxrpc_txbuf_put_nomem, "PUT NOMEM ") \
+ EM(rxrpc_txbuf_put_rotated, "PUT ROTATED") \
+ EM(rxrpc_txbuf_put_send_aborted, "PUT SEND-X ") \
+ EM(rxrpc_txbuf_put_trans, "PUT TRANS ") \
+ EM(rxrpc_txbuf_see_send_more, "SEE SEND+ ") \
+ E_(rxrpc_txbuf_see_unacked, "SEE UNACKED")
+
/*
* Generate enums for tracing information.
*/
@@ -263,12 +297,14 @@ enum rxrpc_propose_ack_outcome { rxrpc_propose_ack_outcomes } __mode(byte);
enum rxrpc_propose_ack_trace { rxrpc_propose_ack_traces } __mode(byte);
enum rxrpc_receive_trace { rxrpc_receive_traces } __mode(byte);
enum rxrpc_recvmsg_trace { rxrpc_recvmsg_traces } __mode(byte);
+enum rxrpc_req_ack_trace { rxrpc_req_ack_traces } __mode(byte);
enum rxrpc_rtt_rx_trace { rxrpc_rtt_rx_traces } __mode(byte);
enum rxrpc_rtt_tx_trace { rxrpc_rtt_tx_traces } __mode(byte);
enum rxrpc_skb_trace { rxrpc_skb_traces } __mode(byte);
enum rxrpc_timer_trace { rxrpc_timer_traces } __mode(byte);
-enum rxrpc_transmit_trace { rxrpc_transmit_traces } __mode(byte);
enum rxrpc_tx_point { rxrpc_tx_points } __mode(byte);
+enum rxrpc_txbuf_trace { rxrpc_txbuf_traces } __mode(byte);
+enum rxrpc_txqueue_trace { rxrpc_txqueue_traces } __mode(byte);
#endif /* end __RXRPC_DECLARE_TRACE_ENUMS_ONCE_ONLY */
@@ -286,16 +322,17 @@ rxrpc_congest_changes;
rxrpc_congest_modes;
rxrpc_conn_traces;
rxrpc_local_traces;
-rxrpc_propose_ack_outcomes;
rxrpc_propose_ack_traces;
rxrpc_receive_traces;
rxrpc_recvmsg_traces;
+rxrpc_req_ack_traces;
rxrpc_rtt_rx_traces;
rxrpc_rtt_tx_traces;
rxrpc_skb_traces;
rxrpc_timer_traces;
-rxrpc_transmit_traces;
rxrpc_tx_points;
+rxrpc_txbuf_traces;
+rxrpc_txqueue_traces;
/*
* Now redefine the EM() and E_() macros to map the enums to the strings that
@@ -449,14 +486,13 @@ TRACE_EVENT(rxrpc_call,
TRACE_EVENT(rxrpc_skb,
TP_PROTO(struct sk_buff *skb, enum rxrpc_skb_trace op,
- int usage, int mod_count, u8 flags, const void *where),
+ int usage, int mod_count, const void *where),
- TP_ARGS(skb, op, usage, mod_count, flags, where),
+ TP_ARGS(skb, op, usage, mod_count, where),
TP_STRUCT__entry(
__field(struct sk_buff *, skb )
__field(enum rxrpc_skb_trace, op )
- __field(u8, flags )
__field(int, usage )
__field(int, mod_count )
__field(const void *, where )
@@ -464,16 +500,14 @@ TRACE_EVENT(rxrpc_skb,
TP_fast_assign(
__entry->skb = skb;
- __entry->flags = flags;
__entry->op = op;
__entry->usage = usage;
__entry->mod_count = mod_count;
__entry->where = where;
),
- TP_printk("s=%p %cx %s u=%d m=%d p=%pSR",
+ TP_printk("s=%p Rx %s u=%d m=%d p=%pSR",
__entry->skb,
- __entry->flags & RXRPC_SKB_TX_BUFFER ? 'T' : 'R',
__print_symbolic(__entry->op, rxrpc_skb_traces),
__entry->usage,
__entry->mod_count,
@@ -578,15 +612,16 @@ TRACE_EVENT(rxrpc_call_complete,
__entry->abort_code)
);
-TRACE_EVENT(rxrpc_transmit,
- TP_PROTO(struct rxrpc_call *call, enum rxrpc_transmit_trace why),
+TRACE_EVENT(rxrpc_txqueue,
+ TP_PROTO(struct rxrpc_call *call, enum rxrpc_txqueue_trace why),
TP_ARGS(call, why),
TP_STRUCT__entry(
__field(unsigned int, call )
- __field(enum rxrpc_transmit_trace, why )
- __field(rxrpc_seq_t, tx_hard_ack )
+ __field(enum rxrpc_txqueue_trace, why )
+ __field(rxrpc_seq_t, acks_hard_ack )
+ __field(rxrpc_seq_t, tx_bottom )
__field(rxrpc_seq_t, tx_top )
__field(int, tx_winsize )
),
@@ -594,31 +629,33 @@ TRACE_EVENT(rxrpc_transmit,
TP_fast_assign(
__entry->call = call->debug_id;
__entry->why = why;
- __entry->tx_hard_ack = call->tx_hard_ack;
+ __entry->acks_hard_ack = call->acks_hard_ack;
+ __entry->tx_bottom = call->tx_bottom;
__entry->tx_top = call->tx_top;
__entry->tx_winsize = call->tx_winsize;
),
- TP_printk("c=%08x %s f=%08x n=%u/%u",
+ TP_printk("c=%08x %s f=%08x h=%08x n=%u/%u/%u",
__entry->call,
- __print_symbolic(__entry->why, rxrpc_transmit_traces),
- __entry->tx_hard_ack + 1,
- __entry->tx_top - __entry->tx_hard_ack,
+ __print_symbolic(__entry->why, rxrpc_txqueue_traces),
+ __entry->tx_bottom,
+ __entry->acks_hard_ack,
+ __entry->tx_top - __entry->tx_bottom,
+ __entry->tx_top - __entry->acks_hard_ack,
__entry->tx_winsize)
);
TRACE_EVENT(rxrpc_rx_data,
TP_PROTO(unsigned int call, rxrpc_seq_t seq,
- rxrpc_serial_t serial, u8 flags, u8 anno),
+ rxrpc_serial_t serial, u8 flags),
- TP_ARGS(call, seq, serial, flags, anno),
+ TP_ARGS(call, seq, serial, flags),
TP_STRUCT__entry(
__field(unsigned int, call )
__field(rxrpc_seq_t, seq )
__field(rxrpc_serial_t, serial )
__field(u8, flags )
- __field(u8, anno )
),
TP_fast_assign(
@@ -626,15 +663,13 @@ TRACE_EVENT(rxrpc_rx_data,
__entry->seq = seq;
__entry->serial = serial;
__entry->flags = flags;
- __entry->anno = anno;
),
- TP_printk("c=%08x DATA %08x q=%08x fl=%02x a=%02x",
+ TP_printk("c=%08x DATA %08x q=%08x fl=%02x",
__entry->call,
__entry->serial,
__entry->seq,
- __entry->flags,
- __entry->anno)
+ __entry->flags)
);
TRACE_EVENT(rxrpc_rx_ack,
@@ -841,8 +876,7 @@ TRACE_EVENT(rxrpc_receive,
__field(enum rxrpc_receive_trace, why )
__field(rxrpc_serial_t, serial )
__field(rxrpc_seq_t, seq )
- __field(rxrpc_seq_t, hard_ack )
- __field(rxrpc_seq_t, top )
+ __field(u64, window )
),
TP_fast_assign(
@@ -850,8 +884,7 @@ TRACE_EVENT(rxrpc_receive,
__entry->why = why;
__entry->serial = serial;
__entry->seq = seq;
- __entry->hard_ack = call->rx_hard_ack;
- __entry->top = call->rx_top;
+ __entry->window = atomic64_read(&call->ackr_window);
),
TP_printk("c=%08x %s r=%08x q=%08x w=%08x-%08x",
@@ -859,12 +892,36 @@ TRACE_EVENT(rxrpc_receive,
__print_symbolic(__entry->why, rxrpc_receive_traces),
__entry->serial,
__entry->seq,
- __entry->hard_ack,
- __entry->top)
+ lower_32_bits(__entry->window),
+ upper_32_bits(__entry->window))
);
TRACE_EVENT(rxrpc_recvmsg,
TP_PROTO(struct rxrpc_call *call, enum rxrpc_recvmsg_trace why,
+ int ret),
+
+ TP_ARGS(call, why, ret),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, call )
+ __field(enum rxrpc_recvmsg_trace, why )
+ __field(int, ret )
+ ),
+
+ TP_fast_assign(
+ __entry->call = call ? call->debug_id : 0;
+ __entry->why = why;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("c=%08x %s ret=%d",
+ __entry->call,
+ __print_symbolic(__entry->why, rxrpc_recvmsg_traces),
+ __entry->ret)
+ );
+
+TRACE_EVENT(rxrpc_recvdata,
+ TP_PROTO(struct rxrpc_call *call, enum rxrpc_recvmsg_trace why,
rxrpc_seq_t seq, unsigned int offset, unsigned int len,
int ret),
@@ -986,7 +1043,7 @@ TRACE_EVENT(rxrpc_timer,
__entry->call = call->debug_id;
__entry->why = why;
__entry->now = now;
- __entry->ack_at = call->ack_at;
+ __entry->ack_at = call->delay_ack_at;
__entry->ack_lost_at = call->ack_lost_at;
__entry->resend_at = call->resend_at;
__entry->expect_rx_by = call->expect_rx_by;
@@ -1007,6 +1064,47 @@ TRACE_EVENT(rxrpc_timer,
__entry->timer - __entry->now)
);
+TRACE_EVENT(rxrpc_timer_expired,
+ TP_PROTO(struct rxrpc_call *call, unsigned long now),
+
+ TP_ARGS(call, now),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, call )
+ __field(long, now )
+ __field(long, ack_at )
+ __field(long, ack_lost_at )
+ __field(long, resend_at )
+ __field(long, ping_at )
+ __field(long, expect_rx_by )
+ __field(long, expect_req_by )
+ __field(long, expect_term_by )
+ __field(long, timer )
+ ),
+
+ TP_fast_assign(
+ __entry->call = call->debug_id;
+ __entry->now = now;
+ __entry->ack_at = call->delay_ack_at;
+ __entry->ack_lost_at = call->ack_lost_at;
+ __entry->resend_at = call->resend_at;
+ __entry->expect_rx_by = call->expect_rx_by;
+ __entry->expect_req_by = call->expect_req_by;
+ __entry->expect_term_by = call->expect_term_by;
+ __entry->timer = call->timer.expires;
+ ),
+
+ TP_printk("c=%08x EXPIRED a=%ld la=%ld r=%ld xr=%ld xq=%ld xt=%ld t=%ld",
+ __entry->call,
+ __entry->ack_at - __entry->now,
+ __entry->ack_lost_at - __entry->now,
+ __entry->resend_at - __entry->now,
+ __entry->expect_rx_by - __entry->now,
+ __entry->expect_req_by - __entry->now,
+ __entry->expect_term_by - __entry->now,
+ __entry->timer - __entry->now)
+ );
+
TRACE_EVENT(rxrpc_rx_lose,
TP_PROTO(struct rxrpc_skb_priv *sp),
@@ -1031,20 +1129,15 @@ TRACE_EVENT(rxrpc_rx_lose,
TRACE_EVENT(rxrpc_propose_ack,
TP_PROTO(struct rxrpc_call *call, enum rxrpc_propose_ack_trace why,
- u8 ack_reason, rxrpc_serial_t serial, bool immediate,
- bool background, enum rxrpc_propose_ack_outcome outcome),
+ u8 ack_reason, rxrpc_serial_t serial),
- TP_ARGS(call, why, ack_reason, serial, immediate, background,
- outcome),
+ TP_ARGS(call, why, ack_reason, serial),
TP_STRUCT__entry(
__field(unsigned int, call )
__field(enum rxrpc_propose_ack_trace, why )
__field(rxrpc_serial_t, serial )
__field(u8, ack_reason )
- __field(bool, immediate )
- __field(bool, background )
- __field(enum rxrpc_propose_ack_outcome, outcome )
),
TP_fast_assign(
@@ -1052,45 +1145,91 @@ TRACE_EVENT(rxrpc_propose_ack,
__entry->why = why;
__entry->serial = serial;
__entry->ack_reason = ack_reason;
- __entry->immediate = immediate;
- __entry->background = background;
- __entry->outcome = outcome;
),
- TP_printk("c=%08x %s %s r=%08x i=%u b=%u%s",
+ TP_printk("c=%08x %s %s r=%08x",
__entry->call,
__print_symbolic(__entry->why, rxrpc_propose_ack_traces),
__print_symbolic(__entry->ack_reason, rxrpc_ack_names),
- __entry->serial,
- __entry->immediate,
- __entry->background,
- __print_symbolic(__entry->outcome, rxrpc_propose_ack_outcomes))
+ __entry->serial)
+ );
+
+TRACE_EVENT(rxrpc_send_ack,
+ TP_PROTO(struct rxrpc_call *call, enum rxrpc_propose_ack_trace why,
+ u8 ack_reason, rxrpc_serial_t serial),
+
+ TP_ARGS(call, why, ack_reason, serial),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, call )
+ __field(enum rxrpc_propose_ack_trace, why )
+ __field(rxrpc_serial_t, serial )
+ __field(u8, ack_reason )
+ ),
+
+ TP_fast_assign(
+ __entry->call = call->debug_id;
+ __entry->why = why;
+ __entry->serial = serial;
+ __entry->ack_reason = ack_reason;
+ ),
+
+ TP_printk("c=%08x %s %s r=%08x",
+ __entry->call,
+ __print_symbolic(__entry->why, rxrpc_propose_ack_traces),
+ __print_symbolic(__entry->ack_reason, rxrpc_ack_names),
+ __entry->serial)
+ );
+
+TRACE_EVENT(rxrpc_drop_ack,
+ TP_PROTO(struct rxrpc_call *call, enum rxrpc_propose_ack_trace why,
+ u8 ack_reason, rxrpc_serial_t serial, bool nobuf),
+
+ TP_ARGS(call, why, ack_reason, serial, nobuf),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, call )
+ __field(enum rxrpc_propose_ack_trace, why )
+ __field(rxrpc_serial_t, serial )
+ __field(u8, ack_reason )
+ __field(bool, nobuf )
+ ),
+
+ TP_fast_assign(
+ __entry->call = call->debug_id;
+ __entry->why = why;
+ __entry->serial = serial;
+ __entry->ack_reason = ack_reason;
+ __entry->nobuf = nobuf;
+ ),
+
+ TP_printk("c=%08x %s %s r=%08x nbf=%u",
+ __entry->call,
+ __print_symbolic(__entry->why, rxrpc_propose_ack_traces),
+ __print_symbolic(__entry->ack_reason, rxrpc_ack_names),
+ __entry->serial, __entry->nobuf)
);
TRACE_EVENT(rxrpc_retransmit,
- TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq, u8 annotation,
- s64 expiry),
+ TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq, s64 expiry),
- TP_ARGS(call, seq, annotation, expiry),
+ TP_ARGS(call, seq, expiry),
TP_STRUCT__entry(
__field(unsigned int, call )
__field(rxrpc_seq_t, seq )
- __field(u8, annotation )
__field(s64, expiry )
),
TP_fast_assign(
__entry->call = call->debug_id;
__entry->seq = seq;
- __entry->annotation = annotation;
__entry->expiry = expiry;
),
- TP_printk("c=%08x q=%x a=%02x xp=%lld",
+ TP_printk("c=%08x q=%x xp=%lld",
__entry->call,
__entry->seq,
- __entry->annotation,
__entry->expiry)
);
@@ -1113,14 +1252,14 @@ TRACE_EVENT(rxrpc_congest,
TP_fast_assign(
__entry->call = call->debug_id;
__entry->change = change;
- __entry->hard_ack = call->tx_hard_ack;
+ __entry->hard_ack = call->acks_hard_ack;
__entry->top = call->tx_top;
__entry->lowest_nak = call->acks_lowest_nak;
__entry->ack_serial = ack_serial;
memcpy(&__entry->sum, summary, sizeof(__entry->sum));
),
- TP_printk("c=%08x r=%08x %s q=%08x %s cw=%u ss=%u nr=%u,%u nw=%u,%u r=%u b=%u u=%u d=%u l=%x%s%s%s",
+ TP_printk("c=%08x r=%08x %s q=%08x %s cw=%u ss=%u nA=%u,%u+%u r=%u b=%u u=%u d=%u l=%x%s%s%s",
__entry->call,
__entry->ack_serial,
__print_symbolic(__entry->sum.ack_reason, rxrpc_ack_names),
@@ -1128,8 +1267,8 @@ TRACE_EVENT(rxrpc_congest,
__print_symbolic(__entry->sum.mode, rxrpc_congest_modes),
__entry->sum.cwnd,
__entry->sum.ssthresh,
- __entry->sum.nr_acks, __entry->sum.nr_nacks,
- __entry->sum.nr_new_acks, __entry->sum.nr_new_nacks,
+ __entry->sum.nr_acks, __entry->sum.saw_nacks,
+ __entry->sum.nr_new_acks,
__entry->sum.nr_rot_new_acks,
__entry->top - __entry->hard_ack,
__entry->sum.cumulative_acks,
@@ -1230,26 +1369,23 @@ TRACE_EVENT(rxrpc_connect_call,
);
TRACE_EVENT(rxrpc_resend,
- TP_PROTO(struct rxrpc_call *call, int ix),
+ TP_PROTO(struct rxrpc_call *call),
- TP_ARGS(call, ix),
+ TP_ARGS(call),
TP_STRUCT__entry(
__field(unsigned int, call )
- __field(int, ix )
- __array(u8, anno, 64 )
+ __field(rxrpc_seq_t, seq )
),
TP_fast_assign(
__entry->call = call->debug_id;
- __entry->ix = ix;
- memcpy(__entry->anno, call->rxtx_annotations, 64);
+ __entry->seq = call->acks_hard_ack;
),
- TP_printk("c=%08x ix=%u a=%64phN",
+ TP_printk("c=%08x q=%x",
__entry->call,
- __entry->ix,
- __entry->anno)
+ __entry->seq)
);
TRACE_EVENT(rxrpc_rx_icmp,
@@ -1329,8 +1465,8 @@ TRACE_EVENT(rxrpc_call_reset,
__entry->call_id = call->call_id;
__entry->call_serial = call->rx_serial;
__entry->conn_serial = call->conn->hi_serial;
- __entry->tx_seq = call->tx_hard_ack;
- __entry->rx_seq = call->rx_hard_ack;
+ __entry->tx_seq = call->acks_hard_ack;
+ __entry->rx_seq = call->rx_highest_seq;
),
TP_printk("c=%08x %08x:%08x r=%08x/%08x tx=%08x rx=%08x",
@@ -1395,6 +1531,61 @@ TRACE_EVENT(rxrpc_rx_discard_ack,
__entry->call_ackr_prev)
);
+TRACE_EVENT(rxrpc_req_ack,
+ TP_PROTO(unsigned int call_debug_id, rxrpc_seq_t seq,
+ enum rxrpc_req_ack_trace why),
+
+ TP_ARGS(call_debug_id, seq, why),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, call_debug_id )
+ __field(rxrpc_seq_t, seq )
+ __field(enum rxrpc_req_ack_trace, why )
+ ),
+
+ TP_fast_assign(
+ __entry->call_debug_id = call_debug_id;
+ __entry->seq = seq;
+ __entry->why = why;
+ ),
+
+ TP_printk("c=%08x q=%08x REQ-%s",
+ __entry->call_debug_id,
+ __entry->seq,
+ __print_symbolic(__entry->why, rxrpc_req_ack_traces))
+ );
+
+TRACE_EVENT(rxrpc_txbuf,
+ TP_PROTO(unsigned int debug_id,
+ unsigned int call_debug_id, rxrpc_seq_t seq,
+ int ref, enum rxrpc_txbuf_trace what),
+
+ TP_ARGS(debug_id, call_debug_id, seq, ref, what),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, debug_id )
+ __field(unsigned int, call_debug_id )
+ __field(rxrpc_seq_t, seq )
+ __field(int, ref )
+ __field(enum rxrpc_txbuf_trace, what )
+ ),
+
+ TP_fast_assign(
+ __entry->debug_id = debug_id;
+ __entry->call_debug_id = call_debug_id;
+ __entry->seq = seq;
+ __entry->ref = ref;
+ __entry->what = what;
+ ),
+
+ TP_printk("B=%08x c=%08x q=%08x %s r=%d",
+ __entry->debug_id,
+ __entry->call_debug_id,
+ __entry->seq,
+ __print_symbolic(__entry->what, rxrpc_txbuf_traces),
+ __entry->ref)
+ );
+
#undef EM
#undef E_
#endif /* _TRACE_RXRPC_H */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 42a35b59fb1e..d1779880b2a7 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -6435,6 +6435,7 @@ void skb_condense(struct sk_buff *skb)
*/
skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
}
+EXPORT_SYMBOL(skb_condense);
#ifdef CONFIG_SKB_EXTENSIONS
static void *skb_ext_get_ptr(struct skb_ext *ext, enum skb_ext_id id)
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 5f16807d3235..9f92ae35bb01 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -433,6 +433,7 @@ void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
}
kfree_skb(skb);
}
+EXPORT_SYMBOL_GPL(ip_icmp_error);
void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 info)
{
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 89accc3c8bb3..b859d6c8298e 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -784,7 +784,8 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
if (tunnel) {
/* ...not for tunnels though: we don't have a sending socket */
if (udp_sk(sk)->encap_err_rcv)
- udp_sk(sk)->encap_err_rcv(sk, skb, iph->ihl << 2);
+ udp_sk(sk)->encap_err_rcv(sk, skb, err, uh->dest, info,
+ (u8 *)(uh+1));
goto out;
}
if (!inet->recverr) {
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index df7e032ce87d..7c7155b48f17 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -334,6 +334,7 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
if (sock_queue_err_skb(sk, skb))
kfree_skb(skb);
}
+EXPORT_SYMBOL_GPL(ipv6_icmp_error);
void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info)
{
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 10f45658dbf6..e2de3d906c82 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -632,7 +632,8 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
/* Tunnels don't have an application socket: don't pass errors back */
if (tunnel) {
if (udp_sk(sk)->encap_err_rcv)
- udp_sk(sk)->encap_err_rcv(sk, skb, offset);
+ udp_sk(sk)->encap_err_rcv(sk, skb, err, uh->dest,
+ ntohl(info), (u8 *)(uh+1));
goto out;
}
@@ -1639,6 +1640,7 @@ do_confirm:
err = 0;
goto out;
}
+EXPORT_SYMBOL(udpv6_sendmsg);
void udpv6_destroy_sock(struct sock *sk)
{
diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
index b11281bed2a4..fdeba488fc6e 100644
--- a/net/rxrpc/Makefile
+++ b/net/rxrpc/Makefile
@@ -30,6 +30,7 @@ rxrpc-y := \
sendmsg.o \
server_key.o \
skbuff.o \
+ txbuf.o \
utils.o
rxrpc-$(CONFIG_PROC_FS) += proc.o
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index ceba28e9dce6..2f3991cf8715 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -39,7 +39,7 @@ atomic_t rxrpc_debug_id;
EXPORT_SYMBOL(rxrpc_debug_id);
/* count of skbs currently in use */
-atomic_t rxrpc_n_tx_skbs, rxrpc_n_rx_skbs;
+atomic_t rxrpc_n_rx_skbs;
struct workqueue_struct *rxrpc_workqueue;
@@ -979,7 +979,7 @@ static int __init af_rxrpc_init(void)
goto error_call_jar;
}
- rxrpc_workqueue = alloc_workqueue("krxrpcd", 0, 1);
+ rxrpc_workqueue = alloc_workqueue("krxrpcd", WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
if (!rxrpc_workqueue) {
pr_notice("Failed to allocate work queue\n");
goto error_work_queue;
@@ -1059,7 +1059,6 @@ static void __exit af_rxrpc_exit(void)
sock_unregister(PF_RXRPC);
proto_unregister(&rxrpc_proto);
unregister_pernet_device(&rxrpc_net_ops);
- ASSERTCMP(atomic_read(&rxrpc_n_tx_skbs), ==, 0);
ASSERTCMP(atomic_read(&rxrpc_n_rx_skbs), ==, 0);
/* Make sure the local and peer records pinned by any dying connections
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 1ad0ec5afb50..0273a9029229 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -29,6 +29,7 @@ struct rxrpc_crypt {
struct key_preparsed_payload;
struct rxrpc_connection;
+struct rxrpc_txbuf;
/*
* Mark applied to socket buffers in skb->mark. skb->priority is used
@@ -93,6 +94,22 @@ struct rxrpc_net {
struct list_head peer_keepalive_new;
struct timer_list peer_keepalive_timer;
struct work_struct peer_keepalive_work;
+
+ atomic_t stat_tx_data;
+ atomic_t stat_tx_data_retrans;
+ atomic_t stat_tx_data_send;
+ atomic_t stat_tx_data_send_frag;
+ atomic_t stat_rx_data;
+ atomic_t stat_rx_data_reqack;
+ atomic_t stat_rx_data_jumbo;
+
+ atomic_t stat_tx_ack_fill;
+ atomic_t stat_tx_ack_send;
+ atomic_t stat_tx_ack_skip;
+ atomic_t stat_tx_acks[256];
+ atomic_t stat_rx_acks[256];
+
+ atomic_t stat_why_req_ack[8];
};
/*
@@ -178,20 +195,12 @@ struct rxrpc_host_header {
* - max 48 bytes (struct sk_buff::cb)
*/
struct rxrpc_skb_priv {
- atomic_t nr_ring_pins; /* Number of rxtx ring pins */
- u8 nr_subpackets; /* Number of subpackets */
- u8 rx_flags; /* Received packet flags */
-#define RXRPC_SKB_INCL_LAST 0x01 /* - Includes last packet */
-#define RXRPC_SKB_TX_BUFFER 0x02 /* - Is transmit buffer */
- union {
- int remain; /* amount of space remaining for next write */
-
- /* List of requested ACKs on subpackets */
- unsigned long rx_req_ack[(RXRPC_MAX_NR_JUMBO + BITS_PER_LONG - 1) /
- BITS_PER_LONG];
- };
+ u16 offset; /* Offset of data */
+ u16 len; /* Length of data */
+ u8 flags;
+#define RXRPC_RX_VERIFIED 0x01
- struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */
+ struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */
};
#define rxrpc_skb(__skb) ((struct rxrpc_skb_priv *) &(__skb)->cb)
@@ -233,19 +242,14 @@ struct rxrpc_security {
size_t *, size_t *, size_t *);
/* impose security on a packet */
- int (*secure_packet)(struct rxrpc_call *, struct sk_buff *, size_t);
+ int (*secure_packet)(struct rxrpc_call *, struct rxrpc_txbuf *);
/* verify the security on a received packet */
- int (*verify_packet)(struct rxrpc_call *, struct sk_buff *,
- unsigned int, unsigned int, rxrpc_seq_t, u16);
+ int (*verify_packet)(struct rxrpc_call *, struct sk_buff *);
/* Free crypto request on a call */
void (*free_call_crypto)(struct rxrpc_call *);
- /* Locate the data in a received packet that has been verified. */
- void (*locate_data)(struct rxrpc_call *, struct sk_buff *,
- unsigned int *, unsigned int *);
-
/* issue a challenge */
int (*issue_challenge)(struct rxrpc_connection *);
@@ -276,6 +280,8 @@ struct rxrpc_local {
struct hlist_node link;
struct socket *socket; /* my UDP socket */
struct work_struct processor;
+ struct list_head ack_tx_queue; /* List of ACKs that need sending */
+ spinlock_t ack_tx_lock; /* ACK list lock */
struct rxrpc_sock __rcu *service; /* Service(s) listening on this endpoint */
struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */
struct sk_buff_head reject_queue; /* packets awaiting rejection */
@@ -326,7 +332,7 @@ struct rxrpc_peer {
u32 rto_j; /* Retransmission timeout in jiffies */
u8 backoff; /* Backoff timeout */
- u8 cong_cwnd; /* Congestion window size */
+ u8 cong_ssthresh; /* Congestion slow-start threshold */
};
/*
@@ -490,6 +496,7 @@ enum rxrpc_call_flag {
RXRPC_CALL_EXPOSED, /* The call was exposed to the world */
RXRPC_CALL_RX_LAST, /* Received the last packet (at rxtx_top) */
RXRPC_CALL_TX_LAST, /* Last packet in Tx buffer (at rxtx_top) */
+ RXRPC_CALL_TX_ALL_ACKED, /* Last packet has been hard-acked */
RXRPC_CALL_SEND_PING, /* A ping will need to be sent */
RXRPC_CALL_RETRANS_TIMEOUT, /* Retransmission due to timeout occurred */
RXRPC_CALL_BEGAN_RX_TIMER, /* We began the expect_rx_by timer */
@@ -498,16 +505,16 @@ enum rxrpc_call_flag {
RXRPC_CALL_DISCONNECTED, /* The call has been disconnected */
RXRPC_CALL_KERNEL, /* The call was made by the kernel */
RXRPC_CALL_UPGRADE, /* Service upgrade was requested for the call */
+ RXRPC_CALL_DELAY_ACK_PENDING, /* DELAY ACK generation is pending */
+ RXRPC_CALL_IDLE_ACK_PENDING, /* IDLE ACK generation is pending */
};
/*
* Events that can be raised on a call.
*/
enum rxrpc_call_event {
- RXRPC_CALL_EV_ACK, /* need to generate ACK */
RXRPC_CALL_EV_ABORT, /* need to generate abort */
RXRPC_CALL_EV_RESEND, /* Tx resend required */
- RXRPC_CALL_EV_PING, /* Ping send required */
RXRPC_CALL_EV_EXPIRED, /* Expiry occurred */
RXRPC_CALL_EV_ACK_LOST, /* ACK may be lost, send ping */
};
@@ -566,7 +573,7 @@ struct rxrpc_call {
struct rxrpc_net *rxnet; /* Network namespace to which call belongs */
const struct rxrpc_security *security; /* applied security module */
struct mutex user_mutex; /* User access mutex */
- unsigned long ack_at; /* When deferred ACK needs to happen */
+ unsigned long delay_ack_at; /* When DELAY ACK needs to happen */
unsigned long ack_lost_at; /* When ACK is figured as lost */
unsigned long resend_at; /* When next resend needs to happen */
unsigned long ping_at; /* When next to send a ping */
@@ -576,7 +583,6 @@ struct rxrpc_call {
unsigned long expect_term_by; /* When we expect call termination by */
u32 next_rx_timo; /* Timeout for next Rx packet (jif) */
u32 next_req_timo; /* Timeout for next Rx request packet (jif) */
- struct skcipher_request *cipher_req; /* Packet cipher request buffer */
struct timer_list timer; /* Combined event timer */
struct work_struct processor; /* Event processor */
rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */
@@ -587,14 +593,12 @@ struct rxrpc_call {
struct list_head recvmsg_link; /* Link in rx->recvmsg_q */
struct list_head sock_link; /* Link in rx->sock_calls */
struct rb_node sock_node; /* Node in rx->calls */
- struct sk_buff *tx_pending; /* Tx socket buffer being filled */
+ struct rxrpc_txbuf *tx_pending; /* Tx buffer being filled */
wait_queue_head_t waitq; /* Wait queue for channel or Tx */
s64 tx_total_len; /* Total length left to be transmitted (or -1) */
- __be32 crypto_buf[2]; /* Temporary packet crypto buffer */
unsigned long user_call_ID; /* user-defined call ID */
unsigned long flags;
unsigned long events;
- spinlock_t lock;
spinlock_t notify_lock; /* Kernel notification lock */
rwlock_t state_lock; /* lock for state transition */
u32 abort_code; /* Local/remote abort code */
@@ -610,37 +614,27 @@ struct rxrpc_call {
int debug_id; /* debug ID for printks */
unsigned short rx_pkt_offset; /* Current recvmsg packet offset */
unsigned short rx_pkt_len; /* Current recvmsg packet len */
- bool rx_pkt_last; /* Current recvmsg packet is last */
-
- /* Rx/Tx circular buffer, depending on phase.
- *
- * In the Rx phase, packets are annotated with 0 or the number of the
- * segment of a jumbo packet each buffer refers to. There can be up to
- * 47 segments in a maximum-size UDP packet.
- *
- * In the Tx phase, packets are annotated with which buffers have been
- * acked.
- */
-#define RXRPC_RXTX_BUFF_SIZE 64
-#define RXRPC_RXTX_BUFF_MASK (RXRPC_RXTX_BUFF_SIZE - 1)
-#define RXRPC_INIT_RX_WINDOW_SIZE 63
- struct sk_buff **rxtx_buffer;
- u8 *rxtx_annotations;
-#define RXRPC_TX_ANNO_ACK 0
-#define RXRPC_TX_ANNO_UNACK 1
-#define RXRPC_TX_ANNO_NAK 2
-#define RXRPC_TX_ANNO_RETRANS 3
-#define RXRPC_TX_ANNO_MASK 0x03
-#define RXRPC_TX_ANNO_LAST 0x04
-#define RXRPC_TX_ANNO_RESENT 0x08
-
-#define RXRPC_RX_ANNO_SUBPACKET 0x3f /* Subpacket number in jumbogram */
-#define RXRPC_RX_ANNO_VERIFIED 0x80 /* Set if verified and decrypted */
- rxrpc_seq_t tx_hard_ack; /* Dead slot in buffer; the first transmitted but
- * not hard-ACK'd packet follows this.
- */
+
+ /* Transmitted data tracking. */
+ spinlock_t tx_lock; /* Transmit queue lock */
+ struct list_head tx_buffer; /* Buffer of transmissible packets */
+ rxrpc_seq_t tx_bottom; /* First packet in buffer */
+ rxrpc_seq_t tx_transmitted; /* Highest packet transmitted */
rxrpc_seq_t tx_top; /* Highest Tx slot allocated. */
u16 tx_backoff; /* Delay to insert due to Tx failure */
+ u8 tx_winsize; /* Maximum size of Tx window */
+#define RXRPC_TX_MAX_WINDOW 128
+ ktime_t tx_last_sent; /* Last time a transmission occurred */
+
+ /* Received data tracking */
+ struct sk_buff_head recvmsg_queue; /* Queue of packets ready for recvmsg() */
+ struct sk_buff_head rx_oos_queue; /* Queue of out of sequence packets */
+
+ rxrpc_seq_t rx_highest_seq; /* Higest sequence number received */
+ rxrpc_seq_t rx_consumed; /* Highest packet consumed */
+ rxrpc_serial_t rx_serial; /* Highest serial received for this call */
+ u8 rx_winsize; /* Size of Rx window */
+ spinlock_t input_lock; /* Lock for packet input to this call */
/* TCP-style slow-start congestion control [RFC5681]. Since the SMSS
* is fixed, we keep these numbers in terms of segments (ie. DATA
@@ -655,25 +649,17 @@ struct rxrpc_call {
u8 cong_cumul_acks; /* Cumulative ACK count */
ktime_t cong_tstamp; /* Last time cwnd was changed */
- rxrpc_seq_t rx_hard_ack; /* Dead slot in buffer; the first received but not
- * consumed packet follows this.
- */
- rxrpc_seq_t rx_top; /* Highest Rx slot allocated. */
- rxrpc_seq_t rx_expect_next; /* Expected next packet sequence number */
- rxrpc_serial_t rx_serial; /* Highest serial received for this call */
- u8 rx_winsize; /* Size of Rx window */
- u8 tx_winsize; /* Maximum size of Tx window */
- bool tx_phase; /* T if transmission phase, F if receive phase */
- u8 nr_jumbo_bad; /* Number of jumbo dups/exceeds-windows */
-
- spinlock_t input_lock; /* Lock for packet input to this call */
-
/* Receive-phase ACK management (ACKs we send). */
u8 ackr_reason; /* reason to ACK */
rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */
- rxrpc_seq_t ackr_highest_seq; /* Higest sequence number received */
+ atomic64_t ackr_window; /* Base (in LSW) and top (in MSW) of SACK window */
atomic_t ackr_nr_unacked; /* Number of unacked packets */
atomic_t ackr_nr_consumed; /* Number of packets needing hard ACK */
+ struct {
+#define RXRPC_SACK_SIZE 256
+ /* SACK table for soft-acked packets */
+ u8 ackr_sack_table[RXRPC_SACK_SIZE];
+ } __aligned(8);
/* RTT management */
rxrpc_serial_t rtt_serial[4]; /* Serial number of DATA or PING sent */
@@ -687,21 +673,24 @@ struct rxrpc_call {
ktime_t acks_latest_ts; /* Timestamp of latest ACK received */
rxrpc_seq_t acks_first_seq; /* first sequence number received */
rxrpc_seq_t acks_prev_seq; /* Highest previousPacket received */
+ rxrpc_seq_t acks_hard_ack; /* Latest hard-ack point */
rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */
rxrpc_seq_t acks_lost_top; /* tx_top at the time lost-ack ping sent */
rxrpc_serial_t acks_lost_ping; /* Serial number of probe ACK */
+ rxrpc_serial_t acks_highest_serial; /* Highest serial number ACK'd */
+ struct sk_buff *acks_soft_tbl; /* The last ACK packet with NAKs in it */
+ spinlock_t acks_ack_lock; /* Access to ->acks_last_ack */
};
/*
* Summary of a new ACK and the changes it made to the Tx buffer packet states.
*/
struct rxrpc_ack_summary {
+ u16 nr_acks; /* Number of ACKs in packet */
+ u16 nr_new_acks; /* Number of new ACKs in packet */
+ u16 nr_rot_new_acks; /* Number of rotated new ACKs */
u8 ack_reason;
- u8 nr_acks; /* Number of ACKs in packet */
- u8 nr_nacks; /* Number of NACKs in packet */
- u8 nr_new_acks; /* Number of new ACKs in packet */
- u8 nr_new_nacks; /* Number of new NACKs in packet */
- u8 nr_rot_new_acks; /* Number of rotated new ACKs */
+ bool saw_nacks; /* Saw NACKs in packet */
bool new_low_nack; /* T if new low NACK found */
bool retrans_timeo; /* T if reTx due to timeout happened */
u8 flight_size; /* Number of unreceived transmissions */
@@ -744,12 +733,58 @@ struct rxrpc_send_params {
bool upgrade; /* If the connection is upgradeable */
};
+/*
+ * Buffer of data to be output as a packet.
+ */
+struct rxrpc_txbuf {
+ struct rcu_head rcu;
+ struct list_head call_link; /* Link in call->tx_queue */
+ struct list_head tx_link; /* Link in live Enc queue or Tx queue */
+ struct rxrpc_call *call; /* Call to which belongs */
+ ktime_t last_sent; /* Time at which last transmitted */
+ refcount_t ref;
+ rxrpc_seq_t seq; /* Sequence number of this packet */
+ unsigned int call_debug_id;
+ unsigned int debug_id;
+ unsigned int len; /* Amount of data in buffer */
+ unsigned int space; /* Remaining data space */
+ unsigned int offset; /* Offset of fill point */
+ unsigned long flags;
+#define RXRPC_TXBUF_LAST 0 /* Set if last packet in Tx phase */
+#define RXRPC_TXBUF_RESENT 1 /* Set if has been resent */
+ u8 /*enum rxrpc_propose_ack_trace*/ ack_why; /* If ack, why */
+ struct {
+ /* The packet for encrypting and DMA'ing. We align it such
+ * that data[] aligns correctly for any crypto blocksize.
+ */
+ u8 pad[64 - sizeof(struct rxrpc_wire_header)];
+ struct rxrpc_wire_header wire; /* Network-ready header */
+ union {
+ u8 data[RXRPC_JUMBO_DATALEN]; /* Data packet */
+ struct {
+ struct rxrpc_ackpacket ack;
+ u8 acks[0];
+ };
+ };
+ } __aligned(64);
+};
+
+static inline bool rxrpc_sending_to_server(const struct rxrpc_txbuf *txb)
+{
+ return txb->wire.flags & RXRPC_CLIENT_INITIATED;
+}
+
+static inline bool rxrpc_sending_to_client(const struct rxrpc_txbuf *txb)
+{
+ return !rxrpc_sending_to_server(txb);
+}
+
#include <trace/events/rxrpc.h>
/*
* af_rxrpc.c
*/
-extern atomic_t rxrpc_n_tx_skbs, rxrpc_n_rx_skbs;
+extern atomic_t rxrpc_n_rx_skbs;
extern struct workqueue_struct *rxrpc_workqueue;
/*
@@ -766,8 +801,12 @@ int rxrpc_user_charge_accept(struct rxrpc_sock *, unsigned long);
/*
* call_event.c
*/
-void rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool, bool,
- enum rxrpc_propose_ack_trace);
+void rxrpc_propose_ping(struct rxrpc_call *call, u32 serial,
+ enum rxrpc_propose_ack_trace why);
+void rxrpc_send_ACK(struct rxrpc_call *, u8, rxrpc_serial_t, enum rxrpc_propose_ack_trace);
+void rxrpc_propose_delay_ACK(struct rxrpc_call *, rxrpc_serial_t,
+ enum rxrpc_propose_ack_trace);
+void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *);
void rxrpc_process_call(struct work_struct *);
void rxrpc_reduce_call_timer(struct rxrpc_call *call,
@@ -949,15 +988,12 @@ static inline bool __rxrpc_use_local(struct rxrpc_local *local)
* misc.c
*/
extern unsigned int rxrpc_max_backlog __read_mostly;
-extern unsigned long rxrpc_requested_ack_delay;
extern unsigned long rxrpc_soft_ack_delay;
extern unsigned long rxrpc_idle_ack_delay;
extern unsigned int rxrpc_rx_window_size;
extern unsigned int rxrpc_rx_mtu;
extern unsigned int rxrpc_rx_jumbo_max;
-extern const s8 rxrpc_ack_priority[];
-
/*
* net_ns.c
*/
@@ -972,16 +1008,15 @@ static inline struct rxrpc_net *rxrpc_net(struct net *net)
/*
* output.c
*/
-int rxrpc_send_ack_packet(struct rxrpc_call *, bool, rxrpc_serial_t *);
+void rxrpc_transmit_ack_packets(struct rxrpc_local *);
int rxrpc_send_abort_packet(struct rxrpc_call *);
-int rxrpc_send_data_packet(struct rxrpc_call *, struct sk_buff *, bool);
+int rxrpc_send_data_packet(struct rxrpc_call *, struct rxrpc_txbuf *);
void rxrpc_reject_packets(struct rxrpc_local *);
void rxrpc_send_keepalive(struct rxrpc_peer *);
/*
* peer_event.c
*/
-void rxrpc_encap_err_rcv(struct sock *sk, struct sk_buff *skb, unsigned int udp_offset);
void rxrpc_error_report(struct sock *);
void rxrpc_peer_keepalive_worker(struct work_struct *);
@@ -1092,6 +1127,15 @@ void rxrpc_free_skb(struct sk_buff *, enum rxrpc_skb_trace);
void rxrpc_purge_queue(struct sk_buff_head *);
/*
+ * stats.c
+ */
+int rxrpc_stats_show(struct seq_file *seq, void *v);
+int rxrpc_stats_clear(struct file *file, char *buf, size_t size);
+
+#define rxrpc_inc_stat(rxnet, s) atomic_inc(&(rxnet)->s)
+#define rxrpc_dec_stat(rxnet, s) atomic_dec(&(rxnet)->s)
+
+/*
* sysctl.c
*/
#ifdef CONFIG_SYSCTL
@@ -1103,6 +1147,16 @@ static inline void rxrpc_sysctl_exit(void) {}
#endif
/*
+ * txbuf.c
+ */
+extern atomic_t rxrpc_nr_txbuf;
+struct rxrpc_txbuf *rxrpc_alloc_txbuf(struct rxrpc_call *call, u8 packet_type,
+ gfp_t gfp);
+void rxrpc_get_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what);
+void rxrpc_see_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what);
+void rxrpc_put_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what);
+
+/*
* utils.c
*/
int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *);
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 99e10eea3732..48790ee77019 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -248,9 +248,8 @@ static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb)
if (call->peer->rtt_count < 3 ||
ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), now))
- rxrpc_propose_ACK(call, RXRPC_ACK_PING, sp->hdr.serial,
- true, true,
- rxrpc_propose_ack_ping_for_params);
+ rxrpc_send_ACK(call, RXRPC_ACK_PING, sp->hdr.serial,
+ rxrpc_propose_ack_ping_for_params);
}
/*
@@ -325,7 +324,8 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
call->security = conn->security;
call->security_ix = conn->security_ix;
call->peer = rxrpc_get_peer(conn->params.peer);
- call->cong_cwnd = call->peer->cong_cwnd;
+ call->cong_ssthresh = call->peer->cong_ssthresh;
+ call->tx_last_sent = ktime_get_real();
return call;
}
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 2a93e7b5fbd0..1e21a708390e 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -20,127 +20,103 @@
/*
* Propose a PING ACK be sent.
*/
-static void rxrpc_propose_ping(struct rxrpc_call *call,
- bool immediate, bool background)
+void rxrpc_propose_ping(struct rxrpc_call *call, u32 serial,
+ enum rxrpc_propose_ack_trace why)
{
- if (immediate) {
- if (background &&
- !test_and_set_bit(RXRPC_CALL_EV_PING, &call->events))
- rxrpc_queue_call(call);
- } else {
- unsigned long now = jiffies;
- unsigned long ping_at = now + rxrpc_idle_ack_delay;
-
- if (time_before(ping_at, call->ping_at)) {
- WRITE_ONCE(call->ping_at, ping_at);
- rxrpc_reduce_call_timer(call, ping_at, now,
- rxrpc_timer_set_for_ping);
- }
+ unsigned long now = jiffies;
+ unsigned long ping_at = now + rxrpc_idle_ack_delay;
+
+ if (time_before(ping_at, call->ping_at)) {
+ WRITE_ONCE(call->ping_at, ping_at);
+ rxrpc_reduce_call_timer(call, ping_at, now,
+ rxrpc_timer_set_for_ping);
+ trace_rxrpc_propose_ack(call, why, RXRPC_ACK_PING, serial);
}
}
/*
- * propose an ACK be sent
+ * Propose a DELAY ACK be sent in the future.
*/
-static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
- u32 serial, bool immediate, bool background,
- enum rxrpc_propose_ack_trace why)
+void rxrpc_propose_delay_ACK(struct rxrpc_call *call, rxrpc_serial_t serial,
+ enum rxrpc_propose_ack_trace why)
{
- enum rxrpc_propose_ack_outcome outcome = rxrpc_propose_ack_use;
unsigned long expiry = rxrpc_soft_ack_delay;
- s8 prior = rxrpc_ack_priority[ack_reason];
-
- /* Pings are handled specially because we don't want to accidentally
- * lose a ping response by subsuming it into a ping.
- */
- if (ack_reason == RXRPC_ACK_PING) {
- rxrpc_propose_ping(call, immediate, background);
- goto trace;
+ unsigned long now = jiffies, ack_at;
+
+ call->ackr_serial = serial;
+
+ if (rxrpc_soft_ack_delay < expiry)
+ expiry = rxrpc_soft_ack_delay;
+ if (call->peer->srtt_us != 0)
+ ack_at = usecs_to_jiffies(call->peer->srtt_us >> 3);
+ else
+ ack_at = expiry;
+
+ ack_at += READ_ONCE(call->tx_backoff);
+ ack_at += now;
+ if (time_before(ack_at, call->delay_ack_at)) {
+ WRITE_ONCE(call->delay_ack_at, ack_at);
+ rxrpc_reduce_call_timer(call, ack_at, now,
+ rxrpc_timer_set_for_ack);
}
- /* Update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial
- * numbers, but we don't alter the timeout.
- */
- _debug("prior %u %u vs %u %u",
- ack_reason, prior,
- call->ackr_reason, rxrpc_ack_priority[call->ackr_reason]);
- if (ack_reason == call->ackr_reason) {
- if (RXRPC_ACK_UPDATEABLE & (1 << ack_reason)) {
- outcome = rxrpc_propose_ack_update;
- call->ackr_serial = serial;
- }
- if (!immediate)
- goto trace;
- } else if (prior > rxrpc_ack_priority[call->ackr_reason]) {
- call->ackr_reason = ack_reason;
- call->ackr_serial = serial;
- } else {
- outcome = rxrpc_propose_ack_subsume;
+ trace_rxrpc_propose_ack(call, why, RXRPC_ACK_DELAY, serial);
+}
+
+/*
+ * Queue an ACK for immediate transmission.
+ */
+void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason,
+ rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why)
+{
+ struct rxrpc_local *local = call->conn->params.local;
+ struct rxrpc_txbuf *txb;
+
+ if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
+ return;
+ if (ack_reason == RXRPC_ACK_DELAY &&
+ test_and_set_bit(RXRPC_CALL_DELAY_ACK_PENDING, &call->flags)) {
+ trace_rxrpc_drop_ack(call, why, ack_reason, serial, false);
+ return;
}
- switch (ack_reason) {
- case RXRPC_ACK_REQUESTED:
- if (rxrpc_requested_ack_delay < expiry)
- expiry = rxrpc_requested_ack_delay;
- if (serial == 1)
- immediate = false;
- break;
-
- case RXRPC_ACK_DELAY:
- if (rxrpc_soft_ack_delay < expiry)
- expiry = rxrpc_soft_ack_delay;
- break;
-
- case RXRPC_ACK_IDLE:
- if (rxrpc_idle_ack_delay < expiry)
- expiry = rxrpc_idle_ack_delay;
- break;
-
- default:
- immediate = true;
- break;
+ rxrpc_inc_stat(call->rxnet, stat_tx_acks[ack_reason]);
+
+ txb = rxrpc_alloc_txbuf(call, RXRPC_PACKET_TYPE_ACK,
+ in_softirq() ? GFP_ATOMIC | __GFP_NOWARN : GFP_NOFS);
+ if (!txb) {
+ kleave(" = -ENOMEM");
+ return;
}
- if (test_bit(RXRPC_CALL_EV_ACK, &call->events)) {
- _debug("already scheduled");
- } else if (immediate || expiry == 0) {
- _debug("immediate ACK %lx", call->events);
- if (!test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events) &&
- background)
- rxrpc_queue_call(call);
- } else {
- unsigned long now = jiffies, ack_at;
-
- if (call->peer->srtt_us != 0)
- ack_at = usecs_to_jiffies(call->peer->srtt_us >> 3);
- else
- ack_at = expiry;
-
- ack_at += READ_ONCE(call->tx_backoff);
- ack_at += now;
- if (time_before(ack_at, call->ack_at)) {
- WRITE_ONCE(call->ack_at, ack_at);
- rxrpc_reduce_call_timer(call, ack_at, now,
- rxrpc_timer_set_for_ack);
- }
+ txb->ack_why = why;
+ txb->wire.seq = 0;
+ txb->wire.type = RXRPC_PACKET_TYPE_ACK;
+ txb->wire.flags |= RXRPC_SLOW_START_OK;
+ txb->ack.bufferSpace = 0;
+ txb->ack.maxSkew = 0;
+ txb->ack.firstPacket = 0;
+ txb->ack.previousPacket = 0;
+ txb->ack.serial = htonl(serial);
+ txb->ack.reason = ack_reason;
+ txb->ack.nAcks = 0;
+
+ if (!rxrpc_try_get_call(call, rxrpc_call_got)) {
+ rxrpc_put_txbuf(txb, rxrpc_txbuf_put_nomem);
+ return;
}
-trace:
- trace_rxrpc_propose_ack(call, why, ack_reason, serial, immediate,
- background, outcome);
-}
+ spin_lock_bh(&local->ack_tx_lock);
+ list_add_tail(&txb->tx_link, &local->ack_tx_queue);
+ spin_unlock_bh(&local->ack_tx_lock);
+ trace_rxrpc_send_ack(call, why, ack_reason, serial);
-/*
- * propose an ACK be sent, locking the call structure
- */
-void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
- u32 serial, bool immediate, bool background,
- enum rxrpc_propose_ack_trace why)
-{
- spin_lock_bh(&call->lock);
- __rxrpc_propose_ACK(call, ack_reason, serial,
- immediate, background, why);
- spin_unlock_bh(&call->lock);
+ if (in_task()) {
+ rxrpc_transmit_ack_packets(call->peer->local);
+ } else {
+ rxrpc_get_local(local);
+ rxrpc_queue_local(local);
+ }
}
/*
@@ -156,62 +132,131 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call)
*/
static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
{
- struct sk_buff *skb;
+ struct rxrpc_ackpacket *ack = NULL;
+ struct rxrpc_txbuf *txb;
+ struct sk_buff *ack_skb = NULL;
unsigned long resend_at;
- rxrpc_seq_t cursor, seq, top;
+ rxrpc_seq_t transmitted = READ_ONCE(call->tx_transmitted);
ktime_t now, max_age, oldest, ack_ts;
- int ix;
- u8 annotation, anno_type, retrans = 0, unacked = 0;
+ bool unacked = false;
+ unsigned int i;
+ LIST_HEAD(retrans_queue);
- _enter("{%d,%d}", call->tx_hard_ack, call->tx_top);
+ _enter("{%d,%d}", call->acks_hard_ack, call->tx_top);
now = ktime_get_real();
max_age = ktime_sub_us(now, jiffies_to_usecs(call->peer->rto_j));
+ oldest = now;
+
+ /* See if there's an ACK saved with a soft-ACK table in it. */
+ if (call->acks_soft_tbl) {
+ spin_lock_bh(&call->acks_ack_lock);
+ ack_skb = call->acks_soft_tbl;
+ if (ack_skb) {
+ rxrpc_get_skb(ack_skb, rxrpc_skb_ack);
+ ack = (void *)ack_skb->data + sizeof(struct rxrpc_wire_header);
+ }
+ spin_unlock_bh(&call->acks_ack_lock);
+ }
- spin_lock_bh(&call->lock);
+ if (list_empty(&call->tx_buffer))
+ goto no_resend;
- cursor = call->tx_hard_ack;
- top = call->tx_top;
- ASSERT(before_eq(cursor, top));
- if (cursor == top)
- goto out_unlock;
+ spin_lock(&call->tx_lock);
- /* Scan the packet list without dropping the lock and decide which of
- * the packets in the Tx buffer we're going to resend and what the new
- * resend timeout will be.
- */
- trace_rxrpc_resend(call, (cursor + 1) & RXRPC_RXTX_BUFF_MASK);
- oldest = now;
- for (seq = cursor + 1; before_eq(seq, top); seq++) {
- ix = seq & RXRPC_RXTX_BUFF_MASK;
- annotation = call->rxtx_annotations[ix];
- anno_type = annotation & RXRPC_TX_ANNO_MASK;
- annotation &= ~RXRPC_TX_ANNO_MASK;
- if (anno_type == RXRPC_TX_ANNO_ACK)
- continue;
+ if (list_empty(&call->tx_buffer))
+ goto no_further_resend;
+
+ trace_rxrpc_resend(call);
+ txb = list_first_entry(&call->tx_buffer, struct rxrpc_txbuf, call_link);
- skb = call->rxtx_buffer[ix];
- rxrpc_see_skb(skb, rxrpc_skb_seen);
+ /* Scan the soft ACK table without dropping the lock and resend any
+ * explicitly NAK'd packets.
+ */
+ if (ack) {
+ for (i = 0; i < ack->nAcks; i++) {
+ rxrpc_seq_t seq;
- if (anno_type == RXRPC_TX_ANNO_UNACK) {
- if (ktime_after(skb->tstamp, max_age)) {
- if (ktime_before(skb->tstamp, oldest))
- oldest = skb->tstamp;
+ if (ack->acks[i] & 1)
continue;
+ seq = ntohl(ack->firstPacket) + i;
+ if (after(txb->seq, transmitted))
+ break;
+ if (after(txb->seq, seq))
+ continue; /* A new hard ACK probably came in */
+ list_for_each_entry_from(txb, &call->tx_buffer, call_link) {
+ if (txb->seq == seq)
+ goto found_txb;
+ }
+ goto no_further_resend;
+
+ found_txb:
+ if (after(ntohl(txb->wire.serial), call->acks_highest_serial))
+ continue; /* Ack point not yet reached */
+
+ rxrpc_see_txbuf(txb, rxrpc_txbuf_see_unacked);
+
+ if (list_empty(&txb->tx_link)) {
+ rxrpc_get_txbuf(txb, rxrpc_txbuf_get_retrans);
+ rxrpc_get_call(call, rxrpc_call_got_tx);
+ list_add_tail(&txb->tx_link, &retrans_queue);
+ set_bit(RXRPC_TXBUF_RESENT, &txb->flags);
}
- if (!(annotation & RXRPC_TX_ANNO_RESENT))
- unacked++;
+
+ trace_rxrpc_retransmit(call, txb->seq,
+ ktime_to_ns(ktime_sub(txb->last_sent,
+ max_age)));
+
+ if (list_is_last(&txb->call_link, &call->tx_buffer))
+ goto no_further_resend;
+ txb = list_next_entry(txb, call_link);
+ }
+ }
+
+ /* Fast-forward through the Tx queue to the point the peer says it has
+ * seen. Anything between the soft-ACK table and that point will get
+ * ACK'd or NACK'd in due course, so don't worry about it here; here we
+ * need to consider retransmitting anything beyond that point.
+ *
+ * Note that ACK for a packet can beat the update of tx_transmitted.
+ */
+ if (after_eq(READ_ONCE(call->acks_prev_seq), READ_ONCE(call->tx_transmitted)))
+ goto no_further_resend;
+
+ list_for_each_entry_from(txb, &call->tx_buffer, call_link) {
+ if (before_eq(txb->seq, READ_ONCE(call->acks_prev_seq)))
+ continue;
+ if (after(txb->seq, READ_ONCE(call->tx_transmitted)))
+ break; /* Not transmitted yet */
+
+ if (ack && ack->reason == RXRPC_ACK_PING_RESPONSE &&
+ before(ntohl(txb->wire.serial), ntohl(ack->serial)))
+ goto do_resend; /* Wasn't accounted for by a more recent ping. */
+
+ if (ktime_after(txb->last_sent, max_age)) {
+ if (ktime_before(txb->last_sent, oldest))
+ oldest = txb->last_sent;
+ continue;
}
- /* Okay, we need to retransmit a packet. */
- call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS | annotation;
- retrans++;
- trace_rxrpc_retransmit(call, seq, annotation | anno_type,
- ktime_to_ns(ktime_sub(skb->tstamp, max_age)));
+ do_resend:
+ unacked = true;
+ if (list_empty(&txb->tx_link)) {
+ rxrpc_get_txbuf(txb, rxrpc_txbuf_get_retrans);
+ list_add_tail(&txb->tx_link, &retrans_queue);
+ set_bit(RXRPC_TXBUF_RESENT, &txb->flags);
+ rxrpc_inc_stat(call->rxnet, stat_tx_data_retrans);
+ }
}
+no_further_resend:
+ spin_unlock(&call->tx_lock);
+no_resend:
+ rxrpc_free_skb(ack_skb, rxrpc_skb_freed);
+
resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(now, oldest)));
- resend_at += jiffies + rxrpc_get_rto_backoff(call->peer, retrans);
+ resend_at += jiffies + rxrpc_get_rto_backoff(call->peer,
+ !list_empty(&retrans_queue));
WRITE_ONCE(call->resend_at, resend_at);
if (unacked)
@@ -221,62 +266,28 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
* that an ACK got lost somewhere. Send a ping to find out instead of
* retransmitting data.
*/
- if (!retrans) {
+ if (list_empty(&retrans_queue)) {
rxrpc_reduce_call_timer(call, resend_at, now_j,
rxrpc_timer_set_for_resend);
- spin_unlock_bh(&call->lock);
ack_ts = ktime_sub(now, call->acks_latest_ts);
if (ktime_to_us(ack_ts) < (call->peer->srtt_us >> 3))
goto out;
- rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, true, false,
- rxrpc_propose_ack_ping_for_lost_ack);
- rxrpc_send_ack_packet(call, true, NULL);
+ rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
+ rxrpc_propose_ack_ping_for_lost_ack);
goto out;
}
- /* Now go through the Tx window and perform the retransmissions. We
- * have to drop the lock for each send. If an ACK comes in whilst the
- * lock is dropped, it may clear some of the retransmission markers for
- * packets that it soft-ACKs.
- */
- for (seq = cursor + 1; before_eq(seq, top); seq++) {
- ix = seq & RXRPC_RXTX_BUFF_MASK;
- annotation = call->rxtx_annotations[ix];
- anno_type = annotation & RXRPC_TX_ANNO_MASK;
- if (anno_type != RXRPC_TX_ANNO_RETRANS)
- continue;
-
- /* We need to reset the retransmission state, but we need to do
- * so before we drop the lock as a new ACK/NAK may come in and
- * confuse things
- */
- annotation &= ~RXRPC_TX_ANNO_MASK;
- annotation |= RXRPC_TX_ANNO_UNACK | RXRPC_TX_ANNO_RESENT;
- call->rxtx_annotations[ix] = annotation;
-
- skb = call->rxtx_buffer[ix];
- if (!skb)
- continue;
-
- rxrpc_get_skb(skb, rxrpc_skb_got);
- spin_unlock_bh(&call->lock);
-
- if (rxrpc_send_data_packet(call, skb, true) < 0) {
- rxrpc_free_skb(skb, rxrpc_skb_freed);
- return;
- }
+ while ((txb = list_first_entry_or_null(&retrans_queue,
+ struct rxrpc_txbuf, tx_link))) {
+ list_del_init(&txb->tx_link);
+ rxrpc_send_data_packet(call, txb);
+ rxrpc_put_txbuf(txb, rxrpc_txbuf_put_trans);
- if (rxrpc_is_client_call(call))
- rxrpc_expose_client_call(call);
-
- rxrpc_free_skb(skb, rxrpc_skb_freed);
- spin_lock_bh(&call->lock);
- if (after(call->tx_hard_ack, seq))
- seq = call->tx_hard_ack;
+ trace_rxrpc_retransmit(call, txb->seq,
+ ktime_to_ns(ktime_sub(txb->last_sent,
+ max_age)));
}
-out_unlock:
- spin_unlock_bh(&call->lock);
out:
_leave("");
}
@@ -288,9 +299,9 @@ void rxrpc_process_call(struct work_struct *work)
{
struct rxrpc_call *call =
container_of(work, struct rxrpc_call, processor);
- rxrpc_serial_t *send_ack;
unsigned long now, next, t;
unsigned int iterations = 0;
+ rxrpc_serial_t ackr_serial;
rxrpc_see_call(call);
@@ -309,6 +320,9 @@ recheck_state:
goto recheck_state;
}
+ if (READ_ONCE(call->acks_hard_ack) != call->tx_bottom)
+ rxrpc_shrink_call_tx_buffer(call);
+
if (call->state == RXRPC_CALL_COMPLETE) {
rxrpc_delete_call_timer(call);
goto out_put;
@@ -335,11 +349,13 @@ recheck_state:
set_bit(RXRPC_CALL_EV_EXPIRED, &call->events);
}
- t = READ_ONCE(call->ack_at);
+ t = READ_ONCE(call->delay_ack_at);
if (time_after_eq(now, t)) {
trace_rxrpc_timer(call, rxrpc_timer_exp_ack, now);
- cmpxchg(&call->ack_at, t, now + MAX_JIFFY_OFFSET);
- set_bit(RXRPC_CALL_EV_ACK, &call->events);
+ cmpxchg(&call->delay_ack_at, t, now + MAX_JIFFY_OFFSET);
+ ackr_serial = xchg(&call->ackr_serial, 0);
+ rxrpc_send_ACK(call, RXRPC_ACK_DELAY, ackr_serial,
+ rxrpc_propose_ack_ping_for_lost_ack);
}
t = READ_ONCE(call->ack_lost_at);
@@ -353,16 +369,16 @@ recheck_state:
if (time_after_eq(now, t)) {
trace_rxrpc_timer(call, rxrpc_timer_exp_keepalive, now);
cmpxchg(&call->keepalive_at, t, now + MAX_JIFFY_OFFSET);
- rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, true, true,
- rxrpc_propose_ack_ping_for_keepalive);
- set_bit(RXRPC_CALL_EV_PING, &call->events);
+ rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
+ rxrpc_propose_ack_ping_for_keepalive);
}
t = READ_ONCE(call->ping_at);
if (time_after_eq(now, t)) {
trace_rxrpc_timer(call, rxrpc_timer_exp_ping, now);
cmpxchg(&call->ping_at, t, now + MAX_JIFFY_OFFSET);
- set_bit(RXRPC_CALL_EV_PING, &call->events);
+ rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
+ rxrpc_propose_ack_ping_for_keepalive);
}
t = READ_ONCE(call->resend_at);
@@ -385,25 +401,10 @@ recheck_state:
goto recheck_state;
}
- send_ack = NULL;
if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events)) {
call->acks_lost_top = call->tx_top;
- rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, true, false,
- rxrpc_propose_ack_ping_for_lost_ack);
- send_ack = &call->acks_lost_ping;
- }
-
- if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events) ||
- send_ack) {
- if (call->ackr_reason) {
- rxrpc_send_ack_packet(call, false, send_ack);
- goto recheck_state;
- }
- }
-
- if (test_and_clear_bit(RXRPC_CALL_EV_PING, &call->events)) {
- rxrpc_send_ack_packet(call, true, NULL);
- goto recheck_state;
+ rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
+ rxrpc_propose_ack_ping_for_lost_ack);
}
if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events) &&
@@ -419,7 +420,7 @@ recheck_state:
set(call->expect_req_by);
set(call->expect_term_by);
- set(call->ack_at);
+ set(call->delay_ack_at);
set(call->ack_lost_at);
set(call->resend_at);
set(call->keepalive_at);
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 6401cdf7a624..1befe22cd301 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -52,7 +52,7 @@ static void rxrpc_call_timer_expired(struct timer_list *t)
_enter("%d", call->debug_id);
if (call->state < RXRPC_CALL_COMPLETE) {
- trace_rxrpc_timer(call, rxrpc_timer_expired, jiffies);
+ trace_rxrpc_timer_expired(call, jiffies);
__rxrpc_queue_call(call);
} else {
rxrpc_put_call(call, rxrpc_call_put);
@@ -129,16 +129,6 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
if (!call)
return NULL;
- call->rxtx_buffer = kcalloc(RXRPC_RXTX_BUFF_SIZE,
- sizeof(struct sk_buff *),
- gfp);
- if (!call->rxtx_buffer)
- goto nomem;
-
- call->rxtx_annotations = kcalloc(RXRPC_RXTX_BUFF_SIZE, sizeof(u8), gfp);
- if (!call->rxtx_annotations)
- goto nomem_2;
-
mutex_init(&call->user_mutex);
/* Prevent lockdep reporting a deadlock false positive between the afs
@@ -155,37 +145,39 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
INIT_LIST_HEAD(&call->accept_link);
INIT_LIST_HEAD(&call->recvmsg_link);
INIT_LIST_HEAD(&call->sock_link);
+ INIT_LIST_HEAD(&call->tx_buffer);
+ skb_queue_head_init(&call->recvmsg_queue);
+ skb_queue_head_init(&call->rx_oos_queue);
init_waitqueue_head(&call->waitq);
- spin_lock_init(&call->lock);
spin_lock_init(&call->notify_lock);
+ spin_lock_init(&call->tx_lock);
spin_lock_init(&call->input_lock);
+ spin_lock_init(&call->acks_ack_lock);
rwlock_init(&call->state_lock);
refcount_set(&call->ref, 1);
call->debug_id = debug_id;
call->tx_total_len = -1;
call->next_rx_timo = 20 * HZ;
call->next_req_timo = 1 * HZ;
+ atomic64_set(&call->ackr_window, 0x100000001ULL);
memset(&call->sock_node, 0xed, sizeof(call->sock_node));
- /* Leave space in the ring to handle a maxed-out jumbo packet */
call->rx_winsize = rxrpc_rx_window_size;
call->tx_winsize = 16;
- call->rx_expect_next = 1;
- call->cong_cwnd = 2;
- call->cong_ssthresh = RXRPC_RXTX_BUFF_SIZE - 1;
+ if (RXRPC_TX_SMSS > 2190)
+ call->cong_cwnd = 2;
+ else if (RXRPC_TX_SMSS > 1095)
+ call->cong_cwnd = 3;
+ else
+ call->cong_cwnd = 4;
+ call->cong_ssthresh = RXRPC_TX_MAX_WINDOW;
call->rxnet = rxnet;
call->rtt_avail = RXRPC_CALL_RTT_AVAIL_MASK;
atomic_inc(&rxnet->nr_calls);
return call;
-
-nomem_2:
- kfree(call->rxtx_buffer);
-nomem:
- kmem_cache_free(rxrpc_call_jar, call);
- return NULL;
}
/*
@@ -206,7 +198,6 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
return ERR_PTR(-ENOMEM);
call->state = RXRPC_CALL_CLIENT_AWAIT_CONN;
call->service_id = srx->srx_service;
- call->tx_phase = true;
now = ktime_get_real();
call->acks_latest_ts = now;
call->cong_tstamp = now;
@@ -223,7 +214,7 @@ static void rxrpc_start_call_timer(struct rxrpc_call *call)
unsigned long now = jiffies;
unsigned long j = now + MAX_JIFFY_OFFSET;
- call->ack_at = j;
+ call->delay_ack_at = j;
call->ack_lost_at = j;
call->resend_at = j;
call->ping_at = j;
@@ -510,16 +501,12 @@ void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
}
/*
- * Clean up the RxTx skb ring.
+ * Clean up the Rx skb ring.
*/
static void rxrpc_cleanup_ring(struct rxrpc_call *call)
{
- int i;
-
- for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) {
- rxrpc_free_skb(call->rxtx_buffer[i], rxrpc_skb_cleaned);
- call->rxtx_buffer[i] = NULL;
- }
+ skb_queue_purge(&call->recvmsg_queue);
+ skb_queue_purge(&call->rx_oos_queue);
}
/*
@@ -539,10 +526,8 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
- spin_lock_bh(&call->lock);
if (test_and_set_bit(RXRPC_CALL_RELEASED, &call->flags))
BUG();
- spin_unlock_bh(&call->lock);
rxrpc_put_call_slot(call);
rxrpc_delete_call_timer(call);
@@ -656,8 +641,6 @@ static void rxrpc_destroy_call(struct work_struct *work)
rxrpc_put_connection(call->conn);
rxrpc_put_peer(call->peer);
- kfree(call->rxtx_buffer);
- kfree(call->rxtx_annotations);
kmem_cache_free(rxrpc_call_jar, call);
if (atomic_dec_and_test(&rxnet->nr_calls))
wake_up_var(&rxnet->nr_calls);
@@ -684,6 +667,8 @@ static void rxrpc_rcu_destroy_call(struct rcu_head *rcu)
*/
void rxrpc_cleanup_call(struct rxrpc_call *call)
{
+ struct rxrpc_txbuf *txb;
+
_net("DESTROY CALL %d", call->debug_id);
memset(&call->sock_node, 0xcd, sizeof(call->sock_node));
@@ -692,7 +677,13 @@ void rxrpc_cleanup_call(struct rxrpc_call *call)
ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags));
rxrpc_cleanup_ring(call);
- rxrpc_free_skb(call->tx_pending, rxrpc_skb_cleaned);
+ while ((txb = list_first_entry_or_null(&call->tx_buffer,
+ struct rxrpc_txbuf, call_link))) {
+ list_del(&txb->call_link);
+ rxrpc_put_txbuf(txb, rxrpc_txbuf_put_cleaned);
+ }
+ rxrpc_put_txbuf(call->tx_pending, rxrpc_txbuf_put_cleaned);
+ rxrpc_free_skb(call->acks_soft_tbl, rxrpc_skb_cleaned);
call_rcu(&call->rcu, rxrpc_rcu_destroy_call);
}
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 3c9eeb5b750c..f020f308ed9e 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -363,7 +363,8 @@ static struct rxrpc_bundle *rxrpc_prep_call(struct rxrpc_sock *rx,
if (!cp->peer)
goto error;
- call->cong_cwnd = cp->peer->cong_cwnd;
+ call->tx_last_sent = ktime_get_real();
+ call->cong_ssthresh = cp->peer->cong_ssthresh;
if (call->cong_cwnd >= call->cong_ssthresh)
call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
else
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 22089e37e97f..156bd26daf74 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -175,7 +175,7 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn,
trace_rxrpc_disconnect_call(call);
switch (call->completion) {
case RXRPC_CALL_SUCCEEDED:
- chan->last_seq = call->rx_hard_ack;
+ chan->last_seq = call->rx_highest_seq;
chan->last_type = RXRPC_PACKET_TYPE_ACK;
break;
case RXRPC_CALL_LOCALLY_ABORTED:
@@ -207,7 +207,7 @@ void rxrpc_disconnect_call(struct rxrpc_call *call)
{
struct rxrpc_connection *conn = call->conn;
- call->peer->cong_cwnd = call->cong_cwnd;
+ call->peer->cong_ssthresh = call->cong_ssthresh;
if (!hlist_unhashed(&call->error_link)) {
spin_lock_bh(&call->peer->lock);
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 721d847ba92b..bdf70b81addc 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -7,20 +7,6 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/module.h>
-#include <linux/net.h>
-#include <linux/skbuff.h>
-#include <linux/errqueue.h>
-#include <linux/udp.h>
-#include <linux/in.h>
-#include <linux/in6.h>
-#include <linux/icmp.h>
-#include <linux/gfp.h>
-#include <net/sock.h>
-#include <net/af_rxrpc.h>
-#include <net/ip.h>
-#include <net/udp.h>
-#include <net/net_namespace.h>
#include "ar-internal.h"
static void rxrpc_proto_abort(const char *why,
@@ -46,7 +32,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
bool resend = false;
summary->flight_size =
- (call->tx_top - call->tx_hard_ack) - summary->nr_acks;
+ (call->tx_top - call->acks_hard_ack) - summary->nr_acks;
if (test_and_clear_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags)) {
summary->retrans_timeo = true;
@@ -72,9 +58,28 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
summary->cumulative_acks = cumulative_acks;
summary->dup_acks = call->cong_dup_acks;
+ /* If we haven't transmitted anything for >1RTT, we should reset the
+ * congestion management state.
+ */
+ if ((call->cong_mode == RXRPC_CALL_SLOW_START ||
+ call->cong_mode == RXRPC_CALL_CONGEST_AVOIDANCE) &&
+ ktime_before(ktime_add_us(call->tx_last_sent,
+ call->peer->srtt_us >> 3),
+ ktime_get_real())
+ ) {
+ change = rxrpc_cong_idle_reset;
+ summary->mode = RXRPC_CALL_SLOW_START;
+ if (RXRPC_TX_SMSS > 2190)
+ summary->cwnd = 2;
+ else if (RXRPC_TX_SMSS > 1095)
+ summary->cwnd = 3;
+ else
+ summary->cwnd = 4;
+ }
+
switch (call->cong_mode) {
case RXRPC_CALL_SLOW_START:
- if (summary->nr_nacks > 0)
+ if (summary->saw_nacks)
goto packet_loss_detected;
if (summary->cumulative_acks > 0)
cwnd += 1;
@@ -85,7 +90,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
goto out;
case RXRPC_CALL_CONGEST_AVOIDANCE:
- if (summary->nr_nacks > 0)
+ if (summary->saw_nacks)
goto packet_loss_detected;
/* We analyse the number of packets that get ACK'd per RTT
@@ -104,7 +109,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
goto out;
case RXRPC_CALL_PACKET_LOSS:
- if (summary->nr_nacks == 0)
+ if (!summary->saw_nacks)
goto resume_normality;
if (summary->new_low_nack) {
@@ -142,7 +147,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
} else {
change = rxrpc_cong_progress;
cwnd = call->cong_ssthresh;
- if (summary->nr_nacks == 0)
+ if (!summary->saw_nacks)
goto resume_normality;
}
goto out;
@@ -164,8 +169,8 @@ resume_normality:
out:
cumulative_acks = 0;
out_no_clear_ca:
- if (cwnd >= RXRPC_RXTX_BUFF_SIZE - 1)
- cwnd = RXRPC_RXTX_BUFF_SIZE - 1;
+ if (cwnd >= RXRPC_TX_MAX_WINDOW)
+ cwnd = RXRPC_TX_MAX_WINDOW;
call->cong_cwnd = cwnd;
call->cong_cumul_acks = cumulative_acks;
trace_rxrpc_congest(call, summary, acked_serial, change);
@@ -183,9 +188,8 @@ send_extra_data:
/* Send some previously unsent DATA if we have some to advance the ACK
* state.
*/
- if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] &
- RXRPC_TX_ANNO_LAST ||
- summary->nr_acks != call->tx_top - call->tx_hard_ack) {
+ if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) ||
+ summary->nr_acks != call->tx_top - call->acks_hard_ack) {
call->cong_extra++;
wake_up(&call->waitq);
}
@@ -198,53 +202,39 @@ send_extra_data:
static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
struct rxrpc_ack_summary *summary)
{
- struct sk_buff *skb, *list = NULL;
+ struct rxrpc_txbuf *txb;
bool rot_last = false;
- int ix;
- u8 annotation;
-
- if (call->acks_lowest_nak == call->tx_hard_ack) {
- call->acks_lowest_nak = to;
- } else if (before_eq(call->acks_lowest_nak, to)) {
- summary->new_low_nack = true;
- call->acks_lowest_nak = to;
- }
-
- spin_lock(&call->lock);
-
- while (before(call->tx_hard_ack, to)) {
- call->tx_hard_ack++;
- ix = call->tx_hard_ack & RXRPC_RXTX_BUFF_MASK;
- skb = call->rxtx_buffer[ix];
- annotation = call->rxtx_annotations[ix];
- rxrpc_see_skb(skb, rxrpc_skb_rotated);
- call->rxtx_buffer[ix] = NULL;
- call->rxtx_annotations[ix] = 0;
- skb->next = list;
- list = skb;
- if (annotation & RXRPC_TX_ANNO_LAST) {
+ list_for_each_entry_rcu(txb, &call->tx_buffer, call_link, false) {
+ if (before_eq(txb->seq, call->acks_hard_ack))
+ continue;
+ summary->nr_rot_new_acks++;
+ if (test_bit(RXRPC_TXBUF_LAST, &txb->flags)) {
set_bit(RXRPC_CALL_TX_LAST, &call->flags);
rot_last = true;
}
- if ((annotation & RXRPC_TX_ANNO_MASK) != RXRPC_TX_ANNO_ACK)
- summary->nr_rot_new_acks++;
+ if (txb->seq == to)
+ break;
}
- spin_unlock(&call->lock);
+ if (rot_last)
+ set_bit(RXRPC_CALL_TX_ALL_ACKED, &call->flags);
- trace_rxrpc_transmit(call, (rot_last ?
- rxrpc_transmit_rotate_last :
- rxrpc_transmit_rotate));
- wake_up(&call->waitq);
+ _enter("%x,%x,%x,%d", to, call->acks_hard_ack, call->tx_top, rot_last);
- while (list) {
- skb = list;
- list = skb->next;
- skb_mark_not_on_list(skb);
- rxrpc_free_skb(skb, rxrpc_skb_freed);
+ if (call->acks_lowest_nak == call->acks_hard_ack) {
+ call->acks_lowest_nak = to;
+ } else if (after(to, call->acks_lowest_nak)) {
+ summary->new_low_nack = true;
+ call->acks_lowest_nak = to;
}
+ smp_store_release(&call->acks_hard_ack, to);
+
+ trace_rxrpc_txqueue(call, (rot_last ?
+ rxrpc_txqueue_rotate_last :
+ rxrpc_txqueue_rotate));
+ wake_up(&call->waitq);
return rot_last;
}
@@ -284,9 +274,9 @@ static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
write_unlock(&call->state_lock);
if (state == RXRPC_CALL_CLIENT_AWAIT_REPLY)
- trace_rxrpc_transmit(call, rxrpc_transmit_await_reply);
+ trace_rxrpc_txqueue(call, rxrpc_txqueue_await_reply);
else
- trace_rxrpc_transmit(call, rxrpc_transmit_end);
+ trace_rxrpc_txqueue(call, rxrpc_txqueue_end);
_leave(" = ok");
return true;
@@ -307,13 +297,10 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call)
rxrpc_seq_t top = READ_ONCE(call->tx_top);
if (call->ackr_reason) {
- spin_lock_bh(&call->lock);
- call->ackr_reason = 0;
- spin_unlock_bh(&call->lock);
now = jiffies;
timo = now + MAX_JIFFY_OFFSET;
WRITE_ONCE(call->resend_at, timo);
- WRITE_ONCE(call->ack_at, timo);
+ WRITE_ONCE(call->delay_ack_at, timo);
trace_rxrpc_timer(call, rxrpc_timer_init_for_reply, now);
}
@@ -323,85 +310,230 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call)
return false;
}
}
- if (!rxrpc_end_tx_phase(call, true, "ETD"))
- return false;
- call->tx_phase = false;
- return true;
+ return rxrpc_end_tx_phase(call, true, "ETD");
+}
+
+static void rxrpc_input_update_ack_window(struct rxrpc_call *call,
+ rxrpc_seq_t window, rxrpc_seq_t wtop)
+{
+ atomic64_set_release(&call->ackr_window, ((u64)wtop) << 32 | window);
}
/*
- * Scan a data packet to validate its structure and to work out how many
- * subpackets it contains.
- *
- * A jumbo packet is a collection of consecutive packets glued together with
- * little headers between that indicate how to change the initial header for
- * each subpacket.
- *
- * RXRPC_JUMBO_PACKET must be set on all but the last subpacket - and all but
- * the last are RXRPC_JUMBO_DATALEN in size. The last subpacket may be of any
- * size.
+ * Push a DATA packet onto the Rx queue.
*/
-static bool rxrpc_validate_data(struct sk_buff *skb)
+static void rxrpc_input_queue_data(struct rxrpc_call *call, struct sk_buff *skb,
+ rxrpc_seq_t window, rxrpc_seq_t wtop,
+ enum rxrpc_receive_trace why)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- unsigned int offset = sizeof(struct rxrpc_wire_header);
- unsigned int len = skb->len;
- u8 flags = sp->hdr.flags;
+ bool last = sp->hdr.flags & RXRPC_LAST_PACKET;
- for (;;) {
- if (flags & RXRPC_REQUEST_ACK)
- __set_bit(sp->nr_subpackets, sp->rx_req_ack);
- sp->nr_subpackets++;
+ __skb_queue_tail(&call->recvmsg_queue, skb);
+ rxrpc_input_update_ack_window(call, window, wtop);
- if (!(flags & RXRPC_JUMBO_PACKET))
- break;
+ trace_rxrpc_receive(call, last ? why + 1 : why, sp->hdr.serial, sp->hdr.seq);
+}
- if (len - offset < RXRPC_JUMBO_SUBPKTLEN)
- goto protocol_error;
- if (flags & RXRPC_LAST_PACKET)
- goto protocol_error;
- offset += RXRPC_JUMBO_DATALEN;
- if (skb_copy_bits(skb, offset, &flags, 1) < 0)
- goto protocol_error;
- offset += sizeof(struct rxrpc_jumbo_header);
+/*
+ * Process a DATA packet.
+ */
+static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct sk_buff *oos;
+ rxrpc_serial_t serial = sp->hdr.serial;
+ u64 win = atomic64_read(&call->ackr_window);
+ rxrpc_seq_t window = lower_32_bits(win);
+ rxrpc_seq_t wtop = upper_32_bits(win);
+ rxrpc_seq_t wlimit = window + call->rx_winsize - 1;
+ rxrpc_seq_t seq = sp->hdr.seq;
+ bool last = sp->hdr.flags & RXRPC_LAST_PACKET;
+ int ack_reason = -1;
+
+ rxrpc_inc_stat(call->rxnet, stat_rx_data);
+ if (sp->hdr.flags & RXRPC_REQUEST_ACK)
+ rxrpc_inc_stat(call->rxnet, stat_rx_data_reqack);
+ if (sp->hdr.flags & RXRPC_JUMBO_PACKET)
+ rxrpc_inc_stat(call->rxnet, stat_rx_data_jumbo);
+
+ if (last) {
+ if (test_and_set_bit(RXRPC_CALL_RX_LAST, &call->flags) &&
+ seq + 1 != wtop) {
+ rxrpc_proto_abort("LSN", call, seq);
+ goto err_free;
+ }
+ } else {
+ if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) &&
+ after_eq(seq, wtop)) {
+ pr_warn("Packet beyond last: c=%x q=%x window=%x-%x wlimit=%x\n",
+ call->debug_id, seq, window, wtop, wlimit);
+ rxrpc_proto_abort("LSA", call, seq);
+ goto err_free;
+ }
}
- if (flags & RXRPC_LAST_PACKET)
- sp->rx_flags |= RXRPC_SKB_INCL_LAST;
- return true;
+ if (after(seq, call->rx_highest_seq))
+ call->rx_highest_seq = seq;
-protocol_error:
- return false;
+ trace_rxrpc_rx_data(call->debug_id, seq, serial, sp->hdr.flags);
+
+ if (before(seq, window)) {
+ ack_reason = RXRPC_ACK_DUPLICATE;
+ goto send_ack;
+ }
+ if (after(seq, wlimit)) {
+ ack_reason = RXRPC_ACK_EXCEEDS_WINDOW;
+ goto send_ack;
+ }
+
+ /* Queue the packet. */
+ if (seq == window) {
+ rxrpc_seq_t reset_from;
+ bool reset_sack = false;
+
+ if (sp->hdr.flags & RXRPC_REQUEST_ACK)
+ ack_reason = RXRPC_ACK_REQUESTED;
+ /* Send an immediate ACK if we fill in a hole */
+ else if (!skb_queue_empty(&call->rx_oos_queue))
+ ack_reason = RXRPC_ACK_DELAY;
+
+ window++;
+ if (after(window, wtop))
+ wtop = window;
+
+ spin_lock(&call->recvmsg_queue.lock);
+ rxrpc_input_queue_data(call, skb, window, wtop, rxrpc_receive_queue);
+ skb = NULL;
+
+ while ((oos = skb_peek(&call->rx_oos_queue))) {
+ struct rxrpc_skb_priv *osp = rxrpc_skb(oos);
+
+ if (after(osp->hdr.seq, window))
+ break;
+
+ __skb_unlink(oos, &call->rx_oos_queue);
+ last = osp->hdr.flags & RXRPC_LAST_PACKET;
+ seq = osp->hdr.seq;
+ if (!reset_sack) {
+ reset_from = seq;
+ reset_sack = true;
+ }
+
+ window++;
+ rxrpc_input_queue_data(call, oos, window, wtop,
+ rxrpc_receive_queue_oos);
+ }
+
+ spin_unlock(&call->recvmsg_queue.lock);
+
+ if (reset_sack) {
+ do {
+ call->ackr_sack_table[reset_from % RXRPC_SACK_SIZE] = 0;
+ } while (reset_from++, before(reset_from, window));
+ }
+ } else {
+ bool keep = false;
+
+ ack_reason = RXRPC_ACK_OUT_OF_SEQUENCE;
+
+ if (!call->ackr_sack_table[seq % RXRPC_SACK_SIZE]) {
+ call->ackr_sack_table[seq % RXRPC_SACK_SIZE] = 1;
+ keep = 1;
+ }
+
+ if (after(seq + 1, wtop)) {
+ wtop = seq + 1;
+ rxrpc_input_update_ack_window(call, window, wtop);
+ }
+
+ if (!keep) {
+ ack_reason = RXRPC_ACK_DUPLICATE;
+ goto send_ack;
+ }
+
+ skb_queue_walk(&call->rx_oos_queue, oos) {
+ struct rxrpc_skb_priv *osp = rxrpc_skb(oos);
+
+ if (after(osp->hdr.seq, seq)) {
+ __skb_queue_before(&call->rx_oos_queue, oos, skb);
+ goto oos_queued;
+ }
+ }
+
+ __skb_queue_tail(&call->rx_oos_queue, skb);
+ oos_queued:
+ trace_rxrpc_receive(call, last ? rxrpc_receive_oos_last : rxrpc_receive_oos,
+ sp->hdr.serial, sp->hdr.seq);
+ skb = NULL;
+ }
+
+send_ack:
+ if (ack_reason < 0 &&
+ atomic_inc_return(&call->ackr_nr_unacked) > 2 &&
+ test_and_set_bit(RXRPC_CALL_IDLE_ACK_PENDING, &call->flags)) {
+ ack_reason = RXRPC_ACK_IDLE;
+ } else if (ack_reason >= 0) {
+ set_bit(RXRPC_CALL_IDLE_ACK_PENDING, &call->flags);
+ }
+
+ if (ack_reason >= 0)
+ rxrpc_send_ACK(call, ack_reason, serial,
+ rxrpc_propose_ack_input_data);
+ else
+ rxrpc_propose_delay_ACK(call, serial,
+ rxrpc_propose_ack_input_data);
+
+err_free:
+ rxrpc_free_skb(skb, rxrpc_skb_freed);
}
/*
- * Handle reception of a duplicate packet.
- *
- * We have to take care to avoid an attack here whereby we're given a series of
- * jumbograms, each with a sequence number one before the preceding one and
- * filled up to maximum UDP size. If they never send us the first packet in
- * the sequence, they can cause us to have to hold on to around 2MiB of kernel
- * space until the call times out.
- *
- * We limit the space usage by only accepting three duplicate jumbo packets per
- * call. After that, we tell the other side we're no longer accepting jumbos
- * (that information is encoded in the ACK packet).
+ * Split a jumbo packet and file the bits separately.
*/
-static void rxrpc_input_dup_data(struct rxrpc_call *call, rxrpc_seq_t seq,
- bool is_jumbo, bool *_jumbo_bad)
+static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb)
{
- /* Discard normal packets that are duplicates. */
- if (is_jumbo)
- return;
+ struct rxrpc_jumbo_header jhdr;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb), *jsp;
+ struct sk_buff *jskb;
+ unsigned int offset = sizeof(struct rxrpc_wire_header);
+ unsigned int len = skb->len - offset;
- /* Skip jumbo subpackets that are duplicates. When we've had three or
- * more partially duplicate jumbo packets, we refuse to take any more
- * jumbos for this call.
- */
- if (!*_jumbo_bad) {
- call->nr_jumbo_bad++;
- *_jumbo_bad = true;
+ while (sp->hdr.flags & RXRPC_JUMBO_PACKET) {
+ if (len < RXRPC_JUMBO_SUBPKTLEN)
+ goto protocol_error;
+ if (sp->hdr.flags & RXRPC_LAST_PACKET)
+ goto protocol_error;
+ if (skb_copy_bits(skb, offset + RXRPC_JUMBO_DATALEN,
+ &jhdr, sizeof(jhdr)) < 0)
+ goto protocol_error;
+
+ jskb = skb_clone(skb, GFP_ATOMIC);
+ if (!jskb) {
+ kdebug("couldn't clone");
+ return false;
+ }
+ rxrpc_new_skb(jskb, rxrpc_skb_cloned_jumbo);
+ jsp = rxrpc_skb(jskb);
+ jsp->offset = offset;
+ jsp->len = RXRPC_JUMBO_DATALEN;
+ rxrpc_input_data_one(call, jskb);
+
+ sp->hdr.flags = jhdr.flags;
+ sp->hdr._rsvd = ntohs(jhdr._rsvd);
+ sp->hdr.seq++;
+ sp->hdr.serial++;
+ offset += RXRPC_JUMBO_SUBPKTLEN;
+ len -= RXRPC_JUMBO_SUBPKTLEN;
}
+
+ sp->offset = offset;
+ sp->len = len;
+ rxrpc_input_data_one(call, skb);
+ return true;
+
+protocol_error:
+ return false;
}
/*
@@ -412,17 +544,15 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
enum rxrpc_call_state state;
- unsigned int j, nr_subpackets, nr_unacked = 0;
- rxrpc_serial_t serial = sp->hdr.serial, ack_serial = serial;
- rxrpc_seq_t seq0 = sp->hdr.seq, hard_ack;
- bool immediate_ack = false, jumbo_bad = false;
- u8 ack = 0;
+ rxrpc_serial_t serial = sp->hdr.serial;
+ rxrpc_seq_t seq0 = sp->hdr.seq;
- _enter("{%u,%u},{%u,%u}",
- call->rx_hard_ack, call->rx_top, skb->len, seq0);
+ _enter("{%llx,%x},{%u,%x}",
+ atomic64_read(&call->ackr_window), call->rx_highest_seq,
+ skb->len, seq0);
- _proto("Rx DATA %%%u { #%u f=%02x n=%u }",
- sp->hdr.serial, seq0, sp->hdr.flags, sp->nr_subpackets);
+ _proto("Rx DATA %%%u { #%u f=%02x }",
+ sp->hdr.serial, seq0, sp->hdr.flags);
state = READ_ONCE(call->state);
if (state >= RXRPC_CALL_COMPLETE) {
@@ -430,6 +560,24 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
return;
}
+ /* Unshare the packet so that it can be modified for in-place
+ * decryption.
+ */
+ if (sp->hdr.securityIndex != 0) {
+ struct sk_buff *nskb = skb_unshare(skb, GFP_ATOMIC);
+ if (!nskb) {
+ rxrpc_eaten_skb(skb, rxrpc_skb_unshared_nomem);
+ return;
+ }
+
+ if (nskb != skb) {
+ rxrpc_eaten_skb(skb, rxrpc_skb_received);
+ skb = nskb;
+ rxrpc_new_skb(skb, rxrpc_skb_unshared);
+ sp = rxrpc_skb(skb);
+ }
+ }
+
if (state == RXRPC_CALL_SERVER_RECV_REQUEST) {
unsigned long timo = READ_ONCE(call->next_req_timo);
unsigned long now, expect_req_by;
@@ -451,166 +599,18 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
if ((state == RXRPC_CALL_CLIENT_SEND_REQUEST ||
state == RXRPC_CALL_CLIENT_AWAIT_REPLY) &&
!rxrpc_receiving_reply(call))
- goto unlock;
-
- hard_ack = READ_ONCE(call->rx_hard_ack);
-
- nr_subpackets = sp->nr_subpackets;
- if (nr_subpackets > 1) {
- if (call->nr_jumbo_bad > 3) {
- ack = RXRPC_ACK_NOSPACE;
- ack_serial = serial;
- goto ack;
- }
- }
-
- for (j = 0; j < nr_subpackets; j++) {
- rxrpc_serial_t serial = sp->hdr.serial + j;
- rxrpc_seq_t seq = seq0 + j;
- unsigned int ix = seq & RXRPC_RXTX_BUFF_MASK;
- bool terminal = (j == nr_subpackets - 1);
- bool last = terminal && (sp->rx_flags & RXRPC_SKB_INCL_LAST);
- u8 flags, annotation = j;
-
- _proto("Rx DATA+%u %%%u { #%x t=%u l=%u }",
- j, serial, seq, terminal, last);
-
- if (last) {
- if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) &&
- seq != call->rx_top) {
- rxrpc_proto_abort("LSN", call, seq);
- goto unlock;
- }
- } else {
- if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) &&
- after_eq(seq, call->rx_top)) {
- rxrpc_proto_abort("LSA", call, seq);
- goto unlock;
- }
- }
-
- flags = 0;
- if (last)
- flags |= RXRPC_LAST_PACKET;
- if (!terminal)
- flags |= RXRPC_JUMBO_PACKET;
- if (test_bit(j, sp->rx_req_ack))
- flags |= RXRPC_REQUEST_ACK;
- trace_rxrpc_rx_data(call->debug_id, seq, serial, flags, annotation);
-
- if (before_eq(seq, hard_ack)) {
- ack = RXRPC_ACK_DUPLICATE;
- ack_serial = serial;
- continue;
- }
-
- if (call->rxtx_buffer[ix]) {
- rxrpc_input_dup_data(call, seq, nr_subpackets > 1,
- &jumbo_bad);
- if (ack != RXRPC_ACK_DUPLICATE) {
- ack = RXRPC_ACK_DUPLICATE;
- ack_serial = serial;
- }
- immediate_ack = true;
- continue;
- }
-
- if (after(seq, hard_ack + call->rx_winsize)) {
- ack = RXRPC_ACK_EXCEEDS_WINDOW;
- ack_serial = serial;
- if (flags & RXRPC_JUMBO_PACKET) {
- if (!jumbo_bad) {
- call->nr_jumbo_bad++;
- jumbo_bad = true;
- }
- }
-
- goto ack;
- }
-
- if (flags & RXRPC_REQUEST_ACK && !ack) {
- ack = RXRPC_ACK_REQUESTED;
- ack_serial = serial;
- }
-
- if (after(seq0, call->ackr_highest_seq))
- call->ackr_highest_seq = seq0;
-
- /* Queue the packet. We use a couple of memory barriers here as need
- * to make sure that rx_top is perceived to be set after the buffer
- * pointer and that the buffer pointer is set after the annotation and
- * the skb data.
- *
- * Barriers against rxrpc_recvmsg_data() and rxrpc_rotate_rx_window()
- * and also rxrpc_fill_out_ack().
- */
- if (!terminal)
- rxrpc_get_skb(skb, rxrpc_skb_got);
- call->rxtx_annotations[ix] = annotation;
- smp_wmb();
- call->rxtx_buffer[ix] = skb;
- if (after(seq, call->rx_top)) {
- smp_store_release(&call->rx_top, seq);
- } else if (before(seq, call->rx_top)) {
- /* Send an immediate ACK if we fill in a hole */
- if (!ack) {
- ack = RXRPC_ACK_DELAY;
- ack_serial = serial;
- }
- immediate_ack = true;
- }
-
- if (terminal) {
- /* From this point on, we're not allowed to touch the
- * packet any longer as its ref now belongs to the Rx
- * ring.
- */
- skb = NULL;
- sp = NULL;
- }
-
- nr_unacked++;
-
- if (last) {
- set_bit(RXRPC_CALL_RX_LAST, &call->flags);
- if (!ack) {
- ack = RXRPC_ACK_DELAY;
- ack_serial = serial;
- }
- trace_rxrpc_receive(call, rxrpc_receive_queue_last, serial, seq);
- } else {
- trace_rxrpc_receive(call, rxrpc_receive_queue, serial, seq);
- }
+ goto out;
- if (after_eq(seq, call->rx_expect_next)) {
- if (after(seq, call->rx_expect_next)) {
- _net("OOS %u > %u", seq, call->rx_expect_next);
- ack = RXRPC_ACK_OUT_OF_SEQUENCE;
- ack_serial = serial;
- }
- call->rx_expect_next = seq + 1;
- }
- if (!ack)
- ack_serial = serial;
+ if (!rxrpc_input_split_jumbo(call, skb)) {
+ rxrpc_proto_abort("VLD", call, sp->hdr.seq);
+ goto out;
}
+ skb = NULL;
-ack:
- if (atomic_add_return(nr_unacked, &call->ackr_nr_unacked) > 2 && !ack)
- ack = RXRPC_ACK_IDLE;
-
- if (ack)
- rxrpc_propose_ACK(call, ack, ack_serial,
- immediate_ack, true,
- rxrpc_propose_ack_input_data);
- else
- rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial,
- false, true,
- rxrpc_propose_ack_input_data);
-
+out:
trace_rxrpc_notify_socket(call->debug_id, serial);
rxrpc_notify_socket(call);
-unlock:
spin_unlock(&call->input_lock);
rxrpc_free_skb(skb, rxrpc_skb_freed);
_leave(" [queued]");
@@ -679,31 +679,8 @@ static void rxrpc_complete_rtt_probe(struct rxrpc_call *call,
*/
static void rxrpc_input_check_for_lost_ack(struct rxrpc_call *call)
{
- rxrpc_seq_t top, bottom, seq;
- bool resend = false;
-
- spin_lock_bh(&call->lock);
-
- bottom = call->tx_hard_ack + 1;
- top = call->acks_lost_top;
- if (before(bottom, top)) {
- for (seq = bottom; before_eq(seq, top); seq++) {
- int ix = seq & RXRPC_RXTX_BUFF_MASK;
- u8 annotation = call->rxtx_annotations[ix];
- u8 anno_type = annotation & RXRPC_TX_ANNO_MASK;
-
- if (anno_type != RXRPC_TX_ANNO_UNACK)
- continue;
- annotation &= ~RXRPC_TX_ANNO_MASK;
- annotation |= RXRPC_TX_ANNO_RETRANS;
- call->rxtx_annotations[ix] = annotation;
- resend = true;
- }
- }
-
- spin_unlock_bh(&call->lock);
-
- if (resend && !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
+ if (after(call->acks_lost_top, call->acks_prev_seq) &&
+ !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
rxrpc_queue_call(call);
}
@@ -736,8 +713,8 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU),
rwind, ntohl(ackinfo->jumbo_max));
- if (rwind > RXRPC_RXTX_BUFF_SIZE - 1)
- rwind = RXRPC_RXTX_BUFF_SIZE - 1;
+ if (rwind > RXRPC_TX_MAX_WINDOW)
+ rwind = RXRPC_TX_MAX_WINDOW;
if (call->tx_winsize != rwind) {
if (rwind > call->tx_winsize)
wake = true;
@@ -776,40 +753,19 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks,
rxrpc_seq_t seq, int nr_acks,
struct rxrpc_ack_summary *summary)
{
- int ix;
- u8 annotation, anno_type;
-
- for (; nr_acks > 0; nr_acks--, seq++) {
- ix = seq & RXRPC_RXTX_BUFF_MASK;
- annotation = call->rxtx_annotations[ix];
- anno_type = annotation & RXRPC_TX_ANNO_MASK;
- annotation &= ~RXRPC_TX_ANNO_MASK;
- switch (*acks++) {
- case RXRPC_ACK_TYPE_ACK:
+ unsigned int i;
+
+ for (i = 0; i < nr_acks; i++) {
+ if (acks[i] == RXRPC_ACK_TYPE_ACK) {
summary->nr_acks++;
- if (anno_type == RXRPC_TX_ANNO_ACK)
- continue;
summary->nr_new_acks++;
- call->rxtx_annotations[ix] =
- RXRPC_TX_ANNO_ACK | annotation;
- break;
- case RXRPC_ACK_TYPE_NACK:
- if (!summary->nr_nacks &&
- call->acks_lowest_nak != seq) {
- call->acks_lowest_nak = seq;
+ } else {
+ if (!summary->saw_nacks &&
+ call->acks_lowest_nak != seq + i) {
+ call->acks_lowest_nak = seq + i;
summary->new_low_nack = true;
}
- summary->nr_nacks++;
- if (anno_type == RXRPC_TX_ANNO_NAK)
- continue;
- summary->nr_new_nacks++;
- if (anno_type == RXRPC_TX_ANNO_RETRANS)
- continue;
- call->rxtx_annotations[ix] =
- RXRPC_TX_ANNO_NAK | annotation;
- break;
- default:
- return rxrpc_proto_abort("SFT", call, 0);
+ summary->saw_nacks = true;
}
}
}
@@ -851,12 +807,10 @@ static bool rxrpc_is_ack_valid(struct rxrpc_call *call,
static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
{
struct rxrpc_ack_summary summary = { 0 };
+ struct rxrpc_ackpacket ack;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- union {
- struct rxrpc_ackpacket ack;
- struct rxrpc_ackinfo info;
- u8 acks[RXRPC_MAXACKS];
- } buf;
+ struct rxrpc_ackinfo info;
+ struct sk_buff *skb_old = NULL, *skb_put = skb;
rxrpc_serial_t ack_serial, acked_serial;
rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt;
int nr_acks, offset, ioffset;
@@ -864,29 +818,28 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
_enter("");
offset = sizeof(struct rxrpc_wire_header);
- if (skb_copy_bits(skb, offset, &buf.ack, sizeof(buf.ack)) < 0) {
- _debug("extraction failure");
- return rxrpc_proto_abort("XAK", call, 0);
+ if (skb_copy_bits(skb, offset, &ack, sizeof(ack)) < 0) {
+ rxrpc_proto_abort("XAK", call, 0);
+ goto out_not_locked;
}
- offset += sizeof(buf.ack);
+ offset += sizeof(ack);
ack_serial = sp->hdr.serial;
- acked_serial = ntohl(buf.ack.serial);
- first_soft_ack = ntohl(buf.ack.firstPacket);
- prev_pkt = ntohl(buf.ack.previousPacket);
+ acked_serial = ntohl(ack.serial);
+ first_soft_ack = ntohl(ack.firstPacket);
+ prev_pkt = ntohl(ack.previousPacket);
hard_ack = first_soft_ack - 1;
- nr_acks = buf.ack.nAcks;
- summary.ack_reason = (buf.ack.reason < RXRPC_ACK__INVALID ?
- buf.ack.reason : RXRPC_ACK__INVALID);
+ nr_acks = ack.nAcks;
+ summary.ack_reason = (ack.reason < RXRPC_ACK__INVALID ?
+ ack.reason : RXRPC_ACK__INVALID);
trace_rxrpc_rx_ack(call, ack_serial, acked_serial,
first_soft_ack, prev_pkt,
summary.ack_reason, nr_acks);
+ rxrpc_inc_stat(call->rxnet, stat_rx_acks[ack.reason]);
- switch (buf.ack.reason) {
+ switch (ack.reason) {
case RXRPC_ACK_PING_RESPONSE:
- rxrpc_input_ping_response(call, skb->tstamp, acked_serial,
- ack_serial);
rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial,
rxrpc_rtt_rx_ping_response);
break;
@@ -901,22 +854,20 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
break;
}
- if (buf.ack.reason == RXRPC_ACK_PING) {
+ if (ack.reason == RXRPC_ACK_PING) {
_proto("Rx ACK %%%u PING Request", ack_serial);
- rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE,
- ack_serial, true, true,
- rxrpc_propose_ack_respond_to_ping);
+ rxrpc_send_ACK(call, RXRPC_ACK_PING_RESPONSE, ack_serial,
+ rxrpc_propose_ack_respond_to_ping);
} else if (sp->hdr.flags & RXRPC_REQUEST_ACK) {
- rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED,
- ack_serial, true, true,
- rxrpc_propose_ack_respond_to_ack);
+ rxrpc_send_ACK(call, RXRPC_ACK_REQUESTED, ack_serial,
+ rxrpc_propose_ack_respond_to_ack);
}
/* If we get an EXCEEDS_WINDOW ACK from the server, it probably
* indicates that the client address changed due to NAT. The server
* lost the call because it switched to a different peer.
*/
- if (unlikely(buf.ack.reason == RXRPC_ACK_EXCEEDS_WINDOW) &&
+ if (unlikely(ack.reason == RXRPC_ACK_EXCEEDS_WINDOW) &&
first_soft_ack == 1 &&
prev_pkt == 0 &&
rxrpc_is_client_call(call)) {
@@ -929,10 +880,10 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
* indicate a change of address. However, we can retransmit the call
* if we still have it buffered to the beginning.
*/
- if (unlikely(buf.ack.reason == RXRPC_ACK_OUT_OF_SEQUENCE) &&
+ if (unlikely(ack.reason == RXRPC_ACK_OUT_OF_SEQUENCE) &&
first_soft_ack == 1 &&
prev_pkt == 0 &&
- call->tx_hard_ack == 0 &&
+ call->acks_hard_ack == 0 &&
rxrpc_is_client_call(call)) {
rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
0, -ENETRESET);
@@ -944,14 +895,19 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
trace_rxrpc_rx_discard_ack(call->debug_id, ack_serial,
first_soft_ack, call->acks_first_seq,
prev_pkt, call->acks_prev_seq);
- return;
+ goto out_not_locked;
}
- buf.info.rxMTU = 0;
+ info.rxMTU = 0;
ioffset = offset + nr_acks + 3;
- if (skb->len >= ioffset + sizeof(buf.info) &&
- skb_copy_bits(skb, ioffset, &buf.info, sizeof(buf.info)) < 0)
- return rxrpc_proto_abort("XAI", call, 0);
+ if (skb->len >= ioffset + sizeof(info) &&
+ skb_copy_bits(skb, ioffset, &info, sizeof(info)) < 0) {
+ rxrpc_proto_abort("XAI", call, 0);
+ goto out_not_locked;
+ }
+
+ if (nr_acks > 0)
+ skb_condense(skb);
spin_lock(&call->input_lock);
@@ -967,9 +923,22 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
call->acks_first_seq = first_soft_ack;
call->acks_prev_seq = prev_pkt;
+ switch (ack.reason) {
+ case RXRPC_ACK_PING:
+ break;
+ case RXRPC_ACK_PING_RESPONSE:
+ rxrpc_input_ping_response(call, skb->tstamp, acked_serial,
+ ack_serial);
+ fallthrough;
+ default:
+ if (after(acked_serial, call->acks_highest_serial))
+ call->acks_highest_serial = acked_serial;
+ break;
+ }
+
/* Parse rwind and mtu sizes if provided. */
- if (buf.info.rxMTU)
- rxrpc_input_ackinfo(call, skb, &buf.info);
+ if (info.rxMTU)
+ rxrpc_input_ackinfo(call, skb, &info);
if (first_soft_ack == 0) {
rxrpc_proto_abort("AK0", call, 0);
@@ -987,7 +956,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
goto out;
}
- if (before(hard_ack, call->tx_hard_ack) ||
+ if (before(hard_ack, call->acks_hard_ack) ||
after(hard_ack, call->tx_top)) {
rxrpc_proto_abort("AKW", call, 0);
goto out;
@@ -997,7 +966,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
goto out;
}
- if (after(hard_ack, call->tx_hard_ack)) {
+ if (after(hard_ack, call->acks_hard_ack)) {
if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) {
rxrpc_end_tx_phase(call, false, "ETA");
goto out;
@@ -1005,25 +974,38 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
}
if (nr_acks > 0) {
- if (skb_copy_bits(skb, offset, buf.acks, nr_acks) < 0) {
+ if (offset > (int)skb->len - nr_acks) {
rxrpc_proto_abort("XSA", call, 0);
goto out;
}
- rxrpc_input_soft_acks(call, buf.acks, first_soft_ack, nr_acks,
- &summary);
+
+ spin_lock(&call->acks_ack_lock);
+ skb_old = call->acks_soft_tbl;
+ call->acks_soft_tbl = skb;
+ spin_unlock(&call->acks_ack_lock);
+
+ rxrpc_input_soft_acks(call, skb->data + offset, first_soft_ack,
+ nr_acks, &summary);
+ skb_put = NULL;
+ } else if (call->acks_soft_tbl) {
+ spin_lock(&call->acks_ack_lock);
+ skb_old = call->acks_soft_tbl;
+ call->acks_soft_tbl = NULL;
+ spin_unlock(&call->acks_ack_lock);
}
- if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] &
- RXRPC_TX_ANNO_LAST &&
+ if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) &&
summary.nr_acks == call->tx_top - hard_ack &&
rxrpc_is_client_call(call))
- rxrpc_propose_ACK(call, RXRPC_ACK_PING, ack_serial,
- false, true,
- rxrpc_propose_ack_ping_for_lost_reply);
+ rxrpc_propose_ping(call, ack_serial,
+ rxrpc_propose_ack_ping_for_lost_reply);
rxrpc_congestion_management(call, skb, &summary, acked_serial);
out:
spin_unlock(&call->input_lock);
+out_not_locked:
+ rxrpc_free_skb(skb_put, rxrpc_skb_freed);
+ rxrpc_free_skb(skb_old, rxrpc_skb_freed);
}
/*
@@ -1096,7 +1078,7 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call,
case RXRPC_PACKET_TYPE_ACK:
rxrpc_input_ack(call, skb);
- break;
+ goto no_free;
case RXRPC_PACKET_TYPE_BUSY:
_proto("Rx BUSY %%%u", sp->hdr.serial);
@@ -1307,8 +1289,6 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
if (sp->hdr.callNumber == 0 ||
sp->hdr.seq == 0)
goto bad_message;
- if (!rxrpc_validate_data(skb))
- goto bad_message;
/* Unshare the packet so that it can be modified for in-place
* decryption.
@@ -1422,7 +1402,7 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
trace_rxrpc_rx_data(chan->call_debug_id,
sp->hdr.seq,
sp->hdr.serial,
- sp->hdr.flags, 0);
+ sp->hdr.flags);
rxrpc_post_packet_to_conn(conn, skb);
goto out;
}
diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c
index 9aae99d67833..0eb8471bfc53 100644
--- a/net/rxrpc/insecure.c
+++ b/net/rxrpc/insecure.c
@@ -25,16 +25,16 @@ static int none_how_much_data(struct rxrpc_call *call, size_t remain,
return 0;
}
-static int none_secure_packet(struct rxrpc_call *call, struct sk_buff *skb,
- size_t data_size)
+static int none_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
{
return 0;
}
-static int none_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
- unsigned int offset, unsigned int len,
- rxrpc_seq_t seq, u16 expected_cksum)
+static int none_verify_packet(struct rxrpc_call *call, struct sk_buff *skb)
{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ sp->flags |= RXRPC_RX_VERIFIED;
return 0;
}
@@ -42,11 +42,6 @@ static void none_free_call_crypto(struct rxrpc_call *call)
{
}
-static void none_locate_data(struct rxrpc_call *call, struct sk_buff *skb,
- unsigned int *_offset, unsigned int *_len)
-{
-}
-
static int none_respond_to_challenge(struct rxrpc_connection *conn,
struct sk_buff *skb,
u32 *_abort_code)
@@ -95,7 +90,6 @@ const struct rxrpc_security rxrpc_no_security = {
.how_much_data = none_how_much_data,
.secure_packet = none_secure_packet,
.verify_packet = none_verify_packet,
- .locate_data = none_locate_data,
.respond_to_challenge = none_respond_to_challenge,
.verify_response = none_verify_response,
.clear = none_clear,
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 38ea98ff426b..a178f71e5082 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -24,6 +24,19 @@ static void rxrpc_local_processor(struct work_struct *);
static void rxrpc_local_rcu(struct rcu_head *);
/*
+ * Handle an ICMP/ICMP6 error turning up at the tunnel. Push it through the
+ * usual mechanism so that it gets parsed and presented through the UDP
+ * socket's error_report().
+ */
+static void rxrpc_encap_err_rcv(struct sock *sk, struct sk_buff *skb, int err,
+ __be16 port, u32 info, u8 *payload)
+{
+ if (ip_hdr(skb)->version == IPVERSION)
+ return ip_icmp_error(sk, skb, err, port, info, payload);
+ return ipv6_icmp_error(sk, skb, err, port, info, payload);
+}
+
+/*
* Compare a local to an address. Return -ve, 0 or +ve to indicate less than,
* same or greater than.
*
@@ -84,6 +97,8 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet,
local->rxnet = rxnet;
INIT_HLIST_NODE(&local->link);
INIT_WORK(&local->processor, rxrpc_local_processor);
+ INIT_LIST_HEAD(&local->ack_tx_queue);
+ spin_lock_init(&local->ack_tx_lock);
init_rwsem(&local->defrag_sem);
skb_queue_head_init(&local->reject_queue);
skb_queue_head_init(&local->event_queue);
@@ -419,6 +434,11 @@ static void rxrpc_local_processor(struct work_struct *work)
break;
}
+ if (!list_empty(&local->ack_tx_queue)) {
+ rxrpc_transmit_ack_packets(local);
+ again = true;
+ }
+
if (!skb_queue_empty(&local->reject_queue)) {
rxrpc_reject_packets(local);
again = true;
diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c
index d4144fd86f84..056c428d8bf3 100644
--- a/net/rxrpc/misc.c
+++ b/net/rxrpc/misc.c
@@ -17,12 +17,6 @@
unsigned int rxrpc_max_backlog __read_mostly = 10;
/*
- * How long to wait before scheduling ACK generation after seeing a
- * packet with RXRPC_REQUEST_ACK set (in jiffies).
- */
-unsigned long rxrpc_requested_ack_delay = 1;
-
-/*
* How long to wait before scheduling an ACK with subtype DELAY (in jiffies).
*
* We use this when we've received new data packets. If those packets aren't
@@ -46,10 +40,7 @@ unsigned long rxrpc_idle_ack_delay = HZ / 2;
* limit is hit, we should generate an EXCEEDS_WINDOW ACK and discard further
* packets.
*/
-unsigned int rxrpc_rx_window_size = RXRPC_INIT_RX_WINDOW_SIZE;
-#if (RXRPC_RXTX_BUFF_SIZE - 1) < RXRPC_INIT_RX_WINDOW_SIZE
-#error Need to reduce RXRPC_INIT_RX_WINDOW_SIZE
-#endif
+unsigned int rxrpc_rx_window_size = 255;
/*
* Maximum Rx MTU size. This indicates to the sender the size of jumbo packet
@@ -62,15 +53,3 @@ unsigned int rxrpc_rx_mtu = 5692;
* sender that we're willing to handle.
*/
unsigned int rxrpc_rx_jumbo_max = 4;
-
-const s8 rxrpc_ack_priority[] = {
- [0] = 0,
- [RXRPC_ACK_DELAY] = 1,
- [RXRPC_ACK_REQUESTED] = 2,
- [RXRPC_ACK_IDLE] = 3,
- [RXRPC_ACK_DUPLICATE] = 4,
- [RXRPC_ACK_OUT_OF_SEQUENCE] = 5,
- [RXRPC_ACK_EXCEEDS_WINDOW] = 6,
- [RXRPC_ACK_NOSPACE] = 7,
- [RXRPC_ACK_PING_RESPONSE] = 8,
-};
diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c
index bb4c25d6df64..84242c0e467c 100644
--- a/net/rxrpc/net_ns.c
+++ b/net/rxrpc/net_ns.c
@@ -101,6 +101,8 @@ static __net_init int rxrpc_init_net(struct net *net)
proc_create_net("locals", 0444, rxnet->proc_net,
&rxrpc_local_seq_ops,
sizeof(struct seq_net_private));
+ proc_create_net_single_write("stats", S_IFREG | 0644, rxnet->proc_net,
+ rxrpc_stats_show, rxrpc_stats_clear, NULL);
return 0;
err_proc:
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 9683617db704..46432e70a16b 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -13,15 +13,21 @@
#include <linux/export.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
+#include <net/udp.h>
#include "ar-internal.h"
-struct rxrpc_ack_buffer {
- struct rxrpc_wire_header whdr;
- struct rxrpc_ackpacket ack;
- u8 acks[255];
- u8 pad[3];
- struct rxrpc_ackinfo ackinfo;
-};
+extern int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
+
+static ssize_t do_udp_sendmsg(struct socket *sk, struct msghdr *msg, size_t len)
+{
+#if IS_ENABLED(CONFIG_AF_RXRPC_IPV6)
+ struct sockaddr *sa = msg->msg_name;
+
+ if (sa->sa_family == AF_INET6)
+ return udpv6_sendmsg(sk->sk, msg, len);
+#endif
+ return udp_sendmsg(sk->sk, msg, len);
+}
struct rxrpc_abort_buffer {
struct rxrpc_wire_header whdr;
@@ -68,66 +74,83 @@ static void rxrpc_set_keepalive(struct rxrpc_call *call)
*/
static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
struct rxrpc_call *call,
- struct rxrpc_ack_buffer *pkt,
- rxrpc_seq_t *_hard_ack,
- rxrpc_seq_t *_top,
- u8 reason)
+ struct rxrpc_txbuf *txb)
{
- rxrpc_serial_t serial;
- unsigned int tmp;
- rxrpc_seq_t hard_ack, top, seq;
- int ix;
+ struct rxrpc_ackinfo ackinfo;
+ unsigned int qsize;
+ rxrpc_seq_t window, wtop, wrap_point, ix, first;
+ int rsize;
+ u64 wtmp;
u32 mtu, jmax;
- u8 *ackp = pkt->acks;
+ u8 *ackp = txb->acks;
+ u8 sack_buffer[sizeof(call->ackr_sack_table)] __aligned(8);
- tmp = atomic_xchg(&call->ackr_nr_unacked, 0);
- tmp |= atomic_xchg(&call->ackr_nr_consumed, 0);
- if (!tmp && (reason == RXRPC_ACK_DELAY ||
- reason == RXRPC_ACK_IDLE))
- return 0;
+ atomic_set(&call->ackr_nr_unacked, 0);
+ atomic_set(&call->ackr_nr_consumed, 0);
+ rxrpc_inc_stat(call->rxnet, stat_tx_ack_fill);
/* Barrier against rxrpc_input_data(). */
- serial = call->ackr_serial;
- hard_ack = READ_ONCE(call->rx_hard_ack);
- top = smp_load_acquire(&call->rx_top);
- *_hard_ack = hard_ack;
- *_top = top;
-
- pkt->ack.bufferSpace = htons(8);
- pkt->ack.maxSkew = htons(0);
- pkt->ack.firstPacket = htonl(hard_ack + 1);
- pkt->ack.previousPacket = htonl(call->ackr_highest_seq);
- pkt->ack.serial = htonl(serial);
- pkt->ack.reason = reason;
- pkt->ack.nAcks = top - hard_ack;
-
- if (reason == RXRPC_ACK_PING)
- pkt->whdr.flags |= RXRPC_REQUEST_ACK;
-
- if (after(top, hard_ack)) {
- seq = hard_ack + 1;
- do {
- ix = seq & RXRPC_RXTX_BUFF_MASK;
- if (call->rxtx_buffer[ix])
- *ackp++ = RXRPC_ACK_TYPE_ACK;
- else
- *ackp++ = RXRPC_ACK_TYPE_NACK;
- seq++;
- } while (before_eq(seq, top));
+retry:
+ wtmp = atomic64_read_acquire(&call->ackr_window);
+ window = lower_32_bits(wtmp);
+ wtop = upper_32_bits(wtmp);
+ txb->ack.firstPacket = htonl(window);
+ txb->ack.nAcks = 0;
+
+ if (after(wtop, window)) {
+ /* Try to copy the SACK ring locklessly. We can use the copy,
+ * only if the now-current top of the window didn't go past the
+ * previously read base - otherwise we can't know whether we
+ * have old data or new data.
+ */
+ memcpy(sack_buffer, call->ackr_sack_table, sizeof(sack_buffer));
+ wrap_point = window + RXRPC_SACK_SIZE - 1;
+ wtmp = atomic64_read_acquire(&call->ackr_window);
+ window = lower_32_bits(wtmp);
+ wtop = upper_32_bits(wtmp);
+ if (after(wtop, wrap_point)) {
+ cond_resched();
+ goto retry;
+ }
+
+ /* The buffer is maintained as a ring with an invariant mapping
+ * between bit position and sequence number, so we'll probably
+ * need to rotate it.
+ */
+ txb->ack.nAcks = wtop - window;
+ ix = window % RXRPC_SACK_SIZE;
+ first = sizeof(sack_buffer) - ix;
+
+ if (ix + txb->ack.nAcks <= RXRPC_SACK_SIZE) {
+ memcpy(txb->acks, sack_buffer + ix, txb->ack.nAcks);
+ } else {
+ memcpy(txb->acks, sack_buffer + ix, first);
+ memcpy(txb->acks + first, sack_buffer,
+ txb->ack.nAcks - first);
+ }
+
+ ackp += txb->ack.nAcks;
+ } else if (before(wtop, window)) {
+ pr_warn("ack window backward %x %x", window, wtop);
+ } else if (txb->ack.reason == RXRPC_ACK_DELAY) {
+ txb->ack.reason = RXRPC_ACK_IDLE;
}
mtu = conn->params.peer->if_mtu;
mtu -= conn->params.peer->hdrsize;
- jmax = (call->nr_jumbo_bad > 3) ? 1 : rxrpc_rx_jumbo_max;
- pkt->ackinfo.rxMTU = htonl(rxrpc_rx_mtu);
- pkt->ackinfo.maxMTU = htonl(mtu);
- pkt->ackinfo.rwind = htonl(call->rx_winsize);
- pkt->ackinfo.jumbo_max = htonl(jmax);
+ jmax = rxrpc_rx_jumbo_max;
+ qsize = (window - 1) - call->rx_consumed;
+ rsize = max_t(int, call->rx_winsize - qsize, 0);
+ ackinfo.rxMTU = htonl(rxrpc_rx_mtu);
+ ackinfo.maxMTU = htonl(mtu);
+ ackinfo.rwind = htonl(rsize);
+ ackinfo.jumbo_max = htonl(jmax);
*ackp++ = 0;
*ackp++ = 0;
*ackp++ = 0;
- return top - hard_ack + 3;
+ memcpy(ackp, &ackinfo, sizeof(ackinfo));
+ return txb->ack.nAcks + 3 + sizeof(ackinfo);
}
/*
@@ -176,26 +199,20 @@ static void rxrpc_cancel_rtt_probe(struct rxrpc_call *call,
/*
* Send an ACK call packet.
*/
-int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
- rxrpc_serial_t *_serial)
+static int rxrpc_send_ack_packet(struct rxrpc_local *local, struct rxrpc_txbuf *txb)
{
struct rxrpc_connection *conn;
struct rxrpc_ack_buffer *pkt;
+ struct rxrpc_call *call = txb->call;
struct msghdr msg;
- struct kvec iov[2];
+ struct kvec iov[1];
rxrpc_serial_t serial;
- rxrpc_seq_t hard_ack, top;
size_t len, n;
int ret, rtt_slot = -1;
- u8 reason;
if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
return -ECONNRESET;
- pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
- if (!pkt)
- return -ENOMEM;
-
conn = call->conn;
msg.msg_name = &call->peer->srx.transport;
@@ -204,83 +221,98 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
msg.msg_controllen = 0;
msg.msg_flags = 0;
- pkt->whdr.epoch = htonl(conn->proto.epoch);
- pkt->whdr.cid = htonl(call->cid);
- pkt->whdr.callNumber = htonl(call->call_id);
- pkt->whdr.seq = 0;
- pkt->whdr.type = RXRPC_PACKET_TYPE_ACK;
- pkt->whdr.flags = RXRPC_SLOW_START_OK | conn->out_clientflag;
- pkt->whdr.userStatus = 0;
- pkt->whdr.securityIndex = call->security_ix;
- pkt->whdr._rsvd = 0;
- pkt->whdr.serviceId = htons(call->service_id);
-
- spin_lock_bh(&call->lock);
- if (ping) {
- reason = RXRPC_ACK_PING;
- } else {
- reason = call->ackr_reason;
- if (!call->ackr_reason) {
- spin_unlock_bh(&call->lock);
- ret = 0;
- goto out;
- }
- call->ackr_reason = 0;
- }
- n = rxrpc_fill_out_ack(conn, call, pkt, &hard_ack, &top, reason);
+ if (txb->ack.reason == RXRPC_ACK_PING)
+ txb->wire.flags |= RXRPC_REQUEST_ACK;
- spin_unlock_bh(&call->lock);
- if (n == 0) {
- kfree(pkt);
+ if (txb->ack.reason == RXRPC_ACK_DELAY)
+ clear_bit(RXRPC_CALL_DELAY_ACK_PENDING, &call->flags);
+ if (txb->ack.reason == RXRPC_ACK_IDLE)
+ clear_bit(RXRPC_CALL_IDLE_ACK_PENDING, &call->flags);
+
+ n = rxrpc_fill_out_ack(conn, call, txb);
+ if (n == 0)
return 0;
- }
- iov[0].iov_base = pkt;
- iov[0].iov_len = sizeof(pkt->whdr) + sizeof(pkt->ack) + n;
- iov[1].iov_base = &pkt->ackinfo;
- iov[1].iov_len = sizeof(pkt->ackinfo);
- len = iov[0].iov_len + iov[1].iov_len;
+ iov[0].iov_base = &txb->wire;
+ iov[0].iov_len = sizeof(txb->wire) + sizeof(txb->ack) + n;
+ len = iov[0].iov_len;
serial = atomic_inc_return(&conn->serial);
- pkt->whdr.serial = htonl(serial);
+ txb->wire.serial = htonl(serial);
trace_rxrpc_tx_ack(call->debug_id, serial,
- ntohl(pkt->ack.firstPacket),
- ntohl(pkt->ack.serial),
- pkt->ack.reason, pkt->ack.nAcks);
- if (_serial)
- *_serial = serial;
+ ntohl(txb->ack.firstPacket),
+ ntohl(txb->ack.serial), txb->ack.reason, txb->ack.nAcks);
+ if (txb->ack_why == rxrpc_propose_ack_ping_for_lost_ack)
+ call->acks_lost_ping = serial;
- if (ping)
+ if (txb->ack.reason == RXRPC_ACK_PING)
rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_ping);
- ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
- conn->params.peer->last_tx_at = ktime_get_seconds();
+ rxrpc_inc_stat(call->rxnet, stat_tx_ack_send);
+
+ /* Grab the highest received seq as late as possible */
+ txb->ack.previousPacket = htonl(call->rx_highest_seq);
+
+ iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len);
+ ret = do_udp_sendmsg(conn->params.local->socket, &msg, len);
+ call->peer->last_tx_at = ktime_get_seconds();
if (ret < 0)
trace_rxrpc_tx_fail(call->debug_id, serial, ret,
rxrpc_tx_point_call_ack);
else
- trace_rxrpc_tx_packet(call->debug_id, &pkt->whdr,
+ trace_rxrpc_tx_packet(call->debug_id, &txb->wire,
rxrpc_tx_point_call_ack);
rxrpc_tx_backoff(call, ret);
if (call->state < RXRPC_CALL_COMPLETE) {
- if (ret < 0) {
+ if (ret < 0)
rxrpc_cancel_rtt_probe(call, serial, rtt_slot);
- rxrpc_propose_ACK(call, pkt->ack.reason,
- ntohl(pkt->ack.serial),
- false, true,
- rxrpc_propose_ack_retry_tx);
- }
-
rxrpc_set_keepalive(call);
}
-out:
kfree(pkt);
return ret;
}
/*
+ * ACK transmitter for a local endpoint. The UDP socket locks around each
+ * transmission, so we can only transmit one packet at a time, ACK, DATA or
+ * otherwise.
+ */
+void rxrpc_transmit_ack_packets(struct rxrpc_local *local)
+{
+ LIST_HEAD(queue);
+ int ret;
+
+ trace_rxrpc_local(local->debug_id, rxrpc_local_tx_ack,
+ refcount_read(&local->ref), NULL);
+
+ if (list_empty(&local->ack_tx_queue))
+ return;
+
+ spin_lock_bh(&local->ack_tx_lock);
+ list_splice_tail_init(&local->ack_tx_queue, &queue);
+ spin_unlock_bh(&local->ack_tx_lock);
+
+ while (!list_empty(&queue)) {
+ struct rxrpc_txbuf *txb =
+ list_entry(queue.next, struct rxrpc_txbuf, tx_link);
+
+ ret = rxrpc_send_ack_packet(local, txb);
+ if (ret < 0 && ret != -ECONNRESET) {
+ spin_lock_bh(&local->ack_tx_lock);
+ list_splice_init(&queue, &local->ack_tx_queue);
+ spin_unlock_bh(&local->ack_tx_lock);
+ break;
+ }
+
+ list_del_init(&txb->tx_link);
+ rxrpc_put_call(txb->call, rxrpc_call_put);
+ rxrpc_put_txbuf(txb, rxrpc_txbuf_put_ack_tx);
+ }
+}
+
+/*
* Send an ABORT call packet.
*/
int rxrpc_send_abort_packet(struct rxrpc_call *call)
@@ -299,7 +331,7 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
* channel instead, thereby closing off this call.
*/
if (rxrpc_is_client_call(call) &&
- test_bit(RXRPC_CALL_TX_LAST, &call->flags))
+ test_bit(RXRPC_CALL_TX_ALL_ACKED, &call->flags))
return 0;
if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
@@ -331,8 +363,8 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
serial = atomic_inc_return(&conn->serial);
pkt.whdr.serial = htonl(serial);
- ret = kernel_sendmsg(conn->params.local->socket,
- &msg, iov, 1, sizeof(pkt));
+ iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, sizeof(pkt));
+ ret = do_udp_sendmsg(conn->params.local->socket, &msg, sizeof(pkt));
conn->params.peer->last_tx_at = ktime_get_seconds();
if (ret < 0)
trace_rxrpc_tx_fail(call->debug_id, serial, ret,
@@ -347,19 +379,17 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
/*
* send a packet through the transport endpoint
*/
-int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
- bool retrans)
+int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
{
+ enum rxrpc_req_ack_trace why;
struct rxrpc_connection *conn = call->conn;
- struct rxrpc_wire_header whdr;
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct msghdr msg;
- struct kvec iov[2];
+ struct kvec iov[1];
rxrpc_serial_t serial;
size_t len;
int ret, rtt_slot = -1;
- _enter(",{%d}", skb->len);
+ _enter("%x,{%d}", txb->seq, txb->len);
if (hlist_unhashed(&call->error_link)) {
spin_lock_bh(&call->peer->lock);
@@ -369,28 +399,16 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
/* Each transmission of a Tx packet needs a new serial number */
serial = atomic_inc_return(&conn->serial);
-
- whdr.epoch = htonl(conn->proto.epoch);
- whdr.cid = htonl(call->cid);
- whdr.callNumber = htonl(call->call_id);
- whdr.seq = htonl(sp->hdr.seq);
- whdr.serial = htonl(serial);
- whdr.type = RXRPC_PACKET_TYPE_DATA;
- whdr.flags = sp->hdr.flags;
- whdr.userStatus = 0;
- whdr.securityIndex = call->security_ix;
- whdr._rsvd = htons(sp->hdr._rsvd);
- whdr.serviceId = htons(call->service_id);
+ txb->wire.serial = htonl(serial);
if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) &&
- sp->hdr.seq == 1)
- whdr.userStatus = RXRPC_USERSTATUS_SERVICE_UPGRADE;
+ txb->seq == 1)
+ txb->wire.userStatus = RXRPC_USERSTATUS_SERVICE_UPGRADE;
- iov[0].iov_base = &whdr;
- iov[0].iov_len = sizeof(whdr);
- iov[1].iov_base = skb->head;
- iov[1].iov_len = skb->len;
- len = iov[0].iov_len + iov[1].iov_len;
+ iov[0].iov_base = &txb->wire;
+ iov[0].iov_len = sizeof(txb->wire) + txb->len;
+ len = iov[0].iov_len;
+ iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len);
msg.msg_name = &call->peer->srx.transport;
msg.msg_namelen = call->peer->srx.transport_len;
@@ -405,41 +423,56 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
* service call, lest OpenAFS incorrectly send us an ACK with some
* soft-ACKs in it and then never follow up with a proper hard ACK.
*/
- if ((!(sp->hdr.flags & RXRPC_LAST_PACKET) ||
- rxrpc_to_server(sp)
- ) &&
- (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events) ||
- retrans ||
- call->cong_mode == RXRPC_CALL_SLOW_START ||
- (call->peer->rtt_count < 3 && sp->hdr.seq & 1) ||
- ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000),
- ktime_get_real())))
- whdr.flags |= RXRPC_REQUEST_ACK;
+ if (txb->wire.flags & RXRPC_REQUEST_ACK)
+ why = rxrpc_reqack_already_on;
+ else if (test_bit(RXRPC_TXBUF_LAST, &txb->flags) && rxrpc_sending_to_client(txb))
+ why = rxrpc_reqack_no_srv_last;
+ else if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events))
+ why = rxrpc_reqack_ack_lost;
+ else if (test_bit(RXRPC_TXBUF_RESENT, &txb->flags))
+ why = rxrpc_reqack_retrans;
+ else if (call->cong_mode == RXRPC_CALL_SLOW_START && call->cong_cwnd <= 2)
+ why = rxrpc_reqack_slow_start;
+ else if (call->tx_winsize <= 2)
+ why = rxrpc_reqack_small_txwin;
+ else if (call->peer->rtt_count < 3 && txb->seq & 1)
+ why = rxrpc_reqack_more_rtt;
+ else if (ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), ktime_get_real()))
+ why = rxrpc_reqack_old_rtt;
+ else
+ goto dont_set_request_ack;
+
+ rxrpc_inc_stat(call->rxnet, stat_why_req_ack[why]);
+ trace_rxrpc_req_ack(call->debug_id, txb->seq, why);
+ if (why != rxrpc_reqack_no_srv_last)
+ txb->wire.flags |= RXRPC_REQUEST_ACK;
+dont_set_request_ack:
if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
static int lose;
if ((lose++ & 7) == 7) {
ret = 0;
- trace_rxrpc_tx_data(call, sp->hdr.seq, serial,
- whdr.flags, retrans, true);
+ trace_rxrpc_tx_data(call, txb->seq, serial,
+ txb->wire.flags,
+ test_bit(RXRPC_TXBUF_RESENT, &txb->flags),
+ true);
goto done;
}
}
- trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, retrans,
- false);
+ trace_rxrpc_tx_data(call, txb->seq, serial, txb->wire.flags,
+ test_bit(RXRPC_TXBUF_RESENT, &txb->flags), false);
+ cmpxchg(&call->tx_transmitted, txb->seq - 1, txb->seq);
/* send the packet with the don't fragment bit set if we currently
* think it's small enough */
- if (iov[1].iov_len >= call->peer->maxdata)
+ if (txb->len >= call->peer->maxdata)
goto send_fragmentable;
down_read(&conn->params.local->defrag_sem);
- sp->hdr.serial = serial;
- smp_wmb(); /* Set serial before timestamp */
- skb->tstamp = ktime_get_real();
- if (whdr.flags & RXRPC_REQUEST_ACK)
+ txb->last_sent = ktime_get_real();
+ if (txb->wire.flags & RXRPC_REQUEST_ACK)
rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_data);
/* send the packet by UDP
@@ -448,7 +481,8 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
* - in which case, we'll have processed the ICMP error
* message and update the peer record
*/
- ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
+ rxrpc_inc_stat(call->rxnet, stat_tx_data_send);
+ ret = do_udp_sendmsg(conn->params.local->socket, &msg, len);
conn->params.peer->last_tx_at = ktime_get_seconds();
up_read(&conn->params.local->defrag_sem);
@@ -457,7 +491,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
trace_rxrpc_tx_fail(call->debug_id, serial, ret,
rxrpc_tx_point_call_data_nofrag);
} else {
- trace_rxrpc_tx_packet(call->debug_id, &whdr,
+ trace_rxrpc_tx_packet(call->debug_id, &txb->wire,
rxrpc_tx_point_call_data_nofrag);
}
@@ -467,8 +501,9 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
done:
if (ret >= 0) {
- if (whdr.flags & RXRPC_REQUEST_ACK) {
- call->peer->rtt_last_req = skb->tstamp;
+ call->tx_last_sent = txb->last_sent;
+ if (txb->wire.flags & RXRPC_REQUEST_ACK) {
+ call->peer->rtt_last_req = txb->last_sent;
if (call->peer->rtt_count > 1) {
unsigned long nowj = jiffies, ack_lost_at;
@@ -480,7 +515,7 @@ done:
}
}
- if (sp->hdr.seq == 1 &&
+ if (txb->seq == 1 &&
!test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER,
&call->flags)) {
unsigned long nowj = jiffies, expect_rx_by;
@@ -512,23 +547,21 @@ send_fragmentable:
down_write(&conn->params.local->defrag_sem);
- sp->hdr.serial = serial;
- smp_wmb(); /* Set serial before timestamp */
- skb->tstamp = ktime_get_real();
- if (whdr.flags & RXRPC_REQUEST_ACK)
+ txb->last_sent = ktime_get_real();
+ if (txb->wire.flags & RXRPC_REQUEST_ACK)
rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_data);
switch (conn->params.local->srx.transport.family) {
case AF_INET6:
case AF_INET:
ip_sock_set_mtu_discover(conn->params.local->socket->sk,
- IP_PMTUDISC_DONT);
- ret = kernel_sendmsg(conn->params.local->socket, &msg,
- iov, 2, len);
+ IP_PMTUDISC_DONT);
+ rxrpc_inc_stat(call->rxnet, stat_tx_data_send_frag);
+ ret = do_udp_sendmsg(conn->params.local->socket, &msg, len);
conn->params.peer->last_tx_at = ktime_get_seconds();
ip_sock_set_mtu_discover(conn->params.local->socket->sk,
- IP_PMTUDISC_DO);
+ IP_PMTUDISC_DO);
break;
default:
@@ -540,7 +573,7 @@ send_fragmentable:
trace_rxrpc_tx_fail(call->debug_id, serial, ret,
rxrpc_tx_point_call_data_frag);
} else {
- trace_rxrpc_tx_packet(call->debug_id, &whdr,
+ trace_rxrpc_tx_packet(call->debug_id, &txb->wire,
rxrpc_tx_point_call_data_frag);
}
rxrpc_tx_backoff(call, ret);
@@ -610,8 +643,8 @@ void rxrpc_reject_packets(struct rxrpc_local *local)
whdr.flags ^= RXRPC_CLIENT_INITIATED;
whdr.flags &= RXRPC_CLIENT_INITIATED;
- ret = kernel_sendmsg(local->socket, &msg,
- iov, ioc, size);
+ iov_iter_kvec(&msg.msg_iter, WRITE, iov, ioc, size);
+ ret = do_udp_sendmsg(local->socket, &msg, size);
if (ret < 0)
trace_rxrpc_tx_fail(local->debug_id, 0, ret,
rxrpc_tx_point_reject);
@@ -666,7 +699,8 @@ void rxrpc_send_keepalive(struct rxrpc_peer *peer)
_proto("Tx VERSION (keepalive)");
- ret = kernel_sendmsg(peer->local->socket, &msg, iov, 2, len);
+ iov_iter_kvec(&msg.msg_iter, WRITE, iov, 2, len);
+ ret = do_udp_sendmsg(peer->local->socket, &msg, len);
if (ret < 0)
trace_rxrpc_tx_fail(peer->debug_id, 0, ret,
rxrpc_tx_point_version_keepalive);
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index 32561e9567fe..cda3890657a9 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -16,258 +16,13 @@
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include <net/ip.h>
-#include <net/icmp.h>
#include "ar-internal.h"
-static void rxrpc_adjust_mtu(struct rxrpc_peer *, unsigned int);
static void rxrpc_store_error(struct rxrpc_peer *, struct sock_exterr_skb *);
static void rxrpc_distribute_error(struct rxrpc_peer *, int,
enum rxrpc_call_completion);
/*
- * Find the peer associated with an ICMPv4 packet.
- */
-static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
- struct sk_buff *skb,
- unsigned int udp_offset,
- unsigned int *info,
- struct sockaddr_rxrpc *srx)
-{
- struct iphdr *ip, *ip0 = ip_hdr(skb);
- struct icmphdr *icmp = icmp_hdr(skb);
- struct udphdr *udp = (struct udphdr *)(skb->data + udp_offset);
-
- _enter("%u,%u,%u", ip0->protocol, icmp->type, icmp->code);
-
- switch (icmp->type) {
- case ICMP_DEST_UNREACH:
- *info = ntohs(icmp->un.frag.mtu);
- fallthrough;
- case ICMP_TIME_EXCEEDED:
- case ICMP_PARAMETERPROB:
- ip = (struct iphdr *)((void *)icmp + 8);
- break;
- default:
- return NULL;
- }
-
- memset(srx, 0, sizeof(*srx));
- srx->transport_type = local->srx.transport_type;
- srx->transport_len = local->srx.transport_len;
- srx->transport.family = local->srx.transport.family;
-
- /* Can we see an ICMP4 packet on an ICMP6 listening socket? and vice
- * versa?
- */
- switch (srx->transport.family) {
- case AF_INET:
- srx->transport_len = sizeof(srx->transport.sin);
- srx->transport.family = AF_INET;
- srx->transport.sin.sin_port = udp->dest;
- memcpy(&srx->transport.sin.sin_addr, &ip->daddr,
- sizeof(struct in_addr));
- break;
-
-#ifdef CONFIG_AF_RXRPC_IPV6
- case AF_INET6:
- srx->transport_len = sizeof(srx->transport.sin);
- srx->transport.family = AF_INET;
- srx->transport.sin.sin_port = udp->dest;
- memcpy(&srx->transport.sin.sin_addr, &ip->daddr,
- sizeof(struct in_addr));
- break;
-#endif
-
- default:
- WARN_ON_ONCE(1);
- return NULL;
- }
-
- _net("ICMP {%pISp}", &srx->transport);
- return rxrpc_lookup_peer_rcu(local, srx);
-}
-
-#ifdef CONFIG_AF_RXRPC_IPV6
-/*
- * Find the peer associated with an ICMPv6 packet.
- */
-static struct rxrpc_peer *rxrpc_lookup_peer_icmp6_rcu(struct rxrpc_local *local,
- struct sk_buff *skb,
- unsigned int udp_offset,
- unsigned int *info,
- struct sockaddr_rxrpc *srx)
-{
- struct icmp6hdr *icmp = icmp6_hdr(skb);
- struct ipv6hdr *ip, *ip0 = ipv6_hdr(skb);
- struct udphdr *udp = (struct udphdr *)(skb->data + udp_offset);
-
- _enter("%u,%u,%u", ip0->nexthdr, icmp->icmp6_type, icmp->icmp6_code);
-
- switch (icmp->icmp6_type) {
- case ICMPV6_DEST_UNREACH:
- *info = ntohl(icmp->icmp6_mtu);
- fallthrough;
- case ICMPV6_PKT_TOOBIG:
- case ICMPV6_TIME_EXCEED:
- case ICMPV6_PARAMPROB:
- ip = (struct ipv6hdr *)((void *)icmp + 8);
- break;
- default:
- return NULL;
- }
-
- memset(srx, 0, sizeof(*srx));
- srx->transport_type = local->srx.transport_type;
- srx->transport_len = local->srx.transport_len;
- srx->transport.family = local->srx.transport.family;
-
- /* Can we see an ICMP4 packet on an ICMP6 listening socket? and vice
- * versa?
- */
- switch (srx->transport.family) {
- case AF_INET:
- _net("Rx ICMP6 on v4 sock");
- srx->transport_len = sizeof(srx->transport.sin);
- srx->transport.family = AF_INET;
- srx->transport.sin.sin_port = udp->dest;
- memcpy(&srx->transport.sin.sin_addr,
- &ip->daddr.s6_addr32[3], sizeof(struct in_addr));
- break;
- case AF_INET6:
- _net("Rx ICMP6");
- srx->transport.sin.sin_port = udp->dest;
- memcpy(&srx->transport.sin6.sin6_addr, &ip->daddr,
- sizeof(struct in6_addr));
- break;
- default:
- WARN_ON_ONCE(1);
- return NULL;
- }
-
- _net("ICMP {%pISp}", &srx->transport);
- return rxrpc_lookup_peer_rcu(local, srx);
-}
-#endif /* CONFIG_AF_RXRPC_IPV6 */
-
-/*
- * Handle an error received on the local endpoint as a tunnel.
- */
-void rxrpc_encap_err_rcv(struct sock *sk, struct sk_buff *skb,
- unsigned int udp_offset)
-{
- struct sock_extended_err ee;
- struct sockaddr_rxrpc srx;
- struct rxrpc_local *local;
- struct rxrpc_peer *peer;
- unsigned int info = 0;
- int err;
- u8 version = ip_hdr(skb)->version;
- u8 type = icmp_hdr(skb)->type;
- u8 code = icmp_hdr(skb)->code;
-
- rcu_read_lock();
- local = rcu_dereference_sk_user_data(sk);
- if (unlikely(!local)) {
- rcu_read_unlock();
- return;
- }
-
- rxrpc_new_skb(skb, rxrpc_skb_received);
-
- switch (ip_hdr(skb)->version) {
- case IPVERSION:
- peer = rxrpc_lookup_peer_icmp_rcu(local, skb, udp_offset,
- &info, &srx);
- break;
-#ifdef CONFIG_AF_RXRPC_IPV6
- case 6:
- peer = rxrpc_lookup_peer_icmp6_rcu(local, skb, udp_offset,
- &info, &srx);
- break;
-#endif
- default:
- rcu_read_unlock();
- return;
- }
-
- if (peer && !rxrpc_get_peer_maybe(peer))
- peer = NULL;
- if (!peer) {
- rcu_read_unlock();
- return;
- }
-
- memset(&ee, 0, sizeof(ee));
-
- switch (version) {
- case IPVERSION:
- switch (type) {
- case ICMP_DEST_UNREACH:
- switch (code) {
- case ICMP_FRAG_NEEDED:
- rxrpc_adjust_mtu(peer, info);
- rcu_read_unlock();
- rxrpc_put_peer(peer);
- return;
- default:
- break;
- }
-
- err = EHOSTUNREACH;
- if (code <= NR_ICMP_UNREACH) {
- /* Might want to do something different with
- * non-fatal errors
- */
- //harderr = icmp_err_convert[code].fatal;
- err = icmp_err_convert[code].errno;
- }
- break;
-
- case ICMP_TIME_EXCEEDED:
- err = EHOSTUNREACH;
- break;
- default:
- err = EPROTO;
- break;
- }
-
- ee.ee_origin = SO_EE_ORIGIN_ICMP;
- ee.ee_type = type;
- ee.ee_code = code;
- ee.ee_errno = err;
- break;
-
-#ifdef CONFIG_AF_RXRPC_IPV6
- case 6:
- switch (type) {
- case ICMPV6_PKT_TOOBIG:
- rxrpc_adjust_mtu(peer, info);
- rcu_read_unlock();
- rxrpc_put_peer(peer);
- return;
- }
-
- icmpv6_err_convert(type, code, &err);
-
- if (err == EACCES)
- err = EHOSTUNREACH;
-
- ee.ee_origin = SO_EE_ORIGIN_ICMP6;
- ee.ee_type = type;
- ee.ee_code = code;
- ee.ee_errno = err;
- break;
-#endif
- }
-
- trace_rxrpc_rx_icmp(peer, &ee, &srx);
-
- rxrpc_distribute_error(peer, err, RXRPC_CALL_NETWORK_ERROR);
- rcu_read_unlock();
- rxrpc_put_peer(peer);
-}
-
-/*
* Find the peer associated with a local error.
*/
static struct rxrpc_peer *rxrpc_lookup_peer_local_rcu(struct rxrpc_local *local,
@@ -283,6 +38,9 @@ static struct rxrpc_peer *rxrpc_lookup_peer_local_rcu(struct rxrpc_local *local,
srx->transport_len = local->srx.transport_len;
srx->transport.family = local->srx.transport.family;
+ /* Can we see an ICMP4 packet on an ICMP6 listening socket? and vice
+ * versa?
+ */
switch (srx->transport.family) {
case AF_INET:
srx->transport_len = sizeof(srx->transport.sin);
@@ -412,20 +170,38 @@ void rxrpc_error_report(struct sock *sk)
}
rxrpc_new_skb(skb, rxrpc_skb_received);
serr = SKB_EXT_ERR(skb);
+ if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) {
+ _leave("UDP empty message");
+ rcu_read_unlock();
+ rxrpc_free_skb(skb, rxrpc_skb_freed);
+ return;
+ }
- if (serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL) {
- peer = rxrpc_lookup_peer_local_rcu(local, skb, &srx);
- if (peer && !rxrpc_get_peer_maybe(peer))
- peer = NULL;
- if (peer) {
- trace_rxrpc_rx_icmp(peer, &serr->ee, &srx);
- rxrpc_store_error(peer, serr);
- }
+ peer = rxrpc_lookup_peer_local_rcu(local, skb, &srx);
+ if (peer && !rxrpc_get_peer_maybe(peer))
+ peer = NULL;
+ if (!peer) {
+ rcu_read_unlock();
+ rxrpc_free_skb(skb, rxrpc_skb_freed);
+ _leave(" [no peer]");
+ return;
}
+ trace_rxrpc_rx_icmp(peer, &serr->ee, &srx);
+
+ if ((serr->ee.ee_origin == SO_EE_ORIGIN_ICMP &&
+ serr->ee.ee_type == ICMP_DEST_UNREACH &&
+ serr->ee.ee_code == ICMP_FRAG_NEEDED)) {
+ rxrpc_adjust_mtu(peer, serr->ee.ee_info);
+ goto out;
+ }
+
+ rxrpc_store_error(peer, serr);
+out:
rcu_read_unlock();
rxrpc_free_skb(skb, rxrpc_skb_freed);
rxrpc_put_peer(peer);
+
_leave("");
}
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 26d2ae9baaf2..041a51225c5f 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -227,12 +227,7 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp)
rxrpc_peer_init_rtt(peer);
- if (RXRPC_TX_SMSS > 2190)
- peer->cong_cwnd = 2;
- else if (RXRPC_TX_SMSS > 1095)
- peer->cong_cwnd = 3;
- else
- peer->cong_cwnd = 4;
+ peer->cong_ssthresh = RXRPC_TX_MAX_WINDOW;
trace_rxrpc_peer(peer->debug_id, rxrpc_peer_new, 1, here);
}
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index 245418943e01..fae22a8b38d6 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -54,8 +54,9 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
struct rxrpc_call *call;
struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
unsigned long timeout = 0;
- rxrpc_seq_t tx_hard_ack, rx_hard_ack;
+ rxrpc_seq_t acks_hard_ack;
char lbuff[50], rbuff[50];
+ u64 wtmp;
if (v == &rxnet->calls) {
seq_puts(seq,
@@ -90,8 +91,8 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
timeout -= jiffies;
}
- tx_hard_ack = READ_ONCE(call->tx_hard_ack);
- rx_hard_ack = READ_ONCE(call->rx_hard_ack);
+ acks_hard_ack = READ_ONCE(call->acks_hard_ack);
+ wtmp = atomic64_read_acquire(&call->ackr_window);
seq_printf(seq,
"UDP %-47.47s %-47.47s %4x %08x %08x %s %3u"
" %-8.8s %08x %08x %08x %02x %08x %02x %08x %06lx\n",
@@ -105,8 +106,8 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
rxrpc_call_states[call->state],
call->abort_code,
call->debug_id,
- tx_hard_ack, READ_ONCE(call->tx_top) - tx_hard_ack,
- rx_hard_ack, READ_ONCE(call->rx_top) - rx_hard_ack,
+ acks_hard_ack, READ_ONCE(call->tx_top) - acks_hard_ack,
+ lower_32_bits(wtmp), upper_32_bits(wtmp) - lower_32_bits(wtmp),
call->rx_serial,
timeout);
@@ -216,7 +217,7 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v)
seq_puts(seq,
"Proto Local "
" Remote "
- " Use CW MTU LastUse RTT RTO\n"
+ " Use SST MTU LastUse RTT RTO\n"
);
return 0;
}
@@ -234,7 +235,7 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v)
lbuff,
rbuff,
refcount_read(&peer->ref),
- peer->cong_cwnd,
+ peer->cong_ssthresh,
peer->mtu,
now - peer->last_tx_at,
peer->srtt_us >> 3,
@@ -397,3 +398,98 @@ const struct seq_operations rxrpc_local_seq_ops = {
.stop = rxrpc_local_seq_stop,
.show = rxrpc_local_seq_show,
};
+
+/*
+ * Display stats in /proc/net/rxrpc/stats
+ */
+int rxrpc_stats_show(struct seq_file *seq, void *v)
+{
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_single_net(seq));
+
+ seq_printf(seq,
+ "Data : send=%u sendf=%u\n",
+ atomic_read(&rxnet->stat_tx_data_send),
+ atomic_read(&rxnet->stat_tx_data_send_frag));
+ seq_printf(seq,
+ "Data-Tx : nr=%u retrans=%u\n",
+ atomic_read(&rxnet->stat_tx_data),
+ atomic_read(&rxnet->stat_tx_data_retrans));
+ seq_printf(seq,
+ "Data-Rx : nr=%u reqack=%u jumbo=%u\n",
+ atomic_read(&rxnet->stat_rx_data),
+ atomic_read(&rxnet->stat_rx_data_reqack),
+ atomic_read(&rxnet->stat_rx_data_jumbo));
+ seq_printf(seq,
+ "Ack : fill=%u send=%u skip=%u\n",
+ atomic_read(&rxnet->stat_tx_ack_fill),
+ atomic_read(&rxnet->stat_tx_ack_send),
+ atomic_read(&rxnet->stat_tx_ack_skip));
+ seq_printf(seq,
+ "Ack-Tx : req=%u dup=%u oos=%u exw=%u nos=%u png=%u prs=%u dly=%u idl=%u\n",
+ atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_REQUESTED]),
+ atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_DUPLICATE]),
+ atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_OUT_OF_SEQUENCE]),
+ atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_EXCEEDS_WINDOW]),
+ atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_NOSPACE]),
+ atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_PING]),
+ atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_PING_RESPONSE]),
+ atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_DELAY]),
+ atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_IDLE]));
+ seq_printf(seq,
+ "Ack-Rx : req=%u dup=%u oos=%u exw=%u nos=%u png=%u prs=%u dly=%u idl=%u\n",
+ atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_REQUESTED]),
+ atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_DUPLICATE]),
+ atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_OUT_OF_SEQUENCE]),
+ atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_EXCEEDS_WINDOW]),
+ atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_NOSPACE]),
+ atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_PING]),
+ atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_PING_RESPONSE]),
+ atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_DELAY]),
+ atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_IDLE]));
+ seq_printf(seq,
+ "Why-Req-A: acklost=%u already=%u mrtt=%u ortt=%u\n",
+ atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_ack_lost]),
+ atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_already_on]),
+ atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_more_rtt]),
+ atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_old_rtt]));
+ seq_printf(seq,
+ "Why-Req-A: nolast=%u retx=%u slows=%u smtxw=%u\n",
+ atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_no_srv_last]),
+ atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_retrans]),
+ atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_slow_start]),
+ atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_small_txwin]));
+ seq_printf(seq,
+ "Buffers : txb=%u rxb=%u\n",
+ atomic_read(&rxrpc_nr_txbuf),
+ atomic_read(&rxrpc_n_rx_skbs));
+ return 0;
+}
+
+/*
+ * Clear stats if /proc/net/rxrpc/stats is written to.
+ */
+int rxrpc_stats_clear(struct file *file, char *buf, size_t size)
+{
+ struct seq_file *m = file->private_data;
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_single_net(m));
+
+ if (size > 1 || (size == 1 && buf[0] != '\n'))
+ return -EINVAL;
+
+ atomic_set(&rxnet->stat_tx_data, 0);
+ atomic_set(&rxnet->stat_tx_data_retrans, 0);
+ atomic_set(&rxnet->stat_tx_data_send, 0);
+ atomic_set(&rxnet->stat_tx_data_send_frag, 0);
+ atomic_set(&rxnet->stat_rx_data, 0);
+ atomic_set(&rxnet->stat_rx_data_reqack, 0);
+ atomic_set(&rxnet->stat_rx_data_jumbo, 0);
+
+ atomic_set(&rxnet->stat_tx_ack_fill, 0);
+ atomic_set(&rxnet->stat_tx_ack_send, 0);
+ atomic_set(&rxnet->stat_tx_ack_skip, 0);
+ memset(&rxnet->stat_tx_acks, 0, sizeof(rxnet->stat_tx_acks));
+ memset(&rxnet->stat_rx_acks, 0, sizeof(rxnet->stat_rx_acks));
+
+ memset(&rxnet->stat_why_req_ack, 0, sizeof(rxnet->stat_why_req_ack));
+ return size;
+}
diff --git a/net/rxrpc/protocol.h b/net/rxrpc/protocol.h
index d2cf8e1d218f..6760cb99c6d6 100644
--- a/net/rxrpc/protocol.h
+++ b/net/rxrpc/protocol.h
@@ -84,7 +84,7 @@ struct rxrpc_jumbo_header {
__be16 _rsvd; /* reserved */
__be16 cksum; /* kerberos security checksum */
};
-};
+} __packed;
#define RXRPC_JUMBO_DATALEN 1412 /* non-terminal jumbo packet data length */
#define RXRPC_JUMBO_SUBPKTLEN (RXRPC_JUMBO_DATALEN + sizeof(struct rxrpc_jumbo_header))
@@ -132,13 +132,6 @@ struct rxrpc_ackpacket {
} __packed;
-/* Some ACKs refer to specific packets and some are general and can be updated. */
-#define RXRPC_ACK_UPDATEABLE ((1 << RXRPC_ACK_REQUESTED) | \
- (1 << RXRPC_ACK_PING_RESPONSE) | \
- (1 << RXRPC_ACK_DELAY) | \
- (1 << RXRPC_ACK_IDLE))
-
-
/*
* ACK packets can have a further piece of information tagged on the end
*/
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 7e39c262fd79..efb85f983657 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -173,8 +173,9 @@ static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg)
break;
}
- trace_rxrpc_recvmsg(call, rxrpc_recvmsg_terminal, call->rx_hard_ack,
- call->rx_pkt_offset, call->rx_pkt_len, ret);
+ trace_rxrpc_recvdata(call, rxrpc_recvmsg_terminal,
+ lower_32_bits(atomic64_read(&call->ackr_window)) - 1,
+ call->rx_pkt_offset, call->rx_pkt_len, ret);
return ret;
}
@@ -183,16 +184,14 @@ static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg)
*/
static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial)
{
+ rxrpc_seq_t whigh = READ_ONCE(call->rx_highest_seq);
+
_enter("%d,%s", call->debug_id, rxrpc_call_states[call->state]);
- trace_rxrpc_receive(call, rxrpc_receive_end, 0, call->rx_top);
- ASSERTCMP(call->rx_hard_ack, ==, call->rx_top);
+ trace_rxrpc_receive(call, rxrpc_receive_end, 0, whigh);
- if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) {
- rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, serial, false, true,
- rxrpc_propose_ack_terminal_ack);
- //rxrpc_send_ack_packet(call, false, NULL);
- }
+ if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY)
+ rxrpc_propose_delay_ACK(call, serial, rxrpc_propose_ack_terminal_ack);
write_lock_bh(&call->state_lock);
@@ -203,12 +202,11 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial)
break;
case RXRPC_CALL_SERVER_RECV_REQUEST:
- call->tx_phase = true;
call->state = RXRPC_CALL_SERVER_ACK_REQUEST;
call->expect_req_by = jiffies + MAX_JIFFY_OFFSET;
write_unlock_bh(&call->state_lock);
- rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial, false, true,
- rxrpc_propose_ack_processing_op);
+ rxrpc_propose_delay_ACK(call, serial,
+ rxrpc_propose_ack_processing_op);
break;
default:
write_unlock_bh(&call->state_lock);
@@ -224,126 +222,66 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call)
struct rxrpc_skb_priv *sp;
struct sk_buff *skb;
rxrpc_serial_t serial;
- rxrpc_seq_t hard_ack, top;
- bool last = false;
- u8 subpacket;
- int ix;
+ rxrpc_seq_t old_consumed = call->rx_consumed, tseq;
+ bool last;
+ int acked;
_enter("%d", call->debug_id);
- hard_ack = call->rx_hard_ack;
- top = smp_load_acquire(&call->rx_top);
- ASSERT(before(hard_ack, top));
-
- hard_ack++;
- ix = hard_ack & RXRPC_RXTX_BUFF_MASK;
- skb = call->rxtx_buffer[ix];
+further_rotation:
+ skb = skb_dequeue(&call->recvmsg_queue);
rxrpc_see_skb(skb, rxrpc_skb_rotated);
- sp = rxrpc_skb(skb);
-
- subpacket = call->rxtx_annotations[ix] & RXRPC_RX_ANNO_SUBPACKET;
- serial = sp->hdr.serial + subpacket;
- if (subpacket == sp->nr_subpackets - 1 &&
- sp->rx_flags & RXRPC_SKB_INCL_LAST)
- last = true;
+ sp = rxrpc_skb(skb);
+ tseq = sp->hdr.seq;
+ serial = sp->hdr.serial;
+ last = sp->hdr.flags & RXRPC_LAST_PACKET;
- call->rxtx_buffer[ix] = NULL;
- call->rxtx_annotations[ix] = 0;
/* Barrier against rxrpc_input_data(). */
- smp_store_release(&call->rx_hard_ack, hard_ack);
+ if (after(tseq, call->rx_consumed))
+ smp_store_release(&call->rx_consumed, tseq);
rxrpc_free_skb(skb, rxrpc_skb_freed);
- trace_rxrpc_receive(call, rxrpc_receive_rotate, serial, hard_ack);
+ trace_rxrpc_receive(call, last ? rxrpc_receive_rotate_last : rxrpc_receive_rotate,
+ serial, call->rx_consumed);
if (last) {
rxrpc_end_rx_phase(call, serial);
- } else {
- /* Check to see if there's an ACK that needs sending. */
- if (atomic_inc_return(&call->ackr_nr_consumed) > 2)
- rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, serial,
- true, false,
- rxrpc_propose_ack_rotate_rx);
- if (call->ackr_reason && call->ackr_reason != RXRPC_ACK_DELAY)
- rxrpc_send_ack_packet(call, false, NULL);
+ return;
}
-}
-
-/*
- * Decrypt and verify a (sub)packet. The packet's length may be changed due to
- * padding, but if this is the case, the packet length will be resident in the
- * socket buffer. Note that we can't modify the master skb info as the skb may
- * be the home to multiple subpackets.
- */
-static int rxrpc_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
- u8 annotation,
- unsigned int offset, unsigned int len)
-{
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- rxrpc_seq_t seq = sp->hdr.seq;
- u16 cksum = sp->hdr.cksum;
- u8 subpacket = annotation & RXRPC_RX_ANNO_SUBPACKET;
- _enter("");
-
- /* For all but the head jumbo subpacket, the security checksum is in a
- * jumbo header immediately prior to the data.
+ /* The next packet on the queue might entirely overlap with the one we
+ * just consumed; if so, rotate that away also.
*/
- if (subpacket > 0) {
- __be16 tmp;
- if (skb_copy_bits(skb, offset - 2, &tmp, 2) < 0)
- BUG();
- cksum = ntohs(tmp);
- seq += subpacket;
+ skb = skb_peek(&call->recvmsg_queue);
+ if (skb) {
+ sp = rxrpc_skb(skb);
+ if (sp->hdr.seq != call->rx_consumed &&
+ after_eq(call->rx_consumed, sp->hdr.seq))
+ goto further_rotation;
}
- return call->security->verify_packet(call, skb, offset, len,
- seq, cksum);
+ /* Check to see if there's an ACK that needs sending. */
+ acked = atomic_add_return(call->rx_consumed - old_consumed,
+ &call->ackr_nr_consumed);
+ if (acked > 2 &&
+ !test_and_set_bit(RXRPC_CALL_IDLE_ACK_PENDING, &call->flags)) {
+ rxrpc_send_ACK(call, RXRPC_ACK_IDLE, serial,
+ rxrpc_propose_ack_rotate_rx);
+ rxrpc_transmit_ack_packets(call->peer->local);
+ }
}
/*
- * Locate the data within a packet. This is complicated by:
- *
- * (1) An skb may contain a jumbo packet - so we have to find the appropriate
- * subpacket.
- *
- * (2) The (sub)packets may be encrypted and, if so, the encrypted portion
- * contains an extra header which includes the true length of the data,
- * excluding any encrypted padding.
+ * Decrypt and verify a DATA packet.
*/
-static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb,
- u8 *_annotation,
- unsigned int *_offset, unsigned int *_len,
- bool *_last)
+static int rxrpc_verify_data(struct rxrpc_call *call, struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- unsigned int offset = sizeof(struct rxrpc_wire_header);
- unsigned int len;
- bool last = false;
- int ret;
- u8 annotation = *_annotation;
- u8 subpacket = annotation & RXRPC_RX_ANNO_SUBPACKET;
-
- /* Locate the subpacket */
- offset += subpacket * RXRPC_JUMBO_SUBPKTLEN;
- len = skb->len - offset;
- if (subpacket < sp->nr_subpackets - 1)
- len = RXRPC_JUMBO_DATALEN;
- else if (sp->rx_flags & RXRPC_SKB_INCL_LAST)
- last = true;
-
- if (!(annotation & RXRPC_RX_ANNO_VERIFIED)) {
- ret = rxrpc_verify_packet(call, skb, annotation, offset, len);
- if (ret < 0)
- return ret;
- *_annotation |= RXRPC_RX_ANNO_VERIFIED;
- }
- *_offset = offset;
- *_len = len;
- *_last = last;
- call->security->locate_data(call, skb, _offset, _len);
- return 0;
+ if (sp->flags & RXRPC_RX_VERIFIED)
+ return 0;
+ return call->security->verify_packet(call, skb);
}
/*
@@ -357,69 +295,55 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
{
struct rxrpc_skb_priv *sp;
struct sk_buff *skb;
- rxrpc_serial_t serial;
- rxrpc_seq_t hard_ack, top, seq;
+ rxrpc_seq_t seq = 0;
size_t remain;
- bool rx_pkt_last;
unsigned int rx_pkt_offset, rx_pkt_len;
- int ix, copy, ret = -EAGAIN, ret2;
-
- if (test_and_clear_bit(RXRPC_CALL_RX_UNDERRUN, &call->flags) &&
- call->ackr_reason)
- rxrpc_send_ack_packet(call, false, NULL);
+ int copy, ret = -EAGAIN, ret2;
rx_pkt_offset = call->rx_pkt_offset;
rx_pkt_len = call->rx_pkt_len;
- rx_pkt_last = call->rx_pkt_last;
if (call->state >= RXRPC_CALL_SERVER_ACK_REQUEST) {
- seq = call->rx_hard_ack;
+ seq = lower_32_bits(atomic64_read(&call->ackr_window)) - 1;
ret = 1;
goto done;
}
- /* Barriers against rxrpc_input_data(). */
- hard_ack = call->rx_hard_ack;
- seq = hard_ack + 1;
-
- while (top = smp_load_acquire(&call->rx_top),
- before_eq(seq, top)
- ) {
- ix = seq & RXRPC_RXTX_BUFF_MASK;
- skb = call->rxtx_buffer[ix];
- if (!skb) {
- trace_rxrpc_recvmsg(call, rxrpc_recvmsg_hole, seq,
- rx_pkt_offset, rx_pkt_len, 0);
- break;
- }
- smp_rmb();
+ /* No one else can be removing stuff from the queue, so we shouldn't
+ * need the Rx lock to walk it.
+ */
+ skb = skb_peek(&call->recvmsg_queue);
+ while (skb) {
rxrpc_see_skb(skb, rxrpc_skb_seen);
sp = rxrpc_skb(skb);
+ seq = sp->hdr.seq;
- if (!(flags & MSG_PEEK)) {
- serial = sp->hdr.serial;
- serial += call->rxtx_annotations[ix] & RXRPC_RX_ANNO_SUBPACKET;
- trace_rxrpc_receive(call, rxrpc_receive_front,
- serial, seq);
+ if (after_eq(call->rx_consumed, seq)) {
+ kdebug("obsolete %x %x", call->rx_consumed, seq);
+ goto skip_obsolete;
}
+ if (!(flags & MSG_PEEK))
+ trace_rxrpc_receive(call, rxrpc_receive_front,
+ sp->hdr.serial, seq);
+
if (msg)
sock_recv_timestamp(msg, sock->sk, skb);
if (rx_pkt_offset == 0) {
- ret2 = rxrpc_locate_data(call, skb,
- &call->rxtx_annotations[ix],
- &rx_pkt_offset, &rx_pkt_len,
- &rx_pkt_last);
- trace_rxrpc_recvmsg(call, rxrpc_recvmsg_next, seq,
- rx_pkt_offset, rx_pkt_len, ret2);
+ ret2 = rxrpc_verify_data(call, skb);
+ rx_pkt_offset = sp->offset;
+ rx_pkt_len = sp->len;
+ trace_rxrpc_recvdata(call, rxrpc_recvmsg_next, seq,
+ rx_pkt_offset, rx_pkt_len, ret2);
if (ret2 < 0) {
ret = ret2;
goto out;
}
+ rxrpc_transmit_ack_packets(call->peer->local);
} else {
- trace_rxrpc_recvmsg(call, rxrpc_recvmsg_cont, seq,
- rx_pkt_offset, rx_pkt_len, 0);
+ trace_rxrpc_recvdata(call, rxrpc_recvmsg_cont, seq,
+ rx_pkt_offset, rx_pkt_len, 0);
}
/* We have to handle short, empty and used-up DATA packets. */
@@ -442,37 +366,34 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
}
if (rx_pkt_len > 0) {
- trace_rxrpc_recvmsg(call, rxrpc_recvmsg_full, seq,
- rx_pkt_offset, rx_pkt_len, 0);
+ trace_rxrpc_recvdata(call, rxrpc_recvmsg_full, seq,
+ rx_pkt_offset, rx_pkt_len, 0);
ASSERTCMP(*_offset, ==, len);
ret = 0;
break;
}
+ skip_obsolete:
/* The whole packet has been transferred. */
- if (!(flags & MSG_PEEK))
- rxrpc_rotate_rx_window(call);
+ if (sp->hdr.flags & RXRPC_LAST_PACKET)
+ ret = 1;
rx_pkt_offset = 0;
rx_pkt_len = 0;
- if (rx_pkt_last) {
- ASSERTCMP(seq, ==, READ_ONCE(call->rx_top));
- ret = 1;
- goto out;
- }
+ skb = skb_peek_next(skb, &call->recvmsg_queue);
- seq++;
+ if (!(flags & MSG_PEEK))
+ rxrpc_rotate_rx_window(call);
}
out:
if (!(flags & MSG_PEEK)) {
call->rx_pkt_offset = rx_pkt_offset;
call->rx_pkt_len = rx_pkt_len;
- call->rx_pkt_last = rx_pkt_last;
}
done:
- trace_rxrpc_recvmsg(call, rxrpc_recvmsg_data_return, seq,
- rx_pkt_offset, rx_pkt_len, ret);
+ trace_rxrpc_recvdata(call, rxrpc_recvmsg_data_return, seq,
+ rx_pkt_offset, rx_pkt_len, ret);
if (ret == -EAGAIN)
set_bit(RXRPC_CALL_RX_UNDERRUN, &call->flags);
return ret;
@@ -495,7 +416,7 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
DEFINE_WAIT(wait);
- trace_rxrpc_recvmsg(NULL, rxrpc_recvmsg_enter, 0, 0, 0, 0);
+ trace_rxrpc_recvmsg(NULL, rxrpc_recvmsg_enter, 0);
if (flags & (MSG_OOB | MSG_TRUNC))
return -EOPNOTSUPP;
@@ -532,8 +453,7 @@ try_again:
if (list_empty(&rx->recvmsg_q)) {
if (signal_pending(current))
goto wait_interrupted;
- trace_rxrpc_recvmsg(NULL, rxrpc_recvmsg_wait,
- 0, 0, 0, 0);
+ trace_rxrpc_recvmsg(NULL, rxrpc_recvmsg_wait, 0);
timeo = schedule_timeout(timeo);
}
finish_wait(sk_sleep(&rx->sk), &wait);
@@ -552,7 +472,7 @@ try_again:
rxrpc_get_call(call, rxrpc_call_got);
write_unlock_bh(&rx->recvmsg_lock);
- trace_rxrpc_recvmsg(call, rxrpc_recvmsg_dequeue, 0, 0, 0, 0);
+ trace_rxrpc_recvmsg(call, rxrpc_recvmsg_dequeue, 0);
/* We're going to drop the socket lock, so we need to lock the call
* against interference by sendmsg.
@@ -605,8 +525,8 @@ try_again:
if (ret == -EAGAIN)
ret = 0;
- if (after(call->rx_top, call->rx_hard_ack) &&
- call->rxtx_buffer[(call->rx_hard_ack + 1) & RXRPC_RXTX_BUFF_MASK])
+ rxrpc_transmit_ack_packets(call->peer->local);
+ if (!skb_queue_empty(&call->recvmsg_queue))
rxrpc_notify_socket(call);
break;
default:
@@ -636,7 +556,7 @@ try_again:
error_unlock_call:
mutex_unlock(&call->user_mutex);
rxrpc_put_call(call, rxrpc_call_put);
- trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret);
+ trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, ret);
return ret;
error_requeue_call:
@@ -644,14 +564,14 @@ error_requeue_call:
write_lock_bh(&rx->recvmsg_lock);
list_add(&call->recvmsg_link, &rx->recvmsg_q);
write_unlock_bh(&rx->recvmsg_lock);
- trace_rxrpc_recvmsg(call, rxrpc_recvmsg_requeue, 0, 0, 0, 0);
+ trace_rxrpc_recvmsg(call, rxrpc_recvmsg_requeue, 0);
} else {
rxrpc_put_call(call, rxrpc_call_put);
}
error_no_call:
release_sock(&rx->sk);
error_trace:
- trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret);
+ trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, ret);
return ret;
wait_interrupted:
@@ -735,17 +655,7 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
read_phase_complete:
ret = 1;
out:
- switch (call->ackr_reason) {
- case RXRPC_ACK_IDLE:
- break;
- case RXRPC_ACK_DELAY:
- if (ret != -EAGAIN)
- break;
- fallthrough;
- default:
- rxrpc_send_ack_packet(call, false, NULL);
- }
-
+ rxrpc_transmit_ack_packets(call->peer->local);
if (_service)
*_service = call->service_id;
mutex_unlock(&call->user_mutex);
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 78fa0524156f..2706e59bf992 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -233,16 +233,8 @@ static int rxkad_prime_packet_security(struct rxrpc_connection *conn,
static struct skcipher_request *rxkad_get_call_crypto(struct rxrpc_call *call)
{
struct crypto_skcipher *tfm = &call->conn->rxkad.cipher->base;
- struct skcipher_request *cipher_req = call->cipher_req;
- if (!cipher_req) {
- cipher_req = skcipher_request_alloc(tfm, GFP_NOFS);
- if (!cipher_req)
- return NULL;
- call->cipher_req = cipher_req;
- }
-
- return cipher_req;
+ return skcipher_request_alloc(tfm, GFP_NOFS);
}
/*
@@ -250,20 +242,16 @@ static struct skcipher_request *rxkad_get_call_crypto(struct rxrpc_call *call)
*/
static void rxkad_free_call_crypto(struct rxrpc_call *call)
{
- if (call->cipher_req)
- skcipher_request_free(call->cipher_req);
- call->cipher_req = NULL;
}
/*
* partially encrypt a packet (level 1 security)
*/
static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
- struct sk_buff *skb, u32 data_size,
+ struct rxrpc_txbuf *txb,
struct skcipher_request *req)
{
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- struct rxkad_level1_hdr hdr;
+ struct rxkad_level1_hdr *hdr = (void *)txb->data;
struct rxrpc_crypt iv;
struct scatterlist sg;
size_t pad;
@@ -271,22 +259,22 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
_enter("");
- check = sp->hdr.seq ^ call->call_id;
- data_size |= (u32)check << 16;
+ check = txb->seq ^ ntohl(txb->wire.callNumber);
+ hdr->data_size = htonl((u32)check << 16 | txb->len);
- hdr.data_size = htonl(data_size);
- memcpy(skb->head, &hdr, sizeof(hdr));
-
- pad = sizeof(struct rxkad_level1_hdr) + data_size;
+ txb->len += sizeof(struct rxkad_level1_hdr);
+ pad = txb->len;
pad = RXKAD_ALIGN - pad;
pad &= RXKAD_ALIGN - 1;
- if (pad)
- skb_put_zero(skb, pad);
+ if (pad) {
+ memset(txb->data + txb->offset, 0, pad);
+ txb->len += pad;
+ }
/* start the encryption afresh */
memset(&iv, 0, sizeof(iv));
- sg_init_one(&sg, skb->head, 8);
+ sg_init_one(&sg, txb->data, 8);
skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x);
@@ -301,87 +289,63 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
* wholly encrypt a packet (level 2 security)
*/
static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
- struct sk_buff *skb,
- u32 data_size,
+ struct rxrpc_txbuf *txb,
struct skcipher_request *req)
{
const struct rxrpc_key_token *token;
- struct rxkad_level2_hdr rxkhdr;
- struct rxrpc_skb_priv *sp;
+ struct rxkad_level2_hdr *rxkhdr = (void *)txb->data;
struct rxrpc_crypt iv;
- struct scatterlist sg[16];
- unsigned int len;
+ struct scatterlist sg;
size_t pad;
u16 check;
- int err;
-
- sp = rxrpc_skb(skb);
+ int ret;
_enter("");
- check = sp->hdr.seq ^ call->call_id;
+ check = txb->seq ^ ntohl(txb->wire.callNumber);
- rxkhdr.data_size = htonl(data_size | (u32)check << 16);
- rxkhdr.checksum = 0;
- memcpy(skb->head, &rxkhdr, sizeof(rxkhdr));
+ rxkhdr->data_size = htonl(txb->len | (u32)check << 16);
+ rxkhdr->checksum = 0;
- pad = sizeof(struct rxkad_level2_hdr) + data_size;
+ txb->len += sizeof(struct rxkad_level2_hdr);
+ pad = txb->len;
pad = RXKAD_ALIGN - pad;
pad &= RXKAD_ALIGN - 1;
- if (pad)
- skb_put_zero(skb, pad);
+ if (pad) {
+ memset(txb->data + txb->offset, 0, pad);
+ txb->len += pad;
+ }
/* encrypt from the session key */
token = call->conn->params.key->payload.data[0];
memcpy(&iv, token->kad->session_key, sizeof(iv));
- sg_init_one(&sg[0], skb->head, sizeof(rxkhdr));
+ sg_init_one(&sg, txb->data, txb->len);
skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, &sg[0], &sg[0], sizeof(rxkhdr), iv.x);
- crypto_skcipher_encrypt(req);
-
- /* we want to encrypt the skbuff in-place */
- err = -EMSGSIZE;
- if (skb_shinfo(skb)->nr_frags > 16)
- goto out;
-
- len = round_up(data_size, RXKAD_ALIGN);
-
- sg_init_table(sg, ARRAY_SIZE(sg));
- err = skb_to_sgvec(skb, sg, 8, len);
- if (unlikely(err < 0))
- goto out;
- skcipher_request_set_crypt(req, sg, sg, len, iv.x);
- crypto_skcipher_encrypt(req);
-
- _leave(" = 0");
- err = 0;
-
-out:
+ skcipher_request_set_crypt(req, &sg, &sg, txb->len, iv.x);
+ ret = crypto_skcipher_encrypt(req);
skcipher_request_zero(req);
- return err;
+ return ret;
}
/*
* checksum an RxRPC packet header
*/
-static int rxkad_secure_packet(struct rxrpc_call *call,
- struct sk_buff *skb,
- size_t data_size)
+static int rxkad_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
{
- struct rxrpc_skb_priv *sp;
struct skcipher_request *req;
struct rxrpc_crypt iv;
struct scatterlist sg;
+ union {
+ __be32 buf[2];
+ } crypto __aligned(8);
u32 x, y;
int ret;
- sp = rxrpc_skb(skb);
-
- _enter("{%d{%x}},{#%u},%zu,",
+ _enter("{%d{%x}},{#%u},%u,",
call->debug_id, key_serial(call->conn->params.key),
- sp->hdr.seq, data_size);
+ txb->seq, txb->len);
if (!call->conn->rxkad.cipher)
return 0;
@@ -398,39 +362,40 @@ static int rxkad_secure_packet(struct rxrpc_call *call,
memcpy(&iv, call->conn->rxkad.csum_iv.x, sizeof(iv));
/* calculate the security checksum */
- x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT);
- x |= sp->hdr.seq & 0x3fffffff;
- call->crypto_buf[0] = htonl(call->call_id);
- call->crypto_buf[1] = htonl(x);
+ x = (ntohl(txb->wire.cid) & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT);
+ x |= txb->seq & 0x3fffffff;
+ crypto.buf[0] = txb->wire.callNumber;
+ crypto.buf[1] = htonl(x);
- sg_init_one(&sg, call->crypto_buf, 8);
+ sg_init_one(&sg, crypto.buf, 8);
skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x);
crypto_skcipher_encrypt(req);
skcipher_request_zero(req);
- y = ntohl(call->crypto_buf[1]);
+ y = ntohl(crypto.buf[1]);
y = (y >> 16) & 0xffff;
if (y == 0)
y = 1; /* zero checksums are not permitted */
- sp->hdr.cksum = y;
+ txb->wire.cksum = htons(y);
switch (call->conn->params.security_level) {
case RXRPC_SECURITY_PLAIN:
ret = 0;
break;
case RXRPC_SECURITY_AUTH:
- ret = rxkad_secure_packet_auth(call, skb, data_size, req);
+ ret = rxkad_secure_packet_auth(call, txb, req);
break;
case RXRPC_SECURITY_ENCRYPT:
- ret = rxkad_secure_packet_encrypt(call, skb, data_size, req);
+ ret = rxkad_secure_packet_encrypt(call, txb, req);
break;
default:
ret = -EPERM;
break;
}
+ skcipher_request_free(req);
_leave(" = %d [set %x]", ret, y);
return ret;
}
@@ -439,11 +404,11 @@ static int rxkad_secure_packet(struct rxrpc_call *call,
* decrypt partial encryption on a packet (level 1 security)
*/
static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
- unsigned int offset, unsigned int len,
rxrpc_seq_t seq,
struct skcipher_request *req)
{
struct rxkad_level1_hdr sechdr;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_crypt iv;
struct scatterlist sg[16];
bool aborted;
@@ -453,9 +418,9 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
_enter("");
- if (len < 8) {
+ if (sp->len < 8) {
aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_hdr", "V1H",
- RXKADSEALEDINCON);
+ RXKADSEALEDINCON);
goto protocol_error;
}
@@ -463,7 +428,7 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
* directly into the target buffer.
*/
sg_init_table(sg, ARRAY_SIZE(sg));
- ret = skb_to_sgvec(skb, sg, offset, 8);
+ ret = skb_to_sgvec(skb, sg, sp->offset, 8);
if (unlikely(ret < 0))
return ret;
@@ -477,12 +442,13 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
skcipher_request_zero(req);
/* Extract the decrypted packet length */
- if (skb_copy_bits(skb, offset, &sechdr, sizeof(sechdr)) < 0) {
+ if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0) {
aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_len", "XV1",
RXKADDATALEN);
goto protocol_error;
}
- len -= sizeof(sechdr);
+ sp->offset += sizeof(sechdr);
+ sp->len -= sizeof(sechdr);
buf = ntohl(sechdr.data_size);
data_size = buf & 0xffff;
@@ -496,11 +462,12 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
goto protocol_error;
}
- if (data_size > len) {
+ if (data_size > sp->len) {
aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_datalen", "V1L",
RXKADDATALEN);
goto protocol_error;
}
+ sp->len = data_size;
_leave(" = 0 [dlen=%x]", data_size);
return 0;
@@ -515,12 +482,12 @@ protocol_error:
* wholly decrypt a packet (level 2 security)
*/
static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
- unsigned int offset, unsigned int len,
rxrpc_seq_t seq,
struct skcipher_request *req)
{
const struct rxrpc_key_token *token;
struct rxkad_level2_hdr sechdr;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_crypt iv;
struct scatterlist _sg[4], *sg;
bool aborted;
@@ -528,9 +495,9 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
u16 check;
int nsg, ret;
- _enter(",{%d}", skb->len);
+ _enter(",{%d}", sp->len);
- if (len < 8) {
+ if (sp->len < 8) {
aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_hdr", "V2H",
RXKADSEALEDINCON);
goto protocol_error;
@@ -550,7 +517,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
}
sg_init_table(sg, nsg);
- ret = skb_to_sgvec(skb, sg, offset, len);
+ ret = skb_to_sgvec(skb, sg, sp->offset, sp->len);
if (unlikely(ret < 0)) {
if (sg != _sg)
kfree(sg);
@@ -563,19 +530,20 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, sg, sg, len, iv.x);
+ skcipher_request_set_crypt(req, sg, sg, sp->len, iv.x);
crypto_skcipher_decrypt(req);
skcipher_request_zero(req);
if (sg != _sg)
kfree(sg);
/* Extract the decrypted packet length */
- if (skb_copy_bits(skb, offset, &sechdr, sizeof(sechdr)) < 0) {
+ if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0) {
aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_len", "XV2",
RXKADDATALEN);
goto protocol_error;
}
- len -= sizeof(sechdr);
+ sp->offset += sizeof(sechdr);
+ sp->len -= sizeof(sechdr);
buf = ntohl(sechdr.data_size);
data_size = buf & 0xffff;
@@ -589,12 +557,13 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
goto protocol_error;
}
- if (data_size > len) {
+ if (data_size > sp->len) {
aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_datalen", "V2L",
RXKADDATALEN);
goto protocol_error;
}
+ sp->len = data_size;
_leave(" = 0 [dlen=%x]", data_size);
return 0;
@@ -609,17 +578,20 @@ nomem:
}
/*
- * Verify the security on a received packet or subpacket (if part of a
- * jumbo packet).
+ * Verify the security on a received packet and the subpackets therein.
*/
-static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
- unsigned int offset, unsigned int len,
- rxrpc_seq_t seq, u16 expected_cksum)
+static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb)
{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct skcipher_request *req;
struct rxrpc_crypt iv;
struct scatterlist sg;
+ union {
+ __be32 buf[2];
+ } crypto __aligned(8);
+ rxrpc_seq_t seq = sp->hdr.seq;
bool aborted;
+ int ret;
u16 cksum;
u32 x, y;
@@ -639,22 +611,22 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
/* validate the security checksum */
x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT);
x |= seq & 0x3fffffff;
- call->crypto_buf[0] = htonl(call->call_id);
- call->crypto_buf[1] = htonl(x);
+ crypto.buf[0] = htonl(call->call_id);
+ crypto.buf[1] = htonl(x);
- sg_init_one(&sg, call->crypto_buf, 8);
+ sg_init_one(&sg, crypto.buf, 8);
skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x);
crypto_skcipher_encrypt(req);
skcipher_request_zero(req);
- y = ntohl(call->crypto_buf[1]);
+ y = ntohl(crypto.buf[1]);
cksum = (y >> 16) & 0xffff;
if (cksum == 0)
cksum = 1; /* zero checksums are not permitted */
- if (cksum != expected_cksum) {
+ if (cksum != sp->hdr.cksum) {
aborted = rxrpc_abort_eproto(call, skb, "rxkad_csum", "VCK",
RXKADSEALEDINCON);
goto protocol_error;
@@ -662,15 +634,22 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
switch (call->conn->params.security_level) {
case RXRPC_SECURITY_PLAIN:
- return 0;
+ ret = 0;
+ break;
case RXRPC_SECURITY_AUTH:
- return rxkad_verify_packet_1(call, skb, offset, len, seq, req);
+ ret = rxkad_verify_packet_1(call, skb, seq, req);
+ break;
case RXRPC_SECURITY_ENCRYPT:
- return rxkad_verify_packet_2(call, skb, offset, len, seq, req);
+ ret = rxkad_verify_packet_2(call, skb, seq, req);
+ break;
default:
- return -ENOANO;
+ ret = -ENOANO;
+ break;
}
+ skcipher_request_free(req);
+ return ret;
+
protocol_error:
if (aborted)
rxrpc_send_abort_packet(call);
@@ -678,52 +657,6 @@ protocol_error:
}
/*
- * Locate the data contained in a packet that was partially encrypted.
- */
-static void rxkad_locate_data_1(struct rxrpc_call *call, struct sk_buff *skb,
- unsigned int *_offset, unsigned int *_len)
-{
- struct rxkad_level1_hdr sechdr;
-
- if (skb_copy_bits(skb, *_offset, &sechdr, sizeof(sechdr)) < 0)
- BUG();
- *_offset += sizeof(sechdr);
- *_len = ntohl(sechdr.data_size) & 0xffff;
-}
-
-/*
- * Locate the data contained in a packet that was completely encrypted.
- */
-static void rxkad_locate_data_2(struct rxrpc_call *call, struct sk_buff *skb,
- unsigned int *_offset, unsigned int *_len)
-{
- struct rxkad_level2_hdr sechdr;
-
- if (skb_copy_bits(skb, *_offset, &sechdr, sizeof(sechdr)) < 0)
- BUG();
- *_offset += sizeof(sechdr);
- *_len = ntohl(sechdr.data_size) & 0xffff;
-}
-
-/*
- * Locate the data contained in an already decrypted packet.
- */
-static void rxkad_locate_data(struct rxrpc_call *call, struct sk_buff *skb,
- unsigned int *_offset, unsigned int *_len)
-{
- switch (call->conn->params.security_level) {
- case RXRPC_SECURITY_AUTH:
- rxkad_locate_data_1(call, skb, _offset, _len);
- return;
- case RXRPC_SECURITY_ENCRYPT:
- rxkad_locate_data_2(call, skb, _offset, _len);
- return;
- default:
- return;
- }
-}
-
-/*
* issue a challenge
*/
static int rxkad_issue_challenge(struct rxrpc_connection *conn)
@@ -1234,7 +1167,6 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
abort_code = RXKADPACKETSHORT;
if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header) + sizeof(*response),
ticket, ticket_len) < 0)
- goto protocol_error_free;
ret = rxkad_decrypt_ticket(conn, server_key, skb, ticket, ticket_len,
&session_key, &expiry, _abort_code);
@@ -1397,7 +1329,6 @@ const struct rxrpc_security rxkad = {
.secure_packet = rxkad_secure_packet,
.verify_packet = rxkad_verify_packet,
.free_call_crypto = rxkad_free_call_crypto,
- .locate_data = rxkad_locate_data,
.issue_challenge = rxkad_issue_challenge,
.respond_to_challenge = rxkad_respond_to_challenge,
.verify_response = rxkad_verify_response,
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 3c3a626459de..e5fd8a95bf71 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -22,10 +22,26 @@
*/
static bool rxrpc_check_tx_space(struct rxrpc_call *call, rxrpc_seq_t *_tx_win)
{
- unsigned int win_size =
- min_t(unsigned int, call->tx_winsize,
- call->cong_cwnd + call->cong_extra);
- rxrpc_seq_t tx_win = READ_ONCE(call->tx_hard_ack);
+ unsigned int win_size;
+ rxrpc_seq_t tx_win = smp_load_acquire(&call->acks_hard_ack);
+
+ /* If we haven't transmitted anything for >1RTT, we should reset the
+ * congestion management state.
+ */
+ if (ktime_before(ktime_add_us(call->tx_last_sent,
+ call->peer->srtt_us >> 3),
+ ktime_get_real())) {
+ if (RXRPC_TX_SMSS > 2190)
+ win_size = 2;
+ else if (RXRPC_TX_SMSS > 1095)
+ win_size = 3;
+ else
+ win_size = 4;
+ win_size += call->cong_extra;
+ } else {
+ win_size = min_t(unsigned int, call->tx_winsize,
+ call->cong_cwnd + call->cong_extra);
+ }
if (_tx_win)
*_tx_win = tx_win;
@@ -50,7 +66,12 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
if (signal_pending(current))
return sock_intr_errno(*timeo);
- trace_rxrpc_transmit(call, rxrpc_transmit_wait);
+ if (READ_ONCE(call->acks_hard_ack) != call->tx_bottom) {
+ rxrpc_shrink_call_tx_buffer(call);
+ continue;
+ }
+
+ trace_rxrpc_txqueue(call, rxrpc_txqueue_wait);
*timeo = schedule_timeout(*timeo);
}
}
@@ -71,12 +92,11 @@ static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx,
rtt = 2;
timeout = rtt;
- tx_start = READ_ONCE(call->tx_hard_ack);
+ tx_start = smp_load_acquire(&call->acks_hard_ack);
for (;;) {
set_current_state(TASK_UNINTERRUPTIBLE);
- tx_win = READ_ONCE(call->tx_hard_ack);
if (rxrpc_check_tx_space(call, &tx_win))
return 0;
@@ -87,12 +107,17 @@ static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx,
tx_win == tx_start && signal_pending(current))
return -EINTR;
+ if (READ_ONCE(call->acks_hard_ack) != call->tx_bottom) {
+ rxrpc_shrink_call_tx_buffer(call);
+ continue;
+ }
+
if (tx_win != tx_start) {
timeout = rtt;
tx_start = tx_win;
}
- trace_rxrpc_transmit(call, rxrpc_transmit_wait);
+ trace_rxrpc_txqueue(call, rxrpc_txqueue_wait);
timeout = schedule_timeout(timeout);
}
}
@@ -112,7 +137,12 @@ static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx,
if (call->state >= RXRPC_CALL_COMPLETE)
return call->error;
- trace_rxrpc_transmit(call, rxrpc_transmit_wait);
+ if (READ_ONCE(call->acks_hard_ack) != call->tx_bottom) {
+ rxrpc_shrink_call_tx_buffer(call);
+ continue;
+ }
+
+ trace_rxrpc_txqueue(call, rxrpc_txqueue_wait);
*timeo = schedule_timeout(*timeo);
}
}
@@ -129,8 +159,8 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
DECLARE_WAITQUEUE(myself, current);
int ret;
- _enter(",{%u,%u,%u}",
- call->tx_hard_ack, call->tx_top, call->tx_winsize);
+ _enter(",{%u,%u,%u,%u}",
+ call->tx_bottom, call->acks_hard_ack, call->tx_top, call->tx_winsize);
add_wait_queue(&call->waitq, &myself);
@@ -155,24 +185,6 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
}
/*
- * Schedule an instant Tx resend.
- */
-static inline void rxrpc_instant_resend(struct rxrpc_call *call, int ix)
-{
- spin_lock_bh(&call->lock);
-
- if (call->state < RXRPC_CALL_COMPLETE) {
- call->rxtx_annotations[ix] =
- (call->rxtx_annotations[ix] & RXRPC_TX_ANNO_LAST) |
- RXRPC_TX_ANNO_RETRANS;
- if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
- rxrpc_queue_call(call);
- }
-
- spin_unlock_bh(&call->lock);
-}
-
-/*
* Notify the owner of the call that the transmit phase is ended and the last
* packet has been queued.
*/
@@ -188,38 +200,35 @@ static void rxrpc_notify_end_tx(struct rxrpc_sock *rx, struct rxrpc_call *call,
* the packet immediately. Returns the error from rxrpc_send_data_packet()
* in case the caller wants to do something with it.
*/
-static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
- struct sk_buff *skb, bool last,
- rxrpc_notify_end_tx_t notify_end_tx)
+static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
+ struct rxrpc_txbuf *txb,
+ rxrpc_notify_end_tx_t notify_end_tx)
{
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
unsigned long now;
- rxrpc_seq_t seq = sp->hdr.seq;
- int ret, ix;
- u8 annotation = RXRPC_TX_ANNO_UNACK;
+ rxrpc_seq_t seq = txb->seq;
+ bool last = test_bit(RXRPC_TXBUF_LAST, &txb->flags);
+ int ret;
- _net("queue skb %p [%d]", skb, seq);
+ rxrpc_inc_stat(call->rxnet, stat_tx_data);
ASSERTCMP(seq, ==, call->tx_top + 1);
- if (last)
- annotation |= RXRPC_TX_ANNO_LAST;
-
/* We have to set the timestamp before queueing as the retransmit
* algorithm can see the packet as soon as we queue it.
*/
- skb->tstamp = ktime_get_real();
+ txb->last_sent = ktime_get_real();
- ix = seq & RXRPC_RXTX_BUFF_MASK;
- rxrpc_get_skb(skb, rxrpc_skb_got);
- call->rxtx_annotations[ix] = annotation;
- smp_wmb();
- call->rxtx_buffer[ix] = skb;
+ /* Add the packet to the call's output buffer */
+ rxrpc_get_txbuf(txb, rxrpc_txbuf_get_buffer);
+ spin_lock(&call->tx_lock);
+ list_add_tail(&txb->call_link, &call->tx_buffer);
call->tx_top = seq;
+ spin_unlock(&call->tx_lock);
+
if (last)
- trace_rxrpc_transmit(call, rxrpc_transmit_queue_last);
+ trace_rxrpc_txqueue(call, rxrpc_txqueue_queue_last);
else
- trace_rxrpc_transmit(call, rxrpc_transmit_queue);
+ trace_rxrpc_txqueue(call, rxrpc_txqueue_queue);
if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) {
_debug("________awaiting reply/ACK__________");
@@ -232,7 +241,7 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
case RXRPC_CALL_SERVER_ACK_REQUEST:
call->state = RXRPC_CALL_SERVER_SEND_REPLY;
now = jiffies;
- WRITE_ONCE(call->ack_at, now + MAX_JIFFY_OFFSET);
+ WRITE_ONCE(call->delay_ack_at, now + MAX_JIFFY_OFFSET);
if (call->ackr_reason == RXRPC_ACK_DELAY)
call->ackr_reason = 0;
trace_rxrpc_timer(call, rxrpc_timer_init_for_send_reply, now);
@@ -252,7 +261,7 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
if (seq == 1 && rxrpc_is_client_call(call))
rxrpc_expose_client_call(call);
- ret = rxrpc_send_data_packet(call, skb, false);
+ ret = rxrpc_send_data_packet(call, txb);
if (ret < 0) {
switch (ret) {
case -ENETUNREACH:
@@ -262,8 +271,6 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
0, ret);
goto out;
}
- _debug("need instant resend %d", ret);
- rxrpc_instant_resend(call, ix);
} else {
unsigned long now = jiffies;
unsigned long resend_at = now + call->peer->rto_j;
@@ -274,9 +281,7 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
}
out:
- rxrpc_free_skb(skb, rxrpc_skb_freed);
- _leave(" = %d", ret);
- return ret;
+ rxrpc_put_txbuf(txb, rxrpc_txbuf_put_trans);
}
/*
@@ -290,8 +295,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
rxrpc_notify_end_tx_t notify_end_tx,
bool *_dropped_lock)
{
- struct rxrpc_skb_priv *sp;
- struct sk_buff *skb;
+ struct rxrpc_txbuf *txb;
struct sock *sk = &rx->sk;
enum rxrpc_call_state state;
long timeo;
@@ -325,16 +329,15 @@ reload:
goto maybe_error;
}
- skb = call->tx_pending;
+ txb = call->tx_pending;
call->tx_pending = NULL;
- rxrpc_see_skb(skb, rxrpc_skb_seen);
+ if (txb)
+ rxrpc_see_txbuf(txb, rxrpc_txbuf_see_send_more);
do {
- /* Check to see if there's a ping ACK to reply to. */
- if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE)
- rxrpc_send_ack_packet(call, false, NULL);
+ rxrpc_transmit_ack_packets(call->peer->local);
- if (!skb) {
+ if (!txb) {
size_t remain, bufsize, chunk, offset;
_debug("alloc");
@@ -355,53 +358,31 @@ reload:
_debug("SIZE: %zu/%zu @%zu", chunk, bufsize, offset);
/* create a buffer that we can retain until it's ACK'd */
- skb = sock_alloc_send_skb(
- sk, bufsize, msg->msg_flags & MSG_DONTWAIT, &ret);
- if (!skb)
+ ret = -ENOMEM;
+ txb = rxrpc_alloc_txbuf(call, RXRPC_PACKET_TYPE_DATA,
+ GFP_KERNEL);
+ if (!txb)
goto maybe_error;
- sp = rxrpc_skb(skb);
- sp->rx_flags |= RXRPC_SKB_TX_BUFFER;
- rxrpc_new_skb(skb, rxrpc_skb_new);
-
- _debug("ALLOC SEND %p", skb);
-
- ASSERTCMP(skb->mark, ==, 0);
-
- __skb_put(skb, offset);
-
- sp->remain = chunk;
- if (sp->remain > skb_tailroom(skb))
- sp->remain = skb_tailroom(skb);
-
- _net("skb: hr %d, tr %d, hl %d, rm %d",
- skb_headroom(skb),
- skb_tailroom(skb),
- skb_headlen(skb),
- sp->remain);
-
- skb->ip_summed = CHECKSUM_UNNECESSARY;
+ txb->offset = offset;
+ txb->space -= offset;
+ txb->space = min_t(size_t, chunk, txb->space);
}
_debug("append");
- sp = rxrpc_skb(skb);
/* append next segment of data to the current buffer */
if (msg_data_left(msg) > 0) {
- int copy = skb_tailroom(skb);
- ASSERTCMP(copy, >, 0);
- if (copy > msg_data_left(msg))
- copy = msg_data_left(msg);
- if (copy > sp->remain)
- copy = sp->remain;
-
- _debug("add");
- ret = skb_add_data(skb, &msg->msg_iter, copy);
- _debug("added");
- if (ret < 0)
+ size_t copy = min_t(size_t, txb->space, msg_data_left(msg));
+
+ _debug("add %zu", copy);
+ if (!copy_from_iter_full(txb->data + txb->offset, copy,
+ &msg->msg_iter))
goto efault;
- sp->remain -= copy;
- skb->mark += copy;
+ _debug("added");
+ txb->space -= copy;
+ txb->len += copy;
+ txb->offset += copy;
copied += copy;
if (call->tx_total_len != -1)
call->tx_total_len -= copy;
@@ -413,32 +394,22 @@ reload:
goto call_terminated;
/* add the packet to the send queue if it's now full */
- if (sp->remain <= 0 ||
+ if (!txb->space ||
(msg_data_left(msg) == 0 && !more)) {
- struct rxrpc_connection *conn = call->conn;
- uint32_t seq;
-
- seq = call->tx_top + 1;
-
- sp->hdr.seq = seq;
- sp->hdr._rsvd = 0;
- sp->hdr.flags = conn->out_clientflag;
-
- if (msg_data_left(msg) == 0 && !more)
- sp->hdr.flags |= RXRPC_LAST_PACKET;
- else if (call->tx_top - call->tx_hard_ack <
+ if (msg_data_left(msg) == 0 && !more) {
+ txb->wire.flags |= RXRPC_LAST_PACKET;
+ __set_bit(RXRPC_TXBUF_LAST, &txb->flags);
+ }
+ else if (call->tx_top - call->acks_hard_ack <
call->tx_winsize)
- sp->hdr.flags |= RXRPC_MORE_PACKETS;
+ txb->wire.flags |= RXRPC_MORE_PACKETS;
- ret = call->security->secure_packet(call, skb, skb->mark);
+ ret = call->security->secure_packet(call, txb);
if (ret < 0)
goto out;
- ret = rxrpc_queue_packet(rx, call, skb,
- !msg_data_left(msg) && !more,
- notify_end_tx);
- /* Should check for failure here */
- skb = NULL;
+ rxrpc_queue_packet(rx, call, txb, notify_end_tx);
+ txb = NULL;
}
} while (msg_data_left(msg) > 0);
@@ -451,12 +422,12 @@ success:
read_unlock_bh(&call->state_lock);
}
out:
- call->tx_pending = skb;
+ call->tx_pending = txb;
_leave(" = %d", ret);
return ret;
call_terminated:
- rxrpc_free_skb(skb, rxrpc_skb_freed);
+ rxrpc_put_txbuf(txb, rxrpc_txbuf_put_send_aborted);
_leave(" = %d", call->error);
return call->error;
@@ -645,7 +616,6 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
*/
int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
__releases(&rx->sk.sk_lock.slock)
- __releases(&call->user_mutex)
{
enum rxrpc_call_state state;
struct rxrpc_call *call;
diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c
index 580a5acffee7..0c827d5bb2b8 100644
--- a/net/rxrpc/skbuff.c
+++ b/net/rxrpc/skbuff.c
@@ -14,8 +14,7 @@
#include <net/af_rxrpc.h>
#include "ar-internal.h"
-#define is_tx_skb(skb) (rxrpc_skb(skb)->rx_flags & RXRPC_SKB_TX_BUFFER)
-#define select_skb_count(skb) (is_tx_skb(skb) ? &rxrpc_n_tx_skbs : &rxrpc_n_rx_skbs)
+#define select_skb_count(skb) (&rxrpc_n_rx_skbs)
/*
* Note the allocation or reception of a socket buffer.
@@ -24,8 +23,7 @@ void rxrpc_new_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
{
const void *here = __builtin_return_address(0);
int n = atomic_inc_return(select_skb_count(skb));
- trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n,
- rxrpc_skb(skb)->rx_flags, here);
+ trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
}
/*
@@ -36,8 +34,7 @@ void rxrpc_see_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
const void *here = __builtin_return_address(0);
if (skb) {
int n = atomic_read(select_skb_count(skb));
- trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n,
- rxrpc_skb(skb)->rx_flags, here);
+ trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
}
}
@@ -48,8 +45,7 @@ void rxrpc_get_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
{
const void *here = __builtin_return_address(0);
int n = atomic_inc_return(select_skb_count(skb));
- trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n,
- rxrpc_skb(skb)->rx_flags, here);
+ trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
skb_get(skb);
}
@@ -60,7 +56,7 @@ void rxrpc_eaten_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
{
const void *here = __builtin_return_address(0);
int n = atomic_inc_return(&rxrpc_n_rx_skbs);
- trace_rxrpc_skb(skb, op, 0, n, 0, here);
+ trace_rxrpc_skb(skb, op, 0, n, here);
}
/*
@@ -72,8 +68,7 @@ void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
if (skb) {
int n;
n = atomic_dec_return(select_skb_count(skb));
- trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n,
- rxrpc_skb(skb)->rx_flags, here);
+ trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
kfree_skb(skb);
}
}
@@ -88,8 +83,7 @@ void rxrpc_purge_queue(struct sk_buff_head *list)
while ((skb = skb_dequeue((list))) != NULL) {
int n = atomic_dec_return(select_skb_count(skb));
trace_rxrpc_skb(skb, rxrpc_skb_purged,
- refcount_read(&skb->users), n,
- rxrpc_skb(skb)->rx_flags, here);
+ refcount_read(&skb->users), n, here);
kfree_skb(skb);
}
}
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
index 555e0910786b..cde3224a5cd2 100644
--- a/net/rxrpc/sysctl.c
+++ b/net/rxrpc/sysctl.c
@@ -14,7 +14,7 @@ static struct ctl_table_header *rxrpc_sysctl_reg_table;
static const unsigned int four = 4;
static const unsigned int max_backlog = RXRPC_BACKLOG_MAX - 1;
static const unsigned int n_65535 = 65535;
-static const unsigned int n_max_acks = RXRPC_RXTX_BUFF_SIZE - 1;
+static const unsigned int n_max_acks = 255;
static const unsigned long one_jiffy = 1;
static const unsigned long max_jiffies = MAX_JIFFY_OFFSET;
@@ -27,15 +27,6 @@ static const unsigned long max_jiffies = MAX_JIFFY_OFFSET;
static struct ctl_table rxrpc_sysctl_table[] = {
/* Values measured in milliseconds but used in jiffies */
{
- .procname = "req_ack_delay",
- .data = &rxrpc_requested_ack_delay,
- .maxlen = sizeof(unsigned long),
- .mode = 0644,
- .proc_handler = proc_doulongvec_ms_jiffies_minmax,
- .extra1 = (void *)&one_jiffy,
- .extra2 = (void *)&max_jiffies,
- },
- {
.procname = "soft_ack_delay",
.data = &rxrpc_soft_ack_delay,
.maxlen = sizeof(unsigned long),
diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c
new file mode 100644
index 000000000000..96bfee89927b
--- /dev/null
+++ b/net/rxrpc/txbuf.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* RxRPC Tx data buffering.
+ *
+ * Copyright (C) 2022 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/slab.h>
+#include "ar-internal.h"
+
+static atomic_t rxrpc_txbuf_debug_ids;
+atomic_t rxrpc_nr_txbuf;
+
+/*
+ * Allocate and partially initialise an I/O request structure.
+ */
+struct rxrpc_txbuf *rxrpc_alloc_txbuf(struct rxrpc_call *call, u8 packet_type,
+ gfp_t gfp)
+{
+ struct rxrpc_txbuf *txb;
+
+ txb = kmalloc(sizeof(*txb), gfp);
+ if (txb) {
+ INIT_LIST_HEAD(&txb->call_link);
+ INIT_LIST_HEAD(&txb->tx_link);
+ refcount_set(&txb->ref, 1);
+ txb->call = call;
+ txb->call_debug_id = call->debug_id;
+ txb->debug_id = atomic_inc_return(&rxrpc_txbuf_debug_ids);
+ txb->space = sizeof(txb->data);
+ txb->len = 0;
+ txb->offset = 0;
+ txb->flags = 0;
+ txb->ack_why = 0;
+ txb->seq = call->tx_top + 1;
+ txb->wire.epoch = htonl(call->conn->proto.epoch);
+ txb->wire.cid = htonl(call->cid);
+ txb->wire.callNumber = htonl(call->call_id);
+ txb->wire.seq = htonl(txb->seq);
+ txb->wire.type = packet_type;
+ txb->wire.flags = call->conn->out_clientflag;
+ txb->wire.userStatus = 0;
+ txb->wire.securityIndex = call->security_ix;
+ txb->wire._rsvd = 0;
+ txb->wire.serviceId = htons(call->service_id);
+
+ trace_rxrpc_txbuf(txb->debug_id,
+ txb->call_debug_id, txb->seq, 1,
+ packet_type == RXRPC_PACKET_TYPE_DATA ?
+ rxrpc_txbuf_alloc_data :
+ rxrpc_txbuf_alloc_ack);
+ atomic_inc(&rxrpc_nr_txbuf);
+ }
+
+ return txb;
+}
+
+void rxrpc_get_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what)
+{
+ int r;
+
+ __refcount_inc(&txb->ref, &r);
+ trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, r + 1, what);
+}
+
+void rxrpc_see_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what)
+{
+ int r = refcount_read(&txb->ref);
+
+ trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, r, what);
+}
+
+static void rxrpc_free_txbuf(struct rcu_head *rcu)
+{
+ struct rxrpc_txbuf *txb = container_of(rcu, struct rxrpc_txbuf, rcu);
+
+ trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, 0,
+ rxrpc_txbuf_free);
+ kfree(txb);
+ atomic_dec(&rxrpc_nr_txbuf);
+}
+
+void rxrpc_put_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what)
+{
+ unsigned int debug_id, call_debug_id;
+ rxrpc_seq_t seq;
+ bool dead;
+ int r;
+
+ if (txb) {
+ debug_id = txb->debug_id;
+ call_debug_id = txb->call_debug_id;
+ seq = txb->seq;
+ dead = __refcount_dec_and_test(&txb->ref, &r);
+ trace_rxrpc_txbuf(debug_id, call_debug_id, seq, r - 1, what);
+ if (dead)
+ call_rcu(&txb->rcu, rxrpc_free_txbuf);
+ }
+}
+
+/*
+ * Shrink the transmit buffer.
+ */
+void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *call)
+{
+ struct rxrpc_txbuf *txb;
+ rxrpc_seq_t hard_ack = smp_load_acquire(&call->acks_hard_ack);
+
+ _enter("%x/%x/%x", call->tx_bottom, call->acks_hard_ack, call->tx_top);
+
+ for (;;) {
+ spin_lock(&call->tx_lock);
+ txb = list_first_entry_or_null(&call->tx_buffer,
+ struct rxrpc_txbuf, call_link);
+ if (!txb)
+ break;
+ hard_ack = smp_load_acquire(&call->acks_hard_ack);
+ if (before(hard_ack, txb->seq))
+ break;
+
+ ASSERTCMP(txb->seq, ==, call->tx_bottom + 1);
+ call->tx_bottom++;
+ list_del_rcu(&txb->call_link);
+
+ trace_rxrpc_txqueue(call, rxrpc_txqueue_dequeue);
+
+ spin_unlock(&call->tx_lock);
+
+ rxrpc_put_txbuf(txb, rxrpc_txbuf_put_rotated);
+ }
+
+ spin_unlock(&call->tx_lock);
+}