summaryrefslogtreecommitdiff
path: root/net/rxrpc
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2024-03-12 17:44:08 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2024-03-12 17:44:08 -0700
commit 9187210eee7d87eea37b45ea93454a88681894a4 (patch)
tree 31b4610e62cdd5e1dfb700014aa619e41145d7d3 /net/rxrpc
parent 1f440397665f4241346e4cc6d93f8b73880815d1 (diff)
parent ed1f164038b50c5864aa85389f3ffd456f050cca (diff)
Merge tag 'net-next-6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski: "Core & protocols: - Large effort by Eric to lower rtnl_lock pressure and remove locks: - Make commonly used parts of rtnetlink (address, route dumps etc) lockless, protected by RCU instead of rtnl_lock. - Add a netns exit callback which already holds rtnl_lock, allowing netns exit to take rtnl_lock once in the core instead of once for each driver / callback. - Remove locks / serialization in the socket diag interface. - Remove 6 calls to synchronize_rcu() while holding rtnl_lock. - Remove the dev_base_lock, depend on RCU where necessary. - Support busy polling on a per-epoll context basis. Poll length and budget parameters can be set independently of system defaults. - Introduce struct net_hotdata, to make sure read-mostly global config variables fit in as few cache lines as possible. - Add optional per-nexthop statistics to ease monitoring / debug of ECMP imbalance problems. - Support TCP_NOTSENT_LOWAT in MPTCP. - Ensure that IPv6 temporary addresses' preferred lifetimes are long enough, compared to other configured lifetimes, and at least 2 sec. - Support forwarding of ICMP Error messages in IPSec, per RFC 4301. - Add support for the independent control state machine for bonding per IEEE 802.1AX-2008 5.4.15 in addition to the existing coupled control state machine. - Add "network ID" to MCTP socket APIs to support hosts with multiple disjoint MCTP networks. - Re-use the mono_delivery_time skbuff bit for packets which user space wants to be sent at a specified time. Maintain the timing information while traversing veth links, bridge etc. - Take advantage of MSG_SPLICE_PAGES for RxRPC DATA and ACK packets. - Simplify many places iterating over netdevs by using an xarray instead of a hash table walk (hash table remains in place, for use on fastpaths). - Speed up scanning for expired routes by keeping a dedicated list. - Speed up "generic" XDP by trying harder to avoid large allocations. 
- Support attaching arbitrary metadata to netconsole messages. Things we sprinkled into general kernel code: - Enforce VM_IOREMAP flag and range in ioremap_page_range and introduce VM_SPARSE kind and vm_area_[un]map_pages (used by bpf_arena). - Rework selftest harness to enable the use of the full range of ksft exit code (pass, fail, skip, xfail, xpass). Netfilter: - Allow userspace to define a table that is exclusively owned by a daemon (via netlink socket aliveness) without auto-removing this table when the userspace program exits. Such table gets marked as orphaned and a restarting management daemon can re-attach/regain ownership. - Speed up element insertions to nftables' concatenated-ranges set type. Compact a few related data structures. BPF: - Add BPF token support for delegating a subset of BPF subsystem functionality from privileged system-wide daemons such as systemd through special mount options for userns-bound BPF fs to a trusted & unprivileged application. - Introduce bpf_arena which is sparse shared memory region between BPF program and user space where structures inside the arena can have pointers to other areas of the arena, and pointers work seamlessly for both user-space programs and BPF programs. - Introduce may_goto instruction that is a contract between the verifier and the program. The verifier allows the program to loop assuming it's behaving well, but reserves the right to terminate it. - Extend the BPF verifier to enable static subprog calls in spin lock critical sections. - Support registration of struct_ops types from modules which helps projects like fuse-bpf that seeks to implement a new struct_ops type. - Add support for retrieval of cookies for perf/kprobe multi links. - Support arbitrary TCP SYN cookie generation / validation in the TC layer with BPF to allow creating SYN flood handling in BPF firewalls. 
- Add code generation to inline the bpf_kptr_xchg() helper which improves performance when stashing/popping the allocated BPF objects. Wireless: - Add SPP (signaling and payload protected) AMSDU support. - Support wider bandwidth OFDMA, as required for EHT operation. Driver API: - Major overhaul of the Energy Efficient Ethernet internals to support new link modes (2.5GE, 5GE), share more code between drivers (especially those using phylib), and encourage more uniform behavior. Convert and clean up drivers. - Define an API for querying per netdev queue statistics from drivers. - IPSec: account in global stats for fully offloaded sessions. - Create a concept of Ethernet PHY Packages at the Device Tree level, to allow parameterizing the existing PHY package code. - Enable Rx hashing (RSS) on GTP protocol fields. Misc: - Improvements and refactoring all over networking selftests. - Create uniform module aliases for TC classifiers, actions, and packet schedulers to simplify creating modprobe policies. - Address all missing MODULE_DESCRIPTION() warnings in networking. - Extend the Netlink descriptions in YAML to cover message encapsulation or "Netlink polymorphism", where interpretation of nested attributes depends on link type, classifier type or some other "class type". Drivers: - Ethernet high-speed NICs: - Add a new driver for Marvell's Octeon PCI Endpoint NIC VF. 
- Intel (100G, ice, idpf): - support E825-C devices - nVidia/Mellanox: - support devices with one port and multiple PCIe links - Broadcom (bnxt): - support n-tuple filters - support configuring the RSS key - Wangxun (ngbe/txgbe): - implement irq_domain for TXGBE's sub-interrupts - Pensando/AMD: - support XDP - optimize queue submission and wakeup handling (+17% bps) - optimize struct layout, saving 28% of memory on queues - Ethernet NICs embedded and virtual: - Google cloud vNIC: - refactor driver to perform memory allocations for new queue config before stopping and freeing the old queue memory - Synopsys (stmmac): - obey queueMaxSDU and implement counters required by 802.1Qbv - Renesas (ravb): - support packet checksum offload - suspend to RAM and runtime PM support - Ethernet switches: - nVidia/Mellanox: - support for nexthop group statistics - Microchip: - ksz8: implement PHY loopback - add support for KSZ8567, a 7-port 10/100Mbps switch - PTP: - New driver for RENESAS FemtoClock3 Wireless clock generator. - Support OCP PTP cards designed and built by Adva. - CAN: - Support recvmsg() flags for own, local and remote traffic on CAN BCM sockets. - Support for esd GmbH PCIe/402 CAN device family. 
- m_can: - Rx/Tx submission coalescing - wake on frame Rx - WiFi: - Intel (iwlwifi): - enable signaling and payload protected A-MSDUs - support wider-bandwidth OFDMA - support for new devices - bump FW API to 89 for AX devices; 90 for BZ/SC devices - MediaTek (mt76): - mt7915: newer ADIE version support - mt7925: radio temperature sensor support - Qualcomm (ath11k): - support 6 GHz station power modes: Low Power Indoor (LPI), Standard Power (SP) and Very Low Power (VLP) - QCA6390 & WCN6855: support 2 concurrent station interfaces - QCA2066 support - Qualcomm (ath12k): - refactoring in preparation for Multi-Link Operation (MLO) support - 1024 Block Ack window size support - firmware-2.bin support - support having multiple identical PCI devices (firmware needs to have ATH12K_FW_FEATURE_MULTI_QRTR_ID) - QCN9274: support split-PHY devices - WCN7850: enable Power Save Mode in station mode - WCN7850: P2P support - RealTek: - rtw88: support for more rtw8811cu and rtw8821cu devices - rtw89: support SCAN_RANDOM_SN and SET_SCAN_DWELL - rtlwifi: speed up USB firmware initialization - rtl8xxxu: - RTL8188F: concurrent interface support - Channel Switch Announcement (CSA) support in AP mode - Broadcom (brcmfmac): - per-vendor feature support - per-vendor SAE password setup - DMI nvram filename quirk for ACEPC W5 Pro" * tag 'net-next-6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2255 commits) nexthop: Fix splat with CONFIG_DEBUG_PREEMPT=y nexthop: Fix out-of-bounds access during attribute validation nexthop: Only parse NHA_OP_FLAGS for dump messages that require it nexthop: Only parse NHA_OP_FLAGS for get messages that require it bpf: move sleepable flag from bpf_prog_aux to bpf_prog bpf: hardcode BPF_PROG_PACK_SIZE to 2MB * num_possible_nodes() selftests/bpf: Add kprobe multi triggering benchmarks ptp: Move from simple ida to xarray vxlan: Remove generic .ndo_get_stats64 vxlan: Do not alloc tstats manually devlink: Add comments to use netlink gen tool
nfp: flower: handle acti_netdevs allocation failure net/packet: Add getsockopt support for PACKET_COPY_THRESH net/netlink: Add getsockopt support for NETLINK_LISTEN_ALL_NSID selftests/bpf: Add bpf_arena_htab test. selftests/bpf: Add bpf_arena_list test. selftests/bpf: Add unit tests for bpf_arena_alloc/free_pages bpf: Add helper macro bpf_addr_space_cast() libbpf: Recognize __arena global variables. bpftool: Recognize arena map type ...
Diffstat (limited to 'net/rxrpc')
-rw-r--r-- net/rxrpc/af_rxrpc.c 12
-rw-r--r-- net/rxrpc/ar-internal.h 88
-rw-r--r-- net/rxrpc/call_event.c 327
-rw-r--r-- net/rxrpc/call_object.c 56
-rw-r--r-- net/rxrpc/conn_client.c 4
-rw-r--r-- net/rxrpc/conn_event.c 16
-rw-r--r-- net/rxrpc/conn_object.c 4
-rw-r--r-- net/rxrpc/input.c 116
-rw-r--r-- net/rxrpc/insecure.c 11
-rw-r--r-- net/rxrpc/io_thread.c 11
-rw-r--r-- net/rxrpc/local_object.c 3
-rw-r--r-- net/rxrpc/misc.c 8
-rw-r--r-- net/rxrpc/output.c 441
-rw-r--r-- net/rxrpc/proc.c 10
-rw-r--r-- net/rxrpc/protocol.h 6
-rw-r--r-- net/rxrpc/rtt.c 36
-rw-r--r-- net/rxrpc/rxkad.c 57
-rw-r--r-- net/rxrpc/sendmsg.c 63
-rw-r--r-- net/rxrpc/sysctl.c 16
-rw-r--r-- net/rxrpc/txbuf.c 174
20 files changed, 746 insertions, 713 deletions
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 465bfe5eb061..5222bc97d192 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -487,7 +487,7 @@ EXPORT_SYMBOL(rxrpc_kernel_new_call_notification);
* rxrpc_kernel_set_max_life - Set maximum lifespan on a call
* @sock: The socket the call is on
* @call: The call to configure
- * @hard_timeout: The maximum lifespan of the call in jiffies
+ * @hard_timeout: The maximum lifespan of the call in ms
*
* Set the maximum lifespan of a call. The call will end with ETIME or
* ETIMEDOUT if it takes longer than this.
@@ -495,14 +495,14 @@ EXPORT_SYMBOL(rxrpc_kernel_new_call_notification);
void rxrpc_kernel_set_max_life(struct socket *sock, struct rxrpc_call *call,
unsigned long hard_timeout)
{
- unsigned long now;
+ ktime_t delay = ms_to_ktime(hard_timeout), expect_term_by;
mutex_lock(&call->user_mutex);
- now = jiffies;
- hard_timeout += now;
- WRITE_ONCE(call->expect_term_by, hard_timeout);
- rxrpc_reduce_call_timer(call, hard_timeout, now, rxrpc_timer_set_for_hard);
+ expect_term_by = ktime_add(ktime_get_real(), delay);
+ WRITE_ONCE(call->expect_term_by, expect_term_by);
+ trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_hard);
+ rxrpc_poke_call(call, rxrpc_call_poke_set_timeout);
mutex_unlock(&call->user_mutex);
}
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 7818aae1be8e..08c0a32db8c7 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -198,8 +198,8 @@ struct rxrpc_host_header {
* - max 48 bytes (struct sk_buff::cb)
*/
struct rxrpc_skb_priv {
- struct rxrpc_connection *conn; /* Connection referred to (poke packet) */
union {
+ struct rxrpc_connection *conn; /* Connection referred to (poke packet) */
struct {
u16 offset; /* Offset of data */
u16 len; /* Length of data */
@@ -208,9 +208,12 @@ struct rxrpc_skb_priv {
};
struct {
rxrpc_seq_t first_ack; /* First packet in acks table */
+ rxrpc_seq_t prev_ack; /* Highest seq seen */
+ rxrpc_serial_t acked_serial; /* Packet in response to (or 0) */
+ u8 reason; /* Reason for ack */
u8 nr_acks; /* Number of acks+nacks */
u8 nr_nacks; /* Number of nacks */
- };
+ } ack;
};
struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */
};
@@ -248,10 +251,9 @@ struct rxrpc_security {
struct rxrpc_key_token *);
/* Work out how much data we can store in a packet, given an estimate
- * of the amount of data remaining.
+ * of the amount of data remaining and allocate a data buffer.
*/
- int (*how_much_data)(struct rxrpc_call *, size_t,
- size_t *, size_t *, size_t *);
+ struct rxrpc_txbuf *(*alloc_txbuf)(struct rxrpc_call *call, size_t remaining, gfp_t gfp);
/* impose security on a packet */
int (*secure_packet)(struct rxrpc_call *, struct rxrpc_txbuf *);
@@ -292,6 +294,7 @@ struct rxrpc_local {
struct socket *socket; /* my UDP socket */
struct task_struct *io_thread;
struct completion io_thread_ready; /* Indication that the I/O thread started */
+ struct page_frag_cache tx_alloc; /* Tx control packet allocation (I/O thread only) */
struct rxrpc_sock *service; /* Service(s) listening on this endpoint */
#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
struct sk_buff_head rx_delay_queue; /* Delay injection queue */
@@ -352,8 +355,8 @@ struct rxrpc_peer {
u32 mdev_us; /* medium deviation */
u32 mdev_max_us; /* maximal mdev for the last rtt period */
u32 rttvar_us; /* smoothed mdev_max */
- u32 rto_j; /* Retransmission timeout in jiffies */
- u8 backoff; /* Backoff timeout */
+ u32 rto_us; /* Retransmission timeout in usec */
+ u8 backoff; /* Backoff timeout (as shift) */
u8 cong_ssthresh; /* Congestion slow-start threshold */
};
@@ -500,6 +503,8 @@ struct rxrpc_connection {
struct list_head proc_link; /* link in procfs list */
struct list_head link; /* link in master connection list */
struct sk_buff_head rx_queue; /* received conn-level packets */
+ struct page_frag_cache tx_data_alloc; /* Tx DATA packet allocation */
+ struct mutex tx_data_alloc_lock;
struct mutex security_lock; /* Lock for security management */
const struct rxrpc_security *security; /* applied security module */
@@ -618,17 +623,17 @@ struct rxrpc_call {
const struct rxrpc_security *security; /* applied security module */
struct mutex user_mutex; /* User access mutex */
struct sockaddr_rxrpc dest_srx; /* Destination address */
- unsigned long delay_ack_at; /* When DELAY ACK needs to happen */
- unsigned long ack_lost_at; /* When ACK is figured as lost */
- unsigned long resend_at; /* When next resend needs to happen */
- unsigned long ping_at; /* When next to send a ping */
- unsigned long keepalive_at; /* When next to send a keepalive ping */
- unsigned long expect_rx_by; /* When we expect to get a packet by */
- unsigned long expect_req_by; /* When we expect to get a request DATA packet by */
- unsigned long expect_term_by; /* When we expect call termination by */
- u32 next_rx_timo; /* Timeout for next Rx packet (jif) */
- u32 next_req_timo; /* Timeout for next Rx request packet (jif) */
- u32 hard_timo; /* Maximum lifetime or 0 (jif) */
+ ktime_t delay_ack_at; /* When DELAY ACK needs to happen */
+ ktime_t ack_lost_at; /* When ACK is figured as lost */
+ ktime_t resend_at; /* When next resend needs to happen */
+ ktime_t ping_at; /* When next to send a ping */
+ ktime_t keepalive_at; /* When next to send a keepalive ping */
+ ktime_t expect_rx_by; /* When we expect to get a packet by */
+ ktime_t expect_req_by; /* When we expect to get a request DATA packet by */
+ ktime_t expect_term_by; /* When we expect call termination by */
+ u32 next_rx_timo; /* Timeout for next Rx packet (ms) */
+ u32 next_req_timo; /* Timeout for next Rx request packet (ms) */
+ u32 hard_timo; /* Maximum lifetime or 0 (s) */
struct timer_list timer; /* Combined event timer */
struct work_struct destroyer; /* In-process-context destroyer */
rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */
@@ -673,7 +678,7 @@ struct rxrpc_call {
rxrpc_seq_t tx_transmitted; /* Highest packet transmitted */
rxrpc_seq_t tx_prepared; /* Highest Tx slot prepared. */
rxrpc_seq_t tx_top; /* Highest Tx slot allocated. */
- u16 tx_backoff; /* Delay to insert due to Tx failure */
+ u16 tx_backoff; /* Delay to insert due to Tx failure (ms) */
u8 tx_winsize; /* Maximum size of Tx window */
#define RXRPC_TX_MAX_WINDOW 128
ktime_t tx_last_sent; /* Last time a transmission occurred */
@@ -788,40 +793,30 @@ struct rxrpc_send_params {
* Buffer of data to be output as a packet.
*/
struct rxrpc_txbuf {
- struct rcu_head rcu;
struct list_head call_link; /* Link in call->tx_sendmsg/tx_buffer */
struct list_head tx_link; /* Link in live Enc queue or Tx queue */
ktime_t last_sent; /* Time at which last transmitted */
refcount_t ref;
rxrpc_seq_t seq; /* Sequence number of this packet */
+ rxrpc_serial_t serial; /* Last serial number transmitted with */
unsigned int call_debug_id;
unsigned int debug_id;
unsigned int len; /* Amount of data in buffer */
unsigned int space; /* Remaining data space */
unsigned int offset; /* Offset of fill point */
- unsigned long flags;
-#define RXRPC_TXBUF_LAST 0 /* Set if last packet in Tx phase */
-#define RXRPC_TXBUF_RESENT 1 /* Set if has been resent */
+ unsigned int flags;
+#define RXRPC_TXBUF_WIRE_FLAGS 0xff /* The wire protocol flags */
+#define RXRPC_TXBUF_RESENT 0x100 /* Set if has been resent */
+ __be16 cksum; /* Checksum to go in header */
+ unsigned short ack_rwind; /* ACK receive window */
u8 /*enum rxrpc_propose_ack_trace*/ ack_why; /* If ack, why */
- struct {
- /* The packet for encrypting and DMA'ing. We align it such
- * that data[] aligns correctly for any crypto blocksize.
- */
- u8 pad[64 - sizeof(struct rxrpc_wire_header)];
- struct rxrpc_wire_header wire; /* Network-ready header */
- union {
- u8 data[RXRPC_JUMBO_DATALEN]; /* Data packet */
- struct {
- struct rxrpc_ackpacket ack;
- DECLARE_FLEX_ARRAY(u8, acks);
- };
- };
- } __aligned(64);
+ u8 nr_kvec; /* Amount of kvec[] used */
+ struct kvec kvec[3];
};
static inline bool rxrpc_sending_to_server(const struct rxrpc_txbuf *txb)
{
- return txb->wire.flags & RXRPC_CLIENT_INITIATED;
+ return txb->flags & RXRPC_CLIENT_INITIATED;
}
static inline bool rxrpc_sending_to_client(const struct rxrpc_txbuf *txb)
@@ -869,17 +864,11 @@ int rxrpc_user_charge_accept(struct rxrpc_sock *, unsigned long);
*/
void rxrpc_propose_ping(struct rxrpc_call *call, u32 serial,
enum rxrpc_propose_ack_trace why);
-void rxrpc_send_ACK(struct rxrpc_call *, u8, rxrpc_serial_t, enum rxrpc_propose_ack_trace);
void rxrpc_propose_delay_ACK(struct rxrpc_call *, rxrpc_serial_t,
enum rxrpc_propose_ack_trace);
void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *);
void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb);
-void rxrpc_reduce_call_timer(struct rxrpc_call *call,
- unsigned long expire_at,
- unsigned long now,
- enum rxrpc_timer_trace why);
-
bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb);
/*
@@ -1160,9 +1149,9 @@ static inline struct rxrpc_net *rxrpc_net(struct net *net)
/*
* output.c
*/
-int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb);
+void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason,
+ rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why);
int rxrpc_send_abort_packet(struct rxrpc_call *);
-int rxrpc_send_data_packet(struct rxrpc_call *, struct rxrpc_txbuf *);
void rxrpc_send_conn_abort(struct rxrpc_connection *conn);
void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb);
void rxrpc_send_keepalive(struct rxrpc_peer *);
@@ -1223,7 +1212,7 @@ static inline int rxrpc_abort_eproto(struct rxrpc_call *call,
*/
void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace, int,
rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t);
-unsigned long rxrpc_get_rto_backoff(struct rxrpc_peer *, bool);
+ktime_t rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans);
void rxrpc_peer_init_rtt(struct rxrpc_peer *);
/*
@@ -1295,8 +1284,9 @@ static inline void rxrpc_sysctl_exit(void) {}
* txbuf.c
*/
extern atomic_t rxrpc_nr_txbuf;
-struct rxrpc_txbuf *rxrpc_alloc_txbuf(struct rxrpc_call *call, u8 packet_type,
- gfp_t gfp);
+struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t data_size,
+ size_t data_align, gfp_t gfp);
+struct rxrpc_txbuf *rxrpc_alloc_ack_txbuf(struct rxrpc_call *call, size_t sack_size);
void rxrpc_get_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what);
void rxrpc_see_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what);
void rxrpc_put_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what);
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 0f78544d043b..7bbb68504766 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -23,14 +23,14 @@
void rxrpc_propose_ping(struct rxrpc_call *call, u32 serial,
enum rxrpc_propose_ack_trace why)
{
- unsigned long now = jiffies;
- unsigned long ping_at = now + rxrpc_idle_ack_delay;
-
- if (time_before(ping_at, call->ping_at)) {
- WRITE_ONCE(call->ping_at, ping_at);
- rxrpc_reduce_call_timer(call, ping_at, now,
- rxrpc_timer_set_for_ping);
- trace_rxrpc_propose_ack(call, why, RXRPC_ACK_PING, serial);
+ ktime_t delay = ms_to_ktime(READ_ONCE(rxrpc_idle_ack_delay));
+ ktime_t now = ktime_get_real();
+ ktime_t ping_at = ktime_add(now, delay);
+
+ trace_rxrpc_propose_ack(call, why, RXRPC_ACK_PING, serial);
+ if (ktime_before(ping_at, call->ping_at)) {
+ call->ping_at = ping_at;
+ trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_ping);
}
}
@@ -40,62 +40,18 @@ void rxrpc_propose_ping(struct rxrpc_call *call, u32 serial,
void rxrpc_propose_delay_ACK(struct rxrpc_call *call, rxrpc_serial_t serial,
enum rxrpc_propose_ack_trace why)
{
- unsigned long expiry = rxrpc_soft_ack_delay;
- unsigned long now = jiffies, ack_at;
-
- if (rxrpc_soft_ack_delay < expiry)
- expiry = rxrpc_soft_ack_delay;
- if (call->peer->srtt_us != 0)
- ack_at = usecs_to_jiffies(call->peer->srtt_us >> 3);
- else
- ack_at = expiry;
-
- ack_at += READ_ONCE(call->tx_backoff);
- ack_at += now;
- if (time_before(ack_at, call->delay_ack_at)) {
- WRITE_ONCE(call->delay_ack_at, ack_at);
- rxrpc_reduce_call_timer(call, ack_at, now,
- rxrpc_timer_set_for_ack);
- }
+ ktime_t now = ktime_get_real(), delay;
trace_rxrpc_propose_ack(call, why, RXRPC_ACK_DELAY, serial);
-}
-
-/*
- * Queue an ACK for immediate transmission.
- */
-void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason,
- rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why)
-{
- struct rxrpc_txbuf *txb;
-
- if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
- return;
-
- rxrpc_inc_stat(call->rxnet, stat_tx_acks[ack_reason]);
- txb = rxrpc_alloc_txbuf(call, RXRPC_PACKET_TYPE_ACK,
- rcu_read_lock_held() ? GFP_ATOMIC | __GFP_NOWARN : GFP_NOFS);
- if (!txb) {
- kleave(" = -ENOMEM");
- return;
- }
+ if (call->peer->srtt_us)
+ delay = (call->peer->srtt_us >> 3) * NSEC_PER_USEC;
+ else
+ delay = ms_to_ktime(READ_ONCE(rxrpc_soft_ack_delay));
+ ktime_add_ms(delay, call->tx_backoff);
- txb->ack_why = why;
- txb->wire.seq = 0;
- txb->wire.type = RXRPC_PACKET_TYPE_ACK;
- txb->wire.flags |= RXRPC_SLOW_START_OK;
- txb->ack.bufferSpace = 0;
- txb->ack.maxSkew = 0;
- txb->ack.firstPacket = 0;
- txb->ack.previousPacket = 0;
- txb->ack.serial = htonl(serial);
- txb->ack.reason = ack_reason;
- txb->ack.nAcks = 0;
-
- trace_rxrpc_send_ack(call, why, ack_reason, serial);
- rxrpc_send_ack_packet(call, txb);
- rxrpc_put_txbuf(txb, rxrpc_txbuf_put_ack_tx);
+ call->delay_ack_at = ktime_add(now, delay);
+ trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_delayed_ack);
}
/*
@@ -114,25 +70,19 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb)
struct rxrpc_ackpacket *ack = NULL;
struct rxrpc_skb_priv *sp;
struct rxrpc_txbuf *txb;
- unsigned long resend_at;
- rxrpc_seq_t transmitted = READ_ONCE(call->tx_transmitted);
- ktime_t now, max_age, oldest, ack_ts;
- bool unacked = false;
+ rxrpc_seq_t transmitted = call->tx_transmitted;
+ ktime_t next_resend = KTIME_MAX, rto = ns_to_ktime(call->peer->rto_us * NSEC_PER_USEC);
+ ktime_t resend_at = KTIME_MAX, now, delay;
+ bool unacked = false, did_send = false;
unsigned int i;
- LIST_HEAD(retrans_queue);
_enter("{%d,%d}", call->acks_hard_ack, call->tx_top);
now = ktime_get_real();
- max_age = ktime_sub_us(now, jiffies_to_usecs(call->peer->rto_j));
- oldest = now;
if (list_empty(&call->tx_buffer))
goto no_resend;
- if (list_empty(&call->tx_buffer))
- goto no_further_resend;
-
trace_rxrpc_resend(call, ack_skb);
txb = list_first_entry(&call->tx_buffer, struct rxrpc_txbuf, call_link);
@@ -143,12 +93,12 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb)
sp = rxrpc_skb(ack_skb);
ack = (void *)ack_skb->data + sizeof(struct rxrpc_wire_header);
- for (i = 0; i < sp->nr_acks; i++) {
+ for (i = 0; i < sp->ack.nr_acks; i++) {
rxrpc_seq_t seq;
if (ack->acks[i] & 1)
continue;
- seq = sp->first_ack + i;
+ seq = sp->ack.first_ack + i;
if (after(txb->seq, transmitted))
break;
if (after(txb->seq, seq))
@@ -160,19 +110,23 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb)
goto no_further_resend;
found_txb:
- if (after(ntohl(txb->wire.serial), call->acks_highest_serial))
+ resend_at = ktime_add(txb->last_sent, rto);
+ if (after(txb->serial, call->acks_highest_serial)) {
+ if (ktime_after(resend_at, now) &&
+ ktime_before(resend_at, next_resend))
+ next_resend = resend_at;
continue; /* Ack point not yet reached */
+ }
rxrpc_see_txbuf(txb, rxrpc_txbuf_see_unacked);
- if (list_empty(&txb->tx_link)) {
- list_add_tail(&txb->tx_link, &retrans_queue);
- set_bit(RXRPC_TXBUF_RESENT, &txb->flags);
- }
+ trace_rxrpc_retransmit(call, txb->seq, txb->serial,
+ ktime_sub(resend_at, now));
- trace_rxrpc_retransmit(call, txb->seq,
- ktime_to_ns(ktime_sub(txb->last_sent,
- max_age)));
+ txb->flags |= RXRPC_TXBUF_RESENT;
+ rxrpc_transmit_one(call, txb);
+ did_send = true;
+ now = ktime_get_real();
if (list_is_last(&txb->call_link, &call->tx_buffer))
goto no_further_resend;
@@ -184,43 +138,46 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb)
* seen. Anything between the soft-ACK table and that point will get
* ACK'd or NACK'd in due course, so don't worry about it here; here we
* need to consider retransmitting anything beyond that point.
- *
- * Note that ACK for a packet can beat the update of tx_transmitted.
*/
- if (after_eq(READ_ONCE(call->acks_prev_seq), READ_ONCE(call->tx_transmitted)))
+ if (after_eq(call->acks_prev_seq, call->tx_transmitted))
goto no_further_resend;
list_for_each_entry_from(txb, &call->tx_buffer, call_link) {
- if (before_eq(txb->seq, READ_ONCE(call->acks_prev_seq)))
+ resend_at = ktime_add(txb->last_sent, rto);
+
+ if (before_eq(txb->seq, call->acks_prev_seq))
continue;
- if (after(txb->seq, READ_ONCE(call->tx_transmitted)))
+ if (after(txb->seq, call->tx_transmitted))
break; /* Not transmitted yet */
if (ack && ack->reason == RXRPC_ACK_PING_RESPONSE &&
- before(ntohl(txb->wire.serial), ntohl(ack->serial)))
+ before(txb->serial, ntohl(ack->serial)))
goto do_resend; /* Wasn't accounted for by a more recent ping. */
- if (ktime_after(txb->last_sent, max_age)) {
- if (ktime_before(txb->last_sent, oldest))
- oldest = txb->last_sent;
+ if (ktime_after(resend_at, now)) {
+ if (ktime_before(resend_at, next_resend))
+ next_resend = resend_at;
continue;
}
do_resend:
unacked = true;
- if (list_empty(&txb->tx_link)) {
- list_add_tail(&txb->tx_link, &retrans_queue);
- set_bit(RXRPC_TXBUF_RESENT, &txb->flags);
- rxrpc_inc_stat(call->rxnet, stat_tx_data_retrans);
- }
+
+ txb->flags |= RXRPC_TXBUF_RESENT;
+ rxrpc_transmit_one(call, txb);
+ did_send = true;
+ rxrpc_inc_stat(call->rxnet, stat_tx_data_retrans);
+ now = ktime_get_real();
}
no_further_resend:
no_resend:
- resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(now, oldest)));
- resend_at += jiffies + rxrpc_get_rto_backoff(call->peer,
- !list_empty(&retrans_queue));
- WRITE_ONCE(call->resend_at, resend_at);
+ if (resend_at < KTIME_MAX) {
+ delay = rxrpc_get_rto_backoff(call->peer, did_send);
+ resend_at = ktime_add(resend_at, delay);
+ trace_rxrpc_timer_set(call, resend_at - now, rxrpc_timer_trace_resend_reset);
+ }
+ call->resend_at = resend_at;
if (unacked)
rxrpc_congestion_timeout(call);
@@ -229,25 +186,15 @@ no_resend:
* that an ACK got lost somewhere. Send a ping to find out instead of
* retransmitting data.
*/
- if (list_empty(&retrans_queue)) {
- rxrpc_reduce_call_timer(call, resend_at, jiffies,
- rxrpc_timer_set_for_resend);
- ack_ts = ktime_sub(now, call->acks_latest_ts);
- if (ktime_to_us(ack_ts) < (call->peer->srtt_us >> 3))
- goto out;
- rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
- rxrpc_propose_ack_ping_for_lost_ack);
- goto out;
- }
+ if (!did_send) {
+ ktime_t next_ping = ktime_add_us(call->acks_latest_ts,
+ call->peer->srtt_us >> 3);
- /* Retransmit the queue */
- while ((txb = list_first_entry_or_null(&retrans_queue,
- struct rxrpc_txbuf, tx_link))) {
- list_del_init(&txb->tx_link);
- rxrpc_transmit_one(call, txb);
+ if (ktime_sub(next_ping, now) <= 0)
+ rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
+ rxrpc_propose_ack_ping_for_0_retrans);
}
-out:
_leave("");
}
@@ -257,13 +204,11 @@ out:
*/
static void rxrpc_begin_service_reply(struct rxrpc_call *call)
{
- unsigned long now = jiffies;
-
rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SEND_REPLY);
- WRITE_ONCE(call->delay_ack_at, now + MAX_JIFFY_OFFSET);
if (call->ackr_reason == RXRPC_ACK_DELAY)
call->ackr_reason = 0;
- trace_rxrpc_timer(call, rxrpc_timer_init_for_send_reply, now);
+ call->delay_ack_at = KTIME_MAX;
+ trace_rxrpc_timer_can(call, rxrpc_timer_trace_delayed_ack);
}
/*
@@ -320,7 +265,7 @@ static void rxrpc_decant_prepared_tx(struct rxrpc_call *call)
call->tx_top = txb->seq;
list_add_tail(&txb->call_link, &call->tx_buffer);
- if (txb->wire.flags & RXRPC_LAST_PACKET)
+ if (txb->flags & RXRPC_LAST_PACKET)
rxrpc_close_tx_phase(call);
rxrpc_transmit_one(call, txb);
@@ -372,8 +317,8 @@ static void rxrpc_send_initial_ping(struct rxrpc_call *call)
*/
bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
{
- unsigned long now, next, t;
- bool resend = false, expired = false;
+ ktime_t now, t;
+ bool resend = false;
s32 abort_code;
rxrpc_see_call(call, rxrpc_call_see_input);
@@ -397,70 +342,73 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
if (skb && skb->mark == RXRPC_SKB_MARK_ERROR)
goto out;
+ if (skb)
+ rxrpc_input_call_packet(call, skb);
+
/* If we see our async-event poke, check for timeout trippage. */
- now = jiffies;
- t = READ_ONCE(call->expect_rx_by);
- if (time_after_eq(now, t)) {
- trace_rxrpc_timer(call, rxrpc_timer_exp_normal, now);
- expired = true;
+ now = ktime_get_real();
+ t = ktime_sub(call->expect_rx_by, now);
+ if (t <= 0) {
+ trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_expect_rx);
+ goto expired;
}
- t = READ_ONCE(call->expect_req_by);
- if (__rxrpc_call_state(call) == RXRPC_CALL_SERVER_RECV_REQUEST &&
- time_after_eq(now, t)) {
- trace_rxrpc_timer(call, rxrpc_timer_exp_idle, now);
- expired = true;
+ t = ktime_sub(call->expect_req_by, now);
+ if (t <= 0) {
+ call->expect_req_by = KTIME_MAX;
+ if (__rxrpc_call_state(call) == RXRPC_CALL_SERVER_RECV_REQUEST) {
+ trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_idle);
+ goto expired;
+ }
}
- t = READ_ONCE(call->expect_term_by);
- if (time_after_eq(now, t)) {
- trace_rxrpc_timer(call, rxrpc_timer_exp_hard, now);
- expired = true;
+ t = ktime_sub(READ_ONCE(call->expect_term_by), now);
+ if (t <= 0) {
+ trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_hard);
+ goto expired;
}
- t = READ_ONCE(call->delay_ack_at);
- if (time_after_eq(now, t)) {
- trace_rxrpc_timer(call, rxrpc_timer_exp_ack, now);
- cmpxchg(&call->delay_ack_at, t, now + MAX_JIFFY_OFFSET);
+ t = ktime_sub(call->delay_ack_at, now);
+ if (t <= 0) {
+ trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_delayed_ack);
+ call->delay_ack_at = KTIME_MAX;
rxrpc_send_ACK(call, RXRPC_ACK_DELAY, 0,
- rxrpc_propose_ack_ping_for_lost_ack);
+ rxrpc_propose_ack_delayed_ack);
}
- t = READ_ONCE(call->ack_lost_at);
- if (time_after_eq(now, t)) {
- trace_rxrpc_timer(call, rxrpc_timer_exp_lost_ack, now);
- cmpxchg(&call->ack_lost_at, t, now + MAX_JIFFY_OFFSET);
+ t = ktime_sub(call->ack_lost_at, now);
+ if (t <= 0) {
+ trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_lost_ack);
+ call->ack_lost_at = KTIME_MAX;
set_bit(RXRPC_CALL_EV_ACK_LOST, &call->events);
}
- t = READ_ONCE(call->keepalive_at);
- if (time_after_eq(now, t)) {
- trace_rxrpc_timer(call, rxrpc_timer_exp_keepalive, now);
- cmpxchg(&call->keepalive_at, t, now + MAX_JIFFY_OFFSET);
+ t = ktime_sub(call->ping_at, now);
+ if (t <= 0) {
+ trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_ping);
+ call->ping_at = KTIME_MAX;
rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
rxrpc_propose_ack_ping_for_keepalive);
}
- t = READ_ONCE(call->ping_at);
- if (time_after_eq(now, t)) {
- trace_rxrpc_timer(call, rxrpc_timer_exp_ping, now);
- cmpxchg(&call->ping_at, t, now + MAX_JIFFY_OFFSET);
- rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
- rxrpc_propose_ack_ping_for_keepalive);
- }
-
- t = READ_ONCE(call->resend_at);
- if (time_after_eq(now, t)) {
- trace_rxrpc_timer(call, rxrpc_timer_exp_resend, now);
- cmpxchg(&call->resend_at, t, now + MAX_JIFFY_OFFSET);
+ t = ktime_sub(call->resend_at, now);
+ if (t <= 0) {
+ trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_resend);
+ call->resend_at = KTIME_MAX;
resend = true;
}
- if (skb)
- rxrpc_input_call_packet(call, skb);
-
rxrpc_transmit_some_data(call);
+ now = ktime_get_real();
+ t = ktime_sub(call->keepalive_at, now);
+ if (t <= 0) {
+ trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_keepalive);
+ call->keepalive_at = KTIME_MAX;
+ rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
+ rxrpc_propose_ack_ping_for_keepalive);
+ }
+
if (skb) {
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
@@ -472,24 +420,13 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
rxrpc_send_initial_ping(call);
/* Process events */
- if (expired) {
- if (test_bit(RXRPC_CALL_RX_HEARD, &call->flags) &&
- (int)call->conn->hi_serial - (int)call->rx_serial > 0) {
- trace_rxrpc_call_reset(call);
- rxrpc_abort_call(call, 0, RX_CALL_DEAD, -ECONNRESET,
- rxrpc_abort_call_reset);
- } else {
- rxrpc_abort_call(call, 0, RX_CALL_TIMEOUT, -ETIME,
- rxrpc_abort_call_timeout);
- }
- goto out;
- }
-
if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events))
rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
rxrpc_propose_ack_ping_for_lost_ack);
- if (resend && __rxrpc_call_state(call) != RXRPC_CALL_CLIENT_RECV_REPLY)
+ if (resend &&
+ __rxrpc_call_state(call) != RXRPC_CALL_CLIENT_RECV_REPLY &&
+ !test_bit(RXRPC_CALL_TX_ALL_ACKED, &call->flags))
rxrpc_resend(call, NULL);
if (test_and_clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags))
@@ -511,23 +448,33 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
/* Make sure the timer is restarted */
if (!__rxrpc_call_is_complete(call)) {
- next = call->expect_rx_by;
+ ktime_t next = READ_ONCE(call->expect_term_by), delay;
-#define set(T) { t = READ_ONCE(T); if (time_before(t, next)) next = t; }
+#define set(T) { ktime_t _t = (T); if (ktime_before(_t, next)) next = _t; }
set(call->expect_req_by);
- set(call->expect_term_by);
+ set(call->expect_rx_by);
set(call->delay_ack_at);
set(call->ack_lost_at);
set(call->resend_at);
set(call->keepalive_at);
set(call->ping_at);
- now = jiffies;
- if (time_after_eq(now, next))
+ now = ktime_get_real();
+ delay = ktime_sub(next, now);
+ if (delay <= 0) {
rxrpc_poke_call(call, rxrpc_call_poke_timer_now);
-
- rxrpc_reduce_call_timer(call, next, now, rxrpc_timer_restart);
+ } else {
+ unsigned long nowj = jiffies, delayj, nextj;
+
+ delayj = max(nsecs_to_jiffies(delay), 1);
+ nextj = nowj + delayj;
+ if (time_before(nextj, call->timer.expires) ||
+ !timer_pending(&call->timer)) {
+ trace_rxrpc_timer_restart(call, delay, delayj);
+ timer_reduce(&call->timer, nextj);
+ }
+ }
}
out:
@@ -542,4 +489,16 @@ out:
rxrpc_shrink_call_tx_buffer(call);
_leave("");
return true;
+
+expired:
+ if (test_bit(RXRPC_CALL_RX_HEARD, &call->flags) &&
+ (int)call->conn->hi_serial - (int)call->rx_serial > 0) {
+ trace_rxrpc_call_reset(call);
+ rxrpc_abort_call(call, 0, RX_CALL_DEAD, -ECONNRESET,
+ rxrpc_abort_call_reset);
+ } else {
+ rxrpc_abort_call(call, 0, RX_CALL_TIMEOUT, -ETIME,
+ rxrpc_abort_call_timeout);
+ }
+ goto out;
}
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 9fc9a6c3f685..01fa71e8b1f7 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -70,20 +70,11 @@ static void rxrpc_call_timer_expired(struct timer_list *t)
_enter("%d", call->debug_id);
if (!__rxrpc_call_is_complete(call)) {
- trace_rxrpc_timer_expired(call, jiffies);
+ trace_rxrpc_timer_expired(call);
rxrpc_poke_call(call, rxrpc_call_poke_timer);
}
}
-void rxrpc_reduce_call_timer(struct rxrpc_call *call,
- unsigned long expire_at,
- unsigned long now,
- enum rxrpc_timer_trace why)
-{
- trace_rxrpc_timer(call, why, now);
- timer_reduce(&call->timer, expire_at);
-}
-
static struct lock_class_key rxrpc_call_user_mutex_lock_class_key;
static void rxrpc_destroy_call(struct work_struct *);
@@ -163,12 +154,20 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
spin_lock_init(&call->notify_lock);
spin_lock_init(&call->tx_lock);
refcount_set(&call->ref, 1);
- call->debug_id = debug_id;
- call->tx_total_len = -1;
- call->next_rx_timo = 20 * HZ;
- call->next_req_timo = 1 * HZ;
- call->ackr_window = 1;
- call->ackr_wtop = 1;
+ call->debug_id = debug_id;
+ call->tx_total_len = -1;
+ call->next_rx_timo = 20 * HZ;
+ call->next_req_timo = 1 * HZ;
+ call->ackr_window = 1;
+ call->ackr_wtop = 1;
+ call->delay_ack_at = KTIME_MAX;
+ call->ack_lost_at = KTIME_MAX;
+ call->resend_at = KTIME_MAX;
+ call->ping_at = KTIME_MAX;
+ call->keepalive_at = KTIME_MAX;
+ call->expect_rx_by = KTIME_MAX;
+ call->expect_req_by = KTIME_MAX;
+ call->expect_term_by = KTIME_MAX;
memset(&call->sock_node, 0xed, sizeof(call->sock_node));
@@ -226,11 +225,11 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
__set_bit(RXRPC_CALL_EXCLUSIVE, &call->flags);
if (p->timeouts.normal)
- call->next_rx_timo = min(msecs_to_jiffies(p->timeouts.normal), 1UL);
+ call->next_rx_timo = min(p->timeouts.normal, 1);
if (p->timeouts.idle)
- call->next_req_timo = min(msecs_to_jiffies(p->timeouts.idle), 1UL);
+ call->next_req_timo = min(p->timeouts.idle, 1);
if (p->timeouts.hard)
- call->hard_timo = p->timeouts.hard * HZ;
+ call->hard_timo = p->timeouts.hard;
ret = rxrpc_init_client_call_security(call);
if (ret < 0) {
@@ -253,18 +252,13 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
*/
void rxrpc_start_call_timer(struct rxrpc_call *call)
{
- unsigned long now = jiffies;
- unsigned long j = now + MAX_JIFFY_OFFSET;
-
- call->delay_ack_at = j;
- call->ack_lost_at = j;
- call->resend_at = j;
- call->ping_at = j;
- call->keepalive_at = j;
- call->expect_rx_by = j;
- call->expect_req_by = j;
- call->expect_term_by = j + call->hard_timo;
- call->timer.expires = now;
+ if (call->hard_timo) {
+ ktime_t delay = ms_to_ktime(call->hard_timo * 1000);
+
+ call->expect_term_by = ktime_add(ktime_get_real(), delay);
+ trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_hard);
+ }
+ call->timer.expires = jiffies;
}
/*
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 3b9b267a4431..d25bf1cf3670 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -636,7 +636,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call
test_bit(RXRPC_CALL_EXPOSED, &call->flags)) {
unsigned long final_ack_at = jiffies + 2;
- WRITE_ONCE(chan->final_ack_at, final_ack_at);
+ chan->final_ack_at = final_ack_at;
smp_wmb(); /* vs rxrpc_process_delayed_final_acks() */
set_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags);
rxrpc_reduce_conn_timer(conn, final_ack_at);
@@ -770,7 +770,7 @@ next:
conn_expires_at = conn->idle_timestamp + expiry;
- now = READ_ONCE(jiffies);
+ now = jiffies;
if (time_after(conn_expires_at, now))
goto not_yet_expired;
}
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 1f251d758cb9..598b4ee389fc 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -88,7 +88,7 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
struct rxrpc_ackpacket ack;
};
} __attribute__((packed)) pkt;
- struct rxrpc_ackinfo ack_info;
+ struct rxrpc_acktrailer trailer;
size_t len;
int ret, ioc;
u32 serial, mtu, call_id, padding;
@@ -122,8 +122,8 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
iov[0].iov_len = sizeof(pkt.whdr);
iov[1].iov_base = &padding;
iov[1].iov_len = 3;
- iov[2].iov_base = &ack_info;
- iov[2].iov_len = sizeof(ack_info);
+ iov[2].iov_base = &trailer;
+ iov[2].iov_len = sizeof(trailer);
serial = rxrpc_get_next_serial(conn);
@@ -158,14 +158,14 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
pkt.ack.serial = htonl(skb ? sp->hdr.serial : 0);
pkt.ack.reason = skb ? RXRPC_ACK_DUPLICATE : RXRPC_ACK_IDLE;
pkt.ack.nAcks = 0;
- ack_info.rxMTU = htonl(rxrpc_rx_mtu);
- ack_info.maxMTU = htonl(mtu);
- ack_info.rwind = htonl(rxrpc_rx_window_size);
- ack_info.jumbo_max = htonl(rxrpc_rx_jumbo_max);
+ trailer.maxMTU = htonl(rxrpc_rx_mtu);
+ trailer.ifMTU = htonl(mtu);
+ trailer.rwind = htonl(rxrpc_rx_window_size);
+ trailer.jumbo_max = htonl(rxrpc_rx_jumbo_max);
pkt.whdr.flags |= RXRPC_SLOW_START_OK;
padding = 0;
iov[0].iov_len += sizeof(pkt.ack);
- len += sizeof(pkt.ack) + 3 + sizeof(ack_info);
+ len += sizeof(pkt.ack) + 3 + sizeof(trailer);
ioc = 3;
trace_rxrpc_tx_ack(chan->call_debug_id, serial,
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index df8a271948a1..0af4642aeec4 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -68,6 +68,7 @@ struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *rxnet,
INIT_LIST_HEAD(&conn->proc_link);
INIT_LIST_HEAD(&conn->link);
mutex_init(&conn->security_lock);
+ mutex_init(&conn->tx_data_alloc_lock);
skb_queue_head_init(&conn->rx_queue);
conn->rxnet = rxnet;
conn->security = &rxrpc_no_security;
@@ -341,6 +342,9 @@ static void rxrpc_clean_up_connection(struct work_struct *work)
*/
rxrpc_purge_queue(&conn->rx_queue);
+ if (conn->tx_data_alloc.va)
+ __page_frag_cache_drain(virt_to_page(conn->tx_data_alloc.va),
+ conn->tx_data_alloc.pagecnt_bias);
call_rcu(&conn->rcu, rxrpc_rcu_free_connection);
}
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 9691de00ade7..3dedb8c0618c 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -212,7 +212,7 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
list_for_each_entry_rcu(txb, &call->tx_buffer, call_link, false) {
if (before_eq(txb->seq, call->acks_hard_ack))
continue;
- if (test_bit(RXRPC_TXBUF_LAST, &txb->flags)) {
+ if (txb->flags & RXRPC_LAST_PACKET) {
set_bit(RXRPC_CALL_TX_LAST, &call->flags);
rot_last = true;
}
@@ -252,6 +252,9 @@ static void rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
{
ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags));
+ call->resend_at = KTIME_MAX;
+ trace_rxrpc_timer_can(call, rxrpc_timer_trace_resend);
+
if (unlikely(call->cong_last_nack)) {
rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack);
call->cong_last_nack = NULL;
@@ -288,15 +291,11 @@ static void rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
static bool rxrpc_receiving_reply(struct rxrpc_call *call)
{
struct rxrpc_ack_summary summary = { 0 };
- unsigned long now, timo;
rxrpc_seq_t top = READ_ONCE(call->tx_top);
if (call->ackr_reason) {
- now = jiffies;
- timo = now + MAX_JIFFY_OFFSET;
-
- WRITE_ONCE(call->delay_ack_at, timo);
- trace_rxrpc_timer(call, rxrpc_timer_init_for_reply, now);
+ call->delay_ack_at = KTIME_MAX;
+ trace_rxrpc_timer_can(call, rxrpc_timer_trace_delayed_ack);
}
if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) {
@@ -329,7 +328,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial)
case RXRPC_CALL_SERVER_RECV_REQUEST:
rxrpc_set_call_state(call, RXRPC_CALL_SERVER_ACK_REQUEST);
- call->expect_req_by = jiffies + MAX_JIFFY_OFFSET;
+ call->expect_req_by = KTIME_MAX;
rxrpc_propose_delay_ACK(call, serial, rxrpc_propose_ack_processing_op);
break;
@@ -589,14 +588,12 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
case RXRPC_CALL_SERVER_RECV_REQUEST: {
unsigned long timo = READ_ONCE(call->next_req_timo);
- unsigned long now, expect_req_by;
if (timo) {
- now = jiffies;
- expect_req_by = now + timo;
- WRITE_ONCE(call->expect_req_by, expect_req_by);
- rxrpc_reduce_call_timer(call, expect_req_by, now,
- rxrpc_timer_set_for_idle);
+ ktime_t delay = ms_to_ktime(timo);
+
+ call->expect_req_by = ktime_add(ktime_get_real(), delay);
+ trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_idle);
}
break;
}
@@ -670,14 +667,14 @@ static void rxrpc_complete_rtt_probe(struct rxrpc_call *call,
/*
* Process the extra information that may be appended to an ACK packet
*/
-static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
- struct rxrpc_ackinfo *ackinfo)
+static void rxrpc_input_ack_trailer(struct rxrpc_call *call, struct sk_buff *skb,
+ struct rxrpc_acktrailer *trailer)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_peer *peer;
unsigned int mtu;
bool wake = false;
- u32 rwind = ntohl(ackinfo->rwind);
+ u32 rwind = ntohl(trailer->rwind);
if (rwind > RXRPC_TX_MAX_WINDOW)
rwind = RXRPC_TX_MAX_WINDOW;
@@ -691,7 +688,7 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
if (call->cong_ssthresh > rwind)
call->cong_ssthresh = rwind;
- mtu = min(ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU));
+ mtu = min(ntohl(trailer->maxMTU), ntohl(trailer->ifMTU));
peer = call->peer;
if (mtu < peer->maxdata) {
@@ -713,20 +710,19 @@ static rxrpc_seq_t rxrpc_input_check_prev_ack(struct rxrpc_call *call,
rxrpc_seq_t seq)
{
struct sk_buff *skb = call->cong_last_nack;
- struct rxrpc_ackpacket ack;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
unsigned int i, new_acks = 0, retained_nacks = 0;
- rxrpc_seq_t old_seq = sp->first_ack;
- u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(ack);
+ rxrpc_seq_t old_seq = sp->ack.first_ack;
+ u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket);
- if (after_eq(seq, old_seq + sp->nr_acks)) {
- summary->nr_new_acks += sp->nr_nacks;
- summary->nr_new_acks += seq - (old_seq + sp->nr_acks);
+ if (after_eq(seq, old_seq + sp->ack.nr_acks)) {
+ summary->nr_new_acks += sp->ack.nr_nacks;
+ summary->nr_new_acks += seq - (old_seq + sp->ack.nr_acks);
summary->nr_retained_nacks = 0;
} else if (seq == old_seq) {
- summary->nr_retained_nacks = sp->nr_nacks;
+ summary->nr_retained_nacks = sp->ack.nr_nacks;
} else {
- for (i = 0; i < sp->nr_acks; i++) {
+ for (i = 0; i < sp->ack.nr_acks; i++) {
if (acks[i] == RXRPC_ACK_TYPE_NACK) {
if (before(old_seq + i, seq))
new_acks++;
@@ -739,7 +735,7 @@ static rxrpc_seq_t rxrpc_input_check_prev_ack(struct rxrpc_call *call,
summary->nr_retained_nacks = retained_nacks;
}
- return old_seq + sp->nr_acks;
+ return old_seq + sp->ack.nr_acks;
}
/*
@@ -759,10 +755,10 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call,
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
unsigned int i, old_nacks = 0;
- rxrpc_seq_t lowest_nak = seq + sp->nr_acks;
+ rxrpc_seq_t lowest_nak = seq + sp->ack.nr_acks;
u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket);
- for (i = 0; i < sp->nr_acks; i++) {
+ for (i = 0; i < sp->ack.nr_acks; i++) {
if (acks[i] == RXRPC_ACK_TYPE_ACK) {
summary->nr_acks++;
if (after_eq(seq, since))
@@ -774,7 +770,7 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call,
old_nacks++;
} else {
summary->nr_new_nacks++;
- sp->nr_nacks++;
+ sp->ack.nr_nacks++;
}
if (before(seq, lowest_nak))
@@ -835,38 +831,32 @@ static bool rxrpc_is_ack_valid(struct rxrpc_call *call,
static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
{
struct rxrpc_ack_summary summary = { 0 };
- struct rxrpc_ackpacket ack;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- struct rxrpc_ackinfo info;
+ struct rxrpc_acktrailer trailer;
rxrpc_serial_t ack_serial, acked_serial;
rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt, since;
int nr_acks, offset, ioffset;
_enter("");
- offset = sizeof(struct rxrpc_wire_header);
- if (skb_copy_bits(skb, offset, &ack, sizeof(ack)) < 0)
- return rxrpc_proto_abort(call, 0, rxrpc_badmsg_short_ack);
- offset += sizeof(ack);
-
- ack_serial = sp->hdr.serial;
- acked_serial = ntohl(ack.serial);
- first_soft_ack = ntohl(ack.firstPacket);
- prev_pkt = ntohl(ack.previousPacket);
- hard_ack = first_soft_ack - 1;
- nr_acks = ack.nAcks;
- sp->first_ack = first_soft_ack;
- sp->nr_acks = nr_acks;
- summary.ack_reason = (ack.reason < RXRPC_ACK__INVALID ?
- ack.reason : RXRPC_ACK__INVALID);
+ offset = sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket);
+
+ ack_serial = sp->hdr.serial;
+ acked_serial = sp->ack.acked_serial;
+ first_soft_ack = sp->ack.first_ack;
+ prev_pkt = sp->ack.prev_ack;
+ nr_acks = sp->ack.nr_acks;
+ hard_ack = first_soft_ack - 1;
+ summary.ack_reason = (sp->ack.reason < RXRPC_ACK__INVALID ?
+ sp->ack.reason : RXRPC_ACK__INVALID);
trace_rxrpc_rx_ack(call, ack_serial, acked_serial,
first_soft_ack, prev_pkt,
summary.ack_reason, nr_acks);
- rxrpc_inc_stat(call->rxnet, stat_rx_acks[ack.reason]);
+ rxrpc_inc_stat(call->rxnet, stat_rx_acks[summary.ack_reason]);
if (acked_serial != 0) {
- switch (ack.reason) {
+ switch (summary.ack_reason) {
case RXRPC_ACK_PING_RESPONSE:
rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial,
rxrpc_rtt_rx_ping_response);
@@ -886,7 +876,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
* indicates that the client address changed due to NAT. The server
* lost the call because it switched to a different peer.
*/
- if (unlikely(ack.reason == RXRPC_ACK_EXCEEDS_WINDOW) &&
+ if (unlikely(summary.ack_reason == RXRPC_ACK_EXCEEDS_WINDOW) &&
first_soft_ack == 1 &&
prev_pkt == 0 &&
rxrpc_is_client_call(call)) {
@@ -899,7 +889,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
* indicate a change of address. However, we can retransmit the call
* if we still have it buffered to the beginning.
*/
- if (unlikely(ack.reason == RXRPC_ACK_OUT_OF_SEQUENCE) &&
+ if (unlikely(summary.ack_reason == RXRPC_ACK_OUT_OF_SEQUENCE) &&
first_soft_ack == 1 &&
prev_pkt == 0 &&
call->acks_hard_ack == 0 &&
@@ -917,11 +907,11 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
goto send_response;
}
- info.rxMTU = 0;
+ trailer.maxMTU = 0;
ioffset = offset + nr_acks + 3;
- if (skb->len >= ioffset + sizeof(info) &&
- skb_copy_bits(skb, ioffset, &info, sizeof(info)) < 0)
- return rxrpc_proto_abort(call, 0, rxrpc_badmsg_short_ack_info);
+ if (skb->len >= ioffset + sizeof(trailer) &&
+ skb_copy_bits(skb, ioffset, &trailer, sizeof(trailer)) < 0)
+ return rxrpc_proto_abort(call, 0, rxrpc_badmsg_short_ack_trailer);
if (nr_acks > 0)
skb_condense(skb);
@@ -940,7 +930,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
call->acks_first_seq = first_soft_ack;
call->acks_prev_seq = prev_pkt;
- switch (ack.reason) {
+ switch (summary.ack_reason) {
case RXRPC_ACK_PING:
break;
default:
@@ -950,8 +940,8 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
}
/* Parse rwind and mtu sizes if provided. */
- if (info.rxMTU)
- rxrpc_input_ackinfo(call, skb, &info);
+ if (trailer.maxMTU)
+ rxrpc_input_ack_trailer(call, skb, &trailer);
if (first_soft_ack == 0)
return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_zero);
@@ -997,7 +987,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
rxrpc_congestion_management(call, skb, &summary, acked_serial);
send_response:
- if (ack.reason == RXRPC_ACK_PING)
+ if (summary.ack_reason == RXRPC_ACK_PING)
rxrpc_send_ACK(call, RXRPC_ACK_PING_RESPONSE, ack_serial,
rxrpc_propose_ack_respond_to_ping);
else if (sp->hdr.flags & RXRPC_REQUEST_ACK)
@@ -1048,12 +1038,10 @@ void rxrpc_input_call_packet(struct rxrpc_call *call, struct sk_buff *skb)
timo = READ_ONCE(call->next_rx_timo);
if (timo) {
- unsigned long now = jiffies, expect_rx_by;
+ ktime_t delay = ms_to_ktime(timo);
- expect_rx_by = now + timo;
- WRITE_ONCE(call->expect_rx_by, expect_rx_by);
- rxrpc_reduce_call_timer(call, expect_rx_by, now,
- rxrpc_timer_set_for_normal);
+ call->expect_rx_by = ktime_add(ktime_get_real(), delay);
+ trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_expect_rx);
}
switch (sp->hdr.type) {
diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c
index 34353b6e584b..f2701068ed9e 100644
--- a/net/rxrpc/insecure.c
+++ b/net/rxrpc/insecure.c
@@ -15,14 +15,11 @@ static int none_init_connection_security(struct rxrpc_connection *conn,
}
/*
- * Work out how much data we can put in an unsecured packet.
+ * Allocate an appropriately sized buffer for the amount of data remaining.
*/
-static int none_how_much_data(struct rxrpc_call *call, size_t remain,
- size_t *_buf_size, size_t *_data_size, size_t *_offset)
+static struct rxrpc_txbuf *none_alloc_txbuf(struct rxrpc_call *call, size_t remain, gfp_t gfp)
{
- *_buf_size = *_data_size = min_t(size_t, remain, RXRPC_JUMBO_DATALEN);
- *_offset = 0;
- return 0;
+ return rxrpc_alloc_data_txbuf(call, min_t(size_t, remain, RXRPC_JUMBO_DATALEN), 0, gfp);
}
static int none_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
@@ -79,7 +76,7 @@ const struct rxrpc_security rxrpc_no_security = {
.exit = none_exit,
.init_connection_security = none_init_connection_security,
.free_call_crypto = none_free_call_crypto,
- .how_much_data = none_how_much_data,
+ .alloc_txbuf = none_alloc_txbuf,
.secure_packet = none_secure_packet,
.verify_packet = none_verify_packet,
.respond_to_challenge = none_respond_to_challenge,
diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c
index 4a3a08a0e2cd..0300baa9afcd 100644
--- a/net/rxrpc/io_thread.c
+++ b/net/rxrpc/io_thread.c
@@ -124,6 +124,7 @@ static bool rxrpc_extract_header(struct rxrpc_skb_priv *sp,
struct sk_buff *skb)
{
struct rxrpc_wire_header whdr;
+ struct rxrpc_ackpacket ack;
/* dig out the RxRPC connection details */
if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0)
@@ -141,6 +142,16 @@ static bool rxrpc_extract_header(struct rxrpc_skb_priv *sp,
sp->hdr.securityIndex = whdr.securityIndex;
sp->hdr._rsvd = ntohs(whdr._rsvd);
sp->hdr.serviceId = ntohs(whdr.serviceId);
+
+ if (sp->hdr.type == RXRPC_PACKET_TYPE_ACK) {
+ if (skb_copy_bits(skb, sizeof(whdr), &ack, sizeof(ack)) < 0)
+ return rxrpc_bad_message(skb, rxrpc_badmsg_short_ack);
+ sp->ack.first_ack = ntohl(ack.firstPacket);
+ sp->ack.prev_ack = ntohl(ack.previousPacket);
+ sp->ack.acked_serial = ntohl(ack.serial);
+ sp->ack.reason = ack.reason;
+ sp->ack.nr_acks = ack.nAcks;
+ }
return true;
}
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 34d307368135..504453c688d7 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -452,6 +452,9 @@ void rxrpc_destroy_local(struct rxrpc_local *local)
#endif
rxrpc_purge_queue(&local->rx_queue);
rxrpc_purge_client_connections(local);
+ if (local->tx_alloc.va)
+ __page_frag_cache_drain(virt_to_page(local->tx_alloc.va),
+ local->tx_alloc.pagecnt_bias);
}
/*
diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c
index 825b81183046..657cf35089a6 100644
--- a/net/rxrpc/misc.c
+++ b/net/rxrpc/misc.c
@@ -17,22 +17,22 @@
unsigned int rxrpc_max_backlog __read_mostly = 10;
/*
- * How long to wait before scheduling an ACK with subtype DELAY (in jiffies).
+ * How long to wait before scheduling an ACK with subtype DELAY (in ms).
*
* We use this when we've received new data packets. If those packets aren't
* all consumed within this time we will send a DELAY ACK if an ACK was not
* requested to let the sender know it doesn't need to resend.
*/
-unsigned long rxrpc_soft_ack_delay = HZ;
+unsigned long rxrpc_soft_ack_delay = 1000;
/*
- * How long to wait before scheduling an ACK with subtype IDLE (in jiffies).
+ * How long to wait before scheduling an ACK with subtype IDLE (in ms).
*
* We use this when we've consumed some previously soft-ACK'd packets when
* further packets aren't immediately received to decide when to send an IDLE
* ACK let the other end know that it can free up its Tx buffer space.
*/
-unsigned long rxrpc_idle_ack_delay = HZ / 2;
+unsigned long rxrpc_idle_ack_delay = 500;
/*
* Receive window size in packets. This indicates the maximum number of
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 4a292f860ae3..5ea9601efd05 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -48,12 +48,10 @@ static const char rxrpc_keepalive_string[] = "";
static void rxrpc_tx_backoff(struct rxrpc_call *call, int ret)
{
if (ret < 0) {
- u16 tx_backoff = READ_ONCE(call->tx_backoff);
-
- if (tx_backoff < HZ)
- WRITE_ONCE(call->tx_backoff, tx_backoff + 1);
+ if (call->tx_backoff < 1000)
+ call->tx_backoff += 100;
} else {
- WRITE_ONCE(call->tx_backoff, 0);
+ call->tx_backoff = 0;
}
}
@@ -65,84 +63,92 @@ static void rxrpc_tx_backoff(struct rxrpc_call *call, int ret)
* Receiving a response to the ping will prevent the ->expect_rx_by timer from
* expiring.
*/
-static void rxrpc_set_keepalive(struct rxrpc_call *call)
+static void rxrpc_set_keepalive(struct rxrpc_call *call, ktime_t now)
{
- unsigned long now = jiffies, keepalive_at = call->next_rx_timo / 6;
+ ktime_t delay = ms_to_ktime(READ_ONCE(call->next_rx_timo) / 6);
- keepalive_at += now;
- WRITE_ONCE(call->keepalive_at, keepalive_at);
- rxrpc_reduce_call_timer(call, keepalive_at, now,
- rxrpc_timer_set_for_keepalive);
+ call->keepalive_at = ktime_add(ktime_get_real(), delay);
+ trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_keepalive);
}
/*
* Fill out an ACK packet.
*/
-static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
- struct rxrpc_call *call,
- struct rxrpc_txbuf *txb,
- u16 *_rwind)
+static void rxrpc_fill_out_ack(struct rxrpc_call *call,
+ struct rxrpc_txbuf *txb,
+ u8 ack_reason,
+ rxrpc_serial_t serial)
{
- struct rxrpc_ackinfo ackinfo;
+ struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base;
+ struct rxrpc_acktrailer *trailer = txb->kvec[2].iov_base + 3;
+ struct rxrpc_ackpacket *ack = (struct rxrpc_ackpacket *)(whdr + 1);
unsigned int qsize, sack, wrap, to;
rxrpc_seq_t window, wtop;
int rsize;
u32 mtu, jmax;
- u8 *ackp = txb->acks;
+ u8 *filler = txb->kvec[2].iov_base;
+ u8 *sackp = txb->kvec[1].iov_base;
- call->ackr_nr_unacked = 0;
- atomic_set(&call->ackr_nr_consumed, 0);
rxrpc_inc_stat(call->rxnet, stat_tx_ack_fill);
- clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags);
window = call->ackr_window;
wtop = call->ackr_wtop;
sack = call->ackr_sack_base % RXRPC_SACK_SIZE;
- txb->ack.firstPacket = htonl(window);
- txb->ack.nAcks = wtop - window;
+
+ whdr->seq = 0;
+ whdr->type = RXRPC_PACKET_TYPE_ACK;
+ txb->flags |= RXRPC_SLOW_START_OK;
+ ack->bufferSpace = 0;
+ ack->maxSkew = 0;
+ ack->firstPacket = htonl(window);
+ ack->previousPacket = htonl(call->rx_highest_seq);
+ ack->serial = htonl(serial);
+ ack->reason = ack_reason;
+ ack->nAcks = wtop - window;
+ filler[0] = 0;
+ filler[1] = 0;
+ filler[2] = 0;
+
+ if (ack_reason == RXRPC_ACK_PING)
+ txb->flags |= RXRPC_REQUEST_ACK;
if (after(wtop, window)) {
+ txb->len += ack->nAcks;
+ txb->kvec[1].iov_base = sackp;
+ txb->kvec[1].iov_len = ack->nAcks;
+
wrap = RXRPC_SACK_SIZE - sack;
- to = min_t(unsigned int, txb->ack.nAcks, RXRPC_SACK_SIZE);
+ to = min_t(unsigned int, ack->nAcks, RXRPC_SACK_SIZE);
- if (sack + txb->ack.nAcks <= RXRPC_SACK_SIZE) {
- memcpy(txb->acks, call->ackr_sack_table + sack, txb->ack.nAcks);
+ if (sack + ack->nAcks <= RXRPC_SACK_SIZE) {
+ memcpy(sackp, call->ackr_sack_table + sack, ack->nAcks);
} else {
- memcpy(txb->acks, call->ackr_sack_table + sack, wrap);
- memcpy(txb->acks + wrap, call->ackr_sack_table,
- to - wrap);
+ memcpy(sackp, call->ackr_sack_table + sack, wrap);
+ memcpy(sackp + wrap, call->ackr_sack_table, to - wrap);
}
-
- ackp += to;
} else if (before(wtop, window)) {
pr_warn("ack window backward %x %x", window, wtop);
- } else if (txb->ack.reason == RXRPC_ACK_DELAY) {
- txb->ack.reason = RXRPC_ACK_IDLE;
+ } else if (ack->reason == RXRPC_ACK_DELAY) {
+ ack->reason = RXRPC_ACK_IDLE;
}
- mtu = conn->peer->if_mtu;
- mtu -= conn->peer->hdrsize;
+ mtu = call->peer->if_mtu;
+ mtu -= call->peer->hdrsize;
jmax = rxrpc_rx_jumbo_max;
qsize = (window - 1) - call->rx_consumed;
rsize = max_t(int, call->rx_winsize - qsize, 0);
- *_rwind = rsize;
- ackinfo.rxMTU = htonl(rxrpc_rx_mtu);
- ackinfo.maxMTU = htonl(mtu);
- ackinfo.rwind = htonl(rsize);
- ackinfo.jumbo_max = htonl(jmax);
-
- *ackp++ = 0;
- *ackp++ = 0;
- *ackp++ = 0;
- memcpy(ackp, &ackinfo, sizeof(ackinfo));
- return txb->ack.nAcks + 3 + sizeof(ackinfo);
+ txb->ack_rwind = rsize;
+ trailer->maxMTU = htonl(rxrpc_rx_mtu);
+ trailer->ifMTU = htonl(mtu);
+ trailer->rwind = htonl(rsize);
+ trailer->jumbo_max = htonl(jmax);
}
/*
* Record the beginning of an RTT probe.
*/
-static int rxrpc_begin_rtt_probe(struct rxrpc_call *call, rxrpc_serial_t serial,
- enum rxrpc_rtt_tx_trace why)
+static void rxrpc_begin_rtt_probe(struct rxrpc_call *call, rxrpc_serial_t serial,
+ ktime_t now, enum rxrpc_rtt_tx_trace why)
{
unsigned long avail = call->rtt_avail;
int rtt_slot = 9;
@@ -155,47 +161,31 @@ static int rxrpc_begin_rtt_probe(struct rxrpc_call *call, rxrpc_serial_t serial,
goto no_slot;
call->rtt_serial[rtt_slot] = serial;
- call->rtt_sent_at[rtt_slot] = ktime_get_real();
+ call->rtt_sent_at[rtt_slot] = now;
smp_wmb(); /* Write data before avail bit */
set_bit(rtt_slot + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail);
trace_rxrpc_rtt_tx(call, why, rtt_slot, serial);
- return rtt_slot;
+ return;
no_slot:
trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_no_slot, rtt_slot, serial);
- return -1;
-}
-
-/*
- * Cancel an RTT probe.
- */
-static void rxrpc_cancel_rtt_probe(struct rxrpc_call *call,
- rxrpc_serial_t serial, int rtt_slot)
-{
- if (rtt_slot != -1) {
- clear_bit(rtt_slot + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail);
- smp_wmb(); /* Clear pending bit before setting slot */
- set_bit(rtt_slot, &call->rtt_avail);
- trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_cancel, rtt_slot, serial);
- }
}
/*
* Transmit an ACK packet.
*/
-int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
+static void rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
{
+ struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base;
struct rxrpc_connection *conn;
+ struct rxrpc_ackpacket *ack = (struct rxrpc_ackpacket *)(whdr + 1);
struct msghdr msg;
- struct kvec iov[1];
- rxrpc_serial_t serial;
- size_t len, n;
- int ret, rtt_slot = -1;
- u16 rwind;
+ ktime_t now;
+ int ret;
if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
- return -ECONNRESET;
+ return;
conn = call->conn;
@@ -203,55 +193,68 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
msg.msg_namelen = call->peer->srx.transport_len;
msg.msg_control = NULL;
msg.msg_controllen = 0;
- msg.msg_flags = 0;
-
- if (txb->ack.reason == RXRPC_ACK_PING)
- txb->wire.flags |= RXRPC_REQUEST_ACK;
-
- n = rxrpc_fill_out_ack(conn, call, txb, &rwind);
- if (n == 0)
- return 0;
-
- iov[0].iov_base = &txb->wire;
- iov[0].iov_len = sizeof(txb->wire) + sizeof(txb->ack) + n;
- len = iov[0].iov_len;
+ msg.msg_flags = MSG_SPLICE_PAGES;
- serial = rxrpc_get_next_serial(conn);
- txb->wire.serial = htonl(serial);
- trace_rxrpc_tx_ack(call->debug_id, serial,
- ntohl(txb->ack.firstPacket),
- ntohl(txb->ack.serial), txb->ack.reason, txb->ack.nAcks,
- rwind);
+ whdr->flags = txb->flags & RXRPC_TXBUF_WIRE_FLAGS;
- if (txb->ack.reason == RXRPC_ACK_PING)
- rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_ping);
+ txb->serial = rxrpc_get_next_serial(conn);
+ whdr->serial = htonl(txb->serial);
+ trace_rxrpc_tx_ack(call->debug_id, txb->serial,
+ ntohl(ack->firstPacket),
+ ntohl(ack->serial), ack->reason, ack->nAcks,
+ txb->ack_rwind);
rxrpc_inc_stat(call->rxnet, stat_tx_ack_send);
- /* Grab the highest received seq as late as possible */
- txb->ack.previousPacket = htonl(call->rx_highest_seq);
-
- iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len);
- ret = do_udp_sendmsg(conn->local->socket, &msg, len);
+ iov_iter_kvec(&msg.msg_iter, WRITE, txb->kvec, txb->nr_kvec, txb->len);
+ rxrpc_local_dont_fragment(conn->local, false);
+ ret = do_udp_sendmsg(conn->local->socket, &msg, txb->len);
call->peer->last_tx_at = ktime_get_seconds();
if (ret < 0) {
- trace_rxrpc_tx_fail(call->debug_id, serial, ret,
+ trace_rxrpc_tx_fail(call->debug_id, txb->serial, ret,
rxrpc_tx_point_call_ack);
} else {
- trace_rxrpc_tx_packet(call->debug_id, &txb->wire,
+ trace_rxrpc_tx_packet(call->debug_id, whdr,
rxrpc_tx_point_call_ack);
- if (txb->wire.flags & RXRPC_REQUEST_ACK)
- call->peer->rtt_last_req = ktime_get_real();
+ now = ktime_get_real();
+ if (ack->reason == RXRPC_ACK_PING)
+ rxrpc_begin_rtt_probe(call, txb->serial, now, rxrpc_rtt_tx_ping);
+ if (txb->flags & RXRPC_REQUEST_ACK)
+ call->peer->rtt_last_req = now;
+ rxrpc_set_keepalive(call, now);
}
rxrpc_tx_backoff(call, ret);
+}
- if (!__rxrpc_call_is_complete(call)) {
- if (ret < 0)
- rxrpc_cancel_rtt_probe(call, serial, rtt_slot);
- rxrpc_set_keepalive(call);
+/*
+ * Queue an ACK for immediate transmission.
+ */
+void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason,
+ rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why)
+{
+ struct rxrpc_txbuf *txb;
+
+ if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
+ return;
+
+ rxrpc_inc_stat(call->rxnet, stat_tx_acks[ack_reason]);
+
+ txb = rxrpc_alloc_ack_txbuf(call, call->ackr_wtop - call->ackr_window);
+ if (!txb) {
+ kleave(" = -ENOMEM");
+ return;
}
- return ret;
+ txb->ack_why = why;
+
+ rxrpc_fill_out_ack(call, txb, ack_reason, serial);
+ call->ackr_nr_unacked = 0;
+ atomic_set(&call->ackr_nr_consumed, 0);
+ clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags);
+
+ trace_rxrpc_send_ack(call, why, ack_reason, serial);
+ rxrpc_send_ack_packet(call, txb);
+ rxrpc_put_txbuf(txb, rxrpc_txbuf_put_ack_tx);
}
/*
@@ -319,38 +322,22 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
}
/*
- * send a packet through the transport endpoint
+ * Prepare a (sub)packet for transmission.
*/
-int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
+static void rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc_txbuf *txb,
+ rxrpc_serial_t serial)
{
+ struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base;
enum rxrpc_req_ack_trace why;
struct rxrpc_connection *conn = call->conn;
- struct msghdr msg;
- struct kvec iov[1];
- rxrpc_serial_t serial;
- size_t len;
- int ret, rtt_slot = -1;
_enter("%x,{%d}", txb->seq, txb->len);
- /* Each transmission of a Tx packet needs a new serial number */
- serial = rxrpc_get_next_serial(conn);
- txb->wire.serial = htonl(serial);
+ txb->serial = serial;
if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) &&
txb->seq == 1)
- txb->wire.userStatus = RXRPC_USERSTATUS_SERVICE_UPGRADE;
-
- iov[0].iov_base = &txb->wire;
- iov[0].iov_len = sizeof(txb->wire) + txb->len;
- len = iov[0].iov_len;
- iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len);
-
- msg.msg_name = &call->peer->srx.transport;
- msg.msg_namelen = call->peer->srx.transport_len;
- msg.msg_control = NULL;
- msg.msg_controllen = 0;
- msg.msg_flags = 0;
+ whdr->userStatus = RXRPC_USERSTATUS_SERVICE_UPGRADE;
/* If our RTT cache needs working on, request an ACK. Also request
* ACKs if a DATA packet appears to have been lost.
@@ -359,13 +346,13 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
* service call, lest OpenAFS incorrectly send us an ACK with some
* soft-ACKs in it and then never follow up with a proper hard ACK.
*/
- if (txb->wire.flags & RXRPC_REQUEST_ACK)
+ if (txb->flags & RXRPC_REQUEST_ACK)
why = rxrpc_reqack_already_on;
- else if (test_bit(RXRPC_TXBUF_LAST, &txb->flags) && rxrpc_sending_to_client(txb))
+ else if ((txb->flags & RXRPC_LAST_PACKET) && rxrpc_sending_to_client(txb))
why = rxrpc_reqack_no_srv_last;
else if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events))
why = rxrpc_reqack_ack_lost;
- else if (test_bit(RXRPC_TXBUF_RESENT, &txb->flags))
+ else if (txb->flags & RXRPC_TXBUF_RESENT)
why = rxrpc_reqack_retrans;
else if (call->cong_mode == RXRPC_CALL_SLOW_START && call->cong_cwnd <= 2)
why = rxrpc_reqack_slow_start;
@@ -381,42 +368,116 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
rxrpc_inc_stat(call->rxnet, stat_why_req_ack[why]);
trace_rxrpc_req_ack(call->debug_id, txb->seq, why);
if (why != rxrpc_reqack_no_srv_last)
- txb->wire.flags |= RXRPC_REQUEST_ACK;
+ txb->flags |= RXRPC_REQUEST_ACK;
dont_set_request_ack:
+ whdr->flags = txb->flags & RXRPC_TXBUF_WIRE_FLAGS;
+ whdr->serial = htonl(txb->serial);
+ whdr->cksum = txb->cksum;
+
+ trace_rxrpc_tx_data(call, txb->seq, txb->serial, txb->flags, false);
+}
+
+/*
+ * Prepare a packet for transmission.
+ */
+static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
+{
+ rxrpc_serial_t serial;
+
+ /* Each transmission of a Tx packet needs a new serial number */
+ serial = rxrpc_get_next_serial(call->conn);
+
+ rxrpc_prepare_data_subpacket(call, txb, serial);
+
+ return txb->len;
+}
+
+/*
+ * Set timeouts after transmitting a packet.
+ */
+static void rxrpc_tstamp_data_packets(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
+{
+ ktime_t now = ktime_get_real();
+ bool ack_requested = txb->flags & RXRPC_REQUEST_ACK;
+
+ call->tx_last_sent = now;
+ txb->last_sent = now;
+
+ if (ack_requested) {
+ rxrpc_begin_rtt_probe(call, txb->serial, now, rxrpc_rtt_tx_data);
+
+ call->peer->rtt_last_req = now;
+ if (call->peer->rtt_count > 1) {
+ ktime_t delay = rxrpc_get_rto_backoff(call->peer, false);
+
+ call->ack_lost_at = ktime_add(now, delay);
+ trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_lost_ack);
+ }
+ }
+
+ if (!test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags)) {
+ ktime_t delay = ms_to_ktime(READ_ONCE(call->next_rx_timo));
+
+ call->expect_rx_by = ktime_add(now, delay);
+ trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_expect_rx);
+ }
+
+ rxrpc_set_keepalive(call, now);
+}
+
+/*
+ * send a packet through the transport endpoint
+ */
+static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
+{
+ struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base;
+ struct rxrpc_connection *conn = call->conn;
+ enum rxrpc_tx_point frag;
+ struct msghdr msg;
+ size_t len;
+ int ret;
+
+ _enter("%x,{%d}", txb->seq, txb->len);
+
+ len = rxrpc_prepare_data_packet(call, txb);
+
if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
static int lose;
if ((lose++ & 7) == 7) {
ret = 0;
- trace_rxrpc_tx_data(call, txb->seq, serial,
- txb->wire.flags,
- test_bit(RXRPC_TXBUF_RESENT, &txb->flags),
- true);
+ trace_rxrpc_tx_data(call, txb->seq, txb->serial,
+ txb->flags, true);
goto done;
}
}
- trace_rxrpc_tx_data(call, txb->seq, serial, txb->wire.flags,
- test_bit(RXRPC_TXBUF_RESENT, &txb->flags), false);
+ iov_iter_kvec(&msg.msg_iter, WRITE, txb->kvec, txb->nr_kvec, len);
+
+ msg.msg_name = &call->peer->srx.transport;
+ msg.msg_namelen = call->peer->srx.transport_len;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = MSG_SPLICE_PAGES;
/* Track what we've attempted to transmit at least once so that the
* retransmission algorithm doesn't try to resend what we haven't sent
- * yet. However, this can race as we can receive an ACK before we get
- * to this point. But, OTOH, if we won't get an ACK mentioning this
- * packet unless the far side received it (though it could have
- * discarded it anyway and NAK'd it).
+ * yet.
*/
- cmpxchg(&call->tx_transmitted, txb->seq - 1, txb->seq);
+ if (txb->seq == call->tx_transmitted + 1)
+ call->tx_transmitted = txb->seq;
/* send the packet with the don't fragment bit set if we currently
* think it's small enough */
- if (txb->len >= call->peer->maxdata)
- goto send_fragmentable;
-
- txb->last_sent = ktime_get_real();
- if (txb->wire.flags & RXRPC_REQUEST_ACK)
- rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_data);
+ if (txb->len >= call->peer->maxdata) {
+ rxrpc_local_dont_fragment(conn->local, false);
+ frag = rxrpc_tx_point_call_data_frag;
+ } else {
+ rxrpc_local_dont_fragment(conn->local, true);
+ frag = rxrpc_tx_point_call_data_nofrag;
+ }
+retry:
/* send the packet by UDP
* - returns -EMSGSIZE if UDP would have to fragment the packet
* to go out of the interface
@@ -429,46 +490,21 @@ dont_set_request_ack:
if (ret < 0) {
rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail);
- rxrpc_cancel_rtt_probe(call, serial, rtt_slot);
- trace_rxrpc_tx_fail(call->debug_id, serial, ret,
- rxrpc_tx_point_call_data_nofrag);
+ trace_rxrpc_tx_fail(call->debug_id, txb->serial, ret, frag);
} else {
- trace_rxrpc_tx_packet(call->debug_id, &txb->wire,
- rxrpc_tx_point_call_data_nofrag);
+ trace_rxrpc_tx_packet(call->debug_id, whdr, frag);
}
rxrpc_tx_backoff(call, ret);
- if (ret == -EMSGSIZE)
- goto send_fragmentable;
+ if (ret == -EMSGSIZE && frag == rxrpc_tx_point_call_data_frag) {
+ rxrpc_local_dont_fragment(conn->local, false);
+ frag = rxrpc_tx_point_call_data_frag;
+ goto retry;
+ }
done:
if (ret >= 0) {
- call->tx_last_sent = txb->last_sent;
- if (txb->wire.flags & RXRPC_REQUEST_ACK) {
- call->peer->rtt_last_req = txb->last_sent;
- if (call->peer->rtt_count > 1) {
- unsigned long nowj = jiffies, ack_lost_at;
-
- ack_lost_at = rxrpc_get_rto_backoff(call->peer, false);
- ack_lost_at += nowj;
- WRITE_ONCE(call->ack_lost_at, ack_lost_at);
- rxrpc_reduce_call_timer(call, ack_lost_at, nowj,
- rxrpc_timer_set_for_lost_ack);
- }
- }
-
- if (txb->seq == 1 &&
- !test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER,
- &call->flags)) {
- unsigned long nowj = jiffies, expect_rx_by;
-
- expect_rx_by = nowj + call->next_rx_timo;
- WRITE_ONCE(call->expect_rx_by, expect_rx_by);
- rxrpc_reduce_call_timer(call, expect_rx_by, nowj,
- rxrpc_timer_set_for_normal);
- }
-
- rxrpc_set_keepalive(call);
+ rxrpc_tstamp_data_packets(call, txb);
} else {
/* Cancel the call if the initial transmission fails,
* particularly if that's due to network routing issues that
@@ -482,41 +518,6 @@ done:
_leave(" = %d [%u]", ret, call->peer->maxdata);
return ret;
-
-send_fragmentable:
- /* attempt to send this message with fragmentation enabled */
- _debug("send fragment");
-
- txb->last_sent = ktime_get_real();
- if (txb->wire.flags & RXRPC_REQUEST_ACK)
- rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_data);
-
- switch (conn->local->srx.transport.family) {
- case AF_INET6:
- case AF_INET:
- rxrpc_local_dont_fragment(conn->local, false);
- rxrpc_inc_stat(call->rxnet, stat_tx_data_send_frag);
- ret = do_udp_sendmsg(conn->local->socket, &msg, len);
- conn->peer->last_tx_at = ktime_get_seconds();
-
- rxrpc_local_dont_fragment(conn->local, true);
- break;
-
- default:
- BUG();
- }
-
- if (ret < 0) {
- rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail);
- rxrpc_cancel_rtt_probe(call, serial, rtt_slot);
- trace_rxrpc_tx_fail(call->debug_id, serial, ret,
- rxrpc_tx_point_call_data_frag);
- } else {
- trace_rxrpc_tx_packet(call->debug_id, &txb->wire,
- rxrpc_tx_point_call_data_frag);
- }
- rxrpc_tx_backoff(call, ret);
- goto done;
}
/*
@@ -723,11 +724,9 @@ void rxrpc_transmit_one(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
rxrpc_instant_resend(call, txb);
}
} else {
- unsigned long now = jiffies;
- unsigned long resend_at = now + call->peer->rto_j;
+ ktime_t delay = ns_to_ktime(call->peer->rto_us * NSEC_PER_USEC);
- WRITE_ONCE(call->resend_at, resend_at);
- rxrpc_reduce_call_timer(call, resend_at, now,
- rxrpc_timer_set_for_send);
+ call->resend_at = ktime_add(ktime_get_real(), delay);
+ trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_resend_tx);
}
}
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index 26dc2f26d92d..263a2251e3d2 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -52,9 +52,9 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
struct rxrpc_call *call;
struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
enum rxrpc_call_state state;
- unsigned long timeout = 0;
rxrpc_seq_t acks_hard_ack;
char lbuff[50], rbuff[50];
+ long timeout = 0;
if (v == &rxnet->calls) {
seq_puts(seq,
@@ -76,10 +76,8 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
sprintf(rbuff, "%pISpc", &call->dest_srx.transport);
state = rxrpc_call_state(call);
- if (state != RXRPC_CALL_SERVER_PREALLOC) {
- timeout = READ_ONCE(call->expect_rx_by);
- timeout -= jiffies;
- }
+ if (state != RXRPC_CALL_SERVER_PREALLOC)
+ timeout = ktime_ms_delta(READ_ONCE(call->expect_rx_by), ktime_get_real());
acks_hard_ack = READ_ONCE(call->acks_hard_ack);
seq_printf(seq,
@@ -309,7 +307,7 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v)
peer->mtu,
now - peer->last_tx_at,
peer->srtt_us >> 3,
- jiffies_to_usecs(peer->rto_j));
+ peer->rto_us);
return 0;
}
diff --git a/net/rxrpc/protocol.h b/net/rxrpc/protocol.h
index e8ee4af43ca8..4fe6b4d20ada 100644
--- a/net/rxrpc/protocol.h
+++ b/net/rxrpc/protocol.h
@@ -135,9 +135,9 @@ struct rxrpc_ackpacket {
/*
* ACK packets can have a further piece of information tagged on the end
*/
-struct rxrpc_ackinfo {
- __be32 rxMTU; /* maximum Rx MTU size (bytes) [AFS 3.3] */
- __be32 maxMTU; /* maximum interface MTU size (bytes) [AFS 3.3] */
+struct rxrpc_acktrailer {
+ __be32 maxMTU; /* maximum Rx MTU size (bytes) [AFS 3.3] */
+ __be32 ifMTU; /* maximum interface MTU size (bytes) [AFS 3.3] */
__be32 rwind; /* Rx window size (packets) [AFS 3.4] */
__be32 jumbo_max; /* max packets to stick into a jumbo packet [AFS 3.5] */
};
diff --git a/net/rxrpc/rtt.c b/net/rxrpc/rtt.c
index be61d6f5be8d..cdab7b7d08a0 100644
--- a/net/rxrpc/rtt.c
+++ b/net/rxrpc/rtt.c
@@ -11,8 +11,8 @@
#include <linux/net.h>
#include "ar-internal.h"
-#define RXRPC_RTO_MAX ((unsigned)(120 * HZ))
-#define RXRPC_TIMEOUT_INIT ((unsigned)(1*HZ)) /* RFC6298 2.1 initial RTO value */
+#define RXRPC_RTO_MAX (120 * USEC_PER_SEC)
+#define RXRPC_TIMEOUT_INIT ((unsigned int)(1 * MSEC_PER_SEC)) /* RFC6298 2.1 initial RTO value */
#define rxrpc_jiffies32 ((u32)jiffies) /* As rxrpc_jiffies32 */
static u32 rxrpc_rto_min_us(struct rxrpc_peer *peer)
@@ -22,7 +22,7 @@ static u32 rxrpc_rto_min_us(struct rxrpc_peer *peer)
static u32 __rxrpc_set_rto(const struct rxrpc_peer *peer)
{
- return usecs_to_jiffies((peer->srtt_us >> 3) + peer->rttvar_us);
+ return (peer->srtt_us >> 3) + peer->rttvar_us;
}
static u32 rxrpc_bound_rto(u32 rto)
@@ -124,7 +124,7 @@ static void rxrpc_set_rto(struct rxrpc_peer *peer)
/* NOTE: clamping at RXRPC_RTO_MIN is not required, current algo
* guarantees that rto is higher.
*/
- peer->rto_j = rxrpc_bound_rto(rto);
+ peer->rto_us = rxrpc_bound_rto(rto);
}
static void rxrpc_ack_update_rtt(struct rxrpc_peer *peer, long rtt_us)
@@ -163,33 +163,33 @@ void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why,
spin_unlock(&peer->rtt_input_lock);
trace_rxrpc_rtt_rx(call, why, rtt_slot, send_serial, resp_serial,
- peer->srtt_us >> 3, peer->rto_j);
+ peer->srtt_us >> 3, peer->rto_us);
}
/*
- * Get the retransmission timeout to set in jiffies, backing it off each time
- * we retransmit.
+ * Get the retransmission timeout to set in nanoseconds, backing it off each
+ * time we retransmit.
*/
-unsigned long rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans)
+ktime_t rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans)
{
- u64 timo_j;
- u8 backoff = READ_ONCE(peer->backoff);
+ u64 timo_us;
+ u32 backoff = READ_ONCE(peer->backoff);
- timo_j = peer->rto_j;
- timo_j <<= backoff;
- if (retrans && timo_j * 2 <= RXRPC_RTO_MAX)
+ timo_us = peer->rto_us;
+ timo_us <<= backoff;
+ if (retrans && timo_us * 2 <= RXRPC_RTO_MAX)
WRITE_ONCE(peer->backoff, backoff + 1);
- if (timo_j < 1)
- timo_j = 1;
+ if (timo_us < 1)
+ timo_us = 1;
- return timo_j;
+ return ns_to_ktime(timo_us * NSEC_PER_USEC);
}
void rxrpc_peer_init_rtt(struct rxrpc_peer *peer)
{
- peer->rto_j = RXRPC_TIMEOUT_INIT;
- peer->mdev_us = jiffies_to_usecs(RXRPC_TIMEOUT_INIT);
+ peer->rto_us = RXRPC_TIMEOUT_INIT;
+ peer->mdev_us = RXRPC_TIMEOUT_INIT;
peer->backoff = 0;
//minmax_reset(&peer->rtt_min, rxrpc_jiffies32, ~0U);
}
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 6b32d61d4cdc..f1a68270862d 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -145,16 +145,17 @@ error:
/*
* Work out how much data we can put in a packet.
*/
-static int rxkad_how_much_data(struct rxrpc_call *call, size_t remain,
- size_t *_buf_size, size_t *_data_size, size_t *_offset)
+static struct rxrpc_txbuf *rxkad_alloc_txbuf(struct rxrpc_call *call, size_t remain, gfp_t gfp)
{
- size_t shdr, buf_size, chunk;
+ struct rxrpc_txbuf *txb;
+ size_t shdr, space;
+
+ remain = min(remain, 65535 - sizeof(struct rxrpc_wire_header));
switch (call->conn->security_level) {
default:
- buf_size = chunk = min_t(size_t, remain, RXRPC_JUMBO_DATALEN);
- shdr = 0;
- goto out;
+ space = min_t(size_t, remain, RXRPC_JUMBO_DATALEN);
+ return rxrpc_alloc_data_txbuf(call, space, 0, gfp);
case RXRPC_SECURITY_AUTH:
shdr = sizeof(struct rxkad_level1_hdr);
break;
@@ -163,17 +164,16 @@ static int rxkad_how_much_data(struct rxrpc_call *call, size_t remain,
break;
}
- buf_size = round_down(RXRPC_JUMBO_DATALEN, RXKAD_ALIGN);
+ space = min_t(size_t, round_down(RXRPC_JUMBO_DATALEN, RXKAD_ALIGN), remain + shdr);
+ space = round_up(space, RXKAD_ALIGN);
- chunk = buf_size - shdr;
- if (remain < chunk)
- buf_size = round_up(shdr + remain, RXKAD_ALIGN);
+ txb = rxrpc_alloc_data_txbuf(call, space, RXKAD_ALIGN, gfp);
+ if (!txb)
+ return NULL;
-out:
- *_buf_size = buf_size;
- *_data_size = chunk;
- *_offset = shdr;
- return 0;
+ txb->offset += shdr;
+ txb->space -= shdr;
+ return txb;
}
/*
@@ -251,7 +251,8 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
struct rxrpc_txbuf *txb,
struct skcipher_request *req)
{
- struct rxkad_level1_hdr *hdr = (void *)txb->data;
+ struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base;
+ struct rxkad_level1_hdr *hdr = (void *)(whdr + 1);
struct rxrpc_crypt iv;
struct scatterlist sg;
size_t pad;
@@ -259,7 +260,7 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
_enter("");
- check = txb->seq ^ ntohl(txb->wire.callNumber);
+ check = txb->seq ^ call->call_id;
hdr->data_size = htonl((u32)check << 16 | txb->len);
txb->len += sizeof(struct rxkad_level1_hdr);
@@ -267,14 +268,14 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
pad = RXKAD_ALIGN - pad;
pad &= RXKAD_ALIGN - 1;
if (pad) {
- memset(txb->data + txb->offset, 0, pad);
+ memset(txb->kvec[0].iov_base + txb->offset, 0, pad);
txb->len += pad;
}
/* start the encryption afresh */
memset(&iv, 0, sizeof(iv));
- sg_init_one(&sg, txb->data, 8);
+ sg_init_one(&sg, hdr, 8);
skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x);
@@ -293,7 +294,8 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
struct skcipher_request *req)
{
const struct rxrpc_key_token *token;
- struct rxkad_level2_hdr *rxkhdr = (void *)txb->data;
+ struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base;
+ struct rxkad_level2_hdr *rxkhdr = (void *)(whdr + 1);
struct rxrpc_crypt iv;
struct scatterlist sg;
size_t pad;
@@ -302,7 +304,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
_enter("");
- check = txb->seq ^ ntohl(txb->wire.callNumber);
+ check = txb->seq ^ call->call_id;
rxkhdr->data_size = htonl(txb->len | (u32)check << 16);
rxkhdr->checksum = 0;
@@ -312,7 +314,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
pad = RXKAD_ALIGN - pad;
pad &= RXKAD_ALIGN - 1;
if (pad) {
- memset(txb->data + txb->offset, 0, pad);
+ memset(txb->kvec[0].iov_base + txb->offset, 0, pad);
txb->len += pad;
}
@@ -320,7 +322,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
token = call->conn->key->payload.data[0];
memcpy(&iv, token->kad->session_key, sizeof(iv));
- sg_init_one(&sg, txb->data, txb->len);
+ sg_init_one(&sg, rxkhdr, txb->len);
skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, &sg, &sg, txb->len, iv.x);
@@ -362,9 +364,9 @@ static int rxkad_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
memcpy(&iv, call->conn->rxkad.csum_iv.x, sizeof(iv));
/* calculate the security checksum */
- x = (ntohl(txb->wire.cid) & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT);
+ x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT);
x |= txb->seq & 0x3fffffff;
- crypto.buf[0] = txb->wire.callNumber;
+ crypto.buf[0] = htonl(call->call_id);
crypto.buf[1] = htonl(x);
sg_init_one(&sg, crypto.buf, 8);
@@ -378,7 +380,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
y = (y >> 16) & 0xffff;
if (y == 0)
y = 1; /* zero checksums are not permitted */
- txb->wire.cksum = htons(y);
+ txb->cksum = htons(y);
switch (call->conn->security_level) {
case RXRPC_SECURITY_PLAIN:
@@ -726,7 +728,6 @@ static int rxkad_send_response(struct rxrpc_connection *conn,
rxrpc_local_dont_fragment(conn->local, false);
ret = kernel_sendmsg(conn->local->socket, &msg, iov, 3, len);
- rxrpc_local_dont_fragment(conn->local, true);
if (ret < 0) {
trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
rxrpc_tx_point_rxkad_response);
@@ -1256,7 +1257,7 @@ const struct rxrpc_security rxkad = {
.free_preparse_server_key = rxkad_free_preparse_server_key,
.destroy_server_key = rxkad_destroy_server_key,
.init_connection_security = rxkad_init_connection_security,
- .how_much_data = rxkad_how_much_data,
+ .alloc_txbuf = rxkad_alloc_txbuf,
.secure_packet = rxkad_secure_packet,
.verify_packet = rxkad_verify_packet,
.free_call_crypto = rxkad_free_call_crypto,
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 5677d5690a02..6f765768c49c 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -240,7 +240,7 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
rxrpc_notify_end_tx_t notify_end_tx)
{
rxrpc_seq_t seq = txb->seq;
- bool last = test_bit(RXRPC_TXBUF_LAST, &txb->flags), poke;
+ bool poke, last = txb->flags & RXRPC_LAST_PACKET;
rxrpc_inc_stat(call->rxnet, stat_tx_data);
@@ -336,7 +336,7 @@ reload:
do {
if (!txb) {
- size_t remain, bufsize, chunk, offset;
+ size_t remain;
_debug("alloc");
@@ -348,23 +348,11 @@ reload:
* region (enc blocksize), but the trailer is not.
*/
remain = more ? INT_MAX : msg_data_left(msg);
- ret = call->conn->security->how_much_data(call, remain,
- &bufsize, &chunk, &offset);
- if (ret < 0)
+ txb = call->conn->security->alloc_txbuf(call, remain, sk->sk_allocation);
+ if (IS_ERR(txb)) {
+ ret = PTR_ERR(txb);
goto maybe_error;
-
- _debug("SIZE: %zu/%zu @%zu", chunk, bufsize, offset);
-
- /* create a buffer that we can retain until it's ACK'd */
- ret = -ENOMEM;
- txb = rxrpc_alloc_txbuf(call, RXRPC_PACKET_TYPE_DATA,
- GFP_KERNEL);
- if (!txb)
- goto maybe_error;
-
- txb->offset = offset;
- txb->space -= offset;
- txb->space = min_t(size_t, chunk, txb->space);
+ }
}
_debug("append");
@@ -374,8 +362,8 @@ reload:
size_t copy = min_t(size_t, txb->space, msg_data_left(msg));
_debug("add %zu", copy);
- if (!copy_from_iter_full(txb->data + txb->offset, copy,
- &msg->msg_iter))
+ if (!copy_from_iter_full(txb->kvec[0].iov_base + txb->offset,
+ copy, &msg->msg_iter))
goto efault;
_debug("added");
txb->space -= copy;
@@ -394,18 +382,18 @@ reload:
/* add the packet to the send queue if it's now full */
if (!txb->space ||
(msg_data_left(msg) == 0 && !more)) {
- if (msg_data_left(msg) == 0 && !more) {
- txb->wire.flags |= RXRPC_LAST_PACKET;
- __set_bit(RXRPC_TXBUF_LAST, &txb->flags);
- }
+ if (msg_data_left(msg) == 0 && !more)
+ txb->flags |= RXRPC_LAST_PACKET;
else if (call->tx_top - call->acks_hard_ack <
call->tx_winsize)
- txb->wire.flags |= RXRPC_MORE_PACKETS;
+ txb->flags |= RXRPC_MORE_PACKETS;
ret = call->security->secure_packet(call, txb);
if (ret < 0)
goto out;
+ txb->kvec[0].iov_len += txb->len;
+ txb->len = txb->kvec[0].iov_len;
rxrpc_queue_packet(rx, call, txb, notify_end_tx);
txb = NULL;
}
@@ -621,7 +609,6 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
__releases(&rx->sk.sk_lock.slock)
{
struct rxrpc_call *call;
- unsigned long now, j;
bool dropped_lock = false;
int ret;
@@ -699,25 +686,21 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
switch (p.call.nr_timeouts) {
case 3:
- j = msecs_to_jiffies(p.call.timeouts.normal);
- if (p.call.timeouts.normal > 0 && j == 0)
- j = 1;
- WRITE_ONCE(call->next_rx_timo, j);
+ WRITE_ONCE(call->next_rx_timo, p.call.timeouts.normal);
fallthrough;
case 2:
- j = msecs_to_jiffies(p.call.timeouts.idle);
- if (p.call.timeouts.idle > 0 && j == 0)
- j = 1;
- WRITE_ONCE(call->next_req_timo, j);
+ WRITE_ONCE(call->next_req_timo, p.call.timeouts.idle);
fallthrough;
case 1:
if (p.call.timeouts.hard > 0) {
- j = p.call.timeouts.hard * HZ;
- now = jiffies;
- j += now;
- WRITE_ONCE(call->expect_term_by, j);
- rxrpc_reduce_call_timer(call, j, now,
- rxrpc_timer_set_for_hard);
+ ktime_t delay = ms_to_ktime(p.call.timeouts.hard * MSEC_PER_SEC);
+
+ WRITE_ONCE(call->expect_term_by,
+ ktime_add(p.call.timeouts.hard,
+ ktime_get_real()));
+ trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_hard);
+ rxrpc_poke_call(call, rxrpc_call_poke_set_timeout);
+
}
break;
}
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
index ecaeb4ecfb58..c9bedd0e2d86 100644
--- a/net/rxrpc/sysctl.c
+++ b/net/rxrpc/sysctl.c
@@ -15,6 +15,8 @@ static const unsigned int four = 4;
static const unsigned int max_backlog = RXRPC_BACKLOG_MAX - 1;
static const unsigned int n_65535 = 65535;
static const unsigned int n_max_acks = 255;
+static const unsigned long one_ms = 1;
+static const unsigned long max_ms = 1000;
static const unsigned long one_jiffy = 1;
static const unsigned long max_jiffies = MAX_JIFFY_OFFSET;
#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
@@ -28,24 +30,24 @@ static const unsigned long max_500 = 500;
* information on the individual parameters.
*/
static struct ctl_table rxrpc_sysctl_table[] = {
- /* Values measured in milliseconds but used in jiffies */
+ /* Values measured in milliseconds */
{
.procname = "soft_ack_delay",
.data = &rxrpc_soft_ack_delay,
.maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_doulongvec_ms_jiffies_minmax,
- .extra1 = (void *)&one_jiffy,
- .extra2 = (void *)&max_jiffies,
+ .proc_handler = proc_doulongvec_minmax,
+ .extra1 = (void *)&one_ms,
+ .extra2 = (void *)&max_ms,
},
{
.procname = "idle_ack_delay",
.data = &rxrpc_idle_ack_delay,
.maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_doulongvec_ms_jiffies_minmax,
- .extra1 = (void *)&one_jiffy,
- .extra2 = (void *)&max_jiffies,
+ .proc_handler = proc_doulongvec_minmax,
+ .extra1 = (void *)&one_ms,
+ .extra2 = (void *)&max_ms,
},
{
.procname = "idle_conn_expiry",
diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c
index d43be8512386..b2a82ab756c2 100644
--- a/net/rxrpc/txbuf.c
+++ b/net/rxrpc/txbuf.c
@@ -14,45 +14,146 @@ static atomic_t rxrpc_txbuf_debug_ids;
atomic_t rxrpc_nr_txbuf;
/*
- * Allocate and partially initialise an I/O request structure.
+ * Allocate and partially initialise a data transmission buffer.
*/
-struct rxrpc_txbuf *rxrpc_alloc_txbuf(struct rxrpc_call *call, u8 packet_type,
- gfp_t gfp)
+struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t data_size,
+ size_t data_align, gfp_t gfp)
{
+ struct rxrpc_wire_header *whdr;
struct rxrpc_txbuf *txb;
+ size_t total, hoff = 0;
+ void *buf;
txb = kmalloc(sizeof(*txb), gfp);
- if (txb) {
- INIT_LIST_HEAD(&txb->call_link);
- INIT_LIST_HEAD(&txb->tx_link);
- refcount_set(&txb->ref, 1);
- txb->call_debug_id = call->debug_id;
- txb->debug_id = atomic_inc_return(&rxrpc_txbuf_debug_ids);
- txb->space = sizeof(txb->data);
- txb->len = 0;
- txb->offset = 0;
- txb->flags = 0;
- txb->ack_why = 0;
- txb->seq = call->tx_prepared + 1;
- txb->wire.epoch = htonl(call->conn->proto.epoch);
- txb->wire.cid = htonl(call->cid);
- txb->wire.callNumber = htonl(call->call_id);
- txb->wire.seq = htonl(txb->seq);
- txb->wire.type = packet_type;
- txb->wire.flags = call->conn->out_clientflag;
- txb->wire.userStatus = 0;
- txb->wire.securityIndex = call->security_ix;
- txb->wire._rsvd = 0;
- txb->wire.serviceId = htons(call->dest_srx.srx_service);
-
- trace_rxrpc_txbuf(txb->debug_id,
- txb->call_debug_id, txb->seq, 1,
- packet_type == RXRPC_PACKET_TYPE_DATA ?
- rxrpc_txbuf_alloc_data :
- rxrpc_txbuf_alloc_ack);
- atomic_inc(&rxrpc_nr_txbuf);
+ if (!txb)
+ return NULL;
+
+ if (data_align)
+ hoff = round_up(sizeof(*whdr), data_align) - sizeof(*whdr);
+ total = hoff + sizeof(*whdr) + data_size;
+
+ mutex_lock(&call->conn->tx_data_alloc_lock);
+ buf = page_frag_alloc_align(&call->conn->tx_data_alloc, total, gfp,
+ ~(data_align - 1) & ~(L1_CACHE_BYTES - 1));
+ mutex_unlock(&call->conn->tx_data_alloc_lock);
+ if (!buf) {
+ kfree(txb);
+ return NULL;
+ }
+
+ whdr = buf + hoff;
+
+ INIT_LIST_HEAD(&txb->call_link);
+ INIT_LIST_HEAD(&txb->tx_link);
+ refcount_set(&txb->ref, 1);
+ txb->last_sent = KTIME_MIN;
+ txb->call_debug_id = call->debug_id;
+ txb->debug_id = atomic_inc_return(&rxrpc_txbuf_debug_ids);
+ txb->space = data_size;
+ txb->len = 0;
+ txb->offset = sizeof(*whdr);
+ txb->flags = call->conn->out_clientflag;
+ txb->ack_why = 0;
+ txb->seq = call->tx_prepared + 1;
+ txb->serial = 0;
+ txb->cksum = 0;
+ txb->nr_kvec = 1;
+ txb->kvec[0].iov_base = whdr;
+ txb->kvec[0].iov_len = sizeof(*whdr);
+
+ whdr->epoch = htonl(call->conn->proto.epoch);
+ whdr->cid = htonl(call->cid);
+ whdr->callNumber = htonl(call->call_id);
+ whdr->seq = htonl(txb->seq);
+ whdr->type = RXRPC_PACKET_TYPE_DATA;
+ whdr->flags = 0;
+ whdr->userStatus = 0;
+ whdr->securityIndex = call->security_ix;
+ whdr->_rsvd = 0;
+ whdr->serviceId = htons(call->dest_srx.srx_service);
+
+ trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, 1,
+ rxrpc_txbuf_alloc_data);
+
+ atomic_inc(&rxrpc_nr_txbuf);
+ return txb;
+}
+
+/*
+ * Allocate and partially initialise an ACK packet.
+ */
+struct rxrpc_txbuf *rxrpc_alloc_ack_txbuf(struct rxrpc_call *call, size_t sack_size)
+{
+ struct rxrpc_wire_header *whdr;
+ struct rxrpc_acktrailer *trailer;
+ struct rxrpc_ackpacket *ack;
+ struct rxrpc_txbuf *txb;
+ gfp_t gfp = rcu_read_lock_held() ? GFP_ATOMIC | __GFP_NOWARN : GFP_NOFS;
+ void *buf, *buf2 = NULL;
+ u8 *filler;
+
+ txb = kmalloc(sizeof(*txb), gfp);
+ if (!txb)
+ return NULL;
+
+ buf = page_frag_alloc(&call->local->tx_alloc,
+ sizeof(*whdr) + sizeof(*ack) + 1 + 3 + sizeof(*trailer), gfp);
+ if (!buf) {
+ kfree(txb);
+ return NULL;
+ }
+
+ if (sack_size) {
+ buf2 = page_frag_alloc(&call->local->tx_alloc, sack_size, gfp);
+ if (!buf2) {
+ page_frag_free(buf);
+ kfree(txb);
+ return NULL;
+ }
}
+ whdr = buf;
+ ack = buf + sizeof(*whdr);
+ filler = buf + sizeof(*whdr) + sizeof(*ack) + 1;
+ trailer = buf + sizeof(*whdr) + sizeof(*ack) + 1 + 3;
+
+ INIT_LIST_HEAD(&txb->call_link);
+ INIT_LIST_HEAD(&txb->tx_link);
+ refcount_set(&txb->ref, 1);
+ txb->call_debug_id = call->debug_id;
+ txb->debug_id = atomic_inc_return(&rxrpc_txbuf_debug_ids);
+ txb->space = 0;
+ txb->len = sizeof(*whdr) + sizeof(*ack) + 3 + sizeof(*trailer);
+ txb->offset = 0;
+ txb->flags = call->conn->out_clientflag;
+ txb->ack_rwind = 0;
+ txb->seq = 0;
+ txb->serial = 0;
+ txb->cksum = 0;
+ txb->nr_kvec = 3;
+ txb->kvec[0].iov_base = whdr;
+ txb->kvec[0].iov_len = sizeof(*whdr) + sizeof(*ack);
+ txb->kvec[1].iov_base = buf2;
+ txb->kvec[1].iov_len = sack_size;
+ txb->kvec[2].iov_base = filler;
+ txb->kvec[2].iov_len = 3 + sizeof(*trailer);
+
+ whdr->epoch = htonl(call->conn->proto.epoch);
+ whdr->cid = htonl(call->cid);
+ whdr->callNumber = htonl(call->call_id);
+ whdr->seq = 0;
+ whdr->type = RXRPC_PACKET_TYPE_ACK;
+ whdr->flags = 0;
+ whdr->userStatus = 0;
+ whdr->securityIndex = call->security_ix;
+ whdr->_rsvd = 0;
+ whdr->serviceId = htons(call->dest_srx.srx_service);
+
+ get_page(virt_to_head_page(trailer));
+
+ trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, 1,
+ rxrpc_txbuf_alloc_ack);
+ atomic_inc(&rxrpc_nr_txbuf);
return txb;
}
@@ -71,12 +172,15 @@ void rxrpc_see_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what)
trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, r, what);
}
-static void rxrpc_free_txbuf(struct rcu_head *rcu)
+static void rxrpc_free_txbuf(struct rxrpc_txbuf *txb)
{
- struct rxrpc_txbuf *txb = container_of(rcu, struct rxrpc_txbuf, rcu);
+ int i;
trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, 0,
rxrpc_txbuf_free);
+ for (i = 0; i < txb->nr_kvec; i++)
+ if (txb->kvec[i].iov_base)
+ page_frag_free(txb->kvec[i].iov_base);
kfree(txb);
atomic_dec(&rxrpc_nr_txbuf);
}
@@ -95,7 +199,7 @@ void rxrpc_put_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what)
dead = __refcount_dec_and_test(&txb->ref, &r);
trace_rxrpc_txbuf(debug_id, call_debug_id, seq, r - 1, what);
if (dead)
- call_rcu(&txb->rcu, rxrpc_free_txbuf);
+ rxrpc_free_txbuf(txb);
}
}