diff options
Diffstat (limited to 'net/mptcp/protocol.h')
-rw-r--r-- | net/mptcp/protocol.h | 247 |
1 files changed, 186 insertions, 61 deletions
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 07f6242afc1a..ad21925af061 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -12,8 +12,7 @@ #include <net/inet_connection_sock.h> #include <uapi/linux/mptcp.h> #include <net/genetlink.h> - -#include "mptcp_pm_gen.h" +#include <net/rstreason.h> #define MPTCP_SUPPORTED_VERSION 1 @@ -113,10 +112,9 @@ #define MPTCP_RST_TRANSIENT BIT(0) /* MPTCP socket atomic flags */ -#define MPTCP_NOSPACE 1 -#define MPTCP_WORK_RTX 2 -#define MPTCP_FALLBACK_DONE 4 -#define MPTCP_WORK_CLOSE_SUBFLOW 5 +#define MPTCP_WORK_RTX 1 +#define MPTCP_FALLBACK_DONE 2 +#define MPTCP_WORK_CLOSE_SUBFLOW 3 /* MPTCP socket release cb flags */ #define MPTCP_PUSH_PENDING 1 @@ -151,22 +149,24 @@ struct mptcp_options_received { u32 subflow_seq; u16 data_len; __sum16 csum; - u16 suboptions; + struct_group(status, + u16 suboptions; + u16 use_map:1, + dsn64:1, + data_fin:1, + use_ack:1, + ack64:1, + mpc_map:1, + reset_reason:4, + reset_transient:1, + echo:1, + backup:1, + deny_join_id0:1, + __unused:2; + ); + u8 join_id; u32 token; u32 nonce; - u16 use_map:1, - dsn64:1, - data_fin:1, - use_ack:1, - ack64:1, - mpc_map:1, - reset_reason:4, - reset_transient:1, - echo:1, - backup:1, - deny_join_id0:1, - __unused:2; - u8 join_id; u64 thmac; u8 hmac[MPTCPOPT_HMAC_LEN]; struct mptcp_addr_info addr; @@ -238,6 +238,12 @@ struct mptcp_pm_data { struct mptcp_rm_list rm_list_rx; }; +struct mptcp_pm_local { + struct mptcp_addr_info addr; + u8 flags; + int ifindex; +}; + struct mptcp_pm_addr_entry { struct list_head list; struct mptcp_addr_info addr; @@ -260,8 +266,10 @@ struct mptcp_data_frag { struct mptcp_sock { /* inet_connection_sock must be the first member */ struct inet_connection_sock sk; - u64 local_key; - u64 remote_key; + u64 local_key; /* protected by the first subflow socket lock + * lockless access read + */ + u64 remote_key; /* same as above */ u64 write_seq; u64 bytes_sent; u64 snd_nxt; @@ -281,6 +289,9 @@ struct mptcp_sock { u64 bytes_acked; u64 snd_una; u64 wnd_end; + u32 last_data_sent; + u32 last_data_recv; + u32 last_ack_recv; unsigned long timer_ival; u32 token; int rmem_released; @@ -306,6 +317,10 @@ struct mptcp_sock { in_accept_queue:1, free_first:1, rcvspace_init:1; + u32 notsent_lowat; + int keepalive_cnt; + int keepalive_idle; + int keepalive_intvl; struct work_struct work; struct sk_buff *ooo_last_skb; struct rb_root out_of_order_queue; @@ -341,12 +356,30 @@ struct mptcp_sock { #define mptcp_for_each_subflow_safe(__msk, __subflow, __tmp) \ list_for_each_entry_safe(__subflow, __tmp, &((__msk)->conn_list), node) +extern struct genl_family mptcp_genl_family; + static inline void msk_owned_by_me(const struct mptcp_sock *msk) { sock_owned_by_me((const struct sock *)msk); } +#ifdef CONFIG_DEBUG_NET +/* MPTCP-specific: we might (indirectly) call this helper with the wrong sk */ +#undef tcp_sk +#define tcp_sk(ptr) ({ \ + typeof(ptr) _ptr = (ptr); \ + WARN_ON(_ptr->sk_protocol != IPPROTO_TCP); \ + container_of_const(_ptr, struct tcp_sock, inet_conn.icsk_inet.sk); \ +}) +#define mptcp_sk(ptr) ({ \ + typeof(ptr) _ptr = (ptr); \ + WARN_ON(_ptr->sk_protocol != IPPROTO_MPTCP); \ + container_of_const(_ptr, struct mptcp_sock, sk.icsk_inet.sk); \ +}) + +#else /* !CONFIG_DEBUG_NET */ #define mptcp_sk(ptr) container_of_const(ptr, struct mptcp_sock, sk.icsk_inet.sk) +#endif /* the msk socket don't use the backlog, also account for the bulk * free memory @@ -361,6 +394,11 @@ static inline int mptcp_win_from_space(const struct sock *sk, int space) return __tcp_win_from_space(mptcp_sk(sk)->scaling_ratio, space); } +static inline int mptcp_space_from_win(const struct sock *sk, int win) +{ + return __tcp_space_from_win(mptcp_sk(sk)->scaling_ratio, win); +} + static inline int __mptcp_space(const struct sock *sk) { return mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - __mptcp_rmem(sk)); @@ -400,7 +438,7 @@ static inline struct mptcp_data_frag *mptcp_rtx_head(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); - if (msk->snd_una == READ_ONCE(msk->snd_nxt)) + if (msk->snd_una == msk->snd_nxt) return NULL; return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list); @@ -418,6 +456,7 @@ struct mptcp_subflow_request_sock { u16 mp_capable : 1, mp_join : 1, backup : 1, + request_bkup : 1, csum_reqd : 1, allow_join_id0 : 1; u8 local_id; @@ -476,7 +515,6 @@ struct mptcp_subflow_context { request_bkup : 1, mp_capable : 1, /* remote is MPTCP capable */ mp_join : 1, /* remote is JOINing */ - fully_established : 1, /* path validated */ pm_notified : 1, /* PM hook called for established status */ conn_finished : 1, map_valid : 1, @@ -493,9 +531,13 @@ struct mptcp_subflow_context { stale : 1, /* unable to snd/rcv data, do not use for xmit */ valid_csum_seen : 1, /* at least one csum validated */ is_mptfo : 1, /* subflow is doing TFO */ - __unused : 10; + close_event_done : 1, /* has done the post-closed part */ + mpc_drop : 1, /* the MPC option has been dropped in a rtx */ + __unused : 9; bool data_avail; bool scheduled; + bool pm_listener; /* a listener managed by the kernel PM? */ + bool fully_established; /* path validated */ u32 remote_nonce; u64 thmac; u32 local_nonce; @@ -538,7 +580,7 @@ struct mptcp_subflow_context { static inline struct mptcp_subflow_context * mptcp_subflow_ctx(const struct sock *sk) { - struct inet_connection_sock *icsk = inet_csk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); /* Use RCU on icsk_ulp_data only for sock diag code */ return (__force struct mptcp_subflow_context *)icsk->icsk_ulp_data; @@ -558,6 +600,43 @@ mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow) WRITE_ONCE(subflow->local_id, -1); } +/* Convert reset reasons in MPTCP to enum sk_rst_reason type */ +static inline enum sk_rst_reason +sk_rst_convert_mptcp_reason(u32 reason) +{ + switch (reason) { + case MPTCP_RST_EUNSPEC: + return SK_RST_REASON_MPTCP_RST_EUNSPEC; + case MPTCP_RST_EMPTCP: + return SK_RST_REASON_MPTCP_RST_EMPTCP; + case MPTCP_RST_ERESOURCE: + return SK_RST_REASON_MPTCP_RST_ERESOURCE; + case MPTCP_RST_EPROHIBIT: + return SK_RST_REASON_MPTCP_RST_EPROHIBIT; + case MPTCP_RST_EWQ2BIG: + return SK_RST_REASON_MPTCP_RST_EWQ2BIG; + case MPTCP_RST_EBADPERF: + return SK_RST_REASON_MPTCP_RST_EBADPERF; + case MPTCP_RST_EMIDDLEBOX: + return SK_RST_REASON_MPTCP_RST_EMIDDLEBOX; + default: + /* It should not happen, or else errors may occur + * in MPTCP layer + */ + return SK_RST_REASON_ERROR; + } +} + +static inline void +mptcp_send_active_reset_reason(struct sock *sk) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + enum sk_rst_reason reason; + + reason = sk_rst_convert_mptcp_reason(subflow->reset_reason); + tcp_send_active_reset(sk, GFP_ATOMIC, reason); +} + static inline u64 mptcp_subflow_get_map_offset(const struct mptcp_subflow_context *subflow) { @@ -622,6 +701,12 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net); unsigned int mptcp_close_timeout(const struct sock *sk); int mptcp_get_pm_type(const struct net *net); const char *mptcp_get_scheduler(const struct net *net); + +void mptcp_active_disable(struct sock *sk); +bool mptcp_active_should_disable(struct sock *ssk); +void mptcp_active_enable(struct sock *sk); + +void mptcp_get_available_schedulers(char *buf, size_t maxlen); void __mptcp_subflow_fully_established(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, const struct mptcp_options_received *mp_opt); @@ -649,7 +734,7 @@ bool mptcp_addresses_equal(const struct mptcp_addr_info *a, void mptcp_local_address(const struct sock_common *skc, struct mptcp_addr_info *addr); /* called with sk socket lock held */ -int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, +int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_pm_local *local, const struct mptcp_addr_info *remote); int mptcp_subflow_create_socket(struct sock *sk, unsigned short family, struct socket **new_sock); @@ -677,10 +762,15 @@ static inline u64 mptcp_data_avail(const struct mptcp_sock *msk) static inline bool mptcp_epollin_ready(const struct sock *sk) { + u64 data_avail = mptcp_data_avail(mptcp_sk(sk)); + + if (!data_avail) + return false; + /* mptcp doesn't have to deal with small skbs in the receive queue, - * at it can always coalesce them + * as it can always coalesce them */ - return (mptcp_data_avail(mptcp_sk(sk)) >= sk->sk_rcvlowat) || + return (data_avail >= sk->sk_rcvlowat) || (mem_cgroup_sockets_enabled && sk->sk_memcg && mem_cgroup_under_socket_pressure(sk->sk_memcg)) || READ_ONCE(tcp_memory_pressure); @@ -697,7 +787,7 @@ static inline bool __tcp_can_send(const struct sock *ssk) static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow) { /* can't send if JOIN hasn't completed yet (i.e. is usable for mptcp) */ - if (subflow->request_join && !subflow->fully_established) + if (subflow->request_join && !READ_ONCE(subflow->fully_established)) return false; return __tcp_can_send(mptcp_subflow_tcp_sock(subflow)); @@ -790,14 +880,36 @@ static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk) READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt); } +static inline u32 mptcp_notsent_lowat(const struct sock *sk) +{ + struct net *net = sock_net(sk); + u32 val; + + val = READ_ONCE(mptcp_sk(sk)->notsent_lowat); + return val ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat); +} + +static inline bool mptcp_stream_memory_free(const struct sock *sk, int wake) +{ + const struct mptcp_sock *msk = mptcp_sk(sk); + u32 notsent_bytes; + + notsent_bytes = READ_ONCE(msk->write_seq) - READ_ONCE(msk->snd_nxt); + return (notsent_bytes << wake) < mptcp_notsent_lowat(sk); +} + +static inline bool __mptcp_stream_is_writeable(const struct sock *sk, int wake) +{ + return mptcp_stream_memory_free(sk, wake) && + __sk_stream_is_writeable(sk, wake); +} + static inline void mptcp_write_space(struct sock *sk) { - if (sk_stream_is_writeable(sk)) { - /* pairs with memory barrier in mptcp_poll */ - smp_mb(); - if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags)) - sk_stream_write_space(sk); - } + /* pairs with memory barrier in mptcp_poll */ + smp_mb(); + if (mptcp_stream_memory_free(sk, 1)) + sk_stream_write_space(sk); } static inline void __mptcp_sync_sndbuf(struct sock *sk) @@ -808,7 +920,7 @@ static inline void __mptcp_sync_sndbuf(struct sock *sk) if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) return; - new_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[0]; + new_sndbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[0]); mptcp_for_each_subflow(mptcp_sk(sk), subflow) { ssk_sndbuf = READ_ONCE(mptcp_subflow_tcp_sock(subflow)->sk_sndbuf); @@ -901,6 +1013,8 @@ void mptcp_pm_add_addr_received(const struct sock *ssk, void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk, const struct mptcp_addr_info *addr); void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk); +bool mptcp_pm_nl_is_init_remote_addr(struct mptcp_sock *msk, + const struct mptcp_addr_info *remote); void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk); void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); @@ -920,29 +1034,19 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk, struct mptcp_pm_add_entry * mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk, const struct mptcp_addr_info *addr); -int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, - unsigned int id, - u8 *flags, int *ifindex); -int mptcp_pm_nl_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, - u8 *flags, int *ifindex); -int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, - unsigned int id, - u8 *flags, int *ifindex); -int mptcp_pm_set_flags(struct net *net, struct nlattr *token, - struct mptcp_pm_addr_entry *loc, - struct mptcp_pm_addr_entry *rem, u8 bkup); -int mptcp_pm_nl_set_flags(struct net *net, struct mptcp_pm_addr_entry *addr, u8 bkup); -int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token, - struct mptcp_pm_addr_entry *loc, - struct mptcp_pm_addr_entry *rem, u8 bkup); +bool mptcp_lookup_subflow_by_saddr(const struct list_head *list, + const struct mptcp_addr_info *saddr); +bool mptcp_remove_anno_list_by_saddr(struct mptcp_sock *msk, + const struct mptcp_addr_info *addr); +int mptcp_pm_set_flags(struct sk_buff *skb, struct genl_info *info); +int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info); +int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info); int mptcp_pm_announce_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, bool echo); int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); -int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); -void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list); -void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, - struct list_head *rm_list); +void mptcp_pm_remove_addr_entry(struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *entry); void mptcp_free_local_addr_list(struct mptcp_sock *msk); @@ -958,6 +1062,8 @@ void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflo const struct mptcp_options_received *mp_opt); void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow, struct request_sock *req); +int mptcp_nl_fill_addr(struct sk_buff *skb, + struct mptcp_pm_addr_entry *entry); static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk) { @@ -1022,6 +1128,18 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); +bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc); +bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc); +bool mptcp_userspace_pm_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc); +int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb); +int mptcp_pm_nl_dump_addr(struct sk_buff *msg, + struct netlink_callback *cb); +int mptcp_userspace_pm_dump_addr(struct sk_buff *msg, + struct netlink_callback *cb); +int mptcp_pm_get_addr(struct sk_buff *skb, struct genl_info *info); +int mptcp_pm_nl_get_addr(struct sk_buff *skb, struct genl_info *info); +int mptcp_userspace_pm_get_addr(struct sk_buff *skb, + struct genl_info *info); static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflow) { @@ -1034,8 +1152,6 @@ static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflo void __init mptcp_pm_nl_init(void); void mptcp_pm_nl_work(struct mptcp_sock *msk); -void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, - const struct mptcp_rm_list *rm_list); unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk); unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk); unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk); @@ -1055,7 +1171,6 @@ static inline void mptcp_pm_close_subflow(struct mptcp_sock *msk) spin_unlock_bh(&msk->pm.lock); } -void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk); void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk); static inline struct mptcp_ext *mptcp_get_ext(const struct sk_buff *skb) @@ -1081,9 +1196,11 @@ static inline bool mptcp_check_fallback(const struct sock *sk) static inline void __mptcp_do_fallback(struct mptcp_sock *msk) { if (__mptcp_check_fallback(msk)) { - pr_debug("TCP fallback already done (msk=%p)", msk); + pr_debug("TCP fallback already done (msk=%p)\n", msk); return; } + if (WARN_ON_ONCE(!READ_ONCE(msk->allow_infinite_fallback))) + return; set_bit(MPTCP_FALLBACK_DONE, &msk->flags); } @@ -1117,7 +1234,15 @@ static inline void mptcp_do_fallback(struct sock *ssk) } } -#define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)", __func__, a) +#define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)\n", __func__, a) + +static inline void mptcp_subflow_early_fallback(struct mptcp_sock *msk, + struct mptcp_subflow_context *subflow) +{ + pr_fallback(msk); + subflow->request_mptcp = 0; + __mptcp_do_fallback(msk); +} static inline bool mptcp_check_infinite_map(struct sk_buff *skb) { |