diff options
Diffstat (limited to 'net')
87 files changed, 1834 insertions, 913 deletions
diff --git a/net/9p/mod.c b/net/9p/mod.c index eb9777f05755..253ba824a325 100644 --- a/net/9p/mod.c +++ b/net/9p/mod.c @@ -171,13 +171,11 @@ void v9fs_put_trans(struct p9_trans_module *m) */ static int __init init_p9(void) { - int ret = 0; - p9_error_init(); pr_info("Installing 9P2000 support\n"); p9_trans_fd_init(); - return ret; + return 0; } /** diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 754714c8d752..f12555f23a49 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1132,7 +1132,6 @@ static void hci_cc_le_set_ext_adv_enable(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_cp_le_set_ext_adv_enable *cp; - struct hci_cp_ext_adv_set *adv_set; __u8 status = *((__u8 *) skb->data); BT_DBG("%s status 0x%2.2x", hdev->name, status); @@ -1144,8 +1143,6 @@ static void hci_cc_le_set_ext_adv_enable(struct hci_dev *hdev, if (!cp) return; - adv_set = (void *) cp->data; - hci_dev_lock(hdev); if (cp->enable) { @@ -1483,6 +1480,30 @@ static void hci_cc_le_read_resolv_list_size(struct hci_dev *hdev, hdev->le_resolv_list_size = rp->size; } +static void hci_cc_le_set_addr_resolution_enable(struct hci_dev *hdev, + struct sk_buff *skb) +{ + __u8 *sent, status = *((__u8 *) skb->data); + + BT_DBG("%s status 0x%2.2x", hdev->name, status); + + if (status) + return; + + sent = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_ADDR_RESOLV_ENABLE); + if (!sent) + return; + + hci_dev_lock(hdev); + + if (*sent) + hci_dev_set_flag(hdev, HCI_LL_RPA_RESOLUTION); + else + hci_dev_clear_flag(hdev, HCI_LL_RPA_RESOLUTION); + + hci_dev_unlock(hdev); +} + static void hci_cc_le_read_max_data_len(struct hci_dev *hdev, struct sk_buff *skb) { @@ -3266,6 +3287,10 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb, hci_cc_le_read_resolv_list_size(hdev, skb); break; + case HCI_OP_LE_SET_ADDR_RESOLV_ENABLE: + hci_cc_le_set_addr_resolution_enable(hdev, skb); + break; + case HCI_OP_LE_READ_MAX_DATA_LEN: hci_cc_le_read_max_data_len(hdev, skb); break; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 231602f7cb66..3bdc8f3ca259 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3356,7 +3356,6 @@ int mgmt_phy_configuration_changed(struct hci_dev *hdev, struct sock *skip) static void set_default_phy_complete(struct hci_dev *hdev, u8 status, u16 opcode, struct sk_buff *skb) { - struct mgmt_cp_set_phy_confguration *cp; struct mgmt_pending_cmd *cmd; BT_DBG("status 0x%02x", status); @@ -3367,8 +3366,6 @@ static void set_default_phy_complete(struct hci_dev *hdev, u8 status, if (!cmd) goto unlock; - cp = cmd->param; - if (status) { mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_PHY_CONFIGURATION, diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 22a78eedf4b1..f4078830ea50 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -11,12 +11,14 @@ #include <linux/filter.h> #include <linux/sched/signal.h> -static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx) +static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx, + struct bpf_cgroup_storage *storage) { u32 ret; preempt_disable(); rcu_read_lock(); + bpf_cgroup_storage_set(storage); ret = BPF_PROG_RUN(prog, ctx); rcu_read_unlock(); preempt_enable(); @@ -26,14 +28,19 @@ static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx) static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time) { + struct bpf_cgroup_storage *storage = NULL; u64 time_start, time_spent = 0; u32 ret = 0, i; + storage = bpf_cgroup_storage_alloc(prog); + if (IS_ERR(storage)) + return PTR_ERR(storage); + if (!repeat) repeat = 1; time_start = ktime_get_ns(); for (i = 0; i < repeat; i++) { - ret = bpf_test_run_one(prog, ctx); + ret = bpf_test_run_one(prog, ctx, storage); if (need_resched()) { if (signal_pending(current)) break; @@ -46,6 +53,8 @@ static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time) do_div(time_spent, repeat); *time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent; + bpf_cgroup_storage_free(storage); + return ret; } diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 920665dd92db..20ed7adcf1cc 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1423,10 +1423,10 @@ static void br_multicast_query_received(struct net_bridge *br, br_multicast_mark_router(br, port); } -static int br_ip4_multicast_query(struct net_bridge *br, - struct net_bridge_port *port, - struct sk_buff *skb, - u16 vid) +static void br_ip4_multicast_query(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb, + u16 vid) { const struct iphdr *iph = ip_hdr(skb); struct igmphdr *ih = igmp_hdr(skb); @@ -1439,7 +1439,6 @@ static int br_ip4_multicast_query(struct net_bridge *br, unsigned long now = jiffies; unsigned int offset = skb_transport_offset(skb); __be32 group; - int err = 0; spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || @@ -1498,7 +1497,6 @@ static int br_ip4_multicast_query(struct net_bridge *br, out: spin_unlock(&br->multicast_lock); - return err; } #if IS_ENABLED(CONFIG_IPV6) @@ -1828,7 +1826,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, err = br_ip4_multicast_igmp3_report(br, port, skb_trimmed, vid); break; case IGMP_HOST_MEMBERSHIP_QUERY: - err = br_ip4_multicast_query(br, port, skb_trimmed, vid); + br_ip4_multicast_query(br, port, skb_trimmed, vid); break; case IGMP_HOST_LEAVE_MESSAGE: br_ip4_multicast_leave_group(br, port, ih->group, vid, src); diff --git a/net/compat.c b/net/compat.c index 7242cce5631b..3b2105f6549d 100644 --- a/net/compat.c +++ b/net/compat.c @@ -466,8 +466,7 @@ int compat_sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) ctv = (struct compat_timeval __user *) userstamp; err = -ENOENT; - if (!sock_flag(sk, SOCK_TIMESTAMP)) - sock_enable_timestamp(sk, SOCK_TIMESTAMP); + sock_enable_timestamp(sk, SOCK_TIMESTAMP); tv = ktime_to_timeval(sk->sk_stamp); if (tv.tv_sec == -1) return err; @@ -494,8 +493,7 @@ int compat_sock_get_timestampns(struct sock *sk, struct timespec __user *usersta ctv = (struct compat_timespec __user *) userstamp; err = -ENOENT; - if (!sock_flag(sk, SOCK_TIMESTAMP)) - sock_enable_timestamp(sk, SOCK_TIMESTAMP); + sock_enable_timestamp(sk, SOCK_TIMESTAMP); ts = ktime_to_timespec(sk->sk_stamp); if (ts.tv_sec == -1) return err; diff --git a/net/core/dev.c b/net/core/dev.c index f68122f0ab02..325fc5088370 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2176,6 +2176,7 @@ static void netif_reset_xps_queues(struct net_device *dev, u16 offset, if (!static_key_false(&xps_needed)) return; + cpus_read_lock(); mutex_lock(&xps_map_mutex); if (static_key_false(&xps_rxqs_needed)) { @@ -2199,10 +2200,11 @@ static void netif_reset_xps_queues(struct net_device *dev, u16 offset, out_no_maps: if (static_key_enabled(&xps_rxqs_needed)) - static_key_slow_dec(&xps_rxqs_needed); + static_key_slow_dec_cpuslocked(&xps_rxqs_needed); - static_key_slow_dec(&xps_needed); + static_key_slow_dec_cpuslocked(&xps_needed); mutex_unlock(&xps_map_mutex); + cpus_read_unlock(); } static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index) @@ -2250,6 +2252,7 @@ static struct xps_map *expand_xps_map(struct xps_map *map, int attr_index, return new_map; } +/* Must be called under cpus_read_lock */ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask, u16 index, bool is_rxqs_map) { @@ -2317,9 +2320,9 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask, if (!new_dev_maps) goto out_no_new_maps; - static_key_slow_inc(&xps_needed); + static_key_slow_inc_cpuslocked(&xps_needed); if (is_rxqs_map) - static_key_slow_inc(&xps_rxqs_needed); + static_key_slow_inc_cpuslocked(&xps_rxqs_needed); for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids), j < nr_ids;) { @@ -2448,11 +2451,18 @@ error: kfree(new_dev_maps); return -ENOMEM; } +EXPORT_SYMBOL_GPL(__netif_set_xps_queue); int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, u16 index) { - return __netif_set_xps_queue(dev, cpumask_bits(mask), index, false); + int ret; + + cpus_read_lock(); + ret = __netif_set_xps_queue(dev, cpumask_bits(mask), index, false); + cpus_read_unlock(); + + return ret; } EXPORT_SYMBOL(netif_set_xps_queue); diff --git a/net/core/filter.c b/net/core/filter.c index 7509bb7f0694..587bbfbd7db3 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3814,6 +3814,30 @@ static const struct bpf_func_proto bpf_get_socket_cookie_proto = { .arg1_type = ARG_PTR_TO_CTX, }; +BPF_CALL_1(bpf_get_socket_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx) +{ + return sock_gen_cookie(ctx->sk); +} + +static const struct bpf_func_proto bpf_get_socket_cookie_sock_addr_proto = { + .func = bpf_get_socket_cookie_sock_addr, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + +BPF_CALL_1(bpf_get_socket_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx) +{ + return sock_gen_cookie(ctx->sk); +} + +static const struct bpf_func_proto bpf_get_socket_cookie_sock_ops_proto = { + .func = bpf_get_socket_cookie_sock_ops, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb) { struct sock *sk = sk_to_full_sk(skb->sk); @@ -4544,26 +4568,28 @@ BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset, { struct seg6_bpf_srh_state *srh_state = this_cpu_ptr(&seg6_bpf_srh_states); + struct ipv6_sr_hdr *srh = srh_state->srh; void *srh_tlvs, *srh_end, *ptr; - struct ipv6_sr_hdr *srh; int srhoff = 0; - if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) + if (srh == NULL) return -EINVAL; - srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); srh_tlvs = (void *)((char *)srh + ((srh->first_segment + 1) << 4)); srh_end = (void *)((char *)srh + sizeof(*srh) + srh_state->hdrlen); ptr = skb->data + offset; if (ptr >= srh_tlvs && ptr + len <= srh_end) - srh_state->valid = 0; + srh_state->valid = false; else if (ptr < (void *)&srh->flags || ptr + len > (void *)&srh->segments) return -EFAULT; if (unlikely(bpf_try_make_writable(skb, offset + len))) return -EFAULT; + if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) + return -EINVAL; + srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); memcpy(skb->data + offset, from, len); return 0; @@ -4579,52 +4605,78 @@ static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = { .arg4_type = ARG_CONST_SIZE }; -BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb, - u32, action, void *, param, u32, param_len) +static void bpf_update_srh_state(struct sk_buff *skb) { struct seg6_bpf_srh_state *srh_state = this_cpu_ptr(&seg6_bpf_srh_states); - struct ipv6_sr_hdr *srh; int srhoff = 0; - int err; - - if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) - return -EINVAL; - srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); - if (!srh_state->valid) { - if (unlikely((srh_state->hdrlen & 7) != 0)) - return -EBADMSG; - - srh->hdrlen = (u8)(srh_state->hdrlen >> 3); - if (unlikely(!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3))) - return -EBADMSG; - - srh_state->valid = 1; + if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) { + srh_state->srh = NULL; + } else { + srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); + srh_state->hdrlen = srh_state->srh->hdrlen << 3; + srh_state->valid = true; } +} + +BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb, + u32, action, void *, param, u32, param_len) +{ + struct seg6_bpf_srh_state *srh_state = + this_cpu_ptr(&seg6_bpf_srh_states); + int hdroff = 0; + int err; switch (action) { case SEG6_LOCAL_ACTION_END_X: + if (!seg6_bpf_has_valid_srh(skb)) + return -EBADMSG; if (param_len != sizeof(struct in6_addr)) return -EINVAL; return seg6_lookup_nexthop(skb, (struct in6_addr *)param, 0); case SEG6_LOCAL_ACTION_END_T: + if (!seg6_bpf_has_valid_srh(skb)) + return -EBADMSG; if (param_len != sizeof(int)) return -EINVAL; return seg6_lookup_nexthop(skb, NULL, *(int *)param); + case SEG6_LOCAL_ACTION_END_DT6: + if (!seg6_bpf_has_valid_srh(skb)) + return -EBADMSG; + if (param_len != sizeof(int)) + return -EINVAL; + + if (ipv6_find_hdr(skb, &hdroff, IPPROTO_IPV6, NULL, NULL) < 0) + return -EBADMSG; + if (!pskb_pull(skb, hdroff)) + return -EBADMSG; + + skb_postpull_rcsum(skb, skb_network_header(skb), hdroff); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + skb->encapsulation = 0; + + bpf_compute_data_pointers(skb); + bpf_update_srh_state(skb); + return seg6_lookup_nexthop(skb, NULL, *(int *)param); case SEG6_LOCAL_ACTION_END_B6: + if (srh_state->srh && !seg6_bpf_has_valid_srh(skb)) + return -EBADMSG; err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6_INLINE, param, param_len); if (!err) - srh_state->hdrlen = - ((struct ipv6_sr_hdr *)param)->hdrlen << 3; + bpf_update_srh_state(skb); + return err; case SEG6_LOCAL_ACTION_END_B6_ENCAP: + if (srh_state->srh && !seg6_bpf_has_valid_srh(skb)) + return -EBADMSG; err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6, param, param_len); if (!err) - srh_state->hdrlen = - ((struct ipv6_sr_hdr *)param)->hdrlen << 3; + bpf_update_srh_state(skb); + return err; default: return -EINVAL; @@ -4646,15 +4698,14 @@ BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset, { struct seg6_bpf_srh_state *srh_state = this_cpu_ptr(&seg6_bpf_srh_states); + struct ipv6_sr_hdr *srh = srh_state->srh; void *srh_end, *srh_tlvs, *ptr; - struct ipv6_sr_hdr *srh; struct ipv6hdr *hdr; int srhoff = 0; int ret; - if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) + if (unlikely(srh == NULL)) return -EINVAL; - srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); srh_tlvs = (void *)((unsigned char *)srh + sizeof(*srh) + ((srh->first_segment + 1) << 4)); @@ -4684,8 +4735,11 @@ BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset, hdr = (struct ipv6hdr *)skb->data; hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); + if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) + return -EINVAL; + srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); srh_state->hdrlen += len; - srh_state->valid = 0; + srh_state->valid = false; return 0; } @@ -4768,6 +4822,8 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) */ case BPF_FUNC_get_current_uid_gid: return &bpf_get_current_uid_gid_proto; + case BPF_FUNC_get_local_storage: + return &bpf_get_local_storage_proto; default: return bpf_base_func_proto(func_id); } @@ -4790,6 +4846,10 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) default: return NULL; } + case BPF_FUNC_get_socket_cookie: + return &bpf_get_socket_cookie_sock_addr_proto; + case BPF_FUNC_get_local_storage: + return &bpf_get_local_storage_proto; default: return bpf_base_func_proto(func_id); } @@ -4813,6 +4873,17 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) } static const struct bpf_func_proto * +cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +{ + switch (func_id) { + case BPF_FUNC_get_local_storage: + return &bpf_get_local_storage_proto; + default: + return sk_filter_func_proto(func_id, prog); + } +} + +static const struct bpf_func_proto * tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { @@ -4932,6 +5003,10 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sock_map_update_proto; case BPF_FUNC_sock_hash_update: return &bpf_sock_hash_update_proto; + case BPF_FUNC_get_socket_cookie: + return &bpf_get_socket_cookie_sock_ops_proto; + case BPF_FUNC_get_local_storage: + return &bpf_get_local_storage_proto; default: return bpf_base_func_proto(func_id); } @@ -4951,6 +5026,8 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_msg_cork_bytes_proto; case BPF_FUNC_msg_pull_data: return &bpf_msg_pull_data_proto; + case BPF_FUNC_get_local_storage: + return &bpf_get_local_storage_proto; default: return bpf_base_func_proto(func_id); } @@ -4978,6 +5055,8 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_redirect_map_proto; case BPF_FUNC_sk_redirect_hash: return &bpf_sk_redirect_hash_proto; + case BPF_FUNC_get_local_storage: + return &bpf_get_local_storage_proto; default: return bpf_base_func_proto(func_id); } @@ -6782,7 +6861,7 @@ const struct bpf_prog_ops xdp_prog_ops = { }; const struct bpf_verifier_ops cg_skb_verifier_ops = { - .get_func_proto = sk_filter_func_proto, + .get_func_proto = cg_skb_func_proto, .is_valid_access = sk_filter_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, }; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 08a5184f4b34..ce9eeeb7c024 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -154,7 +154,9 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb, !dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_PORTS) && !dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_ENC_IP)) + FLOW_DISSECTOR_KEY_ENC_IP) && + !dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_OPTS)) return; info = skb_tunnel_info(skb); @@ -224,6 +226,21 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb, ip->tos = key->tos; ip->ttl = key->ttl; } + + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_OPTS)) { + struct flow_dissector_key_enc_opts *enc_opt; + + enc_opt = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_OPTS, + target_container); + + if (info->options_len) { + enc_opt->len = info->options_len; + ip_tunnel_info_opts_get(enc_opt->data, info); + enc_opt->dst_opt_type = info->key.tun_flags & + TUNNEL_OPTIONS_PRESENT; + } + } } EXPORT_SYMBOL(skb_flow_dissect_tunnel_info); diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 98fd12721221..e4e442d70c2d 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -112,7 +112,7 @@ static void est_timer(struct timer_list *t) * @bstats: basic statistics * @cpu_bstats: bstats per cpu * @rate_est: rate estimator statistics - * @stats_lock: statistics lock + * @lock: lock for statistics and control path * @running: qdisc running seqcount * @opt: rate estimator configuration TLV * @@ -128,7 +128,7 @@ static void est_timer(struct timer_list *t) int gen_new_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu_bstats, struct net_rate_estimator __rcu **rate_est, - spinlock_t *stats_lock, + spinlock_t *lock, seqcount_t *running, struct nlattr *opt) { @@ -154,19 +154,22 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, seqcount_init(&est->seq); intvl_log = parm->interval + 2; est->bstats = bstats; - est->stats_lock = stats_lock; + est->stats_lock = lock; est->running = running; est->ewma_log = parm->ewma_log; est->intvl_log = intvl_log; est->cpu_bstats = cpu_bstats; - if (stats_lock) + if (lock) local_bh_disable(); est_fetch_counters(est, &b); - if (stats_lock) + if (lock) local_bh_enable(); est->last_bytes = b.bytes; est->last_packets = b.packets; + + if (lock) + spin_lock_bh(lock); old = rcu_dereference_protected(*rate_est, 1); if (old) { del_timer_sync(&old->timer); @@ -179,6 +182,8 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, mod_timer(&est->timer, est->next_jiffies); rcu_assign_pointer(*rate_est, est); + if (lock) + spin_unlock_bh(lock); if (old) kfree_rcu(old, rcu); return 0; @@ -209,7 +214,7 @@ EXPORT_SYMBOL(gen_kill_estimator); * @bstats: basic statistics * @cpu_bstats: bstats per cpu * @rate_est: rate estimator statistics - * @stats_lock: statistics lock + * @lock: lock for statistics and control path * @running: qdisc running seqcount (might be NULL) * @opt: rate estimator configuration TLV * @@ -221,11 +226,11 @@ EXPORT_SYMBOL(gen_kill_estimator); int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu_bstats, struct net_rate_estimator __rcu **rate_est, - spinlock_t *stats_lock, + spinlock_t *lock, seqcount_t *running, struct nlattr *opt) { return gen_new_estimator(bstats, cpu_bstats, rate_est, - stats_lock, running, opt); + lock, running, opt); } EXPORT_SYMBOL(gen_replace_estimator); diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index e45098593dc0..3e85437f7106 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -50,10 +50,8 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt, * mixing with BH RCU lock doesn't work. */ preempt_disable(); - rcu_read_lock(); bpf_compute_data_pointers(skb); ret = bpf_prog_run_save_cb(lwt->prog, skb); - rcu_read_unlock(); switch (ret) { case BPF_OK: diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 0a95bcf64cdc..bd67c4d0fcfd 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -26,6 +26,7 @@ #include <linux/pm_runtime.h> #include <linux/of.h> #include <linux/of_net.h> +#include <linux/cpu.h> #include "net-sysfs.h" @@ -1400,7 +1401,10 @@ static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf, return err; } + cpus_read_lock(); err = __netif_set_xps_queue(dev, mask, index, true); + cpus_read_unlock(); + kfree(mask); return err ? : len; } diff --git a/net/core/sock.c b/net/core/sock.c index e31233f5ba39..3730eb855095 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2900,8 +2900,8 @@ EXPORT_SYMBOL(lock_sock_fast); int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) { struct timeval tv; - if (!sock_flag(sk, SOCK_TIMESTAMP)) - sock_enable_timestamp(sk, SOCK_TIMESTAMP); + + sock_enable_timestamp(sk, SOCK_TIMESTAMP); tv = ktime_to_timeval(sk->sk_stamp); if (tv.tv_sec == -1) return -ENOENT; @@ -2916,8 +2916,8 @@ EXPORT_SYMBOL(sock_get_timestamp); int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp) { struct timespec ts; - if (!sock_flag(sk, SOCK_TIMESTAMP)) - sock_enable_timestamp(sk, SOCK_TIMESTAMP); + + sock_enable_timestamp(sk, SOCK_TIMESTAMP); ts = ktime_to_timespec(sk->sk_stamp); if (ts.tv_sec == -1) return -ENOENT; diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 2b75df469220..842a9c7c73a3 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -229,14 +229,16 @@ static void ccid2_cwnd_restart(struct sock *sk, const u32 now) struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); u32 cwnd = hc->tx_cwnd, restart_cwnd, iwnd = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache); + s32 delta = now - hc->tx_lsndtime; hc->tx_ssthresh = max(hc->tx_ssthresh, (cwnd >> 1) + (cwnd >> 2)); /* don't reduce cwnd below the initial window (IW) */ restart_cwnd = min(cwnd, iwnd); - cwnd >>= (now - hc->tx_lsndtime) / hc->tx_rto; - hc->tx_cwnd = max(cwnd, restart_cwnd); + while ((delta -= hc->tx_rto) >= 0 && cwnd > restart_cwnd) + cwnd >>= 1; + hc->tx_cwnd = max(cwnd, restart_cwnd); hc->tx_cwnd_stamp = now; hc->tx_cwnd_used = 0; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 3107a2e24e6b..1c002c0fb712 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -404,7 +404,7 @@ void dn_rt_cache_flush(int delay) if (delay <= 0) { spin_unlock_bh(&dn_rt_flush_lock); - dn_run_flush(0); + dn_run_flush(NULL); return; } @@ -1920,7 +1920,7 @@ void __init dn_route_init(void) void __exit dn_route_cleanup(void) { del_timer(&dn_route_timer); - dn_run_flush(0); + dn_run_flush(NULL); remove_proc_entry("decnet_cache", init_net.proc_net); dst_entries_destroy(&dn_dst_ops); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 1ba3bde96b55..962c4fd338ba 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -639,7 +639,7 @@ static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e) int ret; /* Port's PHY and MAC both need to be EEE capable */ - if (!dev->phydev) + if (!dev->phydev && !dp->pl) return -ENODEV; if (!ds->ops->set_mac_eee) @@ -659,7 +659,7 @@ static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e) int ret; /* Port's PHY and MAC both need to be EEE capable */ - if (!dev->phydev) + if (!dev->phydev && !dp->pl) return -ENODEV; if (!ds->ops->get_mac_eee) diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index ec7a5da56129..e7857a8ac86d 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -40,9 +40,6 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, static void lowpan_frag_init(struct inet_frag_queue *q, const void *a) { const struct frag_lowpan_compare_key *key = a; - struct lowpan_frag_queue *fq; - - fq = container_of(q, struct lowpan_frag_queue, q); BUILD_BUG_ON(sizeof(*key) > sizeof(q->key)); memcpy(&q->key, key, sizeof(*key)); @@ -52,10 +49,8 @@ static void lowpan_frag_expire(struct timer_list *t) { struct inet_frag_queue *frag = from_timer(frag, t, timer); struct frag_queue *fq; - struct net *net; fq = container_of(frag, struct frag_queue, q); - net = container_of(fq->q.net, struct net, ieee802154_lowpan.frags); spin_lock(&fq->q.lock); diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c index e6ff5128e61a..ca53efa17be1 100644 --- a/net/ieee802154/6lowpan/tx.c +++ b/net/ieee802154/6lowpan/tx.c @@ -265,9 +265,24 @@ netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *ldev) /* We must take a copy of the skb before we modify/replace the ipv6 * header as the header could be used elsewhere */ - skb = skb_unshare(skb, GFP_ATOMIC); - if (!skb) - return NET_XMIT_DROP; + if (unlikely(skb_headroom(skb) < ldev->needed_headroom || + skb_tailroom(skb) < ldev->needed_tailroom)) { + struct sk_buff *nskb; + + nskb = skb_copy_expand(skb, ldev->needed_headroom, + ldev->needed_tailroom, GFP_ATOMIC); + if (likely(nskb)) { + consume_skb(skb); + skb = nskb; + } else { + kfree_skb(skb); + return NET_XMIT_DROP; + } + } else { + skb = skb_unshare(skb, GFP_ATOMIC); + if (!skb) + return NET_XMIT_DROP; + } ret = lowpan_header(skb, ldev, &dgram_size, &dgram_offset); if (ret < 0) { diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index a60658c85a9a..bc6b912603f1 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -25,6 +25,7 @@ #include <linux/termios.h> /* For TIOCOUTQ/INQ */ #include <linux/list.h> #include <linux/slab.h> +#include <linux/socket.h> #include <net/datalink.h> #include <net/psnap.h> #include <net/sock.h> @@ -452,6 +453,7 @@ struct dgram_sock { unsigned int bound:1; unsigned int connected:1; unsigned int want_ack:1; + unsigned int want_lqi:1; unsigned int secen:1; unsigned int secen_override:1; unsigned int seclevel:3; @@ -486,6 +488,7 @@ static int dgram_init(struct sock *sk) struct dgram_sock *ro = dgram_sk(sk); ro->want_ack = 1; + ro->want_lqi = 0; return 0; } @@ -713,6 +716,7 @@ static int dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, size_t copied = 0; int err = -EOPNOTSUPP; struct sk_buff *skb; + struct dgram_sock *ro = dgram_sk(sk); DECLARE_SOCKADDR(struct sockaddr_ieee802154 *, saddr, msg->msg_name); skb = skb_recv_datagram(sk, flags, noblock, &err); @@ -744,6 +748,13 @@ static int dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, *addr_len = sizeof(*saddr); } + if (ro->want_lqi) { + err = put_cmsg(msg, SOL_IEEE802154, WPAN_WANTLQI, + sizeof(uint8_t), &(mac_cb(skb)->lqi)); + if (err) + goto done; + } + if (flags & MSG_TRUNC) copied = skb->len; done: @@ -847,6 +858,9 @@ static int dgram_getsockopt(struct sock *sk, int level, int optname, case WPAN_WANTACK: val = ro->want_ack; break; + case WPAN_WANTLQI: + val = ro->want_lqi; + break; case WPAN_SECURITY: if (!ro->secen_override) val = WPAN_SECURITY_DEFAULT; @@ -892,6 +906,9 @@ static int dgram_setsockopt(struct sock *sk, int level, int optname, case WPAN_WANTACK: ro->want_ack = !!val; break; + case WPAN_WANTLQI: + ro->want_lqi = !!val; + break; case WPAN_SECURITY: if (!ns_capable(net->user_ns, CAP_NET_ADMIN) && !ns_capable(net->user_ns, CAP_NET_RAW)) { diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 6d258a5669e7..bcb11f3a27c0 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -146,7 +146,7 @@ void inet_frag_destroy(struct inet_frag_queue *q) fp = xp; } while (fp); } else { - sum_truesize = skb_rbtree_purge(&q->rb_fragments); + sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments); } sum = sum_truesize + f->qsize; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 0e8f8de77e71..88281fbce88c 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -57,6 +57,57 @@ */ static const char ip_frag_cache_name[] = "ip4-frags"; +/* Use skb->cb to track consecutive/adjacent fragments coming at + * the end of the queue. Nodes in the rb-tree queue will + * contain "runs" of one or more adjacent fragments. + * + * Invariants: + * - next_frag is NULL at the tail of a "run"; + * - the head of a "run" has the sum of all fragment lengths in frag_run_len. + */ +struct ipfrag_skb_cb { + struct inet_skb_parm h; + struct sk_buff *next_frag; + int frag_run_len; +}; + +#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) + +static void ip4_frag_init_run(struct sk_buff *skb) +{ + BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb)); + + FRAG_CB(skb)->next_frag = NULL; + FRAG_CB(skb)->frag_run_len = skb->len; +} + +/* Append skb to the last "run". */ +static void ip4_frag_append_to_last_run(struct inet_frag_queue *q, + struct sk_buff *skb) +{ + RB_CLEAR_NODE(&skb->rbnode); + FRAG_CB(skb)->next_frag = NULL; + + FRAG_CB(q->last_run_head)->frag_run_len += skb->len; + FRAG_CB(q->fragments_tail)->next_frag = skb; + q->fragments_tail = skb; +} + +/* Create a new "run" with the skb. */ +static void ip4_frag_create_run(struct inet_frag_queue *q, struct sk_buff *skb) +{ + if (q->last_run_head) + rb_link_node(&skb->rbnode, &q->last_run_head->rbnode, + &q->last_run_head->rbnode.rb_right); + else + rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node); + rb_insert_color(&skb->rbnode, &q->rb_fragments); + + ip4_frag_init_run(skb); + q->fragments_tail = skb; + q->last_run_head = skb; +} + /* Describe an entry in the "incomplete datagrams" queue. */ struct ipq { struct inet_frag_queue q; @@ -75,8 +126,8 @@ static u8 ip4_frag_ecn(u8 tos) static struct inet_frags ip4_frags; -static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, - struct net_device *dev); +static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, + struct sk_buff *prev_tail, struct net_device *dev); static void ip4_frag_init(struct inet_frag_queue *q, const void *a) @@ -154,7 +205,7 @@ static void ip_expire(struct timer_list *t) __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); __IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT); - if (!qp->q.flags & INET_FRAG_FIRST_IN) + if (!(qp->q.flags & INET_FRAG_FIRST_IN)) goto out; /* sk_buff::dev and sk_buff::rbnode are unionized. So we @@ -168,7 +219,12 @@ static void ip_expire(struct timer_list *t) head = skb_rb_first(&qp->q.rb_fragments); if (!head) goto out; - rb_erase(&head->rbnode, &qp->q.rb_fragments); + if (FRAG_CB(head)->next_frag) + rb_replace_node(&head->rbnode, + &FRAG_CB(head)->next_frag->rbnode, + &qp->q.rb_fragments); + else + rb_erase(&head->rbnode, &qp->q.rb_fragments); memset(&head->rbnode, 0, sizeof(head->rbnode)); barrier(); } @@ -269,7 +325,7 @@ static int ip_frag_reinit(struct ipq *qp) return -ETIMEDOUT; } - sum_truesize = skb_rbtree_purge(&qp->q.rb_fragments); + sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments); sub_frag_mem_limit(qp->q.net, sum_truesize); qp->q.flags = 0; @@ -278,6 +334,7 @@ static int ip_frag_reinit(struct ipq *qp) qp->q.fragments = NULL; qp->q.rb_fragments = RB_ROOT; qp->q.fragments_tail = NULL; + qp->q.last_run_head = NULL; qp->iif = 0; qp->ecn = 0; @@ -289,7 +346,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) { struct net *net = container_of(qp->q.net, struct net, ipv4.frags); struct rb_node **rbn, *parent; - struct sk_buff *skb1; + struct sk_buff *skb1, *prev_tail; struct net_device *dev; unsigned int fragsize; int flags, offset; @@ -367,38 +424,41 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) */ /* Find out where to put this fragment. */ - skb1 = qp->q.fragments_tail; - if (!skb1) { - /* This is the first fragment we've received. */ - rb_link_node(&skb->rbnode, NULL, &qp->q.rb_fragments.rb_node); - qp->q.fragments_tail = skb; - } else if ((skb1->ip_defrag_offset + skb1->len) < end) { - /* This is the common/special case: skb goes to the end. */ + prev_tail = qp->q.fragments_tail; + if (!prev_tail) + ip4_frag_create_run(&qp->q, skb); /* First fragment. */ + else if (prev_tail->ip_defrag_offset + prev_tail->len < end) { + /* This is the common case: skb goes to the end. */ /* Detect and discard overlaps. */ - if (offset < (skb1->ip_defrag_offset + skb1->len)) + if (offset < prev_tail->ip_defrag_offset + prev_tail->len) goto discard_qp; - /* Insert after skb1. */ - rb_link_node(&skb->rbnode, &skb1->rbnode, &skb1->rbnode.rb_right); - qp->q.fragments_tail = skb; + if (offset == prev_tail->ip_defrag_offset + prev_tail->len) + ip4_frag_append_to_last_run(&qp->q, skb); + else + ip4_frag_create_run(&qp->q, skb); } else { - /* Binary search. Note that skb can become the first fragment, but - * not the last (covered above). */ + /* Binary search. Note that skb can become the first fragment, + * but not the last (covered above). + */ rbn = &qp->q.rb_fragments.rb_node; do { parent = *rbn; skb1 = rb_to_skb(parent); if (end <= skb1->ip_defrag_offset) rbn = &parent->rb_left; - else if (offset >= skb1->ip_defrag_offset + skb1->len) + else if (offset >= skb1->ip_defrag_offset + + FRAG_CB(skb1)->frag_run_len) rbn = &parent->rb_right; else /* Found an overlap with skb1. */ goto discard_qp; } while (*rbn); /* Here we have parent properly set, and rbn pointing to - * one of its NULL left/right children. Insert skb. */ + * one of its NULL left/right children. Insert skb. + */ + ip4_frag_init_run(skb); rb_link_node(&skb->rbnode, parent, rbn); + rb_insert_color(&skb->rbnode, &qp->q.rb_fragments); } - rb_insert_color(&skb->rbnode, &qp->q.rb_fragments); if (dev) qp->iif = dev->ifindex; @@ -425,7 +485,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) unsigned long orefdst = skb->_skb_refdst; skb->_skb_refdst = 0UL; - err = ip_frag_reasm(qp, skb, dev); + err = ip_frag_reasm(qp, skb, prev_tail, dev); skb->_skb_refdst = orefdst; return err; } @@ -444,7 +504,7 @@ err: /* Build a new IP datagram from all its fragments. */ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, - struct net_device *dev) + struct sk_buff *prev_tail, struct net_device *dev) { struct net *net = container_of(qp->q.net, struct net, ipv4.frags); struct iphdr *iph; @@ -468,10 +528,16 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, fp = skb_clone(skb, GFP_ATOMIC); if (!fp) goto out_nomem; - rb_replace_node(&skb->rbnode, &fp->rbnode, &qp->q.rb_fragments); + FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag; + if (RB_EMPTY_NODE(&skb->rbnode)) + FRAG_CB(prev_tail)->next_frag = fp; + else + rb_replace_node(&skb->rbnode, &fp->rbnode, + &qp->q.rb_fragments); if (qp->q.fragments_tail == skb) qp->q.fragments_tail = fp; skb_morph(skb, head); + FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag; rb_replace_node(&head->rbnode, &skb->rbnode, &qp->q.rb_fragments); consume_skb(head); @@ -507,7 +573,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, for (i = 0; i < skb_shinfo(head)->nr_frags; i++) plen += skb_frag_size(&skb_shinfo(head)->frags[i]); clone->len = clone->data_len = head->data_len - plen; - skb->truesize += clone->truesize; + head->truesize += clone->truesize; clone->csum = 0; clone->ip_summed = head->ip_summed; add_frag_mem_limit(qp->q.net, clone->truesize); @@ -520,24 +586,36 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, skb_push(head, head->data - skb_network_header(head)); /* Traverse the tree in order, to build frag_list. */ + fp = FRAG_CB(head)->next_frag; rbn = rb_next(&head->rbnode); rb_erase(&head->rbnode, &qp->q.rb_fragments); - while (rbn) { - struct rb_node *rbnext = rb_next(rbn); - fp = rb_to_skb(rbn); - rb_erase(rbn, &qp->q.rb_fragments); - rbn = rbnext; - *nextp = fp; - nextp = &fp->next; - fp->prev = NULL; - memset(&fp->rbnode, 0, sizeof(fp->rbnode)); - head->data_len += fp->len; - head->len += fp->len; - if (head->ip_summed != fp->ip_summed) - head->ip_summed = CHECKSUM_NONE; - else if (head->ip_summed == CHECKSUM_COMPLETE) - head->csum = csum_add(head->csum, fp->csum); - head->truesize += fp->truesize; + while (rbn || fp) { + /* fp points to the next sk_buff in the current run; + * rbn points to the next run. + */ + /* Go through the current run. */ + while (fp) { + *nextp = fp; + nextp = &fp->next; + fp->prev = NULL; + memset(&fp->rbnode, 0, sizeof(fp->rbnode)); + head->data_len += fp->len; + head->len += fp->len; + if (head->ip_summed != fp->ip_summed) + head->ip_summed = CHECKSUM_NONE; + else if (head->ip_summed == CHECKSUM_COMPLETE) + head->csum = csum_add(head->csum, fp->csum); + head->truesize += fp->truesize; + fp = FRAG_CB(fp)->next_frag; + } + /* Move to the next run. */ + if (rbn) { + struct rb_node *rbnext = rb_next(rbn); + + fp = rb_to_skb(rbn); + rb_erase(rbn, &qp->q.rb_fragments); + rbn = rbnext; + } } sub_frag_mem_limit(qp->q.net, head->truesize); @@ -573,6 +651,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, qp->q.fragments = NULL; qp->q.rb_fragments = RB_ROOT; qp->q.fragments_tail = NULL; + qp->q.last_run_head = NULL; return 0; out_nomem: @@ -654,6 +733,28 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user) } EXPORT_SYMBOL(ip_check_defrag); +unsigned int inet_frag_rbtree_purge(struct rb_root *root) +{ + struct rb_node *p = rb_first(root); + unsigned int sum = 0; + + while (p) { + struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode); + + p = rb_next(p); + rb_erase(&skb->rbnode, root); + while (skb) { + struct sk_buff *next = FRAG_CB(skb)->next_frag; + + sum += skb->truesize; + kfree_skb(skb); + skb = next; + } + } + return sum; +} +EXPORT_SYMBOL(inet_frag_rbtree_purge); + #ifdef CONFIG_SYSCTL static int dist_min; diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 8b637f9f23a2..ca61e2a659e7 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -136,7 +136,7 @@ static void dctcp_ce_state_0_to_1(struct sock *sk) */ if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) __tcp_send_ack(sk, ca->prior_rcv_nxt); - tcp_enter_quickack_mode(sk, 1); + inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW; } ca->prior_rcv_nxt = tp->rcv_nxt; @@ -157,7 +157,7 @@ static void dctcp_ce_state_1_to_0(struct sock *sk) */ if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) __tcp_send_ack(sk, ca->prior_rcv_nxt); - tcp_enter_quickack_mode(sk, 1); + inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW; } ca->prior_rcv_nxt = tp->rcv_nxt; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 715d541b52dd..4c2dd9f863f7 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -245,16 +245,16 @@ static void tcp_ecn_queue_cwr(struct tcp_sock *tp) tp->ecn_flags |= TCP_ECN_QUEUE_CWR; } -static void tcp_ecn_accept_cwr(struct tcp_sock *tp, const struct sk_buff *skb) +static void tcp_ecn_accept_cwr(struct sock *sk, const struct sk_buff *skb) { if (tcp_hdr(skb)->cwr) { - tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; + tcp_sk(sk)->ecn_flags &= ~TCP_ECN_DEMAND_CWR; /* If the sender is telling us it has entered CWR, then its * cwnd may be very low (even just 1 packet), so we should ACK * immediately. */ - tcp_enter_quickack_mode((struct sock *)tp, 2); + inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW; } } @@ -4703,7 +4703,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) skb_dst_drop(skb); __skb_pull(skb, tcp_hdr(skb)->doff * 4); - tcp_ecn_accept_cwr(tp, skb); + tcp_ecn_accept_cwr(sk, skb); tp->rx_opt.dsack = 0; @@ -4735,11 +4735,11 @@ queue_and_out: if (!RB_EMPTY_ROOT(&tp->out_of_order_queue)) { tcp_ofo_queue(sk); - /* RFC2581. 4.2. SHOULD send immediate ACK, when + /* RFC5681. 4.2. SHOULD send immediate ACK, when * gap in queue is filled. */ if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) - inet_csk(sk)->icsk_ack.pingpong = 0; + inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW; } if (tp->rx_opt.num_sacks) @@ -5179,7 +5179,9 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat || __tcp_select_window(sk) >= tp->rcv_wnd)) || /* We ACK each frame or... */ - tcp_in_quickack_mode(sk)) { + tcp_in_quickack_mode(sk) || + /* Protocol state mandates a one-time immediate ACK */ + inet_csk(sk)->icsk_ack.pending & ICSK_ACK_NOW) { send_now: tcp_send_ack(sk); return; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 020f6e14a7af..673bba31eb18 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -832,6 +832,7 @@ static int __net_init inet6_net_init(struct net *net) net->ipv6.sysctl.bindv6only = 0; net->ipv6.sysctl.icmpv6_time = 1*HZ; + net->ipv6.sysctl.icmpv6_echo_ignore_all = 0; net->ipv6.sysctl.flowlabel_consistency = 1; net->ipv6.sysctl.auto_flowlabels = IP6_DEFAULT_AUTO_FLOW_LABELS; net->ipv6.sysctl.idgen_retries = 3; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 7f6b1f81c200..c9c53ade55c3 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -794,6 +794,7 @@ out: static int icmpv6_rcv(struct sk_buff *skb) { + struct net *net = dev_net(skb->dev); struct net_device *dev = skb->dev; struct inet6_dev *idev = __in6_dev_get(dev); const struct in6_addr *saddr, *daddr; @@ -843,7 +844,8 @@ static int icmpv6_rcv(struct sk_buff *skb) switch (type) { case ICMPV6_ECHO_REQUEST: - icmpv6_echo_reply(skb); + if (!net->ipv6.sysctl.icmpv6_echo_ignore_all) + icmpv6_echo_reply(skb); break; case ICMPV6_ECHO_REPLY: @@ -1104,6 +1106,13 @@ static struct ctl_table ipv6_icmp_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec_ms_jiffies, }, + { + .procname = "echo_ignore_all", + .data = &init_net.ipv6.sysctl.icmpv6_echo_ignore_all, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { }, }; @@ -1115,9 +1124,10 @@ struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net) sizeof(ipv6_icmp_table_template), GFP_KERNEL); - if (table) + if (table) { table[0].data = &net->ipv6.sysctl.icmpv6_time; - + table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all; + } return table; } #endif diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index fc7dd3a04360..18a3794b0f52 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1129,7 +1129,7 @@ static void ip6gre_tnl_link_config_route(struct ip6_tnl *t, int set_mtu, return; if (rt->dst.dev) { - dev->hard_header_len = rt->dst.dev->hard_header_len + + dev->needed_headroom = rt->dst.dev->hard_header_len + t_hlen; if (set_mtu) { @@ -1155,7 +1155,7 @@ static int ip6gre_calc_hlen(struct ip6_tnl *tunnel) tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen; t_hlen = tunnel->hlen + sizeof(struct ipv6hdr); - tunnel->dev->hard_header_len = LL_MAX_HEADER + t_hlen; + tunnel->dev->needed_headroom = LL_MAX_HEADER + t_hlen; return t_hlen; } @@ -1825,7 +1825,7 @@ static int ip6erspan_calc_hlen(struct ip6_tnl *tunnel) erspan_hdr_len(tunnel->parms.erspan_ver); t_hlen = tunnel->hlen + sizeof(struct ipv6hdr); - tunnel->dev->hard_header_len = LL_MAX_HEADER + t_hlen; + tunnel->dev->needed_headroom = LL_MAX_HEADER + t_hlen; return t_hlen; } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 00e138a44cbb..5df2a58d945c 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1113,7 +1113,7 @@ route_lookup: dst = NULL; goto tx_err_link_failure; } - if (t->parms.collect_md && + if (t->parms.collect_md && ipv6_addr_any(&fl6->saddr) && ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev, &fl6->daddr, 0, &fl6->saddr)) goto tx_err_link_failure; @@ -1133,12 +1133,8 @@ route_lookup: max_headroom += 8; mtu -= 8; } - if (skb->protocol == htons(ETH_P_IPV6)) { - if (mtu < IPV6_MIN_MTU) - mtu = IPV6_MIN_MTU; - } else if (mtu < 576) { - mtu = 576; - } + mtu = max(mtu, skb->protocol == htons(ETH_P_IPV6) ? + IPV6_MIN_MTU : IPV4_MIN_MTU); skb_dst_update_pmtu(skb, mtu); if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) { @@ -1255,6 +1251,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) key = &tun_info->key; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_IPIP; + fl6.saddr = key->u.ipv6.src; fl6.daddr = key->u.ipv6.dst; fl6.flowlabel = key->label; dsfield = key->tos; @@ -1326,6 +1323,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) key = &tun_info->key; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_IPV6; + fl6.saddr = key->u.ipv6.src; fl6.daddr = key->u.ipv6.dst; fl6.flowlabel = key->label; dsfield = key->tos; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ec18b3ce8b6d..7208c16302f6 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -978,10 +978,6 @@ static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from) rt->rt6i_flags &= ~RTF_EXPIRES; rcu_assign_pointer(rt->from, from); dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true); - if (from->fib6_metrics != &dst_default_metrics) { - rt->dst._metrics |= DST_METRICS_REFCOUNTED; - refcount_inc(&from->fib6_metrics->refcnt); - } } /* Caller must already hold reference to @ort */ diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index e1025b493a18..60325dbfe88b 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -459,36 +459,57 @@ drop: DEFINE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states); +bool seg6_bpf_has_valid_srh(struct sk_buff *skb) +{ + struct seg6_bpf_srh_state *srh_state = + this_cpu_ptr(&seg6_bpf_srh_states); + struct ipv6_sr_hdr *srh = srh_state->srh; + + if (unlikely(srh == NULL)) + return false; + + if (unlikely(!srh_state->valid)) { + if ((srh_state->hdrlen & 7) != 0) + return false; + + srh->hdrlen = (u8)(srh_state->hdrlen >> 3); + if (!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3)) + return false; + + srh_state->valid = true; + } + + return true; +} + static int input_action_end_bpf(struct sk_buff *skb, struct seg6_local_lwt *slwt) { struct seg6_bpf_srh_state *srh_state = this_cpu_ptr(&seg6_bpf_srh_states); - struct seg6_bpf_srh_state local_srh_state; struct ipv6_sr_hdr *srh; - int srhoff = 0; int ret; srh = get_and_validate_srh(skb); - if (!srh) - goto drop; + if (!srh) { + kfree_skb(skb); + return -EINVAL; + } advance_nextseg(srh, &ipv6_hdr(skb)->daddr); /* preempt_disable is needed to protect the per-CPU buffer srh_state, * which is also accessed by the bpf_lwt_seg6_* helpers */ preempt_disable(); + srh_state->srh = srh; srh_state->hdrlen = srh->hdrlen << 3; - srh_state->valid = 1; + srh_state->valid = true; rcu_read_lock(); bpf_compute_data_pointers(skb); ret = bpf_prog_run_save_cb(slwt->bpf.prog, skb); rcu_read_unlock(); - local_srh_state = *srh_state; - preempt_enable(); - switch (ret) { case BPF_OK: case BPF_REDIRECT: @@ -500,24 +521,17 @@ static int input_action_end_bpf(struct sk_buff *skb, goto drop; } - if (unlikely((local_srh_state.hdrlen & 7) != 0)) - goto drop; - - if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) - goto drop; - srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); - srh->hdrlen = (u8)(local_srh_state.hdrlen >> 3); - - if (!local_srh_state.valid && - unlikely(!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3))) + if (srh_state->srh && !seg6_bpf_has_valid_srh(skb)) goto drop; + preempt_enable(); if (ret != BPF_REDIRECT) seg6_lookup_nexthop(skb, NULL, 0); return dst_input(skb); drop: + preempt_enable(); kfree_skb(skb); return -EINVAL; } diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 92ee91e34395..a21d8ed0a325 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -150,7 +150,6 @@ static int afiucv_pm_freeze(struct device *dev) { struct iucv_sock *iucv; struct sock *sk; - int err = 0; #ifdef CONFIG_PM_DEBUG printk(KERN_WARNING "afiucv_pm_freeze\n"); @@ -175,7 +174,7 @@ static int afiucv_pm_freeze(struct device *dev) skb_queue_purge(&iucv->backlog_skb_q); } read_unlock(&iucv_sk_list.lock); - return err; + return 0; } /** diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index ac6a00bcec71..82cdf9020b53 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -203,44 +203,44 @@ struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth) } EXPORT_SYMBOL_GPL(l2tp_tunnel_get_nth); -/* Lookup a session. A new reference is held on the returned session. */ -struct l2tp_session *l2tp_session_get(const struct net *net, - struct l2tp_tunnel *tunnel, - u32 session_id) +struct l2tp_session *l2tp_tunnel_get_session(struct l2tp_tunnel *tunnel, + u32 session_id) { struct hlist_head *session_list; struct l2tp_session *session; - if (!tunnel) { - struct l2tp_net *pn = l2tp_pernet(net); - - session_list = l2tp_session_id_hash_2(pn, session_id); + session_list = l2tp_session_id_hash(tunnel, session_id); - rcu_read_lock_bh(); - hlist_for_each_entry_rcu(session, session_list, global_hlist) { - if (session->session_id == session_id) { - l2tp_session_inc_refcount(session); - rcu_read_unlock_bh(); + read_lock_bh(&tunnel->hlist_lock); + hlist_for_each_entry(session, session_list, hlist) + if (session->session_id == session_id) { + l2tp_session_inc_refcount(session); + read_unlock_bh(&tunnel->hlist_lock); - return session; - } + return session; } - rcu_read_unlock_bh(); + read_unlock_bh(&tunnel->hlist_lock); - return NULL; - } + return NULL; +} +EXPORT_SYMBOL_GPL(l2tp_tunnel_get_session); - session_list = l2tp_session_id_hash(tunnel, session_id); - read_lock_bh(&tunnel->hlist_lock); - hlist_for_each_entry(session, session_list, hlist) { +struct l2tp_session *l2tp_session_get(const struct net *net, u32 session_id) +{ + struct hlist_head *session_list; + struct l2tp_session *session; + + session_list = l2tp_session_id_hash_2(l2tp_pernet(net), session_id); + + rcu_read_lock_bh(); + hlist_for_each_entry_rcu(session, session_list, global_hlist) if (session->session_id == session_id) { l2tp_session_inc_refcount(session); - read_unlock_bh(&tunnel->hlist_lock); + rcu_read_unlock_bh(); return session; } - } - read_unlock_bh(&tunnel->hlist_lock); + rcu_read_unlock_bh(); return NULL; } @@ -872,7 +872,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) } /* Find the session context */ - session = l2tp_session_get(tunnel->l2tp_net, tunnel, session_id); + session = l2tp_tunnel_get_session(tunnel, session_id); if (!session || !session->recv_skb) { if (session) l2tp_session_dec_refcount(session); @@ -1098,7 +1098,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len /* Get routing info from the tunnel socket */ skb_dst_drop(skb); - skb_dst_set(skb, dst_clone(__sk_dst_check(sk, 0))); + skb_dst_set(skb, sk_dst_check(sk, 0)); inet = inet_sk(sk); fl = &inet->cork.fl; diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 5804065dfbfb..8480a0af973e 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -15,6 +15,10 @@ #include <net/dst.h> #include <net/sock.h> +#ifdef CONFIG_XFRM +#include <net/xfrm.h> +#endif + /* Just some random numbers */ #define L2TP_TUNNEL_MAGIC 0x42114DDA #define L2TP_SESSION_MAGIC 0x0C04EB7D @@ -192,12 +196,12 @@ static inline void *l2tp_session_priv(struct l2tp_session *session) struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id); struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth); +struct l2tp_session *l2tp_tunnel_get_session(struct l2tp_tunnel *tunnel, + u32 session_id); void l2tp_tunnel_free(struct l2tp_tunnel *tunnel); -struct l2tp_session *l2tp_session_get(const struct net *net, - struct l2tp_tunnel *tunnel, - u32 session_id); +struct l2tp_session *l2tp_session_get(const struct net *net, u32 session_id); struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth); struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net, const char *ifname); @@ -284,6 +288,21 @@ static inline u32 l2tp_tunnel_dst_mtu(const struct l2tp_tunnel *tunnel) return mtu; } +#ifdef CONFIG_XFRM +static inline bool l2tp_tunnel_uses_xfrm(const struct l2tp_tunnel *tunnel) +{ + struct sock *sk = tunnel->sock; + + return sk && (rcu_access_pointer(sk->sk_policy[0]) || + rcu_access_pointer(sk->sk_policy[1])); +} +#else +static inline bool l2tp_tunnel_uses_xfrm(const struct l2tp_tunnel *tunnel) +{ + return false; +} +#endif + #define l2tp_printk(ptr, type, func, fmt, ...) \ do { \ if (((ptr)->debug) & (type)) \ diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 0bc39cc20a3f..35f6f86d4dcc 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -144,7 +144,7 @@ static int l2tp_ip_recv(struct sk_buff *skb) } /* Ok, this is a data packet. Lookup the session. */ - session = l2tp_session_get(net, NULL, session_id); + session = l2tp_session_get(net, session_id); if (!session) goto discard; diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 42f828cf62fb..237f1a4a0b0c 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -157,7 +157,7 @@ static int l2tp_ip6_recv(struct sk_buff *skb) } /* Ok, this is a data packet. Lookup the session. */ - session = l2tp_session_get(net, NULL, session_id); + session = l2tp_session_get(net, session_id); if (!session) goto discard; diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 2e1e92651545..edbd5d1fbcde 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -66,7 +66,7 @@ static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info) session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]); tunnel = l2tp_tunnel_get(net, tunnel_id); if (tunnel) { - session = l2tp_session_get(net, tunnel, session_id); + session = l2tp_tunnel_get_session(tunnel, session_id); l2tp_tunnel_dec_refcount(tunnel); } } @@ -627,7 +627,7 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf &cfg); if (ret >= 0) { - session = l2tp_session_get(net, tunnel, session_id); + session = l2tp_tunnel_get_session(tunnel, session_id); if (session) { ret = l2tp_session_notify(&l2tp_nl_family, info, session, L2TP_CMD_SESSION_CREATE); @@ -710,9 +710,6 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl void *hdr; struct nlattr *nest; struct l2tp_tunnel *tunnel = session->tunnel; - struct sock *sk = NULL; - - sk = tunnel->sock; hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, cmd); if (!hdr) @@ -738,10 +735,8 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl nla_put_u8(skb, L2TP_ATTR_RECV_SEQ, session->recv_seq) || nla_put_u8(skb, L2TP_ATTR_SEND_SEQ, session->send_seq) || nla_put_u8(skb, L2TP_ATTR_LNS_MODE, session->lns_mode) || -#ifdef CONFIG_XFRM - (((sk) && (sk->sk_policy[0] || sk->sk_policy[1])) && + (l2tp_tunnel_uses_xfrm(tunnel) && nla_put_u8(skb, L2TP_ATTR_USING_IPSEC, 1)) || -#endif (session->reorder_timeout && nla_put_msecs(skb, L2TP_ATTR_RECV_TIMEOUT, session->reorder_timeout, L2TP_ATTR_PAD))) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 6e2c8e7595e0..62f2d3f1e431 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -95,7 +95,6 @@ #include <net/netns/generic.h> #include <net/ip.h> #include <net/udp.h> -#include <net/xfrm.h> #include <net/inet_common.h> #include <asm/byteorder.h> @@ -758,7 +757,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, if (tunnel->peer_tunnel_id == 0) tunnel->peer_tunnel_id = info.peer_tunnel_id; - session = l2tp_session_get(sock_net(sk), tunnel, info.session_id); + session = l2tp_tunnel_get_session(tunnel, info.session_id); if (session) { drop_refcnt = true; @@ -1027,8 +1026,10 @@ end: ****************************************************************************/ static void pppol2tp_copy_stats(struct pppol2tp_ioc_stats *dest, - struct l2tp_stats *stats) + const struct l2tp_stats *stats) { + memset(dest, 0, sizeof(*dest)); + dest->tx_packets = atomic_long_read(&stats->tx_packets); dest->tx_bytes = atomic_long_read(&stats->tx_bytes); dest->tx_errors = atomic_long_read(&stats->tx_errors); @@ -1039,188 +1040,107 @@ static void pppol2tp_copy_stats(struct pppol2tp_ioc_stats *dest, dest->rx_errors = atomic_long_read(&stats->rx_errors); } -/* Session ioctl helper. - */ -static int pppol2tp_session_ioctl(struct l2tp_session *session, - unsigned int cmd, unsigned long arg) +static int pppol2tp_tunnel_copy_stats(struct pppol2tp_ioc_stats *stats, + struct l2tp_tunnel *tunnel) { - int err = 0; - struct sock *sk; - int val = (int) arg; - struct l2tp_tunnel *tunnel = session->tunnel; - struct pppol2tp_ioc_stats stats; + struct l2tp_session *session; - l2tp_dbg(session, L2TP_MSG_CONTROL, - "%s: pppol2tp_session_ioctl(cmd=%#x, arg=%#lx)\n", - session->name, cmd, arg); + if (!stats->session_id) { + pppol2tp_copy_stats(stats, &tunnel->stats); + return 0; + } - sk = pppol2tp_session_get_sock(session); - if (!sk) + /* If session_id is set, search the corresponding session in the + * context of this tunnel and record the session's statistics. + */ + session = l2tp_tunnel_get_session(tunnel, stats->session_id); + if (!session) return -EBADR; - switch (cmd) { - case PPPIOCGMRU: - case PPPIOCGFLAGS: - err = -EFAULT; - if (put_user(0, (int __user *)arg)) - break; - err = 0; - break; - - case PPPIOCSMRU: - case PPPIOCSFLAGS: - err = -EFAULT; - if (get_user(val, (int __user *)arg)) - break; - err = 0; - break; - - case PPPIOCGL2TPSTATS: - err = -ENXIO; - if (!(sk->sk_state & PPPOX_CONNECTED)) - break; - - memset(&stats, 0, sizeof(stats)); - stats.tunnel_id = tunnel->tunnel_id; - stats.session_id = session->session_id; - pppol2tp_copy_stats(&stats, &session->stats); - if (copy_to_user((void __user *) arg, &stats, - sizeof(stats))) - break; - l2tp_info(session, L2TP_MSG_CONTROL, "%s: get L2TP stats\n", - session->name); - err = 0; - break; - - default: - err = -ENOSYS; - break; + if (session->pwtype != L2TP_PWTYPE_PPP) { + l2tp_session_dec_refcount(session); + return -EBADR; } - sock_put(sk); + pppol2tp_copy_stats(stats, &session->stats); + l2tp_session_dec_refcount(session); - return err; + return 0; } -/* Tunnel ioctl helper. - * - * Note the special handling for PPPIOCGL2TPSTATS below. If the ioctl data - * specifies a session_id, the session ioctl handler is called. This allows an - * application to retrieve session stats via a tunnel socket. - */ -static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel, - unsigned int cmd, unsigned long arg) +static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd, + unsigned long arg) { - int err = 0; - struct sock *sk; struct pppol2tp_ioc_stats stats; - - l2tp_dbg(tunnel, L2TP_MSG_CONTROL, - "%s: pppol2tp_tunnel_ioctl(cmd=%#x, arg=%#lx)\n", - tunnel->name, cmd, arg); - - sk = tunnel->sock; - sock_hold(sk); + struct l2tp_session *session; + int val; switch (cmd) { - case PPPIOCGL2TPSTATS: - err = -ENXIO; - if (!(sk->sk_state & PPPOX_CONNECTED)) - break; - - if (copy_from_user(&stats, (void __user *) arg, - sizeof(stats))) { - err = -EFAULT; - break; - } - if (stats.session_id != 0) { - /* resend to session ioctl handler */ - struct l2tp_session *session = - l2tp_session_get(sock_net(sk), tunnel, - stats.session_id); - - if (!session) { - err = -EBADR; - break; - } - if (session->pwtype != L2TP_PWTYPE_PPP) { - l2tp_session_dec_refcount(session); - err = -EBADR; - break; - } + case PPPIOCGMRU: + case PPPIOCGFLAGS: + session = sock->sk->sk_user_data; + if (!session) + return -ENOTCONN; - err = pppol2tp_session_ioctl(session, cmd, arg); - l2tp_session_dec_refcount(session); - break; - } -#ifdef CONFIG_XFRM - stats.using_ipsec = (sk->sk_policy[0] || sk->sk_policy[1]) ? 1 : 0; -#endif - pppol2tp_copy_stats(&stats, &tunnel->stats); - if (copy_to_user((void __user *) arg, &stats, sizeof(stats))) { - err = -EFAULT; - break; - } - l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: get L2TP stats\n", - tunnel->name); - err = 0; - break; + /* Not defined for tunnels */ + if (!session->session_id && !session->peer_session_id) + return -ENOSYS; - default: - err = -ENOSYS; + if (put_user(0, (int __user *)arg)) + return -EFAULT; break; - } - - sock_put(sk); - - return err; -} -/* Main ioctl() handler. - * Dispatch to tunnel or session helpers depending on the socket. - */ -static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd, - unsigned long arg) -{ - struct sock *sk = sock->sk; - struct l2tp_session *session; - struct l2tp_tunnel *tunnel; - int err; + case PPPIOCSMRU: + case PPPIOCSFLAGS: + session = sock->sk->sk_user_data; + if (!session) + return -ENOTCONN; - if (!sk) - return 0; + /* Not defined for tunnels */ + if (!session->session_id && !session->peer_session_id) + return -ENOSYS; - err = -EBADF; - if (sock_flag(sk, SOCK_DEAD) != 0) - goto end; + if (get_user(val, (int __user *)arg)) + return -EFAULT; + break; - err = -ENOTCONN; - if ((sk->sk_user_data == NULL) || - (!(sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)))) - goto end; + case PPPIOCGL2TPSTATS: + session = sock->sk->sk_user_data; + if (!session) + return -ENOTCONN; + + /* Session 0 represents the parent tunnel */ + if (!session->session_id && !session->peer_session_id) { + u32 session_id; + int err; + + if (copy_from_user(&stats, (void __user *)arg, + sizeof(stats))) + return -EFAULT; + + session_id = stats.session_id; + err = pppol2tp_tunnel_copy_stats(&stats, + session->tunnel); + if (err < 0) + return err; + + stats.session_id = session_id; + } else { + pppol2tp_copy_stats(&stats, &session->stats); + stats.session_id = session->session_id; + } + stats.tunnel_id = session->tunnel->tunnel_id; + stats.using_ipsec = l2tp_tunnel_uses_xfrm(session->tunnel); - /* Get session context from the socket */ - err = -EBADF; - session = pppol2tp_sock_to_session(sk); - if (session == NULL) - goto end; + if (copy_to_user((void __user *)arg, &stats, sizeof(stats))) + return -EFAULT; + break; - /* Special case: if session's session_id is zero, treat ioctl as a - * tunnel ioctl - */ - if ((session->session_id == 0) && - (session->peer_session_id == 0)) { - tunnel = session->tunnel; - err = pppol2tp_tunnel_ioctl(tunnel, cmd, arg); - goto end_put_sess; + default: + return -ENOIOCTLCMD; } - err = pppol2tp_session_ioctl(session, cmd, arg); - -end_put_sess: - sock_put(sk); -end: - return err; + return 0; } /***************************************************************************** diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index 89041260784c..260b3dc1b4a2 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -73,8 +73,8 @@ struct llc_sap *llc_sap_find(unsigned char sap_value) rcu_read_lock_bh(); sap = __llc_sap_find(sap_value); - if (sap) - llc_sap_hold(sap); + if (!sap || !llc_sap_hold_safe(sap)) + sap = NULL; rcu_read_unlock_bh(); return sap; } diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c index 7e253455f9dd..bcd1a5e6ebf4 100644 --- a/net/mac802154/tx.c +++ b/net/mac802154/tx.c @@ -63,8 +63,21 @@ ieee802154_tx(struct ieee802154_local *local, struct sk_buff *skb) int ret; if (!(local->hw.flags & IEEE802154_HW_TX_OMIT_CKSUM)) { - u16 crc = crc_ccitt(0, skb->data, skb->len); + struct sk_buff *nskb; + u16 crc; + + if (unlikely(skb_tailroom(skb) < IEEE802154_FCS_LEN)) { + nskb = skb_copy_expand(skb, 0, IEEE802154_FCS_LEN, + GFP_ATOMIC); + if (likely(nskb)) { + consume_skb(skb); + skb = nskb; + } else { + goto err_tx; + } + } + crc = crc_ccitt(0, skb->data, skb->len); put_unaligned_le16(crc, skb_put(skb, 2)); } diff --git a/net/netfilter/nf_conntrack_timeout.c b/net/netfilter/nf_conntrack_timeout.c index 46aee65f339b..91fbd183da2d 100644 --- a/net/netfilter/nf_conntrack_timeout.c +++ b/net/netfilter/nf_conntrack_timeout.c @@ -24,13 +24,30 @@ #include <net/netfilter/nf_conntrack_extend.h> #include <net/netfilter/nf_conntrack_timeout.h> -struct ctnl_timeout * +struct nf_ct_timeout * (*nf_ct_timeout_find_get_hook)(struct net *net, const char *name) __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_timeout_find_get_hook); -void (*nf_ct_timeout_put_hook)(struct ctnl_timeout *timeout) __read_mostly; +void (*nf_ct_timeout_put_hook)(struct nf_ct_timeout *timeout) __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_timeout_put_hook); +static int untimeout(struct nf_conn *ct, void *timeout) +{ + struct nf_conn_timeout *timeout_ext = nf_ct_timeout_find(ct); + + if (timeout_ext && (!timeout || timeout_ext->timeout == timeout)) + RCU_INIT_POINTER(timeout_ext->timeout, NULL); + + /* We are not intended to delete this conntrack. */ + return 0; +} + +void nf_ct_untimeout(struct net *net, struct nf_ct_timeout *timeout) +{ + nf_ct_iterate_cleanup_net(net, untimeout, timeout, 0, 0); +} +EXPORT_SYMBOL_GPL(nf_ct_untimeout); + static const struct nf_ct_ext_type timeout_extend = { .len = sizeof(struct nf_conn_timeout), .align = __alignof__(struct nf_conn_timeout), diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 4199e5300575..d46a236cdf31 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -113,13 +113,13 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl, /* You cannot replace one timeout policy by another of * different kind, sorry. */ - if (matching->l3num != l3num || - matching->l4proto->l4proto != l4num) + if (matching->timeout.l3num != l3num || + matching->timeout.l4proto->l4proto != l4num) return -EINVAL; - return ctnl_timeout_parse_policy(&matching->data, - matching->l4proto, net, - cda[CTA_TIMEOUT_DATA]); + return ctnl_timeout_parse_policy(&matching->timeout.data, + matching->timeout.l4proto, + net, cda[CTA_TIMEOUT_DATA]); } return -EBUSY; @@ -140,14 +140,14 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl, goto err_proto_put; } - ret = ctnl_timeout_parse_policy(&timeout->data, l4proto, net, + ret = ctnl_timeout_parse_policy(&timeout->timeout.data, l4proto, net, cda[CTA_TIMEOUT_DATA]); if (ret < 0) goto err; strcpy(timeout->name, nla_data(cda[CTA_TIMEOUT_NAME])); - timeout->l3num = l3num; - timeout->l4proto = l4proto; + timeout->timeout.l3num = l3num; + timeout->timeout.l4proto = l4proto; refcount_set(&timeout->refcnt, 1); list_add_tail_rcu(&timeout->head, &net->nfct_timeout_list); @@ -166,7 +166,7 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0; - const struct nf_conntrack_l4proto *l4proto = timeout->l4proto; + const struct nf_conntrack_l4proto *l4proto = timeout->timeout.l4proto; event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event); nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); @@ -179,8 +179,9 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, nfmsg->res_id = 0; if (nla_put_string(skb, CTA_TIMEOUT_NAME, timeout->name) || - nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(timeout->l3num)) || - nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, timeout->l4proto->l4proto) || + nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, + htons(timeout->timeout.l3num)) || + nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, l4proto->l4proto) || nla_put_be32(skb, CTA_TIMEOUT_USE, htonl(refcount_read(&timeout->refcnt)))) goto nla_put_failure; @@ -194,7 +195,8 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, if (!nest_parms) goto nla_put_failure; - ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, &timeout->data); + ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, + &timeout->timeout.data); if (ret < 0) goto nla_put_failure; @@ -297,22 +299,6 @@ static int cttimeout_get_timeout(struct net *net, struct sock *ctnl, return ret; } -static int untimeout(struct nf_conn *ct, void *timeout) -{ - struct nf_conn_timeout *timeout_ext = nf_ct_timeout_find(ct); - - if (timeout_ext && (!timeout || timeout_ext->timeout == timeout)) - RCU_INIT_POINTER(timeout_ext->timeout, NULL); - - /* We are not intended to delete this conntrack. */ - return 0; -} - -static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout) -{ - nf_ct_iterate_cleanup_net(net, untimeout, timeout, 0, 0); -} - /* try to delete object, fail if it is still in use. */ static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout) { @@ -324,8 +310,8 @@ static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout) if (refcount_dec_if_one(&timeout->refcnt)) { /* We are protected by nfnl mutex. */ list_del_rcu(&timeout->head); - nf_ct_l4proto_put(timeout->l4proto); - ctnl_untimeout(net, timeout); + nf_ct_l4proto_put(timeout->timeout.l4proto); + nf_ct_untimeout(net, &timeout->timeout); kfree_rcu(timeout, rcu_head); } else { ret = -EBUSY; @@ -526,8 +512,11 @@ err: return matching; } -static void ctnl_timeout_put(struct ctnl_timeout *timeout) +static void ctnl_timeout_put(struct nf_ct_timeout *t) { + struct ctnl_timeout *timeout = + container_of(t, struct ctnl_timeout, timeout); + if (refcount_dec_and_test(&timeout->refcnt)) kfree_rcu(timeout, rcu_head); @@ -573,11 +562,11 @@ static void __net_exit cttimeout_net_exit(struct net *net) struct ctnl_timeout *cur, *tmp; nf_ct_unconfirmed_destroy(net); - ctnl_untimeout(net, NULL); + nf_ct_untimeout(net, NULL); list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, head) { list_del_rcu(&cur->head); - nf_ct_l4proto_put(cur->l4proto); + nf_ct_l4proto_put(cur->timeout.l4proto); if (refcount_dec_and_test(&cur->refcnt)) kfree_rcu(cur, rcu_head); diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c index f9dba62c450f..00db27dfd2ff 100644 --- a/net/netfilter/nfnetlink_osf.c +++ b/net/netfilter/nfnetlink_osf.c @@ -271,7 +271,7 @@ const char *nf_osf_find(const struct sk_buff *skb, tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts); if (!tcp) - return false; + return NULL; list_for_each_entry_rcu(kf, &nf_osf_fingers[ctx.df], finger_entry) { f = &kf->finger; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 3bc82ee5464d..4855d4ce1c8f 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -22,6 +22,8 @@ #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_labels.h> +#include <net/netfilter/nf_conntrack_timeout.h> +#include <net/netfilter/nf_conntrack_l4proto.h> struct nft_ct { enum nft_ct_keys key:8; @@ -765,6 +767,194 @@ static struct nft_expr_type nft_notrack_type __read_mostly = { .owner = THIS_MODULE, }; +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT +static int +nft_ct_timeout_parse_policy(void *timeouts, + const struct nf_conntrack_l4proto *l4proto, + struct net *net, const struct nlattr *attr) +{ + struct nlattr **tb; + int ret = 0; + + if (!l4proto->ctnl_timeout.nlattr_to_obj) + return 0; + + tb = kcalloc(l4proto->ctnl_timeout.nlattr_max + 1, sizeof(*tb), + GFP_KERNEL); + + if (!tb) + return -ENOMEM; + + ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max, + attr, l4proto->ctnl_timeout.nla_policy, + NULL); + if (ret < 0) + goto err; + + ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeouts); + +err: + kfree(tb); + return ret; +} + +struct nft_ct_timeout_obj { + struct nf_conn *tmpl; + u8 l4proto; +}; + +static void nft_ct_timeout_obj_eval(struct nft_object *obj, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_ct_timeout_obj *priv = nft_obj_data(obj); + struct nf_conn *ct = (struct nf_conn *)skb_nfct(pkt->skb); + struct sk_buff *skb = pkt->skb; + + if (ct || + priv->l4proto != pkt->tprot) + return; + + nf_ct_set(skb, priv->tmpl, IP_CT_NEW); +} + +static int nft_ct_timeout_obj_init(const struct nft_ctx *ctx, + const struct nlattr * const tb[], + struct nft_object *obj) +{ + const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt; + struct nft_ct_timeout_obj *priv = nft_obj_data(obj); + const struct nf_conntrack_l4proto *l4proto; + struct nf_conn_timeout *timeout_ext; + struct nf_ct_timeout *timeout; + int l3num = ctx->family; + struct nf_conn *tmpl; + __u8 l4num; + int ret; + + if (!tb[NFTA_CT_TIMEOUT_L3PROTO] || + !tb[NFTA_CT_TIMEOUT_L4PROTO] || + !tb[NFTA_CT_TIMEOUT_DATA]) + return -EINVAL; + + l3num = ntohs(nla_get_be16(tb[NFTA_CT_TIMEOUT_L3PROTO])); + l4num = nla_get_u8(tb[NFTA_CT_TIMEOUT_L4PROTO]); + priv->l4proto = l4num; + + l4proto = nf_ct_l4proto_find_get(l3num, l4num); + + if (l4proto->l4proto != l4num) { + ret = -EOPNOTSUPP; + goto err_proto_put; + } + + timeout = kzalloc(sizeof(struct nf_ct_timeout) + + l4proto->ctnl_timeout.obj_size, GFP_KERNEL); + if (timeout == NULL) { + ret = -ENOMEM; + goto err_proto_put; + } + + ret = nft_ct_timeout_parse_policy(&timeout->data, l4proto, ctx->net, + tb[NFTA_CT_TIMEOUT_DATA]); + if (ret < 0) + goto err_free_timeout; + + timeout->l3num = l3num; + timeout->l4proto = l4proto; + tmpl = nf_ct_tmpl_alloc(ctx->net, zone, GFP_ATOMIC); + if (!tmpl) { + ret = -ENOMEM; + goto err_free_timeout; + } + + timeout_ext = nf_ct_timeout_ext_add(tmpl, timeout, GFP_ATOMIC); + if (!timeout_ext) { + ret = -ENOMEM; + goto err_free_tmpl; + } + + ret = nf_ct_netns_get(ctx->net, ctx->family); + if (ret < 0) + goto err_free_tmpl; + + priv->tmpl = tmpl; + + return 0; + +err_free_tmpl: + nf_ct_tmpl_free(tmpl); +err_free_timeout: + kfree(timeout); +err_proto_put: + nf_ct_l4proto_put(l4proto); + return ret; +} + +static void nft_ct_timeout_obj_destroy(const struct nft_ctx *ctx, + struct nft_object *obj) +{ + struct nft_ct_timeout_obj *priv = nft_obj_data(obj); + struct nf_conn_timeout *t = nf_ct_timeout_find(priv->tmpl); + struct nf_ct_timeout *timeout; + + timeout = rcu_dereference_raw(t->timeout); + nf_ct_untimeout(ctx->net, timeout); + nf_ct_l4proto_put(timeout->l4proto); + nf_ct_netns_put(ctx->net, ctx->family); + nf_ct_tmpl_free(priv->tmpl); +} + +static int nft_ct_timeout_obj_dump(struct sk_buff *skb, + struct nft_object *obj, bool reset) +{ + const struct nft_ct_timeout_obj *priv = nft_obj_data(obj); + const struct nf_conn_timeout *t = nf_ct_timeout_find(priv->tmpl); + const struct nf_ct_timeout *timeout = rcu_dereference_raw(t->timeout); + struct nlattr *nest_params; + int ret; + + if (nla_put_u8(skb, NFTA_CT_TIMEOUT_L4PROTO, timeout->l4proto->l4proto) || + nla_put_be16(skb, NFTA_CT_TIMEOUT_L3PROTO, htons(timeout->l3num))) + return -1; + + nest_params = nla_nest_start(skb, NFTA_CT_TIMEOUT_DATA | NLA_F_NESTED); + if (!nest_params) + return -1; + + ret = timeout->l4proto->ctnl_timeout.obj_to_nlattr(skb, &timeout->data); + if (ret < 0) + return -1; + nla_nest_end(skb, nest_params); + return 0; +} + +static const struct nla_policy nft_ct_timeout_policy[NFTA_CT_TIMEOUT_MAX + 1] = { + [NFTA_CT_TIMEOUT_L3PROTO] = {.type = NLA_U16 }, + [NFTA_CT_TIMEOUT_L4PROTO] = {.type = NLA_U8 }, + [NFTA_CT_TIMEOUT_DATA] = {.type = NLA_NESTED }, +}; + +static struct nft_object_type nft_ct_timeout_obj_type; + +static const struct nft_object_ops nft_ct_timeout_obj_ops = { + .type = &nft_ct_timeout_obj_type, + .size = sizeof(struct nft_ct_timeout_obj), + .eval = nft_ct_timeout_obj_eval, + .init = nft_ct_timeout_obj_init, + .destroy = nft_ct_timeout_obj_destroy, + .dump = nft_ct_timeout_obj_dump, +}; + +static struct nft_object_type nft_ct_timeout_obj_type __read_mostly = { + .type = NFT_OBJECT_CT_TIMEOUT, + .ops = &nft_ct_timeout_obj_ops, + .maxattr = NFTA_CT_TIMEOUT_MAX, + .policy = nft_ct_timeout_policy, + .owner = THIS_MODULE, +}; +#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ + static int nft_ct_helper_obj_init(const struct nft_ctx *ctx, const struct nlattr * const tb[], struct nft_object *obj) @@ -773,6 +963,7 @@ static int nft_ct_helper_obj_init(const struct nft_ctx *ctx, struct nf_conntrack_helper *help4, *help6; char name[NF_CT_HELPER_NAME_LEN]; int family = ctx->family; + int err; if (!tb[NFTA_CT_HELPER_NAME] || !tb[NFTA_CT_HELPER_L4PROTO]) return -EINVAL; @@ -823,7 +1014,18 @@ static int nft_ct_helper_obj_init(const struct nft_ctx *ctx, priv->helper4 = help4; priv->helper6 = help6; + err = nf_ct_netns_get(ctx->net, ctx->family); + if (err < 0) + goto err_put_helper; + return 0; + +err_put_helper: + if (priv->helper4) + nf_conntrack_helper_put(priv->helper4); + if (priv->helper6) + nf_conntrack_helper_put(priv->helper6); + return err; } static void nft_ct_helper_obj_destroy(const struct nft_ctx *ctx, @@ -835,6 +1037,8 @@ static void nft_ct_helper_obj_destroy(const struct nft_ctx *ctx, nf_conntrack_helper_put(priv->helper4); if (priv->helper6) nf_conntrack_helper_put(priv->helper6); + + nf_ct_netns_put(ctx->net, ctx->family); } static void nft_ct_helper_obj_eval(struct nft_object *obj, @@ -949,9 +1153,17 @@ static int __init nft_ct_module_init(void) err = nft_register_obj(&nft_ct_helper_obj_type); if (err < 0) goto err2; - +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT + err = nft_register_obj(&nft_ct_timeout_obj_type); + if (err < 0) + goto err3; +#endif return 0; +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT +err3: + nft_unregister_obj(&nft_ct_helper_obj_type); +#endif err2: nft_unregister_expr(&nft_notrack_type); err1: @@ -961,6 +1173,9 @@ err1: static void __exit nft_ct_module_exit(void) { +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT + nft_unregister_obj(&nft_ct_timeout_obj_type); +#endif nft_unregister_obj(&nft_ct_helper_obj_type); nft_unregister_expr(&nft_notrack_type); nft_unregister_expr(&nft_ct_type); @@ -974,3 +1189,4 @@ MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_ALIAS_NFT_EXPR("ct"); MODULE_ALIAS_NFT_EXPR("notrack"); MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_HELPER); +MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_TIMEOUT); diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c index 9b2f3de7be4f..5af74b37f423 100644 --- a/net/netfilter/nft_osf.c +++ b/net/netfilter/nft_osf.c @@ -4,8 +4,6 @@ #include <net/netfilter/nf_tables.h> #include <linux/netfilter/nfnetlink_osf.h> -#define OSF_GENRE_SIZE 32 - struct nft_osf { enum nft_registers dreg:8; }; @@ -37,9 +35,9 @@ static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs, os_name = nf_osf_find(skb, nf_osf_fingers); if (!os_name) - strncpy((char *)dest, "unknown", IFNAMSIZ); + strncpy((char *)dest, "unknown", NFT_OSF_MAXGENRELEN); else - strncpy((char *)dest, os_name, IFNAMSIZ); + strncpy((char *)dest, os_name, NFT_OSF_MAXGENRELEN); } static int nft_osf_init(const struct nft_ctx *ctx, @@ -51,7 +49,7 @@ static int nft_osf_init(const struct nft_ctx *ctx, priv->dreg = nft_parse_register(tb[NFTA_OSF_DREG]); err = nft_validate_register_store(ctx, priv->dreg, NULL, - NFTA_DATA_VALUE, OSF_GENRE_SIZE); + NFTA_DATA_VALUE, NFT_OSF_MAXGENRELEN); if (err < 0) return err; diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 7ba454e9e3fa..89457efd2e00 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -104,7 +104,7 @@ xt_ct_set_helper(struct nf_conn *ct, const char *helper_name, } #ifdef CONFIG_NF_CONNTRACK_TIMEOUT -static void __xt_ct_tg_timeout_put(struct ctnl_timeout *timeout) +static void __xt_ct_tg_timeout_put(struct nf_ct_timeout *timeout) { typeof(nf_ct_timeout_put_hook) timeout_put; @@ -121,7 +121,7 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par, #ifdef CONFIG_NF_CONNTRACK_TIMEOUT typeof(nf_ct_timeout_find_get_hook) timeout_find_get; const struct nf_conntrack_l4proto *l4proto; - struct ctnl_timeout *timeout; + struct nf_ct_timeout *timeout; struct nf_conn_timeout *timeout_ext; const char *errmsg = NULL; int ret = 0; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 345e38058ae5..5610061e7f2e 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4137,52 +4137,36 @@ static const struct vm_operations_struct packet_mmap_ops = { .close = packet_mm_close, }; -static void free_pg_vec(struct pgv *pg_vec, unsigned int order, - unsigned int len) +static void free_pg_vec(struct pgv *pg_vec, unsigned int len) { int i; for (i = 0; i < len; i++) { if (likely(pg_vec[i].buffer)) { - if (is_vmalloc_addr(pg_vec[i].buffer)) - vfree(pg_vec[i].buffer); - else - free_pages((unsigned long)pg_vec[i].buffer, - order); + kvfree(pg_vec[i].buffer); pg_vec[i].buffer = NULL; } } kfree(pg_vec); } -static char *alloc_one_pg_vec_page(unsigned long order) +static char *alloc_one_pg_vec_page(unsigned long size) { char *buffer; - gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | - __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY; - - buffer = (char *) __get_free_pages(gfp_flags, order); - if (buffer) - return buffer; - /* __get_free_pages failed, fall back to vmalloc */ - buffer = vzalloc(array_size((1 << order), PAGE_SIZE)); + buffer = kvzalloc(size, GFP_KERNEL); if (buffer) return buffer; - /* vmalloc failed, lets dig into swap here */ - gfp_flags &= ~__GFP_NORETRY; - buffer = (char *) __get_free_pages(gfp_flags, order); - if (buffer) - return buffer; + buffer = kvzalloc(size, GFP_KERNEL | __GFP_RETRY_MAYFAIL); - /* complete and utter failure */ - return NULL; + return buffer; } -static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order) +static struct pgv *alloc_pg_vec(struct tpacket_req *req) { unsigned int block_nr = req->tp_block_nr; + unsigned long size = req->tp_block_size; struct pgv *pg_vec; int i; @@ -4191,7 +4175,7 @@ static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order) goto out; for (i = 0; i < block_nr; i++) { - pg_vec[i].buffer = alloc_one_pg_vec_page(order); + pg_vec[i].buffer = alloc_one_pg_vec_page(size); if (unlikely(!pg_vec[i].buffer)) goto out_free_pgvec; } @@ -4200,7 +4184,7 @@ out: return pg_vec; out_free_pgvec: - free_pg_vec(pg_vec, order, block_nr); + free_pg_vec(pg_vec, block_nr); pg_vec = NULL; goto out; } @@ -4210,9 +4194,9 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, { struct pgv *pg_vec = NULL; struct packet_sock *po = pkt_sk(sk); - int was_running, order = 0; struct packet_ring_buffer *rb; struct sk_buff_head *rb_queue; + int was_running; __be16 num; int err = -EINVAL; /* Added to avoid minimal code churn */ @@ -4230,6 +4214,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, } if (req->tp_block_nr) { + unsigned int min_frame_size; + /* Sanity tests and some calculations */ err = -EBUSY; if (unlikely(rb->pg_vec)) @@ -4252,12 +4238,12 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, goto out; if (unlikely(!PAGE_ALIGNED(req->tp_block_size))) goto out; + min_frame_size = po->tp_hdrlen + po->tp_reserve; if (po->tp_version >= TPACKET_V3 && - req->tp_block_size <= - BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv) + sizeof(struct tpacket3_hdr)) + req->tp_block_size < + BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv) + min_frame_size) goto out; - if (unlikely(req->tp_frame_size < po->tp_hdrlen + - po->tp_reserve)) + if (unlikely(req->tp_frame_size < min_frame_size)) goto out; if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1))) goto out; @@ -4272,8 +4258,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, goto out; err = -ENOMEM; - order = get_order(req->tp_block_size); - pg_vec = alloc_pg_vec(req, order); + pg_vec = alloc_pg_vec(req); if (unlikely(!pg_vec)) goto out; switch (po->tp_version) { @@ -4327,7 +4312,6 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, rb->frame_size = req->tp_frame_size; spin_unlock_bh(&rb_queue->lock); - swap(rb->pg_vec_order, order); swap(rb->pg_vec_len, req->tp_block_nr); rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE; @@ -4353,7 +4337,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, } if (pg_vec) - free_pg_vec(pg_vec, order, req->tp_block_nr); + free_pg_vec(pg_vec, req->tp_block_nr); out: return err; } diff --git a/net/packet/internal.h b/net/packet/internal.h index 3bb7c5fb3bff..8f50036f62f0 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -64,7 +64,6 @@ struct packet_ring_buffer { unsigned int frame_size; unsigned int frame_max; - unsigned int pg_vec_order; unsigned int pg_vec_pages; unsigned int pg_vec_len; diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c index d152e48ea371..8596eed6d9a8 100644 --- a/net/rds/ib_frmr.c +++ b/net/rds/ib_frmr.c @@ -61,6 +61,7 @@ static struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev, pool->fmr_attr.max_pages); if (IS_ERR(frmr->mr)) { pr_warn("RDS/IB: %s failed to allocate MR", __func__); + err = PTR_ERR(frmr->mr); goto out_no_cigar; } diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 9d9278a13d91..c97558710421 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -104,9 +104,9 @@ struct rxrpc_net { #define RXRPC_KEEPALIVE_TIME 20 /* NAT keepalive time in seconds */ u8 peer_keepalive_cursor; - ktime_t peer_keepalive_base; - struct hlist_head peer_keepalive[RXRPC_KEEPALIVE_TIME + 1]; - struct hlist_head peer_keepalive_new; + time64_t peer_keepalive_base; + struct list_head peer_keepalive[32]; + struct list_head peer_keepalive_new; struct timer_list peer_keepalive_timer; struct work_struct peer_keepalive_work; }; @@ -295,7 +295,7 @@ struct rxrpc_peer { struct hlist_head error_targets; /* targets for net error distribution */ struct work_struct error_distributor; struct rb_root service_conns; /* Service connections */ - struct hlist_node keepalive_link; /* Link in net->peer_keepalive[] */ + struct list_head keepalive_link; /* Link in net->peer_keepalive[] */ time64_t last_tx_at; /* Last time packet sent here */ seqlock_t service_conn_lock; spinlock_t lock; /* access lock */ diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 84d40ba9856f..6df56ce68861 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -138,7 +138,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, } ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len); - conn->params.peer->last_tx_at = ktime_get_real(); + conn->params.peer->last_tx_at = ktime_get_seconds(); if (ret < 0) trace_rxrpc_tx_fail(chan->call_debug_id, serial, ret, rxrpc_tx_point_call_final_resend); @@ -252,7 +252,7 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, trace_rxrpc_tx_packet(conn->debug_id, &whdr, rxrpc_tx_point_conn_abort); - conn->params.peer->last_tx_at = ktime_get_real(); + conn->params.peer->last_tx_at = ktime_get_seconds(); _leave(" = 0"); return 0; diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c index 5d6a773db973..417d80867c4f 100644 --- a/net/rxrpc/net_ns.c +++ b/net/rxrpc/net_ns.c @@ -85,12 +85,12 @@ static __net_init int rxrpc_init_net(struct net *net) hash_init(rxnet->peer_hash); spin_lock_init(&rxnet->peer_hash_lock); for (i = 0; i < ARRAY_SIZE(rxnet->peer_keepalive); i++) - INIT_HLIST_HEAD(&rxnet->peer_keepalive[i]); - INIT_HLIST_HEAD(&rxnet->peer_keepalive_new); + INIT_LIST_HEAD(&rxnet->peer_keepalive[i]); + INIT_LIST_HEAD(&rxnet->peer_keepalive_new); timer_setup(&rxnet->peer_keepalive_timer, rxrpc_peer_keepalive_timeout, 0); INIT_WORK(&rxnet->peer_keepalive_work, rxrpc_peer_keepalive_worker); - rxnet->peer_keepalive_base = ktime_add(ktime_get_real(), NSEC_PER_SEC); + rxnet->peer_keepalive_base = ktime_get_seconds(); ret = -ENOMEM; rxnet->proc_net = proc_net_mkdir(net, "rxrpc", net->proc_net); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 801dbf3d3478..ccf5de160444 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -209,7 +209,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, now = ktime_get_real(); if (ping) call->ping_time = now; - conn->params.peer->last_tx_at = ktime_get_real(); + conn->params.peer->last_tx_at = ktime_get_seconds(); if (ret < 0) trace_rxrpc_tx_fail(call->debug_id, serial, ret, rxrpc_tx_point_call_ack); @@ -299,7 +299,7 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 1, sizeof(pkt)); - conn->params.peer->last_tx_at = ktime_get_real(); + conn->params.peer->last_tx_at = ktime_get_seconds(); if (ret < 0) trace_rxrpc_tx_fail(call->debug_id, serial, ret, rxrpc_tx_point_call_abort); @@ -397,7 +397,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, * message and update the peer record */ ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); - conn->params.peer->last_tx_at = ktime_get_real(); + conn->params.peer->last_tx_at = ktime_get_seconds(); up_read(&conn->params.local->defrag_sem); if (ret < 0) @@ -466,7 +466,7 @@ send_fragmentable: if (ret == 0) { ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); - conn->params.peer->last_tx_at = ktime_get_real(); + conn->params.peer->last_tx_at = ktime_get_seconds(); opt = IP_PMTUDISC_DO; kernel_setsockopt(conn->params.local->socket, SOL_IP, @@ -484,7 +484,7 @@ send_fragmentable: if (ret == 0) { ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); - conn->params.peer->last_tx_at = ktime_get_real(); + conn->params.peer->last_tx_at = ktime_get_seconds(); opt = IPV6_PMTUDISC_DO; kernel_setsockopt(conn->params.local->socket, @@ -617,6 +617,6 @@ void rxrpc_send_keepalive(struct rxrpc_peer *peer) trace_rxrpc_tx_packet(peer->debug_id, &whdr, rxrpc_tx_point_version_keepalive); - peer->last_tx_at = ktime_get_real(); + peer->last_tx_at = ktime_get_seconds(); _leave(""); } diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 0ed8b651cec2..4f9da2f51c69 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -350,97 +350,117 @@ void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, } /* - * Perform keep-alive pings with VERSION packets to keep any NAT alive. + * Perform keep-alive pings. */ -void rxrpc_peer_keepalive_worker(struct work_struct *work) +static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet, + struct list_head *collector, + time64_t base, + u8 cursor) { - struct rxrpc_net *rxnet = - container_of(work, struct rxrpc_net, peer_keepalive_work); struct rxrpc_peer *peer; - unsigned long delay; - ktime_t base, now = ktime_get_real(); - s64 diff; - u8 cursor, slot; + const u8 mask = ARRAY_SIZE(rxnet->peer_keepalive) - 1; + time64_t keepalive_at; + int slot; - base = rxnet->peer_keepalive_base; - cursor = rxnet->peer_keepalive_cursor; + spin_lock_bh(&rxnet->peer_hash_lock); - _enter("%u,%lld", cursor, ktime_sub(now, base)); + while (!list_empty(collector)) { + peer = list_entry(collector->next, + struct rxrpc_peer, keepalive_link); -next_bucket: - diff = ktime_to_ns(ktime_sub(now, base)); - if (diff < 0) - goto resched; + list_del_init(&peer->keepalive_link); + if (!rxrpc_get_peer_maybe(peer)) + continue; - _debug("at %u", cursor); - spin_lock_bh(&rxnet->peer_hash_lock); -next_peer: - if (!rxnet->live) { spin_unlock_bh(&rxnet->peer_hash_lock); - goto out; - } - /* Everything in the bucket at the cursor is processed this second; the - * bucket at cursor + 1 goes now + 1s and so on... - */ - if (hlist_empty(&rxnet->peer_keepalive[cursor])) { - if (hlist_empty(&rxnet->peer_keepalive_new)) { - spin_unlock_bh(&rxnet->peer_hash_lock); - goto emptied_bucket; + keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME; + slot = keepalive_at - base; + _debug("%02x peer %u t=%d {%pISp}", + cursor, peer->debug_id, slot, &peer->srx.transport); + + if (keepalive_at <= base || + keepalive_at > base + RXRPC_KEEPALIVE_TIME) { + rxrpc_send_keepalive(peer); + slot = RXRPC_KEEPALIVE_TIME; } - hlist_move_list(&rxnet->peer_keepalive_new, - &rxnet->peer_keepalive[cursor]); + /* A transmission to this peer occurred since last we examined + * it so put it into the appropriate future bucket. + */ + slot += cursor; + slot &= mask; + spin_lock_bh(&rxnet->peer_hash_lock); + list_add_tail(&peer->keepalive_link, + &rxnet->peer_keepalive[slot & mask]); + rxrpc_put_peer(peer); } - peer = hlist_entry(rxnet->peer_keepalive[cursor].first, - struct rxrpc_peer, keepalive_link); - hlist_del_init(&peer->keepalive_link); - if (!rxrpc_get_peer_maybe(peer)) - goto next_peer; - spin_unlock_bh(&rxnet->peer_hash_lock); +} - _debug("peer %u {%pISp}", peer->debug_id, &peer->srx.transport); +/* + * Perform keep-alive pings with VERSION packets to keep any NAT alive. + */ +void rxrpc_peer_keepalive_worker(struct work_struct *work) +{ + struct rxrpc_net *rxnet = + container_of(work, struct rxrpc_net, peer_keepalive_work); + const u8 mask = ARRAY_SIZE(rxnet->peer_keepalive) - 1; + time64_t base, now, delay; + u8 cursor, stop; + LIST_HEAD(collector); -recalc: - diff = ktime_divns(ktime_sub(peer->last_tx_at, base), NSEC_PER_SEC); - if (diff < -30 || diff > 30) - goto send; /* LSW of 64-bit time probably wrapped on 32-bit */ - diff += RXRPC_KEEPALIVE_TIME - 1; - if (diff < 0) - goto send; + now = ktime_get_seconds(); + base = rxnet->peer_keepalive_base; + cursor = rxnet->peer_keepalive_cursor; + _enter("%lld,%u", base - now, cursor); - slot = (diff > RXRPC_KEEPALIVE_TIME - 1) ? RXRPC_KEEPALIVE_TIME - 1 : diff; - if (slot == 0) - goto send; + if (!rxnet->live) + return; - /* A transmission to this peer occurred since last we examined it so - * put it into the appropriate future bucket. + /* Remove to a temporary list all the peers that are currently lodged + * in expired buckets plus all new peers. + * + * Everything in the bucket at the cursor is processed this + * second; the bucket at cursor + 1 goes at now + 1s and so + * on... */ - slot = (slot + cursor) % ARRAY_SIZE(rxnet->peer_keepalive); spin_lock_bh(&rxnet->peer_hash_lock); - hlist_add_head(&peer->keepalive_link, &rxnet->peer_keepalive[slot]); - rxrpc_put_peer(peer); - goto next_peer; - -send: - rxrpc_send_keepalive(peer); - now = ktime_get_real(); - goto recalc; + list_splice_init(&rxnet->peer_keepalive_new, &collector); + + stop = cursor + ARRAY_SIZE(rxnet->peer_keepalive); + while (base <= now && (s8)(cursor - stop) < 0) { + list_splice_tail_init(&rxnet->peer_keepalive[cursor & mask], + &collector); + base++; + cursor++; + } -emptied_bucket: - cursor++; - if (cursor >= ARRAY_SIZE(rxnet->peer_keepalive)) - cursor = 0; - base = ktime_add_ns(base, NSEC_PER_SEC); - goto next_bucket; + base = now; + spin_unlock_bh(&rxnet->peer_hash_lock); -resched: rxnet->peer_keepalive_base = base; rxnet->peer_keepalive_cursor = cursor; - delay = nsecs_to_jiffies(-diff) + 1; - timer_reduce(&rxnet->peer_keepalive_timer, jiffies + delay); -out: + rxrpc_peer_keepalive_dispatch(rxnet, &collector, base, cursor); + ASSERT(list_empty(&collector)); + + /* Schedule the timer for the next occupied timeslot. */ + cursor = rxnet->peer_keepalive_cursor; + stop = cursor + RXRPC_KEEPALIVE_TIME - 1; + for (; (s8)(cursor - stop) < 0; cursor++) { + if (!list_empty(&rxnet->peer_keepalive[cursor & mask])) + break; + base++; + } + + now = ktime_get_seconds(); + delay = base - now; + if (delay < 1) + delay = 1; + delay *= HZ; + if (rxnet->live) + timer_reduce(&rxnet->peer_keepalive_timer, jiffies + delay); + _leave(""); } diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 1b7e8107b3ae..24ec7cdcf332 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -322,7 +322,7 @@ struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *local, if (!peer) { peer = prealloc; hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key); - hlist_add_head(&peer->keepalive_link, &rxnet->peer_keepalive_new); + list_add_tail(&peer->keepalive_link, &rxnet->peer_keepalive_new); } spin_unlock(&rxnet->peer_hash_lock); @@ -367,8 +367,8 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, if (!peer) { hash_add_rcu(rxnet->peer_hash, &candidate->hash_link, hash_key); - hlist_add_head(&candidate->keepalive_link, - &rxnet->peer_keepalive_new); + list_add_tail(&candidate->keepalive_link, + &rxnet->peer_keepalive_new); } spin_unlock_bh(&rxnet->peer_hash_lock); @@ -441,7 +441,7 @@ static void __rxrpc_put_peer(struct rxrpc_peer *peer) spin_lock_bh(&rxnet->peer_hash_lock); hash_del_rcu(&peer->hash_link); - hlist_del_init(&peer->keepalive_link); + list_del_init(&peer->keepalive_link); spin_unlock_bh(&rxnet->peer_hash_lock); kfree_rcu(peer, rcu); diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index eaf8f4f446b0..cea16838d588 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -670,7 +670,7 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn) return -EAGAIN; } - conn->params.peer->last_tx_at = ktime_get_real(); + conn->params.peer->last_tx_at = ktime_get_seconds(); trace_rxrpc_tx_packet(conn->debug_id, &whdr, rxrpc_tx_point_rxkad_challenge); _leave(" = 0"); @@ -728,8 +728,7 @@ static int rxkad_send_response(struct rxrpc_connection *conn, return -EAGAIN; } - conn->params.peer->last_tx_at = ktime_get_real(); - trace_rxrpc_tx_packet(0, &whdr, rxrpc_tx_point_rxkad_response); + conn->params.peer->last_tx_at = ktime_get_seconds(); _leave(" = 0"); return 0; } diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index 4a7af7aff37d..d75bd15151e6 100644 --- a/net/rxrpc/sysctl.c +++ b/net/rxrpc/sysctl.c @@ -15,7 +15,6 @@ #include "ar-internal.h" static struct ctl_table_header *rxrpc_sysctl_reg_table; -static const unsigned int zero = 0; static const unsigned int one = 1; static const unsigned int four = 4; static const unsigned int thirtytwo = 32; diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 6203eb075c9a..9b30e62805c7 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -34,8 +34,8 @@ struct tcf_bpf_cfg { static unsigned int bpf_net_id; static struct tc_action_ops act_bpf_ops; -static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, - struct tcf_result *res) +static int tcf_bpf_act(struct sk_buff *skb, const struct tc_action *act, + struct tcf_result *res) { bool at_ingress = skb_at_tc_ingress(skb); struct tcf_bpf *prog = to_bpf(act); @@ -143,11 +143,12 @@ static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *act, .index = prog->tcf_index, .refcnt = refcount_read(&prog->tcf_refcnt) - ref, .bindcnt = atomic_read(&prog->tcf_bindcnt) - bind, - .action = prog->tcf_action, }; struct tcf_t tm; int ret; + spin_lock(&prog->tcf_lock); + opt.action = prog->tcf_action; if (nla_put(skb, TCA_ACT_BPF_PARMS, sizeof(opt), &opt)) goto nla_put_failure; @@ -163,9 +164,11 @@ static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *act, TCA_ACT_BPF_PAD)) goto nla_put_failure; + spin_unlock(&prog->tcf_lock); return skb->len; nla_put_failure: + spin_unlock(&prog->tcf_lock); nlmsg_trim(skb, tp); return -1; } @@ -264,7 +267,7 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog, { cfg->is_ebpf = tcf_bpf_is_ebpf(prog); /* updates to prog->filter are prevented, since it's called either - * with rtnl lock or during final cleanup in rcu callback + * with tcf lock or during final cleanup in rcu callback */ cfg->filter = rcu_dereference_protected(prog->filter, 1); @@ -336,8 +339,8 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, goto out; prog = to_bpf(*act); - ASSERT_RTNL(); + spin_lock(&prog->tcf_lock); if (res != ACT_P_CREATED) tcf_bpf_prog_fill_cfg(prog, &old); @@ -349,6 +352,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, prog->tcf_action = parm->action; rcu_assign_pointer(prog->filter, cfg.filter); + spin_unlock(&prog->tcf_lock); if (res == ACT_P_CREATED) { tcf_idr_insert(tn, *act); @@ -402,7 +406,7 @@ static struct tc_action_ops act_bpf_ops __read_mostly = { .kind = "bpf", .type = TCA_ACT_BPF, .owner = THIS_MODULE, - .act = tcf_bpf, + .act = tcf_bpf_act, .dump = tcf_bpf_dump, .cleanup = tcf_bpf_cleanup, .init = tcf_bpf_init, diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 2f9bc833d046..54c0bf54f2ac 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -31,8 +31,8 @@ static unsigned int connmark_net_id; static struct tc_action_ops act_connmark_ops; -static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a, - struct tcf_result *res) +static int tcf_connmark_act(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) { const struct nf_conntrack_tuple_hash *thash; struct nf_conntrack_tuple tuple; @@ -209,7 +209,7 @@ static struct tc_action_ops act_connmark_ops = { .kind = "connmark", .type = TCA_ACT_CONNMARK, .owner = THIS_MODULE, - .act = tcf_connmark, + .act = tcf_connmark_act, .dump = tcf_connmark_dump, .init = tcf_connmark_init, .walk = tcf_connmark_walker, diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 648a3a35b720..5596fae4e478 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -50,7 +50,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, csum_net_id); - struct tcf_csum_params *params_old, *params_new; + struct tcf_csum_params *params_new; struct nlattr *tb[TCA_CSUM_MAX + 1]; struct tc_csum *parm; struct tcf_csum *p; @@ -88,20 +88,22 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, } p = to_tcf_csum(*a); - ASSERT_RTNL(); params_new = kzalloc(sizeof(*params_new), GFP_KERNEL); if (unlikely(!params_new)) { tcf_idr_release(*a, bind); return -ENOMEM; } - params_old = rtnl_dereference(p->params); + params_new->update_flags = parm->update_flags; + spin_lock(&p->tcf_lock); p->tcf_action = parm->action; - params_new->update_flags = parm->update_flags; - rcu_assign_pointer(p->params, params_new); - if (params_old) - kfree_rcu(params_old, rcu); + rcu_swap_protected(p->params, params_new, + lockdep_is_held(&p->tcf_lock)); + spin_unlock(&p->tcf_lock); + + if (params_new) + kfree_rcu(params_new, rcu); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); @@ -553,8 +555,8 @@ fail: return 0; } -static int tcf_csum(struct sk_buff *skb, const struct tc_action *a, - struct tcf_result *res) +static int tcf_csum_act(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) { struct tcf_csum *p = to_tcf_csum(a); struct tcf_csum_params *params; @@ -599,11 +601,13 @@ static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind, .index = p->tcf_index, .refcnt = refcount_read(&p->tcf_refcnt) - ref, .bindcnt = atomic_read(&p->tcf_bindcnt) - bind, - .action = p->tcf_action, }; struct tcf_t t; - params = rtnl_dereference(p->params); + spin_lock(&p->tcf_lock); + params = rcu_dereference_protected(p->params, + lockdep_is_held(&p->tcf_lock)); + opt.action = p->tcf_action; opt.update_flags = params->update_flags; if (nla_put(skb, TCA_CSUM_PARMS, sizeof(opt), &opt)) @@ -612,10 +616,12 @@ static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind, tcf_tm_dump(&t, &p->tcf_tm); if (nla_put_64bit(skb, TCA_CSUM_TM, sizeof(t), &t, TCA_CSUM_PAD)) goto nla_put_failure; + spin_unlock(&p->tcf_lock); return skb->len; nla_put_failure: + spin_unlock(&p->tcf_lock); nlmsg_trim(skb, b); return -1; } @@ -664,7 +670,7 @@ static struct tc_action_ops act_csum_ops = { .kind = "csum", .type = TCA_ACT_CSUM, .owner = THIS_MODULE, - .act = tcf_csum, + .act = tcf_csum_act, .dump = tcf_csum_dump, .init = tcf_csum_init, .cleanup = tcf_csum_cleanup, diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 661b72b9147d..52a3e474d822 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -113,7 +113,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, gact = to_gact(*a); - ASSERT_RTNL(); + spin_lock(&gact->tcf_lock); gact->tcf_action = parm->action; #ifdef CONFIG_GACT_PROB if (p_parm) { @@ -126,13 +126,15 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, gact->tcfg_ptype = p_parm->ptype; } #endif + spin_unlock(&gact->tcf_lock); + if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; } -static int tcf_gact(struct sk_buff *skb, const struct tc_action *a, - struct tcf_result *res) +static int tcf_gact_act(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) { struct tcf_gact *gact = to_gact(a); int action = READ_ONCE(gact->tcf_action); @@ -178,10 +180,11 @@ static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, .index = gact->tcf_index, .refcnt = refcount_read(&gact->tcf_refcnt) - ref, .bindcnt = atomic_read(&gact->tcf_bindcnt) - bind, - .action = gact->tcf_action, }; struct tcf_t t; + spin_lock(&gact->tcf_lock); + opt.action = gact->tcf_action; if (nla_put(skb, TCA_GACT_PARMS, sizeof(opt), &opt)) goto nla_put_failure; #ifdef CONFIG_GACT_PROB @@ -199,9 +202,12 @@ static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, tcf_tm_dump(&t, &gact->tcf_tm); if (nla_put_64bit(skb, TCA_GACT_TM, sizeof(t), &t, TCA_GACT_PAD)) goto nla_put_failure; + spin_unlock(&gact->tcf_lock); + return skb->len; nla_put_failure: + spin_unlock(&gact->tcf_lock); nlmsg_trim(skb, b); return -1; } @@ -248,7 +254,7 @@ static struct tc_action_ops act_gact_ops = { .kind = "gact", .type = TCA_ACT_GACT, .owner = THIS_MODULE, - .act = tcf_gact, + .act = tcf_gact_act, .stats_update = tcf_gact_stats_update, .dump = tcf_gact_dump, .init = tcf_gact_init, diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index df4060e32d43..5d200495e467 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -268,7 +268,8 @@ static const char *ife_meta_id2name(u32 metaid) * under ife->tcf_lock for existing action */ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, - void *val, int len, bool exists) + void *val, int len, bool exists, + bool rtnl_held) { struct tcf_meta_ops *ops = find_ife_oplist(metaid); int ret = 0; @@ -278,9 +279,11 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, #ifdef CONFIG_MODULES if (exists) spin_unlock_bh(&ife->tcf_lock); - rtnl_unlock(); + if (rtnl_held) + rtnl_unlock(); request_module("ife-meta-%s", ife_meta_id2name(metaid)); - rtnl_lock(); + if (rtnl_held) + rtnl_lock(); if (exists) spin_lock_bh(&ife->tcf_lock); ops = find_ife_oplist(metaid); @@ -421,7 +424,7 @@ static void tcf_ife_cleanup(struct tc_action *a) /* under ife->tcf_lock for existing action */ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, - bool exists) + bool exists, bool rtnl_held) { int len = 0; int rc = 0; @@ -433,7 +436,8 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, val = nla_data(tb[i]); len = nla_len(tb[i]); - rc = load_metaops_and_vet(ife, i, val, len, exists); + rc = load_metaops_and_vet(ife, i, val, len, exists, + rtnl_held); if (rc != 0) return rc; @@ -454,7 +458,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, struct tc_action_net *tn = net_generic(net, ife_net_id); struct nlattr *tb[TCA_IFE_MAX + 1]; struct nlattr *tb2[IFE_META_MAX + 1]; - struct tcf_ife_params *p, *p_old; + struct tcf_ife_params *p; struct tcf_ife_info *ife; u16 ife_type = ETH_P_IFE; struct tc_ife *parm; @@ -558,7 +562,7 @@ metadata_parse_err: return err; } - err = populate_metalist(ife, tb2, exists); + err = populate_metalist(ife, tb2, exists, rtnl_held); if (err) goto metadata_parse_err; @@ -581,13 +585,13 @@ metadata_parse_err: } ife->tcf_action = parm->action; + /* protected by tcf_lock when modifying existing action */ + rcu_swap_protected(ife->params, p, 1); + if (exists) spin_unlock_bh(&ife->tcf_lock); - - p_old = rtnl_dereference(ife->params); - rcu_assign_pointer(ife->params, p); - if (p_old) - kfree_rcu(p_old, rcu); + if (p) + kfree_rcu(p, rcu); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); @@ -600,16 +604,20 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind, { unsigned char *b = skb_tail_pointer(skb); struct tcf_ife_info *ife = to_ife(a); - struct tcf_ife_params *p = rtnl_dereference(ife->params); + struct tcf_ife_params *p; struct tc_ife opt = { .index = ife->tcf_index, .refcnt = refcount_read(&ife->tcf_refcnt) - ref, .bindcnt = atomic_read(&ife->tcf_bindcnt) - bind, - .action = ife->tcf_action, - .flags = p->flags, }; struct tcf_t t; + spin_lock_bh(&ife->tcf_lock); + opt.action = ife->tcf_action; + p = rcu_dereference_protected(ife->params, + lockdep_is_held(&ife->tcf_lock)); + opt.flags = p->flags; + if (nla_put(skb, TCA_IFE_PARMS, sizeof(opt), &opt)) goto nla_put_failure; @@ -635,9 +643,11 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind, pr_info("Failed to dump metalist\n"); } + spin_unlock_bh(&ife->tcf_lock); return skb->len; nla_put_failure: + spin_unlock_bh(&ife->tcf_lock); nlmsg_trim(skb, b); return -1; } diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 0dc787a57798..51f235bbeb5b 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -222,8 +222,8 @@ static int tcf_xt_init(struct net *net, struct nlattr *nla, bind); } -static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a, - struct tcf_result *res) +static int tcf_ipt_act(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) { int ret = 0, result = 0; struct tcf_ipt *ipt = to_ipt(a); @@ -288,6 +288,7 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, * for foolproof you need to not assume this */ + spin_lock_bh(&ipt->tcf_lock); t = kmemdup(ipt->tcfi_t, ipt->tcfi_t->u.user.target_size, GFP_ATOMIC); if (unlikely(!t)) goto nla_put_failure; @@ -307,10 +308,12 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, if (nla_put_64bit(skb, TCA_IPT_TM, sizeof(tm), &tm, TCA_IPT_PAD)) goto nla_put_failure; + spin_unlock_bh(&ipt->tcf_lock); kfree(t); return skb->len; nla_put_failure: + spin_unlock_bh(&ipt->tcf_lock); nlmsg_trim(skb, b); kfree(t); return -1; @@ -345,7 +348,7 @@ static struct tc_action_ops act_ipt_ops = { .kind = "ipt", .type = TCA_ACT_IPT, .owner = THIS_MODULE, - .act = tcf_ipt, + .act = tcf_ipt_act, .dump = tcf_ipt_dump, .cleanup = tcf_ipt_release, .init = tcf_ipt_init, @@ -403,7 +406,7 @@ static struct tc_action_ops act_xt_ops = { .kind = "xt", .type = TCA_ACT_XT, .owner = THIS_MODULE, - .act = tcf_ipt, + .act = tcf_ipt_act, .dump = tcf_ipt_dump, .cleanup = tcf_ipt_release, .init = tcf_xt_init, diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index b26d060da08e..8ec216001077 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -30,6 +30,7 @@ #include <net/tc_act/tc_mirred.h> static LIST_HEAD(mirred_list); +static DEFINE_SPINLOCK(mirred_list_lock); static bool tcf_mirred_is_act_redirect(int action) { @@ -62,13 +63,23 @@ static bool tcf_mirred_can_reinsert(int action) return false; } +static struct net_device *tcf_mirred_dev_dereference(struct tcf_mirred *m) +{ + return rcu_dereference_protected(m->tcfm_dev, + lockdep_is_held(&m->tcf_lock)); +} + static void tcf_mirred_release(struct tc_action *a) { struct tcf_mirred *m = to_mirred(a); struct net_device *dev; + spin_lock(&mirred_list_lock); list_del(&m->tcfm_list); - dev = rtnl_dereference(m->tcfm_dev); + spin_unlock(&mirred_list_lock); + + /* last reference to action, no need to lock */ + dev = rcu_dereference_protected(m->tcfm_dev, 1); if (dev) dev_put(dev); } @@ -128,22 +139,9 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, NL_SET_ERR_MSG_MOD(extack, "Unknown mirred option"); return -EINVAL; } - if (parm->ifindex) { - dev = __dev_get_by_index(net, parm->ifindex); - if (dev == NULL) { - if (exists) - tcf_idr_release(*a, bind); - else - tcf_idr_cleanup(tn, parm->index); - return -ENODEV; - } - mac_header_xmit = dev_is_mac_header_xmit(dev); - } else { - dev = NULL; - } if (!exists) { - if (!dev) { + if (!parm->ifindex) { tcf_idr_cleanup(tn, parm->index); NL_SET_ERR_MSG_MOD(extack, "Specified device does not exist"); return -EINVAL; @@ -161,27 +159,39 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, } m = to_mirred(*a); - ASSERT_RTNL(); + spin_lock(&m->tcf_lock); m->tcf_action = parm->action; m->tcfm_eaction = parm->eaction; - if (dev != NULL) { - if (ret != ACT_P_CREATED) - dev_put(rcu_dereference_protected(m->tcfm_dev, 1)); - dev_hold(dev); - rcu_assign_pointer(m->tcfm_dev, dev); + + if (parm->ifindex) { + dev = dev_get_by_index(net, parm->ifindex); + if (!dev) { + spin_unlock(&m->tcf_lock); + tcf_idr_release(*a, bind); + return -ENODEV; + } + mac_header_xmit = dev_is_mac_header_xmit(dev); + rcu_swap_protected(m->tcfm_dev, dev, + lockdep_is_held(&m->tcf_lock)); + if (dev) + dev_put(dev); m->tcfm_mac_header_xmit = mac_header_xmit; } + spin_unlock(&m->tcf_lock); if (ret == ACT_P_CREATED) { + spin_lock(&mirred_list_lock); list_add(&m->tcfm_list, &mirred_list); + spin_unlock(&mirred_list_lock); + tcf_idr_insert(tn, *a); } return ret; } -static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, - struct tcf_result *res) +static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) { struct tcf_mirred *m = to_mirred(a); struct sk_buff *skb2 = skb; @@ -287,26 +297,33 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, { unsigned char *b = skb_tail_pointer(skb); struct tcf_mirred *m = to_mirred(a); - struct net_device *dev = rtnl_dereference(m->tcfm_dev); struct tc_mirred opt = { .index = m->tcf_index, - .action = m->tcf_action, .refcnt = refcount_read(&m->tcf_refcnt) - ref, .bindcnt = atomic_read(&m->tcf_bindcnt) - bind, - .eaction = m->tcfm_eaction, - .ifindex = dev ? dev->ifindex : 0, }; + struct net_device *dev; struct tcf_t t; + spin_lock(&m->tcf_lock); + opt.action = m->tcf_action; + opt.eaction = m->tcfm_eaction; + dev = tcf_mirred_dev_dereference(m); + if (dev) + opt.ifindex = dev->ifindex; + if (nla_put(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt)) goto nla_put_failure; tcf_tm_dump(&t, &m->tcf_tm); if (nla_put_64bit(skb, TCA_MIRRED_TM, sizeof(t), &t, TCA_MIRRED_PAD)) goto nla_put_failure; + spin_unlock(&m->tcf_lock); + return skb->len; nla_put_failure: + spin_unlock(&m->tcf_lock); nlmsg_trim(skb, b); return -1; } @@ -337,15 +354,19 @@ static int mirred_device_event(struct notifier_block *unused, ASSERT_RTNL(); if (event == NETDEV_UNREGISTER) { + spin_lock(&mirred_list_lock); list_for_each_entry(m, &mirred_list, tcfm_list) { - if (rcu_access_pointer(m->tcfm_dev) == dev) { + spin_lock(&m->tcf_lock); + if (tcf_mirred_dev_dereference(m) == dev) { dev_put(dev); /* Note : no rcu grace period necessary, as * net_device are already rcu protected. */ RCU_INIT_POINTER(m->tcfm_dev, NULL); } + spin_unlock(&m->tcf_lock); } + spin_unlock(&mirred_list_lock); } return NOTIFY_DONE; @@ -358,8 +379,20 @@ static struct notifier_block mirred_device_notifier = { static struct net_device *tcf_mirred_get_dev(const struct tc_action *a) { struct tcf_mirred *m = to_mirred(a); + struct net_device *dev; + + rcu_read_lock(); + dev = rcu_dereference(m->tcfm_dev); + if (dev) + dev_hold(dev); + rcu_read_unlock(); - return rtnl_dereference(m->tcfm_dev); + return dev; +} + +static void tcf_mirred_put_dev(struct net_device *dev) +{ + dev_put(dev); } static int tcf_mirred_delete(struct net *net, u32 index) @@ -373,7 +406,7 @@ static struct tc_action_ops act_mirred_ops = { .kind = "mirred", .type = TCA_ACT_MIRRED, .owner = THIS_MODULE, - .act = tcf_mirred, + .act = tcf_mirred_act, .stats_update = tcf_stats_update, .dump = tcf_mirred_dump, .cleanup = tcf_mirred_release, @@ -382,6 +415,7 @@ static struct tc_action_ops act_mirred_ops = { .lookup = tcf_mirred_search, .size = sizeof(struct tcf_mirred), .get_dev = tcf_mirred_get_dev, + .put_dev = tcf_mirred_put_dev, .delete = tcf_mirred_delete, }; diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 4dd9188a72fd..822e903bfc25 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -93,8 +93,8 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, return ret; } -static int tcf_nat(struct sk_buff *skb, const struct tc_action *a, - struct tcf_result *res) +static int tcf_nat_act(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) { struct tcf_nat *p = to_tcf_nat(a); struct iphdr *iph; @@ -311,7 +311,7 @@ static struct tc_action_ops act_nat_ops = { .kind = "nat", .type = TCA_ACT_NAT, .owner = THIS_MODULE, - .act = tcf_nat, + .act = tcf_nat_act, .dump = tcf_nat_dump, .init = tcf_nat_init, .walk = tcf_nat_walker, diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 43ba999b2d23..8a7a7cb94e83 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -187,44 +187,38 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, tcf_idr_cleanup(tn, parm->index); goto out_free; } - p = to_pedit(*a); - keys = kmalloc(ksize, GFP_KERNEL); - if (!keys) { - tcf_idr_release(*a, bind); - ret = -ENOMEM; - goto out_free; - } ret = ACT_P_CREATED; } else if (err > 0) { if (bind) goto out_free; if (!ovr) { - tcf_idr_release(*a, bind); ret = -EEXIST; - goto out_free; - } - p = to_pedit(*a); - if (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys) { - keys = kmalloc(ksize, GFP_KERNEL); - if (!keys) { - ret = -ENOMEM; - goto out_free; - } + goto out_release; } } else { return err; } + p = to_pedit(*a); spin_lock_bh(&p->tcf_lock); - p->tcfp_flags = parm->flags; - p->tcf_action = parm->action; - if (keys) { + + if (ret == ACT_P_CREATED || + (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys)) { + keys = kmalloc(ksize, GFP_ATOMIC); + if (!keys) { + spin_unlock_bh(&p->tcf_lock); + ret = -ENOMEM; + goto out_release; + } kfree(p->tcfp_keys); p->tcfp_keys = keys; p->tcfp_nkeys = parm->nkeys; } memcpy(p->tcfp_keys, parm->keys, ksize); + p->tcfp_flags = parm->flags; + p->tcf_action = parm->action; + kfree(p->tcfp_keys_ex); p->tcfp_keys_ex = keys_ex; @@ -232,6 +226,9 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; + +out_release: + tcf_idr_release(*a, bind); out_free: kfree(keys_ex); return ret; @@ -291,8 +288,8 @@ static int pedit_skb_hdr_offset(struct sk_buff *skb, return ret; } -static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, - struct tcf_result *res) +static int tcf_pedit_act(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) { struct tcf_pedit *p = to_pedit(a); int i; @@ -410,6 +407,7 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a, if (unlikely(!opt)) return -ENOBUFS; + spin_lock_bh(&p->tcf_lock); memcpy(opt->keys, p->tcfp_keys, p->tcfp_nkeys * sizeof(struct tc_pedit_key)); opt->index = p->tcf_index; @@ -432,11 +430,13 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a, tcf_tm_dump(&t, &p->tcf_tm); if (nla_put_64bit(skb, TCA_PEDIT_TM, sizeof(t), &t, TCA_PEDIT_PAD)) goto nla_put_failure; + spin_unlock_bh(&p->tcf_lock); kfree(opt); return skb->len; nla_put_failure: + spin_unlock_bh(&p->tcf_lock); nlmsg_trim(skb, b); kfree(opt); return -1; @@ -471,7 +471,7 @@ static struct tc_action_ops act_pedit_ops = { .kind = "pedit", .type = TCA_ACT_PEDIT, .owner = THIS_MODULE, - .act = tcf_pedit, + .act = tcf_pedit_act, .dump = tcf_pedit_dump, .cleanup = tcf_pedit_cleanup, .init = tcf_pedit_init, diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 1f3192ea8df7..06f0742db593 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -56,7 +56,7 @@ struct tc_police_compat { static unsigned int police_net_id; static struct tc_action_ops act_police_ops; -static int tcf_act_police_walker(struct net *net, struct sk_buff *skb, +static int tcf_police_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, const struct tc_action_ops *ops, struct netlink_ext_ack *extack) @@ -73,7 +73,7 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = { [TCA_POLICE_RESULT] = { .type = NLA_U32 }, }; -static int tcf_act_police_init(struct net *net, struct nlattr *nla, +static int tcf_police_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, struct netlink_ext_ack *extack) @@ -203,7 +203,7 @@ failure: return err; } -static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a, +static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_police *police = to_police(a); @@ -267,21 +267,22 @@ static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a, return police->tcf_action; } -static int tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, +static int tcf_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_police *police = to_police(a); struct tc_police opt = { .index = police->tcf_index, - .action = police->tcf_action, - .mtu = police->tcfp_mtu, - .burst = PSCHED_NS2TICKS(police->tcfp_burst), .refcnt = refcount_read(&police->tcf_refcnt) - ref, .bindcnt = atomic_read(&police->tcf_bindcnt) - bind, }; struct tcf_t t; + spin_lock_bh(&police->tcf_lock); + opt.action = police->tcf_action; + opt.mtu = police->tcfp_mtu; + opt.burst = PSCHED_NS2TICKS(police->tcfp_burst); if (police->rate_present) psched_ratecfg_getrate(&opt.rate, &police->rate); if (police->peak_present) @@ -301,10 +302,12 @@ static int tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, t.expires = jiffies_to_clock_t(police->tcf_tm.expires); if (nla_put_64bit(skb, TCA_POLICE_TM, sizeof(t), &t, TCA_POLICE_PAD)) goto nla_put_failure; + spin_unlock_bh(&police->tcf_lock); return skb->len; nla_put_failure: + spin_unlock_bh(&police->tcf_lock); nlmsg_trim(skb, b); return -1; } @@ -332,10 +335,10 @@ static struct tc_action_ops act_police_ops = { .kind = "police", .type = TCA_ID_POLICE, .owner = THIS_MODULE, - .act = tcf_act_police, - .dump = tcf_act_police_dump, - .init = tcf_act_police_init, - .walk = tcf_act_police_walker, + .act = tcf_police_act, + .dump = tcf_police_dump, + .init = tcf_police_init, + .walk = tcf_police_walker, .lookup = tcf_police_search, .delete = tcf_police_delete, .size = sizeof(struct tcf_police), diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 2608ccc83e5e..81071afe1b43 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -80,11 +80,13 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, } s = to_sample(*a); + spin_lock(&s->tcf_lock); s->tcf_action = parm->action; s->rate = nla_get_u32(tb[TCA_SAMPLE_RATE]); s->psample_group_num = nla_get_u32(tb[TCA_SAMPLE_PSAMPLE_GROUP]); psample_group = psample_group_get(net, s->psample_group_num); if (!psample_group) { + spin_unlock(&s->tcf_lock); tcf_idr_release(*a, bind); return -ENOMEM; } @@ -94,6 +96,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, s->truncate = true; s->trunc_size = nla_get_u32(tb[TCA_SAMPLE_TRUNC_SIZE]); } + spin_unlock(&s->tcf_lock); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); @@ -105,7 +108,8 @@ static void tcf_sample_cleanup(struct tc_action *a) struct tcf_sample *s = to_sample(a); struct psample_group *psample_group; - psample_group = rtnl_dereference(s->psample_group); + /* last reference to action, no need to lock */ + psample_group = rcu_dereference_protected(s->psample_group, 1); RCU_INIT_POINTER(s->psample_group, NULL); if (psample_group) psample_group_put(psample_group); @@ -174,12 +178,13 @@ static int tcf_sample_dump(struct sk_buff *skb, struct tc_action *a, struct tcf_sample *s = to_sample(a); struct tc_sample opt = { .index = s->tcf_index, - .action = s->tcf_action, .refcnt = refcount_read(&s->tcf_refcnt) - ref, .bindcnt = atomic_read(&s->tcf_bindcnt) - bind, }; struct tcf_t t; + spin_lock(&s->tcf_lock); + opt.action = s->tcf_action; if (nla_put(skb, TCA_SAMPLE_PARMS, sizeof(opt), &opt)) goto nla_put_failure; @@ -196,9 +201,12 @@ static int tcf_sample_dump(struct sk_buff *skb, struct tc_action *a, if (nla_put_u32(skb, TCA_SAMPLE_PSAMPLE_GROUP, s->psample_group_num)) goto nla_put_failure; + spin_unlock(&s->tcf_lock); + return skb->len; nla_put_failure: + spin_unlock(&s->tcf_lock); nlmsg_trim(skb, b); return -1; } diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index aa51152e0066..e616523ba3c1 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -28,8 +28,8 @@ static unsigned int simp_net_id; static struct tc_action_ops act_simp_ops; #define SIMP_MAX_DATA 32 -static int tcf_simp(struct sk_buff *skb, const struct tc_action *a, - struct tcf_result *res) +static int tcf_simp_act(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) { struct tcf_defact *d = to_defact(a); @@ -156,10 +156,11 @@ static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, .index = d->tcf_index, .refcnt = refcount_read(&d->tcf_refcnt) - ref, .bindcnt = atomic_read(&d->tcf_bindcnt) - bind, - .action = d->tcf_action, }; struct tcf_t t; + spin_lock_bh(&d->tcf_lock); + opt.action = d->tcf_action; if (nla_put(skb, TCA_DEF_PARMS, sizeof(opt), &opt) || nla_put_string(skb, TCA_DEF_DATA, d->tcfd_defdata)) goto nla_put_failure; @@ -167,9 +168,12 @@ static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, tcf_tm_dump(&t, &d->tcf_tm); if (nla_put_64bit(skb, TCA_DEF_TM, sizeof(t), &t, TCA_DEF_PAD)) goto nla_put_failure; + spin_unlock_bh(&d->tcf_lock); + return skb->len; nla_put_failure: + spin_unlock_bh(&d->tcf_lock); nlmsg_trim(skb, b); return -1; } @@ -203,7 +207,7 @@ static struct tc_action_ops act_simp_ops = { .kind = "simple", .type = TCA_ACT_SIMP, .owner = THIS_MODULE, - .act = tcf_simp, + .act = tcf_simp_act, .dump = tcf_simp_dump, .cleanup = tcf_simp_release, .init = tcf_simp_init, diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index a6db47ebec11..926d7bc4a89d 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -33,8 +33,8 @@ static unsigned int skbedit_net_id; static struct tc_action_ops act_skbedit_ops; -static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a, - struct tcf_result *res) +static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) { struct tcf_skbedit *d = to_skbedit(a); struct tcf_skbedit_params *params; @@ -310,7 +310,7 @@ static struct tc_action_ops act_skbedit_ops = { .kind = "skbedit", .type = TCA_ACT_SKBEDIT, .owner = THIS_MODULE, - .act = tcf_skbedit, + .act = tcf_skbedit_act, .dump = tcf_skbedit_dump, .init = tcf_skbedit_init, .cleanup = tcf_skbedit_cleanup, diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index c437c6d51a71..d6a1af0c4171 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -24,7 +24,7 @@ static unsigned int skbmod_net_id; static struct tc_action_ops act_skbmod_ops; #define MAX_EDIT_LEN ETH_HLEN -static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a, +static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_skbmod *d = to_skbmod(a); @@ -156,7 +156,6 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, d = to_skbmod(*a); - ASSERT_RTNL(); p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL); if (unlikely(!p)) { tcf_idr_release(*a, bind); @@ -166,10 +165,10 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, p->flags = lflags; d->tcf_action = parm->action; - p_old = rtnl_dereference(d->skbmod_p); - if (ovr) spin_lock_bh(&d->tcf_lock); + /* Protected by tcf_lock if overwriting existing action. */ + p_old = rcu_dereference_protected(d->skbmod_p, 1); if (lflags & SKBMOD_F_DMAC) ether_addr_copy(p->eth_dst, daddr); @@ -205,15 +204,18 @@ static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a, { struct tcf_skbmod *d = to_skbmod(a); unsigned char *b = skb_tail_pointer(skb); - struct tcf_skbmod_params *p = rtnl_dereference(d->skbmod_p); + struct tcf_skbmod_params *p; struct tc_skbmod opt = { .index = d->tcf_index, .refcnt = refcount_read(&d->tcf_refcnt) - ref, .bindcnt = atomic_read(&d->tcf_bindcnt) - bind, - .action = d->tcf_action, }; struct tcf_t t; + spin_lock_bh(&d->tcf_lock); + opt.action = d->tcf_action; + p = rcu_dereference_protected(d->skbmod_p, + lockdep_is_held(&d->tcf_lock)); opt.flags = p->flags; if (nla_put(skb, TCA_SKBMOD_PARMS, sizeof(opt), &opt)) goto nla_put_failure; @@ -231,8 +233,10 @@ static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a, if (nla_put_64bit(skb, TCA_SKBMOD_TM, sizeof(t), &t, TCA_SKBMOD_PAD)) goto nla_put_failure; + spin_unlock_bh(&d->tcf_lock); return skb->len; nla_put_failure: + spin_unlock_bh(&d->tcf_lock); nlmsg_trim(skb, b); return -1; } @@ -266,7 +270,7 @@ static struct tc_action_ops act_skbmod_ops = { .kind = "skbmod", .type = TCA_ACT_SKBMOD, .owner = THIS_MODULE, - .act = tcf_skbmod_run, + .act = tcf_skbmod_act, .dump = tcf_skbmod_dump, .init = tcf_skbmod_init, .cleanup = tcf_skbmod_cleanup, diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index d42d9e112789..ba2ae9f75ef5 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -204,7 +204,6 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, { struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1]; - struct tcf_tunnel_key_params *params_old; struct tcf_tunnel_key_params *params_new; struct metadata_dst *metadata = NULL; struct tc_tunnel_key *parm; @@ -346,24 +345,22 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, t = to_tunnel_key(*a); - ASSERT_RTNL(); params_new = kzalloc(sizeof(*params_new), GFP_KERNEL); if (unlikely(!params_new)) { tcf_idr_release(*a, bind); NL_SET_ERR_MSG(extack, "Cannot allocate tunnel key parameters"); return -ENOMEM; } - - params_old = rtnl_dereference(t->params); - - t->tcf_action = parm->action; params_new->tcft_action = parm->t_action; params_new->tcft_enc_metadata = metadata; - rcu_assign_pointer(t->params, params_new); - - if (params_old) - kfree_rcu(params_old, rcu); + spin_lock(&t->tcf_lock); + t->tcf_action = parm->action; + rcu_swap_protected(t->params, params_new, + lockdep_is_held(&t->tcf_lock)); + spin_unlock(&t->tcf_lock); + if (params_new) + kfree_rcu(params_new, rcu); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); @@ -485,12 +482,13 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a, .index = t->tcf_index, .refcnt = refcount_read(&t->tcf_refcnt) - ref, .bindcnt = atomic_read(&t->tcf_bindcnt) - bind, - .action = t->tcf_action, }; struct tcf_t tm; - params = rtnl_dereference(t->params); - + spin_lock(&t->tcf_lock); + params = rcu_dereference_protected(t->params, + lockdep_is_held(&t->tcf_lock)); + opt.action = t->tcf_action; opt.t_action = params->tcft_action; if (nla_put(skb, TCA_TUNNEL_KEY_PARMS, sizeof(opt), &opt)) @@ -522,10 +520,12 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a, if (nla_put_64bit(skb, TCA_TUNNEL_KEY_TM, sizeof(tm), &tm, TCA_TUNNEL_KEY_PAD)) goto nla_put_failure; + spin_unlock(&t->tcf_lock); return skb->len; nla_put_failure: + spin_unlock(&t->tcf_lock); nlmsg_trim(skb, b); return -1; } diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 15a0ee214c9c..d1f5028384c9 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -22,8 +22,8 @@ static unsigned int vlan_net_id; static struct tc_action_ops act_vlan_ops; -static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, - struct tcf_result *res) +static int tcf_vlan_act(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) { struct tcf_vlan *v = to_vlan(a); struct tcf_vlan_params *p; @@ -109,7 +109,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, { struct tc_action_net *tn = net_generic(net, vlan_net_id); struct nlattr *tb[TCA_VLAN_MAX + 1]; - struct tcf_vlan_params *p, *p_old; + struct tcf_vlan_params *p; struct tc_vlan *parm; struct tcf_vlan *v; int action; @@ -202,26 +202,24 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, v = to_vlan(*a); - ASSERT_RTNL(); p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) { tcf_idr_release(*a, bind); return -ENOMEM; } - v->tcf_action = parm->action; - - p_old = rtnl_dereference(v->vlan_p); - p->tcfv_action = action; p->tcfv_push_vid = push_vid; p->tcfv_push_prio = push_prio; p->tcfv_push_proto = push_proto; - rcu_assign_pointer(v->vlan_p, p); + spin_lock(&v->tcf_lock); + v->tcf_action = parm->action; + rcu_swap_protected(v->vlan_p, p, lockdep_is_held(&v->tcf_lock)); + spin_unlock(&v->tcf_lock); - if (p_old) - kfree_rcu(p_old, rcu); + if (p) + kfree_rcu(p, rcu); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); @@ -243,16 +241,18 @@ static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a, { unsigned char *b = skb_tail_pointer(skb); struct tcf_vlan *v = to_vlan(a); - struct tcf_vlan_params *p = rtnl_dereference(v->vlan_p); + struct tcf_vlan_params *p; struct tc_vlan opt = { .index = v->tcf_index, .refcnt = refcount_read(&v->tcf_refcnt) - ref, .bindcnt = atomic_read(&v->tcf_bindcnt) - bind, - .action = v->tcf_action, - .v_action = p->tcfv_action, }; struct tcf_t t; + spin_lock(&v->tcf_lock); + opt.action = v->tcf_action; + p = rcu_dereference_protected(v->vlan_p, lockdep_is_held(&v->tcf_lock)); + opt.v_action = p->tcfv_action; if (nla_put(skb, TCA_VLAN_PARMS, sizeof(opt), &opt)) goto nla_put_failure; @@ -268,9 +268,12 @@ static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a, tcf_tm_dump(&t, &v->tcf_tm); if (nla_put_64bit(skb, TCA_VLAN_TM, sizeof(t), &t, TCA_VLAN_PAD)) goto nla_put_failure; + spin_unlock(&v->tcf_lock); + return skb->len; nla_put_failure: + spin_unlock(&v->tcf_lock); nlmsg_trim(skb, b); return -1; } @@ -304,7 +307,7 @@ static struct tc_action_ops act_vlan_ops = { .kind = "vlan", .type = TCA_ACT_VLAN, .owner = THIS_MODULE, - .act = tcf_vlan, + .act = tcf_vlan_act, .dump = tcf_vlan_dump, .init = tcf_vlan_init, .cleanup = tcf_vlan_cleanup, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 194c2e0b2737..31bd1439cf60 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -780,6 +780,8 @@ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, block->refcnt--; if (list_empty(&block->chain_list)) kfree(block); + } else { + block->refcnt--; } } EXPORT_SYMBOL(tcf_block_put_ext); @@ -2174,6 +2176,7 @@ static int tc_exts_setup_cb_egdev_call(struct tcf_exts *exts, if (!dev) continue; ret = tc_setup_cb_egdev_call(dev, type, type_data, err_stop); + a->ops->put_dev(dev); if (ret < 0) return ret; ok_count += ret; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index a3b69bb6f4b0..6fd9bdd93796 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -24,6 +24,7 @@ #include <net/pkt_cls.h> #include <net/ip.h> #include <net/flow_dissector.h> +#include <net/geneve.h> #include <net/dst.h> #include <net/dst_metadata.h> @@ -53,6 +54,7 @@ struct fl_flow_key { struct flow_dissector_key_tcp tcp; struct flow_dissector_key_ip ip; struct flow_dissector_key_ip enc_ip; + struct flow_dissector_key_enc_opts enc_opts; } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ struct fl_flow_mask_range { @@ -482,6 +484,21 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_ENC_IP_TOS_MASK] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_ENC_IP_TTL] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_ENC_IP_TTL_MASK] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_ENC_OPTS] = { .type = NLA_NESTED }, + [TCA_FLOWER_KEY_ENC_OPTS_MASK] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy +enc_opts_policy[TCA_FLOWER_KEY_ENC_OPTS_MAX + 1] = { + [TCA_FLOWER_KEY_ENC_OPTS_GENEVE] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy +geneve_opt_policy[TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX + 1] = { + [TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA] = { .type = NLA_BINARY, + .len = 128 }, }; static void fl_set_key_val(struct nlattr **tb, @@ -603,6 +620,145 @@ static void fl_set_key_ip(struct nlattr **tb, bool encap, fl_set_key_val(tb, &key->ttl, ttl_key, &mask->ttl, ttl_mask, sizeof(key->ttl)); } +static int fl_set_geneve_opt(const struct nlattr *nla, struct fl_flow_key *key, + int depth, int option_len, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX + 1]; + struct nlattr *class = NULL, *type = NULL, *data = NULL; + struct geneve_opt *opt; + int err, data_len = 0; + + if (option_len > sizeof(struct geneve_opt)) + data_len = option_len - sizeof(struct geneve_opt); + + opt = (struct geneve_opt *)&key->enc_opts.data[key->enc_opts.len]; + memset(opt, 0xff, option_len); + opt->length = data_len / 4; + opt->r1 = 0; + opt->r2 = 0; + opt->r3 = 0; + + /* If no mask has been prodived we assume an exact match. */ + if (!depth) + return sizeof(struct geneve_opt) + data_len; + + if (nla_type(nla) != TCA_FLOWER_KEY_ENC_OPTS_GENEVE) { + NL_SET_ERR_MSG(extack, "Non-geneve option type for mask"); + return -EINVAL; + } + + err = nla_parse_nested(tb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX, + nla, geneve_opt_policy, extack); + if (err < 0) + return err; + + /* We are not allowed to omit any of CLASS, TYPE or DATA + * fields from the key. + */ + if (!option_len && + (!tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS] || + !tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE] || + !tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA])) { + NL_SET_ERR_MSG(extack, "Missing tunnel key geneve option class, type or data"); + return -EINVAL; + } + + /* Omitting any of CLASS, TYPE or DATA fields is allowed + * for the mask. + */ + if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA]) { + int new_len = key->enc_opts.len; + + data = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA]; + data_len = nla_len(data); + if (data_len < 4) { + NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is less than 4 bytes long"); + return -ERANGE; + } + if (data_len % 4) { + NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is not a multiple of 4 bytes long"); + return -ERANGE; + } + + new_len += sizeof(struct geneve_opt) + data_len; + BUILD_BUG_ON(FLOW_DIS_TUN_OPTS_MAX != IP_TUNNEL_OPTS_MAX); + if (new_len > FLOW_DIS_TUN_OPTS_MAX) { + NL_SET_ERR_MSG(extack, "Tunnel options exceeds max size"); + return -ERANGE; + } + opt->length = data_len / 4; + memcpy(opt->opt_data, nla_data(data), data_len); + } + + if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS]) { + class = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS]; + opt->opt_class = nla_get_be16(class); + } + + if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE]) { + type = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE]; + opt->type = nla_get_u8(type); + } + + return sizeof(struct geneve_opt) + data_len; +} + +static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, + struct fl_flow_key *mask, + struct netlink_ext_ack *extack) +{ + const struct nlattr *nla_enc_key, *nla_opt_key, *nla_opt_msk = NULL; + int option_len, key_depth, msk_depth = 0; + + nla_enc_key = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS]); + + if (tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]) { + nla_opt_msk = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]); + msk_depth = nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]); + } + + nla_for_each_attr(nla_opt_key, nla_enc_key, + nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS]), key_depth) { + switch (nla_type(nla_opt_key)) { + case TCA_FLOWER_KEY_ENC_OPTS_GENEVE: + option_len = 0; + key->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT; + option_len = fl_set_geneve_opt(nla_opt_key, key, + key_depth, option_len, + extack); + if (option_len < 0) + return option_len; + + key->enc_opts.len += option_len; + /* At the same time we need to parse through the mask + * in order to verify exact and mask attribute lengths. + */ + mask->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT; + option_len = fl_set_geneve_opt(nla_opt_msk, mask, + msk_depth, option_len, + extack); + if (option_len < 0) + return option_len; + + mask->enc_opts.len += option_len; + if (key->enc_opts.len != mask->enc_opts.len) { + NL_SET_ERR_MSG(extack, "Key and mask miss aligned"); + return -EINVAL; + } + + if (msk_depth) + nla_opt_msk = nla_next(nla_opt_msk, &msk_depth); + break; + default: + NL_SET_ERR_MSG(extack, "Unknown tunnel option type"); + return -EINVAL; + } + } + + return 0; +} + static int fl_set_key(struct net *net, struct nlattr **tb, struct fl_flow_key *key, struct fl_flow_key *mask, struct netlink_ext_ack *extack) @@ -799,6 +955,12 @@ static int fl_set_key(struct net *net, struct nlattr **tb, fl_set_key_ip(tb, true, &key->enc_ip, &mask->enc_ip); + if (tb[TCA_FLOWER_KEY_ENC_OPTS]) { + ret = fl_set_enc_opt(tb, key, mask, extack); + if (ret) + return ret; + } + if (tb[TCA_FLOWER_KEY_FLAGS]) ret = fl_set_key_flags(tb, &key->control.flags, &mask->control.flags); @@ -894,6 +1056,8 @@ static void fl_init_dissector(struct flow_dissector *dissector, FLOW_DISSECTOR_KEY_ENC_PORTS, enc_tp); FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_ENC_IP, enc_ip); + FL_KEY_SET_IF_MASKED(mask, keys, cnt, + FLOW_DISSECTOR_KEY_ENC_OPTS, enc_opts); skb_flow_dissector_init(dissector, keys, cnt); } @@ -1174,8 +1338,8 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, TC_CLSFLOWER_REPLACE : TC_CLSFLOWER_DESTROY; cls_flower.cookie = (unsigned long)f; cls_flower.dissector = &mask->dissector; - cls_flower.mask = &f->mkey; - cls_flower.key = &f->key; + cls_flower.mask = &mask->key; + cls_flower.key = &f->mkey; cls_flower.exts = &f->exts; cls_flower.classid = f->res.classid; @@ -1414,6 +1578,83 @@ static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask) return nla_put(skb, TCA_FLOWER_KEY_FLAGS_MASK, 4, &_mask); } +static int fl_dump_key_geneve_opt(struct sk_buff *skb, + struct flow_dissector_key_enc_opts *enc_opts) +{ + struct geneve_opt *opt; + struct nlattr *nest; + int opt_off = 0; + + nest = nla_nest_start(skb, TCA_FLOWER_KEY_ENC_OPTS_GENEVE); + if (!nest) + goto nla_put_failure; + + while (enc_opts->len > opt_off) { + opt = (struct geneve_opt *)&enc_opts->data[opt_off]; + + if (nla_put_be16(skb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS, + opt->opt_class)) + goto nla_put_failure; + if (nla_put_u8(skb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE, + opt->type)) + goto nla_put_failure; + if (nla_put(skb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA, + opt->length * 4, opt->opt_data)) + goto nla_put_failure; + + opt_off += sizeof(struct geneve_opt) + opt->length * 4; + } + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static int fl_dump_key_options(struct sk_buff *skb, int enc_opt_type, + struct flow_dissector_key_enc_opts *enc_opts) +{ + struct nlattr *nest; + int err; + + if (!enc_opts->len) + return 0; + + nest = nla_nest_start(skb, enc_opt_type); + if (!nest) + goto nla_put_failure; + + switch (enc_opts->dst_opt_type) { + case TUNNEL_GENEVE_OPT: + err = fl_dump_key_geneve_opt(skb, enc_opts); + if (err) + goto nla_put_failure; + break; + default: + goto nla_put_failure; + } + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static int fl_dump_key_enc_opt(struct sk_buff *skb, + struct flow_dissector_key_enc_opts *key_opts, + struct flow_dissector_key_enc_opts *msk_opts) +{ + int err; + + err = fl_dump_key_options(skb, TCA_FLOWER_KEY_ENC_OPTS, key_opts); + if (err) + return err; + + return fl_dump_key_options(skb, TCA_FLOWER_KEY_ENC_OPTS_MASK, msk_opts); +} + static int fl_dump_key(struct sk_buff *skb, struct net *net, struct fl_flow_key *key, struct fl_flow_key *mask) { @@ -1594,7 +1835,8 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net, &mask->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK, sizeof(key->enc_tp.dst)) || - fl_dump_key_ip(skb, true, &key->enc_ip, &mask->enc_ip)) + fl_dump_key_ip(skb, true, &key->enc_ip, &mask->enc_ip) || + fl_dump_key_enc_opt(skb, &key->enc_opts, &mask->enc_opts)) goto nla_put_failure; if (fl_dump_key_flags(skb, key->control.flags, mask->control.flags)) diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index bfb9f812e2ef..ce8087846f05 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -325,7 +325,8 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk) if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) && time_after(jiffies, chunk->msg->expires_at)) { struct sctp_stream_out *streamout = - &chunk->asoc->stream.out[chunk->sinfo.sinfo_stream]; + SCTP_SO(&chunk->asoc->stream, + chunk->sinfo.sinfo_stream); if (chunk->sent_count) { chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++; @@ -339,7 +340,8 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk) } else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) && chunk->sent_count > chunk->sinfo.sinfo_timetolive) { struct sctp_stream_out *streamout = - &chunk->asoc->stream.out[chunk->sinfo.sinfo_stream]; + SCTP_SO(&chunk->asoc->stream, + chunk->sinfo.sinfo_stream); chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++; streamout->ext->abandoned_sent[SCTP_PR_INDEX(RTX)]++; diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index d68aa33485a9..d74d00b29942 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -80,7 +80,7 @@ static inline void sctp_outq_head_data(struct sctp_outq *q, q->out_qlen += ch->skb->len; stream = sctp_chunk_stream_no(ch); - oute = q->asoc->stream.out[stream].ext; + oute = SCTP_SO(&q->asoc->stream, stream)->ext; list_add(&ch->stream_list, &oute->outq); } @@ -101,7 +101,7 @@ static inline void sctp_outq_tail_data(struct sctp_outq *q, q->out_qlen += ch->skb->len; stream = sctp_chunk_stream_no(ch); - oute = q->asoc->stream.out[stream].ext; + oute = SCTP_SO(&q->asoc->stream, stream)->ext; list_add_tail(&ch->stream_list, &oute->outq); } @@ -372,7 +372,7 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc, sctp_insert_list(&asoc->outqueue.abandoned, &chk->transmitted_list); - streamout = &asoc->stream.out[chk->sinfo.sinfo_stream]; + streamout = SCTP_SO(&asoc->stream, chk->sinfo.sinfo_stream); asoc->sent_cnt_removable--; asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; streamout->ext->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; @@ -416,7 +416,7 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; if (chk->sinfo.sinfo_stream < asoc->stream.outcnt) { struct sctp_stream_out *streamout = - &asoc->stream.out[chk->sinfo.sinfo_stream]; + SCTP_SO(&asoc->stream, chk->sinfo.sinfo_stream); streamout->ext->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; } @@ -1082,6 +1082,7 @@ static void sctp_outq_flush_data(struct sctp_flush_ctx *ctx, /* Finally, transmit new packets. */ while ((chunk = sctp_outq_dequeue_data(ctx->q)) != NULL) { __u32 sid = ntohs(chunk->subh.data_hdr->stream); + __u8 stream_state = SCTP_SO(&ctx->asoc->stream, sid)->state; /* Has this chunk expired? */ if (sctp_chunk_abandoned(chunk)) { @@ -1091,7 +1092,7 @@ static void sctp_outq_flush_data(struct sctp_flush_ctx *ctx, continue; } - if (ctx->asoc->stream.out[sid].state == SCTP_STREAM_CLOSED) { + if (stream_state == SCTP_STREAM_CLOSED) { sctp_outq_head_data(ctx->q, chunk); break; } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 502c0d7cb105..e96b15a66aba 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1911,7 +1911,7 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc, goto err; } - if (unlikely(!asoc->stream.out[sinfo->sinfo_stream].ext)) { + if (unlikely(!SCTP_SO(&asoc->stream, sinfo->sinfo_stream)->ext)) { err = sctp_stream_init_ext(&asoc->stream, sinfo->sinfo_stream); if (err) goto err; @@ -7154,7 +7154,7 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len, if (!asoc || params.sprstat_sid >= asoc->stream.outcnt) goto out; - streamoute = asoc->stream.out[params.sprstat_sid].ext; + streamoute = SCTP_SO(&asoc->stream, params.sprstat_sid)->ext; if (!streamoute) { /* Not allocated yet, means all stats are 0 */ params.sprstat_abandoned_unsent = 0; diff --git a/net/sctp/stream.c b/net/sctp/stream.c index f1f1d1b232ba..ffb940d3b57c 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -37,6 +37,53 @@ #include <net/sctp/sm.h> #include <net/sctp/stream_sched.h> +static struct flex_array *fa_alloc(size_t elem_size, size_t elem_count, + gfp_t gfp) +{ + struct flex_array *result; + int err; + + result = flex_array_alloc(elem_size, elem_count, gfp); + if (result) { + err = flex_array_prealloc(result, 0, elem_count, gfp); + if (err) { + flex_array_free(result); + result = NULL; + } + } + + return result; +} + +static void fa_free(struct flex_array *fa) +{ + if (fa) + flex_array_free(fa); +} + +static void fa_copy(struct flex_array *fa, struct flex_array *from, + size_t index, size_t count) +{ + void *elem; + + while (count--) { + elem = flex_array_get(from, index); + flex_array_put(fa, index, elem, 0); + index++; + } +} + +static void fa_zero(struct flex_array *fa, size_t index, size_t count) +{ + void *elem; + + while (count--) { + elem = flex_array_get(fa, index); + memset(elem, 0, fa->element_size); + index++; + } +} + /* Migrates chunks from stream queues to new stream queues if needed, * but not across associations. Also, removes those chunks to streams * higher than the new max. @@ -78,34 +125,33 @@ static void sctp_stream_outq_migrate(struct sctp_stream *stream, * sctp_stream_update will swap ->out pointers. */ for (i = 0; i < outcnt; i++) { - kfree(new->out[i].ext); - new->out[i].ext = stream->out[i].ext; - stream->out[i].ext = NULL; + kfree(SCTP_SO(new, i)->ext); + SCTP_SO(new, i)->ext = SCTP_SO(stream, i)->ext; + SCTP_SO(stream, i)->ext = NULL; } } for (i = outcnt; i < stream->outcnt; i++) - kfree(stream->out[i].ext); + kfree(SCTP_SO(stream, i)->ext); } static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt, gfp_t gfp) { - struct sctp_stream_out *out; + struct flex_array *out; + size_t elem_size = sizeof(struct sctp_stream_out); - out = kmalloc_array(outcnt, sizeof(*out), gfp); + out = fa_alloc(elem_size, outcnt, gfp); if (!out) return -ENOMEM; if (stream->out) { - memcpy(out, stream->out, min(outcnt, stream->outcnt) * - sizeof(*out)); - kfree(stream->out); + fa_copy(out, stream->out, 0, min(outcnt, stream->outcnt)); + fa_free(stream->out); } if (outcnt > stream->outcnt) - memset(out + stream->outcnt, 0, - (outcnt - stream->outcnt) * sizeof(*out)); + fa_zero(out, stream->outcnt, (outcnt - stream->outcnt)); stream->out = out; @@ -115,22 +161,20 @@ static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt, static int sctp_stream_alloc_in(struct sctp_stream *stream, __u16 incnt, gfp_t gfp) { - struct sctp_stream_in *in; - - in = kmalloc_array(incnt, sizeof(*stream->in), gfp); + struct flex_array *in; + size_t elem_size = sizeof(struct sctp_stream_in); + in = fa_alloc(elem_size, incnt, gfp); if (!in) return -ENOMEM; if (stream->in) { - memcpy(in, stream->in, min(incnt, stream->incnt) * - sizeof(*in)); - kfree(stream->in); + fa_copy(in, stream->in, 0, min(incnt, stream->incnt)); + fa_free(stream->in); } if (incnt > stream->incnt) - memset(in + stream->incnt, 0, - (incnt - stream->incnt) * sizeof(*in)); + fa_zero(in, stream->incnt, (incnt - stream->incnt)); stream->in = in; @@ -162,7 +206,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, stream->outcnt = outcnt; for (i = 0; i < stream->outcnt; i++) - stream->out[i].state = SCTP_STREAM_OPEN; + SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN; sched->init(stream); @@ -174,7 +218,7 @@ in: ret = sctp_stream_alloc_in(stream, incnt, gfp); if (ret) { sched->free(stream); - kfree(stream->out); + fa_free(stream->out); stream->out = NULL; stream->outcnt = 0; goto out; @@ -193,7 +237,7 @@ int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid) soute = kzalloc(sizeof(*soute), GFP_KERNEL); if (!soute) return -ENOMEM; - stream->out[sid].ext = soute; + SCTP_SO(stream, sid)->ext = soute; return sctp_sched_init_sid(stream, sid, GFP_KERNEL); } @@ -205,9 +249,9 @@ void sctp_stream_free(struct sctp_stream *stream) sched->free(stream); for (i = 0; i < stream->outcnt; i++) - kfree(stream->out[i].ext); - kfree(stream->out); - kfree(stream->in); + kfree(SCTP_SO(stream, i)->ext); + fa_free(stream->out); + fa_free(stream->in); } void sctp_stream_clear(struct sctp_stream *stream) @@ -215,12 +259,12 @@ void sctp_stream_clear(struct sctp_stream *stream) int i; for (i = 0; i < stream->outcnt; i++) { - stream->out[i].mid = 0; - stream->out[i].mid_uo = 0; + SCTP_SO(stream, i)->mid = 0; + SCTP_SO(stream, i)->mid_uo = 0; } for (i = 0; i < stream->incnt; i++) - stream->in[i].mid = 0; + SCTP_SI(stream, i)->mid = 0; } void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new) @@ -273,8 +317,8 @@ static bool sctp_stream_outq_is_empty(struct sctp_stream *stream, for (i = 0; i < str_nums; i++) { __u16 sid = ntohs(str_list[i]); - if (stream->out[sid].ext && - !list_empty(&stream->out[sid].ext->outq)) + if (SCTP_SO(stream, sid)->ext && + !list_empty(&SCTP_SO(stream, sid)->ext->outq)) return false; } @@ -361,11 +405,11 @@ int sctp_send_reset_streams(struct sctp_association *asoc, if (out) { if (str_nums) for (i = 0; i < str_nums; i++) - stream->out[str_list[i]].state = + SCTP_SO(stream, str_list[i])->state = SCTP_STREAM_CLOSED; else for (i = 0; i < stream->outcnt; i++) - stream->out[i].state = SCTP_STREAM_CLOSED; + SCTP_SO(stream, i)->state = SCTP_STREAM_CLOSED; } asoc->strreset_chunk = chunk; @@ -380,11 +424,11 @@ int sctp_send_reset_streams(struct sctp_association *asoc, if (str_nums) for (i = 0; i < str_nums; i++) - stream->out[str_list[i]].state = + SCTP_SO(stream, str_list[i])->state = SCTP_STREAM_OPEN; else for (i = 0; i < stream->outcnt; i++) - stream->out[i].state = SCTP_STREAM_OPEN; + SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN; goto out; } @@ -418,7 +462,7 @@ int sctp_send_reset_assoc(struct sctp_association *asoc) /* Block further xmit of data until this request is completed */ for (i = 0; i < stream->outcnt; i++) - stream->out[i].state = SCTP_STREAM_CLOSED; + SCTP_SO(stream, i)->state = SCTP_STREAM_CLOSED; asoc->strreset_chunk = chunk; sctp_chunk_hold(asoc->strreset_chunk); @@ -429,7 +473,7 @@ int sctp_send_reset_assoc(struct sctp_association *asoc) asoc->strreset_chunk = NULL; for (i = 0; i < stream->outcnt; i++) - stream->out[i].state = SCTP_STREAM_OPEN; + SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN; return retval; } @@ -609,10 +653,10 @@ struct sctp_chunk *sctp_process_strreset_outreq( } for (i = 0; i < nums; i++) - stream->in[ntohs(str_p[i])].mid = 0; + SCTP_SI(stream, ntohs(str_p[i]))->mid = 0; } else { for (i = 0; i < stream->incnt; i++) - stream->in[i].mid = 0; + SCTP_SI(stream, i)->mid = 0; } result = SCTP_STRRESET_PERFORMED; @@ -683,11 +727,11 @@ struct sctp_chunk *sctp_process_strreset_inreq( if (nums) for (i = 0; i < nums; i++) - stream->out[ntohs(str_p[i])].state = + SCTP_SO(stream, ntohs(str_p[i]))->state = SCTP_STREAM_CLOSED; else for (i = 0; i < stream->outcnt; i++) - stream->out[i].state = SCTP_STREAM_CLOSED; + SCTP_SO(stream, i)->state = SCTP_STREAM_CLOSED; asoc->strreset_chunk = chunk; asoc->strreset_outstanding = 1; @@ -786,11 +830,11 @@ struct sctp_chunk *sctp_process_strreset_tsnreq( * incoming and outgoing streams. */ for (i = 0; i < stream->outcnt; i++) { - stream->out[i].mid = 0; - stream->out[i].mid_uo = 0; + SCTP_SO(stream, i)->mid = 0; + SCTP_SO(stream, i)->mid_uo = 0; } for (i = 0; i < stream->incnt; i++) - stream->in[i].mid = 0; + SCTP_SI(stream, i)->mid = 0; result = SCTP_STRRESET_PERFORMED; @@ -979,15 +1023,18 @@ struct sctp_chunk *sctp_process_strreset_resp( sizeof(__u16); if (result == SCTP_STRRESET_PERFORMED) { + struct sctp_stream_out *sout; if (nums) { for (i = 0; i < nums; i++) { - stream->out[ntohs(str_p[i])].mid = 0; - stream->out[ntohs(str_p[i])].mid_uo = 0; + sout = SCTP_SO(stream, ntohs(str_p[i])); + sout->mid = 0; + sout->mid_uo = 0; } } else { for (i = 0; i < stream->outcnt; i++) { - stream->out[i].mid = 0; - stream->out[i].mid_uo = 0; + sout = SCTP_SO(stream, i); + sout->mid = 0; + sout->mid_uo = 0; } } @@ -995,7 +1042,7 @@ struct sctp_chunk *sctp_process_strreset_resp( } for (i = 0; i < stream->outcnt; i++) - stream->out[i].state = SCTP_STREAM_OPEN; + SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN; *evp = sctp_ulpevent_make_stream_reset_event(asoc, flags, nums, str_p, GFP_ATOMIC); @@ -1050,15 +1097,15 @@ struct sctp_chunk *sctp_process_strreset_resp( asoc->adv_peer_ack_point = asoc->ctsn_ack_point; for (i = 0; i < stream->outcnt; i++) { - stream->out[i].mid = 0; - stream->out[i].mid_uo = 0; + SCTP_SO(stream, i)->mid = 0; + SCTP_SO(stream, i)->mid_uo = 0; } for (i = 0; i < stream->incnt; i++) - stream->in[i].mid = 0; + SCTP_SI(stream, i)->mid = 0; } for (i = 0; i < stream->outcnt; i++) - stream->out[i].state = SCTP_STREAM_OPEN; + SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN; *evp = sctp_ulpevent_make_assoc_reset_event(asoc, flags, stsn, rtsn, GFP_ATOMIC); @@ -1072,7 +1119,7 @@ struct sctp_chunk *sctp_process_strreset_resp( if (result == SCTP_STRRESET_PERFORMED) for (i = number; i < stream->outcnt; i++) - stream->out[i].state = SCTP_STREAM_OPEN; + SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN; else stream->outcnt = number; diff --git a/net/sctp/stream_interleave.c b/net/sctp/stream_interleave.c index d3764c181299..0a78cdf86463 100644 --- a/net/sctp/stream_interleave.c +++ b/net/sctp/stream_interleave.c @@ -197,7 +197,7 @@ static struct sctp_ulpevent *sctp_intl_retrieve_partial( __u32 next_fsn = 0; int is_last = 0; - sin = sctp_stream_in(ulpq->asoc, event->stream); + sin = sctp_stream_in(&ulpq->asoc->stream, event->stream); skb_queue_walk(&ulpq->reasm, pos) { struct sctp_ulpevent *cevent = sctp_skb2event(pos); @@ -278,7 +278,7 @@ static struct sctp_ulpevent *sctp_intl_retrieve_reassembled( __u32 pd_len = 0; __u32 mid = 0; - sin = sctp_stream_in(ulpq->asoc, event->stream); + sin = sctp_stream_in(&ulpq->asoc->stream, event->stream); skb_queue_walk(&ulpq->reasm, pos) { struct sctp_ulpevent *cevent = sctp_skb2event(pos); @@ -368,7 +368,7 @@ static struct sctp_ulpevent *sctp_intl_reasm(struct sctp_ulpq *ulpq, sctp_intl_store_reasm(ulpq, event); - sin = sctp_stream_in(ulpq->asoc, event->stream); + sin = sctp_stream_in(&ulpq->asoc->stream, event->stream); if (sin->pd_mode && event->mid == sin->mid && event->fsn == sin->fsn) retval = sctp_intl_retrieve_partial(ulpq, event); @@ -575,7 +575,7 @@ static struct sctp_ulpevent *sctp_intl_retrieve_partial_uo( __u32 next_fsn = 0; int is_last = 0; - sin = sctp_stream_in(ulpq->asoc, event->stream); + sin = sctp_stream_in(&ulpq->asoc->stream, event->stream); skb_queue_walk(&ulpq->reasm_uo, pos) { struct sctp_ulpevent *cevent = sctp_skb2event(pos); @@ -659,7 +659,7 @@ static struct sctp_ulpevent *sctp_intl_retrieve_reassembled_uo( __u32 pd_len = 0; __u32 mid = 0; - sin = sctp_stream_in(ulpq->asoc, event->stream); + sin = sctp_stream_in(&ulpq->asoc->stream, event->stream); skb_queue_walk(&ulpq->reasm_uo, pos) { struct sctp_ulpevent *cevent = sctp_skb2event(pos); @@ -750,7 +750,7 @@ static struct sctp_ulpevent *sctp_intl_reasm_uo(struct sctp_ulpq *ulpq, sctp_intl_store_reasm_uo(ulpq, event); - sin = sctp_stream_in(ulpq->asoc, event->stream); + sin = sctp_stream_in(&ulpq->asoc->stream, event->stream); if (sin->pd_mode_uo && event->mid == sin->mid_uo && event->fsn == sin->fsn_uo) retval = sctp_intl_retrieve_partial_uo(ulpq, event); @@ -774,7 +774,7 @@ static struct sctp_ulpevent *sctp_intl_retrieve_first_uo(struct sctp_ulpq *ulpq) skb_queue_walk(&ulpq->reasm_uo, pos) { struct sctp_ulpevent *cevent = sctp_skb2event(pos); - csin = sctp_stream_in(ulpq->asoc, cevent->stream); + csin = sctp_stream_in(&ulpq->asoc->stream, cevent->stream); if (csin->pd_mode_uo) continue; @@ -875,7 +875,7 @@ static struct sctp_ulpevent *sctp_intl_retrieve_first(struct sctp_ulpq *ulpq) skb_queue_walk(&ulpq->reasm, pos) { struct sctp_ulpevent *cevent = sctp_skb2event(pos); - csin = sctp_stream_in(ulpq->asoc, cevent->stream); + csin = sctp_stream_in(&ulpq->asoc->stream, cevent->stream); if (csin->pd_mode) continue; @@ -1053,7 +1053,7 @@ static void sctp_intl_abort_pd(struct sctp_ulpq *ulpq, gfp_t gfp) __u16 sid; for (sid = 0; sid < stream->incnt; sid++) { - struct sctp_stream_in *sin = &stream->in[sid]; + struct sctp_stream_in *sin = SCTP_SI(stream, sid); __u32 mid; if (sin->pd_mode_uo) { @@ -1247,7 +1247,7 @@ static void sctp_handle_fwdtsn(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk) static void sctp_intl_skip(struct sctp_ulpq *ulpq, __u16 sid, __u32 mid, __u8 flags) { - struct sctp_stream_in *sin = sctp_stream_in(ulpq->asoc, sid); + struct sctp_stream_in *sin = sctp_stream_in(&ulpq->asoc->stream, sid); struct sctp_stream *stream = &ulpq->asoc->stream; if (flags & SCTP_FTSN_U_BIT) { diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c index f5fcd425232a..a6c04a94b08f 100644 --- a/net/sctp/stream_sched.c +++ b/net/sctp/stream_sched.c @@ -161,7 +161,7 @@ int sctp_sched_set_sched(struct sctp_association *asoc, /* Give the next scheduler a clean slate. */ for (i = 0; i < asoc->stream.outcnt; i++) { - void *p = asoc->stream.out[i].ext; + void *p = SCTP_SO(&asoc->stream, i)->ext; if (!p) continue; @@ -175,7 +175,7 @@ int sctp_sched_set_sched(struct sctp_association *asoc, asoc->outqueue.sched = n; n->init(&asoc->stream); for (i = 0; i < asoc->stream.outcnt; i++) { - if (!asoc->stream.out[i].ext) + if (!SCTP_SO(&asoc->stream, i)->ext) continue; ret = n->init_sid(&asoc->stream, i, GFP_KERNEL); @@ -217,7 +217,7 @@ int sctp_sched_set_value(struct sctp_association *asoc, __u16 sid, if (sid >= asoc->stream.outcnt) return -EINVAL; - if (!asoc->stream.out[sid].ext) { + if (!SCTP_SO(&asoc->stream, sid)->ext) { int ret; ret = sctp_stream_init_ext(&asoc->stream, sid); @@ -234,7 +234,7 @@ int sctp_sched_get_value(struct sctp_association *asoc, __u16 sid, if (sid >= asoc->stream.outcnt) return -EINVAL; - if (!asoc->stream.out[sid].ext) + if (!SCTP_SO(&asoc->stream, sid)->ext) return 0; return asoc->outqueue.sched->get(&asoc->stream, sid, value); @@ -252,7 +252,7 @@ void sctp_sched_dequeue_done(struct sctp_outq *q, struct sctp_chunk *ch) * priority stream comes in. */ sid = sctp_chunk_stream_no(ch); - sout = &q->asoc->stream.out[sid]; + sout = SCTP_SO(&q->asoc->stream, sid); q->asoc->stream.out_curr = sout; return; } @@ -272,8 +272,9 @@ void sctp_sched_dequeue_common(struct sctp_outq *q, struct sctp_chunk *ch) int sctp_sched_init_sid(struct sctp_stream *stream, __u16 sid, gfp_t gfp) { struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); + struct sctp_stream_out_ext *ext = SCTP_SO(stream, sid)->ext; - INIT_LIST_HEAD(&stream->out[sid].ext->outq); + INIT_LIST_HEAD(&ext->outq); return sched->init_sid(stream, sid, gfp); } diff --git a/net/sctp/stream_sched_prio.c b/net/sctp/stream_sched_prio.c index 7997d35dd0fd..2245083a98f2 100644 --- a/net/sctp/stream_sched_prio.c +++ b/net/sctp/stream_sched_prio.c @@ -75,10 +75,10 @@ static struct sctp_stream_priorities *sctp_sched_prio_get_head( /* No luck. So we search on all streams now. */ for (i = 0; i < stream->outcnt; i++) { - if (!stream->out[i].ext) + if (!SCTP_SO(stream, i)->ext) continue; - p = stream->out[i].ext->prio_head; + p = SCTP_SO(stream, i)->ext->prio_head; if (!p) /* Means all other streams won't be initialized * as well. @@ -165,7 +165,7 @@ static void sctp_sched_prio_sched(struct sctp_stream *stream, static int sctp_sched_prio_set(struct sctp_stream *stream, __u16 sid, __u16 prio, gfp_t gfp) { - struct sctp_stream_out *sout = &stream->out[sid]; + struct sctp_stream_out *sout = SCTP_SO(stream, sid); struct sctp_stream_out_ext *soute = sout->ext; struct sctp_stream_priorities *prio_head, *old; bool reschedule = false; @@ -186,7 +186,7 @@ static int sctp_sched_prio_set(struct sctp_stream *stream, __u16 sid, return 0; for (i = 0; i < stream->outcnt; i++) { - soute = stream->out[i].ext; + soute = SCTP_SO(stream, i)->ext; if (soute && soute->prio_head == old) /* It's still in use, nothing else to do here. */ return 0; @@ -201,7 +201,7 @@ static int sctp_sched_prio_set(struct sctp_stream *stream, __u16 sid, static int sctp_sched_prio_get(struct sctp_stream *stream, __u16 sid, __u16 *value) { - *value = stream->out[sid].ext->prio_head->prio; + *value = SCTP_SO(stream, sid)->ext->prio_head->prio; return 0; } @@ -215,7 +215,7 @@ static int sctp_sched_prio_init(struct sctp_stream *stream) static int sctp_sched_prio_init_sid(struct sctp_stream *stream, __u16 sid, gfp_t gfp) { - INIT_LIST_HEAD(&stream->out[sid].ext->prio_list); + INIT_LIST_HEAD(&SCTP_SO(stream, sid)->ext->prio_list); return sctp_sched_prio_set(stream, sid, 0, gfp); } @@ -233,9 +233,9 @@ static void sctp_sched_prio_free(struct sctp_stream *stream) */ sctp_sched_prio_unsched_all(stream); for (i = 0; i < stream->outcnt; i++) { - if (!stream->out[i].ext) + if (!SCTP_SO(stream, i)->ext) continue; - prio = stream->out[i].ext->prio_head; + prio = SCTP_SO(stream, i)->ext->prio_head; if (prio && list_empty(&prio->prio_sched)) list_add(&prio->prio_sched, &list); } @@ -255,7 +255,7 @@ static void sctp_sched_prio_enqueue(struct sctp_outq *q, ch = list_first_entry(&msg->chunks, struct sctp_chunk, frag_list); sid = sctp_chunk_stream_no(ch); stream = &q->asoc->stream; - sctp_sched_prio_sched(stream, stream->out[sid].ext); + sctp_sched_prio_sched(stream, SCTP_SO(stream, sid)->ext); } static struct sctp_chunk *sctp_sched_prio_dequeue(struct sctp_outq *q) @@ -297,7 +297,7 @@ static void sctp_sched_prio_dequeue_done(struct sctp_outq *q, * this priority. */ sid = sctp_chunk_stream_no(ch); - soute = q->asoc->stream.out[sid].ext; + soute = SCTP_SO(&q->asoc->stream, sid)->ext; prio = soute->prio_head; sctp_sched_prio_next_stream(prio); @@ -317,7 +317,7 @@ static void sctp_sched_prio_sched_all(struct sctp_stream *stream) __u16 sid; sid = sctp_chunk_stream_no(ch); - sout = &stream->out[sid]; + sout = SCTP_SO(stream, sid); if (sout->ext) sctp_sched_prio_sched(stream, sout->ext); } diff --git a/net/sctp/stream_sched_rr.c b/net/sctp/stream_sched_rr.c index 1155692448f1..52ba743fa7a7 100644 --- a/net/sctp/stream_sched_rr.c +++ b/net/sctp/stream_sched_rr.c @@ -100,7 +100,7 @@ static int sctp_sched_rr_init(struct sctp_stream *stream) static int sctp_sched_rr_init_sid(struct sctp_stream *stream, __u16 sid, gfp_t gfp) { - INIT_LIST_HEAD(&stream->out[sid].ext->rr_list); + INIT_LIST_HEAD(&SCTP_SO(stream, sid)->ext->rr_list); return 0; } @@ -120,7 +120,7 @@ static void sctp_sched_rr_enqueue(struct sctp_outq *q, ch = list_first_entry(&msg->chunks, struct sctp_chunk, frag_list); sid = sctp_chunk_stream_no(ch); stream = &q->asoc->stream; - sctp_sched_rr_sched(stream, stream->out[sid].ext); + sctp_sched_rr_sched(stream, SCTP_SO(stream, sid)->ext); } static struct sctp_chunk *sctp_sched_rr_dequeue(struct sctp_outq *q) @@ -154,7 +154,7 @@ static void sctp_sched_rr_dequeue_done(struct sctp_outq *q, /* Last chunk on that msg, move to the next stream */ sid = sctp_chunk_stream_no(ch); - soute = q->asoc->stream.out[sid].ext; + soute = SCTP_SO(&q->asoc->stream, sid)->ext; sctp_sched_rr_next_stream(&q->asoc->stream); @@ -173,7 +173,7 @@ static void sctp_sched_rr_sched_all(struct sctp_stream *stream) __u16 sid; sid = sctp_chunk_stream_no(ch); - soute = stream->out[sid].ext; + soute = SCTP_SO(stream, sid)->ext; if (soute) sctp_sched_rr_sched(stream, soute); } diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 0fc94f296e54..2d8a1e15e4f9 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1305,6 +1305,8 @@ static void smc_tcp_listen_work(struct work_struct *work) sock_hold(lsk); /* sock_put in smc_listen_work */ INIT_WORK(&new_smc->smc_listen_work, smc_listen_work); smc_copy_sock_settings_to_smc(new_smc); + new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf; + new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf; sock_hold(&new_smc->sk); /* sock_put in passive closing */ if (!schedule_work(&new_smc->smc_listen_work)) sock_put(&new_smc->sk); @@ -1581,8 +1583,7 @@ static int smc_shutdown(struct socket *sock, int how) lock_sock(sk); rc = -ENOTCONN; - if ((sk->sk_state != SMC_LISTEN) && - (sk->sk_state != SMC_ACTIVE) && + if ((sk->sk_state != SMC_ACTIVE) && (sk->sk_state != SMC_PEERCLOSEWAIT1) && (sk->sk_state != SMC_PEERCLOSEWAIT2) && (sk->sk_state != SMC_APPCLOSEWAIT1) && @@ -1706,12 +1707,16 @@ static int smc_ioctl(struct socket *sock, unsigned int cmd, smc = smc_sk(sock->sk); conn = &smc->conn; + lock_sock(&smc->sk); if (smc->use_fallback) { - if (!smc->clcsock) + if (!smc->clcsock) { + release_sock(&smc->sk); return -EBADF; - return smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg); + } + answ = smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg); + release_sock(&smc->sk); + return answ; } - lock_sock(&smc->sk); switch (cmd) { case SIOCINQ: /* same as FIONREAD */ if (smc->sk.sk_state == SMC_LISTEN) { diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index cfade7fdcc6d..e36f21ce7252 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -184,6 +184,37 @@ struct smc_ism_event_work { struct smcd_event event; }; +#define ISM_EVENT_REQUEST 0x0001 +#define ISM_EVENT_RESPONSE 0x0002 +#define ISM_EVENT_REQUEST_IR 0x00000001 +#define ISM_EVENT_CODE_TESTLINK 0x83 + +static void smcd_handle_sw_event(struct smc_ism_event_work *wrk) +{ + union { + u64 info; + struct { + u32 uid; + unsigned short vlanid; + u16 code; + }; + } ev_info; + + switch (wrk->event.code) { + case ISM_EVENT_CODE_TESTLINK: /* Activity timer */ + ev_info.info = wrk->event.info; + if (ev_info.code == ISM_EVENT_REQUEST) { + ev_info.code = ISM_EVENT_RESPONSE; + wrk->smcd->ops->signal_event(wrk->smcd, + wrk->event.tok, + ISM_EVENT_REQUEST_IR, + ISM_EVENT_CODE_TESTLINK, + ev_info.info); + } + break; + } +} + /* worker for SMC-D events */ static void smc_ism_event_work(struct work_struct *work) { @@ -196,6 +227,9 @@ static void smc_ism_event_work(struct work_struct *work) break; case ISM_EVENT_DMB: break; + case ISM_EVENT_SWR: /* Software defined event */ + smcd_handle_sw_event(wrk); + break; } kfree(wrk); } diff --git a/net/tipc/net.c b/net/tipc/net.c index a7f6964c3a4b..62199cf5a56c 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -123,15 +123,13 @@ void tipc_net_finalize(struct net *net, u32 addr) { struct tipc_net *tn = tipc_net(net); - spin_lock_bh(&tn->node_list_lock); - if (!tipc_own_addr(net)) { + if (!cmpxchg(&tn->node_addr, 0, addr)) { tipc_set_node_addr(net, addr); tipc_named_reinit(net); tipc_sk_reinit(net); tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr, TIPC_CLUSTER_SCOPE, 0, addr); } - spin_unlock_bh(&tn->node_list_lock); } void tipc_net_stop(struct net *net) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 83d67df33f0c..52fbe727d7c1 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -48,19 +48,13 @@ static int tls_do_decryption(struct sock *sk, struct scatterlist *sgout, char *iv_recv, size_t data_len, - struct sk_buff *skb, - gfp_t flags) + struct aead_request *aead_req) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); - struct aead_request *aead_req; - int ret; - aead_req = aead_request_alloc(ctx->aead_recv, flags); - if (!aead_req) - return -ENOMEM; - + aead_request_set_tfm(aead_req, ctx->aead_recv); aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE); aead_request_set_crypt(aead_req, sgin, sgout, data_len + tls_ctx->rx.tag_size, @@ -69,8 +63,6 @@ static int tls_do_decryption(struct sock *sk, crypto_req_done, &ctx->async_wait); ret = crypto_wait_req(crypto_aead_decrypt(aead_req), &ctx->async_wait); - - aead_request_free(aead_req); return ret; } @@ -657,8 +649,132 @@ static struct sk_buff *tls_wait_data(struct sock *sk, int flags, return skb; } +/* This function decrypts the input skb into either out_iov or in out_sg + * or in skb buffers itself. The input parameter 'zc' indicates if + * zero-copy mode needs to be tried or not. With zero-copy mode, either + * out_iov or out_sg must be non-NULL. In case both out_iov and out_sg are + * NULL, then the decryption happens inside skb buffers itself, i.e. + * zero-copy gets disabled and 'zc' is updated. + */ + +static int decrypt_internal(struct sock *sk, struct sk_buff *skb, + struct iov_iter *out_iov, + struct scatterlist *out_sg, + int *chunk, bool *zc) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); + struct strp_msg *rxm = strp_msg(skb); + int n_sgin, n_sgout, nsg, mem_size, aead_size, err, pages = 0; + struct aead_request *aead_req; + struct sk_buff *unused; + u8 *aad, *iv, *mem = NULL; + struct scatterlist *sgin = NULL; + struct scatterlist *sgout = NULL; + const int data_len = rxm->full_len - tls_ctx->rx.overhead_size; + + if (*zc && (out_iov || out_sg)) { + if (out_iov) + n_sgout = iov_iter_npages(out_iov, INT_MAX) + 1; + else + n_sgout = sg_nents(out_sg); + } else { + n_sgout = 0; + *zc = false; + } + + n_sgin = skb_cow_data(skb, 0, &unused); + if (n_sgin < 1) + return -EBADMSG; + + /* Increment to accommodate AAD */ + n_sgin = n_sgin + 1; + + nsg = n_sgin + n_sgout; + + aead_size = sizeof(*aead_req) + crypto_aead_reqsize(ctx->aead_recv); + mem_size = aead_size + (nsg * sizeof(struct scatterlist)); + mem_size = mem_size + TLS_AAD_SPACE_SIZE; + mem_size = mem_size + crypto_aead_ivsize(ctx->aead_recv); + + /* Allocate a single block of memory which contains + * aead_req || sgin[] || sgout[] || aad || iv. + * This order achieves correct alignment for aead_req, sgin, sgout. + */ + mem = kmalloc(mem_size, sk->sk_allocation); + if (!mem) + return -ENOMEM; + + /* Segment the allocated memory */ + aead_req = (struct aead_request *)mem; + sgin = (struct scatterlist *)(mem + aead_size); + sgout = sgin + n_sgin; + aad = (u8 *)(sgout + n_sgout); + iv = aad + TLS_AAD_SPACE_SIZE; + + /* Prepare IV */ + err = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE, + iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, + tls_ctx->rx.iv_size); + if (err < 0) { + kfree(mem); + return err; + } + memcpy(iv, tls_ctx->rx.iv, TLS_CIPHER_AES_GCM_128_SALT_SIZE); + + /* Prepare AAD */ + tls_make_aad(aad, rxm->full_len - tls_ctx->rx.overhead_size, + tls_ctx->rx.rec_seq, tls_ctx->rx.rec_seq_size, + ctx->control); + + /* Prepare sgin */ + sg_init_table(sgin, n_sgin); + sg_set_buf(&sgin[0], aad, TLS_AAD_SPACE_SIZE); + err = skb_to_sgvec(skb, &sgin[1], + rxm->offset + tls_ctx->rx.prepend_size, + rxm->full_len - tls_ctx->rx.prepend_size); + if (err < 0) { + kfree(mem); + return err; + } + + if (n_sgout) { + if (out_iov) { + sg_init_table(sgout, n_sgout); + sg_set_buf(&sgout[0], aad, TLS_AAD_SPACE_SIZE); + + *chunk = 0; + err = zerocopy_from_iter(sk, out_iov, data_len, &pages, + chunk, &sgout[1], + (n_sgout - 1), false); + if (err < 0) + goto fallback_to_reg_recv; + } else if (out_sg) { + memcpy(sgout, out_sg, n_sgout * sizeof(*sgout)); + } else { + goto fallback_to_reg_recv; + } + } else { +fallback_to_reg_recv: + sgout = sgin; + pages = 0; + *chunk = 0; + *zc = false; + } + + /* Prepare and submit AEAD request */ + err = tls_do_decryption(sk, sgin, sgout, iv, data_len, aead_req); + + /* Release the pages in case iov was mapped to pages */ + for (; pages > 0; pages--) + put_page(sg_page(&sgout[pages])); + + kfree(mem); + return err; +} + static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, - struct scatterlist *sgout, bool *zc) + struct iov_iter *dest, int *chunk, bool *zc) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); @@ -671,7 +787,7 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, return err; #endif if (!ctx->decrypted) { - err = decrypt_skb(sk, skb, sgout); + err = decrypt_internal(sk, skb, dest, NULL, chunk, zc); if (err < 0) return err; } else { @@ -690,54 +806,10 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, int decrypt_skb(struct sock *sk, struct sk_buff *skb, struct scatterlist *sgout) { - struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); - char iv[TLS_CIPHER_AES_GCM_128_SALT_SIZE + MAX_IV_SIZE]; - struct scatterlist sgin_arr[MAX_SKB_FRAGS + 2]; - struct scatterlist *sgin = &sgin_arr[0]; - struct strp_msg *rxm = strp_msg(skb); - int ret, nsg = ARRAY_SIZE(sgin_arr); - struct sk_buff *unused; - - ret = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE, - iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, - tls_ctx->rx.iv_size); - if (ret < 0) - return ret; - - memcpy(iv, tls_ctx->rx.iv, TLS_CIPHER_AES_GCM_128_SALT_SIZE); - if (!sgout) { - nsg = skb_cow_data(skb, 0, &unused) + 1; - sgin = kmalloc_array(nsg, sizeof(*sgin), sk->sk_allocation); - sgout = sgin; - } - - sg_init_table(sgin, nsg); - sg_set_buf(&sgin[0], ctx->rx_aad_ciphertext, TLS_AAD_SPACE_SIZE); - - nsg = skb_to_sgvec(skb, &sgin[1], - rxm->offset + tls_ctx->rx.prepend_size, - rxm->full_len - tls_ctx->rx.prepend_size); - if (nsg < 0) { - ret = nsg; - goto out; - } - - tls_make_aad(ctx->rx_aad_ciphertext, - rxm->full_len - tls_ctx->rx.overhead_size, - tls_ctx->rx.rec_seq, - tls_ctx->rx.rec_seq_size, - ctx->control); - - ret = tls_do_decryption(sk, sgin, sgout, iv, - rxm->full_len - tls_ctx->rx.overhead_size, - skb, sk->sk_allocation); - -out: - if (sgin != &sgin_arr[0]) - kfree(sgin); + bool zc = true; + int chunk; - return ret; + return decrypt_internal(sk, skb, NULL, sgout, &chunk, &zc); } static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb, @@ -816,43 +888,17 @@ int tls_sw_recvmsg(struct sock *sk, } if (!ctx->decrypted) { - int page_count; - int to_copy; - - page_count = iov_iter_npages(&msg->msg_iter, - MAX_SKB_FRAGS); - to_copy = rxm->full_len - tls_ctx->rx.overhead_size; - if (!is_kvec && to_copy <= len && page_count < MAX_SKB_FRAGS && - likely(!(flags & MSG_PEEK))) { - struct scatterlist sgin[MAX_SKB_FRAGS + 1]; - int pages = 0; + int to_copy = rxm->full_len - tls_ctx->rx.overhead_size; + if (!is_kvec && to_copy <= len && + likely(!(flags & MSG_PEEK))) zc = true; - sg_init_table(sgin, MAX_SKB_FRAGS + 1); - sg_set_buf(&sgin[0], ctx->rx_aad_plaintext, - TLS_AAD_SPACE_SIZE); - - err = zerocopy_from_iter(sk, &msg->msg_iter, - to_copy, &pages, - &chunk, &sgin[1], - MAX_SKB_FRAGS, false); - if (err < 0) - goto fallback_to_reg_recv; - - err = decrypt_skb_update(sk, skb, sgin, &zc); - for (; pages > 0; pages--) - put_page(sg_page(&sgin[pages])); - if (err < 0) { - tls_err_abort(sk, EBADMSG); - goto recv_end; - } - } else { -fallback_to_reg_recv: - err = decrypt_skb_update(sk, skb, NULL, &zc); - if (err < 0) { - tls_err_abort(sk, EBADMSG); - goto recv_end; - } + + err = decrypt_skb_update(sk, skb, &msg->msg_iter, + &chunk, &zc); + if (err < 0) { + tls_err_abort(sk, EBADMSG); + goto recv_end; } ctx->decrypted = true; } @@ -903,7 +949,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, int err = 0; long timeo; int chunk; - bool zc; + bool zc = false; lock_sock(sk); @@ -920,7 +966,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, } if (!ctx->decrypted) { - err = decrypt_skb_update(sk, skb, NULL, &zc); + err = decrypt_skb_update(sk, skb, NULL, &chunk, &zc); if (err < 0) { tls_err_abort(sk, EBADMSG); diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index c1076c19b858..ab27a2872935 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -451,14 +451,14 @@ static int vsock_send_shutdown(struct sock *sk, int mode) return transport->shutdown(vsock_sk(sk), mode); } -void vsock_pending_work(struct work_struct *work) +static void vsock_pending_work(struct work_struct *work) { struct sock *sk; struct sock *listener; struct vsock_sock *vsk; bool cleanup; - vsk = container_of(work, struct vsock_sock, dwork.work); + vsk = container_of(work, struct vsock_sock, pending_work.work); sk = sk_vsock(vsk); listener = vsk->listener; cleanup = true; @@ -498,7 +498,6 @@ out: sock_put(sk); sock_put(listener); } -EXPORT_SYMBOL_GPL(vsock_pending_work); /**** SOCKET OPERATIONS ****/ @@ -597,6 +596,8 @@ static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr) return retval; } +static void vsock_connect_timeout(struct work_struct *work); + struct sock *__vsock_create(struct net *net, struct socket *sock, struct sock *parent, @@ -638,6 +639,8 @@ struct sock *__vsock_create(struct net *net, vsk->sent_request = false; vsk->ignore_connecting_rst = false; vsk->peer_shutdown = 0; + INIT_DELAYED_WORK(&vsk->connect_work, vsock_connect_timeout); + INIT_DELAYED_WORK(&vsk->pending_work, vsock_pending_work); psk = parent ? vsock_sk(parent) : NULL; if (parent) { @@ -1117,7 +1120,7 @@ static void vsock_connect_timeout(struct work_struct *work) struct vsock_sock *vsk; int cancel = 0; - vsk = container_of(work, struct vsock_sock, dwork.work); + vsk = container_of(work, struct vsock_sock, connect_work.work); sk = sk_vsock(vsk); lock_sock(sk); @@ -1221,9 +1224,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, * timeout fires. */ sock_hold(sk); - INIT_DELAYED_WORK(&vsk->dwork, - vsock_connect_timeout); - schedule_delayed_work(&vsk->dwork, timeout); + schedule_delayed_work(&vsk->connect_work, timeout); /* Skip ahead to preserve error code set above. */ goto out_wait; diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index a7a73ffe675b..cb332adb84cd 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1094,8 +1094,7 @@ static int vmci_transport_recv_listen(struct sock *sk, vpending->listener = sk; sock_hold(sk); sock_hold(pending); - INIT_DELAYED_WORK(&vpending->dwork, vsock_pending_work); - schedule_delayed_work(&vpending->dwork, HZ); + schedule_delayed_work(&vpending->pending_work, HZ); out: return err; |