diff options
Diffstat (limited to 'net')
48 files changed, 391 insertions, 185 deletions
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index 3315b9a598af..4026f198a734 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -32,10 +32,21 @@ #include "bat_v_elp.h" #include "bat_v_ogm.h" +#include "hard-interface.h" #include "hash.h" #include "originator.h" #include "packet.h" +static void batadv_v_iface_activate(struct batadv_hard_iface *hard_iface) +{ + /* B.A.T.M.A.N. V does not use any queuing mechanism, therefore it can + * set the interface as ACTIVE right away, without any risk of race + * condition + */ + if (hard_iface->if_status == BATADV_IF_TO_BE_ACTIVATED) + hard_iface->if_status = BATADV_IF_ACTIVE; +} + static int batadv_v_iface_enable(struct batadv_hard_iface *hard_iface) { int ret; @@ -274,6 +285,7 @@ static bool batadv_v_neigh_is_sob(struct batadv_neigh_node *neigh1, static struct batadv_algo_ops batadv_batman_v __read_mostly = { .name = "BATMAN_V", + .bat_iface_activate = batadv_v_iface_activate, .bat_iface_enable = batadv_v_iface_enable, .bat_iface_disable = batadv_v_iface_disable, .bat_iface_update_mac = batadv_v_iface_update_mac, diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index e96d7c745b4a..3e6b2624f980 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -568,6 +568,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, * be sent to * @bat_priv: the bat priv with all the soft interface information * @ip_dst: ipv4 to look up in the DHT + * @vid: VLAN identifier * * An originator O is selected if and only if its DHT_ID value is one of three * closest values (from the LEFT, with wrap around if needed) then the hash @@ -576,7 +577,8 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, * Return: the candidate array of size BATADV_DAT_CANDIDATE_NUM. */ static struct batadv_dat_candidate * -batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) +batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst, + unsigned short vid) { int select; batadv_dat_addr_t last_max = BATADV_DAT_ADDR_MAX, ip_key; @@ -592,7 +594,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) return NULL; dat.ip = ip_dst; - dat.vid = 0; + dat.vid = vid; ip_key = (batadv_dat_addr_t)batadv_hash_dat(&dat, BATADV_DAT_ADDR_MAX); @@ -612,6 +614,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) * @bat_priv: the bat priv with all the soft interface information * @skb: payload to send * @ip: the DHT key + * @vid: VLAN identifier * @packet_subtype: unicast4addr packet subtype to use * * This function copies the skb with pskb_copy() and is sent as unicast packet @@ -622,7 +625,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) */ static bool batadv_dat_send_data(struct batadv_priv *bat_priv, struct sk_buff *skb, __be32 ip, - int packet_subtype) + unsigned short vid, int packet_subtype) { int i; bool ret = false; @@ -631,7 +634,7 @@ static bool batadv_dat_send_data(struct batadv_priv *bat_priv, struct sk_buff *tmp_skb; struct batadv_dat_candidate *cand; - cand = batadv_dat_select_candidates(bat_priv, ip); + cand = batadv_dat_select_candidates(bat_priv, ip, vid); if (!cand) goto out; @@ -1022,7 +1025,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, ret = true; } else { /* Send the request to the DHT */ - ret = batadv_dat_send_data(bat_priv, skb, ip_dst, + ret = batadv_dat_send_data(bat_priv, skb, ip_dst, vid, BATADV_P_DAT_DHT_GET); } out: @@ -1150,8 +1153,8 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, /* Send the ARP reply to the candidates for both the IP addresses that * the node obtained from the ARP reply */ - batadv_dat_send_data(bat_priv, skb, ip_src, BATADV_P_DAT_DHT_PUT); - batadv_dat_send_data(bat_priv, skb, ip_dst, BATADV_P_DAT_DHT_PUT); + batadv_dat_send_data(bat_priv, skb, ip_src, vid, BATADV_P_DAT_DHT_PUT); + batadv_dat_send_data(bat_priv, skb, ip_dst, vid, BATADV_P_DAT_DHT_PUT); } /** diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index b22b2775a0a5..0a7deaf2670a 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -407,6 +407,9 @@ batadv_hardif_activate_interface(struct batadv_hard_iface *hard_iface) batadv_update_min_mtu(hard_iface->soft_iface); + if (bat_priv->bat_algo_ops->bat_iface_activate) + bat_priv->bat_algo_ops->bat_iface_activate(hard_iface); + out: if (primary_if) batadv_hardif_put(primary_if); @@ -572,8 +575,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface, struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_hard_iface *primary_if = NULL; - if (hard_iface->if_status == BATADV_IF_ACTIVE) - batadv_hardif_deactivate_interface(hard_iface); + batadv_hardif_deactivate_interface(hard_iface); if (hard_iface->if_status != BATADV_IF_INACTIVE) goto out; diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index e4cbb0753e37..c355a824713c 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -250,7 +250,6 @@ static void batadv_neigh_node_release(struct kref *ref) { struct hlist_node *node_tmp; struct batadv_neigh_node *neigh_node; - struct batadv_hardif_neigh_node *hardif_neigh; struct batadv_neigh_ifinfo *neigh_ifinfo; struct batadv_algo_ops *bao; @@ -262,13 +261,7 @@ static void batadv_neigh_node_release(struct kref *ref) batadv_neigh_ifinfo_put(neigh_ifinfo); } - hardif_neigh = batadv_hardif_neigh_get(neigh_node->if_incoming, - neigh_node->addr); - if (hardif_neigh) { - /* batadv_hardif_neigh_get() increases refcount too */ - batadv_hardif_neigh_put(hardif_neigh); - batadv_hardif_neigh_put(hardif_neigh); - } + batadv_hardif_neigh_put(neigh_node->hardif_neigh); if (bao->bat_neigh_free) bao->bat_neigh_free(neigh_node); @@ -663,6 +656,11 @@ batadv_neigh_node_new(struct batadv_orig_node *orig_node, ether_addr_copy(neigh_node->addr, neigh_addr); neigh_node->if_incoming = hard_iface; neigh_node->orig_node = orig_node; + neigh_node->last_seen = jiffies; + + /* increment unique neighbor refcount */ + kref_get(&hardif_neigh->refcount); + neigh_node->hardif_neigh = hardif_neigh; /* extra reference for return */ kref_init(&neigh_node->refcount); @@ -672,9 +670,6 @@ batadv_neigh_node_new(struct batadv_orig_node *orig_node, hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list); spin_unlock_bh(&orig_node->neigh_list_lock); - /* increment unique neighbor refcount */ - kref_get(&hardif_neigh->refcount); - batadv_dbg(BATADV_DBG_BATMAN, orig_node->bat_priv, "Creating new neighbor %pM for orig_node %pM on interface %s\n", neigh_addr, orig_node->orig, hard_iface->net_dev->name); diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 4dd646a52f1a..b781bf753250 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -105,6 +105,15 @@ static void _batadv_update_route(struct batadv_priv *bat_priv, neigh_node = NULL; spin_lock_bh(&orig_node->neigh_list_lock); + /* curr_router used earlier may not be the current orig_ifinfo->router + * anymore because it was dereferenced outside of the neigh_list_lock + * protected region. After the new best neighbor has replace the current + * best neighbor the reference counter needs to decrease. Consequently, + * the code needs to ensure the curr_router variable contains a pointer + * to the replaced best neighbor. + */ + curr_router = rcu_dereference_protected(orig_ifinfo->router, true); + rcu_assign_pointer(orig_ifinfo->router, neigh_node); spin_unlock_bh(&orig_node->neigh_list_lock); batadv_orig_ifinfo_put(orig_ifinfo); diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 3ce06e0a91b1..76417850d3fc 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -675,6 +675,9 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, if (pending) { hlist_del(&forw_packet->list); + if (!forw_packet->own) + atomic_inc(&bat_priv->bcast_queue_left); + batadv_forw_packet_free(forw_packet); } } @@ -702,6 +705,9 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, if (pending) { hlist_del(&forw_packet->list); + if (!forw_packet->own) + atomic_inc(&bat_priv->batman_queue_left); + batadv_forw_packet_free(forw_packet); } } diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 0710379491bf..8a136b6a1ff0 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -408,11 +408,17 @@ void batadv_interface_rx(struct net_device *soft_iface, */ nf_reset(skb); + if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) + goto dropped; + vid = batadv_get_vid(skb, 0); ethhdr = eth_hdr(skb); switch (ntohs(ethhdr->h_proto)) { case ETH_P_8021Q: + if (!pskb_may_pull(skb, VLAN_ETH_HLEN)) + goto dropped; + vhdr = (struct vlan_ethhdr *)skb->data; if (vhdr->h_vlan_encapsulated_proto != ethertype) @@ -424,8 +430,6 @@ void batadv_interface_rx(struct net_device *soft_iface, } /* skb->dev & skb->pkt_type are set here */ - if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) - goto dropped; skb->protocol = eth_type_trans(skb, soft_iface); /* should not be necessary anymore as we use skb_pull_rcsum() diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 0b43e86328a5..9b4551a86535 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -215,6 +215,8 @@ static void batadv_tt_local_entry_release(struct kref *ref) tt_local_entry = container_of(ref, struct batadv_tt_local_entry, common.refcount); + batadv_softif_vlan_put(tt_local_entry->vlan); + kfree_rcu(tt_local_entry, common.rcu); } @@ -673,6 +675,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, kref_get(&tt_local->common.refcount); tt_local->last_seen = jiffies; tt_local->common.added_at = tt_local->last_seen; + tt_local->vlan = vlan; /* the batman interface mac and multicast addresses should never be * purged @@ -991,7 +994,6 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_local_entry *tt_local; struct batadv_hard_iface *primary_if; - struct batadv_softif_vlan *vlan; struct hlist_head *head; unsigned short vid; u32 i; @@ -1027,14 +1029,6 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) last_seen_msecs = last_seen_msecs % 1000; no_purge = tt_common_entry->flags & np_flag; - - vlan = batadv_softif_vlan_get(bat_priv, vid); - if (!vlan) { - seq_printf(seq, "Cannot retrieve VLAN %d\n", - BATADV_PRINT_VID(vid)); - continue; - } - seq_printf(seq, " * %pM %4i [%c%c%c%c%c%c] %3u.%03u (%#.8x)\n", tt_common_entry->addr, @@ -1052,9 +1046,7 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) BATADV_TT_CLIENT_ISOLA) ? 'I' : '.'), no_purge ? 0 : last_seen_secs, no_purge ? 0 : last_seen_msecs, - vlan->tt.crc); - - batadv_softif_vlan_put(vlan); + tt_local->vlan->tt.crc); } rcu_read_unlock(); } @@ -1099,7 +1091,6 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr, { struct batadv_tt_local_entry *tt_local_entry; u16 flags, curr_flags = BATADV_NO_FLAGS; - struct batadv_softif_vlan *vlan; void *tt_entry_exists; tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid); @@ -1139,14 +1130,6 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr, /* extra call to free the local tt entry */ batadv_tt_local_entry_put(tt_local_entry); - /* decrease the reference held for this vlan */ - vlan = batadv_softif_vlan_get(bat_priv, vid); - if (!vlan) - goto out; - - batadv_softif_vlan_put(vlan); - batadv_softif_vlan_put(vlan); - out: if (tt_local_entry) batadv_tt_local_entry_put(tt_local_entry); @@ -1219,7 +1202,6 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv) spinlock_t *list_lock; /* protects write access to the hash lists */ struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_local_entry *tt_local; - struct batadv_softif_vlan *vlan; struct hlist_node *node_tmp; struct hlist_head *head; u32 i; @@ -1241,14 +1223,6 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv) struct batadv_tt_local_entry, common); - /* decrease the reference held for this vlan */ - vlan = batadv_softif_vlan_get(bat_priv, - tt_common_entry->vid); - if (vlan) { - batadv_softif_vlan_put(vlan); - batadv_softif_vlan_put(vlan); - } - batadv_tt_local_entry_put(tt_local); } spin_unlock_bh(list_lock); @@ -3309,7 +3283,6 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) struct batadv_hashtable *hash = bat_priv->tt.local_hash; struct batadv_tt_common_entry *tt_common; struct batadv_tt_local_entry *tt_local; - struct batadv_softif_vlan *vlan; struct hlist_node *node_tmp; struct hlist_head *head; spinlock_t *list_lock; /* protects write access to the hash lists */ @@ -3339,13 +3312,6 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) struct batadv_tt_local_entry, common); - /* decrease the reference held for this vlan */ - vlan = batadv_softif_vlan_get(bat_priv, tt_common->vid); - if (vlan) { - batadv_softif_vlan_put(vlan); - batadv_softif_vlan_put(vlan); - } - batadv_tt_local_entry_put(tt_local); } spin_unlock_bh(list_lock); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 9abfb3e73c34..1e47fbe8bb7b 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -433,6 +433,7 @@ struct batadv_hardif_neigh_node { * @ifinfo_lock: lock protecting private ifinfo members and list * @if_incoming: pointer to incoming hard-interface * @last_seen: when last packet via this neighbor was received + * @hardif_neigh: hardif_neigh of this neighbor * @refcount: number of contexts the object is used * @rcu: struct used for freeing in an RCU-safe manner */ @@ -444,6 +445,7 @@ struct batadv_neigh_node { spinlock_t ifinfo_lock; /* protects ifinfo_list and its members */ struct batadv_hard_iface *if_incoming; unsigned long last_seen; + struct batadv_hardif_neigh_node *hardif_neigh; struct kref refcount; struct rcu_head rcu; }; @@ -1073,10 +1075,12 @@ struct batadv_tt_common_entry { * struct batadv_tt_local_entry - translation table local entry data * @common: general translation table data * @last_seen: timestamp used for purging stale tt local entries + * @vlan: soft-interface vlan of the entry */ struct batadv_tt_local_entry { struct batadv_tt_common_entry common; unsigned long last_seen; + struct batadv_softif_vlan *vlan; }; /** @@ -1250,6 +1254,8 @@ struct batadv_forw_packet { * struct batadv_algo_ops - mesh algorithm callbacks * @list: list node for the batadv_algo_list * @name: name of the algorithm + * @bat_iface_activate: start routing mechanisms when hard-interface is brought + * up * @bat_iface_enable: init routing info when hard-interface is enabled * @bat_iface_disable: de-init routing info when hard-interface is disabled * @bat_iface_update_mac: (re-)init mac addresses of the protocol information @@ -1277,6 +1283,7 @@ struct batadv_forw_packet { struct batadv_algo_ops { struct hlist_node list; char *name; + void (*bat_iface_activate)(struct batadv_hard_iface *hard_iface); int (*bat_iface_enable)(struct batadv_hard_iface *hard_iface); void (*bat_iface_disable)(struct batadv_hard_iface *hard_iface); void (*bat_iface_update_mac)(struct batadv_hard_iface *hard_iface); diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 263b4de4de57..60a3dbfca8a1 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -21,18 +21,19 @@ #include <asm/uaccess.h> #include "br_private.h" -/* called with RTNL */ static int get_bridge_ifindices(struct net *net, int *indices, int num) { struct net_device *dev; int i = 0; - for_each_netdev(net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { if (i >= num) break; if (dev->priv_flags & IFF_EBRIDGE) indices[i++] = dev->ifindex; } + rcu_read_unlock(); return i; } diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 191ea66e4d92..6852f3c7009c 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1279,6 +1279,7 @@ static int br_ip4_multicast_query(struct net_bridge *br, struct br_ip saddr; unsigned long max_delay; unsigned long now = jiffies; + unsigned int offset = skb_transport_offset(skb); __be32 group; int err = 0; @@ -1289,14 +1290,14 @@ static int br_ip4_multicast_query(struct net_bridge *br, group = ih->group; - if (skb->len == sizeof(*ih)) { + if (skb->len == offset + sizeof(*ih)) { max_delay = ih->code * (HZ / IGMP_TIMER_SCALE); if (!max_delay) { max_delay = 10 * HZ; group = 0; } - } else if (skb->len >= sizeof(*ih3)) { + } else if (skb->len >= offset + sizeof(*ih3)) { ih3 = igmpv3_query_hdr(skb); if (ih3->nsrcs) goto out; @@ -1357,6 +1358,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, struct br_ip saddr; unsigned long max_delay; unsigned long now = jiffies; + unsigned int offset = skb_transport_offset(skb); const struct in6_addr *group = NULL; bool is_general_query; int err = 0; @@ -1366,8 +1368,8 @@ static int br_ip6_multicast_query(struct net_bridge *br, (port && port->state == BR_STATE_DISABLED)) goto out; - if (skb->len == sizeof(*mld)) { - if (!pskb_may_pull(skb, sizeof(*mld))) { + if (skb->len == offset + sizeof(*mld)) { + if (!pskb_may_pull(skb, offset + sizeof(*mld))) { err = -EINVAL; goto out; } @@ -1376,7 +1378,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, if (max_delay) group = &mld->mld_mca; } else { - if (!pskb_may_pull(skb, sizeof(*mld2q))) { + if (!pskb_may_pull(skb, offset + sizeof(*mld2q))) { err = -EINVAL; goto out; } diff --git a/net/core/dev.c b/net/core/dev.c index 77a71cd68535..5c925ac50b95 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2802,7 +2802,7 @@ static netdev_features_t harmonize_features(struct sk_buff *skb, if (skb->ip_summed != CHECKSUM_NONE && !can_checksum_protocol(features, type)) { - features &= ~NETIF_F_CSUM_MASK; + features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); } else if (illegal_highdma(skb->dev, skb)) { features &= ~NETIF_F_SG; } diff --git a/net/core/flow.c b/net/core/flow.c index 1033725be40b..3937b1b68d5b 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -92,8 +92,11 @@ static void flow_cache_gc_task(struct work_struct *work) list_splice_tail_init(&xfrm->flow_cache_gc_list, &gc_list); spin_unlock_bh(&xfrm->flow_cache_gc_lock); - list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) + list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) { flow_entry_kill(fce, xfrm); + atomic_dec(&xfrm->flow_cache_gc_count); + WARN_ON(atomic_read(&xfrm->flow_cache_gc_count) < 0); + } } static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp, @@ -101,6 +104,7 @@ static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp, struct netns_xfrm *xfrm) { if (deleted) { + atomic_add(deleted, &xfrm->flow_cache_gc_count); fcp->hash_count -= deleted; spin_lock_bh(&xfrm->flow_cache_gc_lock); list_splice_tail(gc_list, &xfrm->flow_cache_gc_list); @@ -232,6 +236,13 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, if (fcp->hash_count > fc->high_watermark) flow_cache_shrink(fc, fcp); + if (fcp->hash_count > 2 * fc->high_watermark || + atomic_read(&net->xfrm.flow_cache_gc_count) > fc->high_watermark) { + atomic_inc(&net->xfrm.flow_cache_genid); + flo = ERR_PTR(-ENOBUFS); + goto ret_object; + } + fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); if (fle) { fle->net = net; @@ -446,6 +457,7 @@ int flow_cache_init(struct net *net) INIT_WORK(&net->xfrm.flow_cache_gc_work, flow_cache_gc_task); INIT_WORK(&net->xfrm.flow_cache_flush_work, flow_cache_flush_task); mutex_init(&net->xfrm.flow_flush_sem); + atomic_set(&net->xfrm.flow_cache_gc_count, 0); fc->hash_shift = 10; fc->low_watermark = 2 * flow_cache_hash_size(fc); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a75f7e94b445..65763c29f845 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1180,14 +1180,16 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev) { - struct rtnl_link_ifmap map = { - .mem_start = dev->mem_start, - .mem_end = dev->mem_end, - .base_addr = dev->base_addr, - .irq = dev->irq, - .dma = dev->dma, - .port = dev->if_port, - }; + struct rtnl_link_ifmap map; + + memset(&map, 0, sizeof(map)); + map.mem_start = dev->mem_start; + map.mem_end = dev->mem_end; + map.base_addr = dev->base_addr; + map.irq = dev->irq; + map.dma = dev->dma; + map.port = dev->if_port; + if (nla_put(skb, IFLA_MAP, sizeof(map), &map)) return -EMSGSIZE; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index d97268e8ff10..2b68418c7198 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -975,6 +975,8 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg) val = 65535 - 40; if (type == RTAX_MTU && val > 65535 - 15) val = 65535 - 15; + if (type == RTAX_HOPLIMIT && val > 255) + val = 255; if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) return -EINVAL; fi->fib_metrics[type - 1] = val; diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index a39068b4a4d9..a6962ccad98a 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -228,8 +228,6 @@ static int fou_gro_complete(struct sk_buff *skb, int nhoff, int err = -ENOSYS; const struct net_offload **offloads; - udp_tunnel_gro_complete(skb, nhoff); - rcu_read_lock(); offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[proto]); @@ -238,6 +236,8 @@ static int fou_gro_complete(struct sk_buff *skb, int nhoff, err = ops->callbacks.gro_complete(skb, nhoff); + skb_set_inner_mac_header(skb, nhoff); + out_unlock: rcu_read_unlock(); @@ -414,6 +414,8 @@ static int gue_gro_complete(struct sk_buff *skb, int nhoff, err = ops->callbacks.gro_complete(skb, nhoff + guehlen); + skb_set_inner_mac_header(skb, nhoff + guehlen); + out_unlock: rcu_read_unlock(); return err; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index bc68eced0105..0d9e9d7bb029 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -470,6 +470,7 @@ static int inet_reuseport_add_sock(struct sock *sk, const struct sock *sk2, bool match_wildcard)) { + struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash; struct sock *sk2; struct hlist_nulls_node *node; kuid_t uid = sock_i_uid(sk); @@ -479,6 +480,7 @@ static int inet_reuseport_add_sock(struct sock *sk, sk2->sk_family == sk->sk_family && ipv6_only_sock(sk2) == ipv6_only_sock(sk) && sk2->sk_bound_dev_if == sk->sk_bound_dev_if && + inet_csk(sk2)->icsk_bind_hash == tb && sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) && saddr_same(sk, sk2, false)) return reuseport_add_sock(sk, sk2); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index af5d1f38217f..4cc84212cce1 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -179,6 +179,7 @@ static __be16 tnl_flags_to_gre_flags(__be16 tflags) return flags; } +/* Fills in tpi and returns header length to be pulled. */ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, bool *csum_err) { @@ -238,7 +239,7 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, return -EINVAL; } } - return iptunnel_pull_header(skb, hdr_len, tpi->proto, false); + return hdr_len; } static void ipgre_err(struct sk_buff *skb, u32 info, @@ -341,7 +342,7 @@ static void gre_err(struct sk_buff *skb, u32 info) struct tnl_ptk_info tpi; bool csum_err = false; - if (parse_gre_header(skb, &tpi, &csum_err)) { + if (parse_gre_header(skb, &tpi, &csum_err) < 0) { if (!csum_err) /* ignore csum errors. */ return; } @@ -397,7 +398,10 @@ static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) iph->saddr, iph->daddr, tpi->key); if (tunnel) { - skb_pop_mac_header(skb); + if (tunnel->dev->type != ARPHRD_NONE) + skb_pop_mac_header(skb); + else + skb_reset_mac_header(skb); if (tunnel->collect_md) { __be16 flags; __be64 tun_id; @@ -419,6 +423,7 @@ static int gre_rcv(struct sk_buff *skb) { struct tnl_ptk_info tpi; bool csum_err = false; + int hdr_len; #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(ip_hdr(skb)->daddr)) { @@ -428,7 +433,10 @@ static int gre_rcv(struct sk_buff *skb) } #endif - if (parse_gre_header(skb, &tpi, &csum_err) < 0) + hdr_len = parse_gre_header(skb, &tpi, &csum_err); + if (hdr_len < 0) + goto drop; + if (iptunnel_pull_header(skb, hdr_len, tpi.proto, false) < 0) goto drop; if (ipgre_rcv(skb, &tpi) == PACKET_RCVD) @@ -523,7 +531,8 @@ static struct rtable *gre_get_rt(struct sk_buff *skb, return ip_route_output_key(net, fl); } -static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev) +static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, + __be16 proto) { struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; @@ -575,7 +584,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev) } flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY); - build_header(skb, tunnel_hlen, flags, htons(ETH_P_TEB), + build_header(skb, tunnel_hlen, flags, proto, tunnel_id_to_key(tun_info->key.tun_id), 0); df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; @@ -616,7 +625,7 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb, const struct iphdr *tnl_params; if (tunnel->collect_md) { - gre_fb_xmit(skb, dev); + gre_fb_xmit(skb, dev, skb->protocol); return NETDEV_TX_OK; } @@ -660,7 +669,7 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb, struct ip_tunnel *tunnel = netdev_priv(dev); if (tunnel->collect_md) { - gre_fb_xmit(skb, dev); + gre_fb_xmit(skb, dev, htons(ETH_P_TEB)); return NETDEV_TX_OK; } @@ -893,7 +902,7 @@ static int ipgre_tunnel_init(struct net_device *dev) netif_keep_dst(dev); dev->addr_len = 4; - if (iph->daddr) { + if (iph->daddr && !tunnel->collect_md) { #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(iph->daddr)) { if (!iph->saddr) @@ -902,8 +911,9 @@ static int ipgre_tunnel_init(struct net_device *dev) dev->header_ops = &ipgre_header_ops; } #endif - } else + } else if (!tunnel->collect_md) { dev->header_ops = &ipgre_header_ops; + } return ip_tunnel_init(dev); } @@ -946,6 +956,11 @@ static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) if (flags & (GRE_VERSION|GRE_ROUTING)) return -EINVAL; + if (data[IFLA_GRE_COLLECT_METADATA] && + data[IFLA_GRE_ENCAP_TYPE] && + nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE) + return -EINVAL; + return 0; } @@ -1019,6 +1034,8 @@ static void ipgre_netlink_parms(struct net_device *dev, struct ip_tunnel *t = netdev_priv(dev); t->collect_md = true; + if (dev->type == ARPHRD_IPGRE) + dev->type = ARPHRD_NONE; } } diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 6aad0192443d..a69ed94bda1b 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -326,12 +326,12 @@ static int ip_tunnel_bind_dev(struct net_device *dev) if (!IS_ERR(rt)) { tdev = rt->dst.dev; - dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst, - fl4.saddr); ip_rt_put(rt); } if (dev->type != ARPHRD_ETHER) dev->flags |= IFF_POINTOPOINT; + + dst_cache_reset(&tunnel->dst_cache); } if (!tdev && tunnel->parms.link) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 5cf10b777b7e..a917903d5e97 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -156,6 +156,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, struct dst_entry *dst = skb_dst(skb); struct net_device *tdev; /* Device to other host */ int err; + int mtu; if (!dst) { dev->stats.tx_carrier_errors++; @@ -192,6 +193,23 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, tunnel->err_count = 0; } + mtu = dst_mtu(dst); + if (skb->len > mtu) { + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + if (skb->protocol == htons(ETH_P_IP)) { + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + htonl(mtu)); + } else { + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + } + + dst_release(dst); + goto tx_error; + } + skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev))); skb_dst_set(skb, dst); skb->dev = skb_dst(skb)->dev; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 441ae9da3a23..79a03b87a771 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2640,8 +2640,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) */ if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) || skb_headroom(skb) >= 0xFFFF)) { - struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER, - GFP_ATOMIC); + struct sk_buff *nskb; + + skb_mstamp_get(&skb->skb_mstamp); + nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : -ENOBUFS; } else { diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 0ed2dafb7cc4..e330c0e56b11 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -399,6 +399,11 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff) uh->len = newlen; + /* Set encapsulation before calling into inner gro_complete() functions + * to make them set up the inner offsets. + */ + skb->encapsulation = 1; + rcu_read_lock(); uo_priv = rcu_dereference(udp_offload_base); @@ -421,9 +426,6 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff) if (skb->remcsum_offload) skb_shinfo(skb)->gso_type |= SKB_GSO_TUNNEL_REMCSUM; - skb->encapsulation = 1; - skb_set_inner_mac_header(skb, nhoff + sizeof(struct udphdr)); - return err; } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 0a37ddc7af51..0013cacf7164 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -445,6 +445,8 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) if (__ipv6_addr_needs_scope_id(addr_type)) iif = skb->dev->ifindex; + else + iif = l3mdev_master_ifindex(skb->dev); /* * Must not send error if the source does not uniquely @@ -499,9 +501,6 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) else if (!fl6.flowi6_oif) fl6.flowi6_oif = np->ucast_oif; - if (!fl6.flowi6_oif) - fl6.flowi6_oif = l3mdev_master_ifindex(skb->dev); - dst = icmpv6_route_lookup(net, skb, sk, &fl6); if (IS_ERR(dst)) goto out; diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index 2ae3c4fd8aab..41f18de5dcc2 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -120,8 +120,7 @@ nla_put_failure: static int ila_encap_nlsize(struct lwtunnel_state *lwtstate) { - /* No encapsulation overhead */ - return 0; + return nla_total_size(sizeof(u64)); /* ILA_ATTR_LOCATOR */ } static int ila_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d916d6ab9ad2..6f32944e0223 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1750,6 +1750,8 @@ static int ip6_convert_metrics(struct mx6_config *mxc, } else { val = nla_get_u32(nla); } + if (type == RTAX_HOPLIMIT && val > 255) + val = 255; if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) goto err; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 711d209f9124..f443c6b0ce16 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -810,8 +810,13 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 fl6.flowi6_proto = IPPROTO_TCP; if (rt6_need_strict(&fl6.daddr) && !oif) fl6.flowi6_oif = tcp_v6_iif(skb); - else + else { + if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) + oif = skb->skb_iif; + fl6.flowi6_oif = oif; + } + fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); fl6.fl6_dport = t1->dest; fl6.fl6_sport = t1->source; diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index afca2eb4dfa7..6edfa9980314 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1376,9 +1376,9 @@ static int l2tp_tunnel_sock_create(struct net *net, memcpy(&udp_conf.peer_ip6, cfg->peer_ip6, sizeof(udp_conf.peer_ip6)); udp_conf.use_udp6_tx_checksums = - cfg->udp6_zero_tx_checksums; + ! cfg->udp6_zero_tx_checksums; udp_conf.use_udp6_rx_checksums = - cfg->udp6_zero_rx_checksums; + ! cfg->udp6_zero_rx_checksums; } else #endif { diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index b3c52e3f689a..8ae3ed97d95c 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -626,6 +626,7 @@ static void llc_cmsg_rcv(struct msghdr *msg, struct sk_buff *skb) if (llc->cmsg_flags & LLC_CMSG_PKTINFO) { struct llc_pktinfo info; + memset(&info, 0, sizeof(info)); info.lpi_ifindex = llc_sk(skb->sk)->dev->ifindex; llc_pdu_decode_dsap(skb, &info.lpi_sap); llc_pdu_decode_da(skb, info.lpi_mac); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 453b4e741780..e1cb22c16530 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1761,7 +1761,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, ret = dev_alloc_name(ndev, ndev->name); if (ret < 0) { - free_netdev(ndev); + ieee80211_if_free(ndev); return ret; } @@ -1847,7 +1847,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, ret = register_netdevice(ndev); if (ret) { - free_netdev(ndev); + ieee80211_if_free(ndev); return ret; } } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index afde5f5e728a..e27fd17c6743 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -66,7 +66,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_locks); __cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock); EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock); -static __read_mostly spinlock_t nf_conntrack_locks_all_lock; +static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock); static __read_mostly bool nf_conntrack_locks_all; void nf_conntrack_lock(spinlock_t *lock) __acquires(lock) @@ -1778,6 +1778,7 @@ void nf_conntrack_init_end(void) int nf_conntrack_init_net(struct net *net) { + static atomic64_t unique_id; int ret = -ENOMEM; int cpu; @@ -1800,7 +1801,8 @@ int nf_conntrack_init_net(struct net *net) if (!net->ct.stat) goto err_pcpu_lists; - net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net); + net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%llu", + (u64)atomic64_inc_return(&unique_id)); if (!net->ct.slabname) goto err_slabname; diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 4c2b4c0c4d5f..dbd0803b1827 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -96,6 +96,8 @@ static int nfnl_acct_new(struct net *net, struct sock *nfnl, return -EINVAL; if (flags & NFACCT_F_OVERQUOTA) return -EINVAL; + if ((flags & NFACCT_F_QUOTA) && !tb[NFACCT_QUOTA]) + return -EINVAL; size += sizeof(u64); } diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index 29d2c31f406c..daf45da448fa 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -236,6 +236,7 @@ static void idletimer_tg_destroy(const struct xt_tgdtor_param *par) list_del(&info->timer->entry); del_timer_sync(&info->timer->timer); + cancel_work_sync(&info->timer->work); sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr); kfree(info->timer->attr.attr.name); kfree(info->timer); diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index b5fea1101faa..10c84d882881 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -776,6 +776,19 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, return -EINVAL; } + /* Userspace may decide to perform a ct lookup without a helper + * specified followed by a (recirculate and) commit with one. + * Therefore, for unconfirmed connections which we will commit, + * we need to attach the helper here. + */ + if (!nf_ct_is_confirmed(ct) && info->commit && + info->helper && !nfct_help(ct)) { + int err = __nf_ct_try_assign_helper(ct, info->ct, + GFP_ATOMIC); + if (err) + return err; + } + /* Call the helper only if: * - nf_conntrack_in() was executed above ("!cached") for a * confirmed connection, or diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 61ed2a8764ba..86187dad1440 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -127,7 +127,7 @@ void rds_tcp_restore_callbacks(struct socket *sock, /* * This is the only path that sets tc->t_sock. Send and receive trust that - * it is set. The RDS_CONN_CONNECTED bit protects those paths from being + * it is set. The RDS_CONN_UP bit protects those paths from being * called while it isn't set. */ void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn) @@ -216,6 +216,7 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp) if (!tc) return -ENOMEM; + mutex_init(&tc->t_conn_lock); tc->t_sock = NULL; tc->t_tinc = NULL; tc->t_tinc_hdr_rem = sizeof(struct rds_header); diff --git a/net/rds/tcp.h b/net/rds/tcp.h index 64f873c0c6b6..41c228300525 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -12,6 +12,10 @@ struct rds_tcp_connection { struct list_head t_tcp_node; struct rds_connection *conn; + /* t_conn_lock synchronizes the connection establishment between + * rds_tcp_accept_one and rds_tcp_conn_connect + */ + struct mutex t_conn_lock; struct socket *t_sock; void *t_orig_write_space; void *t_orig_data_ready; diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index 5cb16875c460..49a3fcfed360 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -78,7 +78,14 @@ int rds_tcp_conn_connect(struct rds_connection *conn) struct socket *sock = NULL; struct sockaddr_in src, dest; int ret; + struct rds_tcp_connection *tc = conn->c_transport_data; + + mutex_lock(&tc->t_conn_lock); + if (rds_conn_up(conn)) { + mutex_unlock(&tc->t_conn_lock); + return 0; + } ret = sock_create_kern(rds_conn_net(conn), PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); if (ret < 0) @@ -120,6 +127,7 @@ int rds_tcp_conn_connect(struct rds_connection *conn) } out: + mutex_unlock(&tc->t_conn_lock); if (sock) sock_release(sock); return ret; diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 0936a4a32b47..be263cdf268b 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -76,7 +76,9 @@ int rds_tcp_accept_one(struct socket *sock) struct rds_connection *conn; int ret; struct inet_sock *inet; - struct rds_tcp_connection *rs_tcp; + struct rds_tcp_connection *rs_tcp = NULL; + int conn_state; + struct sock *nsk; ret = sock_create_kern(sock_net(sock->sk), sock->sk->sk_family, sock->sk->sk_type, sock->sk->sk_protocol, @@ -115,28 +117,44 @@ int rds_tcp_accept_one(struct socket *sock) * rds_tcp_state_change() will do that cleanup */ rs_tcp = (struct rds_tcp_connection *)conn->c_transport_data; - if (rs_tcp->t_sock && - ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr)) { - struct sock *nsk = new_sock->sk; - - nsk->sk_user_data = NULL; - nsk->sk_prot->disconnect(nsk, 0); - tcp_done(nsk); - new_sock = NULL; - ret = 0; - goto out; - } else if (rs_tcp->t_sock) { - rds_tcp_restore_callbacks(rs_tcp->t_sock, rs_tcp); - conn->c_outgoing = 0; - } - rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING); + mutex_lock(&rs_tcp->t_conn_lock); + conn_state = rds_conn_state(conn); + if (conn_state != RDS_CONN_CONNECTING && conn_state != RDS_CONN_UP) + goto rst_nsk; + if (rs_tcp->t_sock) { + /* Need to resolve a duelling SYN between peers. + * We have an outstanding SYN to this peer, which may + * potentially have transitioned to the RDS_CONN_UP state, + * so we must quiesce any send threads before resetting + * c_transport_data. + */ + wait_event(conn->c_waitq, + !test_bit(RDS_IN_XMIT, &conn->c_flags)); + if (ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr)) { + goto rst_nsk; + } else if (rs_tcp->t_sock) { + rds_tcp_restore_callbacks(rs_tcp->t_sock, rs_tcp); + conn->c_outgoing = 0; + } + } rds_tcp_set_callbacks(new_sock, conn); - rds_connect_complete(conn); + rds_connect_complete(conn); /* marks RDS_CONN_UP */ + new_sock = NULL; + ret = 0; + goto out; +rst_nsk: + /* reset the newly returned accept sock and bail */ + nsk = new_sock->sk; + rds_tcp_stats_inc(s_tcp_listen_closed_stale); + nsk->sk_user_data = NULL; + nsk->sk_prot->disconnect(nsk, 0); + tcp_done(nsk); new_sock = NULL; ret = 0; - out: + if (rs_tcp) + mutex_unlock(&rs_tcp->t_conn_lock); if (new_sock) sock_release(new_sock); return ret; diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index c589a9ba506a..343d011aa818 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -423,7 +423,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, u16 ife_type = 0; u8 *daddr = NULL; u8 *saddr = NULL; - int ret = 0; + int ret = 0, exists = 0; int err; err = nla_parse_nested(tb, TCA_IFE_MAX, nla, ife_policy); @@ -435,25 +435,29 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, parm = nla_data(tb[TCA_IFE_PARMS]); + exists = tcf_hash_check(tn, parm->index, a, bind); + if (exists && bind) + return 0; + if (parm->flags & IFE_ENCODE) { /* Until we get issued the ethertype, we cant have * a default.. **/ if (!tb[TCA_IFE_TYPE]) { + if (exists) + tcf_hash_release(a, bind); pr_info("You MUST pass etherype for encoding\n"); return -EINVAL; } } - if (!tcf_hash_check(tn, parm->index, a, bind)) { + if (!exists) { ret = tcf_hash_create(tn, parm->index, est, a, sizeof(*ife), bind, false); if (ret) return ret; ret = ACT_P_CREATED; } else { - if (bind) /* dont override defaults */ - return 0; tcf_hash_release(a, bind); if (!ovr) return -EEXIST; @@ -495,6 +499,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, NULL); if (err) { metadata_parse_err: + if (exists) + tcf_hash_release(a, bind); if (ret == ACT_P_CREATED) _tcf_ife_cleanup(a, bind); diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 350e134cffb3..8b5270008a6e 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -96,7 +96,7 @@ static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla, struct tcf_ipt *ipt; struct xt_entry_target *td, *t; char *tname; - int ret = 0, err; + int ret = 0, err, exists = 0; u32 hook = 0; u32 index = 0; @@ -107,18 +107,23 @@ static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla, if (err < 0) return err; - if (tb[TCA_IPT_HOOK] == NULL) - return -EINVAL; - if (tb[TCA_IPT_TARG] == NULL) + if (tb[TCA_IPT_INDEX] != NULL) + index = nla_get_u32(tb[TCA_IPT_INDEX]); + + exists = tcf_hash_check(tn, index, a, bind); + if (exists && bind) + return 0; + + if (tb[TCA_IPT_HOOK] == NULL || tb[TCA_IPT_TARG] == NULL) { + if (exists) + tcf_hash_release(a, bind); return -EINVAL; + } td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]); if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) return -EINVAL; - if (tb[TCA_IPT_INDEX] != NULL) - index = nla_get_u32(tb[TCA_IPT_INDEX]); - if (!tcf_hash_check(tn, index, a, bind)) { ret = tcf_hash_create(tn, index, est, a, sizeof(*ipt), bind, false); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index e8a760cf7775..8f3948dd38b8 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -61,7 +61,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, struct tc_mirred *parm; struct tcf_mirred *m; struct net_device *dev; - int ret, ok_push = 0; + int ret, ok_push = 0, exists = 0; if (nla == NULL) return -EINVAL; @@ -71,17 +71,27 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, if (tb[TCA_MIRRED_PARMS] == NULL) return -EINVAL; parm = nla_data(tb[TCA_MIRRED_PARMS]); + + exists = tcf_hash_check(tn, parm->index, a, bind); + if (exists && bind) + return 0; + switch (parm->eaction) { case TCA_EGRESS_MIRROR: case TCA_EGRESS_REDIR: break; default: + if (exists) + tcf_hash_release(a, bind); return -EINVAL; } if (parm->ifindex) { dev = __dev_get_by_index(net, parm->ifindex); - if (dev == NULL) + if (dev == NULL) { + if (exists) + tcf_hash_release(a, bind); return -ENODEV; + } switch (dev->type) { case ARPHRD_TUNNEL: case ARPHRD_TUNNEL6: @@ -99,7 +109,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, dev = NULL; } - if (!tcf_hash_check(tn, parm->index, a, bind)) { + if (!exists) { if (dev == NULL) return -EINVAL; ret = tcf_hash_create(tn, parm->index, est, a, @@ -108,9 +118,6 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, return ret; ret = ACT_P_CREATED; } else { - if (bind) - return 0; - tcf_hash_release(a, bind); if (!ovr) return -EEXIST; diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 75b2be13fbcc..3a33fb648a6d 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -87,7 +87,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, struct tc_defact *parm; struct tcf_defact *d; char *defdata; - int ret = 0, err; + int ret = 0, err, exists = 0; if (nla == NULL) return -EINVAL; @@ -99,13 +99,21 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, if (tb[TCA_DEF_PARMS] == NULL) return -EINVAL; - if (tb[TCA_DEF_DATA] == NULL) - return -EINVAL; parm = nla_data(tb[TCA_DEF_PARMS]); + exists = tcf_hash_check(tn, parm->index, a, bind); + if (exists && bind) + return 0; + + if (tb[TCA_DEF_DATA] == NULL) { + if (exists) + tcf_hash_release(a, bind); + return -EINVAL; + } + defdata = nla_data(tb[TCA_DEF_DATA]); - if (!tcf_hash_check(tn, parm->index, a, bind)) { + if (!exists) { ret = tcf_hash_create(tn, parm->index, est, a, sizeof(*d), bind, false); if (ret) @@ -122,8 +130,6 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, } else { d = to_defact(a); - if (bind) - return 0; tcf_hash_release(a, bind); if (!ovr) return -EEXIST; diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index cfcdbdc00c9b..69da5a8f0034 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -69,7 +69,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, struct tcf_skbedit *d; u32 flags = 0, *priority = NULL, *mark = NULL; u16 *queue_mapping = NULL; - int ret = 0, err; + int ret = 0, err, exists = 0; if (nla == NULL) return -EINVAL; @@ -96,12 +96,18 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, mark = nla_data(tb[TCA_SKBEDIT_MARK]); } - if (!flags) - return -EINVAL; - parm = nla_data(tb[TCA_SKBEDIT_PARMS]); - if (!tcf_hash_check(tn, parm->index, a, bind)) { + exists = tcf_hash_check(tn, parm->index, a, bind); + if (exists && bind) + return 0; + + if (!flags) { + tcf_hash_release(a, bind); + return -EINVAL; + } + + if (!exists) { ret = tcf_hash_create(tn, parm->index, est, a, sizeof(*d), bind, false); if (ret) @@ -111,8 +117,6 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, ret = ACT_P_CREATED; } else { d = to_skbedit(a); - if (bind) - return 0; tcf_hash_release(a, bind); if (!ovr) return -EEXIST; diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index bab8ae0cefc0..c45f926dafb9 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -77,7 +77,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, int action; __be16 push_vid = 0; __be16 push_proto = 0; - int ret = 0; + int ret = 0, exists = 0; int err; if (!nla) @@ -90,15 +90,25 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, if (!tb[TCA_VLAN_PARMS]) return -EINVAL; parm = nla_data(tb[TCA_VLAN_PARMS]); + exists = tcf_hash_check(tn, parm->index, a, bind); + if (exists && bind) + return 0; + switch (parm->v_action) { case TCA_VLAN_ACT_POP: break; case TCA_VLAN_ACT_PUSH: - if (!tb[TCA_VLAN_PUSH_VLAN_ID]) + if (!tb[TCA_VLAN_PUSH_VLAN_ID]) { + if (exists) + tcf_hash_release(a, bind); return -EINVAL; + } push_vid = nla_get_u16(tb[TCA_VLAN_PUSH_VLAN_ID]); - if (push_vid >= VLAN_VID_MASK) + if (push_vid >= VLAN_VID_MASK) { + if (exists) + tcf_hash_release(a, bind); return -ERANGE; + } if (tb[TCA_VLAN_PUSH_VLAN_PROTOCOL]) { push_proto = nla_get_be16(tb[TCA_VLAN_PUSH_VLAN_PROTOCOL]); @@ -114,11 +124,13 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, } break; default: + if (exists) + tcf_hash_release(a, bind); return -EINVAL; } action = parm->v_action; - if (!tcf_hash_check(tn, parm->index, a, bind)) { + if (!exists) { ret = tcf_hash_create(tn, parm->index, est, a, sizeof(*v), bind, false); if (ret) @@ -126,8 +138,6 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, ret = ACT_P_CREATED; } else { - if (bind) - return 0; tcf_hash_release(a, bind); if (!ovr) return -EEXIST; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 9640bb39a5d2..4befe97a9034 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -395,6 +395,25 @@ static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) sch->q.qlen++; } +/* netem can't properly corrupt a megapacket (like we get from GSO), so instead + * when we statistically choose to corrupt one, we instead segment it, returning + * the first packet to be corrupted, and re-enqueue the remaining frames + */ +static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch) +{ + struct sk_buff *segs; + netdev_features_t features = netif_skb_features(skb); + + segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); + + if (IS_ERR_OR_NULL(segs)) { + qdisc_reshape_fail(skb, sch); + return NULL; + } + consume_skb(skb); + return segs; +} + /* * Insert one skb into qdisc. * Note: parent depends on return value to account for queue length. @@ -407,7 +426,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) /* We don't fill cb now as skb_unshare() may invalidate it */ struct netem_skb_cb *cb; struct sk_buff *skb2; + struct sk_buff *segs = NULL; + unsigned int len = 0, last_len, prev_len = qdisc_pkt_len(skb); + int nb = 0; int count = 1; + int rc = NET_XMIT_SUCCESS; /* Random duplication */ if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) @@ -453,10 +476,23 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) * do it now in software before we mangle it. */ if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) { + if (skb_is_gso(skb)) { + segs = netem_segment(skb, sch); + if (!segs) + return NET_XMIT_DROP; + } else { + segs = skb; + } + + skb = segs; + segs = segs->next; + if (!(skb = skb_unshare(skb, GFP_ATOMIC)) || (skb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_help(skb))) - return qdisc_drop(skb, sch); + skb_checksum_help(skb))) { + rc = qdisc_drop(skb, sch); + goto finish_segs; + } skb->data[prandom_u32() % skb_headlen(skb)] ^= 1<<(prandom_u32() % 8); @@ -516,6 +552,27 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) sch->qstats.requeues++; } +finish_segs: + if (segs) { + while (segs) { + skb2 = segs->next; + segs->next = NULL; + qdisc_skb_cb(segs)->pkt_len = segs->len; + last_len = segs->len; + rc = qdisc_enqueue(segs, sch); + if (rc != NET_XMIT_SUCCESS) { + if (net_xmit_drop_count(rc)) + qdisc_qstats_drop(sch); + } else { + nb++; + len += last_len; + } + segs = skb2; + } + sch->q.qlen += nb; + if (nb > 1) + qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len); + } return NET_XMIT_SUCCESS; } diff --git a/net/tipc/node.c b/net/tipc/node.c index ace178fd3850..9aaa1bc566ae 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1444,6 +1444,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) int bearer_id = b->identity; struct tipc_link_entry *le; u16 bc_ack = msg_bcast_ack(hdr); + u32 self = tipc_own_addr(net); int rc = 0; __skb_queue_head_init(&xmitq); @@ -1460,6 +1461,10 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) return tipc_node_bc_rcv(net, skb, bearer_id); } + /* Discard unicast link messages destined for another node */ + if (unlikely(!msg_short(hdr) && (msg_destnode(hdr) != self))) + goto discard; + /* Locate neighboring node that sent packet */ n = tipc_node_find(net, msg_prevnode(hdr)); if (unlikely(!n)) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 3dce53ebea92..b5f1221f48d4 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1808,27 +1808,8 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, else if (sk->sk_shutdown & RCV_SHUTDOWN) err = 0; - if (copied > 0) { - /* We only do these additional bookkeeping/notification steps - * if we actually copied something out of the queue pair - * instead of just peeking ahead. - */ - - if (!(flags & MSG_PEEK)) { - /* If the other side has shutdown for sending and there - * is nothing more to read, then modify the socket - * state. - */ - if (vsk->peer_shutdown & SEND_SHUTDOWN) { - if (vsock_stream_has_data(vsk) <= 0) { - sk->sk_state = SS_UNCONNECTED; - sock_set_flag(sk, SOCK_DONE); - sk->sk_state_change(sk); - } - } - } + if (copied > 0) err = copied; - } out: release_sock(sk); diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c index 7ecd04c21360..997ff7b2509b 100644 --- a/net/x25/x25_facilities.c +++ b/net/x25/x25_facilities.c @@ -277,6 +277,7 @@ int x25_negotiate_facilities(struct sk_buff *skb, struct sock *sk, memset(&theirs, 0, sizeof(theirs)); memcpy(new, ours, sizeof(*new)); + memset(dte, 0, sizeof(*dte)); len = x25_parse_facilities(skb, &theirs, dte, &x25->vc_facil_mask); if (len < 0) diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index ff4a91fcab9f..637387bbaaea 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -99,6 +99,9 @@ static int xfrm_output_one(struct sk_buff *skb, int err) skb_dst_force(skb); + /* Inner headers are invalid now. */ + skb->encapsulation = 0; + err = x->type->output(x, skb); if (err == -EINPROGRESS) goto out; |