diff options
Diffstat (limited to 'net')
133 files changed, 2954 insertions, 1325 deletions
diff --git a/net/Kconfig b/net/Kconfig index 2f2842d2d3ed..f19c0c3b9589 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -413,6 +413,10 @@ config DST_CACHE bool default n +config GRO_CELLS + bool + default n + config NET_DEVLINK tristate "Network physical/parent device Netlink interface" help diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index d208ee9ab60a..ea71513fca21 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -79,7 +79,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) br_multicast_flood(mdst, skb, false, true); else br_flood(br, skb, BR_PKT_MULTICAST, false, true); - } else if ((dst = __br_fdb_get(br, dest, vid)) != NULL) { + } else if ((dst = br_fdb_find_rcu(br, dest, vid)) != NULL) { br_forward(dst->dst, skb, false, true); } else { br_flood(br, skb, BR_PKT_UNICAST, false, true); diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 5028691fa68a..4ac11574117c 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -28,9 +28,6 @@ #include "br_private.h" static struct kmem_cache *br_fdb_cache __read_mostly; -static struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head, - const unsigned char *addr, - __u16 vid); static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, const unsigned char *addr, u16 vid); static void fdb_notify(struct net_bridge *br, @@ -86,6 +83,47 @@ static void fdb_rcu_free(struct rcu_head *head) kmem_cache_free(br_fdb_cache, ent); } +static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head, + const unsigned char *addr, + __u16 vid) +{ + struct net_bridge_fdb_entry *f; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + hlist_for_each_entry_rcu(f, head, hlist) + if (ether_addr_equal(f->addr.addr, addr) && f->vlan_id == vid) + break; + + return f; +} + +/* requires bridge hash_lock */ +static struct net_bridge_fdb_entry *br_fdb_find(struct net_bridge *br, + const unsigned char *addr, + __u16 vid) +{ + struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; + struct net_bridge_fdb_entry *fdb; + + WARN_ON_ONCE(!br_hash_lock_held(br)); + + rcu_read_lock(); + fdb = fdb_find_rcu(head, addr, vid); + rcu_read_unlock(); + + return fdb; +} + +struct net_bridge_fdb_entry *br_fdb_find_rcu(struct net_bridge *br, + const unsigned char *addr, + __u16 vid) +{ + struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; + + return fdb_find_rcu(head, addr, vid); +} + /* When a static FDB entry is added, the mac address from the entry is * added to the bridge private HW address list and all required ports * are then updated with the new information. @@ -198,11 +236,10 @@ void br_fdb_find_delete_local(struct net_bridge *br, const struct net_bridge_port *p, const unsigned char *addr, u16 vid) { - struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; struct net_bridge_fdb_entry *f; spin_lock_bh(&br->hash_lock); - f = fdb_find(head, addr, vid); + f = br_fdb_find(br, addr, vid); if (f && f->is_local && !f->added_by_user && f->dst == p) fdb_delete_local(br, p, f); spin_unlock_bh(&br->hash_lock); @@ -266,7 +303,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) spin_lock_bh(&br->hash_lock); /* If old entry was unassociated with any port, then delete it. */ - f = __br_fdb_get(br, br->dev->dev_addr, 0); + f = br_fdb_find(br, br->dev->dev_addr, 0); if (f && f->is_local && !f->dst && !f->added_by_user) fdb_delete_local(br, NULL, f); @@ -281,7 +318,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) list_for_each_entry(v, &vg->vlan_list, vlist) { if (!br_vlan_should_use(v)) continue; - f = __br_fdb_get(br, br->dev->dev_addr, v->vid); + f = br_fdb_find(br, br->dev->dev_addr, v->vid); if (f && f->is_local && !f->dst && !f->added_by_user) fdb_delete_local(br, NULL, f); fdb_insert(br, NULL, newaddr, v->vid); @@ -380,24 +417,6 @@ void br_fdb_delete_by_port(struct net_bridge *br, spin_unlock_bh(&br->hash_lock); } -/* No locking or refcounting, assumes caller has rcu_read_lock */ -struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, - const unsigned char *addr, - __u16 vid) -{ - struct net_bridge_fdb_entry *fdb; - - hlist_for_each_entry_rcu(fdb, - &br->hash[br_mac_hash(addr, vid)], hlist) { - if (ether_addr_equal(fdb->addr.addr, addr) && - fdb->vlan_id == vid) { - return fdb; - } - } - - return NULL; -} - #if IS_ENABLED(CONFIG_ATM_LANE) /* Interface used by ATM LANE hook to test * if an addr is on some other bridge port */ @@ -412,7 +431,7 @@ int br_fdb_test_addr(struct net_device *dev, unsigned char *addr) if (!port) ret = 0; else { - fdb = __br_fdb_get(port->br, addr, 0); + fdb = br_fdb_find_rcu(port->br, addr, 0); ret = fdb && fdb->dst && fdb->dst->dev != dev && fdb->dst->state == BR_STATE_FORWARDING; } @@ -474,34 +493,6 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, return num; } -static struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head, - const unsigned char *addr, - __u16 vid) -{ - struct net_bridge_fdb_entry *fdb; - - hlist_for_each_entry(fdb, head, hlist) { - if (ether_addr_equal(fdb->addr.addr, addr) && - fdb->vlan_id == vid) - return fdb; - } - return NULL; -} - -static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head, - const unsigned char *addr, - __u16 vid) -{ - struct net_bridge_fdb_entry *fdb; - - hlist_for_each_entry_rcu(fdb, head, hlist) { - if (ether_addr_equal(fdb->addr.addr, addr) && - fdb->vlan_id == vid) - return fdb; - } - return NULL; -} - static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, struct net_bridge_port *source, const unsigned char *addr, @@ -535,7 +526,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, if (!is_valid_ether_addr(addr)) return -EINVAL; - fdb = fdb_find(head, addr, vid); + fdb = br_fdb_find(br, addr, vid); if (fdb) { /* it is okay to have multiple ports with same * address, just use the first one. @@ -592,13 +583,15 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, br_warn(br, "received packet on %s with own address as source address (addr:%pM, vlan:%u)\n", source->dev->name, addr, vid); } else { + unsigned long now = jiffies; + /* fastpath: update of existing entry */ if (unlikely(source != fdb->dst)) { fdb->dst = source; fdb_modified = true; } - if (jiffies != fdb->updated) - fdb->updated = jiffies; + if (now != fdb->updated) + fdb->updated = now; if (unlikely(added_by_user)) fdb->added_by_user = 1; if (unlikely(fdb_modified)) @@ -606,7 +599,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, } } else { spin_lock(&br->hash_lock); - if (likely(!fdb_find(head, addr, vid))) { + if (likely(!fdb_find_rcu(head, addr, vid))) { fdb = fdb_create(head, source, addr, vid, 0, 0); if (fdb) { if (unlikely(added_by_user)) @@ -790,7 +783,7 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source, return -EINVAL; } - fdb = fdb_find(head, addr, vid); + fdb = br_fdb_find(br, addr, vid); if (fdb == NULL) { if (!(flags & NLM_F_CREATE)) return -ENOENT; @@ -937,55 +930,30 @@ out: return err; } -static int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr, - u16 vid) -{ - struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; - struct net_bridge_fdb_entry *fdb; - - fdb = fdb_find(head, addr, vid); - if (!fdb) - return -ENOENT; - - fdb_delete(br, fdb); - return 0; -} - -static int __br_fdb_delete_by_addr(struct net_bridge *br, - const unsigned char *addr, u16 vid) -{ - int err; - - spin_lock_bh(&br->hash_lock); - err = fdb_delete_by_addr(br, addr, vid); - spin_unlock_bh(&br->hash_lock); - - return err; -} - -static int fdb_delete_by_addr_and_port(struct net_bridge_port *p, +static int fdb_delete_by_addr_and_port(struct net_bridge *br, + const struct net_bridge_port *p, const u8 *addr, u16 vlan) { - struct net_bridge *br = p->br; - struct hlist_head *head = &br->hash[br_mac_hash(addr, vlan)]; struct net_bridge_fdb_entry *fdb; - fdb = fdb_find(head, addr, vlan); + fdb = br_fdb_find(br, addr, vlan); if (!fdb || fdb->dst != p) return -ENOENT; fdb_delete(br, fdb); + return 0; } -static int __br_fdb_delete(struct net_bridge_port *p, +static int __br_fdb_delete(struct net_bridge *br, + const struct net_bridge_port *p, const unsigned char *addr, u16 vid) { int err; - spin_lock_bh(&p->br->hash_lock); - err = fdb_delete_by_addr_and_port(p, addr, vid); - spin_unlock_bh(&p->br->hash_lock); + spin_lock_bh(&br->hash_lock); + err = fdb_delete_by_addr_and_port(br, p, addr, vid); + spin_unlock_bh(&br->hash_lock); return err; } @@ -998,7 +966,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct net_bridge_vlan_group *vg; struct net_bridge_port *p = NULL; struct net_bridge_vlan *v; - struct net_bridge *br = NULL; + struct net_bridge *br; int err; if (dev->priv_flags & IFF_EBRIDGE) { @@ -1012,6 +980,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], return -EINVAL; } vg = nbp_vlan_group(p); + br = p->br; } if (vid) { @@ -1021,30 +990,20 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], return -EINVAL; } - if (dev->priv_flags & IFF_EBRIDGE) - err = __br_fdb_delete_by_addr(br, addr, vid); - else - err = __br_fdb_delete(p, addr, vid); + err = __br_fdb_delete(br, p, addr, vid); } else { err = -ENOENT; - if (dev->priv_flags & IFF_EBRIDGE) - err = __br_fdb_delete_by_addr(br, addr, 0); - else - err &= __br_fdb_delete(p, addr, 0); - + err &= __br_fdb_delete(br, p, addr, 0); if (!vg || !vg->num_vlans) - goto out; + return err; list_for_each_entry(v, &vg->vlan_list, vlist) { if (!br_vlan_should_use(v)) continue; - if (dev->priv_flags & IFF_EBRIDGE) - err = __br_fdb_delete_by_addr(br, addr, v->vid); - else - err &= __br_fdb_delete(p, addr, v->vid); + err &= __br_fdb_delete(br, p, addr, v->vid); } } -out: + return err; } @@ -1115,7 +1074,7 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, spin_lock_bh(&br->hash_lock); head = &br->hash[br_mac_hash(addr, vid)]; - fdb = fdb_find(head, addr, vid); + fdb = br_fdb_find(br, addr, vid); if (!fdb) { fdb = fdb_create(head, p, addr, vid, 0, 0); if (!fdb) { @@ -1143,15 +1102,13 @@ err_unlock: int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid) { - struct hlist_head *head; struct net_bridge_fdb_entry *fdb; int err = 0; ASSERT_RTNL(); spin_lock_bh(&br->hash_lock); - head = &br->hash[br_mac_hash(addr, vid)]; - fdb = fdb_find(head, addr, vid); + fdb = br_fdb_find(br, addr, vid); if (fdb && fdb->added_by_external_learn) fdb_delete(br, fdb); else diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 220943f920d2..236f34244dbe 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -114,7 +114,7 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br, return; } - f = __br_fdb_get(br, n->ha, vid); + f = br_fdb_find_rcu(br, n->ha, vid); if (f && ((p->flags & BR_PROXYARP) || (f->dst && (f->dst->flags & BR_PROXYARP_WIFI)))) { arp_send(ARPOP_REPLY, ETH_P_ARP, sip, skb->dev, tip, @@ -189,17 +189,19 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb } break; case BR_PKT_UNICAST: - dst = __br_fdb_get(br, dest, vid); + dst = br_fdb_find_rcu(br, dest, vid); default: break; } if (dst) { + unsigned long now = jiffies; + if (dst->is_local) return br_pass_frame_up(skb); - if (jiffies != dst->used) - dst->used = jiffies; + if (now != dst->used) + dst->used = now; br_forward(dst->dst, skb, local_rcv, false); } else { if (!mcast_hit) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 1de3438e36bf..b760f2620abf 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -27,6 +27,7 @@ #include <linux/inetdevice.h> #include <linux/mroute.h> #include <net/ip.h> +#include <net/switchdev.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6.h> #include <net/mld.h> @@ -46,6 +47,7 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br, __u16 vid, const unsigned char *src); +static void __del_port_router(struct net_bridge_port *p); #if IS_ENABLED(CONFIG_IPV6) static void br_ip6_multicast_leave_group(struct net_bridge *br, struct net_bridge_port *port, @@ -849,16 +851,10 @@ static void br_multicast_router_expired(unsigned long data) spin_lock(&br->multicast_lock); if (port->multicast_router == MDB_RTR_TYPE_DISABLED || port->multicast_router == MDB_RTR_TYPE_PERM || - timer_pending(&port->multicast_router_timer) || - hlist_unhashed(&port->rlist)) + timer_pending(&port->multicast_router_timer)) goto out; - hlist_del_init_rcu(&port->rlist); - br_rtr_notify(br->dev, port, RTM_DELMDB); - /* Don't allow timer refresh if the router expired */ - if (port->multicast_router == MDB_RTR_TYPE_TEMP) - port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; - + __del_port_router(port); out: spin_unlock(&br->multicast_lock); } @@ -1007,6 +1003,18 @@ static void br_ip6_multicast_port_query_expired(unsigned long data) } #endif +static void br_mc_disabled_update(struct net_device *dev, bool value) +{ + struct switchdev_attr attr = { + .orig_dev = dev, + .id = SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED, + .flags = SWITCHDEV_F_DEFER, + .u.mc_disabled = value, + }; + + switchdev_port_attr_set(dev, &attr); +} + int br_multicast_add_port(struct net_bridge_port *port) { port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; @@ -1019,6 +1027,8 @@ int br_multicast_add_port(struct net_bridge_port *port) setup_timer(&port->ip6_own_query.timer, br_ip6_multicast_port_query_expired, (unsigned long)port); #endif + br_mc_disabled_update(port->dev, port->br->multicast_disabled); + port->mcast_stats = netdev_alloc_pcpu_stats(struct bridge_mcast_stats); if (!port->mcast_stats) return -ENOMEM; @@ -1086,13 +1096,8 @@ void br_multicast_disable_port(struct net_bridge_port *port) if (!(pg->flags & MDB_PG_FLAGS_PERMANENT)) br_multicast_del_pg(br, pg); - if (!hlist_unhashed(&port->rlist)) { - hlist_del_init_rcu(&port->rlist); - br_rtr_notify(br->dev, port, RTM_DELMDB); - /* Don't allow timer refresh if disabling */ - if (port->multicast_router == MDB_RTR_TYPE_TEMP) - port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; - } + __del_port_router(port); + del_timer(&port->multicast_router_timer); del_timer(&port->ip4_own_query.timer); #if IS_ENABLED(CONFIG_IPV6) @@ -1312,6 +1317,19 @@ br_multicast_update_query_timer(struct net_bridge *br, mod_timer(&query->timer, jiffies + br->multicast_querier_interval); } +static void br_port_mc_router_state_change(struct net_bridge_port *p, + bool is_mc_router) +{ + struct switchdev_attr attr = { + .orig_dev = p->dev, + .id = SWITCHDEV_ATTR_ID_PORT_MROUTER, + .flags = SWITCHDEV_F_DEFER, + .u.mrouter = is_mc_router, + }; + + switchdev_port_attr_set(p->dev, &attr); +} + /* * Add port to router_list * list is maintained ordered by pointer value @@ -1337,6 +1355,7 @@ static void br_multicast_add_router(struct net_bridge *br, else hlist_add_head_rcu(&port->rlist, &br->router_list); br_rtr_notify(br->dev, port, RTM_NEWMDB); + br_port_mc_router_state_change(port, true); } static void br_multicast_mark_router(struct net_bridge *br, @@ -2044,6 +2063,11 @@ static void __del_port_router(struct net_bridge_port *p) return; hlist_del_init_rcu(&p->rlist); br_rtr_notify(p->br->dev, p, RTM_DELMDB); + br_port_mc_router_state_change(p, false); + + /* don't allow timer refresh */ + if (p->multicast_router == MDB_RTR_TYPE_TEMP) + p->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; } int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val) @@ -2121,6 +2145,7 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val) if (br->multicast_disabled == !val) goto unlock; + br_mc_disabled_update(br->dev, !val); br->multicast_disabled = !val; if (br->multicast_disabled) goto unlock; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 1cbdc5b96aa7..a8f6acd23e30 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -612,9 +612,6 @@ static int br_afspec(struct net_bridge *br, return err; break; } - - if (err) - return err; } return err; diff --git a/net/bridge/br_netlink_tunnel.c b/net/bridge/br_netlink_tunnel.c index 99c68012c9d4..c913491495ab 100644 --- a/net/bridge/br_netlink_tunnel.c +++ b/net/bridge/br_netlink_tunnel.c @@ -30,18 +30,18 @@ static size_t __get_vlan_tinfo_size(void) nla_total_size(sizeof(u16)); /* IFLA_BRIDGE_VLAN_TUNNEL_FLAGS */ } -static bool vlan_tunnel_id_isrange(struct net_bridge_vlan *v, - struct net_bridge_vlan *v_end) +static bool vlan_tunid_inrange(struct net_bridge_vlan *v_curr, + struct net_bridge_vlan *v_last) { - __be32 tunid_curr = tunnel_id_to_key32(v->tinfo.tunnel_id); - __be32 tunid_end = tunnel_id_to_key32(v_end->tinfo.tunnel_id); + __be32 tunid_curr = tunnel_id_to_key32(v_curr->tinfo.tunnel_id); + __be32 tunid_last = tunnel_id_to_key32(v_last->tinfo.tunnel_id); - return (be32_to_cpu(tunid_curr) - be32_to_cpu(tunid_end)) == 1; + return (be32_to_cpu(tunid_curr) - be32_to_cpu(tunid_last)) == 1; } static int __get_num_vlan_tunnel_infos(struct net_bridge_vlan_group *vg) { - struct net_bridge_vlan *v, *v_start = NULL, *v_end = NULL; + struct net_bridge_vlan *v, *vtbegin = NULL, *vtend = NULL; int num_tinfos = 0; /* Count number of vlan infos */ @@ -50,27 +50,25 @@ static int __get_num_vlan_tunnel_infos(struct net_bridge_vlan_group *vg) if (!br_vlan_should_use(v) || !v->tinfo.tunnel_id) continue; - if (!v_start) { + if (!vtbegin) { goto initvars; - } else if ((v->vid - v_end->vid) == 1 && - vlan_tunnel_id_isrange(v_end, v) == 1) { - v_end = v; + } else if ((v->vid - vtend->vid) == 1 && + vlan_tunid_inrange(v, vtend)) { + vtend = v; continue; } else { - if ((v_end->vid - v->vid) > 0 && - vlan_tunnel_id_isrange(v_end, v) > 0) + if ((vtend->vid - vtbegin->vid) > 0) num_tinfos += 2; else num_tinfos += 1; } initvars: - v_start = v; - v_end = v; + vtbegin = v; + vtend = v; } - if (v_start) { - if ((v_end->vid - v->vid) > 0 && - vlan_tunnel_id_isrange(v_end, v) > 0) + if (vtbegin && vtend) { + if ((vtend->vid - vtbegin->vid) > 0) num_tinfos += 2; else num_tinfos += 1; @@ -127,7 +125,7 @@ static int br_fill_vlan_tinfo_range(struct sk_buff *skb, { int err; - if (vtbegin && vtend && (vtend->vid - vtbegin->vid) > 0) { + if (vtend && (vtend->vid - vtbegin->vid) > 0) { /* add range to skb */ err = br_fill_vlan_tinfo(skb, vtbegin->vid, vtbegin->tinfo.tunnel_id, @@ -171,7 +169,7 @@ int br_fill_vlan_tunnel_info(struct sk_buff *skb, if (!vtbegin) { goto initvars; } else if ((v->vid - vtend->vid) == 1 && - vlan_tunnel_id_isrange(v, vtend)) { + vlan_tunid_inrange(v, vtend)) { vtend = v; continue; } else { @@ -229,15 +227,15 @@ int br_parse_vlan_tunnel_info(struct nlattr *attr, memset(tinfo, 0, sizeof(*tinfo)); - if (!tb[IFLA_BRIDGE_VLAN_TUNNEL_ID] || - !tb[IFLA_BRIDGE_VLAN_TUNNEL_VID]) - return -EINVAL; - err = nla_parse_nested(tb, IFLA_BRIDGE_VLAN_TUNNEL_MAX, attr, vlan_tunnel_policy); if (err < 0) return err; + if (!tb[IFLA_BRIDGE_VLAN_TUNNEL_ID] || + !tb[IFLA_BRIDGE_VLAN_TUNNEL_VID]) + return -EINVAL; + tun_id = nla_get_u32(tb[IFLA_BRIDGE_VLAN_TUNNEL_ID]); vid = nla_get_u16(tb[IFLA_BRIDGE_VLAN_TUNNEL_VID]); if (vid >= VLAN_VID_MASK) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 1cbbf63a5ef7..2288fca7756c 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -507,8 +507,9 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr); void br_fdb_cleanup(struct work_struct *work); void br_fdb_delete_by_port(struct net_bridge *br, const struct net_bridge_port *p, u16 vid, int do_all); -struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, - const unsigned char *addr, __u16 vid); +struct net_bridge_fdb_entry *br_fdb_find_rcu(struct net_bridge *br, + const unsigned char *addr, + __u16 vid); int br_fdb_test_addr(struct net_device *dev, unsigned char *addr); int br_fdb_fillbuf(struct net_bridge *br, void *buf, unsigned long count, unsigned long off); @@ -530,6 +531,15 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid); +static inline bool br_hash_lock_held(struct net_bridge *br) +{ +#ifdef CONFIG_LOCKDEP + return lockdep_is_held(&br->hash_lock); +#else + return true; +#endif +} + /* br_forward.c */ enum br_pkt_type { BR_PKT_UNICAST, diff --git a/net/core/Makefile b/net/core/Makefile index f6761b6e3b29..79f9479e9658 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -28,3 +28,4 @@ obj-$(CONFIG_LWTUNNEL_BPF) += lwt_bpf.o obj-$(CONFIG_DST_CACHE) += dst_cache.o obj-$(CONFIG_HWBM) += hwbm.o obj-$(CONFIG_NET_DEVLINK) += devlink.o +obj-$(CONFIG_GRO_CELLS) += gro_cells.o diff --git a/net/core/datagram.c b/net/core/datagram.c index 662bea587165..ea633342ab0d 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -332,7 +332,9 @@ void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len) EXPORT_SYMBOL(__skb_free_datagram_locked); int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb, - unsigned int flags) + unsigned int flags, + void (*destructor)(struct sock *sk, + struct sk_buff *skb)) { int err = 0; @@ -342,6 +344,8 @@ int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb, if (skb == skb_peek(&sk->sk_receive_queue)) { __skb_unlink(skb, &sk->sk_receive_queue); atomic_dec(&skb->users); + if (destructor) + destructor(sk, skb); err = 0; } spin_unlock_bh(&sk->sk_receive_queue.lock); @@ -375,7 +379,7 @@ EXPORT_SYMBOL(__sk_queue_drop_skb); int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags) { - int err = __sk_queue_drop_skb(sk, skb, flags); + int err = __sk_queue_drop_skb(sk, skb, flags, NULL); kfree_skb(skb); sk_mem_reclaim_partial(sk); diff --git a/net/core/dev.c b/net/core/dev.c index 64efbb9e4436..05d19c6acf94 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1,5 +1,5 @@ /* - * NET3 Protocol independent device support routines. + * NET3 Protocol independent device support routines. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -7,7 +7,7 @@ * 2 of the License, or (at your option) any later version. * * Derived from the non IP parts of dev.c 1.0.19 - * Authors: Ross Biro + * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Mark Evans, <evansmp@uhura.aston.ac.uk> * @@ -21,9 +21,9 @@ * * Changes: * D.J. Barrow : Fixed bug where dev->refcnt gets set - * to 2 if register_netdev gets called - * before net_dev_init & also removed a - * few lines of code in the process. + * to 2 if register_netdev gets called + * before net_dev_init & also removed a + * few lines of code in the process. * Alan Cox : device private ioctl copies fields back. * Alan Cox : Transmit queue code does relevant * stunts to keep the queue safe. @@ -36,7 +36,7 @@ * Alan Cox : 100 backlog just doesn't cut it when * you start doing multicast video 8) * Alan Cox : Rewrote net_bh and list manager. - * Alan Cox : Fix ETH_P_ALL echoback lengths. + * Alan Cox : Fix ETH_P_ALL echoback lengths. * Alan Cox : Took out transmit every packet pass * Saved a few bytes in the ioctl handler * Alan Cox : Network driver sets packet type before @@ -46,7 +46,7 @@ * Richard Kooijman: Timestamp fixes. * Alan Cox : Wrong field in SIOCGIFDSTADDR * Alan Cox : Device lock protection. - * Alan Cox : Fixed nasty side effect of device close + * Alan Cox : Fixed nasty side effect of device close * changes. * Rudi Cilibrasi : Pass the right thing to * set_mac_address() @@ -67,8 +67,8 @@ * Paul Rusty Russell : SIOCSIFNAME * Pekka Riikonen : Netdev boot-time settings code * Andrew Morton : Make unregister_netdevice wait - * indefinitely on dev->refcnt - * J Hadi Salim : - Backlog queue sampling + * indefinitely on dev->refcnt + * J Hadi Salim : - Backlog queue sampling * - netif_rx() feedback */ @@ -192,7 +192,8 @@ static seqcount_t devnet_rename_seq; static inline void dev_base_seq_inc(struct net *net) { - while (++net->dev_base_seq == 0); + while (++net->dev_base_seq == 0) + ; } static inline struct hlist_head *dev_name_hash(struct net *net, const char *name) @@ -274,8 +275,8 @@ EXPORT_PER_CPU_SYMBOL(softnet_data); * register_netdevice() inits txq->_xmit_lock and sets lockdep class * according to dev->type */ -static const unsigned short netdev_lock_type[] = - {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25, +static const unsigned short netdev_lock_type[] = { + ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25, ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET, ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM, ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP, @@ -291,22 +292,22 @@ static const unsigned short netdev_lock_type[] = ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE}; -static const char *const netdev_lock_name[] = - {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25", - "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET", - "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM", - "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP", - "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD", - "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25", - "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP", - "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD", - "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI", - "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE", - "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET", - "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL", - "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM", - "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE", - "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"}; +static const char *const netdev_lock_name[] = { + "_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25", + "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET", + "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM", + "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP", + "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD", + "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25", + "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP", + "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD", + "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI", + "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE", + "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET", + "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL", + "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM", + "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE", + "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"}; static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)]; @@ -352,10 +353,11 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev) #endif /******************************************************************************* + * + * Protocol management and registration routines + * + *******************************************************************************/ - Protocol management and registration routines - -*******************************************************************************/ /* * Add a protocol ID to the list. Now that the input handler is @@ -538,10 +540,10 @@ void dev_remove_offload(struct packet_offload *po) EXPORT_SYMBOL(dev_remove_offload); /****************************************************************************** - - Device Boot-time Settings Routines - -*******************************************************************************/ + * + * Device Boot-time Settings Routines + * + ******************************************************************************/ /* Boot time configuration table */ static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX]; @@ -574,13 +576,13 @@ static int netdev_boot_setup_add(char *name, struct ifmap *map) } /** - * netdev_boot_setup_check - check boot time settings - * @dev: the netdevice + * netdev_boot_setup_check - check boot time settings + * @dev: the netdevice * - * Check boot time settings for the device. - * The found settings are set for the device to be used - * later in the device probing. - * Returns 0 if no settings found, 1 if they are. + * Check boot time settings for the device. + * The found settings are set for the device to be used + * later in the device probing. + * Returns 0 if no settings found, 1 if they are. */ int netdev_boot_setup_check(struct net_device *dev) { @@ -590,10 +592,10 @@ int netdev_boot_setup_check(struct net_device *dev) for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { if (s[i].name[0] != '\0' && s[i].name[0] != ' ' && !strcmp(dev->name, s[i].name)) { - dev->irq = s[i].map.irq; - dev->base_addr = s[i].map.base_addr; - dev->mem_start = s[i].map.mem_start; - dev->mem_end = s[i].map.mem_end; + dev->irq = s[i].map.irq; + dev->base_addr = s[i].map.base_addr; + dev->mem_start = s[i].map.mem_start; + dev->mem_end = s[i].map.mem_end; return 1; } } @@ -603,14 +605,14 @@ EXPORT_SYMBOL(netdev_boot_setup_check); /** - * netdev_boot_base - get address from boot time settings - * @prefix: prefix for network device - * @unit: id for network device + * netdev_boot_base - get address from boot time settings + * @prefix: prefix for network device + * @unit: id for network device * - * Check boot time settings for the base address of device. - * The found settings are set for the device to be used - * later in the device probing. - * Returns 0 if no settings found. + * Check boot time settings for the base address of device. + * The found settings are set for the device to be used + * later in the device probing. + * Returns 0 if no settings found. */ unsigned long netdev_boot_base(const char *prefix, int unit) { @@ -663,10 +665,10 @@ int __init netdev_boot_setup(char *str) __setup("netdev=", netdev_boot_setup); /******************************************************************************* - - Device Interface Subroutines - -*******************************************************************************/ + * + * Device Interface Subroutines + * + *******************************************************************************/ /** * dev_get_iflink - get 'iflink' value of a interface @@ -737,15 +739,15 @@ struct net_device *__dev_get_by_name(struct net *net, const char *name) EXPORT_SYMBOL(__dev_get_by_name); /** - * dev_get_by_name_rcu - find a device by its name - * @net: the applicable net namespace - * @name: name to find + * dev_get_by_name_rcu - find a device by its name + * @net: the applicable net namespace + * @name: name to find * - * Find an interface by name. - * If the name is found a pointer to the device is returned. - * If the name is not found then %NULL is returned. - * The reference counters are not incremented so the caller must be - * careful with locks. The caller must hold RCU lock. + * Find an interface by name. + * If the name is found a pointer to the device is returned. + * If the name is not found then %NULL is returned. + * The reference counters are not incremented so the caller must be + * careful with locks. The caller must hold RCU lock. */ struct net_device *dev_get_by_name_rcu(struct net *net, const char *name) @@ -1289,8 +1291,8 @@ void netdev_state_change(struct net_device *dev) EXPORT_SYMBOL(netdev_state_change); /** - * netdev_notify_peers - notify network peers about existence of @dev - * @dev: network device + * netdev_notify_peers - notify network peers about existence of @dev + * @dev: network device * * Generate traffic such that interested network peers are aware of * @dev, such as by generating a gratuitous ARP. This may be used when @@ -1518,17 +1520,17 @@ static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val, static int dev_boot_phase = 1; /** - * register_netdevice_notifier - register a network notifier block - * @nb: notifier + * register_netdevice_notifier - register a network notifier block + * @nb: notifier * - * Register a notifier to be called when network device events occur. - * The notifier passed is linked into the kernel structures and must - * not be reused until it has been unregistered. A negative errno code - * is returned on a failure. + * Register a notifier to be called when network device events occur. + * The notifier passed is linked into the kernel structures and must + * not be reused until it has been unregistered. A negative errno code + * is returned on a failure. * - * When registered all registration and up events are replayed - * to the new notifier to allow device to have a race free - * view of the network device list. + * When registered all registration and up events are replayed + * to the new notifier to allow device to have a race free + * view of the network device list. */ int register_netdevice_notifier(struct notifier_block *nb) @@ -1585,17 +1587,17 @@ outroll: EXPORT_SYMBOL(register_netdevice_notifier); /** - * unregister_netdevice_notifier - unregister a network notifier block - * @nb: notifier + * unregister_netdevice_notifier - unregister a network notifier block + * @nb: notifier * - * Unregister a notifier previously registered by - * register_netdevice_notifier(). The notifier is unlinked into the - * kernel structures and may then be reused. A negative errno code - * is returned on a failure. + * Unregister a notifier previously registered by + * register_netdevice_notifier(). The notifier is unlinked into the + * kernel structures and may then be reused. A negative errno code + * is returned on a failure. * - * After unregistering unregister and down device events are synthesized - * for all devices on the device list to the removed notifier to remove - * the need for special case cleanup code. + * After unregistering unregister and down device events are synthesized + * for all devices on the device list to the removed notifier to remove + * the need for special case cleanup code. */ int unregister_netdevice_notifier(struct notifier_block *nb) @@ -1695,24 +1697,19 @@ EXPORT_SYMBOL_GPL(net_dec_egress_queue); static struct static_key netstamp_needed __read_mostly; #ifdef HAVE_JUMP_LABEL -/* We are not allowed to call static_key_slow_dec() from irq context - * If net_disable_timestamp() is called from irq context, defer the - * static_key_slow_dec() calls. - */ static atomic_t netstamp_needed_deferred; -#endif - -void net_enable_timestamp(void) +static void netstamp_clear(struct work_struct *work) { -#ifdef HAVE_JUMP_LABEL int deferred = atomic_xchg(&netstamp_needed_deferred, 0); - if (deferred) { - while (--deferred) - static_key_slow_dec(&netstamp_needed); - return; - } + while (deferred--) + static_key_slow_dec(&netstamp_needed); +} +static DECLARE_WORK(netstamp_work, netstamp_clear); #endif + +void net_enable_timestamp(void) +{ static_key_slow_inc(&netstamp_needed); } EXPORT_SYMBOL(net_enable_timestamp); @@ -1720,12 +1717,12 @@ EXPORT_SYMBOL(net_enable_timestamp); void net_disable_timestamp(void) { #ifdef HAVE_JUMP_LABEL - if (in_interrupt()) { - atomic_inc(&netstamp_needed_deferred); - return; - } -#endif + /* net_disable_timestamp() can be called from non process context */ + atomic_inc(&netstamp_needed_deferred); + schedule_work(&netstamp_work); +#else static_key_slow_dec(&netstamp_needed); +#endif } EXPORT_SYMBOL(net_disable_timestamp); @@ -2501,6 +2498,7 @@ u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb, if (dev->num_tc) { u8 tc = netdev_get_prio_tc_map(dev, skb->priority); + qoffset = dev->tc_to_txq[tc].offset; qcount = dev->tc_to_txq[tc].count; } @@ -2722,9 +2720,11 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) { #ifdef CONFIG_HIGHMEM int i; + if (!(dev->features & NETIF_F_HIGHDMA)) { for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + if (PageHighMem(skb_frag_page(frag))) return 1; } @@ -2738,6 +2738,7 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; dma_addr_t addr = page_to_phys(skb_frag_page(frag)); + if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask) return 1; } @@ -3213,6 +3214,7 @@ static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb) if (queue_index < 0 || skb->ooo_okay || queue_index >= dev->real_num_tx_queues) { int new_index = get_xps_queue(dev, skb); + if (new_index < 0) new_index = skb_tx_hash(dev, skb); @@ -3242,6 +3244,7 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev, if (dev->real_num_tx_queues != 1) { const struct net_device_ops *ops = dev->netdev_ops; + if (ops->ndo_select_queue) queue_index = ops->ndo_select_queue(dev, skb, accel_priv, __netdev_pick_tx); @@ -3330,16 +3333,16 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) } /* The device has no queue. Common case for software devices: - loopback, all the sorts of tunnels... + * loopback, all the sorts of tunnels... - Really, it is unlikely that netif_tx_lock protection is necessary - here. (f.e. loopback and IP tunnels are clean ignoring statistics - counters.) - However, it is possible, that they rely on protection - made by us here. + * Really, it is unlikely that netif_tx_lock protection is necessary + * here. (f.e. loopback and IP tunnels are clean ignoring statistics + * counters.) + * However, it is possible, that they rely on protection + * made by us here. - Check this and shot the lock. It is not prone from deadlocks. - Either shot noqueue qdisc, it is even simpler 8) + * Check this and shot the lock. It is not prone from deadlocks. + *Either shot noqueue qdisc, it is even simpler 8) */ if (dev->flags & IFF_UP) { int cpu = smp_processor_id(); /* ok because BHs are off */ @@ -3401,9 +3404,9 @@ int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv) EXPORT_SYMBOL(dev_queue_xmit_accel); -/*======================================================================= - Receiver routines - =======================================================================*/ +/************************************************************************* + * Receiver routines + *************************************************************************/ int netdev_max_backlog __read_mostly = 1000; EXPORT_SYMBOL(netdev_max_backlog); @@ -3771,6 +3774,7 @@ static int netif_rx_internal(struct sk_buff *skb) #endif { unsigned int qtail; + ret = enqueue_to_backlog(skb, get_cpu(), &qtail); put_cpu(); } @@ -3830,6 +3834,7 @@ static __latent_entropy void net_tx_action(struct softirq_action *h) while (clist) { struct sk_buff *skb = clist; + clist = clist->next; WARN_ON(atomic_read(&skb->users)); @@ -5010,9 +5015,6 @@ count: LINUX_MIB_BUSYPOLLRXPACKETS, rc); local_bh_enable(); - if (rc == LL_FLUSH_FAILED) - break; /* permanent failure */ - if (nonblock || !skb_queue_empty(&sk->sk_receive_queue) || busy_loop_timeout(end_time)) break; @@ -5673,6 +5675,7 @@ static int netdev_adjacent_sysfs_add(struct net_device *dev, struct list_head *dev_list) { char linkname[IFNAMSIZ+7]; + sprintf(linkname, dev_list == &dev->adj_list.upper ? "upper_%s" : "lower_%s", adj_dev->name); return sysfs_create_link(&(dev->dev.kobj), &(adj_dev->dev.kobj), @@ -5683,6 +5686,7 @@ static void netdev_adjacent_sysfs_del(struct net_device *dev, struct list_head *dev_list) { char linkname[IFNAMSIZ+7]; + sprintf(linkname, dev_list == &dev->adj_list.upper ? "upper_%s" : "lower_%s", name); sysfs_remove_link(&(dev->dev.kobj), linkname); @@ -5952,6 +5956,7 @@ void netdev_upper_dev_unlink(struct net_device *dev, struct net_device *upper_dev) { struct netdev_notifier_changeupper_info changeupper_info; + ASSERT_RTNL(); changeupper_info.upper_dev = upper_dev; @@ -6370,8 +6375,8 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags) } /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI - is important. Some (broken) drivers set IFF_PROMISC, when - IFF_ALLMULTI is requested not asking us and not reporting. + * is important. Some (broken) drivers set IFF_PROMISC, when + * IFF_ALLMULTI is requested not asking us and not reporting. */ if ((flags ^ dev->gflags) & IFF_ALLMULTI) { int inc = (flags & IFF_ALLMULTI) ? 1 : -1; @@ -6669,6 +6674,7 @@ EXPORT_SYMBOL(dev_change_xdp_fd); static int dev_new_index(struct net *net) { int ifindex = net->ifindex; + for (;;) { if (++ifindex <= 0) ifindex = 1; @@ -6735,8 +6741,8 @@ static void rollback_registered_many(struct list_head *head) /* Notify protocols, that we are about to destroy - this device. They should clean all the things. - */ + * this device. They should clean all the things. + */ call_netdevice_notifiers(NETDEV_UNREGISTER, dev); if (!dev->rtnl_link_ops || @@ -7082,6 +7088,7 @@ void netif_tx_stop_all_queues(struct net_device *dev) for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + netif_tx_stop_queue(txq); } } @@ -7556,17 +7563,17 @@ void netdev_freemem(struct net_device *dev) } /** - * alloc_netdev_mqs - allocate network device - * @sizeof_priv: size of private data to allocate space for - * @name: device name format string - * @name_assign_type: origin of device name - * @setup: callback to initialize device - * @txqs: the number of TX subqueues to allocate - * @rxqs: the number of RX subqueues to allocate - * - * Allocates a struct net_device with private data area for driver use - * and performs basic initialization. Also allocates subqueue structs - * for each queue on the device. + * alloc_netdev_mqs - allocate network device + * @sizeof_priv: size of private data to allocate space for + * @name: device name format string + * @name_assign_type: origin of device name + * @setup: callback to initialize device + * @txqs: the number of TX subqueues to allocate + * @rxqs: the number of RX subqueues to allocate + * + * Allocates a struct net_device with private data area for driver use + * and performs basic initialization. Also allocates subqueue structs + * for each queue on the device. */ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, unsigned char name_assign_type, @@ -7678,13 +7685,13 @@ free_dev: EXPORT_SYMBOL(alloc_netdev_mqs); /** - * free_netdev - free network device - * @dev: device + * free_netdev - free network device + * @dev: device * - * This function does the last stage of destroying an allocated device - * interface. The reference to the device object is released. - * If this is the last reference then it will be freed. - * Must be called in process context. + * This function does the last stage of destroying an allocated device + * interface. The reference to the device object is released. If this + * is the last reference then it will be freed.Must be called in process + * context. */ void free_netdev(struct net_device *dev) { @@ -7866,12 +7873,12 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char dev_shutdown(dev); /* Notify protocols, that we are about to destroy - this device. They should clean all the things. - - Note that dev->reg_state stays at NETREG_REGISTERED. - This is wanted because this way 8021q and macvlan know - the device is just moving and can keep their slaves up. - */ + * this device. They should clean all the things. + * + * Note that dev->reg_state stays at NETREG_REGISTERED. + * This is wanted because this way 8021q and macvlan know + * the device is just moving and can keep their slaves up. + */ call_netdevice_notifiers(NETDEV_UNREGISTER, dev); rcu_barrier(); call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); diff --git a/net/core/devlink.c b/net/core/devlink.c index 2b5bf9efa720..e9c1e6acfb6d 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -1392,9 +1392,9 @@ static int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb, return -EOPNOTSUPP; } -static int devlink_eswitch_fill(struct sk_buff *msg, struct devlink *devlink, - enum devlink_command cmd, u32 portid, - u32 seq, int flags) +static int devlink_nl_eswitch_fill(struct sk_buff *msg, struct devlink *devlink, + enum devlink_command cmd, u32 portid, + u32 seq, int flags) { const struct devlink_ops *ops = devlink->ops; void *hdr; @@ -1408,50 +1408,52 @@ static int devlink_eswitch_fill(struct sk_buff *msg, struct devlink *devlink, err = devlink_nl_put_handle(msg, devlink); if (err) - goto out; + goto nla_put_failure; - err = ops->eswitch_mode_get(devlink, &mode); - if (err) - goto out; - err = nla_put_u16(msg, DEVLINK_ATTR_ESWITCH_MODE, mode); - if (err) - goto out; + if (ops->eswitch_mode_get) { + err = ops->eswitch_mode_get(devlink, &mode); + if (err) + goto nla_put_failure; + err = nla_put_u16(msg, DEVLINK_ATTR_ESWITCH_MODE, mode); + if (err) + goto nla_put_failure; + } if (ops->eswitch_inline_mode_get) { err = ops->eswitch_inline_mode_get(devlink, &inline_mode); if (err) - goto out; + goto nla_put_failure; err = nla_put_u8(msg, DEVLINK_ATTR_ESWITCH_INLINE_MODE, inline_mode); if (err) - goto out; + goto nla_put_failure; } genlmsg_end(msg, hdr); return 0; -out: +nla_put_failure: genlmsg_cancel(msg, hdr); return err; } -static int devlink_nl_cmd_eswitch_mode_get_doit(struct sk_buff *skb, - struct genl_info *info) +static int devlink_nl_cmd_eswitch_get_doit(struct sk_buff *skb, + struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; const struct devlink_ops *ops = devlink->ops; struct sk_buff *msg; int err; - if (!ops || !ops->eswitch_mode_get) + if (!ops) return -EOPNOTSUPP; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; - err = devlink_eswitch_fill(msg, devlink, DEVLINK_CMD_ESWITCH_MODE_GET, - info->snd_portid, info->snd_seq, 0); + err = devlink_nl_eswitch_fill(msg, devlink, DEVLINK_CMD_ESWITCH_GET, + info->snd_portid, info->snd_seq, 0); if (err) { nlmsg_free(msg); @@ -1461,8 +1463,8 @@ static int devlink_nl_cmd_eswitch_mode_get_doit(struct sk_buff *skb, return genlmsg_reply(msg, info); } -static int devlink_nl_cmd_eswitch_mode_set_doit(struct sk_buff *skb, - struct genl_info *info) +static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, + struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; const struct devlink_ops *ops = devlink->ops; @@ -1629,15 +1631,15 @@ static const struct genl_ops devlink_nl_ops[] = { DEVLINK_NL_FLAG_LOCK_PORTS, }, { - .cmd = DEVLINK_CMD_ESWITCH_MODE_GET, - .doit = devlink_nl_cmd_eswitch_mode_get_doit, + .cmd = DEVLINK_CMD_ESWITCH_GET, + .doit = devlink_nl_cmd_eswitch_get_doit, .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { - .cmd = DEVLINK_CMD_ESWITCH_MODE_SET, - .doit = devlink_nl_cmd_eswitch_mode_set_doit, + .cmd = DEVLINK_CMD_ESWITCH_SET, + .doit = devlink_nl_cmd_eswitch_set_doit, .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, diff --git a/net/core/dst.c b/net/core/dst.c index b5cbbe07f786..960e503b5a52 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -190,7 +190,6 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops, dst->__use = 0; dst->lastuse = jiffies; dst->flags = flags; - dst->pending_confirm = 0; dst->next = NULL; if (!(flags & DST_NOCOUNT)) dst_entries_add(ops, 1); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index d5f412b3093d..be7bab1adcde 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1404,9 +1404,12 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) if (regs.len > reglen) regs.len = reglen; - regbuf = vzalloc(reglen); - if (reglen && !regbuf) - return -ENOMEM; + regbuf = NULL; + if (reglen) { + regbuf = vzalloc(reglen); + if (!regbuf) + return -ENOMEM; + } ops->get_regs(dev, ®s, regbuf); diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c new file mode 100644 index 000000000000..c98bbfbd26b8 --- /dev/null +++ b/net/core/gro_cells.c @@ -0,0 +1,92 @@ +#include <linux/skbuff.h> +#include <linux/slab.h> +#include <linux/netdevice.h> +#include <net/gro_cells.h> + +struct gro_cell { + struct sk_buff_head napi_skbs; + struct napi_struct napi; +}; + +int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + struct gro_cell *cell; + + if (!gcells->cells || skb_cloned(skb) || !(dev->features & NETIF_F_GRO)) + return netif_rx(skb); + + cell = this_cpu_ptr(gcells->cells); + + if (skb_queue_len(&cell->napi_skbs) > netdev_max_backlog) { + atomic_long_inc(&dev->rx_dropped); + kfree_skb(skb); + return NET_RX_DROP; + } + + __skb_queue_tail(&cell->napi_skbs, skb); + if (skb_queue_len(&cell->napi_skbs) == 1) + napi_schedule(&cell->napi); + return NET_RX_SUCCESS; +} +EXPORT_SYMBOL(gro_cells_receive); + +/* called under BH context */ +static int gro_cell_poll(struct napi_struct *napi, int budget) +{ + struct gro_cell *cell = container_of(napi, struct gro_cell, napi); + struct sk_buff *skb; + int work_done = 0; + + while (work_done < budget) { + skb = __skb_dequeue(&cell->napi_skbs); + if (!skb) + break; + napi_gro_receive(napi, skb); + work_done++; + } + + if (work_done < budget) + napi_complete_done(napi, work_done); + return work_done; +} + +int gro_cells_init(struct gro_cells *gcells, struct net_device *dev) +{ + int i; + + gcells->cells = alloc_percpu(struct gro_cell); + if (!gcells->cells) + return -ENOMEM; + + for_each_possible_cpu(i) { + struct gro_cell *cell = per_cpu_ptr(gcells->cells, i); + + __skb_queue_head_init(&cell->napi_skbs); + + set_bit(NAPI_STATE_NO_BUSY_POLL, &cell->napi.state); + + netif_napi_add(dev, &cell->napi, gro_cell_poll, + NAPI_POLL_WEIGHT); + napi_enable(&cell->napi); + } + return 0; +} +EXPORT_SYMBOL(gro_cells_init); + +void gro_cells_destroy(struct gro_cells *gcells) +{ + int i; + + if (!gcells->cells) + return; + for_each_possible_cpu(i) { + struct gro_cell *cell = per_cpu_ptr(gcells->cells, i); + + netif_napi_del(&cell->napi); + __skb_queue_purge(&cell->napi_skbs); + } + free_percpu(gcells->cells); + gcells->cells = NULL; +} +EXPORT_SYMBOL(gro_cells_destroy); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 7bb12e07ffef..e7c12caa20c8 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2923,7 +2923,8 @@ static void neigh_proc_update(struct ctl_table *ctl, int write) return; set_bit(index, p->data_state); - call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p); + if (index == NEIGH_VAR_DELAY_PROBE_TIME) + call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p); if (!dev) /* NULL dev means this is default value */ neigh_copy_dflt_parms(net, p, index); } diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 2ec86fc552df..756637dc7a57 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -13,6 +13,7 @@ #include <linux/slab.h> #include <linux/types.h> +#include <linux/module.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/skbuff.h> diff --git a/net/core/sock.c b/net/core/sock.c index 8b35debfe454..b74356535559 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -502,6 +502,7 @@ struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie) if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { sk_tx_queue_clear(sk); + sk->sk_dst_pending_confirm = 0; RCU_INIT_POINTER(sk->sk_dst_cache, NULL); dst_release(dst); return NULL; @@ -1519,6 +1520,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) af_family_clock_key_strings[newsk->sk_family]); newsk->sk_dst_cache = NULL; + newsk->sk_dst_pending_confirm = 0; newsk->sk_wmem_queued = 0; newsk->sk_forward_alloc = 0; atomic_set(&newsk->sk_drops, 0); diff --git a/net/dsa/Makefile b/net/dsa/Makefile index 72912982de3d..31d343796251 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -1,6 +1,5 @@ # the core obj-$(CONFIG_NET_DSA) += dsa_core.o -dsa_core-y += dsa.o slave.o dsa2.o dsa_core-y += dsa.o slave.o dsa2.o switch.o # tagging formats diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 22e44f691ab9..b6d4f6a23f06 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -492,7 +492,7 @@ struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev) } EXPORT_SYMBOL_GPL(dsa_host_dev_to_mii_bus); -static struct net_device *dev_to_net_device(struct device *dev) +struct net_device *dsa_dev_to_net_device(struct device *dev) { struct device *d; @@ -509,6 +509,7 @@ static struct net_device *dev_to_net_device(struct device *dev) return NULL; } +EXPORT_SYMBOL_GPL(dsa_dev_to_net_device); #ifdef CONFIG_OF static int dsa_of_setup_routing_table(struct dsa_platform_data *pd, @@ -817,7 +818,7 @@ static int dsa_probe(struct platform_device *pdev) dev = pd->of_netdev; dev_hold(dev); } else { - dev = dev_to_net_device(pd->netdev); + dev = dsa_dev_to_net_device(pd->netdev); } if (dev == NULL) { ret = -EPROBE_DEFER; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 1c546b6621ee..737be6470c7f 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -78,19 +78,28 @@ static void dsa_dst_del_ds(struct dsa_switch_tree *dst, kref_put(&dst->refcount, dsa_free_dst); } +/* For platform data configurations, we need to have a valid name argument to + * differentiate a disabled port from an enabled one + */ static bool dsa_port_is_valid(struct dsa_port *port) { - return !!port->dn; + return !!(port->dn || port->name); } static bool dsa_port_is_dsa(struct dsa_port *port) { - return !!of_parse_phandle(port->dn, "link", 0); + if (port->name && !strcmp(port->name, "dsa")) + return true; + else + return !!of_parse_phandle(port->dn, "link", 0); } static bool dsa_port_is_cpu(struct dsa_port *port) { - return !!of_parse_phandle(port->dn, "ethernet", 0); + if (port->name && !strcmp(port->name, "cpu")) + return true; + else + return !!of_parse_phandle(port->dn, "ethernet", 0); } static bool dsa_ds_find_port_dn(struct dsa_switch *ds, @@ -250,10 +259,11 @@ static void dsa_cpu_port_unapply(struct dsa_port *port, u32 index, static int dsa_user_port_apply(struct dsa_port *port, u32 index, struct dsa_switch *ds) { - const char *name; + const char *name = port->name; int err; - name = of_get_property(port->dn, "label", NULL); + if (port->dn) + name = of_get_property(port->dn, "label", NULL); if (!name) name = "eth%d"; @@ -261,6 +271,7 @@ static int dsa_user_port_apply(struct dsa_port *port, u32 index, if (err) { dev_warn(ds->dev, "Failed to create slave %d: %d\n", index, err); + ds->ports[index].netdev = NULL; return err; } @@ -444,11 +455,16 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index, struct net_device *ethernet_dev; struct device_node *ethernet; - ethernet = of_parse_phandle(port->dn, "ethernet", 0); - if (!ethernet) - return -EINVAL; + if (port->dn) { + ethernet = of_parse_phandle(port->dn, "ethernet", 0); + if (!ethernet) + return -EINVAL; + ethernet_dev = of_find_net_device_by_node(ethernet); + } else { + ethernet_dev = dsa_dev_to_net_device(ds->cd->netdev[index]); + dev_put(ethernet_dev); + } - ethernet_dev = of_find_net_device_by_node(ethernet); if (!ethernet_dev) return -EPROBE_DEFER; @@ -551,6 +567,33 @@ static int dsa_parse_ports_dn(struct device_node *ports, struct dsa_switch *ds) return 0; } +static int dsa_parse_ports(struct dsa_chip_data *cd, struct dsa_switch *ds) +{ + bool valid_name_found = false; + unsigned int i; + + for (i = 0; i < DSA_MAX_PORTS; i++) { + if (!cd->port_names[i]) + continue; + + ds->ports[i].name = cd->port_names[i]; + + /* Initialize enabled_port_mask now for drv->setup() + * to have access to a correct value, just like what + * net/dsa/dsa.c::dsa_switch_setup_one does. + */ + if (!dsa_port_is_cpu(&ds->ports[i])) + ds->enabled_port_mask |= 1 << i; + + valid_name_found = true; + } + + if (!valid_name_found && i == DSA_MAX_PORTS) + return -EINVAL; + + return 0; +} + static int dsa_parse_member_dn(struct device_node *np, u32 *tree, u32 *index) { int err; @@ -575,6 +618,18 @@ static int dsa_parse_member_dn(struct device_node *np, u32 *tree, u32 *index) return 0; } +static int dsa_parse_member(struct dsa_chip_data *pd, u32 *tree, u32 *index) +{ + if (!pd) + return -ENODEV; + + /* We do not support complex trees with dsa_chip_data */ + *tree = 0; + *index = 0; + + return 0; +} + static struct device_node *dsa_get_ports(struct dsa_switch *ds, struct device_node *np) { @@ -591,23 +646,34 @@ static struct device_node *dsa_get_ports(struct dsa_switch *ds, static int _dsa_register_switch(struct dsa_switch *ds, struct device *dev) { + struct dsa_chip_data *pdata = dev->platform_data; struct device_node *np = dev->of_node; struct dsa_switch_tree *dst; struct device_node *ports; u32 tree, index; int i, err; - err = dsa_parse_member_dn(np, &tree, &index); - if (err) - return err; + if (np) { + err = dsa_parse_member_dn(np, &tree, &index); + if (err) + return err; - ports = dsa_get_ports(ds, np); - if (IS_ERR(ports)) - return PTR_ERR(ports); + ports = dsa_get_ports(ds, np); + if (IS_ERR(ports)) + return PTR_ERR(ports); - err = dsa_parse_ports_dn(ports, ds); - if (err) - return err; + err = dsa_parse_ports_dn(ports, ds); + if (err) + return err; + } else { + err = dsa_parse_member(pdata, &tree, &index); + if (err) + return err; + + err = dsa_parse_ports(pdata, ds); + if (err) + return err; + } dst = dsa_get_dst(tree); if (!dst) { @@ -623,6 +689,7 @@ static int _dsa_register_switch(struct dsa_switch *ds, struct device *dev) ds->dst = dst; ds->index = index; + ds->cd = pdata; /* Initialize the routing table */ for (i = 0; i < DSA_MAX_SWITCHES; ++i) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 061a49c29cef..c34872e1febc 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -684,14 +684,10 @@ dsa_slave_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *cmd) { struct dsa_slave_priv *p = netdev_priv(dev); - int err; + int err = -EOPNOTSUPP; - err = -EOPNOTSUPP; - if (p->phy != NULL) { - err = phy_read_status(p->phy); - if (err == 0) - err = phy_ethtool_ksettings_get(p->phy, cmd); - } + if (p->phy != NULL) + err = phy_ethtool_ksettings_get(p->phy, cmd); return err; } diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index c666ff0dd88b..1446810047f5 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -356,6 +356,7 @@ void ether_setup(struct net_device *dev) dev->header_ops = ð_header_ops; dev->type = ARPHRD_ETHER; dev->hard_header_len = ETH_HLEN; + dev->min_header_len = ETH_HLEN; dev->mtu = ETH_DATA_LEN; dev->min_mtu = ETH_MIN_MTU; dev->max_mtu = ETH_DATA_LEN; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index e0878c3f66a8..91a2557942fa 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -187,6 +187,7 @@ config NET_IPGRE_DEMUX config NET_IP_TUNNEL tristate select DST_CACHE + select GRO_CELLS default n config NET_IPGRE diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 89a8cac4726a..51b27ae09fbd 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1263,7 +1263,7 @@ void __init arp_init(void) /* * ax25 -> ASCII conversion */ -static char *ax2asc2(ax25_address *a, char *buf) +static void ax2asc2(ax25_address *a, char *buf) { char c, *s; int n; @@ -1285,10 +1285,10 @@ static char *ax2asc2(ax25_address *a, char *buf) *s++ = n + '0'; *s++ = '\0'; - if (*buf == '\0' || *buf == '-') - return "*"; - - return buf; + if (*buf == '\0' || *buf == '-') { + buf[0] = '*'; + buf[1] = '\0'; + } } #endif /* CONFIG_AX25 */ @@ -1322,7 +1322,7 @@ static void arp_format_neigh_entry(struct seq_file *seq, } #endif sprintf(tbuf, "%pI4", n->primary_key); - seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n", + seq_printf(seq, "%-16s 0x%-10x0x%-10x%-17s * %s\n", tbuf, hatype, arp_state_to_flags(n), hbuffer, dev->name); read_unlock(&n->lock); } diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 72d6f056d863..ae206163c273 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1587,6 +1587,10 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option) goto validate_return_locked; } + if (opt_iter + 1 == opt_len) { + err_offset = opt_iter; + goto validate_return_locked; + } tag_len = tag[1]; if (tag_len > (opt_len - opt_iter)) { err_offset = opt_iter + 1; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 6306a67880e8..317026a39cfa 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1355,6 +1355,36 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local) return ret; } +static int call_fib_nh_notifiers(struct fib_nh *fib_nh, + enum fib_event_type event_type) +{ + struct in_device *in_dev = __in_dev_get_rtnl(fib_nh->nh_dev); + struct fib_nh_notifier_info info = { + .fib_nh = fib_nh, + }; + + switch (event_type) { + case FIB_EVENT_NH_ADD: + if (fib_nh->nh_flags & RTNH_F_DEAD) + break; + if (IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && + fib_nh->nh_flags & RTNH_F_LINKDOWN) + break; + return call_fib_notifiers(dev_net(fib_nh->nh_dev), event_type, + &info.info); + case FIB_EVENT_NH_DEL: + if ((IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && + fib_nh->nh_flags & RTNH_F_LINKDOWN) || + (fib_nh->nh_flags & RTNH_F_DEAD)) + return call_fib_notifiers(dev_net(fib_nh->nh_dev), + event_type, &info.info); + default: + break; + } + + return NOTIFY_DONE; +} + /* Event force Flags Description * NETDEV_CHANGE 0 LINKDOWN Carrier OFF, not for scope host * NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host @@ -1396,6 +1426,8 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) nexthop_nh->nh_flags |= RTNH_F_LINKDOWN; break; } + call_fib_nh_notifiers(nexthop_nh, + FIB_EVENT_NH_DEL); dead++; } #ifdef CONFIG_IP_ROUTE_MULTIPATH @@ -1550,6 +1582,7 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags) continue; alive++; nexthop_nh->nh_flags &= ~nh_flags; + call_fib_nh_notifiers(nexthop_nh, FIB_EVENT_NH_ADD); } endfor_nexthops(fi) if (alive > 0) { diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 2919d1a10cfd..d8cea210af0e 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -124,7 +124,7 @@ static void fib_notify(struct net *net, struct notifier_block *nb, static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net, enum fib_event_type event_type, u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id, u32 nlflags) + u8 tos, u8 type, u32 tb_id) { struct fib_entry_notifier_info info = { .dst = dst, @@ -133,7 +133,6 @@ static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net, .tos = tos, .type = type, .tb_id = tb_id, - .nlflags = nlflags, }; return call_fib_notifier(nb, net, event_type, &info.info); } @@ -197,7 +196,7 @@ int call_fib_notifiers(struct net *net, enum fib_event_type event_type, static int call_fib_entry_notifiers(struct net *net, enum fib_event_type event_type, u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id, u32 nlflags) + u8 tos, u8 type, u32 tb_id) { struct fib_entry_notifier_info info = { .dst = dst, @@ -206,7 +205,6 @@ static int call_fib_entry_notifiers(struct net *net, .tos = tos, .type = type, .tb_id = tb_id, - .nlflags = nlflags, }; return call_fib_notifiers(net, event_type, &info.info); } @@ -1198,6 +1196,7 @@ static int fib_insert_alias(struct trie *t, struct key_vector *tp, int fib_table_insert(struct net *net, struct fib_table *tb, struct fib_config *cfg) { + enum fib_event_type event = FIB_EVENT_ENTRY_ADD; struct trie *t = (struct trie *)tb->tb_data; struct fib_alias *fa, *new_fa; struct key_vector *l, *tp; @@ -1295,6 +1294,13 @@ int fib_table_insert(struct net *net, struct fib_table *tb, new_fa->tb_id = tb->tb_id; new_fa->fa_default = -1; + call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, + key, plen, fi, + new_fa->fa_tos, cfg->fc_type, + tb->tb_id); + rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, + tb->tb_id, &cfg->fc_nlinfo, nlflags); + hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list); alias_free_mem_rcu(fa); @@ -1303,13 +1309,6 @@ int fib_table_insert(struct net *net, struct fib_table *tb, if (state & FA_S_ACCESSED) rt_cache_flush(cfg->fc_nlinfo.nl_net); - call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, - key, plen, fi, - new_fa->fa_tos, cfg->fc_type, - tb->tb_id, cfg->fc_nlflags); - rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, - tb->tb_id, &cfg->fc_nlinfo, nlflags); - goto succeeded; } /* Error if we find a perfect match which @@ -1319,10 +1318,12 @@ int fib_table_insert(struct net *net, struct fib_table *tb, if (fa_match) goto out; - if (cfg->fc_nlflags & NLM_F_APPEND) + if (cfg->fc_nlflags & NLM_F_APPEND) { + event = FIB_EVENT_ENTRY_APPEND; nlflags |= NLM_F_APPEND; - else + } else { fa = fa_first; + } } err = -ENOENT; if (!(cfg->fc_nlflags & NLM_F_CREATE)) @@ -1351,8 +1352,8 @@ int fib_table_insert(struct net *net, struct fib_table *tb, tb->tb_num_default++; rt_cache_flush(cfg->fc_nlinfo.nl_net); - call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, key, plen, fi, tos, - cfg->fc_type, tb->tb_id, cfg->fc_nlflags); + call_fib_entry_notifiers(net, event, key, plen, fi, tos, cfg->fc_type, + tb->tb_id); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id, &cfg->fc_nlinfo, nlflags); succeeded: @@ -1653,8 +1654,8 @@ int fib_table_delete(struct net *net, struct fib_table *tb, return -ESRCH; call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, plen, - fa_to_delete->fa_info, tos, cfg->fc_type, - tb->tb_id, 0); + fa_to_delete->fa_info, tos, + fa_to_delete->fa_type, tb->tb_id); rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id, &cfg->fc_nlinfo, 0); @@ -1963,7 +1964,8 @@ int fib_table_flush(struct net *net, struct fib_table *tb) hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { struct fib_info *fi = fa->fa_info; - if (!fi || !(fi->fib_flags & RTNH_F_DEAD)) { + if (!fi || !(fi->fib_flags & RTNH_F_DEAD) || + tb->tb_id != fa->tb_id) { slen = fa->fa_slen; continue; } @@ -1972,7 +1974,7 @@ int fib_table_flush(struct net *net, struct fib_table *tb) n->key, KEYLENGTH - fa->fa_slen, fi, fa->fa_tos, fa->fa_type, - tb->tb_id, 0); + tb->tb_id); hlist_del_rcu(&fa->fa_list); fib_release_info(fa->fa_info); alias_free_mem_rcu(fa); @@ -2012,7 +2014,7 @@ static void fib_leaf_notify(struct net *net, struct key_vector *l, call_fib_entry_notifier(nb, net, event_type, l->key, KEYLENGTH - fa->fa_slen, fi, fa->fa_tos, - fa->fa_type, fa->tb_id, 0); + fa->fa_type, fa->tb_id); } } diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 5b15459955f8..44fd86de2823 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1172,6 +1172,7 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im) psf->sf_crcount = im->crcount; } in_dev_put(pmc->interface); + kfree(pmc); } spin_unlock_bh(&im->lock); } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index b67719f45953..737ce826d7ec 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -222,7 +222,10 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s if (unlikely(!neigh)) neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); if (!IS_ERR(neigh)) { - int res = dst_neigh_output(dst, neigh, skb); + int res; + + sock_confirm_neigh(skb, neigh); + res = neigh_output(neigh, skb); rcu_read_unlock_bh(); return res; @@ -886,6 +889,9 @@ static inline int ip_ufo_append_data(struct sock *sk, skb->csum = 0; + if (flags & MSG_CONFIRM) + skb_set_dst_pending_confirm(skb, 1); + __skb_queue_tail(queue, skb); } else if (skb_is_gso(skb)) { goto append; @@ -1086,6 +1092,9 @@ alloc_new_skb: exthdrlen = 0; csummode = CHECKSUM_NONE; + if ((flags & MSG_CONFIRM) && !skb_prev) + skb_set_dst_pending_confirm(skb, 1); + /* * Put the packet on the pending queue. */ diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 8a4409dd390a..ce1386a67e24 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1243,7 +1243,14 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) pktinfo->ipi_ifindex = 0; pktinfo->ipi_spec_dst.s_addr = 0; } - skb_dst_drop(skb); + /* We need to keep the dst for __ip_options_echo() + * We could restrict the test to opt.ts_needtime || opt.srr, + * but the following is good enough as IP options are not often used. + */ + if (unlikely(IPCB(skb)->opt.optlen)) + skb_dst_force(skb); + else + skb_dst_drop(skb); } int ip_setsockopt(struct sock *sk, int level, diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 592db6a3a0e9..2af6244b83e2 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -642,6 +642,8 @@ static int ping_v4_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh, { struct sk_buff *skb = skb_peek(&sk->sk_write_queue); + if (!skb) + return 0; pfh->wcheck = csum_partial((char *)&pfh->icmph, sizeof(struct icmphdr), pfh->wcheck); pfh->icmph.checksum = csum_fold(pfh->wcheck); @@ -848,7 +850,8 @@ out: return err; do_confirm: - dst_confirm(&rt->dst); + if (msg->msg_flags & MSG_PROBE) + dst_confirm_neigh(&rt->dst, &fl4.daddr); if (!(msg->msg_flags & MSG_PROBE) || len) goto back_from_confirm; err = 0; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 4e49e5cb001c..8119e1f66e03 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -383,6 +383,9 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, sock_tx_timestamp(sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags); + if (flags & MSG_CONFIRM) + skb_set_dst_pending_confirm(skb, 1); + skb->transport_header = skb->network_header; err = -EFAULT; if (memcpy_from_msg(iph, msg, length)) @@ -666,7 +669,8 @@ out: return len; do_confirm: - dst_confirm(&rt->dst); + if (msg->msg_flags & MSG_PROBE) + dst_confirm_neigh(&rt->dst, &fl4.daddr); if (!(msg->msg_flags & MSG_PROBE) || len) goto back_from_confirm; err = 0; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4b7c231c1aef..cb494a5050f7 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -154,6 +154,7 @@ static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old) static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, struct sk_buff *skb, const void *daddr); +static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr); static struct dst_ops ipv4_dst_ops = { .family = AF_INET, @@ -168,6 +169,7 @@ static struct dst_ops ipv4_dst_ops = { .redirect = ip_do_redirect, .local_out = __ip_local_out, .neigh_lookup = ipv4_neigh_lookup, + .confirm_neigh = ipv4_confirm_neigh, }; #define ECN_OR_COST(class) TC_PRIO_##class @@ -461,6 +463,23 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, return neigh_create(&arp_tbl, pkey, dev); } +static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr) +{ + struct net_device *dev = dst->dev; + const __be32 *pkey = daddr; + const struct rtable *rt; + + rt = (const struct rtable *)dst; + if (rt->rt_gateway) + pkey = (const __be32 *)&rt->rt_gateway; + else if (!daddr || + (rt->rt_flags & + (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL))) + return; + + __ipv4_confirm_neigh(dev, *(__force u32 *)pkey); +} + #define IP_IDENTS_SZ 2048u static atomic_t *ip_idents __read_mostly; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b751abc56935..d44a6989e76d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -773,6 +773,12 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, ret = -EAGAIN; break; } + /* if __tcp_splice_read() got nothing while we have + * an skb in receive queue, we do not want to loop. + * This might happen with URG data. + */ + if (!skb_queue_empty(&sk->sk_receive_queue)) + break; sk_wait_data(sk, &timeo, NULL); if (signal_pending(current)) { ret = sock_intr_errno(timeo); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 27c95acbb52f..2c0ff327b6df 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3644,11 +3644,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (tp->tlp_high_seq) tcp_process_tlp_ack(sk, ack, flag); - if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) { - struct dst_entry *dst = __sk_dst_get(sk); - if (dst) - dst_confirm(dst); - } + if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) + sk_dst_confirm(sk); if (icsk->icsk_pending == ICSK_TIME_RETRANS) tcp_schedule_loss_probe(sk); @@ -5995,7 +5992,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) break; case TCP_FIN_WAIT1: { - struct dst_entry *dst; int tmo; /* If we enter the TCP_FIN_WAIT1 state and we are a @@ -6022,9 +6018,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) tcp_set_state(sk, TCP_FIN_WAIT2); sk->sk_shutdown |= SEND_SHUTDOWN; - dst = __sk_dst_get(sk); - if (dst) - dst_confirm(dst); + sk_dst_confirm(sk); if (!sock_flag(sk, SOCK_DEAD)) { /* Wake up lingering close() */ diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index b9ed0d50aead..0f46e5fe31ad 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -375,12 +375,10 @@ void tcp_update_metrics(struct sock *sk) u32 val; int m; + sk_dst_confirm(sk); if (sysctl_tcp_nometrics_save || !dst) return; - if (dst->flags & DST_HOST) - dst_confirm(dst); - rcu_read_lock(); if (icsk->icsk_backoff || !tp->srtt_us) { /* This session failed to estimate rtt. Why? @@ -493,11 +491,10 @@ void tcp_init_metrics(struct sock *sk) struct tcp_metrics_block *tm; u32 val, crtt = 0; /* cached RTT scaled by 8 */ + sk_dst_confirm(sk); if (!dst) goto reset; - dst_confirm(dst); - rcu_read_lock(); tm = tcp_get_metrics(sk, dst, true); if (!tm) { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ccf1ef4dcba4..61f272a99a49 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -980,6 +980,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, skb_set_hash_from_sk(skb, sk); atomic_add(skb->truesize, &sk->sk_wmem_alloc); + skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm); + /* Build TCP header and checksum it. */ th = (struct tcphdr *)skb->data; th->source = inet->inet_sport; diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index f6c50af24a64..3d063eb37848 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -117,7 +117,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, (fwmark > 0 && skb->mark == fwmark)) && (full || tp->snd_cwnd != tcp_probe.lastcwnd)) { - spin_lock(&tcp_probe.lock); + spin_lock_bh(&tcp_probe.lock); /* If log fills, just silently drop */ if (tcp_probe_avail() > 1) { struct tcp_log *p = tcp_probe.log + tcp_probe.head; @@ -157,7 +157,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, tcp_probe.head = (tcp_probe.head + 1) & (bufsize - 1); } tcp_probe.lastcwnd = tp->snd_cwnd; - spin_unlock(&tcp_probe.lock); + spin_unlock_bh(&tcp_probe.lock); wake_up(&tcp_probe.wait); } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index cf6ba3387401..ea6e4cff9faf 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1101,7 +1101,8 @@ out: return err; do_confirm: - dst_confirm(&rt->dst); + if (msg->msg_flags & MSG_PROBE) + dst_confirm_neigh(&rt->dst, &fl4->daddr); if (!(msg->msg_flags&MSG_PROBE) || len) goto back_from_confirm; err = 0; @@ -1489,7 +1490,7 @@ try_again: return err; csum_copy_err: - if (!__sk_queue_drop_skb(sk, skb, flags)) { + if (!__sk_queue_drop_skb(sk, skb, flags, udp_skb_destructor)) { UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); } diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index b2a85cca91f7..e2afe677a9d9 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -221,6 +221,7 @@ config IPV6_TUNNEL tristate "IPv6: IP-in-IPv6 tunnel (RFC2473)" select INET6_TUNNEL select DST_CACHE + select GRO_CELLS ---help--- Support for IPv6-in-IPv6 and IPv4-in-IPv6 tunnels described in RFC 2473. diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 156ed578d3c0..3a2025f5bf2c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3393,9 +3393,15 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, } if (idev) { - if (idev->if_flags & IF_READY) - /* device is already configured. */ + if (idev->if_flags & IF_READY) { + /* device is already configured - + * but resend MLD reports, we might + * have roamed and need to update + * multicast snooping switches + */ + ipv6_mc_up(idev); break; + } idev->if_flags |= IF_READY; } @@ -4016,6 +4022,12 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id) if (bump_id) rt_genid_bump_ipv6(dev_net(dev)); + + /* Make sure that a new temporary address will be created + * before this temporary address becomes deprecated. + */ + if (ifp->flags & IFA_F_TEMPORARY) + addrconf_verify_rtnl(); } static void addrconf_dad_run(struct inet6_dev *idev) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index a3eaafd87100..eec27f87efac 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -167,18 +167,22 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, if (np->sndflow) fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; - addr_type = ipv6_addr_type(&usin->sin6_addr); - - if (addr_type == IPV6_ADDR_ANY) { + if (ipv6_addr_any(&usin->sin6_addr)) { /* * connect to self */ - usin->sin6_addr.s6_addr[15] = 0x01; + if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) + ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK), + &usin->sin6_addr); + else + usin->sin6_addr = in6addr_loopback; } + addr_type = ipv6_addr_type(&usin->sin6_addr); + daddr = &usin->sin6_addr; - if (addr_type == IPV6_ADDR_MAPPED) { + if (addr_type & IPV6_ADDR_MAPPED) { struct sockaddr_in sin; if (__ipv6_only_sock(sk)) { diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index e4198502fd98..275cac628a95 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -327,7 +327,6 @@ static int ipv6_srh_rcv(struct sk_buff *skb) struct ipv6_sr_hdr *hdr; struct inet6_dev *idev; struct in6_addr *addr; - bool cleanup = false; int accept_seg6; hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb); @@ -351,11 +350,7 @@ static int ipv6_srh_rcv(struct sk_buff *skb) #endif looped_back: - if (hdr->segments_left > 0) { - if (hdr->nexthdr != NEXTHDR_IPV6 && hdr->segments_left == 1 && - sr_has_cleanup(hdr)) - cleanup = true; - } else { + if (hdr->segments_left == 0) { if (hdr->nexthdr == NEXTHDR_IPV6) { int offset = (hdr->hdrlen + 1) << 3; @@ -418,21 +413,6 @@ looped_back: ipv6_hdr(skb)->daddr = *addr; - if (cleanup) { - int srhlen = (hdr->hdrlen + 1) << 3; - int nh = hdr->nexthdr; - - skb_pull_rcsum(skb, sizeof(struct ipv6hdr) + srhlen); - memmove(skb_network_header(skb) + srhlen, - skb_network_header(skb), - (unsigned char *)hdr - skb_network_header(skb)); - skb->network_header += srhlen; - ipv6_hdr(skb)->nexthdr = nh; - ipv6_hdr(skb)->payload_len = htons(skb->len - - sizeof(struct ipv6hdr)); - skb_push_rcsum(skb, sizeof(struct ipv6hdr)); - } - skb_dst_drop(skb); ip6_route_input(skb); @@ -453,13 +433,8 @@ looped_back: } ipv6_hdr(skb)->hop_limit--; - /* be sure that srh is still present before reinjecting */ - if (!cleanup) { - skb_pull(skb, sizeof(struct ipv6hdr)); - goto looped_back; - } - skb_set_transport_header(skb, sizeof(struct ipv6hdr)); - IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); + skb_pull(skb, sizeof(struct ipv6hdr)); + goto looped_back; } dst_input(skb); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 51b9835b3176..6fcb7cb49bb2 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -367,35 +367,37 @@ static void ip6gre_tunnel_uninit(struct net_device *dev) static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - u8 type, u8 code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { - const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data; - __be16 *p = (__be16 *)(skb->data + offset); - int grehlen = offset + 4; + const struct gre_base_hdr *greh; + const struct ipv6hdr *ipv6h; + int grehlen = sizeof(*greh); struct ip6_tnl *t; + int key_off = 0; __be16 flags; + __be32 key; - flags = p[0]; - if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { - if (flags&(GRE_VERSION|GRE_ROUTING)) - return; - if (flags&GRE_KEY) { - grehlen += 4; - if (flags&GRE_CSUM) - grehlen += 4; - } + if (!pskb_may_pull(skb, offset + grehlen)) + return; + greh = (const struct gre_base_hdr *)(skb->data + offset); + flags = greh->flags; + if (flags & (GRE_VERSION | GRE_ROUTING)) + return; + if (flags & GRE_CSUM) + grehlen += 4; + if (flags & GRE_KEY) { + key_off = grehlen + offset; + grehlen += 4; } - /* If only 8 bytes returned, keyed message will be dropped here */ - if (!pskb_may_pull(skb, grehlen)) + if (!pskb_may_pull(skb, offset + grehlen)) return; ipv6h = (const struct ipv6hdr *)skb->data; - p = (__be16 *)(skb->data + offset); + greh = (const struct gre_base_hdr *)(skb->data + offset); + key = key_off ? *(__be32 *)(skb->data + key_off) : 0; t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr, - flags & GRE_KEY ? - *(((__be32 *)p) + (grehlen / 4) - 1) : 0, - p[1]); + key, greh->protocol); if (!t) return; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index b6a94ff0bbd0..d0f51b420447 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -119,7 +119,8 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * if (unlikely(!neigh)) neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); if (!IS_ERR(neigh)) { - ret = dst_neigh_output(dst, neigh, skb); + sock_confirm_neigh(skb, neigh); + ret = neigh_output(neigh, skb); rcu_read_unlock_bh(); return ret; } @@ -1021,6 +1022,9 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, } } #endif + if (ipv6_addr_v4mapped(&fl6->saddr) && + !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) + return -EAFNOSUPPORT; return 0; @@ -1144,6 +1148,9 @@ static inline int ip6_ufo_append_data(struct sock *sk, skb->protocol = htons(ETH_P_IPV6); skb->csum = 0; + if (flags & MSG_CONFIRM) + skb_set_dst_pending_confirm(skb, 1); + __skb_queue_tail(queue, skb); } else if (skb_is_gso(skb)) { goto append; @@ -1516,6 +1523,9 @@ alloc_new_skb: exthdrlen = 0; dst_exthdrlen = 0; + if ((flags & MSG_CONFIRM) && !skb_prev) + skb_set_dst_pending_confirm(skb, 1); + /* * Put the packet on the pending queue */ diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 7139fffd61b6..1bdc703cb966 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -779,6 +779,7 @@ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) psf->sf_crcount = im->mca_crcount; } in6_dev_put(pmc->idev); + kfree(pmc); } spin_unlock_bh(&im->mca_lock); } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index ea89073c8247..f174e76e6505 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -654,6 +654,9 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, skb->ip_summed = CHECKSUM_NONE; + if (flags & MSG_CONFIRM) + skb_set_dst_pending_confirm(skb, 1); + skb->transport_header = skb->network_header; err = memcpy_from_msg(iph, msg, length); if (err) @@ -934,7 +937,8 @@ out: txopt_put(opt_to_free); return err < 0 ? err : len; do_confirm: - dst_confirm(dst); + if (msg->msg_flags & MSG_PROBE) + dst_confirm_neigh(dst, &fl6.daddr); if (!(msg->msg_flags & MSG_PROBE) || len) goto back_from_confirm; err = 0; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8ffa24cc8899..f54f4265b37f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -223,6 +223,21 @@ static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, return neigh_create(&nd_tbl, daddr, dst->dev); } +static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr) +{ + struct net_device *dev = dst->dev; + struct rt6_info *rt = (struct rt6_info *)dst; + + daddr = choose_neigh_daddr(rt, NULL, daddr); + if (!daddr) + return; + if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) + return; + if (ipv6_addr_is_multicast((const struct in6_addr *)daddr)) + return; + __ipv6_confirm_neigh(dev, daddr); +} + static struct dst_ops ip6_dst_ops_template = { .family = AF_INET6, .gc = ip6_dst_gc, @@ -239,6 +254,7 @@ static struct dst_ops ip6_dst_ops_template = { .redirect = rt6_do_redirect, .local_out = __ip6_local_out, .neigh_lookup = ip6_neigh_lookup, + .confirm_neigh = ip6_confirm_neigh, }; static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst) @@ -1365,6 +1381,7 @@ static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt) static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, const struct ipv6hdr *iph, u32 mtu) { + const struct in6_addr *daddr, *saddr; struct rt6_info *rt6 = (struct rt6_info *)dst; if (rt6->rt6i_flags & RTF_LOCAL) @@ -1373,26 +1390,26 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, if (dst_metric_locked(dst, RTAX_MTU)) return; - dst_confirm(dst); + if (iph) { + daddr = &iph->daddr; + saddr = &iph->saddr; + } else if (sk) { + daddr = &sk->sk_v6_daddr; + saddr = &inet6_sk(sk)->saddr; + } else { + daddr = NULL; + saddr = NULL; + } + dst_confirm_neigh(dst, daddr); mtu = max_t(u32, mtu, IPV6_MIN_MTU); if (mtu >= dst_mtu(dst)) return; if (!rt6_cache_allowed_for_pmtu(rt6)) { rt6_do_update_pmtu(rt6, mtu); - } else { - const struct in6_addr *daddr, *saddr; + } else if (daddr) { struct rt6_info *nrt6; - if (iph) { - daddr = &iph->daddr; - saddr = &iph->saddr; - } else if (sk) { - daddr = &sk->sk_v6_daddr; - saddr = &inet6_sk(sk)->saddr; - } else { - return; - } nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr); if (nrt6) { rt6_do_update_pmtu(nrt6, mtu); @@ -2316,7 +2333,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu * Look, redirects are sent only in response to data packets, * so that this nexthop apparently is reachable. --ANK */ - dst_confirm(&rt->dst); + dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr); neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1); if (!neigh) diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c index b274f1d95e03..f950cb53d5e3 100644 --- a/net/ipv6/seg6_hmac.c +++ b/net/ipv6/seg6_hmac.c @@ -174,7 +174,7 @@ int seg6_hmac_compute(struct seg6_hmac_info *hinfo, struct ipv6_sr_hdr *hdr, * hash function (RadioGatun) with up to 1216 bits */ - /* saddr(16) + first_seg(1) + cleanup(1) + keyid(4) + seglist(16n) */ + /* saddr(16) + first_seg(1) + flags(1) + keyid(4) + seglist(16n) */ plen = 16 + 1 + 1 + 4 + (hdr->first_segment + 1) * 16; /* this limit allows for 14 segments */ @@ -186,7 +186,7 @@ int seg6_hmac_compute(struct seg6_hmac_info *hinfo, struct ipv6_sr_hdr *hdr, * * 1. Source IPv6 address (128 bits) * 2. first_segment value (8 bits) - * 3. cleanup flag (8 bits: highest bit is cleanup value, others are 0) + * 3. Flags (8 bits) * 4. HMAC Key ID (32 bits) * 5. All segments in the segments list (n * 128 bits) */ @@ -202,8 +202,8 @@ int seg6_hmac_compute(struct seg6_hmac_info *hinfo, struct ipv6_sr_hdr *hdr, /* first_segment value */ *off++ = hdr->first_segment; - /* cleanup flag */ - *off++ = !!(sr_has_cleanup(hdr)) << 7; + /* flags */ + *off++ = hdr->flags; /* HMAC Key ID */ memcpy(off, &hmackeyid, 4); diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 6124e159c882..85582257d3af 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -55,8 +55,8 @@ static const struct nla_policy seg6_iptunnel_policy[SEG6_IPTUNNEL_MAX + 1] = { [SEG6_IPTUNNEL_SRH] = { .type = NLA_BINARY }, }; -int nla_put_srh(struct sk_buff *skb, int attrtype, - struct seg6_iptunnel_encap *tuninfo) +static int nla_put_srh(struct sk_buff *skb, int attrtype, + struct seg6_iptunnel_encap *tuninfo) { struct seg6_iptunnel_encap *data; struct nlattr *nla; @@ -235,7 +235,7 @@ static int seg6_do_srh(struct sk_buff *skb) return 0; } -int seg6_input(struct sk_buff *skb) +static int seg6_input(struct sk_buff *skb) { int err; @@ -251,7 +251,7 @@ int seg6_input(struct sk_buff *skb) return dst_input(skb); } -int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) +static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *orig_dst = skb_dst(skb); struct dst_entry *dst = NULL; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index fad992ad4bc8..99853c6e33a8 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1380,6 +1380,7 @@ static int ipip6_tunnel_init(struct net_device *dev) err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL); if (err) { free_percpu(dev->tstats); + dev->tstats = NULL; return err; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6b9fc63fd4d2..21c719965b6b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -149,8 +149,13 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, * connect() to INADDR_ANY means loopback (BSD'ism). */ - if (ipv6_addr_any(&usin->sin6_addr)) - usin->sin6_addr.s6_addr[15] = 0x1; + if (ipv6_addr_any(&usin->sin6_addr)) { + if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) + ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK), + &usin->sin6_addr); + else + usin->sin6_addr = in6addr_loopback; + } addr_type = ipv6_addr_type(&usin->sin6_addr); @@ -189,7 +194,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, * TCP over IPv4 */ - if (addr_type == IPV6_ADDR_MAPPED) { + if (addr_type & IPV6_ADDR_MAPPED) { u32 exthdrlen = icsk->icsk_ext_hdr_len; struct sockaddr_in sin; @@ -996,6 +1001,16 @@ drop: return 0; /* don't send reset */ } +static void tcp_v6_restore_cb(struct sk_buff *skb) +{ + /* We need to move header back to the beginning if xfrm6_policy_check() + * and tcp_v6_fill_cb() are going to be called again. + * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there. + */ + memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, + sizeof(struct inet6_skb_parm)); +} + static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst, @@ -1184,8 +1199,10 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * sk_gfp_mask(sk, GFP_ATOMIC)); consume_skb(ireq->pktopts); ireq->pktopts = NULL; - if (newnp->pktoptions) + if (newnp->pktoptions) { + tcp_v6_restore_cb(newnp->pktoptions); skb_set_owner_r(newnp->pktoptions, newsk); + } } } @@ -1200,16 +1217,6 @@ out: return NULL; } -static void tcp_v6_restore_cb(struct sk_buff *skb) -{ - /* We need to move header back to the beginning if xfrm6_policy_check() - * and tcp_v6_fill_cb() are going to be called again. - * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there. - */ - memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, - sizeof(struct inet6_skb_parm)); -} - /* The socket must have it's spinlock held when we get * here, unless it is a TCP_LISTEN socket. * diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b4c6516a3a0c..4e4c401e3bc6 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -454,7 +454,7 @@ try_again: return err; csum_copy_err: - if (!__sk_queue_drop_skb(sk, skb, flags)) { + if (!__sk_queue_drop_skb(sk, skb, flags, udp_skb_destructor)) { if (is_udp4) { UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); @@ -1046,6 +1046,10 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; daddr = &sin6->sin6_addr; + if (ipv6_addr_any(daddr) && + ipv6_addr_v4mapped(&np->saddr)) + ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK), + daddr); break; case AF_INET: goto do_udp_sendmsg; @@ -1308,7 +1312,8 @@ out: return err; do_confirm: - dst_confirm(dst); + if (msg->msg_flags & MSG_PROBE) + dst_confirm_neigh(dst, &fl6.daddr); if (!(msg->msg_flags&MSG_PROBE) || len) goto back_from_confirm; err = 0; diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 7e08a4d3d77d..a646f3481240 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -929,23 +929,25 @@ static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) goto out_error; } - /* New message, alloc head skb */ - head = alloc_skb(0, sk->sk_allocation); - while (!head) { - kcm_push(kcm); - err = sk_stream_wait_memory(sk, &timeo); - if (err) - goto out_error; - + if (msg_data_left(msg)) { + /* New message, alloc head skb */ head = alloc_skb(0, sk->sk_allocation); - } + while (!head) { + kcm_push(kcm); + err = sk_stream_wait_memory(sk, &timeo); + if (err) + goto out_error; - skb = head; + head = alloc_skb(0, sk->sk_allocation); + } - /* Set ip_summed to CHECKSUM_UNNECESSARY to avoid calling - * csum_and_copy_from_iter from skb_do_copy_data_nocache. - */ - skb->ip_summed = CHECKSUM_UNNECESSARY; + skb = head; + + /* Set ip_summed to CHECKSUM_UNNECESSARY to avoid calling + * csum_and_copy_from_iter from skb_do_copy_data_nocache. + */ + skb->ip_summed = CHECKSUM_UNNECESSARY; + } start: while (msg_data_left(msg)) { @@ -1018,10 +1020,12 @@ wait_for_memory: if (eor) { bool not_busy = skb_queue_empty(&sk->sk_write_queue); - /* Message complete, queue it on send buffer */ - __skb_queue_tail(&sk->sk_write_queue, head); - kcm->seq_skb = NULL; - KCM_STATS_INCR(kcm->stats.tx_msgs); + if (head) { + /* Message complete, queue it on send buffer */ + __skb_queue_tail(&sk->sk_write_queue, head); + kcm->seq_skb = NULL; + KCM_STATS_INCR(kcm->stats.tx_msgs); + } if (msg->msg_flags & MSG_BATCH) { kcm->tx_wait_more = true; @@ -1040,8 +1044,10 @@ wait_for_memory: } else { /* Message not complete, save state */ partial_message: - kcm->seq_skb = head; - kcm_tx_msg(head)->last_skb = skb; + if (head) { + kcm->seq_skb = head; + kcm_tx_msg(head)->last_skb = skb; + } } KCM_STATS_ADD(kcm->stats.tx_bytes, copied); diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 8f560f7140a0..aebf281d09ee 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -263,6 +263,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops); void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type); +int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg); /* Session reference counts. Incremented when code obtains a reference * to a session. diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index b07a859f21bd..c59712057dc8 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -11,6 +11,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <asm/ioctls.h> #include <linux/icmp.h> #include <linux/module.h> #include <linux/skbuff.h> @@ -560,6 +561,30 @@ out: return err ? err : copied; } +int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg) +{ + struct sk_buff *skb; + int amount; + + switch (cmd) { + case SIOCOUTQ: + amount = sk_wmem_alloc_get(sk); + break; + case SIOCINQ: + spin_lock_bh(&sk->sk_receive_queue.lock); + skb = skb_peek(&sk->sk_receive_queue); + amount = skb ? skb->len : 0; + spin_unlock_bh(&sk->sk_receive_queue.lock); + break; + + default: + return -ENOIOCTLCMD; + } + + return put_user(amount, (int __user *)arg); +} +EXPORT_SYMBOL(l2tp_ioctl); + static struct proto l2tp_ip_prot = { .name = "L2TP/IP", .owner = THIS_MODULE, @@ -568,7 +593,7 @@ static struct proto l2tp_ip_prot = { .bind = l2tp_ip_bind, .connect = l2tp_ip_connect, .disconnect = l2tp_ip_disconnect, - .ioctl = udp_ioctl, + .ioctl = l2tp_ioctl, .destroy = l2tp_ip_destroy_sock, .setsockopt = ip_setsockopt, .getsockopt = ip_getsockopt, diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 4b06eb415f68..a4abcbc4c09a 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -658,7 +658,8 @@ out: return err < 0 ? err : len; do_confirm: - dst_confirm(dst); + if (msg->msg_flags & MSG_PROBE) + dst_confirm_neigh(dst, &fl6.daddr); if (!(msg->msg_flags & MSG_PROBE) || len) goto back_from_confirm; err = 0; @@ -730,7 +731,7 @@ static struct proto l2tp_ip6_prot = { .bind = l2tp_ip6_bind, .connect = l2tp_ip6_connect, .disconnect = l2tp_ip6_disconnect, - .ioctl = udp_ioctl, + .ioctl = l2tp_ioctl, .destroy = l2tp_ip6_destroy_sock, .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 3e821daf9dd4..8bc5a1bd2d45 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -821,7 +821,10 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb) * another trick required to cope with how the PROCOM state * machine works. -acme */ + skb_orphan(skb); + sock_hold(sk); skb->sk = sk; + skb->destructor = sock_efree; } if (!sock_owned_by_user(sk)) llc_conn_rcv(sk, skb); diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index d0e1e804ebd7..5404d0d195cc 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -290,7 +290,10 @@ static void llc_sap_rcv(struct llc_sap *sap, struct sk_buff *skb, ev->type = LLC_SAP_EV_TYPE_PDU; ev->reason = 0; + skb_orphan(skb); + sock_hold(sk); skb->sk = sk; + skb->destructor = sock_efree; llc_sap_state_process(sap, skb); } diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 3891cbd2adea..76e30f4797fb 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -6,6 +6,7 @@ config MAC80211 select CRYPTO_AES select CRYPTO_CCM select CRYPTO_GCM + select CRYPTO_CMAC select CRC32 ---help--- This option enables the hardware independent IEEE 802.11 diff --git a/net/mac80211/aes_cmac.c b/net/mac80211/aes_cmac.c index d0bd5fff5f0a..2fb65588490c 100644 --- a/net/mac80211/aes_cmac.c +++ b/net/mac80211/aes_cmac.c @@ -22,126 +22,50 @@ #define CMAC_TLEN_256 16 /* CMAC TLen = 128 bits (16 octets) */ #define AAD_LEN 20 +static const u8 zero[CMAC_TLEN_256]; -void gf_mulx(u8 *pad) -{ - int i, carry; - - carry = pad[0] & 0x80; - for (i = 0; i < AES_BLOCK_SIZE - 1; i++) - pad[i] = (pad[i] << 1) | (pad[i + 1] >> 7); - pad[AES_BLOCK_SIZE - 1] <<= 1; - if (carry) - pad[AES_BLOCK_SIZE - 1] ^= 0x87; -} - -void aes_cmac_vector(struct crypto_cipher *tfm, size_t num_elem, - const u8 *addr[], const size_t *len, u8 *mac, - size_t mac_len) -{ - u8 cbc[AES_BLOCK_SIZE], pad[AES_BLOCK_SIZE]; - const u8 *pos, *end; - size_t i, e, left, total_len; - - memset(cbc, 0, AES_BLOCK_SIZE); - - total_len = 0; - for (e = 0; e < num_elem; e++) - total_len += len[e]; - left = total_len; - - e = 0; - pos = addr[0]; - end = pos + len[0]; - - while (left >= AES_BLOCK_SIZE) { - for (i = 0; i < AES_BLOCK_SIZE; i++) { - cbc[i] ^= *pos++; - if (pos >= end) { - e++; - pos = addr[e]; - end = pos + len[e]; - } - } - if (left > AES_BLOCK_SIZE) - crypto_cipher_encrypt_one(tfm, cbc, cbc); - left -= AES_BLOCK_SIZE; - } - - memset(pad, 0, AES_BLOCK_SIZE); - crypto_cipher_encrypt_one(tfm, pad, pad); - gf_mulx(pad); - - if (left || total_len == 0) { - for (i = 0; i < left; i++) { - cbc[i] ^= *pos++; - if (pos >= end) { - e++; - pos = addr[e]; - end = pos + len[e]; - } - } - cbc[left] ^= 0x80; - gf_mulx(pad); - } - - for (i = 0; i < AES_BLOCK_SIZE; i++) - pad[i] ^= cbc[i]; - crypto_cipher_encrypt_one(tfm, pad, pad); - memcpy(mac, pad, mac_len); -} - - -void ieee80211_aes_cmac(struct crypto_cipher *tfm, const u8 *aad, +void ieee80211_aes_cmac(struct crypto_shash *tfm, const u8 *aad, const u8 *data, size_t data_len, u8 *mic) { - const u8 *addr[3]; - size_t len[3]; - u8 zero[CMAC_TLEN]; + SHASH_DESC_ON_STACK(desc, tfm); + u8 out[AES_BLOCK_SIZE]; - memset(zero, 0, CMAC_TLEN); - addr[0] = aad; - len[0] = AAD_LEN; - addr[1] = data; - len[1] = data_len - CMAC_TLEN; - addr[2] = zero; - len[2] = CMAC_TLEN; + desc->tfm = tfm; - aes_cmac_vector(tfm, 3, addr, len, mic, CMAC_TLEN); + crypto_shash_init(desc); + crypto_shash_update(desc, aad, AAD_LEN); + crypto_shash_update(desc, data, data_len - CMAC_TLEN); + crypto_shash_finup(desc, zero, CMAC_TLEN, out); + + memcpy(mic, out, CMAC_TLEN); } -void ieee80211_aes_cmac_256(struct crypto_cipher *tfm, const u8 *aad, +void ieee80211_aes_cmac_256(struct crypto_shash *tfm, const u8 *aad, const u8 *data, size_t data_len, u8 *mic) { - const u8 *addr[3]; - size_t len[3]; - u8 zero[CMAC_TLEN_256]; + SHASH_DESC_ON_STACK(desc, tfm); - memset(zero, 0, CMAC_TLEN_256); - addr[0] = aad; - len[0] = AAD_LEN; - addr[1] = data; - len[1] = data_len - CMAC_TLEN_256; - addr[2] = zero; - len[2] = CMAC_TLEN_256; + desc->tfm = tfm; - aes_cmac_vector(tfm, 3, addr, len, mic, CMAC_TLEN_256); + crypto_shash_init(desc); + crypto_shash_update(desc, aad, AAD_LEN); + crypto_shash_update(desc, data, data_len - CMAC_TLEN_256); + crypto_shash_finup(desc, zero, CMAC_TLEN_256, mic); } -struct crypto_cipher *ieee80211_aes_cmac_key_setup(const u8 key[], - size_t key_len) +struct crypto_shash *ieee80211_aes_cmac_key_setup(const u8 key[], + size_t key_len) { - struct crypto_cipher *tfm; + struct crypto_shash *tfm; - tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); + tfm = crypto_alloc_shash("cmac(aes)", 0, 0); if (!IS_ERR(tfm)) - crypto_cipher_setkey(tfm, key, key_len); + crypto_shash_setkey(tfm, key, key_len); return tfm; } - -void ieee80211_aes_cmac_key_free(struct crypto_cipher *tfm) +void ieee80211_aes_cmac_key_free(struct crypto_shash *tfm) { - crypto_free_cipher(tfm); + crypto_free_shash(tfm); } diff --git a/net/mac80211/aes_cmac.h b/net/mac80211/aes_cmac.h index c827e1d5de8b..fef531f42003 100644 --- a/net/mac80211/aes_cmac.h +++ b/net/mac80211/aes_cmac.h @@ -10,17 +10,14 @@ #define AES_CMAC_H #include <linux/crypto.h> +#include <crypto/hash.h> -void gf_mulx(u8 *pad); -void aes_cmac_vector(struct crypto_cipher *tfm, size_t num_elem, - const u8 *addr[], const size_t *len, u8 *mac, - size_t mac_len); -struct crypto_cipher *ieee80211_aes_cmac_key_setup(const u8 key[], - size_t key_len); -void ieee80211_aes_cmac(struct crypto_cipher *tfm, const u8 *aad, +struct crypto_shash *ieee80211_aes_cmac_key_setup(const u8 key[], + size_t key_len); +void ieee80211_aes_cmac(struct crypto_shash *tfm, const u8 *aad, const u8 *data, size_t data_len, u8 *mic); -void ieee80211_aes_cmac_256(struct crypto_cipher *tfm, const u8 *aad, +void ieee80211_aes_cmac_256(struct crypto_shash *tfm, const u8 *aad, const u8 *data, size_t data_len, u8 *mic); -void ieee80211_aes_cmac_key_free(struct crypto_cipher *tfm); +void ieee80211_aes_cmac_key_free(struct crypto_shash *tfm); #endif /* AES_CMAC_H */ diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index a0be2f6cd121..ac879bb17870 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -208,8 +208,8 @@ static int ieee80211_nan_change_conf(struct wiphy *wiphy, if (changes & CFG80211_NAN_CONF_CHANGED_PREF) new_conf.master_pref = conf->master_pref; - if (changes & CFG80211_NAN_CONF_CHANGED_DUAL) - new_conf.dual = conf->dual; + if (changes & CFG80211_NAN_CONF_CHANGED_BANDS) + new_conf.bands = conf->bands; ret = drv_nan_change_conf(sdata->local, sdata, &new_conf, changes); if (!ret) diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index f62cd0e13c58..5fae001f286c 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -249,12 +249,19 @@ static ssize_t misc_read(struct file *file, char __user *user_buf, struct ieee80211_local *local = file->private_data; /* Max len of each line is 16 characters, plus 9 for 'pending:\n' */ size_t bufsz = IEEE80211_MAX_QUEUES * 16 + 9; - char *buf = kzalloc(bufsz, GFP_KERNEL); - char *pos = buf, *end = buf + bufsz - 1; + char *buf; + char *pos, *end; ssize_t rv; int i; int ln; + buf = kzalloc(bufsz, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + pos = buf; + end = buf + bufsz - 1; + pos += scnprintf(pos, end - pos, "pending:\n"); for (i = 0; i < IEEE80211_MAX_QUEUES; i++) { @@ -356,6 +363,7 @@ void debugfs_hw_add(struct ieee80211_local *local) DEBUGFS_ADD(total_ps_buffered); DEBUGFS_ADD(wep_iv); + DEBUGFS_ADD(rate_ctrl_alg); DEBUGFS_ADD(queues); DEBUGFS_ADD(misc); #ifdef CONFIG_PM diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index f6003b8c2c33..42601820db20 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -522,6 +522,7 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) return; DEBUGFS_ADD(flags); + DEBUGFS_ADD(aid); DEBUGFS_ADD(num_ps_buf_frames); DEBUGFS_ADD(last_seq_ctrl); DEBUGFS_ADD(agg_status); diff --git a/net/mac80211/fils_aead.c b/net/mac80211/fils_aead.c index ecfdd97758a3..3cfb1e2ab7ac 100644 --- a/net/mac80211/fils_aead.c +++ b/net/mac80211/fils_aead.c @@ -9,66 +9,58 @@ #include <crypto/aes.h> #include <crypto/algapi.h> +#include <crypto/hash.h> #include <crypto/skcipher.h> #include "ieee80211_i.h" #include "aes_cmac.h" #include "fils_aead.h" -static int aes_s2v(struct crypto_cipher *tfm, +static void gf_mulx(u8 *pad) +{ + u64 a = get_unaligned_be64(pad); + u64 b = get_unaligned_be64(pad + 8); + + put_unaligned_be64((a << 1) | (b >> 63), pad); + put_unaligned_be64((b << 1) ^ ((a >> 63) ? 0x87 : 0), pad + 8); +} + +static int aes_s2v(struct crypto_shash *tfm, size_t num_elem, const u8 *addr[], size_t len[], u8 *v) { - u8 d[AES_BLOCK_SIZE], tmp[AES_BLOCK_SIZE]; + u8 d[AES_BLOCK_SIZE], tmp[AES_BLOCK_SIZE] = {}; + SHASH_DESC_ON_STACK(desc, tfm); size_t i; - const u8 *data[2]; - size_t data_len[2], data_elems; + + desc->tfm = tfm; /* D = AES-CMAC(K, <zero>) */ - memset(tmp, 0, AES_BLOCK_SIZE); - data[0] = tmp; - data_len[0] = AES_BLOCK_SIZE; - aes_cmac_vector(tfm, 1, data, data_len, d, AES_BLOCK_SIZE); + crypto_shash_digest(desc, tmp, AES_BLOCK_SIZE, d); for (i = 0; i < num_elem - 1; i++) { /* D = dbl(D) xor AES_CMAC(K, Si) */ gf_mulx(d); /* dbl */ - aes_cmac_vector(tfm, 1, &addr[i], &len[i], tmp, - AES_BLOCK_SIZE); + crypto_shash_digest(desc, addr[i], len[i], tmp); crypto_xor(d, tmp, AES_BLOCK_SIZE); } + crypto_shash_init(desc); + if (len[i] >= AES_BLOCK_SIZE) { /* len(Sn) >= 128 */ - size_t j; - const u8 *pos; - /* T = Sn xorend D */ - - /* Use a temporary buffer to perform xorend on Sn (addr[i]) to - * avoid modifying the const input argument. - */ - data[0] = addr[i]; - data_len[0] = len[i] - AES_BLOCK_SIZE; - pos = addr[i] + data_len[0]; - for (j = 0; j < AES_BLOCK_SIZE; j++) - tmp[j] = pos[j] ^ d[j]; - data[1] = tmp; - data_len[1] = AES_BLOCK_SIZE; - data_elems = 2; + crypto_shash_update(desc, addr[i], len[i] - AES_BLOCK_SIZE); + crypto_xor(d, addr[i] + len[i] - AES_BLOCK_SIZE, + AES_BLOCK_SIZE); } else { /* len(Sn) < 128 */ /* T = dbl(D) xor pad(Sn) */ gf_mulx(d); /* dbl */ - memset(tmp, 0, AES_BLOCK_SIZE); - memcpy(tmp, addr[i], len[i]); - tmp[len[i]] = 0x80; - crypto_xor(d, tmp, AES_BLOCK_SIZE); - data[0] = d; - data_len[0] = sizeof(d); - data_elems = 1; + crypto_xor(d, addr[i], len[i]); + d[len[i]] ^= 0x80; } /* V = AES-CMAC(K, T) */ - aes_cmac_vector(tfm, data_elems, data, data_len, v, AES_BLOCK_SIZE); + crypto_shash_finup(desc, d, AES_BLOCK_SIZE, v); return 0; } @@ -80,7 +72,7 @@ static int aes_siv_encrypt(const u8 *key, size_t key_len, size_t len[], u8 *out) { u8 v[AES_BLOCK_SIZE]; - struct crypto_cipher *tfm; + struct crypto_shash *tfm; struct crypto_skcipher *tfm2; struct skcipher_request *req; int res; @@ -95,14 +87,14 @@ static int aes_siv_encrypt(const u8 *key, size_t key_len, /* S2V */ - tfm = crypto_alloc_cipher("aes", 0, 0); + tfm = crypto_alloc_shash("cmac(aes)", 0, 0); if (IS_ERR(tfm)) return PTR_ERR(tfm); /* K1 for S2V */ - res = crypto_cipher_setkey(tfm, key, key_len); + res = crypto_shash_setkey(tfm, key, key_len); if (!res) res = aes_s2v(tfm, num_elem, addr, len, v); - crypto_free_cipher(tfm); + crypto_free_shash(tfm); if (res) return res; @@ -124,7 +116,7 @@ static int aes_siv_encrypt(const u8 *key, size_t key_len, /* CTR */ - tfm2 = crypto_alloc_skcipher("ctr(aes)", 0, 0); + tfm2 = crypto_alloc_skcipher("ctr(aes)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm2)) { kfree(tmp); return PTR_ERR(tfm2); @@ -157,7 +149,7 @@ static int aes_siv_decrypt(const u8 *key, size_t key_len, size_t num_elem, const u8 *addr[], size_t len[], u8 *out) { - struct crypto_cipher *tfm; + struct crypto_shash *tfm; struct crypto_skcipher *tfm2; struct skcipher_request *req; struct scatterlist src[1], dst[1]; @@ -183,7 +175,7 @@ static int aes_siv_decrypt(const u8 *key, size_t key_len, /* CTR */ - tfm2 = crypto_alloc_skcipher("ctr(aes)", 0, 0); + tfm2 = crypto_alloc_skcipher("ctr(aes)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm2)) return PTR_ERR(tfm2); /* K2 for CTR */ @@ -210,14 +202,14 @@ static int aes_siv_decrypt(const u8 *key, size_t key_len, /* S2V */ - tfm = crypto_alloc_cipher("aes", 0, 0); + tfm = crypto_alloc_shash("cmac(aes)", 0, 0); if (IS_ERR(tfm)) return PTR_ERR(tfm); /* K1 for S2V */ - res = crypto_cipher_setkey(tfm, key, key_len); + res = crypto_shash_setkey(tfm, key, key_len); if (!res) res = aes_s2v(tfm, num_elem, addr, len, check); - crypto_free_cipher(tfm); + crypto_free_shash(tfm); if (res) return res; if (memcmp(check, frame_iv, AES_BLOCK_SIZE) != 0) @@ -272,7 +264,7 @@ int fils_encrypt_assoc_req(struct sk_buff *skb, crypt_len = skb->data + skb->len - encr; skb_put(skb, AES_BLOCK_SIZE); return aes_siv_encrypt(assoc_data->fils_kek, assoc_data->fils_kek_len, - encr, crypt_len, 1, addr, len, encr); + encr, crypt_len, 5, addr, len, encr); } int fils_decrypt_assoc_resp(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index a31d30713d08..98999d3d5262 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -487,14 +487,14 @@ int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata, struct beacon_data *presp, *old_presp; struct cfg80211_bss *cbss; const struct cfg80211_bss_ies *ies; - u16 capability = 0; + u16 capability = WLAN_CAPABILITY_IBSS; u64 tsf; int ret = 0; sdata_assert_lock(sdata); if (ifibss->privacy) - capability = WLAN_CAPABILITY_PRIVACY; + capability |= WLAN_CAPABILITY_PRIVACY; cbss = cfg80211_get_bss(sdata->local->hw.wiphy, ifibss->chandef.chan, ifibss->bssid, ifibss->ssid, diff --git a/net/mac80211/key.h b/net/mac80211/key.h index 4aa20cef0859..ebdb80b85dc3 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -93,7 +93,7 @@ struct ieee80211_key { } ccmp; struct { u8 rx_pn[IEEE80211_CMAC_PN_LEN]; - struct crypto_cipher *tfm; + struct crypto_shash *tfm; u32 replays; /* dot11RSNAStatsCMACReplays */ u32 icverrors; /* dot11RSNAStatsCMACICVErrors */ } aes_cmac; diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 9c23172feba0..c28b0af9c1f2 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -335,7 +335,7 @@ int mesh_add_vendor_ies(struct ieee80211_sub_if_data *sdata, /* fast-forward to vendor IEs */ offset = ieee80211_ie_split_vendor(ifmsh->ie, ifmsh->ie_len, 0); - if (offset) { + if (offset < ifmsh->ie_len) { len = ifmsh->ie_len - offset; data = ifmsh->ie + offset; if (skb_tailroom(skb) < len) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 8a6344518674..6e90301154d5 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3419,14 +3419,14 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ieee80211_cqm_rssi_notify( &sdata->vif, NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW, - GFP_KERNEL); + sig, GFP_KERNEL); } else if (sig > thold && (last_event == 0 || sig > last_event + hyst)) { ifmgd->last_cqm_event_signal = sig; ieee80211_cqm_rssi_notify( &sdata->vif, NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH, - GFP_KERNEL); + sig, GFP_KERNEL); } } @@ -5041,13 +5041,14 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata) void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif, enum nl80211_cqm_rssi_threshold_event rssi_event, + s32 rssi_level, gfp_t gfp) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); - trace_api_cqm_rssi_notify(sdata, rssi_event); + trace_api_cqm_rssi_notify(sdata, rssi_event, rssi_level); - cfg80211_cqm_rssi_notify(sdata->dev, rssi_event, gfp); + cfg80211_cqm_rssi_notify(sdata->dev, rssi_event, rssi_level, gfp); } EXPORT_SYMBOL(ieee80211_cqm_rssi_notify); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index b791c4190564..50ca3828b124 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1391,7 +1391,7 @@ EXPORT_SYMBOL(ieee80211_sta_pspoll); void ieee80211_sta_uapsd_trigger(struct ieee80211_sta *pubsta, u8 tid) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); - u8 ac = ieee802_1d_to_ac[tid & 7]; + int ac = ieee80211_ac_from_tid(tid); /* * If this AC is not trigger-enabled do nothing unless the diff --git a/net/mac80211/status.c b/net/mac80211/status.c index d6a1bfaa7a81..a3af6e1bfd98 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -95,7 +95,7 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, */ if (*p & IEEE80211_QOS_CTL_EOSP) *p &= ~IEEE80211_QOS_CTL_EOSP; - ac = ieee802_1d_to_ac[tid & 7]; + ac = ieee80211_ac_from_tid(tid); } else { ac = IEEE80211_AC_BE; } diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 92a47afaa989..0d645bc148d0 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -1736,21 +1736,21 @@ TRACE_EVENT(drv_start_nan, LOCAL_ENTRY VIF_ENTRY __field(u8, master_pref) - __field(u8, dual) + __field(u8, bands) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; __entry->master_pref = conf->master_pref; - __entry->dual = conf->dual; + __entry->bands = conf->bands; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT - ", master preference: %u, dual: %d", + ", master preference: %u, bands: 0x%0x", LOCAL_PR_ARG, VIF_PR_ARG, __entry->master_pref, - __entry->dual + __entry->bands ) ); @@ -1787,7 +1787,7 @@ TRACE_EVENT(drv_nan_change_conf, LOCAL_ENTRY VIF_ENTRY __field(u8, master_pref) - __field(u8, dual) + __field(u8, bands) __field(u32, changes) ), @@ -1795,15 +1795,15 @@ TRACE_EVENT(drv_nan_change_conf, LOCAL_ASSIGN; VIF_ASSIGN; __entry->master_pref = conf->master_pref; - __entry->dual = conf->dual; + __entry->bands = conf->bands; __entry->changes = changes; ), TP_printk( LOCAL_PR_FMT VIF_PR_FMT - ", master preference: %u, dual: %d, changes: 0x%x", + ", master preference: %u, bands: 0x%0x, changes: 0x%x", LOCAL_PR_ARG, VIF_PR_ARG, __entry->master_pref, - __entry->dual, __entry->changes + __entry->bands, __entry->changes ) ); @@ -1996,23 +1996,26 @@ TRACE_EVENT(api_connection_loss, TRACE_EVENT(api_cqm_rssi_notify, TP_PROTO(struct ieee80211_sub_if_data *sdata, - enum nl80211_cqm_rssi_threshold_event rssi_event), + enum nl80211_cqm_rssi_threshold_event rssi_event, + s32 rssi_level), - TP_ARGS(sdata, rssi_event), + TP_ARGS(sdata, rssi_event, rssi_level), TP_STRUCT__entry( VIF_ENTRY __field(u32, rssi_event) + __field(s32, rssi_level) ), TP_fast_assign( VIF_ASSIGN; __entry->rssi_event = rssi_event; + __entry->rssi_level = rssi_level; ), TP_printk( - VIF_PR_FMT " event:%d", - VIF_PR_ARG, __entry->rssi_event + VIF_PR_FMT " event:%d rssi:%d", + VIF_PR_ARG, __entry->rssi_event, __entry->rssi_level ) ); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 986de098803d..ba8d7db0a071 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1414,7 +1414,7 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata, txqi->txq.sta = &sta->sta; sta->sta.txq[tid] = &txqi->txq; txqi->txq.tid = tid; - txqi->txq.ac = ieee802_1d_to_ac[tid & 7]; + txqi->txq.ac = ieee80211_ac_from_tid(tid); } else { sdata->vif.txq = &txqi->txq; txqi->txq.tid = 0; @@ -4659,7 +4659,7 @@ void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, int tid, enum nl80211_band band) { - int ac = ieee802_1d_to_ac[tid & 7]; + int ac = ieee80211_ac_from_tid(tid); skb_reset_mac_header(skb); skb_set_queue_mapping(skb, ac); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index dfbe9deeb8c4..9b28864cc36a 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -467,10 +467,10 @@ config NF_TABLES_NETDEV This option enables support for the "netdev" table. config NFT_EXTHDR - tristate "Netfilter nf_tables IPv6 exthdr module" + tristate "Netfilter nf_tables exthdr module" help This option adds the "exthdr" expression that you can use to match - IPv6 extension headers. + IPv6 extension headers and tcp options. config NFT_META tristate "Netfilter nf_tables meta module" @@ -509,6 +509,12 @@ config NFT_SET_HASH This option adds the "hash" set type that is used to build one-way mappings between matchings and actions. +config NFT_SET_BITMAP + tristate "Netfilter nf_tables bitmap set module" + help + This option adds the "bitmap" set type that is used to build sets + whose keys are smaller or equal to 16 bits. + config NFT_COUNTER tristate "Netfilter nf_tables counter module" help diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 6b3034f12661..c9b78e7b342f 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -93,6 +93,7 @@ obj-$(CONFIG_NFT_REJECT) += nft_reject.o obj-$(CONFIG_NFT_REJECT_INET) += nft_reject_inet.o obj-$(CONFIG_NFT_SET_RBTREE) += nft_set_rbtree.o obj-$(CONFIG_NFT_SET_HASH) += nft_set_hash.o +obj-$(CONFIG_NFT_SET_BITMAP) += nft_set_bitmap.o obj-$(CONFIG_NFT_COUNTER) += nft_counter.o obj-$(CONFIG_NFT_LOG) += nft_log.o obj-$(CONFIG_NFT_MASQ) += nft_masq.o diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index f8dbacf66795..e19a69787d99 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -353,7 +353,7 @@ void nf_ct_expect_put(struct nf_conntrack_expect *exp) } EXPORT_SYMBOL_GPL(nf_ct_expect_put); -static int nf_ct_expect_insert(struct nf_conntrack_expect *exp) +static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) { struct nf_conn_help *master_help = nfct_help(exp->master); struct nf_conntrack_helper *helper; @@ -380,7 +380,6 @@ static int nf_ct_expect_insert(struct nf_conntrack_expect *exp) add_timer(&exp->timeout); NF_CT_STAT_INC(net, expect_create); - return 0; } /* Race with expectations being used means we could have none to find; OK. */ @@ -464,9 +463,8 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, if (ret <= 0) goto out; - ret = nf_ct_expect_insert(expect); - if (ret < 0) - goto out; + nf_ct_expect_insert(expect); + spin_unlock_bh(&nf_conntrack_expect_lock); nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report); return ret; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index c3fc14e021ec..24174c520239 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -809,13 +809,11 @@ static int refresh_signalling_expectation(struct nf_conn *ct, exp->tuple.dst.protonum != proto || exp->tuple.dst.u.udp.port != port) continue; - if (!del_timer(&exp->timeout)) - continue; - exp->flags &= ~NF_CT_EXPECT_INACTIVE; - exp->timeout.expires = jiffies + expires * HZ; - add_timer(&exp->timeout); - found = 1; - break; + if (mod_timer_pending(&exp->timeout, jiffies + expires * HZ)) { + exp->flags &= ~NF_CT_EXPECT_INACTIVE; + found = 1; + break; + } } spin_unlock_bh(&nf_conntrack_expect_lock); return found; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 57eeae63f597..ff7304ae58ac 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -240,6 +240,10 @@ static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type, if (trans == NULL) return NULL; + if (msg_type == NFT_MSG_NEWRULE && ctx->nla[NFTA_RULE_ID] != NULL) { + nft_trans_rule_id(trans) = + ntohl(nla_get_be32(ctx->nla[NFTA_RULE_ID])); + } nft_trans_rule(trans) = rule; list_add_tail(&trans->list, &ctx->net->nft.commit_list); @@ -2293,6 +2297,22 @@ err1: return err; } +static struct nft_rule *nft_rule_lookup_byid(const struct net *net, + const struct nlattr *nla) +{ + u32 id = ntohl(nla_get_be32(nla)); + struct nft_trans *trans; + + list_for_each_entry(trans, &net->nft.commit_list, list) { + struct nft_rule *rule = nft_trans_rule(trans); + + if (trans->msg_type == NFT_MSG_NEWRULE && + id == nft_trans_rule_id(trans)) + return rule; + } + return ERR_PTR(-ENOENT); +} + static int nf_tables_delrule(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -2331,6 +2351,12 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk, return PTR_ERR(rule); err = nft_delrule(&ctx, rule); + } else if (nla[NFTA_RULE_ID]) { + rule = nft_rule_lookup_byid(net, nla[NFTA_RULE_ID]); + if (IS_ERR(rule)) + return PTR_ERR(rule); + + err = nft_delrule(&ctx, rule); } else { err = nft_delrule_by_chain(&ctx); } @@ -2398,12 +2424,14 @@ nft_select_set_ops(const struct nlattr * const nla[], features = 0; if (nla[NFTA_SET_FLAGS] != NULL) { features = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS])); - features &= NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_TIMEOUT; + features &= NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_TIMEOUT | + NFT_SET_OBJECT; } - bops = NULL; - best.size = ~0; - best.class = ~0; + bops = NULL; + best.size = ~0; + best.lookup = ~0; + best.space = ~0; list_for_each_entry(ops, &nf_tables_set_ops, list) { if ((ops->features & features) != features) @@ -2413,16 +2441,27 @@ nft_select_set_ops(const struct nlattr * const nla[], switch (policy) { case NFT_SET_POL_PERFORMANCE: - if (est.class < best.class) - break; - if (est.class == best.class && est.size < best.size) + if (est.lookup < best.lookup) break; + if (est.lookup == best.lookup) { + if (!desc->size) { + if (est.space < best.space) + break; + } else if (est.size < best.size) { + break; + } + } continue; case NFT_SET_POL_MEMORY: - if (est.size < best.size) - break; - if (est.size == best.size && est.class < best.class) + if (!desc->size) { + if (est.space < best.space) + break; + if (est.space == best.space && + est.lookup < best.lookup) + break; + } else if (est.size < best.size) { break; + } continue; default: break; @@ -3121,6 +3160,7 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, iter.count = 0; iter.err = 0; iter.fn = nf_tables_bind_check_setelem; + iter.flush = false; set->ops->walk(ctx, set, &iter); if (iter.err < 0) @@ -3374,6 +3414,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) args.iter.count = 0; args.iter.err = 0; args.iter.fn = nf_tables_dump_setelem; + args.iter.flush = false; set->ops->walk(&ctx, set, &args.iter); nla_nest_end(skb, nest); @@ -3752,7 +3793,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, return 0; err6: - set->ops->remove(set, &elem); + set->ops->remove(ctx->net, set, &elem); err5: kfree(trans); err4: @@ -3898,7 +3939,7 @@ static int nft_flush_set(const struct nft_ctx *ctx, if (!trans) return -ENOMEM; - if (!set->ops->deactivate_one(ctx->net, set, elem->priv)) { + if (!set->ops->flush(ctx->net, set, elem->priv)) { err = -ENOENT; goto err1; } @@ -3936,15 +3977,14 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, return -EBUSY; if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) { - struct nft_set_dump_args args = { - .iter = { - .genmask = genmask, - .fn = nft_flush_set, - }, + struct nft_set_iter iter = { + .genmask = genmask, + .fn = nft_flush_set, + .flush = true, }; - set->ops->walk(&ctx, set, &args.iter); + set->ops->walk(&ctx, set, &iter); - return args.iter.err; + return iter.err; } nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { @@ -4804,7 +4844,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_setelem_notify(&trans->ctx, te->set, &te->elem, NFT_MSG_DELSETELEM, 0); - te->set->ops->remove(te->set, &te->elem); + te->set->ops->remove(net, te->set, &te->elem); atomic_dec(&te->set->nelems); te->set->ndeact--; break; @@ -4925,7 +4965,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) case NFT_MSG_NEWSETELEM: te = (struct nft_trans_elem *)trans->data; - te->set->ops->remove(te->set, &te->elem); + te->set->ops->remove(net, te->set, &te->elem); atomic_dec(&te->set->nelems); break; case NFT_MSG_DELSETELEM: @@ -4959,6 +4999,11 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) return 0; } +static bool nf_tables_valid_genid(struct net *net, u32 genid) +{ + return net->nft.base_seq == genid; +} + static const struct nfnetlink_subsystem nf_tables_subsys = { .name = "nf_tables", .subsys_id = NFNL_SUBSYS_NFTABLES, @@ -4966,6 +5011,7 @@ static const struct nfnetlink_subsystem nf_tables_subsys = { .cb = nf_tables_cb, .commit = nf_tables_commit, .abort = nf_tables_abort, + .valid_genid = nf_tables_valid_genid, }; int nft_chain_validate_dependency(const struct nft_chain *chain, @@ -5091,6 +5137,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, iter.count = 0; iter.err = 0; iter.fn = nf_tables_loop_check_setelem; + iter.flush = false; set->ops->walk(ctx, set, &iter); if (iter.err < 0) diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index a09fa9fd8f3d..a2148d0bc50e 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -3,7 +3,7 @@ * * (C) 2001 by Jay Schulist <jschlst@samba.org>, * (C) 2002-2005 by Harald Welte <laforge@gnumonks.org> - * (C) 2005,2007 by Pablo Neira Ayuso <pablo@netfilter.org> + * (C) 2005-2017 by Pablo Neira Ayuso <pablo@netfilter.org> * * Initial netfilter messages via netlink development funded and * generally made possible by Network Robots, Inc. (www.networkrobots.com) @@ -100,9 +100,9 @@ int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n) } EXPORT_SYMBOL_GPL(nfnetlink_subsys_unregister); -static inline const struct nfnetlink_subsystem *nfnetlink_get_subsys(u_int16_t type) +static inline const struct nfnetlink_subsystem *nfnetlink_get_subsys(u16 type) { - u_int8_t subsys_id = NFNL_SUBSYS_ID(type); + u8 subsys_id = NFNL_SUBSYS_ID(type); if (subsys_id >= NFNL_SUBSYS_COUNT) return NULL; @@ -111,9 +111,9 @@ static inline const struct nfnetlink_subsystem *nfnetlink_get_subsys(u_int16_t t } static inline const struct nfnl_callback * -nfnetlink_find_client(u_int16_t type, const struct nfnetlink_subsystem *ss) +nfnetlink_find_client(u16 type, const struct nfnetlink_subsystem *ss) { - u_int8_t cb_id = NFNL_MSG_TYPE(type); + u8 cb_id = NFNL_MSG_TYPE(type); if (cb_id >= ss->cb_count) return NULL; @@ -185,7 +185,7 @@ replay: { int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); - u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); + u8 cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); struct nlattr *cda[ss->cb[cb_id].attr_count + 1]; struct nlattr *attr = (void *)nlh + min_len; int attrlen = nlh->nlmsg_len - min_len; @@ -273,7 +273,7 @@ enum { }; static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, - u_int16_t subsys_id) + u16 subsys_id, u32 genid) { struct sk_buff *oskb = skb; struct net *net = sock_net(skb->sk); @@ -315,6 +315,12 @@ replay: return kfree_skb(skb); } + if (genid && ss->valid_genid && !ss->valid_genid(net, genid)) { + nfnl_unlock(subsys_id); + netlink_ack(oskb, nlh, -ERESTART); + return kfree_skb(skb); + } + while (skb->len >= nlmsg_total_size(0)) { int msglen, type; @@ -365,7 +371,7 @@ replay: { int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); - u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); + u8 cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); struct nlattr *cda[ss->cb[cb_id].attr_count + 1]; struct nlattr *attr = (void *)nlh + min_len; int attrlen = nlh->nlmsg_len - min_len; @@ -436,11 +442,51 @@ done: kfree_skb(skb); } +static const struct nla_policy nfnl_batch_policy[NFNL_BATCH_MAX + 1] = { + [NFNL_BATCH_GENID] = { .type = NLA_U32 }, +}; + +static void nfnetlink_rcv_skb_batch(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); + struct nlattr *attr = (void *)nlh + min_len; + struct nlattr *cda[NFNL_BATCH_MAX + 1]; + int attrlen = nlh->nlmsg_len - min_len; + struct nfgenmsg *nfgenmsg; + int msglen, err; + u32 gen_id = 0; + u16 res_id; + + msglen = NLMSG_ALIGN(nlh->nlmsg_len); + if (msglen > skb->len) + msglen = skb->len; + + if (nlh->nlmsg_len < NLMSG_HDRLEN || + skb->len < NLMSG_HDRLEN + sizeof(struct nfgenmsg)) + return; + + err = nla_parse(cda, NFNL_BATCH_MAX, attr, attrlen, nfnl_batch_policy); + if (err < 0) { + netlink_ack(skb, nlh, err); + return; + } + if (cda[NFNL_BATCH_GENID]) + gen_id = ntohl(nla_get_be32(cda[NFNL_BATCH_GENID])); + + nfgenmsg = nlmsg_data(nlh); + skb_pull(skb, msglen); + /* Work around old nft using host byte order */ + if (nfgenmsg->res_id == NFNL_SUBSYS_NFTABLES) + res_id = NFNL_SUBSYS_NFTABLES; + else + res_id = ntohs(nfgenmsg->res_id); + + nfnetlink_rcv_batch(skb, nlh, res_id, gen_id); +} + static void nfnetlink_rcv(struct sk_buff *skb) { struct nlmsghdr *nlh = nlmsg_hdr(skb); - u_int16_t res_id; - int msglen; if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len) @@ -451,28 +497,10 @@ static void nfnetlink_rcv(struct sk_buff *skb) return; } - if (nlh->nlmsg_type == NFNL_MSG_BATCH_BEGIN) { - struct nfgenmsg *nfgenmsg; - - msglen = NLMSG_ALIGN(nlh->nlmsg_len); - if (msglen > skb->len) - msglen = skb->len; - - if (nlh->nlmsg_len < NLMSG_HDRLEN || - skb->len < NLMSG_HDRLEN + sizeof(struct nfgenmsg)) - return; - - nfgenmsg = nlmsg_data(nlh); - skb_pull(skb, msglen); - /* Work around old nft using host byte order */ - if (nfgenmsg->res_id == NFNL_SUBSYS_NFTABLES) - res_id = NFNL_SUBSYS_NFTABLES; - else - res_id = ntohs(nfgenmsg->res_id); - nfnetlink_rcv_batch(skb, nlh, res_id); - } else { + if (nlh->nlmsg_type == NFNL_MSG_BATCH_BEGIN) + nfnetlink_rcv_skb_batch(skb, nlh); + else netlink_rcv_skb(skb, &nfnetlink_rcv_msg); - } } #ifdef CONFIG_MODULES diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 66a2377510e1..c6b8022c0e47 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -32,6 +32,11 @@ struct nft_ct { }; }; +#ifdef CONFIG_NF_CONNTRACK_ZONES +static DEFINE_PER_CPU(struct nf_conn *, nft_ct_pcpu_template); +static unsigned int nft_ct_pcpu_template_refcnt __read_mostly; +#endif + static u64 nft_ct_get_eval_counter(const struct nf_conn_counter *c, enum nft_ct_keys k, enum ip_conntrack_dir d) @@ -151,6 +156,18 @@ static void nft_ct_get_eval(const struct nft_expr *expr, case NFT_CT_PROTOCOL: *dest = nf_ct_protonum(ct); return; +#ifdef CONFIG_NF_CONNTRACK_ZONES + case NFT_CT_ZONE: { + const struct nf_conntrack_zone *zone = nf_ct_zone(ct); + + if (priv->dir < IP_CT_DIR_MAX) + *dest = nf_ct_zone_id(zone, priv->dir); + else + *dest = zone->id; + + return; + } +#endif default: break; } @@ -179,6 +196,53 @@ err: regs->verdict.code = NFT_BREAK; } +#ifdef CONFIG_NF_CONNTRACK_ZONES +static void nft_ct_set_zone_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nf_conntrack_zone zone = { .dir = NF_CT_DEFAULT_ZONE_DIR }; + const struct nft_ct *priv = nft_expr_priv(expr); + struct sk_buff *skb = pkt->skb; + enum ip_conntrack_info ctinfo; + u16 value = regs->data[priv->sreg]; + struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + if (ct) /* already tracked */ + return; + + zone.id = value; + + switch (priv->dir) { + case IP_CT_DIR_ORIGINAL: + zone.dir = NF_CT_ZONE_DIR_ORIG; + break; + case IP_CT_DIR_REPLY: + zone.dir = NF_CT_ZONE_DIR_REPL; + break; + default: + break; + } + + ct = this_cpu_read(nft_ct_pcpu_template); + + if (likely(atomic_read(&ct->ct_general.use) == 1)) { + nf_ct_zone_add(ct, &zone); + } else { + /* previous skb got queued to userspace */ + ct = nf_ct_tmpl_alloc(nft_net(pkt), &zone, GFP_ATOMIC); + if (!ct) { + regs->verdict.code = NF_DROP; + return; + } + } + + atomic_inc(&ct->ct_general.use); + nf_ct_set(skb, ct, IP_CT_NEW); +} +#endif + static void nft_ct_set_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -257,6 +321,45 @@ static void nft_ct_netns_put(struct net *net, uint8_t family) nf_ct_netns_put(net, family); } +#ifdef CONFIG_NF_CONNTRACK_ZONES +static void nft_ct_tmpl_put_pcpu(void) +{ + struct nf_conn *ct; + int cpu; + + for_each_possible_cpu(cpu) { + ct = per_cpu(nft_ct_pcpu_template, cpu); + if (!ct) + break; + nf_ct_put(ct); + per_cpu(nft_ct_pcpu_template, cpu) = NULL; + } +} + +static bool nft_ct_tmpl_alloc_pcpu(void) +{ + struct nf_conntrack_zone zone = { .id = 0 }; + struct nf_conn *tmp; + int cpu; + + if (nft_ct_pcpu_template_refcnt) + return true; + + for_each_possible_cpu(cpu) { + tmp = nf_ct_tmpl_alloc(&init_net, &zone, GFP_KERNEL); + if (!tmp) { + nft_ct_tmpl_put_pcpu(); + return false; + } + + atomic_set(&tmp->ct_general.use, 1); + per_cpu(nft_ct_pcpu_template, cpu) = tmp; + } + + return true; +} +#endif + static int nft_ct_get_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) @@ -266,6 +369,7 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, int err; priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY])); + priv->dir = IP_CT_DIR_MAX; switch (priv->key) { case NFT_CT_DIRECTION: if (tb[NFTA_CT_DIRECTION] != NULL) @@ -333,11 +437,13 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, case NFT_CT_BYTES: case NFT_CT_PKTS: case NFT_CT_AVGPKT: - /* no direction? return sum of original + reply */ - if (tb[NFTA_CT_DIRECTION] == NULL) - priv->dir = IP_CT_DIR_MAX; len = sizeof(u64); break; +#ifdef CONFIG_NF_CONNTRACK_ZONES + case NFT_CT_ZONE: + len = sizeof(u16); + break; +#endif default: return -EOPNOTSUPP; } @@ -371,15 +477,33 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, return 0; } +static void __nft_ct_set_destroy(const struct nft_ctx *ctx, struct nft_ct *priv) +{ + switch (priv->key) { +#ifdef CONFIG_NF_CONNTRACK_LABELS + case NFT_CT_LABELS: + nf_connlabels_put(ctx->net); + break; +#endif +#ifdef CONFIG_NF_CONNTRACK_ZONES + case NFT_CT_ZONE: + if (--nft_ct_pcpu_template_refcnt == 0) + nft_ct_tmpl_put_pcpu(); +#endif + default: + break; + } +} + static int nft_ct_set_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_ct *priv = nft_expr_priv(expr); - bool label_got = false; unsigned int len; int err; + priv->dir = IP_CT_DIR_MAX; priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY])); switch (priv->key) { #ifdef CONFIG_NF_CONNTRACK_MARK @@ -397,13 +521,30 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, err = nf_connlabels_get(ctx->net, (len * BITS_PER_BYTE) - 1); if (err) return err; - label_got = true; + break; +#endif +#ifdef CONFIG_NF_CONNTRACK_ZONES + case NFT_CT_ZONE: + if (!nft_ct_tmpl_alloc_pcpu()) + return -ENOMEM; + nft_ct_pcpu_template_refcnt++; break; #endif default: return -EOPNOTSUPP; } + if (tb[NFTA_CT_DIRECTION]) { + priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]); + switch (priv->dir) { + case IP_CT_DIR_ORIGINAL: + case IP_CT_DIR_REPLY: + break; + default: + return -EINVAL; + } + } + priv->sreg = nft_parse_register(tb[NFTA_CT_SREG]); err = nft_validate_register_load(priv->sreg, len); if (err < 0) @@ -416,8 +557,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, return 0; err1: - if (label_got) - nf_connlabels_put(ctx->net); + __nft_ct_set_destroy(ctx, priv); return err; } @@ -432,16 +572,7 @@ static void nft_ct_set_destroy(const struct nft_ctx *ctx, { struct nft_ct *priv = nft_expr_priv(expr); - switch (priv->key) { -#ifdef CONFIG_NF_CONNTRACK_LABELS - case NFT_CT_LABELS: - nf_connlabels_put(ctx->net); - break; -#endif - default: - break; - } - + __nft_ct_set_destroy(ctx, priv); nft_ct_netns_put(ctx->net, ctx->afi->family); } @@ -465,6 +596,7 @@ static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr) case NFT_CT_BYTES: case NFT_CT_PKTS: case NFT_CT_AVGPKT: + case NFT_CT_ZONE: if (priv->dir < IP_CT_DIR_MAX && nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir)) goto nla_put_failure; @@ -487,6 +619,17 @@ static int nft_ct_set_dump(struct sk_buff *skb, const struct nft_expr *expr) goto nla_put_failure; if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key))) goto nla_put_failure; + + switch (priv->key) { + case NFT_CT_ZONE: + if (priv->dir < IP_CT_DIR_MAX && + nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir)) + goto nla_put_failure; + break; + default: + break; + } + return 0; nla_put_failure: @@ -512,6 +655,17 @@ static const struct nft_expr_ops nft_ct_set_ops = { .dump = nft_ct_set_dump, }; +#ifdef CONFIG_NF_CONNTRACK_ZONES +static const struct nft_expr_ops nft_ct_set_zone_ops = { + .type = &nft_ct_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)), + .eval = nft_ct_set_zone_eval, + .init = nft_ct_set_init, + .destroy = nft_ct_set_destroy, + .dump = nft_ct_set_dump, +}; +#endif + static const struct nft_expr_ops * nft_ct_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) @@ -525,8 +679,13 @@ nft_ct_select_ops(const struct nft_ctx *ctx, if (tb[NFTA_CT_DREG]) return &nft_ct_get_ops; - if (tb[NFTA_CT_SREG]) + if (tb[NFTA_CT_SREG]) { +#ifdef CONFIG_NF_CONNTRACK_ZONES + if (nla_get_be32(tb[NFTA_CT_KEY]) == htonl(NFT_CT_ZONE)) + return &nft_ct_set_zone_ops; +#endif return &nft_ct_set_ops; + } return ERR_PTR(-EINVAL); } diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 47beb3abcc9d..c308920b194c 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -15,19 +15,29 @@ #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> -// FIXME: -#include <net/ipv6.h> +#include <net/tcp.h> struct nft_exthdr { u8 type; u8 offset; u8 len; + u8 op; enum nft_registers dreg:8; + u8 flags; }; -static void nft_exthdr_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +static unsigned int optlen(const u8 *opt, unsigned int offset) +{ + /* Beware zero-length options: make finite progress */ + if (opt[offset] <= TCPOPT_NOP || opt[offset + 1] == 0) + return 1; + else + return opt[offset + 1]; +} + +static void nft_exthdr_ipv6_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) { struct nft_exthdr *priv = nft_expr_priv(expr); u32 *dest = ®s->data[priv->dreg]; @@ -35,8 +45,12 @@ static void nft_exthdr_eval(const struct nft_expr *expr, int err; err = ipv6_find_hdr(pkt->skb, &offset, priv->type, NULL, NULL); - if (err < 0) + if (priv->flags & NFT_EXTHDR_F_PRESENT) { + *dest = (err >= 0); + return; + } else if (err < 0) { goto err; + } offset += priv->offset; dest[priv->len / NFT_REG32_SIZE] = 0; @@ -47,11 +61,59 @@ err: regs->verdict.code = NFT_BREAK; } +static void nft_exthdr_tcp_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + u8 buff[sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE]; + struct nft_exthdr *priv = nft_expr_priv(expr); + unsigned int i, optl, tcphdr_len, offset; + u32 *dest = ®s->data[priv->dreg]; + struct tcphdr *tcph; + u8 *opt; + + if (!pkt->tprot_set || pkt->tprot != IPPROTO_TCP) + goto err; + + tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, sizeof(*tcph), buff); + if (!tcph) + goto err; + + tcphdr_len = __tcp_hdrlen(tcph); + if (tcphdr_len < sizeof(*tcph)) + goto err; + + tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, tcphdr_len, buff); + if (!tcph) + goto err; + + opt = (u8 *)tcph; + for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) { + optl = optlen(opt, i); + + if (priv->type != opt[i]) + continue; + + if (i + optl > tcphdr_len || priv->len + priv->offset > optl) + goto err; + + offset = i + priv->offset; + dest[priv->len / NFT_REG32_SIZE] = 0; + memcpy(dest, opt + offset, priv->len); + + return; + } + +err: + regs->verdict.code = NFT_BREAK; +} + static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = { [NFTA_EXTHDR_DREG] = { .type = NLA_U32 }, [NFTA_EXTHDR_TYPE] = { .type = NLA_U8 }, [NFTA_EXTHDR_OFFSET] = { .type = NLA_U32 }, [NFTA_EXTHDR_LEN] = { .type = NLA_U32 }, + [NFTA_EXTHDR_FLAGS] = { .type = NLA_U32 }, }; static int nft_exthdr_init(const struct nft_ctx *ctx, @@ -59,13 +121,13 @@ static int nft_exthdr_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_exthdr *priv = nft_expr_priv(expr); - u32 offset, len; + u32 offset, len, flags = 0, op = NFT_EXTHDR_OP_IPV6; int err; - if (tb[NFTA_EXTHDR_DREG] == NULL || - tb[NFTA_EXTHDR_TYPE] == NULL || - tb[NFTA_EXTHDR_OFFSET] == NULL || - tb[NFTA_EXTHDR_LEN] == NULL) + if (!tb[NFTA_EXTHDR_DREG] || + !tb[NFTA_EXTHDR_TYPE] || + !tb[NFTA_EXTHDR_OFFSET] || + !tb[NFTA_EXTHDR_LEN]) return -EINVAL; err = nft_parse_u32_check(tb[NFTA_EXTHDR_OFFSET], U8_MAX, &offset); @@ -76,10 +138,27 @@ static int nft_exthdr_init(const struct nft_ctx *ctx, if (err < 0) return err; + if (tb[NFTA_EXTHDR_FLAGS]) { + err = nft_parse_u32_check(tb[NFTA_EXTHDR_FLAGS], U8_MAX, &flags); + if (err < 0) + return err; + + if (flags & ~NFT_EXTHDR_F_PRESENT) + return -EINVAL; + } + + if (tb[NFTA_EXTHDR_OP]) { + err = nft_parse_u32_check(tb[NFTA_EXTHDR_OP], U8_MAX, &op); + if (err < 0) + return err; + } + priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]); priv->offset = offset; priv->len = len; priv->dreg = nft_parse_register(tb[NFTA_EXTHDR_DREG]); + priv->flags = flags; + priv->op = op; return nft_validate_register_store(ctx, priv->dreg, NULL, NFT_DATA_VALUE, priv->len); @@ -97,6 +176,10 @@ static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr) goto nla_put_failure; if (nla_put_be32(skb, NFTA_EXTHDR_LEN, htonl(priv->len))) goto nla_put_failure; + if (nla_put_be32(skb, NFTA_EXTHDR_FLAGS, htonl(priv->flags))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_EXTHDR_OP, htonl(priv->op))) + goto nla_put_failure; return 0; nla_put_failure: @@ -104,17 +187,45 @@ nla_put_failure: } static struct nft_expr_type nft_exthdr_type; -static const struct nft_expr_ops nft_exthdr_ops = { +static const struct nft_expr_ops nft_exthdr_ipv6_ops = { .type = &nft_exthdr_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), - .eval = nft_exthdr_eval, + .eval = nft_exthdr_ipv6_eval, .init = nft_exthdr_init, .dump = nft_exthdr_dump, }; +static const struct nft_expr_ops nft_exthdr_tcp_ops = { + .type = &nft_exthdr_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), + .eval = nft_exthdr_tcp_eval, + .init = nft_exthdr_init, + .dump = nft_exthdr_dump, +}; + +static const struct nft_expr_ops * +nft_exthdr_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + u32 op; + + if (!tb[NFTA_EXTHDR_OP]) + return &nft_exthdr_ipv6_ops; + + op = ntohl(nla_get_u32(tb[NFTA_EXTHDR_OP])); + switch (op) { + case NFT_EXTHDR_OP_TCPOPT: + return &nft_exthdr_tcp_ops; + case NFT_EXTHDR_OP_IPV6: + return &nft_exthdr_ipv6_ops; + } + + return ERR_PTR(-EOPNOTSUPP); +} + static struct nft_expr_type nft_exthdr_type __read_mostly = { .name = "exthdr", - .ops = &nft_exthdr_ops, + .select_ops = &nft_exthdr_select_ops, .policy = nft_exthdr_policy, .maxattr = NFTA_EXTHDR_MAX, .owner = THIS_MODULE, diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c new file mode 100644 index 000000000000..97f9649bcc7e --- /dev/null +++ b/net/netfilter/nft_set_bitmap.c @@ -0,0 +1,314 @@ +/* + * Copyright (c) 2017 Pablo Neira Ayuso <pablo@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/list.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> + +/* This bitmap uses two bits to represent one element. These two bits determine + * the element state in the current and the future generation. + * + * An element can be in three states. The generation cursor is represented using + * the ^ character, note that this cursor shifts on every succesful transaction. + * If no transaction is going on, we observe all elements are in the following + * state: + * + * 11 = this element is active in the current generation. In case of no updates, + * ^ it stays active in the next generation. + * 00 = this element is inactive in the current generation. In case of no + * ^ updates, it stays inactive in the next generation. + * + * On transaction handling, we observe these two temporary states: + * + * 01 = this element is inactive in the current generation and it becomes active + * ^ in the next one. This happens when the element is inserted but commit + * path has not yet been executed yet, so activation is still pending. On + * transaction abortion, the element is removed. + * 10 = this element is active in the current generation and it becomes inactive + * ^ in the next one. This happens when the element is deactivated but commit + * path has not yet been executed yet, so removal is still pending. On + * transation abortion, the next generation bit is reset to go back to + * restore its previous state. + */ +struct nft_bitmap { + u16 bitmap_size; + u8 bitmap[]; +}; + +static inline void nft_bitmap_location(u32 key, u32 *idx, u32 *off) +{ + u32 k = (key << 1); + + *idx = k / BITS_PER_BYTE; + *off = k % BITS_PER_BYTE; +} + +/* Fetch the two bits that represent the element and check if it is active based + * on the generation mask. + */ +static inline bool +nft_bitmap_active(const u8 *bitmap, u32 idx, u32 off, u8 genmask) +{ + return (bitmap[idx] & (0x3 << off)) & (genmask << off); +} + +static bool nft_bitmap_lookup(const struct net *net, const struct nft_set *set, + const u32 *key, const struct nft_set_ext **ext) +{ + const struct nft_bitmap *priv = nft_set_priv(set); + u8 genmask = nft_genmask_cur(net); + u32 idx, off; + + nft_bitmap_location(*key, &idx, &off); + + return nft_bitmap_active(priv->bitmap, idx, off, genmask); +} + +static int nft_bitmap_insert(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, + struct nft_set_ext **_ext) +{ + struct nft_bitmap *priv = nft_set_priv(set); + struct nft_set_ext *ext = elem->priv; + u8 genmask = nft_genmask_next(net); + u32 idx, off; + + nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off); + if (nft_bitmap_active(priv->bitmap, idx, off, genmask)) + return -EEXIST; + + /* Enter 01 state. */ + priv->bitmap[idx] |= (genmask << off); + + return 0; +} + +static void nft_bitmap_remove(const struct net *net, + const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_bitmap *priv = nft_set_priv(set); + struct nft_set_ext *ext = elem->priv; + u8 genmask = nft_genmask_next(net); + u32 idx, off; + + nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off); + /* Enter 00 state. */ + priv->bitmap[idx] &= ~(genmask << off); +} + +static void nft_bitmap_activate(const struct net *net, + const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_bitmap *priv = nft_set_priv(set); + struct nft_set_ext *ext = elem->priv; + u8 genmask = nft_genmask_next(net); + u32 idx, off; + + nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off); + /* Enter 11 state. */ + priv->bitmap[idx] |= (genmask << off); +} + +static bool nft_bitmap_flush(const struct net *net, + const struct nft_set *set, void *ext) +{ + struct nft_bitmap *priv = nft_set_priv(set); + u8 genmask = nft_genmask_next(net); + u32 idx, off; + + nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off); + /* Enter 10 state, similar to deactivation. */ + priv->bitmap[idx] &= ~(genmask << off); + + return true; +} + +static struct nft_set_ext *nft_bitmap_ext_alloc(const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_set_ext_tmpl tmpl; + struct nft_set_ext *ext; + + nft_set_ext_prepare(&tmpl); + nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); + + ext = kzalloc(tmpl.len, GFP_KERNEL); + if (!ext) + return NULL; + + nft_set_ext_init(ext, &tmpl); + memcpy(nft_set_ext_key(ext), elem->key.val.data, set->klen); + + return ext; +} + +static void *nft_bitmap_deactivate(const struct net *net, + const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_bitmap *priv = nft_set_priv(set); + u8 genmask = nft_genmask_next(net); + struct nft_set_ext *ext; + u32 idx, off, key = 0; + + memcpy(&key, elem->key.val.data, set->klen); + nft_bitmap_location(key, &idx, &off); + + if (!nft_bitmap_active(priv->bitmap, idx, off, genmask)) + return NULL; + + /* We have no real set extension since this is a bitmap, allocate this + * dummy object that is released from the commit/abort path. + */ + ext = nft_bitmap_ext_alloc(set, elem); + if (!ext) + return NULL; + + /* Enter 10 state. */ + priv->bitmap[idx] &= ~(genmask << off); + + return ext; +} + +static void nft_bitmap_walk(const struct nft_ctx *ctx, + struct nft_set *set, + struct nft_set_iter *iter) +{ + const struct nft_bitmap *priv = nft_set_priv(set); + struct nft_set_ext_tmpl tmpl; + struct nft_set_elem elem; + struct nft_set_ext *ext; + int idx, off; + u16 key; + + nft_set_ext_prepare(&tmpl); + nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); + + for (idx = 0; idx < priv->bitmap_size; idx++) { + for (off = 0; off < BITS_PER_BYTE; off += 2) { + if (iter->count < iter->skip) + goto cont; + + if (!nft_bitmap_active(priv->bitmap, idx, off, + iter->genmask)) + goto cont; + + ext = kzalloc(tmpl.len, GFP_KERNEL); + if (!ext) { + iter->err = -ENOMEM; + return; + } + nft_set_ext_init(ext, &tmpl); + key = ((idx * BITS_PER_BYTE) + off) >> 1; + memcpy(nft_set_ext_key(ext), &key, set->klen); + + elem.priv = ext; + iter->err = iter->fn(ctx, set, iter, &elem); + + /* On set flush, this dummy extension object is released + * from the commit/abort path. + */ + if (!iter->flush) + kfree(ext); + + if (iter->err < 0) + return; +cont: + iter->count++; + } + } +} + +/* The bitmap size is pow(2, key length in bits) / bits per byte. This is + * multiplied by two since each element takes two bits. For 8 bit keys, the + * bitmap consumes 66 bytes. For 16 bit keys, 16388 bytes. + */ +static inline u32 nft_bitmap_size(u32 klen) +{ + return ((2 << ((klen * BITS_PER_BYTE) - 1)) / BITS_PER_BYTE) << 1; +} + +static inline u32 nft_bitmap_total_size(u32 klen) +{ + return sizeof(struct nft_bitmap) + nft_bitmap_size(klen); +} + +static unsigned int nft_bitmap_privsize(const struct nlattr * const nla[]) +{ + u32 klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN])); + + return nft_bitmap_total_size(klen); +} + +static int nft_bitmap_init(const struct nft_set *set, + const struct nft_set_desc *desc, + const struct nlattr * const nla[]) +{ + struct nft_bitmap *priv = nft_set_priv(set); + + priv->bitmap_size = nft_bitmap_total_size(set->klen); + + return 0; +} + +static void nft_bitmap_destroy(const struct nft_set *set) +{ +} + +static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features, + struct nft_set_estimate *est) +{ + /* Make sure bitmaps we don't get bitmaps larger than 16 Kbytes. */ + if (desc->klen > 2) + return false; + + est->size = nft_bitmap_total_size(desc->klen); + est->lookup = NFT_SET_CLASS_O_1; + est->space = NFT_SET_CLASS_O_1; + + return true; +} + +static struct nft_set_ops nft_bitmap_ops __read_mostly = { + .privsize = nft_bitmap_privsize, + .estimate = nft_bitmap_estimate, + .init = nft_bitmap_init, + .destroy = nft_bitmap_destroy, + .insert = nft_bitmap_insert, + .remove = nft_bitmap_remove, + .deactivate = nft_bitmap_deactivate, + .flush = nft_bitmap_flush, + .activate = nft_bitmap_activate, + .lookup = nft_bitmap_lookup, + .walk = nft_bitmap_walk, + .owner = THIS_MODULE, +}; + +static int __init nft_bitmap_module_init(void) +{ + return nft_register_set(&nft_bitmap_ops); +} + +static void __exit nft_bitmap_module_exit(void) +{ + nft_unregister_set(&nft_bitmap_ops); +} + +module_init(nft_bitmap_module_init); +module_exit(nft_bitmap_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_ALIAS_NFT_SET(); diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index e36069fb76ae..5f652720fc78 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -167,8 +167,8 @@ static void nft_hash_activate(const struct net *net, const struct nft_set *set, nft_set_elem_clear_busy(&he->ext); } -static bool nft_hash_deactivate_one(const struct net *net, - const struct nft_set *set, void *priv) +static bool nft_hash_flush(const struct net *net, + const struct nft_set *set, void *priv) { struct nft_hash_elem *he = priv; @@ -195,7 +195,7 @@ static void *nft_hash_deactivate(const struct net *net, rcu_read_lock(); he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); if (he != NULL && - !nft_hash_deactivate_one(net, set, he)) + !nft_hash_flush(net, set, he)) he = NULL; rcu_read_unlock(); @@ -203,7 +203,8 @@ static void *nft_hash_deactivate(const struct net *net, return he; } -static void nft_hash_remove(const struct nft_set *set, +static void nft_hash_remove(const struct net *net, + const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_hash *priv = nft_set_priv(set); @@ -383,7 +384,8 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, est->size = esize + 2 * sizeof(struct nft_hash_elem *); } - est->class = NFT_SET_CLASS_O_1; + est->lookup = NFT_SET_CLASS_O_1; + est->space = NFT_SET_CLASS_O_N; return true; } @@ -397,12 +399,12 @@ static struct nft_set_ops nft_hash_ops __read_mostly = { .insert = nft_hash_insert, .activate = nft_hash_activate, .deactivate = nft_hash_deactivate, - .deactivate_one = nft_hash_deactivate_one, + .flush = nft_hash_flush, .remove = nft_hash_remove, .lookup = nft_hash_lookup, .update = nft_hash_update, .walk = nft_hash_walk, - .features = NFT_SET_MAP | NFT_SET_TIMEOUT, + .features = NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT, .owner = THIS_MODULE, }; diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index f06f55ee516d..71e8fb886a73 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -151,7 +151,8 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, return err; } -static void nft_rbtree_remove(const struct nft_set *set, +static void nft_rbtree_remove(const struct net *net, + const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_rbtree *priv = nft_set_priv(set); @@ -171,8 +172,8 @@ static void nft_rbtree_activate(const struct net *net, nft_set_elem_change_active(net, set, &rbe->ext); } -static bool nft_rbtree_deactivate_one(const struct net *net, - const struct nft_set *set, void *priv) +static bool nft_rbtree_flush(const struct net *net, + const struct nft_set *set, void *priv) { struct nft_rbtree_elem *rbe = priv; @@ -213,7 +214,7 @@ static void *nft_rbtree_deactivate(const struct net *net, parent = parent->rb_right; continue; } - nft_rbtree_deactivate_one(net, set, rbe); + nft_rbtree_flush(net, set, rbe); return rbe; } } @@ -290,7 +291,8 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, else est->size = nsize; - est->class = NFT_SET_CLASS_O_LOG_N; + est->lookup = NFT_SET_CLASS_O_LOG_N; + est->space = NFT_SET_CLASS_O_N; return true; } @@ -304,11 +306,11 @@ static struct nft_set_ops nft_rbtree_ops __read_mostly = { .insert = nft_rbtree_insert, .remove = nft_rbtree_remove, .deactivate = nft_rbtree_deactivate, - .deactivate_one = nft_rbtree_deactivate_one, + .flush = nft_rbtree_flush, .activate = nft_rbtree_activate, .lookup = nft_rbtree_lookup, .walk = nft_rbtree_walk, - .features = NFT_SET_INTERVAL | NFT_SET_MAP, + .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT, .owner = THIS_MODULE, }; diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index efa9a8858cc6..b1beb2b94ec7 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -1074,6 +1074,8 @@ static int execute_masked_set_action(struct sk_buff *skb, case OVS_KEY_ATTR_CT_ZONE: case OVS_KEY_ATTR_CT_MARK: case OVS_KEY_ATTR_CT_LABELS: + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4: + case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6: err = -EINVAL; break; } diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index fbffe0ea4c4f..c2d452eab0c5 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -65,6 +65,7 @@ struct ovs_conntrack_info { struct nf_conn *ct; u8 commit : 1; u8 nat : 3; /* enum ovs_ct_nat */ + u8 force : 1; u16 family; struct md_mark mark; struct md_labels labels; @@ -73,6 +74,8 @@ struct ovs_conntrack_info { #endif }; +static bool labels_nonzero(const struct ovs_key_ct_labels *labels); + static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info); static u16 key_to_nfproto(const struct sw_flow_key *key) @@ -129,21 +132,33 @@ static u32 ovs_ct_get_mark(const struct nf_conn *ct) #endif } +/* Guard against conntrack labels max size shrinking below 128 bits. */ +#if NF_CT_LABELS_MAX_SIZE < 16 +#error NF_CT_LABELS_MAX_SIZE must be at least 16 bytes +#endif + static void ovs_ct_get_labels(const struct nf_conn *ct, struct ovs_key_ct_labels *labels) { struct nf_conn_labels *cl = ct ? nf_ct_labels_find(ct) : NULL; - if (cl) { - size_t len = sizeof(cl->bits); + if (cl) + memcpy(labels, cl->bits, OVS_CT_LABELS_LEN); + else + memset(labels, 0, OVS_CT_LABELS_LEN); +} - if (len > OVS_CT_LABELS_LEN) - len = OVS_CT_LABELS_LEN; - else if (len < OVS_CT_LABELS_LEN) - memset(labels, 0, OVS_CT_LABELS_LEN); - memcpy(labels, cl->bits, len); +static void __ovs_ct_update_key_orig_tp(struct sw_flow_key *key, + const struct nf_conntrack_tuple *orig, + u8 icmp_proto) +{ + key->ct_orig_proto = orig->dst.protonum; + if (orig->dst.protonum == icmp_proto) { + key->ct.orig_tp.src = htons(orig->dst.u.icmp.type); + key->ct.orig_tp.dst = htons(orig->dst.u.icmp.code); } else { - memset(labels, 0, OVS_CT_LABELS_LEN); + key->ct.orig_tp.src = orig->src.u.all; + key->ct.orig_tp.dst = orig->dst.u.all; } } @@ -151,13 +166,42 @@ static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state, const struct nf_conntrack_zone *zone, const struct nf_conn *ct) { - key->ct.state = state; - key->ct.zone = zone->id; + key->ct_state = state; + key->ct_zone = zone->id; key->ct.mark = ovs_ct_get_mark(ct); ovs_ct_get_labels(ct, &key->ct.labels); + + if (ct) { + const struct nf_conntrack_tuple *orig; + + /* Use the master if we have one. */ + if (ct->master) + ct = ct->master; + orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + + /* IP version must match with the master connection. */ + if (key->eth.type == htons(ETH_P_IP) && + nf_ct_l3num(ct) == NFPROTO_IPV4) { + key->ipv4.ct_orig.src = orig->src.u3.ip; + key->ipv4.ct_orig.dst = orig->dst.u3.ip; + __ovs_ct_update_key_orig_tp(key, orig, IPPROTO_ICMP); + return; + } else if (key->eth.type == htons(ETH_P_IPV6) && + !sw_flow_key_is_nd(key) && + nf_ct_l3num(ct) == NFPROTO_IPV6) { + key->ipv6.ct_orig.src = orig->src.u3.in6; + key->ipv6.ct_orig.dst = orig->dst.u3.in6; + __ovs_ct_update_key_orig_tp(key, orig, NEXTHDR_ICMP); + return; + } + } + /* Clear 'ct_orig_proto' to mark the non-existence of conntrack + * original direction key fields. + */ + key->ct_orig_proto = 0; } -/* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has +/* Update 'key' based on skb->_nfct. If 'post_ct' is true, then OVS has * previously sent the packet to conntrack via the ct action. If * 'keep_nat_flags' is true, the existing NAT flags retained, else they are * initialized from the connection status. @@ -184,7 +228,7 @@ static void ovs_ct_update_key(const struct sk_buff *skb, if (ct->master) state |= OVS_CS_F_RELATED; if (keep_nat_flags) { - state |= key->ct.state & OVS_CS_F_NAT_MASK; + state |= key->ct_state & OVS_CS_F_NAT_MASK; } else { if (ct->status & IPS_SRC_NAT) state |= OVS_CS_F_SRC_NAT; @@ -208,44 +252,69 @@ void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key) ovs_ct_update_key(skb, NULL, key, false, false); } -int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb) +#define IN6_ADDR_INITIALIZER(ADDR) \ + { (ADDR).s6_addr32[0], (ADDR).s6_addr32[1], \ + (ADDR).s6_addr32[2], (ADDR).s6_addr32[3] } + +int ovs_ct_put_key(const struct sw_flow_key *swkey, + const struct sw_flow_key *output, struct sk_buff *skb) { - if (nla_put_u32(skb, OVS_KEY_ATTR_CT_STATE, key->ct.state)) + if (nla_put_u32(skb, OVS_KEY_ATTR_CT_STATE, output->ct_state)) return -EMSGSIZE; if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && - nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, key->ct.zone)) + nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, output->ct_zone)) return -EMSGSIZE; if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && - nla_put_u32(skb, OVS_KEY_ATTR_CT_MARK, key->ct.mark)) + nla_put_u32(skb, OVS_KEY_ATTR_CT_MARK, output->ct.mark)) return -EMSGSIZE; if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && - nla_put(skb, OVS_KEY_ATTR_CT_LABELS, sizeof(key->ct.labels), - &key->ct.labels)) + nla_put(skb, OVS_KEY_ATTR_CT_LABELS, sizeof(output->ct.labels), + &output->ct.labels)) return -EMSGSIZE; + if (swkey->ct_orig_proto) { + if (swkey->eth.type == htons(ETH_P_IP)) { + struct ovs_key_ct_tuple_ipv4 orig = { + output->ipv4.ct_orig.src, + output->ipv4.ct_orig.dst, + output->ct.orig_tp.src, + output->ct.orig_tp.dst, + output->ct_orig_proto, + }; + if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4, + sizeof(orig), &orig)) + return -EMSGSIZE; + } else if (swkey->eth.type == htons(ETH_P_IPV6)) { + struct ovs_key_ct_tuple_ipv6 orig = { + IN6_ADDR_INITIALIZER(output->ipv6.ct_orig.src), + IN6_ADDR_INITIALIZER(output->ipv6.ct_orig.dst), + output->ct.orig_tp.src, + output->ct.orig_tp.dst, + output->ct_orig_proto, + }; + if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6, + sizeof(orig), &orig)) + return -EMSGSIZE; + } + } + return 0; } -static int ovs_ct_set_mark(struct sk_buff *skb, struct sw_flow_key *key, +static int ovs_ct_set_mark(struct nf_conn *ct, struct sw_flow_key *key, u32 ct_mark, u32 mask) { #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) - enum ip_conntrack_info ctinfo; - struct nf_conn *ct; u32 new_mark; - /* The connection could be invalid, in which case set_mark is no-op. */ - ct = nf_ct_get(skb, &ctinfo); - if (!ct) - return 0; - new_mark = ct_mark | (ct->mark & ~(mask)); if (ct->mark != new_mark) { ct->mark = new_mark; - nf_conntrack_event_cache(IPCT_MARK, ct); + if (nf_ct_is_confirmed(ct)) + nf_conntrack_event_cache(IPCT_MARK, ct); key->ct.mark = new_mark; } @@ -255,34 +324,80 @@ static int ovs_ct_set_mark(struct sk_buff *skb, struct sw_flow_key *key, #endif } -static int ovs_ct_set_labels(struct sk_buff *skb, struct sw_flow_key *key, - const struct ovs_key_ct_labels *labels, - const struct ovs_key_ct_labels *mask) +static struct nf_conn_labels *ovs_ct_get_conn_labels(struct nf_conn *ct) { - enum ip_conntrack_info ctinfo; struct nf_conn_labels *cl; - struct nf_conn *ct; - int err; - - /* The connection could be invalid, in which case set_label is no-op.*/ - ct = nf_ct_get(skb, &ctinfo); - if (!ct) - return 0; cl = nf_ct_labels_find(ct); if (!cl) { nf_ct_labels_ext_add(ct); cl = nf_ct_labels_find(ct); } - if (!cl || sizeof(cl->bits) < OVS_CT_LABELS_LEN) + + return cl; +} + +/* Initialize labels for a new, yet to be committed conntrack entry. Note that + * since the new connection is not yet confirmed, and thus no-one else has + * access to it's labels, we simply write them over. Also, we refrain from + * triggering events, as receiving change events before the create event would + * be confusing. + */ +static int ovs_ct_init_labels(struct nf_conn *ct, struct sw_flow_key *key, + const struct ovs_key_ct_labels *labels, + const struct ovs_key_ct_labels *mask) +{ + struct nf_conn_labels *cl, *master_cl; + bool have_mask = labels_nonzero(mask); + + /* Inherit master's labels to the related connection? */ + master_cl = ct->master ? nf_ct_labels_find(ct->master) : NULL; + + if (!master_cl && !have_mask) + return 0; /* Nothing to do. */ + + cl = ovs_ct_get_conn_labels(ct); + if (!cl) + return -ENOSPC; + + /* Inherit the master's labels, if any. */ + if (master_cl) + *cl = *master_cl; + + if (have_mask) { + u32 *dst = (u32 *)cl->bits; + int i; + + for (i = 0; i < OVS_CT_LABELS_LEN_32; i++) + dst[i] = (dst[i] & ~mask->ct_labels_32[i]) | + (labels->ct_labels_32[i] + & mask->ct_labels_32[i]); + } + + memcpy(&key->ct.labels, cl->bits, OVS_CT_LABELS_LEN); + + return 0; +} + +static int ovs_ct_set_labels(struct nf_conn *ct, struct sw_flow_key *key, + const struct ovs_key_ct_labels *labels, + const struct ovs_key_ct_labels *mask) +{ + struct nf_conn_labels *cl; + int err; + + cl = ovs_ct_get_conn_labels(ct); + if (!cl) return -ENOSPC; - err = nf_connlabels_replace(ct, (u32 *)labels, (u32 *)mask, - OVS_CT_LABELS_LEN / sizeof(u32)); + err = nf_connlabels_replace(ct, labels->ct_labels_32, + mask->ct_labels_32, + OVS_CT_LABELS_LEN_32); if (err) return err; - ovs_ct_get_labels(ct, &key->ct.labels); + memcpy(&key->ct.labels, cl->bits, OVS_CT_LABELS_LEN); + return 0; } @@ -421,16 +536,16 @@ ovs_ct_get_info(const struct nf_conntrack_tuple_hash *h) /* Find an existing connection which this packet belongs to without * re-attributing statistics or modifying the connection state. This allows an - * skb->nfct lost due to an upcall to be recovered during actions execution. + * skb->_nfct lost due to an upcall to be recovered during actions execution. * * Must be called with rcu_read_lock. * - * On success, populates skb->nfct and skb->nfctinfo, and returns the - * connection. Returns NULL if there is no existing entry. + * On success, populates skb->_nfct and returns the connection. Returns NULL + * if there is no existing entry. */ static struct nf_conn * ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone, - u8 l3num, struct sk_buff *skb) + u8 l3num, struct sk_buff *skb, bool natted) { struct nf_conntrack_l3proto *l3proto; struct nf_conntrack_l4proto *l4proto; @@ -453,6 +568,17 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone, return NULL; } + /* Must invert the tuple if skb has been transformed by NAT. */ + if (natted) { + struct nf_conntrack_tuple inverse; + + if (!nf_ct_invert_tuple(&inverse, &tuple, l3proto, l4proto)) { + pr_debug("ovs_ct_find_existing: Inversion failed!\n"); + return NULL; + } + tuple = inverse; + } + /* look for tuple match */ h = nf_conntrack_find_get(net, zone, &tuple); if (!h) @@ -460,11 +586,18 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone, ct = nf_ct_tuplehash_to_ctrack(h); + /* Inverted packet tuple matches the reverse direction conntrack tuple, + * select the other tuplehash to get the right 'ctinfo' bits for this + * packet. + */ + if (natted) + h = &ct->tuplehash[!h->tuple.dst.dir]; + nf_ct_set(skb, ct, ovs_ct_get_info(h)); return ct; } -/* Determine whether skb->nfct is equal to the result of conntrack lookup. */ +/* Determine whether skb->_nfct is equal to the result of conntrack lookup. */ static bool skb_nfct_cached(struct net *net, const struct sw_flow_key *key, const struct ovs_conntrack_info *info, @@ -475,14 +608,19 @@ static bool skb_nfct_cached(struct net *net, ct = nf_ct_get(skb, &ctinfo); /* If no ct, check if we have evidence that an existing conntrack entry - * might be found for this skb. This happens when we lose a skb->nfct + * might be found for this skb. This happens when we lose a skb->_nfct * due to an upcall. If the connection was not confirmed, it is not * cached and needs to be run through conntrack again. */ - if (!ct && key->ct.state & OVS_CS_F_TRACKED && - !(key->ct.state & OVS_CS_F_INVALID) && - key->ct.zone == info->zone.id) - ct = ovs_ct_find_existing(net, &info->zone, info->family, skb); + if (!ct && key->ct_state & OVS_CS_F_TRACKED && + !(key->ct_state & OVS_CS_F_INVALID) && + key->ct_zone == info->zone.id) { + ct = ovs_ct_find_existing(net, &info->zone, info->family, skb, + !!(key->ct_state + & OVS_CS_F_NAT_MASK)); + if (ct) + nf_ct_get(skb, &ctinfo); + } if (!ct) return false; if (!net_eq(net, read_pnet(&ct->ct_net))) @@ -496,6 +634,18 @@ static bool skb_nfct_cached(struct net *net, if (help && rcu_access_pointer(help->helper) != info->helper) return false; } + /* Force conntrack entry direction to the current packet? */ + if (info->force && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) { + /* Delete the conntrack entry if confirmed, else just release + * the reference. + */ + if (nf_ct_is_confirmed(ct)) + nf_ct_delete(ct, 0, 0); + else + nf_conntrack_put(&ct->ct_general); + nf_ct_set(skb, NULL, 0); + return false; + } return true; } @@ -590,7 +740,7 @@ static void ovs_nat_update_key(struct sw_flow_key *key, if (maniptype == NF_NAT_MANIP_SRC) { __be16 src; - key->ct.state |= OVS_CS_F_SRC_NAT; + key->ct_state |= OVS_CS_F_SRC_NAT; if (key->eth.type == htons(ETH_P_IP)) key->ipv4.addr.src = ip_hdr(skb)->saddr; else if (key->eth.type == htons(ETH_P_IPV6)) @@ -612,7 +762,7 @@ static void ovs_nat_update_key(struct sw_flow_key *key, } else { __be16 dst; - key->ct.state |= OVS_CS_F_DST_NAT; + key->ct_state |= OVS_CS_F_DST_NAT; if (key->eth.type == htons(ETH_P_IP)) key->ipv4.addr.dst = ip_hdr(skb)->daddr; else if (key->eth.type == htons(ETH_P_IPV6)) @@ -699,7 +849,7 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, /* Pass 'skb' through conntrack in 'net', using zone configured in 'info', if * not done already. Update key with new CT state after passing the packet * through conntrack. - * Note that if the packet is deemed invalid by conntrack, skb->nfct will be + * Note that if the packet is deemed invalid by conntrack, skb->_nfct will be * set to NULL and 0 will be returned. */ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, @@ -736,7 +886,7 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, * NAT after the nf_conntrack_in() call. We can actually clear * the whole state, as it will be re-initialized below. */ - key->ct.state = 0; + key->ct_state = 0; /* Update the key, but keep the NAT flags. */ ovs_ct_update_key(skb, info, key, true, true); @@ -752,9 +902,9 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, * * NAT will be done only if the CT action has NAT, and only * once per packet (per zone), as guarded by the NAT bits in - * the key->ct.state. + * the key->ct_state. */ - if (info->nat && !(key->ct.state & OVS_CS_F_NAT_MASK) && + if (info->nat && !(key->ct_state & OVS_CS_F_NAT_MASK) && (nf_ct_is_confirmed(ct) || info->commit) && ovs_ct_nat(net, key, info, skb, ct, ctinfo) != NF_ACCEPT) { return -EINVAL; @@ -830,8 +980,8 @@ static bool labels_nonzero(const struct ovs_key_ct_labels *labels) { size_t i; - for (i = 0; i < sizeof(*labels); i++) - if (labels->ct_labels[i]) + for (i = 0; i < OVS_CT_LABELS_LEN_32; i++) + if (labels->ct_labels_32[i]) return true; return false; @@ -842,24 +992,36 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key, const struct ovs_conntrack_info *info, struct sk_buff *skb) { + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; int err; err = __ovs_ct_lookup(net, key, info, skb); if (err) return err; + /* The connection could be invalid, in which case this is a no-op.*/ + ct = nf_ct_get(skb, &ctinfo); + if (!ct) + return 0; + /* Apply changes before confirming the connection so that the initial * conntrack NEW netlink event carries the values given in the CT * action. */ if (info->mark.mask) { - err = ovs_ct_set_mark(skb, key, info->mark.value, + err = ovs_ct_set_mark(ct, key, info->mark.value, info->mark.mask); if (err) return err; } - if (labels_nonzero(&info->labels.mask)) { - err = ovs_ct_set_labels(skb, key, &info->labels.value, + if (!nf_ct_is_confirmed(ct)) { + err = ovs_ct_init_labels(ct, key, &info->labels.value, + &info->labels.mask); + if (err) + return err; + } else if (labels_nonzero(&info->labels.mask)) { + err = ovs_ct_set_labels(ct, key, &info->labels.value, &info->labels.mask); if (err) return err; @@ -1061,6 +1223,7 @@ static int parse_nat(const struct nlattr *attr, static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = { [OVS_CT_ATTR_COMMIT] = { .minlen = 0, .maxlen = 0 }, + [OVS_CT_ATTR_FORCE_COMMIT] = { .minlen = 0, .maxlen = 0 }, [OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16), .maxlen = sizeof(u16) }, [OVS_CT_ATTR_MARK] = { .minlen = sizeof(struct md_mark), @@ -1100,6 +1263,9 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, } switch (type) { + case OVS_CT_ATTR_FORCE_COMMIT: + info->force = true; + /* fall through. */ case OVS_CT_ATTR_COMMIT: info->commit = true; break; @@ -1326,7 +1492,9 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info, if (!start) return -EMSGSIZE; - if (ct_info->commit && nla_put_flag(skb, OVS_CT_ATTR_COMMIT)) + if (ct_info->commit && nla_put_flag(skb, ct_info->force + ? OVS_CT_ATTR_FORCE_COMMIT + : OVS_CT_ATTR_COMMIT)) return -EMSGSIZE; if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id)) diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h index 8f6230bd6183..bc7efd1867ab 100644 --- a/net/openvswitch/conntrack.h +++ b/net/openvswitch/conntrack.h @@ -32,7 +32,8 @@ int ovs_ct_execute(struct net *, struct sk_buff *, struct sw_flow_key *, const struct ovs_conntrack_info *); void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key); -int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb); +int ovs_ct_put_key(const struct sw_flow_key *swkey, + const struct sw_flow_key *output, struct sk_buff *skb); void ovs_ct_free_action(const struct nlattr *a); #define CT_SUPPORTED_MASK (OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | \ @@ -75,13 +76,18 @@ static inline int ovs_ct_execute(struct net *net, struct sk_buff *skb, static inline void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key) { - key->ct.state = 0; - key->ct.zone = 0; + key->ct_state = 0; + key->ct_zone = 0; key->ct.mark = 0; memset(&key->ct.labels, 0, sizeof(key->ct.labels)); + /* Clear 'ct_orig_proto' to mark the non-existence of original + * direction key fields. + */ + key->ct_orig_proto = 0; } -static inline int ovs_ct_put_key(const struct sw_flow_key *key, +static inline int ovs_ct_put_key(const struct sw_flow_key *swkey, + const struct sw_flow_key *output, struct sk_buff *skb) { return 0; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 2c0a00f7f1b7..9d4bb8eb63f2 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -765,7 +765,7 @@ static int key_extract_mac_proto(struct sk_buff *skb) int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, struct sk_buff *skb, struct sw_flow_key *key) { - int res; + int res, err; /* Extract metadata from packet. */ if (tun_info) { @@ -792,7 +792,6 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, key->phy.priority = skb->priority; key->phy.in_port = OVS_CB(skb)->input_vport->port_no; key->phy.skb_mark = skb->mark; - ovs_ct_fill_key(skb, key); key->ovs_flow_hash = 0; res = key_extract_mac_proto(skb); if (res < 0) @@ -800,17 +799,26 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, key->mac_proto = res; key->recirc_id = 0; - return key_extract(skb, key); + err = key_extract(skb, key); + if (!err) + ovs_ct_fill_key(skb, key); /* Must be after key_extract(). */ + return err; } int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr, struct sk_buff *skb, struct sw_flow_key *key, bool log) { + const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; + u64 attrs = 0; int err; + err = parse_flow_nlattrs(attr, a, &attrs, log); + if (err) + return -EINVAL; + /* Extract metadata from netlink attributes. */ - err = ovs_nla_get_flow_metadata(net, attr, key, log); + err = ovs_nla_get_flow_metadata(net, a, attrs, key, log); if (err) return err; @@ -824,5 +832,21 @@ int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr, */ skb->protocol = key->eth.type; - return key_extract(skb, key); + err = key_extract(skb, key); + if (err) + return err; + + /* Check that we have conntrack original direction tuple metadata only + * for packets for which it makes sense. Otherwise the key may be + * corrupted due to overlapping key fields. + */ + if (attrs & (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4) && + key->eth.type != htons(ETH_P_IP)) + return -EINVAL; + if (attrs & (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6) && + (key->eth.type != htons(ETH_P_IPV6) || + sw_flow_key_is_nd(key))) + return -EINVAL; + + return 0; } diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index f61cae7f9030..a9bc1c875965 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2014 Nicira, Inc. + * Copyright (c) 2007-2017 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -85,6 +85,11 @@ struct sw_flow_key { struct vlan_head cvlan; __be16 type; /* Ethernet frame type. */ } eth; + /* Filling a hole of two bytes. */ + u8 ct_state; + u8 ct_orig_proto; /* CT original direction tuple IP + * protocol. + */ union { struct { __be32 top_lse; /* top label stack entry */ @@ -96,6 +101,7 @@ struct sw_flow_key { u8 frag; /* One of OVS_FRAG_TYPE_*. */ } ip; }; + u16 ct_zone; /* Conntrack zone. */ struct { __be16 src; /* TCP/UDP/SCTP source port. */ __be16 dst; /* TCP/UDP/SCTP destination port. */ @@ -107,10 +113,16 @@ struct sw_flow_key { __be32 src; /* IP source address. */ __be32 dst; /* IP destination address. */ } addr; - struct { - u8 sha[ETH_ALEN]; /* ARP source hardware address. */ - u8 tha[ETH_ALEN]; /* ARP target hardware address. */ - } arp; + union { + struct { + __be32 src; + __be32 dst; + } ct_orig; /* Conntrack original direction fields. */ + struct { + u8 sha[ETH_ALEN]; /* ARP source hardware address. */ + u8 tha[ETH_ALEN]; /* ARP target hardware address. */ + } arp; + }; } ipv4; struct { struct { @@ -118,23 +130,40 @@ struct sw_flow_key { struct in6_addr dst; /* IPv6 destination address. */ } addr; __be32 label; /* IPv6 flow label. */ - struct { - struct in6_addr target; /* ND target address. */ - u8 sll[ETH_ALEN]; /* ND source link layer address. */ - u8 tll[ETH_ALEN]; /* ND target link layer address. */ - } nd; + union { + struct { + struct in6_addr src; + struct in6_addr dst; + } ct_orig; /* Conntrack original direction fields. */ + struct { + struct in6_addr target; /* ND target address. */ + u8 sll[ETH_ALEN]; /* ND source link layer address. */ + u8 tll[ETH_ALEN]; /* ND target link layer address. */ + } nd; + }; } ipv6; }; struct { - /* Connection tracking fields. */ - u16 zone; + /* Connection tracking fields not packed above. */ + struct { + __be16 src; /* CT orig tuple tp src port. */ + __be16 dst; /* CT orig tuple tp dst port. */ + } orig_tp; u32 mark; - u8 state; struct ovs_key_ct_labels labels; } ct; } __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */ +static inline bool sw_flow_key_is_nd(const struct sw_flow_key *key) +{ + return key->eth.type == htons(ETH_P_IPV6) && + key->ip.proto == NEXTHDR_ICMP && + key->tp.dst == 0 && + (key->tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) || + key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)); +} + struct sw_flow_key_range { unsigned short int start; unsigned short int end; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index c87d359b9b37..6f5fa50f716d 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -129,7 +129,9 @@ static bool match_validate(const struct sw_flow_match *match, /* The following mask attributes allowed only if they * pass the validation tests. */ mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4) + | (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4) | (1 << OVS_KEY_ATTR_IPV6) + | (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6) | (1 << OVS_KEY_ATTR_TCP) | (1 << OVS_KEY_ATTR_TCP_FLAGS) | (1 << OVS_KEY_ATTR_UDP) @@ -161,8 +163,10 @@ static bool match_validate(const struct sw_flow_match *match, if (match->key->eth.type == htons(ETH_P_IP)) { key_expected |= 1 << OVS_KEY_ATTR_IPV4; - if (match->mask && (match->mask->key.eth.type == htons(0xffff))) + if (match->mask && match->mask->key.eth.type == htons(0xffff)) { mask_allowed |= 1 << OVS_KEY_ATTR_IPV4; + mask_allowed |= 1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4; + } if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { if (match->key->ip.proto == IPPROTO_UDP) { @@ -196,8 +200,10 @@ static bool match_validate(const struct sw_flow_match *match, if (match->key->eth.type == htons(ETH_P_IPV6)) { key_expected |= 1 << OVS_KEY_ATTR_IPV6; - if (match->mask && (match->mask->key.eth.type == htons(0xffff))) + if (match->mask && match->mask->key.eth.type == htons(0xffff)) { mask_allowed |= 1 << OVS_KEY_ATTR_IPV6; + mask_allowed |= 1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6; + } if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { if (match->key->ip.proto == IPPROTO_UDP) { @@ -230,6 +236,12 @@ static bool match_validate(const struct sw_flow_match *match, htons(NDISC_NEIGHBOUR_SOLICITATION) || match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { key_expected |= 1 << OVS_KEY_ATTR_ND; + /* Original direction conntrack tuple + * uses the same space as the ND fields + * in the key, so both are not allowed + * at the same time. + */ + mask_allowed &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6); if (match->mask && (match->mask->key.tp.src == htons(0xff))) mask_allowed |= 1 << OVS_KEY_ATTR_ND; } @@ -282,7 +294,7 @@ size_t ovs_key_attr_size(void) /* Whenever adding new OVS_KEY_ FIELDS, we should consider * updating this function. */ - BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 26); + BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 28); return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ @@ -295,6 +307,7 @@ size_t ovs_key_attr_size(void) + nla_total_size(2) /* OVS_KEY_ATTR_CT_ZONE */ + nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */ + nla_total_size(16) /* OVS_KEY_ATTR_CT_LABELS */ + + nla_total_size(40) /* OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6 */ + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */ @@ -355,6 +368,10 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) }, [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) }, [OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) }, + [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4] = { + .len = sizeof(struct ovs_key_ct_tuple_ipv4) }, + [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = { + .len = sizeof(struct ovs_key_ct_tuple_ipv6) }, }; static bool check_attr_len(unsigned int attr_len, unsigned int expected_len) @@ -430,9 +447,8 @@ static int parse_flow_mask_nlattrs(const struct nlattr *attr, return __parse_flow_nlattrs(attr, a, attrsp, log, true); } -static int parse_flow_nlattrs(const struct nlattr *attr, - const struct nlattr *a[], u64 *attrsp, - bool log) +int parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[], + u64 *attrsp, bool log) { return __parse_flow_nlattrs(attr, a, attrsp, log, false); } @@ -1056,14 +1072,14 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, return -EINVAL; } - SW_FLOW_KEY_PUT(match, ct.state, ct_state, is_mask); + SW_FLOW_KEY_PUT(match, ct_state, ct_state, is_mask); *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_STATE); } if (*attrs & (1 << OVS_KEY_ATTR_CT_ZONE) && ovs_ct_verify(net, OVS_KEY_ATTR_CT_ZONE)) { u16 ct_zone = nla_get_u16(a[OVS_KEY_ATTR_CT_ZONE]); - SW_FLOW_KEY_PUT(match, ct.zone, ct_zone, is_mask); + SW_FLOW_KEY_PUT(match, ct_zone, ct_zone, is_mask); *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ZONE); } if (*attrs & (1 << OVS_KEY_ATTR_CT_MARK) && @@ -1082,6 +1098,34 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, sizeof(*cl), is_mask); *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS); } + if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4)) { + const struct ovs_key_ct_tuple_ipv4 *ct; + + ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4]); + + SW_FLOW_KEY_PUT(match, ipv4.ct_orig.src, ct->ipv4_src, is_mask); + SW_FLOW_KEY_PUT(match, ipv4.ct_orig.dst, ct->ipv4_dst, is_mask); + SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask); + SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask); + SW_FLOW_KEY_PUT(match, ct_orig_proto, ct->ipv4_proto, is_mask); + *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4); + } + if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6)) { + const struct ovs_key_ct_tuple_ipv6 *ct; + + ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6]); + + SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.src, &ct->ipv6_src, + sizeof(match->key->ipv6.ct_orig.src), + is_mask); + SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.dst, &ct->ipv6_dst, + sizeof(match->key->ipv6.ct_orig.dst), + is_mask); + SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask); + SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask); + SW_FLOW_KEY_PUT(match, ct_orig_proto, ct->ipv6_proto, is_mask); + *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6); + } /* For layer 3 packets the Ethernet type is provided * and treated as metadata but no MAC addresses are provided. @@ -1493,9 +1537,12 @@ u32 ovs_nla_get_ufid_flags(const struct nlattr *attr) /** * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key. - * @key: Receives extracted in_port, priority, tun_key and skb_mark. - * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute - * sequence. + * @net: Network namespace. + * @key: Receives extracted in_port, priority, tun_key, skb_mark and conntrack + * metadata. + * @a: Array of netlink attributes holding parsed %OVS_KEY_ATTR_* Netlink + * attributes. + * @attrs: Bit mask for the netlink attributes included in @a. * @log: Boolean to allow kernel error logging. Normally true, but when * probing for feature compatibility this should be passed in as false to * suppress unnecessary error logging. @@ -1504,25 +1551,26 @@ u32 ovs_nla_get_ufid_flags(const struct nlattr *attr) * take the same form accepted by flow_from_nlattrs(), but only enough of it to * get the metadata, that is, the parts of the flow key that cannot be * extracted from the packet itself. + * + * This must be called before the packet key fields are filled in 'key'. */ -int ovs_nla_get_flow_metadata(struct net *net, const struct nlattr *attr, - struct sw_flow_key *key, - bool log) +int ovs_nla_get_flow_metadata(struct net *net, + const struct nlattr *a[OVS_KEY_ATTR_MAX + 1], + u64 attrs, struct sw_flow_key *key, bool log) { - const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; struct sw_flow_match match; - u64 attrs = 0; - int err; - - err = parse_flow_nlattrs(attr, a, &attrs, log); - if (err) - return -EINVAL; memset(&match, 0, sizeof(match)); match.key = key; + key->ct_state = 0; + key->ct_zone = 0; + key->ct_orig_proto = 0; memset(&key->ct, 0, sizeof(key->ct)); + memset(&key->ipv4.ct_orig, 0, sizeof(key->ipv4.ct_orig)); + memset(&key->ipv6.ct_orig, 0, sizeof(key->ipv6.ct_orig)); + key->phy.in_port = DP_MAX_PORTS; return metadata_from_nlattrs(net, &match, &attrs, a, false, log); @@ -1584,7 +1632,7 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark)) goto nla_put_failure; - if (ovs_ct_put_key(output, skb)) + if (ovs_ct_put_key(swkey, output, skb)) goto nla_put_failure; if (ovs_key_mac_proto(swkey) == MAC_PROTO_ETHERNET) { diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index 45f9769e5aac..929c665ac3aa 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -46,8 +46,11 @@ void ovs_match_init(struct sw_flow_match *match, int ovs_nla_put_key(const struct sw_flow_key *, const struct sw_flow_key *, int attr, bool is_mask, struct sk_buff *); -int ovs_nla_get_flow_metadata(struct net *, const struct nlattr *, - struct sw_flow_key *, bool log); +int parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[], + u64 *attrsp, bool log); +int ovs_nla_get_flow_metadata(struct net *net, + const struct nlattr *a[OVS_KEY_ATTR_MAX + 1], + u64 attrs, struct sw_flow_key *key, bool log); int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb); int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb); diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 09141a18ee2d..89193a634da4 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -149,6 +149,8 @@ static void do_setup(struct net_device *netdev) { ether_setup(netdev); + netdev->max_mtu = ETH_MAX_MTU; + netdev->netdev_ops = &internal_dev_netdev_ops; netdev->priv_flags &= ~IFF_TX_SKB_SHARING; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 9854baad66ab..7098e2a457e4 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1627,6 +1627,7 @@ static void fanout_release_data(struct packet_fanout *f) static int fanout_add(struct sock *sk, u16 id, u16 type_flags) { + struct packet_rollover *rollover = NULL; struct packet_sock *po = pkt_sk(sk); struct packet_fanout *f, *match; u8 type = type_flags & 0xff; @@ -1649,23 +1650,28 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) return -EINVAL; } + mutex_lock(&fanout_mutex); + + err = -EINVAL; if (!po->running) - return -EINVAL; + goto out; + err = -EALREADY; if (po->fanout) - return -EALREADY; + goto out; if (type == PACKET_FANOUT_ROLLOVER || (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)) { - po->rollover = kzalloc(sizeof(*po->rollover), GFP_KERNEL); - if (!po->rollover) - return -ENOMEM; - atomic_long_set(&po->rollover->num, 0); - atomic_long_set(&po->rollover->num_huge, 0); - atomic_long_set(&po->rollover->num_failed, 0); + err = -ENOMEM; + rollover = kzalloc(sizeof(*rollover), GFP_KERNEL); + if (!rollover) + goto out; + atomic_long_set(&rollover->num, 0); + atomic_long_set(&rollover->num_huge, 0); + atomic_long_set(&rollover->num_failed, 0); + po->rollover = rollover; } - mutex_lock(&fanout_mutex); match = NULL; list_for_each_entry(f, &fanout_list, list) { if (f->id == id && @@ -1712,11 +1718,11 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) } } out: - mutex_unlock(&fanout_mutex); - if (err) { - kfree(po->rollover); + if (err && rollover) { + kfree(rollover); po->rollover = NULL; } + mutex_unlock(&fanout_mutex); return err; } @@ -1725,23 +1731,22 @@ static void fanout_release(struct sock *sk) struct packet_sock *po = pkt_sk(sk); struct packet_fanout *f; - f = po->fanout; - if (!f) - return; - mutex_lock(&fanout_mutex); - po->fanout = NULL; + f = po->fanout; + if (f) { + po->fanout = NULL; + + if (atomic_dec_and_test(&f->sk_ref)) { + list_del(&f->list); + dev_remove_pack(&f->prot_hook); + fanout_release_data(f); + kfree(f); + } - if (atomic_dec_and_test(&f->sk_ref)) { - list_del(&f->list); - dev_remove_pack(&f->prot_hook); - fanout_release_data(f); - kfree(f); + if (po->rollover) + kfree_rcu(po->rollover, rcu); } mutex_unlock(&fanout_mutex); - - if (po->rollover) - kfree_rcu(po->rollover, rcu); } static bool packet_extra_vlan_len_allowed(const struct net_device *dev, @@ -2776,7 +2781,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) struct virtio_net_hdr vnet_hdr = { 0 }; int offset = 0; struct packet_sock *po = pkt_sk(sk); - int hlen, tlen; + int hlen, tlen, linear; int extra_len = 0; /* @@ -2837,8 +2842,9 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) err = -ENOBUFS; hlen = LL_RESERVED_SPACE(dev); tlen = dev->needed_tailroom; - skb = packet_alloc_skb(sk, hlen + tlen, hlen, len, - __virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len), + linear = __virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len); + linear = max(linear, min_t(int, len, dev->hard_header_len)); + skb = packet_alloc_skb(sk, hlen + tlen, hlen, len, linear, msg->msg_flags & MSG_DONTWAIT, &err); if (skb == NULL) goto out_unlock; diff --git a/net/rfkill/Kconfig b/net/rfkill/Kconfig index 868f1ad0415a..060600b03fad 100644 --- a/net/rfkill/Kconfig +++ b/net/rfkill/Kconfig @@ -23,17 +23,6 @@ config RFKILL_INPUT depends on INPUT = y || RFKILL = INPUT default y if !EXPERT -config RFKILL_REGULATOR - tristate "Generic rfkill regulator driver" - depends on RFKILL || !RFKILL - depends on REGULATOR - help - This options enable controlling radio transmitters connected to - voltage regulator using the regulator framework. - - To compile this driver as a module, choose M here: the module will - be called rfkill-regulator. - config RFKILL_GPIO tristate "GPIO RFKILL driver" depends on RFKILL diff --git a/net/rfkill/Makefile b/net/rfkill/Makefile index 311768783f4a..87a80aded0b3 100644 --- a/net/rfkill/Makefile +++ b/net/rfkill/Makefile @@ -5,5 +5,4 @@ rfkill-y += core.o rfkill-$(CONFIG_RFKILL_INPUT) += input.o obj-$(CONFIG_RFKILL) += rfkill.o -obj-$(CONFIG_RFKILL_REGULATOR) += rfkill-regulator.o obj-$(CONFIG_RFKILL_GPIO) += rfkill-gpio.o diff --git a/net/rfkill/rfkill-regulator.c b/net/rfkill/rfkill-regulator.c deleted file mode 100644 index 50cd26a48e87..000000000000 --- a/net/rfkill/rfkill-regulator.c +++ /dev/null @@ -1,154 +0,0 @@ -/* - * rfkill-regulator.c - Regulator consumer driver for rfkill - * - * Copyright (C) 2009 Guiming Zhuo <gmzhuo@gmail.com> - * Copyright (C) 2011 Antonio Ospite <ospite@studenti.unina.it> - * - * Implementation inspired by leds-regulator driver. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - */ - -#include <linux/module.h> -#include <linux/err.h> -#include <linux/slab.h> -#include <linux/platform_device.h> -#include <linux/regulator/consumer.h> -#include <linux/rfkill.h> -#include <linux/rfkill-regulator.h> - -struct rfkill_regulator_data { - struct rfkill *rf_kill; - bool reg_enabled; - - struct regulator *vcc; -}; - -static int rfkill_regulator_set_block(void *data, bool blocked) -{ - struct rfkill_regulator_data *rfkill_data = data; - int ret = 0; - - pr_debug("%s: blocked: %d\n", __func__, blocked); - - if (blocked) { - if (rfkill_data->reg_enabled) { - regulator_disable(rfkill_data->vcc); - rfkill_data->reg_enabled = false; - } - } else { - if (!rfkill_data->reg_enabled) { - ret = regulator_enable(rfkill_data->vcc); - if (!ret) - rfkill_data->reg_enabled = true; - } - } - - pr_debug("%s: regulator_is_enabled after set_block: %d\n", __func__, - regulator_is_enabled(rfkill_data->vcc)); - - return ret; -} - -static struct rfkill_ops rfkill_regulator_ops = { - .set_block = rfkill_regulator_set_block, -}; - -static int rfkill_regulator_probe(struct platform_device *pdev) -{ - struct rfkill_regulator_platform_data *pdata = pdev->dev.platform_data; - struct rfkill_regulator_data *rfkill_data; - struct regulator *vcc; - struct rfkill *rf_kill; - int ret = 0; - - if (pdata == NULL) { - dev_err(&pdev->dev, "no platform data\n"); - return -ENODEV; - } - - if (pdata->name == NULL || pdata->type == 0) { - dev_err(&pdev->dev, "invalid name or type in platform data\n"); - return -EINVAL; - } - - vcc = regulator_get_exclusive(&pdev->dev, "vrfkill"); - if (IS_ERR(vcc)) { - dev_err(&pdev->dev, "Cannot get vcc for %s\n", pdata->name); - ret = PTR_ERR(vcc); - goto out; - } - - rfkill_data = kzalloc(sizeof(*rfkill_data), GFP_KERNEL); - if (rfkill_data == NULL) { - ret = -ENOMEM; - goto err_data_alloc; - } - - rf_kill = rfkill_alloc(pdata->name, &pdev->dev, - pdata->type, - &rfkill_regulator_ops, rfkill_data); - if (rf_kill == NULL) { - ret = -ENOMEM; - goto err_rfkill_alloc; - } - - if (regulator_is_enabled(vcc)) { - dev_dbg(&pdev->dev, "Regulator already enabled\n"); - rfkill_data->reg_enabled = true; - } - rfkill_data->vcc = vcc; - rfkill_data->rf_kill = rf_kill; - - ret = rfkill_register(rf_kill); - if (ret) { - dev_err(&pdev->dev, "Cannot register rfkill device\n"); - goto err_rfkill_register; - } - - platform_set_drvdata(pdev, rfkill_data); - dev_info(&pdev->dev, "%s initialized\n", pdata->name); - - return 0; - -err_rfkill_register: - rfkill_destroy(rf_kill); -err_rfkill_alloc: - kfree(rfkill_data); -err_data_alloc: - regulator_put(vcc); -out: - return ret; -} - -static int rfkill_regulator_remove(struct platform_device *pdev) -{ - struct rfkill_regulator_data *rfkill_data = platform_get_drvdata(pdev); - struct rfkill *rf_kill = rfkill_data->rf_kill; - - rfkill_unregister(rf_kill); - rfkill_destroy(rf_kill); - regulator_put(rfkill_data->vcc); - kfree(rfkill_data); - - return 0; -} - -static struct platform_driver rfkill_regulator_driver = { - .probe = rfkill_regulator_probe, - .remove = rfkill_regulator_remove, - .driver = { - .name = "rfkill-regulator", - }, -}; - -module_platform_driver(rfkill_regulator_driver); - -MODULE_AUTHOR("Guiming Zhuo <gmzhuo@gmail.com>"); -MODULE_AUTHOR("Antonio Ospite <ospite@studenti.unina.it>"); -MODULE_DESCRIPTION("Regulator consumer driver for rfkill"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform:rfkill-regulator"); diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 3c5e29ba6594..f219ff325ed4 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -529,7 +529,7 @@ errout: return err; } -int nla_memdup_cookie(struct tc_action *a, struct nlattr **tb) +static int nla_memdup_cookie(struct tc_action *a, struct nlattr **tb) { a->act_cookie = kzalloc(sizeof(*a->act_cookie), GFP_KERNEL); if (!a->act_cookie) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 84682f02b611..af49c7dca860 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -28,8 +28,6 @@ #include <linux/tc_act/tc_mirred.h> #include <net/tc_act/tc_mirred.h> -#include <linux/if_arp.h> - #define MIRRED_TAB_MASK 7 static LIST_HEAD(mirred_list); static DEFINE_SPINLOCK(mirred_list_lock); diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index b27c4daec88f..c1310472f620 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -22,6 +22,7 @@ #include <net/pkt_sched.h> #include <linux/tc_act/tc_pedit.h> #include <net/tc_act/tc_pedit.h> +#include <uapi/linux/tc_act/tc_pedit.h> #define PEDIT_TAB_MASK 15 @@ -30,18 +31,117 @@ static struct tc_action_ops act_pedit_ops; static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = { [TCA_PEDIT_PARMS] = { .len = sizeof(struct tc_pedit) }, + [TCA_PEDIT_KEYS_EX] = { .type = NLA_NESTED }, }; +static const struct nla_policy pedit_key_ex_policy[TCA_PEDIT_KEY_EX_MAX + 1] = { + [TCA_PEDIT_KEY_EX_HTYPE] = { .type = NLA_U16 }, + [TCA_PEDIT_KEY_EX_CMD] = { .type = NLA_U16 }, +}; + +static struct tcf_pedit_key_ex *tcf_pedit_keys_ex_parse(struct nlattr *nla, + u8 n) +{ + struct tcf_pedit_key_ex *keys_ex; + struct tcf_pedit_key_ex *k; + const struct nlattr *ka; + int err = -EINVAL; + int rem; + + if (!nla || !n) + return NULL; + + keys_ex = kcalloc(n, sizeof(*k), GFP_KERNEL); + if (!keys_ex) + return ERR_PTR(-ENOMEM); + + k = keys_ex; + + nla_for_each_nested(ka, nla, rem) { + struct nlattr *tb[TCA_PEDIT_KEY_EX_MAX + 1]; + + if (!n) { + err = -EINVAL; + goto err_out; + } + n--; + + if (nla_type(ka) != TCA_PEDIT_KEY_EX) { + err = -EINVAL; + goto err_out; + } + + err = nla_parse_nested(tb, TCA_PEDIT_KEY_EX_MAX, ka, + pedit_key_ex_policy); + if (err) + goto err_out; + + if (!tb[TCA_PEDIT_KEY_EX_HTYPE] || + !tb[TCA_PEDIT_KEY_EX_CMD]) { + err = -EINVAL; + goto err_out; + } + + k->htype = nla_get_u16(tb[TCA_PEDIT_KEY_EX_HTYPE]); + k->cmd = nla_get_u16(tb[TCA_PEDIT_KEY_EX_CMD]); + + if (k->htype > TCA_PEDIT_HDR_TYPE_MAX || + k->cmd > TCA_PEDIT_CMD_MAX) { + err = -EINVAL; + goto err_out; + } + + k++; + } + + if (n) + goto err_out; + + return keys_ex; + +err_out: + kfree(keys_ex); + return ERR_PTR(err); +} + +static int tcf_pedit_key_ex_dump(struct sk_buff *skb, + struct tcf_pedit_key_ex *keys_ex, int n) +{ + struct nlattr *keys_start = nla_nest_start(skb, TCA_PEDIT_KEYS_EX); + + for (; n > 0; n--) { + struct nlattr *key_start; + + key_start = nla_nest_start(skb, TCA_PEDIT_KEY_EX); + + if (nla_put_u16(skb, TCA_PEDIT_KEY_EX_HTYPE, keys_ex->htype) || + nla_put_u16(skb, TCA_PEDIT_KEY_EX_CMD, keys_ex->cmd)) { + nlmsg_trim(skb, keys_start); + return -EINVAL; + } + + nla_nest_end(skb, key_start); + + keys_ex++; + } + + nla_nest_end(skb, keys_start); + + return 0; +} + static int tcf_pedit_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind) { struct tc_action_net *tn = net_generic(net, pedit_net_id); struct nlattr *tb[TCA_PEDIT_MAX + 1]; + struct nlattr *pattr; struct tc_pedit *parm; int ret = 0, err; struct tcf_pedit *p; struct tc_pedit_key *keys = NULL; + struct tcf_pedit_key_ex *keys_ex; int ksize; if (nla == NULL) @@ -51,13 +151,21 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, if (err < 0) return err; - if (tb[TCA_PEDIT_PARMS] == NULL) + pattr = tb[TCA_PEDIT_PARMS]; + if (!pattr) + pattr = tb[TCA_PEDIT_PARMS_EX]; + if (!pattr) return -EINVAL; - parm = nla_data(tb[TCA_PEDIT_PARMS]); + + parm = nla_data(pattr); ksize = parm->nkeys * sizeof(struct tc_pedit_key); - if (nla_len(tb[TCA_PEDIT_PARMS]) < sizeof(*parm) + ksize) + if (nla_len(pattr) < sizeof(*parm) + ksize) return -EINVAL; + keys_ex = tcf_pedit_keys_ex_parse(tb[TCA_PEDIT_KEYS_EX], parm->nkeys); + if (IS_ERR(keys_ex)) + return PTR_ERR(keys_ex); + if (!tcf_hash_check(tn, parm->index, a, bind)) { if (!parm->nkeys) return -EINVAL; @@ -69,6 +177,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, keys = kmalloc(ksize, GFP_KERNEL); if (keys == NULL) { tcf_hash_cleanup(*a, est); + kfree(keys_ex); return -ENOMEM; } ret = ACT_P_CREATED; @@ -81,8 +190,10 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, p = to_pedit(*a); if (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys) { keys = kmalloc(ksize, GFP_KERNEL); - if (keys == NULL) + if (!keys) { + kfree(keys_ex); return -ENOMEM; + } } } @@ -95,6 +206,10 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, p->tcfp_nkeys = parm->nkeys; } memcpy(p->tcfp_keys, parm->keys, ksize); + + kfree(p->tcfp_keys_ex); + p->tcfp_keys_ex = keys_ex; + spin_unlock_bh(&p->tcf_lock); if (ret == ACT_P_CREATED) tcf_hash_insert(tn, *a); @@ -106,6 +221,7 @@ static void tcf_pedit_cleanup(struct tc_action *a, int bind) struct tcf_pedit *p = to_pedit(a); struct tc_pedit_key *keys = p->tcfp_keys; kfree(keys); + kfree(p->tcfp_keys_ex); } static bool offset_valid(struct sk_buff *skb, int offset) @@ -119,38 +235,88 @@ static bool offset_valid(struct sk_buff *skb, int offset) return true; } +static int pedit_skb_hdr_offset(struct sk_buff *skb, + enum pedit_header_type htype, int *hoffset) +{ + int ret = -EINVAL; + + switch (htype) { + case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH: + if (skb_mac_header_was_set(skb)) { + *hoffset = skb_mac_offset(skb); + ret = 0; + } + break; + case TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK: + case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4: + case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6: + *hoffset = skb_network_offset(skb); + ret = 0; + break; + case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP: + case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP: + if (skb_transport_header_was_set(skb)) { + *hoffset = skb_transport_offset(skb); + ret = 0; + } + break; + default: + ret = -EINVAL; + break; + }; + + return ret; +} + static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_pedit *p = to_pedit(a); int i; - unsigned int off; if (skb_unclone(skb, GFP_ATOMIC)) return p->tcf_action; - off = skb_network_offset(skb); - spin_lock(&p->tcf_lock); tcf_lastuse_update(&p->tcf_tm); if (p->tcfp_nkeys > 0) { struct tc_pedit_key *tkey = p->tcfp_keys; + struct tcf_pedit_key_ex *tkey_ex = p->tcfp_keys_ex; + enum pedit_header_type htype = TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK; + enum pedit_cmd cmd = TCA_PEDIT_KEY_EX_CMD_SET; for (i = p->tcfp_nkeys; i > 0; i--, tkey++) { u32 *ptr, _data; int offset = tkey->off; + int hoffset; + u32 val; + int rc; + + if (tkey_ex) { + htype = tkey_ex->htype; + cmd = tkey_ex->cmd; + + tkey_ex++; + } + + rc = pedit_skb_hdr_offset(skb, htype, &hoffset); + if (rc) { + pr_info("tc filter pedit bad header type specified (0x%x)\n", + htype); + goto bad; + } if (tkey->offmask) { char *d, _d; - if (!offset_valid(skb, off + tkey->at)) { + if (!offset_valid(skb, hoffset + tkey->at)) { pr_info("tc filter pedit 'at' offset %d out of bounds\n", - off + tkey->at); + hoffset + tkey->at); goto bad; } - d = skb_header_pointer(skb, off + tkey->at, 1, + d = skb_header_pointer(skb, hoffset + tkey->at, 1, &_d); if (!d) goto bad; @@ -163,19 +329,32 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, goto bad; } - if (!offset_valid(skb, off + offset)) { + if (!offset_valid(skb, hoffset + offset)) { pr_info("tc filter pedit offset %d out of bounds\n", - offset); + hoffset + offset); goto bad; } - ptr = skb_header_pointer(skb, off + offset, 4, &_data); + ptr = skb_header_pointer(skb, hoffset + offset, 4, &_data); if (!ptr) goto bad; /* just do it, baby */ - *ptr = ((*ptr & tkey->mask) ^ tkey->val); + switch (cmd) { + case TCA_PEDIT_KEY_EX_CMD_SET: + val = tkey->val; + break; + case TCA_PEDIT_KEY_EX_CMD_ADD: + val = (*ptr + tkey->val) & ~tkey->mask; + break; + default: + pr_info("tc filter pedit bad command (%d)\n", + cmd); + goto bad; + } + + *ptr = ((*ptr & tkey->mask) ^ val); if (ptr == &_data) - skb_store_bits(skb, off + offset, ptr, 4); + skb_store_bits(skb, hoffset + offset, ptr, 4); } goto done; @@ -215,8 +394,15 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a, opt->refcnt = p->tcf_refcnt - ref; opt->bindcnt = p->tcf_bindcnt - bind; - if (nla_put(skb, TCA_PEDIT_PARMS, s, opt)) - goto nla_put_failure; + if (p->tcfp_keys_ex) { + tcf_pedit_key_ex_dump(skb, p->tcfp_keys_ex, p->tcfp_nkeys); + + if (nla_put(skb, TCA_PEDIT_PARMS_EX, s, opt)) + goto nla_put_failure; + } else { + if (nla_put(skb, TCA_PEDIT_PARMS, s, opt)) + goto nla_put_failure; + } tcf_tm_dump(&t, &p->tcf_tm); if (nla_put_64bit(skb, TCA_PEDIT_TM, sizeof(t), &t, TCA_PEDIT_PAD)) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 1ecdf809b5fa..732f7cae459d 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -19,6 +19,7 @@ #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> +#include <linux/err.h> #include <linux/skbuff.h> #include <linux/init.h> #include <linux/kmod.h> @@ -38,14 +39,14 @@ static DEFINE_RWLOCK(cls_mod_lock); /* Find classifier type by string name */ -static const struct tcf_proto_ops *tcf_proto_lookup_ops(struct nlattr *kind) +static const struct tcf_proto_ops *tcf_proto_lookup_ops(const char *kind) { const struct tcf_proto_ops *t, *res = NULL; if (kind) { read_lock(&cls_mod_lock); list_for_each_entry(t, &tcf_proto_base, head) { - if (nla_strcmp(kind, t->kind) == 0) { + if (strcmp(kind, t->kind) == 0) { if (try_module_get(t->owner)) res = t; break; @@ -127,6 +128,77 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp) return first; } +static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol, + u32 prio, u32 parent, struct Qdisc *q) +{ + struct tcf_proto *tp; + int err; + + tp = kzalloc(sizeof(*tp), GFP_KERNEL); + if (!tp) + return ERR_PTR(-ENOBUFS); + + err = -ENOENT; + tp->ops = tcf_proto_lookup_ops(kind); + if (!tp->ops) { +#ifdef CONFIG_MODULES + rtnl_unlock(); + request_module("cls_%s", kind); + rtnl_lock(); + tp->ops = tcf_proto_lookup_ops(kind); + /* We dropped the RTNL semaphore in order to perform + * the module load. So, even if we succeeded in loading + * the module we have to replay the request. We indicate + * this using -EAGAIN. + */ + if (tp->ops) { + module_put(tp->ops->owner); + err = -EAGAIN; + } else { + err = -ENOENT; + } + goto errout; +#endif + } + tp->classify = tp->ops->classify; + tp->protocol = protocol; + tp->prio = prio; + tp->classid = parent; + tp->q = q; + + err = tp->ops->init(tp); + if (err) { + module_put(tp->ops->owner); + goto errout; + } + return tp; + +errout: + kfree(tp); + return ERR_PTR(err); +} + +static bool tcf_proto_destroy(struct tcf_proto *tp, bool force) +{ + if (tp->ops->destroy(tp, force)) { + module_put(tp->ops->owner); + kfree_rcu(tp, rcu); + return true; + } + return false; +} + +void tcf_destroy_chain(struct tcf_proto __rcu **fl) +{ + struct tcf_proto *tp; + + while ((tp = rtnl_dereference(*fl)) != NULL) { + RCU_INIT_POINTER(*fl, tp->next); + tcf_proto_destroy(tp, true); + } +} +EXPORT_SYMBOL(tcf_destroy_chain); + /* Add/change/delete/get a filter node */ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n) @@ -142,8 +214,8 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n) struct Qdisc *q; struct tcf_proto __rcu **back; struct tcf_proto __rcu **chain; + struct tcf_proto *next; struct tcf_proto *tp; - const struct tcf_proto_ops *tp_ops; const struct Qdisc_class_ops *cops; unsigned long cl; unsigned long fh; @@ -222,9 +294,10 @@ replay: /* And the last stroke */ chain = cops->tcf_chain(q, cl); - err = -EINVAL; - if (chain == NULL) + if (chain == NULL) { + err = -EINVAL; goto errout; + } if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) { tfilter_notify_chain(net, skb, n, chain, RTM_DELTFILTER); tcf_destroy_chain(chain); @@ -239,10 +312,13 @@ replay: if (tp->prio >= prio) { if (tp->prio == prio) { if (!nprio || - (tp->protocol != protocol && protocol)) + (tp->protocol != protocol && protocol)) { + err = -EINVAL; goto errout; - } else + } + } else { tp = NULL; + } break; } } @@ -250,109 +326,69 @@ replay: if (tp == NULL) { /* Proto-tcf does not exist, create new one */ - if (tca[TCA_KIND] == NULL || !protocol) + if (tca[TCA_KIND] == NULL || !protocol) { + err = -EINVAL; goto errout; + } - err = -ENOENT; if (n->nlmsg_type != RTM_NEWTFILTER || - !(n->nlmsg_flags & NLM_F_CREATE)) + !(n->nlmsg_flags & NLM_F_CREATE)) { + err = -ENOENT; goto errout; + } + if (!nprio) + nprio = TC_H_MAJ(tcf_auto_prio(rtnl_dereference(*back))); - /* Create new proto tcf */ - - err = -ENOBUFS; - tp = kzalloc(sizeof(*tp), GFP_KERNEL); - if (tp == NULL) - goto errout; - err = -ENOENT; - tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND]); - if (tp_ops == NULL) { -#ifdef CONFIG_MODULES - struct nlattr *kind = tca[TCA_KIND]; - char name[IFNAMSIZ]; - - if (kind != NULL && - nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) { - rtnl_unlock(); - request_module("cls_%s", name); - rtnl_lock(); - tp_ops = tcf_proto_lookup_ops(kind); - /* We dropped the RTNL semaphore in order to - * perform the module load. So, even if we - * succeeded in loading the module we have to - * replay the request. We indicate this using - * -EAGAIN. - */ - if (tp_ops != NULL) { - module_put(tp_ops->owner); - err = -EAGAIN; - } - } -#endif - kfree(tp); - goto errout; - } - tp->ops = tp_ops; - tp->protocol = protocol; - tp->prio = nprio ? : - TC_H_MAJ(tcf_auto_prio(rtnl_dereference(*back))); - tp->q = q; - tp->classify = tp_ops->classify; - tp->classid = parent; - - err = tp_ops->init(tp); - if (err != 0) { - module_put(tp_ops->owner); - kfree(tp); + tp = tcf_proto_create(nla_data(tca[TCA_KIND]), + protocol, nprio, parent, q); + if (IS_ERR(tp)) { + err = PTR_ERR(tp); goto errout; } - tp_created = 1; - - } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) + } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) { + err = -EINVAL; goto errout; + } fh = tp->ops->get(tp, t->tcm_handle); if (fh == 0) { if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) { - struct tcf_proto *next = rtnl_dereference(tp->next); - + next = rtnl_dereference(tp->next); RCU_INIT_POINTER(*back, next); - tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER, false); - tcf_destroy(tp, true); + tcf_proto_destroy(tp, true); err = 0; goto errout; } - err = -ENOENT; if (n->nlmsg_type != RTM_NEWTFILTER || - !(n->nlmsg_flags & NLM_F_CREATE)) + !(n->nlmsg_flags & NLM_F_CREATE)) { + err = -ENOENT; goto errout; + } } else { switch (n->nlmsg_type) { case RTM_NEWTFILTER: - err = -EEXIST; if (n->nlmsg_flags & NLM_F_EXCL) { if (tp_created) - tcf_destroy(tp, true); + tcf_proto_destroy(tp, true); + err = -EEXIST; goto errout; } break; case RTM_DELTFILTER: err = tp->ops->delete(tp, fh); - if (err == 0) { - struct tcf_proto *next = rtnl_dereference(tp->next); - - tfilter_notify(net, skb, n, tp, - t->tcm_handle, - RTM_DELTFILTER, false); - if (tcf_destroy(tp, false)) - RCU_INIT_POINTER(*back, next); - } + if (err) + goto errout; + next = rtnl_dereference(tp->next); + tfilter_notify(net, skb, n, tp, t->tcm_handle, + RTM_DELTFILTER, false); + if (tcf_proto_destroy(tp, false)) + RCU_INIT_POINTER(*back, next); goto errout; case RTM_GETTFILTER: err = tfilter_notify(net, skb, n, tp, fh, @@ -374,7 +410,7 @@ replay: tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER, false); } else { if (tp_created) - tcf_destroy(tp, true); + tcf_proto_destroy(tp, true); } errout: diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index ef53ede11590..a13c15e8f087 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1019,6 +1019,8 @@ static struct Qdisc *qdisc_create(struct net_device *dev, return sch; } + /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */ + ops->destroy(sch); err_out3: dev_put(dev); kfree((char *) sch - sch->padded); @@ -1900,28 +1902,6 @@ reset: } EXPORT_SYMBOL(tc_classify); -bool tcf_destroy(struct tcf_proto *tp, bool force) -{ - if (tp->ops->destroy(tp, force)) { - module_put(tp->ops->owner); - kfree_rcu(tp, rcu); - return true; - } - - return false; -} - -void tcf_destroy_chain(struct tcf_proto __rcu **fl) -{ - struct tcf_proto *tp; - - while ((tp = rtnl_dereference(*fl)) != NULL) { - RCU_INIT_POINTER(*fl, tp->next); - tcf_destroy(tp, true); - } -} -EXPORT_SYMBOL(tcf_destroy_chain); - #ifdef CONFIG_PROC_FS static int psched_show(struct seq_file *seq, void *v) { diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 481e4f12aeb4..2209c2ddacbf 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -15,6 +15,7 @@ #include <linux/file.h> /* for fput */ #include <net/netlink.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> /* * The ATM queuing discipline provides a framework for invoking classifiers diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index f1207582cbf3..d6ca18dc04c3 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -19,6 +19,7 @@ #include <linux/skbuff.h> #include <net/netlink.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> /* Class-Based Queueing (CBQ) algorithm. diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 3b6d5bd69101..3b86a97bc67c 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -16,6 +16,7 @@ #include <linux/skbuff.h> #include <linux/vmalloc.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> #include <net/inet_ecn.h> #include <net/red.h> #include <net/flow_dissector.h> diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 1308bbf460f7..802ac7c2e5e8 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -13,6 +13,7 @@ #include <linux/rtnetlink.h> #include <linux/bitops.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> #include <net/dsfield.h> #include <net/inet_ecn.h> #include <asm/byteorder.h> diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 2f50e4c72fb4..9f3a884d1590 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -23,6 +23,7 @@ #include <linux/vmalloc.h> #include <net/netlink.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> #include <net/codel.h> #include <net/codel_impl.h> #include <net/codel_qdisc.h> diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index e3d0458af17b..2fae8b5f1b80 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -627,7 +627,9 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt) q->hhf_arrays[i] = hhf_zalloc(HHF_ARRAYS_LEN * sizeof(u32)); if (!q->hhf_arrays[i]) { - hhf_destroy(sch); + /* Note: hhf_destroy() will be called + * by our caller. + */ return -ENOMEM; } } @@ -638,7 +640,9 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt) q->hhf_valid_bits[i] = hhf_zalloc(HHF_ARRAYS_LEN / BITS_PER_BYTE); if (!q->hhf_valid_bits[i]) { - hhf_destroy(sch); + /* Note: hhf_destroy() will be called + * by our caller. + */ return -ENOMEM; } } diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 760f39e7caee..4cd5fb134bc9 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -40,6 +40,7 @@ #include <net/netlink.h> #include <net/sch_generic.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> /* HTB algorithm. Author: devik@cdi.cz diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 8fe6999b642a..3bab5f66c392 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -16,6 +16,7 @@ #include <net/netlink.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg) { diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index 2bc8d7f8df16..20b7f1646f69 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -52,7 +52,7 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt) /* pre-allocate qdiscs, attachment can't fail */ priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]), GFP_KERNEL); - if (priv->qdiscs == NULL) + if (!priv->qdiscs) return -ENOMEM; for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { @@ -60,18 +60,14 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt) qdisc = qdisc_create_dflt(dev_queue, get_default_qdisc_ops(dev, ntx), TC_H_MAKE(TC_H_MAJ(sch->handle), TC_H_MIN(ntx + 1))); - if (qdisc == NULL) - goto err; + if (!qdisc) + return -ENOMEM; priv->qdiscs[ntx] = qdisc; qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; } sch->flags |= TCQ_F_MQROOT; return 0; - -err: - mq_destroy(sch); - return -ENOMEM; } static void mq_attach(struct Qdisc *sch) diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index b5c502c78143..922683418e53 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -118,10 +118,8 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) /* pre-allocate qdisc, attachment can't fail */ priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]), GFP_KERNEL); - if (priv->qdiscs == NULL) { - err = -ENOMEM; - goto err; - } + if (!priv->qdiscs) + return -ENOMEM; for (i = 0; i < dev->num_tx_queues; i++) { dev_queue = netdev_get_tx_queue(dev, i); @@ -129,10 +127,9 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) get_default_qdisc_ops(dev, i), TC_H_MAKE(TC_H_MAJ(sch->handle), TC_H_MIN(i + 1))); - if (qdisc == NULL) { - err = -ENOMEM; - goto err; - } + if (!qdisc) + return -ENOMEM; + priv->qdiscs[i] = qdisc; qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; } @@ -148,7 +145,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) priv->hw_owned = 1; err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc); if (err) - goto err; + return err; } else { netdev_set_num_tc(dev, qopt->num_tc); for (i = 0; i < qopt->num_tc; i++) @@ -162,10 +159,6 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) sch->flags |= TCQ_F_MQROOT; return 0; - -err: - mqprio_destroy(sch); - return err; } static void mqprio_attach(struct Qdisc *sch) diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 9ffbb025b37e..e7839a0d0eaa 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -25,7 +25,7 @@ #include <linux/skbuff.h> #include <net/netlink.h> #include <net/pkt_sched.h> - +#include <net/pkt_cls.h> struct multiq_sched_data { u16 bands; diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 8f575899adfa..d4d7db267b6e 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -20,7 +20,7 @@ #include <linux/skbuff.h> #include <net/netlink.h> #include <net/pkt_sched.h> - +#include <net/pkt_cls.h> struct prio_sched_data { int bands; diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 20a350bd1b1d..fe6963d21519 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -25,6 +25,7 @@ #include <linux/jhash.h> #include <net/ip.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> #include <net/inet_ecn.h> /* diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 7f195ed4d568..42e8c8615e65 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -23,6 +23,7 @@ #include <linux/vmalloc.h> #include <net/netlink.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> #include <net/red.h> @@ -742,9 +743,10 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) q->ht = sfq_alloc(sizeof(q->ht[0]) * q->divisor); q->slots = sfq_alloc(sizeof(q->slots[0]) * q->maxflows); if (!q->ht || !q->slots) { - sfq_destroy(sch); + /* Note: sfq_destroy() will be called by our caller */ return -ENOMEM; } + for (i = 0; i < q->divisor; i++) q->ht[i] = SFQ_EMPTY_SLOT; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index e50dc6d7543f..2a6835b4562b 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -832,8 +832,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, if (transport->state != SCTP_UNCONFIRMED) transport->state = SCTP_INACTIVE; else { - dst_release(transport->dst); - transport->dst = NULL; + sctp_transport_dst_release(transport); ulp_notify = false; } diff --git a/net/sctp/output.c b/net/sctp/output.c index 07ab5062e541..814eac047467 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -546,6 +546,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) struct sctp_association *asoc = tp->asoc; struct sctp_chunk *chunk, *tmp; int pkt_count, gso = 0; + int confirm; struct dst_entry *dst; struct sk_buff *head; struct sctphdr *sh; @@ -624,7 +625,14 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) asoc->peer.last_sent_to = tp; } head->ignore_df = packet->ipfragok; - tp->af_specific->sctp_xmit(head, tp); + confirm = tp->dst_pending_confirm; + if (confirm) + skb_set_dst_pending_confirm(head, 1); + /* neighbour should be confirmed on successful transmission or + * positive error + */ + if (tp->af_specific->sctp_xmit(head, tp) >= 0 && confirm) + tp->dst_pending_confirm = 0; out: list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 65abe22d8691..db352e5d61f8 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1654,7 +1654,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, if (forward_progress) { if (transport->dst) - dst_confirm(transport->dst); + sctp_transport_dst_confirm(transport); } } diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index ad3445b3408e..7f8dbf2c6cee 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -3333,8 +3333,7 @@ static void sctp_asconf_param_success(struct sctp_association *asoc, local_bh_enable(); list_for_each_entry(transport, &asoc->peer.transport_addr_list, transports) { - dst_release(transport->dst); - transport->dst = NULL; + sctp_transport_dst_release(transport); } break; case SCTP_PARAM_DEL_IP: @@ -3348,8 +3347,7 @@ static void sctp_asconf_param_success(struct sctp_association *asoc, local_bh_enable(); list_for_each_entry(transport, &asoc->peer.transport_addr_list, transports) { - dst_release(transport->dst); - transport->dst = NULL; + sctp_transport_dst_release(transport); } break; default: @@ -3660,3 +3658,78 @@ struct sctp_chunk *sctp_make_strreset_req( return retval; } + +/* RE-CONFIG 4.3 (SSN/TSN RESET ALL) + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Parameter Type = 15 | Parameter Length = 8 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Re-configuration Request Sequence Number | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct sctp_chunk *sctp_make_strreset_tsnreq( + const struct sctp_association *asoc) +{ + struct sctp_strreset_tsnreq tsnreq; + __u16 length = sizeof(tsnreq); + struct sctp_chunk *retval; + + retval = sctp_make_reconf(asoc, length); + if (!retval) + return NULL; + + tsnreq.param_hdr.type = SCTP_PARAM_RESET_TSN_REQUEST; + tsnreq.param_hdr.length = htons(length); + tsnreq.request_seq = htonl(asoc->strreset_outseq); + + sctp_addto_chunk(retval, sizeof(tsnreq), &tsnreq); + + return retval; +} + +/* RE-CONFIG 4.5/4.6 (ADD STREAM) + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Parameter Type = 17 | Parameter Length = 12 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Re-configuration Request Sequence Number | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Number of new streams | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct sctp_chunk *sctp_make_strreset_addstrm( + const struct sctp_association *asoc, + __u16 out, __u16 in) +{ + struct sctp_strreset_addstrm addstrm; + __u16 size = sizeof(addstrm); + struct sctp_chunk *retval; + + retval = sctp_make_reconf(asoc, (!!out + !!in) * size); + if (!retval) + return NULL; + + if (out) { + addstrm.param_hdr.type = SCTP_PARAM_RESET_ADD_OUT_STREAMS; + addstrm.param_hdr.length = htons(size); + addstrm.number_of_streams = htons(out); + addstrm.request_seq = htonl(asoc->strreset_outseq); + addstrm.reserved = 0; + + sctp_addto_chunk(retval, size, &addstrm); + } + + if (in) { + addstrm.param_hdr.type = SCTP_PARAM_RESET_ADD_IN_STREAMS; + addstrm.param_hdr.length = htons(size); + addstrm.number_of_streams = htons(in); + addstrm.request_seq = htonl(asoc->strreset_outseq + !!out); + addstrm.reserved = 0; + + sctp_addto_chunk(retval, size, &addstrm); + } + + return retval; +} diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index a4552712b882..51abcc90fe75 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -755,7 +755,7 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds, * forward progress. */ if (t->dst) - dst_confirm(t->dst); + sctp_transport_dst_confirm(t); /* The receiver of the HEARTBEAT ACK should also perform an * RTT measurement for that destination transport address diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 5fc7122c76de..75f35cea4371 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -239,7 +239,7 @@ static struct sctp_transport *sctp_addr_id2transport(struct sock *sk, union sctp_addr *laddr = (union sctp_addr *)addr; struct sctp_transport *transport; - if (sctp_verify_addr(sk, laddr, af->sockaddr_len)) + if (!af || sctp_verify_addr(sk, laddr, af->sockaddr_len)) return NULL; addr_asoc = sctp_endpoint_lookup_assoc(sctp_sk(sk)->ep, @@ -592,7 +592,7 @@ static int sctp_send_asconf_add_ip(struct sock *sk, list_for_each_entry(trans, &asoc->peer.transport_addr_list, transports) { /* Clear the source and route cache */ - dst_release(trans->dst); + sctp_transport_dst_release(trans); trans->cwnd = min(4*asoc->pathmtu, max_t(__u32, 2*asoc->pathmtu, 4380)); trans->ssthresh = asoc->peer.i.a_rwnd; @@ -843,7 +843,7 @@ skip_mkasconf: */ list_for_each_entry(transport, &asoc->peer.transport_addr_list, transports) { - dst_release(transport->dst); + sctp_transport_dst_release(transport); sctp_transport_route(transport, NULL, sctp_sk(asoc->base.sk)); } @@ -3818,6 +3818,58 @@ out: return retval; } +static int sctp_setsockopt_reset_assoc(struct sock *sk, + char __user *optval, + unsigned int optlen) +{ + struct sctp_association *asoc; + sctp_assoc_t associd; + int retval = -EINVAL; + + if (optlen != sizeof(associd)) + goto out; + + if (copy_from_user(&associd, optval, optlen)) { + retval = -EFAULT; + goto out; + } + + asoc = sctp_id2assoc(sk, associd); + if (!asoc) + goto out; + + retval = sctp_send_reset_assoc(asoc); + +out: + return retval; +} + +static int sctp_setsockopt_add_streams(struct sock *sk, + char __user *optval, + unsigned int optlen) +{ + struct sctp_association *asoc; + struct sctp_add_streams params; + int retval = -EINVAL; + + if (optlen != sizeof(params)) + goto out; + + if (copy_from_user(¶ms, optval, optlen)) { + retval = -EFAULT; + goto out; + } + + asoc = sctp_id2assoc(sk, params.sas_assoc_id); + if (!asoc) + goto out; + + retval = sctp_send_add_streams(asoc, ¶ms); + +out: + return retval; +} + /* API 6.2 setsockopt(), getsockopt() * * Applications use setsockopt() and getsockopt() to set or retrieve @@ -3990,6 +4042,12 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_RESET_STREAMS: retval = sctp_setsockopt_reset_streams(sk, optval, optlen); break; + case SCTP_RESET_ASSOC: + retval = sctp_setsockopt_reset_assoc(sk, optval, optlen); + break; + case SCTP_ADD_STREAMS: + retval = sctp_setsockopt_add_streams(sk, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; @@ -7540,7 +7598,8 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, */ release_sock(sk); current_timeo = schedule_timeout(current_timeo); - BUG_ON(sk != asoc->base.sk); + if (sk != asoc->base.sk) + goto do_error; lock_sock(sk); *timeo_p = current_timeo; diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 13d5e07dcd7d..eb02490245ba 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -136,8 +136,10 @@ int sctp_send_reset_streams(struct sctp_association *asoc, goto out; chunk = sctp_make_strreset_req(asoc, str_nums, str_list, out, in); - if (!chunk) + if (!chunk) { + retval = -ENOMEM; goto out; + } if (out) { if (str_nums) @@ -149,7 +151,54 @@ int sctp_send_reset_streams(struct sctp_association *asoc, stream->out[i].state = SCTP_STREAM_CLOSED; } + asoc->strreset_chunk = chunk; + sctp_chunk_hold(asoc->strreset_chunk); + + retval = sctp_send_reconf(asoc, chunk); + if (retval) { + sctp_chunk_put(asoc->strreset_chunk); + asoc->strreset_chunk = NULL; + if (!out) + goto out; + + if (str_nums) + for (i = 0; i < str_nums; i++) + stream->out[str_list[i]].state = + SCTP_STREAM_OPEN; + else + for (i = 0; i < stream->outcnt; i++) + stream->out[i].state = SCTP_STREAM_OPEN; + + goto out; + } + asoc->strreset_outstanding = out + in; + +out: + return retval; +} + +int sctp_send_reset_assoc(struct sctp_association *asoc) +{ + struct sctp_chunk *chunk = NULL; + int retval; + __u16 i; + + if (!asoc->peer.reconf_capable || + !(asoc->strreset_enable & SCTP_ENABLE_RESET_ASSOC_REQ)) + return -ENOPROTOOPT; + + if (asoc->strreset_outstanding) + return -EINPROGRESS; + + chunk = sctp_make_strreset_tsnreq(asoc); + if (!chunk) + return -ENOMEM; + + /* Block further xmit of data until this request is completed */ + for (i = 0; i < asoc->stream->outcnt; i++) + asoc->stream->out[i].state = SCTP_STREAM_CLOSED; + asoc->strreset_chunk = chunk; sctp_chunk_hold(asoc->strreset_chunk); @@ -157,8 +206,91 @@ int sctp_send_reset_streams(struct sctp_association *asoc, if (retval) { sctp_chunk_put(asoc->strreset_chunk); asoc->strreset_chunk = NULL; + + for (i = 0; i < asoc->stream->outcnt; i++) + asoc->stream->out[i].state = SCTP_STREAM_OPEN; + + return retval; } + asoc->strreset_outstanding = 1; + + return 0; +} + +int sctp_send_add_streams(struct sctp_association *asoc, + struct sctp_add_streams *params) +{ + struct sctp_stream *stream = asoc->stream; + struct sctp_chunk *chunk = NULL; + int retval = -ENOMEM; + __u32 outcnt, incnt; + __u16 out, in; + + if (!asoc->peer.reconf_capable || + !(asoc->strreset_enable & SCTP_ENABLE_CHANGE_ASSOC_REQ)) { + retval = -ENOPROTOOPT; + goto out; + } + + if (asoc->strreset_outstanding) { + retval = -EINPROGRESS; + goto out; + } + + out = params->sas_outstrms; + in = params->sas_instrms; + outcnt = stream->outcnt + out; + incnt = stream->incnt + in; + if (outcnt > SCTP_MAX_STREAM || incnt > SCTP_MAX_STREAM || + (!out && !in)) { + retval = -EINVAL; + goto out; + } + + if (out) { + struct sctp_stream_out *streamout; + + streamout = krealloc(stream->out, outcnt * sizeof(*streamout), + GFP_KERNEL); + if (!streamout) + goto out; + + memset(streamout + stream->outcnt, 0, out * sizeof(*streamout)); + stream->out = streamout; + } + + if (in) { + struct sctp_stream_in *streamin; + + streamin = krealloc(stream->in, incnt * sizeof(*streamin), + GFP_KERNEL); + if (!streamin) + goto out; + + memset(streamin + stream->incnt, 0, in * sizeof(*streamin)); + stream->in = streamin; + } + + chunk = sctp_make_strreset_addstrm(asoc, out, in); + if (!chunk) + goto out; + + asoc->strreset_chunk = chunk; + sctp_chunk_hold(asoc->strreset_chunk); + + retval = sctp_send_reconf(asoc, chunk); + if (retval) { + sctp_chunk_put(asoc->strreset_chunk); + asoc->strreset_chunk = NULL; + goto out; + } + + stream->incnt = incnt; + stream->outcnt = outcnt; + + asoc->strreset_outstanding = !!out + !!in; + out: return retval; } diff --git a/net/sctp/transport.c b/net/sctp/transport.c index baa1ac00d7b5..5b63ceb3bf37 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -240,7 +240,7 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) { /* If we don't have a fresh route, look one up */ if (!transport->dst || transport->dst->obsolete) { - dst_release(transport->dst); + sctp_transport_dst_release(transport); transport->af_specific->get_dst(transport, &transport->saddr, &transport->fl, sk); } @@ -672,3 +672,17 @@ void sctp_transport_immediate_rtx(struct sctp_transport *t) sctp_transport_hold(t); } } + +/* Drop dst */ +void sctp_transport_dst_release(struct sctp_transport *t) +{ + dst_release(t->dst); + t->dst = NULL; + t->dst_pending_confirm = 0; +} + +/* Schedule neighbour confirm */ +void sctp_transport_dst_confirm(struct sctp_transport *t) +{ + t->dst_pending_confirm = 1; +} diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c index dc6fb79a361f..25d9a9cf7b66 100644 --- a/net/sunrpc/auth_gss/gss_rpc_xdr.c +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c @@ -260,7 +260,7 @@ static int gssx_dec_option_array(struct xdr_stream *xdr, if (!oa->data) return -ENOMEM; - creds = kmalloc(sizeof(struct svc_cred), GFP_KERNEL); + creds = kzalloc(sizeof(struct svc_cred), GFP_KERNEL); if (!creds) { kfree(oa->data); return -ENOMEM; diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index 288e35c2d8f4..cb1e48e54eb1 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -4,6 +4,7 @@ * Support for backward direction RPCs on RPC/RDMA (server-side). */ +#include <linux/module.h> #include <linux/sunrpc/svc_rdma.h> #include "xprt_rdma.h" diff --git a/net/wireless/core.c b/net/wireless/core.c index 903fc419217a..e55e05bc4805 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -626,7 +626,8 @@ int wiphy_register(struct wiphy *wiphy) if (WARN_ON((wiphy->interface_modes & BIT(NL80211_IFTYPE_NAN)) && (!rdev->ops->start_nan || !rdev->ops->stop_nan || - !rdev->ops->add_nan_func || !rdev->ops->del_nan_func))) + !rdev->ops->add_nan_func || !rdev->ops->del_nan_func || + !(wiphy->nan_supported_bands & BIT(NL80211_BAND_2GHZ))))) return -EINVAL; #ifndef CONFIG_WIRELESS_WDS diff --git a/net/wireless/debugfs.c b/net/wireless/debugfs.c index 5d453916a417..30fc6eb352bc 100644 --- a/net/wireless/debugfs.c +++ b/net/wireless/debugfs.c @@ -17,7 +17,7 @@ static ssize_t name## _read(struct file *file, char __user *userbuf, \ size_t count, loff_t *ppos) \ { \ - struct wiphy *wiphy= file->private_data; \ + struct wiphy *wiphy = file->private_data; \ char buf[buflen]; \ int res; \ \ @@ -29,14 +29,14 @@ static const struct file_operations name## _ops = { \ .read = name## _read, \ .open = simple_open, \ .llseek = generic_file_llseek, \ -}; +} DEBUGFS_READONLY_FILE(rts_threshold, 20, "%d", - wiphy->rts_threshold) + wiphy->rts_threshold); DEBUGFS_READONLY_FILE(fragmentation_threshold, 20, "%d", wiphy->frag_threshold); DEBUGFS_READONLY_FILE(short_retry_limit, 20, "%d", - wiphy->retry_short) + wiphy->retry_short); DEBUGFS_READONLY_FILE(long_retry_limit, 20, "%d", wiphy->retry_long); @@ -103,7 +103,7 @@ static const struct file_operations ht40allow_map_ops = { }; #define DEBUGFS_ADD(name) \ - debugfs_create_file(#name, S_IRUGO, phyd, &rdev->wiphy, &name## _ops); + debugfs_create_file(#name, 0444, phyd, &rdev->wiphy, &name## _ops) void cfg80211_debugfs_rdev_add(struct cfg80211_registered_device *rdev) { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 63dfa60a29ef..d7f8be4e321a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3,7 +3,7 @@ * * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright 2015-2016 Intel Deutschland GmbH + * Copyright 2015-2017 Intel Deutschland GmbH */ #include <linux/if.h> @@ -398,7 +398,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { }, [NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR] = { .len = ETH_ALEN }, [NL80211_ATTR_NAN_MASTER_PREF] = { .type = NLA_U8 }, - [NL80211_ATTR_NAN_DUAL] = { .type = NLA_U8 }, + [NL80211_ATTR_BANDS] = { .type = NLA_U32 }, [NL80211_ATTR_NAN_FUNC] = { .type = NLA_NESTED }, [NL80211_ATTR_FILS_KEK] = { .type = NLA_BINARY, .len = FILS_MAX_KEK_LEN }, @@ -1886,6 +1886,10 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, } } + if (nla_put_u32(msg, NL80211_ATTR_BANDS, + rdev->wiphy.nan_supported_bands)) + goto nla_put_failure; + /* done */ state->split_start = 0; break; @@ -3743,6 +3747,49 @@ static int nl80211_parse_beacon(struct nlattr *attrs[], return 0; } +static void nl80211_check_ap_rate_selectors(struct cfg80211_ap_settings *params, + const u8 *rates) +{ + int i; + + if (!rates) + return; + + for (i = 0; i < rates[1]; i++) { + if (rates[2 + i] == BSS_MEMBERSHIP_SELECTOR_HT_PHY) + params->ht_required = true; + if (rates[2 + i] == BSS_MEMBERSHIP_SELECTOR_VHT_PHY) + params->vht_required = true; + } +} + +/* + * Since the nl80211 API didn't include, from the beginning, attributes about + * HT/VHT requirements/capabilities, we parse them out of the IEs for the + * benefit of drivers that rebuild IEs in the firmware. + */ +static void nl80211_calculate_ap_params(struct cfg80211_ap_settings *params) +{ + const struct cfg80211_beacon_data *bcn = ¶ms->beacon; + size_t ies_len = bcn->beacon_ies_len; + const u8 *ies = bcn->beacon_ies; + const u8 *rates; + const u8 *cap; + + rates = cfg80211_find_ie(WLAN_EID_SUPP_RATES, ies, ies_len); + nl80211_check_ap_rate_selectors(params, rates); + + rates = cfg80211_find_ie(WLAN_EID_EXT_SUPP_RATES, ies, ies_len); + nl80211_check_ap_rate_selectors(params, rates); + + cap = cfg80211_find_ie(WLAN_EID_HT_CAPABILITY, ies, ies_len); + if (cap && cap[1] >= sizeof(*params->ht_cap)) + params->ht_cap = (void *)(cap + 2); + cap = cfg80211_find_ie(WLAN_EID_VHT_CAPABILITY, ies, ies_len); + if (cap && cap[1] >= sizeof(*params->vht_cap)) + params->vht_cap = (void *)(cap + 2); +} + static bool nl80211_get_ap_channel(struct cfg80211_registered_device *rdev, struct cfg80211_ap_settings *params) { @@ -3971,6 +4018,8 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) return PTR_ERR(params.acl); } + nl80211_calculate_ap_params(¶ms); + wdev_lock(wdev); err = rdev_start_ap(rdev, dev, ¶ms); if (!err) { @@ -5921,6 +5970,7 @@ do { \ break; } cfg->ht_opmode = ht_opmode; + mask |= (1 << (NL80211_MESHCONF_HT_OPMODE - 1)); } FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathToRootTimeout, 1, 65535, mask, @@ -6867,7 +6917,7 @@ nl80211_parse_sched_scan_plans(struct wiphy *wiphy, int n_plans, static struct cfg80211_sched_scan_request * nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, - struct nlattr **attrs) + struct nlattr **attrs, int max_match_sets) { struct cfg80211_sched_scan_request *request; struct nlattr *attr; @@ -6932,7 +6982,7 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, if (!n_match_sets && default_match_rssi != NL80211_SCAN_RSSI_THOLD_OFF) n_match_sets = 1; - if (n_match_sets > wiphy->max_match_sets) + if (n_match_sets > max_match_sets) return ERR_PTR(-EINVAL); if (attrs[NL80211_ATTR_IE]) @@ -7232,7 +7282,8 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, return -EINPROGRESS; sched_scan_req = nl80211_parse_sched_scan(&rdev->wiphy, wdev, - info->attrs); + info->attrs, + rdev->wiphy.max_match_sets); err = PTR_ERR_OR_ZERO(sched_scan_req); if (err) @@ -8585,6 +8636,12 @@ static int nl80211_testmode_dump(struct sk_buff *skb, * so we need to offset by 1. */ phy_idx = cb->args[0] - 1; + + rdev = cfg80211_rdev_by_wiphy_idx(phy_idx); + if (!rdev) { + err = -ENOENT; + goto out_err; + } } else { struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam); @@ -8599,7 +8656,6 @@ static int nl80211_testmode_dump(struct sk_buff *skb, goto out_err; } phy_idx = rdev->wiphy_idx; - rdev = NULL; if (attrbuf[NL80211_ATTR_TESTDATA]) cb->args[1] = (long)attrbuf[NL80211_ATTR_TESTDATA]; @@ -8610,12 +8666,6 @@ static int nl80211_testmode_dump(struct sk_buff *skb, data_len = nla_len((void *)cb->args[1]); } - rdev = cfg80211_rdev_by_wiphy_idx(phy_idx); - if (!rdev) { - err = -ENOENT; - goto out_err; - } - if (!rdev->ops->testmode_dump) { err = -EOPNOTSUPP; goto out_err; @@ -9429,6 +9479,7 @@ nl80211_attr_cqm_policy[NL80211_ATTR_CQM_MAX + 1] = { [NL80211_ATTR_CQM_TXE_RATE] = { .type = NLA_U32 }, [NL80211_ATTR_CQM_TXE_PKTS] = { .type = NLA_U32 }, [NL80211_ATTR_CQM_TXE_INTVL] = { .type = NLA_U32 }, + [NL80211_ATTR_CQM_RSSI_LEVEL] = { .type = NLA_S32 }, }; static int nl80211_set_cqm_txe(struct genl_info *info, @@ -10045,7 +10096,8 @@ static int nl80211_parse_wowlan_nd(struct cfg80211_registered_device *rdev, if (err) goto out; - trig->nd_config = nl80211_parse_sched_scan(&rdev->wiphy, NULL, tb); + trig->nd_config = nl80211_parse_sched_scan(&rdev->wiphy, NULL, tb, + wowlan->max_nd_match_sets); err = PTR_ERR_OR_ZERO(trig->nd_config); if (err) trig->nd_config = NULL; @@ -10730,15 +10782,22 @@ static int nl80211_start_nan(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[NL80211_ATTR_NAN_MASTER_PREF]) return -EINVAL; - if (!info->attrs[NL80211_ATTR_NAN_DUAL]) - return -EINVAL; - conf.master_pref = nla_get_u8(info->attrs[NL80211_ATTR_NAN_MASTER_PREF]); if (!conf.master_pref) return -EINVAL; - conf.dual = nla_get_u8(info->attrs[NL80211_ATTR_NAN_DUAL]); + if (info->attrs[NL80211_ATTR_BANDS]) { + u32 bands = nla_get_u32(info->attrs[NL80211_ATTR_BANDS]); + + if (bands & ~(u32)wdev->wiphy->nan_supported_bands) + return -EOPNOTSUPP; + + if (bands && !(bands & BIT(NL80211_BAND_2GHZ))) + return -EINVAL; + + conf.bands = bands; + } err = rdev_start_nan(rdev, wdev, &conf); if (err) @@ -11103,9 +11162,17 @@ static int nl80211_nan_change_config(struct sk_buff *skb, changed |= CFG80211_NAN_CONF_CHANGED_PREF; } - if (info->attrs[NL80211_ATTR_NAN_DUAL]) { - conf.dual = nla_get_u8(info->attrs[NL80211_ATTR_NAN_DUAL]); - changed |= CFG80211_NAN_CONF_CHANGED_DUAL; + if (info->attrs[NL80211_ATTR_BANDS]) { + u32 bands = nla_get_u32(info->attrs[NL80211_ATTR_BANDS]); + + if (bands & ~(u32)wdev->wiphy->nan_supported_bands) + return -EOPNOTSUPP; + + if (bands && !(bands & BIT(NL80211_BAND_2GHZ))) + return -EINVAL; + + conf.bands = bands; + changed |= CFG80211_NAN_CONF_CHANGED_BANDS; } if (!changed) @@ -13913,11 +13980,11 @@ static void cfg80211_send_cqm(struct sk_buff *msg, gfp_t gfp) void cfg80211_cqm_rssi_notify(struct net_device *dev, enum nl80211_cqm_rssi_threshold_event rssi_event, - gfp_t gfp) + s32 rssi_level, gfp_t gfp) { struct sk_buff *msg; - trace_cfg80211_cqm_rssi_notify(dev, rssi_event); + trace_cfg80211_cqm_rssi_notify(dev, rssi_event, rssi_level); if (WARN_ON(rssi_event != NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW && rssi_event != NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH)) @@ -13931,6 +13998,10 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, rssi_event)) goto nla_put_failure; + if (rssi_level && nla_put_s32(msg, NL80211_ATTR_CQM_RSSI_LEVEL, + rssi_level)) + goto nla_put_failure; + cfg80211_send_cqm(msg, gfp); return; diff --git a/net/wireless/trace.h b/net/wireless/trace.h index ea1b47e04fa4..776e80cef9b4 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1915,18 +1915,18 @@ TRACE_EVENT(rdev_start_nan, WIPHY_ENTRY WDEV_ENTRY __field(u8, master_pref) - __field(u8, dual); + __field(u8, bands); ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; __entry->master_pref = conf->master_pref; - __entry->dual = conf->dual; + __entry->bands = conf->bands; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT - ", master preference: %u, dual: %d", + ", master preference: %u, bands: 0x%0x", WIPHY_PR_ARG, WDEV_PR_ARG, __entry->master_pref, - __entry->dual) + __entry->bands) ); TRACE_EVENT(rdev_nan_change_conf, @@ -1937,20 +1937,20 @@ TRACE_EVENT(rdev_nan_change_conf, WIPHY_ENTRY WDEV_ENTRY __field(u8, master_pref) - __field(u8, dual); + __field(u8, bands); __field(u32, changes); ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; __entry->master_pref = conf->master_pref; - __entry->dual = conf->dual; + __entry->bands = conf->bands; __entry->changes = changes; ), TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT - ", master preference: %u, dual: %d, changes: %x", + ", master preference: %u, bands: 0x%0x, changes: %x", WIPHY_PR_ARG, WDEV_PR_ARG, __entry->master_pref, - __entry->dual, __entry->changes) + __entry->bands, __entry->changes) ); DEFINE_EVENT(wiphy_wdev_evt, rdev_stop_nan, @@ -2490,18 +2490,21 @@ TRACE_EVENT(cfg80211_mgmt_tx_status, TRACE_EVENT(cfg80211_cqm_rssi_notify, TP_PROTO(struct net_device *netdev, - enum nl80211_cqm_rssi_threshold_event rssi_event), - TP_ARGS(netdev, rssi_event), + enum nl80211_cqm_rssi_threshold_event rssi_event, + s32 rssi_level), + TP_ARGS(netdev, rssi_event, rssi_level), TP_STRUCT__entry( NETDEV_ENTRY __field(enum nl80211_cqm_rssi_threshold_event, rssi_event) + __field(s32, rssi_level) ), TP_fast_assign( NETDEV_ASSIGN; __entry->rssi_event = rssi_event; + __entry->rssi_level = rssi_level; ), - TP_printk(NETDEV_PR_FMT ", rssi event: %d", - NETDEV_PR_ARG, __entry->rssi_event) + TP_printk(NETDEV_PR_FMT ", rssi event: %d, level: %d", + NETDEV_PR_ARG, __entry->rssi_event, __entry->rssi_level) ); TRACE_EVENT(cfg80211_reg_can_beacon, diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index a484451c72ec..286ed25c1a69 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -4,6 +4,7 @@ config XFRM bool depends on NET + select GRO_CELLS config XFRM_OFFLOAD bool diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 04ed1a1ae019..5f3e87866438 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2843,6 +2843,23 @@ static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst, return dst->path->ops->neigh_lookup(dst, skb, daddr); } +static void xfrm_confirm_neigh(const struct dst_entry *dst, const void *daddr) +{ + const struct dst_entry *path = dst->path; + + for (; dst != path; dst = dst->child) { + const struct xfrm_state *xfrm = dst->xfrm; + + if (xfrm->props.mode == XFRM_MODE_TRANSPORT) + continue; + if (xfrm->type->flags & XFRM_TYPE_REMOTE_COADDR) + daddr = xfrm->coaddr; + else if (!(xfrm->type->flags & XFRM_TYPE_LOCAL_COADDR)) + daddr = &xfrm->id.daddr; + } + path->ops->confirm_neigh(path, daddr); +} + int xfrm_policy_register_afinfo(const struct xfrm_policy_afinfo *afinfo, int family) { int err = 0; @@ -2869,6 +2886,8 @@ int xfrm_policy_register_afinfo(const struct xfrm_policy_afinfo *afinfo, int fam dst_ops->link_failure = xfrm_link_failure; if (likely(dst_ops->neigh_lookup == NULL)) dst_ops->neigh_lookup = xfrm_neigh_lookup; + if (likely(!dst_ops->confirm_neigh)) + dst_ops->confirm_neigh = xfrm_confirm_neigh; rcu_assign_pointer(xfrm_policy_afinfo[family], afinfo); } spin_unlock(&xfrm_policy_afinfo_lock); |