diff options
Diffstat (limited to 'net')
320 files changed, 2800 insertions, 1900 deletions
diff --git a/net/6lowpan/Kconfig b/net/6lowpan/Kconfig index 4c1f4c0aa58a..d8fc459492b0 100644 --- a/net/6lowpan/Kconfig +++ b/net/6lowpan/Kconfig @@ -2,7 +2,7 @@ menuconfig 6LOWPAN tristate "6LoWPAN Support" depends on IPV6 - ---help--- + help This enables IPv6 over Low power Wireless Personal Area Network - "6LoWPAN" which is supported by IEEE 802.15.4 or Bluetooth stacks. @@ -10,7 +10,7 @@ config 6LOWPAN_DEBUGFS bool "6LoWPAN debugfs support" depends on 6LOWPAN depends on DEBUG_FS - ---help--- + help This enables 6LoWPAN debugfs support. For example to manipulate IPHC context information at runtime. @@ -18,7 +18,7 @@ menuconfig 6LOWPAN_NHC tristate "Next Header and Generic Header Compression Support" depends on 6LOWPAN default y - ---help--- + help Support for next header and generic header compression defined in RFC6282 and RFC7400. @@ -27,78 +27,78 @@ if 6LOWPAN_NHC config 6LOWPAN_NHC_DEST tristate "Destination Options Header Support" default y - ---help--- + help 6LoWPAN IPv6 Destination Options Header compression according to RFC6282. config 6LOWPAN_NHC_FRAGMENT tristate "Fragment Header Support" default y - ---help--- + help 6LoWPAN IPv6 Fragment Header compression according to RFC6282. config 6LOWPAN_NHC_HOP tristate "Hop-by-Hop Options Header Support" default y - ---help--- + help 6LoWPAN IPv6 Hop-by-Hop Options Header compression according to RFC6282. config 6LOWPAN_NHC_IPV6 tristate "IPv6 Header Support" default y - ---help--- + help 6LoWPAN IPv6 Header compression according to RFC6282. config 6LOWPAN_NHC_MOBILITY tristate "Mobility Header Support" default y - ---help--- + help 6LoWPAN IPv6 Mobility Header compression according to RFC6282. config 6LOWPAN_NHC_ROUTING tristate "Routing Header Support" default y - ---help--- + help 6LoWPAN IPv6 Routing Header compression according to RFC6282. config 6LOWPAN_NHC_UDP tristate "UDP Header Support" default y - ---help--- + help 6LoWPAN IPv6 UDP Header compression according to RFC6282. config 6LOWPAN_GHC_EXT_HDR_HOP tristate "GHC Hop-by-Hop Options Header Support" - ---help--- + help 6LoWPAN IPv6 Hop-by-Hop option generic header compression according to RFC7400. config 6LOWPAN_GHC_UDP tristate "GHC UDP Support" - ---help--- + help 6LoWPAN IPv6 UDP generic header compression according to RFC7400. config 6LOWPAN_GHC_ICMPV6 tristate "GHC ICMPv6 Support" - ---help--- + help 6LoWPAN IPv6 ICMPv6 generic header compression according to RFC7400. config 6LOWPAN_GHC_EXT_HDR_DEST tristate "GHC Destination Options Header Support" - ---help--- + help 6LoWPAN IPv6 destination option generic header compression according to RFC7400. config 6LOWPAN_GHC_EXT_HDR_FRAG tristate "GHC Fragmentation Options Header Support" - ---help--- + help 6LoWPAN IPv6 fragmentation option generic header compression according to RFC7400. config 6LOWPAN_GHC_EXT_HDR_ROUTE tristate "GHC Routing Options Header Support" - ---help--- + help 6LoWPAN IPv6 routing option generic header compression according to RFC7400. diff --git a/net/8021q/Kconfig b/net/8021q/Kconfig index 5510b4b90ff0..8bf7a1765b78 100644 --- a/net/8021q/Kconfig +++ b/net/8021q/Kconfig @@ -5,7 +5,7 @@ config VLAN_8021Q tristate "802.1Q/802.1ad VLAN Support" - ---help--- + help Select this and you will be able to create 802.1Q VLAN interfaces on your Ethernet interfaces. 802.1Q VLAN supports almost everything a regular Ethernet interface does, including diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index f00bb57f0f60..3dd7c972677b 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -494,6 +494,7 @@ static void vlan_dev_set_rx_mode(struct net_device *vlan_dev) * separate class since they always nest. */ static struct lock_class_key vlan_netdev_xmit_lock_key; +static struct lock_class_key vlan_netdev_addr_lock_key; static void vlan_dev_set_lockdep_one(struct net_device *dev, struct netdev_queue *txq, @@ -504,6 +505,8 @@ static void vlan_dev_set_lockdep_one(struct net_device *dev, static void vlan_dev_set_lockdep_class(struct net_device *dev) { + lockdep_set_class(&dev->addr_list_lock, + &vlan_netdev_addr_lock_key); netdev_for_each_tx_queue(dev, vlan_dev_set_lockdep_one, NULL); } diff --git a/net/9p/mod.c b/net/9p/mod.c index c1b62428da7b..5126566850bd 100644 --- a/net/9p/mod.c +++ b/net/9p/mod.c @@ -189,3 +189,4 @@ MODULE_AUTHOR("Latchesar Ionkov <lucho@ionkov.net>"); MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>"); MODULE_AUTHOR("Ron Minnich <rminnich@lanl.gov>"); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Plan 9 Resource Sharing Support (9P2000)"); diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 13cd683a658a..12ecacf0c55f 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -362,6 +362,10 @@ static void p9_read_work(struct work_struct *work) if (m->rreq->status == REQ_STATUS_SENT) { list_del(&m->rreq->req_list); p9_client_cb(m->client, m->rreq, REQ_STATUS_RCVD); + } else if (m->rreq->status == REQ_STATUS_FLSHD) { + /* Ignore replies associated with a cancelled request. */ + p9_debug(P9_DEBUG_TRANS, + "Ignore replies associated with a cancelled request\n"); } else { spin_unlock(&m->client->lock); p9_debug(P9_DEBUG_ERROR, @@ -703,11 +707,20 @@ static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req) { p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req); + spin_lock(&client->lock); + /* Ignore cancelled request if message has been received + * before lock. + */ + if (req->status == REQ_STATUS_RCVD) { + spin_unlock(&client->lock); + return 0; + } + /* we haven't received a response for oldreq, * remove it from the list. */ - spin_lock(&client->lock); list_del(&req->req_list); + req->status = REQ_STATUS_FLSHD; spin_unlock(&client->lock); p9_req_put(req); @@ -803,20 +816,28 @@ static int p9_fd_open(struct p9_client *client, int rfd, int wfd) return -ENOMEM; ts->rd = fget(rfd); + if (!ts->rd) + goto out_free_ts; + if (!(ts->rd->f_mode & FMODE_READ)) + goto out_put_rd; ts->wr = fget(wfd); - if (!ts->rd || !ts->wr) { - if (ts->rd) - fput(ts->rd); - if (ts->wr) - fput(ts->wr); - kfree(ts); - return -EIO; - } + if (!ts->wr) + goto out_put_rd; + if (!(ts->wr->f_mode & FMODE_WRITE)) + goto out_put_wr; client->trans = ts; client->status = Connected; return 0; + +out_put_wr: + fput(ts->wr); +out_put_rd: + fput(ts->rd); +out_free_ts: + kfree(ts); + return -EIO; } static int p9_socket_open(struct p9_client *client, struct socket *csocket) diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index 3963eb11c3fb..3debad93be1a 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -43,8 +43,8 @@ #include <net/9p/transport.h> #define XEN_9PFS_NUM_RINGS 2 -#define XEN_9PFS_RING_ORDER 6 -#define XEN_9PFS_RING_SIZE XEN_FLEX_RING_SIZE(XEN_9PFS_RING_ORDER) +#define XEN_9PFS_RING_ORDER 9 +#define XEN_9PFS_RING_SIZE(ring) XEN_FLEX_RING_SIZE(ring->intf->ring_order) struct xen_9pfs_header { uint32_t size; @@ -132,8 +132,8 @@ static bool p9_xen_write_todo(struct xen_9pfs_dataring *ring, RING_IDX size) prod = ring->intf->out_prod; virt_mb(); - return XEN_9PFS_RING_SIZE - - xen_9pfs_queued(prod, cons, XEN_9PFS_RING_SIZE) >= size; + return XEN_9PFS_RING_SIZE(ring) - + xen_9pfs_queued(prod, cons, XEN_9PFS_RING_SIZE(ring)) >= size; } static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req) @@ -167,17 +167,18 @@ again: prod = ring->intf->out_prod; virt_mb(); - if (XEN_9PFS_RING_SIZE - xen_9pfs_queued(prod, cons, - XEN_9PFS_RING_SIZE) < size) { + if (XEN_9PFS_RING_SIZE(ring) - + xen_9pfs_queued(prod, cons, XEN_9PFS_RING_SIZE(ring)) < size) { spin_unlock_irqrestore(&ring->lock, flags); goto again; } - masked_prod = xen_9pfs_mask(prod, XEN_9PFS_RING_SIZE); - masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE); + masked_prod = xen_9pfs_mask(prod, XEN_9PFS_RING_SIZE(ring)); + masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE(ring)); xen_9pfs_write_packet(ring->data.out, p9_req->tc.sdata, size, - &masked_prod, masked_cons, XEN_9PFS_RING_SIZE); + &masked_prod, masked_cons, + XEN_9PFS_RING_SIZE(ring)); p9_req->status = REQ_STATUS_SENT; virt_wmb(); /* write ring before updating pointer */ @@ -207,19 +208,19 @@ static void p9_xen_response(struct work_struct *work) prod = ring->intf->in_prod; virt_rmb(); - if (xen_9pfs_queued(prod, cons, XEN_9PFS_RING_SIZE) < + if (xen_9pfs_queued(prod, cons, XEN_9PFS_RING_SIZE(ring)) < sizeof(h)) { notify_remote_via_irq(ring->irq); return; } - masked_prod = xen_9pfs_mask(prod, XEN_9PFS_RING_SIZE); - masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE); + masked_prod = xen_9pfs_mask(prod, XEN_9PFS_RING_SIZE(ring)); + masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE(ring)); /* First, read just the header */ xen_9pfs_read_packet(&h, ring->data.in, sizeof(h), masked_prod, &masked_cons, - XEN_9PFS_RING_SIZE); + XEN_9PFS_RING_SIZE(ring)); req = p9_tag_lookup(priv->client, h.tag); if (!req || req->status != REQ_STATUS_SENT) { @@ -233,11 +234,11 @@ static void p9_xen_response(struct work_struct *work) memcpy(&req->rc, &h, sizeof(h)); req->rc.offset = 0; - masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE); + masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE(ring)); /* Then, read the whole packet (including the header) */ xen_9pfs_read_packet(req->rc.sdata, ring->data.in, h.size, masked_prod, &masked_cons, - XEN_9PFS_RING_SIZE); + XEN_9PFS_RING_SIZE(ring)); virt_mb(); cons += h.size; @@ -267,7 +268,7 @@ static irqreturn_t xen_9pfs_front_event_handler(int irq, void *r) static struct p9_trans_module p9_xen_trans = { .name = "xen", - .maxsize = 1 << (XEN_9PFS_RING_ORDER + XEN_PAGE_SHIFT), + .maxsize = 1 << (XEN_9PFS_RING_ORDER + XEN_PAGE_SHIFT - 2), .def = 1, .create = p9_xen_create, .close = p9_xen_close, @@ -295,14 +296,16 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv) if (priv->rings[i].irq > 0) unbind_from_irqhandler(priv->rings[i].irq, priv->dev); if (priv->rings[i].data.in) { - for (j = 0; j < (1 << XEN_9PFS_RING_ORDER); j++) { + for (j = 0; + j < (1 << priv->rings[i].intf->ring_order); + j++) { grant_ref_t ref; ref = priv->rings[i].intf->ref[j]; gnttab_end_foreign_access(ref, 0, 0); } free_pages((unsigned long)priv->rings[i].data.in, - XEN_9PFS_RING_ORDER - + priv->rings[i].intf->ring_order - (PAGE_SHIFT - XEN_PAGE_SHIFT)); } gnttab_end_foreign_access(priv->rings[i].ref, 0, 0); @@ -323,7 +326,8 @@ static int xen_9pfs_front_remove(struct xenbus_device *dev) } static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev, - struct xen_9pfs_dataring *ring) + struct xen_9pfs_dataring *ring, + unsigned int order) { int i = 0; int ret = -ENOMEM; @@ -342,21 +346,21 @@ static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev, goto out; ring->ref = ret; bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - XEN_9PFS_RING_ORDER - (PAGE_SHIFT - XEN_PAGE_SHIFT)); + order - (PAGE_SHIFT - XEN_PAGE_SHIFT)); if (!bytes) { ret = -ENOMEM; goto out; } - for (; i < (1 << XEN_9PFS_RING_ORDER); i++) { + for (; i < (1 << order); i++) { ret = gnttab_grant_foreign_access( dev->otherend_id, virt_to_gfn(bytes) + i, 0); if (ret < 0) goto out; ring->intf->ref[i] = ret; } - ring->intf->ring_order = XEN_9PFS_RING_ORDER; + ring->intf->ring_order = order; ring->data.in = bytes; - ring->data.out = bytes + XEN_9PFS_RING_SIZE; + ring->data.out = bytes + XEN_FLEX_RING_SIZE(order); ret = xenbus_alloc_evtchn(dev, &ring->evtchn); if (ret) @@ -374,7 +378,7 @@ out: for (i--; i >= 0; i--) gnttab_end_foreign_access(ring->intf->ref[i], 0, 0); free_pages((unsigned long)bytes, - XEN_9PFS_RING_ORDER - + ring->intf->ring_order - (PAGE_SHIFT - XEN_PAGE_SHIFT)); } gnttab_end_foreign_access(ring->ref, 0, 0); @@ -404,8 +408,10 @@ static int xen_9pfs_front_probe(struct xenbus_device *dev, return -EINVAL; max_ring_order = xenbus_read_unsigned(dev->otherend, "max-ring-page-order", 0); - if (max_ring_order < XEN_9PFS_RING_ORDER) - return -EINVAL; + if (max_ring_order > XEN_9PFS_RING_ORDER) + max_ring_order = XEN_9PFS_RING_ORDER; + if (p9_xen_trans.maxsize > XEN_FLEX_RING_SIZE(max_ring_order)) + p9_xen_trans.maxsize = XEN_FLEX_RING_SIZE(max_ring_order) / 2; priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) @@ -422,7 +428,8 @@ static int xen_9pfs_front_probe(struct xenbus_device *dev, for (i = 0; i < priv->num_rings; i++) { priv->rings[i].priv = priv; - ret = xen_9pfs_front_alloc_dataring(dev, &priv->rings[i]); + ret = xen_9pfs_front_alloc_dataring(dev, &priv->rings[i], + max_ring_order); if (ret < 0) goto error; } diff --git a/net/Kconfig b/net/Kconfig index 5c524c6ee75d..d1672280d6a4 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -8,7 +8,7 @@ menuconfig NET select NLATTR select GENERIC_NET_UTILS select BPF - ---help--- + help Unless you really know what you are doing, you should say Y here. The reason is that some programs need kernel networking support even when running on a stand-alone machine that isn't connected to any @@ -70,7 +70,7 @@ source "net/xdp/Kconfig" config INET bool "TCP/IP networking" - ---help--- + help These are the protocols used on the Internet and on most local Ethernets. It is highly recommended to say Y here (this will enlarge your kernel by about 400 KB), since some programs (e.g. the X window @@ -121,7 +121,7 @@ config NETWORK_PHY_TIMESTAMPING menuconfig NETFILTER bool "Network packet filtering framework (Netfilter)" - ---help--- + help Netfilter is a framework for filtering and mangling network packets that pass through your Linux box. @@ -192,7 +192,7 @@ config BRIDGE_NETFILTER depends on NETFILTER_ADVANCED select NETFILTER_FAMILY_BRIDGE select SKB_EXTENSIONS - ---help--- + help Enabling this option will let arptables resp. iptables see bridged ARP resp. IP traffic. If you want a bridging firewall, you probably want this option enabled. @@ -268,7 +268,7 @@ config CGROUP_NET_PRIO bool "Network priority cgroup" depends on CGROUPS select SOCK_CGROUP_DATA - ---help--- + help Cgroup subsystem for use in assigning processes to network priorities on a per-interface basis. @@ -276,7 +276,7 @@ config CGROUP_NET_CLASSID bool "Network classid cgroup" depends on CGROUPS select SOCK_CGROUP_DATA - ---help--- + help Cgroup subsystem for use as general purpose socket classid marker that is being used in cls_cgroup and for netfilter matching. @@ -294,7 +294,7 @@ config BPF_JIT bool "enable BPF Just In Time compiler" depends on HAVE_CBPF_JIT || HAVE_EBPF_JIT depends on MODULES - ---help--- + help Berkeley Packet Filter filtering capabilities are normally handled by an interpreter. This option allows kernel to generate a native code when filter is loaded in memory. This should speedup @@ -312,7 +312,7 @@ config BPF_STREAM_PARSER depends on CGROUP_BPF select STREAM_PARSER select NET_SOCK_MSG - ---help--- + help Enabling this allows a stream parser to be used with BPF_MAP_TYPE_SOCKMAP. @@ -324,7 +324,7 @@ config NET_FLOW_LIMIT bool depends on RPS default y - ---help--- + help The network stack has to drop packets when a receive processing CPU's backlog reaches netdev_max_backlog. If a few out of many active flows generate the vast majority of load, drop their traffic earlier to @@ -337,7 +337,7 @@ menu "Network testing" config NET_PKTGEN tristate "Packet Generator (USE WITH CAUTION)" depends on INET && PROC_FS - ---help--- + help This module will inject preconfigured packets, at a configurable rate, out of a given interface. It is used for network interface stress testing and performance analysis. If you don't understand @@ -352,7 +352,7 @@ config NET_PKTGEN config NET_DROP_MONITOR tristate "Network packet drop alerting service" depends on INET && TRACEPOINTS - ---help--- + help This feature provides an alerting service to userspace in the event that packets are discarded in the network stack. Alerts are broadcast via netlink socket to any listening user space @@ -398,7 +398,7 @@ source "net/ife/Kconfig" config LWTUNNEL bool "Network light weight tunnels" - ---help--- + help This feature provides an infrastructure to support light weight tunnels like mpls. There is no netdevice associated with a light weight tunnel endpoint. Tunnel encapsulation parameters are stored @@ -408,7 +408,7 @@ config LWTUNNEL_BPF bool "Execute BPF program as route nexthop action" depends on LWTUNNEL && INET default y if LWTUNNEL=y - ---help--- + help Allows to run BPF programs as a nexthop action following a route lookup for incoming and outgoing packets. diff --git a/net/atm/Kconfig b/net/atm/Kconfig index e61dcc9f85b2..77343d57ff2a 100644 --- a/net/atm/Kconfig +++ b/net/atm/Kconfig @@ -5,7 +5,7 @@ config ATM tristate "Asynchronous Transfer Mode (ATM)" - ---help--- + help ATM is a high-speed networking technology for Local Area Networks and Wide Area Networks. It uses a fixed packet size and is connection oriented, allowing for the negotiation of minimum diff --git a/net/atm/lec.c b/net/atm/lec.c index ca37f5a71f5e..875fc0bc1780 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -1536,10 +1536,8 @@ static struct lec_arp_table *make_entry(struct lec_priv *priv, struct lec_arp_table *to_return; to_return = kzalloc(sizeof(struct lec_arp_table), GFP_ATOMIC); - if (!to_return) { - pr_info("LEC: Arp entry kmalloc failed\n"); + if (!to_return) return NULL; - } ether_addr_copy(to_return->mac_addr, mac_addr); INIT_HLIST_NODE(&to_return->next); timer_setup(&to_return->timer, lec_arp_expire_arp, 0); diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c index 45d8e1d5d033..579b66da1d95 100644 --- a/net/atm/pppoatm.c +++ b/net/atm/pppoatm.c @@ -393,7 +393,7 @@ static int pppoatm_assign_vcc(struct atm_vcc *atmvcc, void __user *arg) * Each PPPoATM instance has its own tasklet - this is just a * prototypical one used to initialize them */ - static const DECLARE_TASKLET(tasklet_proto, pppoatm_wakeup_sender, 0); + static const DECLARE_TASKLET_OLD(tasklet_proto, pppoatm_wakeup_sender); if (copy_from_user(&be, arg, sizeof be)) return -EFAULT; if (be.encaps != PPPOATM_ENCAPS_AUTODETECT && diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index fd91cd34f25e..dec3f35467c9 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1187,7 +1187,10 @@ static int __must_check ax25_connect(struct socket *sock, if (addr_len > sizeof(struct sockaddr_ax25) && fsa->fsa_ax25.sax25_ndigis != 0) { /* Valid number of digipeaters ? */ - if (fsa->fsa_ax25.sax25_ndigis < 1 || fsa->fsa_ax25.sax25_ndigis > AX25_MAX_DIGIS) { + if (fsa->fsa_ax25.sax25_ndigis < 1 || + fsa->fsa_ax25.sax25_ndigis > AX25_MAX_DIGIS || + addr_len < sizeof(struct sockaddr_ax25) + + sizeof(ax25_address) * fsa->fsa_ax25.sax25_ndigis) { err = -EINVAL; goto out_release; } @@ -1507,7 +1510,10 @@ static int ax25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)usax; /* Valid number of digipeaters ? */ - if (usax->sax25_ndigis < 1 || usax->sax25_ndigis > AX25_MAX_DIGIS) { + if (usax->sax25_ndigis < 1 || + usax->sax25_ndigis > AX25_MAX_DIGIS || + addr_len < sizeof(struct sockaddr_ax25) + + sizeof(ax25_address) * usax->sax25_ndigis) { err = -EINVAL; goto out; } diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 0ddd80130ea3..f1f1c86f3419 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -745,6 +745,7 @@ static int batadv_interface_kill_vid(struct net_device *dev, __be16 proto, * separate class since they always nest. */ static struct lock_class_key batadv_netdev_xmit_lock_key; +static struct lock_class_key batadv_netdev_addr_lock_key; /** * batadv_set_lockdep_class_one() - Set lockdep class for a single tx queue @@ -765,6 +766,7 @@ static void batadv_set_lockdep_class_one(struct net_device *dev, */ static void batadv_set_lockdep_class(struct net_device *dev) { + lockdep_set_class(&dev->addr_list_lock, &batadv_netdev_addr_lock_key); netdev_for_each_tx_queue(dev, batadv_set_lockdep_class_one, NULL); } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index cfeaee347db3..af9d7f2ff8ba 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1338,6 +1338,9 @@ static void store_pending_adv_report(struct hci_dev *hdev, bdaddr_t *bdaddr, { struct discovery_state *d = &hdev->discovery; + if (len > HCI_MAX_AD_LENGTH) + return; + bacpy(&d->last_adv_addr, bdaddr); d->last_adv_addr_type = bdaddr_type; d->last_adv_rssi = rssi; @@ -5355,7 +5358,8 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev, static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, u8 bdaddr_type, bdaddr_t *direct_addr, - u8 direct_addr_type, s8 rssi, u8 *data, u8 len) + u8 direct_addr_type, s8 rssi, u8 *data, u8 len, + bool ext_adv) { struct discovery_state *d = &hdev->discovery; struct smp_irk *irk; @@ -5377,6 +5381,11 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, return; } + if (!ext_adv && len > HCI_MAX_AD_LENGTH) { + bt_dev_err_ratelimited(hdev, "legacy adv larger than 31 bytes"); + return; + } + /* Find the end of the data in case the report contains padded zero * bytes at the end causing an invalid length value. * @@ -5437,7 +5446,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, */ conn = check_pending_le_conn(hdev, bdaddr, bdaddr_type, type, direct_addr); - if (conn && type == LE_ADV_IND) { + if (!ext_adv && conn && type == LE_ADV_IND && len <= HCI_MAX_AD_LENGTH) { /* Store report for later inclusion by * mgmt_device_connected */ @@ -5491,7 +5500,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, * event or send an immediate device found event if the data * should not be stored for later. */ - if (!has_pending_adv_report(hdev)) { + if (!ext_adv && !has_pending_adv_report(hdev)) { /* If the report will trigger a SCAN_REQ store it for * later merging. */ @@ -5526,7 +5535,8 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, /* If the new report will trigger a SCAN_REQ store it for * later merging. */ - if (type == LE_ADV_IND || type == LE_ADV_SCAN_IND) { + if (!ext_adv && (type == LE_ADV_IND || + type == LE_ADV_SCAN_IND)) { store_pending_adv_report(hdev, bdaddr, bdaddr_type, rssi, flags, data, len); return; @@ -5566,7 +5576,7 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb) rssi = ev->data[ev->length]; process_adv_report(hdev, ev->evt_type, &ev->bdaddr, ev->bdaddr_type, NULL, 0, rssi, - ev->data, ev->length); + ev->data, ev->length, false); } else { bt_dev_err(hdev, "Dropping invalid advertising data"); } @@ -5638,7 +5648,8 @@ static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb) if (legacy_evt_type != LE_ADV_INVALID) { process_adv_report(hdev, legacy_evt_type, &ev->bdaddr, ev->bdaddr_type, NULL, 0, ev->rssi, - ev->data, ev->length); + ev->data, ev->length, + !(evt_type & LE_EXT_ADV_LEGACY_PDU)); } ptr += sizeof(*ev) + ev->length; @@ -5836,7 +5847,8 @@ static void hci_le_direct_adv_report_evt(struct hci_dev *hdev, process_adv_report(hdev, ev->evt_type, &ev->bdaddr, ev->bdaddr_type, &ev->direct_addr, - ev->direct_addr_type, ev->rssi, NULL, 0); + ev->direct_addr_type, ev->rssi, NULL, 0, + false); ptr += sizeof(*ev); } diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index bfd4ccd80847..b03c469cd01f 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -147,6 +147,20 @@ int noinline bpf_fentry_test6(u64 a, void *b, short c, int d, void *e, u64 f) return a + (long)b + c + d + (long)e + f; } +struct bpf_fentry_test_t { + struct bpf_fentry_test_t *a; +}; + +int noinline bpf_fentry_test7(struct bpf_fentry_test_t *arg) +{ + return (long)arg; +} + +int noinline bpf_fentry_test8(struct bpf_fentry_test_t *arg) +{ + return (long)arg->a; +} + int noinline bpf_modify_return_test(int a, int *b) { *b += 1; @@ -185,6 +199,7 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { + struct bpf_fentry_test_t arg = {}; u16 side_effect = 0, ret = 0; int b = 2, err = -EFAULT; u32 retval = 0; @@ -197,7 +212,9 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog, bpf_fentry_test3(4, 5, 6) != 15 || bpf_fentry_test4((void *)7, 8, 9, 10) != 34 || bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 || - bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111) + bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111 || + bpf_fentry_test7((struct bpf_fentry_test_t *)0) != 0 || + bpf_fentry_test8(&arg) != 0) goto out; break; case BPF_MODIFY_RETURN: diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c index c0f0990f30b6..c3146b2700a0 100644 --- a/net/bpfilter/bpfilter_kern.c +++ b/net/bpfilter/bpfilter_kern.c @@ -15,15 +15,13 @@ extern char bpfilter_umh_end; static void shutdown_umh(void) { - struct task_struct *tsk; + struct umd_info *info = &bpfilter_ops.info; + struct pid *tgid = info->tgid; - if (bpfilter_ops.stop) - return; - - tsk = get_pid_task(find_vpid(bpfilter_ops.info.pid), PIDTYPE_PID); - if (tsk) { - send_sig(SIGKILL, tsk, 1); - put_task_struct(tsk); + if (tgid) { + kill_pid(tgid, SIGKILL, 1); + wait_event(tgid->wait_pidfd, thread_group_exited(tgid)); + bpfilter_umh_cleanup(info); } } @@ -39,7 +37,7 @@ static int __bpfilter_process_sockopt(struct sock *sk, int optname, { struct mbox_request req; struct mbox_reply reply; - loff_t pos; + loff_t pos = 0; ssize_t n; int ret = -EFAULT; @@ -48,9 +46,9 @@ static int __bpfilter_process_sockopt(struct sock *sk, int optname, req.cmd = optname; req.addr = (long __force __user)optval; req.len = optlen; - if (!bpfilter_ops.info.pid) + if (!bpfilter_ops.info.tgid) goto out; - n = __kernel_write(bpfilter_ops.info.pipe_to_umh, &req, sizeof(req), + n = kernel_write(bpfilter_ops.info.pipe_to_umh, &req, sizeof(req), &pos); if (n != sizeof(req)) { pr_err("write fail %zd\n", n); @@ -77,13 +75,10 @@ static int start_umh(void) int err; /* fork usermode process */ - err = fork_usermode_blob(&bpfilter_umh_start, - &bpfilter_umh_end - &bpfilter_umh_start, - &bpfilter_ops.info); + err = fork_usermode_driver(&bpfilter_ops.info); if (err) return err; - bpfilter_ops.stop = false; - pr_info("Loaded bpfilter_umh pid %d\n", bpfilter_ops.info.pid); + pr_info("Loaded bpfilter_umh pid %d\n", pid_nr(bpfilter_ops.info.tgid)); /* health check that usermode process started correctly */ if (__bpfilter_process_sockopt(NULL, 0, NULL, 0, 0) != 0) { @@ -98,18 +93,21 @@ static int __init load_umh(void) { int err; + err = umd_load_blob(&bpfilter_ops.info, + &bpfilter_umh_start, + &bpfilter_umh_end - &bpfilter_umh_start); + if (err) + return err; + mutex_lock(&bpfilter_ops.lock); - if (!bpfilter_ops.stop) { - err = -EFAULT; - goto out; - } err = start_umh(); if (!err && IS_ENABLED(CONFIG_INET)) { bpfilter_ops.sockopt = &__bpfilter_process_sockopt; bpfilter_ops.start = &start_umh; } -out: mutex_unlock(&bpfilter_ops.lock); + if (err) + umd_unload_blob(&bpfilter_ops.info); return err; } @@ -122,6 +120,8 @@ static void __exit fini_umh(void) bpfilter_ops.sockopt = NULL; } mutex_unlock(&bpfilter_ops.lock); + + umd_unload_blob(&bpfilter_ops.info); } module_init(load_umh); module_exit(fini_umh); diff --git a/net/bpfilter/bpfilter_umh_blob.S b/net/bpfilter/bpfilter_umh_blob.S index 9ea6100dca87..40311d10d2f2 100644 --- a/net/bpfilter/bpfilter_umh_blob.S +++ b/net/bpfilter/bpfilter_umh_blob.S @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ - .section .rodata, "a" + .section .init.rodata, "a" .global bpfilter_umh_start bpfilter_umh_start: .incbin "net/bpfilter/bpfilter_umh" diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig index 51a6414145d2..80879196560c 100644 --- a/net/bridge/Kconfig +++ b/net/bridge/Kconfig @@ -8,7 +8,7 @@ config BRIDGE select LLC select STP depends on IPV6 || IPV6=n - ---help--- + help If you say Y here, then your Linux box will be able to act as an Ethernet bridge, which means that the different Ethernet segments it is connected to will appear as one Ethernet to the participants. @@ -39,7 +39,7 @@ config BRIDGE_IGMP_SNOOPING depends on BRIDGE depends on INET default y - ---help--- + help If you say Y here, then the Ethernet bridge will be able selectively forward multicast traffic based on IGMP/MLD traffic received from each port. @@ -53,7 +53,7 @@ config BRIDGE_VLAN_FILTERING depends on BRIDGE depends on VLAN_8021Q default n - ---help--- + help If you say Y here, then the Ethernet bridge will be able selectively receive and forward traffic based on VLAN information in the packet any VLAN information configured on the bridge port or bridge device. diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 8ec1362588af..8c7b78f8bc23 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -105,6 +105,13 @@ out: return NETDEV_TX_OK; } +static struct lock_class_key bridge_netdev_addr_lock_key; + +static void br_set_lockdep_class(struct net_device *dev) +{ + lockdep_set_class(&dev->addr_list_lock, &bridge_netdev_addr_lock_key); +} + static int br_dev_init(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); @@ -143,6 +150,7 @@ static int br_dev_init(struct net_device *dev) br_fdb_hash_fini(br); } + br_set_lockdep_class(dev); return err; } diff --git a/net/bridge/br_mrp.c b/net/bridge/br_mrp.c index 24986ec7d38c..90592af9db61 100644 --- a/net/bridge/br_mrp.c +++ b/net/bridge/br_mrp.c @@ -86,7 +86,7 @@ static struct sk_buff *br_mrp_skb_alloc(struct net_bridge_port *p, { struct ethhdr *eth_hdr; struct sk_buff *skb; - u16 *version; + __be16 *version; skb = dev_alloc_skb(MRP_MAX_FRAME_LENGTH); if (!skb) @@ -411,10 +411,16 @@ int br_mrp_set_port_role(struct net_bridge_port *p, if (!mrp) return -EINVAL; - if (role == BR_MRP_PORT_ROLE_PRIMARY) + switch (role) { + case BR_MRP_PORT_ROLE_PRIMARY: rcu_assign_pointer(mrp->p_port, p); - else + break; + case BR_MRP_PORT_ROLE_SECONDARY: rcu_assign_pointer(mrp->s_port, p); + break; + default: + return -EINVAL; + } br_mrp_port_switchdev_set_role(p, role); diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 83490bf73a13..4c4a93abde68 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1007,7 +1007,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, nsrcs_offset = len + offsetof(struct mld2_grec, grec_nsrcs); if (skb_transport_offset(skb) + ipv6_transport_len(skb) < - nsrcs_offset + sizeof(_nsrcs)) + nsrcs_offset + sizeof(__nsrcs)) return -EINVAL; _nsrcs = skb_header_pointer(skb, nsrcs_offset, diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 7501be4eeba0..e0ea6dbbc97e 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -217,8 +217,8 @@ struct net_bridge_port_group { struct rcu_head rcu; struct timer_list timer; struct br_ip addr; + unsigned char eth_addr[ETH_ALEN] __aligned(2); unsigned char flags; - unsigned char eth_addr[ETH_ALEN]; }; struct net_bridge_mdb_entry { @@ -430,7 +430,7 @@ struct net_bridge { struct hlist_head fdb_list; #if IS_ENABLED(CONFIG_BRIDGE_MRP) - struct list_head __rcu mrp_list; + struct list_head mrp_list; #endif }; diff --git a/net/bridge/br_private_mrp.h b/net/bridge/br_private_mrp.h index 33b255e38ffe..315eb37d89f0 100644 --- a/net/bridge/br_private_mrp.h +++ b/net/bridge/br_private_mrp.h @@ -8,7 +8,7 @@ struct br_mrp { /* list of mrp instances */ - struct list_head __rcu list; + struct list_head list; struct net_bridge_port __rcu *p_port; struct net_bridge_port __rcu *s_port; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index c83ffe912163..b13b49b9f75c 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1047,7 +1047,7 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl, vfree(counterstmp); audit_log_nfcfg(repl->name, AF_BRIDGE, repl->nentries, - AUDIT_XT_OP_REPLACE); + AUDIT_XT_OP_REPLACE, GFP_KERNEL); return ret; free_unlock: @@ -1123,7 +1123,7 @@ static void __ebt_unregister_table(struct net *net, struct ebt_table *table) list_del(&table->list); mutex_unlock(&ebt_mutex); audit_log_nfcfg(table->name, AF_BRIDGE, table->private->nentries, - AUDIT_XT_OP_UNREGISTER); + AUDIT_XT_OP_UNREGISTER, GFP_KERNEL); EBT_ENTRY_ITERATE(table->private->entries, table->private->entries_size, ebt_cleanup_entry, net, NULL); if (table->private->nentries) @@ -1218,7 +1218,7 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table, } audit_log_nfcfg(repl->name, AF_BRIDGE, repl->nentries, - AUDIT_XT_OP_REGISTER); + AUDIT_XT_OP_REGISTER, GFP_KERNEL); return ret; free_unlock: mutex_unlock(&ebt_mutex); diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c index 7c9e92b2f806..8e8ffac037cd 100644 --- a/net/bridge/netfilter/nft_meta_bridge.c +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -155,3 +155,4 @@ module_exit(nft_meta_bridge_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("wenxu <wenxu@ucloud.cn>"); MODULE_ALIAS_NFT_AF_EXPR(AF_BRIDGE, "meta"); +MODULE_DESCRIPTION("Support for bridge dedicated meta key"); diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index f48cf4cfb80f..deae2c9a0f69 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -455,3 +455,4 @@ module_exit(nft_reject_bridge_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); MODULE_ALIAS_NFT_AF_EXPR(AF_BRIDGE, "reject"); +MODULE_DESCRIPTION("Reject packets from bridge via nftables"); diff --git a/net/caif/Kconfig b/net/caif/Kconfig index b7532a79ca7a..87205251cc25 100644 --- a/net/caif/Kconfig +++ b/net/caif/Kconfig @@ -7,7 +7,7 @@ menuconfig CAIF tristate "CAIF support" select CRC_CCITT default n - ---help--- + help The "Communication CPU to Application CPU Interface" (CAIF) is a packet based connection-oriented MUX protocol developed by ST-Ericsson for use with its modems. It is accessed from user space as sockets (PF_CAIF). @@ -26,7 +26,7 @@ config CAIF_DEBUG bool "Enable Debug" depends on CAIF default n - ---help--- + help Enable the inclusion of debug code in the CAIF stack. Be aware that doing this will impact performance. If unsure say N. @@ -35,7 +35,7 @@ config CAIF_NETDEV tristate "CAIF GPRS Network device" depends on CAIF default CAIF - ---help--- + help Say Y if you will be using a CAIF based GPRS network device. This can be either built-in or a loadable module. If you select to build it as a built-in then the main CAIF device must @@ -46,7 +46,7 @@ config CAIF_USB tristate "CAIF USB support" depends on CAIF default n - ---help--- + help Say Y if you are using CAIF over USB CDC NCM. This can be either built-in or a loadable module. If you select to build it as a built-in then the main CAIF device must diff --git a/net/can/Kconfig b/net/can/Kconfig index d77042752457..25436a715db3 100644 --- a/net/can/Kconfig +++ b/net/can/Kconfig @@ -6,7 +6,7 @@ menuconfig CAN depends on NET tristate "CAN bus subsystem support" - ---help--- + help Controller Area Network (CAN) is a slow (up to 1Mbit/s) serial communications protocol. Development of the CAN bus started in 1983 at Robert Bosch GmbH, and the protocol was officially @@ -23,7 +23,7 @@ if CAN config CAN_RAW tristate "Raw CAN Protocol (raw access with CAN-ID filtering)" default y - ---help--- + help The raw CAN protocol option offers access to the CAN bus via the BSD socket API. You probably want to use the raw socket in most cases where no higher level protocol is being used. The raw @@ -33,7 +33,7 @@ config CAN_RAW config CAN_BCM tristate "Broadcast Manager CAN Protocol (with content filtering)" default y - ---help--- + help The Broadcast Manager offers content filtering, timeout monitoring, sending of RTR frames, and cyclic CAN messages without permanent user interaction. The BCM can be 'programmed' via the BSD socket API and @@ -45,7 +45,7 @@ config CAN_BCM config CAN_GW tristate "CAN Gateway/Router (with netlink configuration)" default y - ---help--- + help The CAN Gateway/Router is used to route (and modify) CAN frames. It is based on the PF_CAN core infrastructure for msg filtering and msg sending and can optionally modify routed CAN frames on the fly. diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index afe0e8184c23..4e7edd707a14 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -332,6 +332,7 @@ struct ceph_options *ceph_alloc_options(void) opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT; + opt->read_from_replica = CEPH_READ_FROM_REPLICA_DEFAULT; return opt; } EXPORT_SYMBOL(ceph_alloc_options); @@ -490,16 +491,13 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt, case Opt_read_from_replica: switch (result.uint_32) { case Opt_read_from_replica_no: - opt->osd_req_flags &= ~(CEPH_OSD_FLAG_BALANCE_READS | - CEPH_OSD_FLAG_LOCALIZE_READS); + opt->read_from_replica = 0; break; case Opt_read_from_replica_balance: - opt->osd_req_flags |= CEPH_OSD_FLAG_BALANCE_READS; - opt->osd_req_flags &= ~CEPH_OSD_FLAG_LOCALIZE_READS; + opt->read_from_replica = CEPH_OSD_FLAG_BALANCE_READS; break; case Opt_read_from_replica_localize: - opt->osd_req_flags |= CEPH_OSD_FLAG_LOCALIZE_READS; - opt->osd_req_flags &= ~CEPH_OSD_FLAG_BALANCE_READS; + opt->read_from_replica = CEPH_OSD_FLAG_LOCALIZE_READS; break; default: BUG(); @@ -613,9 +611,9 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client, } seq_putc(m, ','); } - if (opt->osd_req_flags & CEPH_OSD_FLAG_BALANCE_READS) { + if (opt->read_from_replica == CEPH_OSD_FLAG_BALANCE_READS) { seq_puts(m, "read_from_replica=balance,"); - } else if (opt->osd_req_flags & CEPH_OSD_FLAG_LOCALIZE_READS) { + } else if (opt->read_from_replica == CEPH_OSD_FLAG_LOCALIZE_READS) { seq_puts(m, "read_from_replica=localize,"); } diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 4fea3c33af2a..2db8b44e70c2 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -445,8 +445,10 @@ static void target_copy(struct ceph_osd_request_target *dest, dest->size = src->size; dest->min_size = src->min_size; dest->sort_bitwise = src->sort_bitwise; + dest->recovery_deletes = src->recovery_deletes; dest->flags = src->flags; + dest->used_replica = src->used_replica; dest->paused = src->paused; dest->epoch = src->epoch; @@ -1117,10 +1119,10 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, truncate_size, truncate_seq); } - req->r_flags = flags; req->r_base_oloc.pool = layout->pool_id; req->r_base_oloc.pool_ns = ceph_try_get_string(layout->pool_ns); ceph_oid_printf(&req->r_base_oid, "%llx.%08llx", vino.ino, objnum); + req->r_flags = flags | osdc->client->options->read_from_replica; req->r_snapid = vino.snap; if (flags & CEPH_OSD_FLAG_WRITE) @@ -2431,14 +2433,11 @@ promote: static void account_request(struct ceph_osd_request *req) { - struct ceph_osd_client *osdc = req->r_osdc; - WARN_ON(req->r_flags & (CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK)); WARN_ON(!(req->r_flags & (CEPH_OSD_FLAG_READ | CEPH_OSD_FLAG_WRITE))); req->r_flags |= CEPH_OSD_FLAG_ONDISK; - req->r_flags |= osdc->client->options->osd_req_flags; - atomic_inc(&osdc->num_requests); + atomic_inc(&req->r_osdc->num_requests); req->r_start_stamp = jiffies; req->r_start_latency = ktime_get(); diff --git a/net/compat.c b/net/compat.c index 5e3041a2c37d..77f3a0e98fd0 100644 --- a/net/compat.c +++ b/net/compat.c @@ -202,7 +202,7 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk, /* Advance. */ kcmsg = (struct cmsghdr *)((char *)kcmsg + tmp); - ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, ucmlen); + ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, cmsg.cmsg_len); } /* @@ -281,39 +281,31 @@ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *dat return 0; } -void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm) +static int scm_max_fds_compat(struct msghdr *msg) { - struct compat_cmsghdr __user *cm = (struct compat_cmsghdr __user *) kmsg->msg_control; - int fdmax = (kmsg->msg_controllen - sizeof(struct compat_cmsghdr)) / sizeof(int); - int fdnum = scm->fp->count; - struct file **fp = scm->fp->fp; - int __user *cmfptr; - int err = 0, i; + if (msg->msg_controllen <= sizeof(struct compat_cmsghdr)) + return 0; + return (msg->msg_controllen - sizeof(struct compat_cmsghdr)) / sizeof(int); +} - if (fdnum < fdmax) - fdmax = fdnum; +void scm_detach_fds_compat(struct msghdr *msg, struct scm_cookie *scm) +{ + struct compat_cmsghdr __user *cm = + (struct compat_cmsghdr __user *)msg->msg_control; + unsigned int o_flags = (msg->msg_flags & MSG_CMSG_CLOEXEC) ? O_CLOEXEC : 0; + int fdmax = min_t(int, scm_max_fds_compat(msg), scm->fp->count); + int __user *cmsg_data = CMSG_USER_DATA(cm); + int err = 0, i; - for (i = 0, cmfptr = (int __user *) CMSG_COMPAT_DATA(cm); i < fdmax; i++, cmfptr++) { - int new_fd; - err = security_file_receive(fp[i]); - if (err) - break; - err = get_unused_fd_flags(MSG_CMSG_CLOEXEC & kmsg->msg_flags - ? O_CLOEXEC : 0); + for (i = 0; i < fdmax; i++) { + err = receive_fd_user(scm->fp->fp[i], cmsg_data + i, o_flags); if (err < 0) break; - new_fd = err; - err = put_user(new_fd, cmfptr); - if (err) { - put_unused_fd(new_fd); - break; - } - /* Bump the usage count and install the file. */ - fd_install(new_fd, get_file(fp[i])); } if (i > 0) { int cmlen = CMSG_COMPAT_LEN(i * sizeof(int)); + err = put_user(SOL_SOCKET, &cm->cmsg_level); if (!err) err = put_user(SCM_RIGHTS, &cm->cmsg_type); @@ -321,16 +313,19 @@ void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm) err = put_user(cmlen, &cm->cmsg_len); if (!err) { cmlen = CMSG_COMPAT_SPACE(i * sizeof(int)); - kmsg->msg_control += cmlen; - kmsg->msg_controllen -= cmlen; + if (msg->msg_controllen < cmlen) + cmlen = msg->msg_controllen; + msg->msg_control += cmlen; + msg->msg_controllen -= cmlen; } } - if (i < fdnum) - kmsg->msg_flags |= MSG_CTRUNC; + + if (i < scm->fp->count || (scm->fp->count && fdmax <= 0)) + msg->msg_flags |= MSG_CTRUNC; /* - * All of the files that fit in the message have had their - * usage counts incremented, so we just free the list. + * All of the files that fit in the message have had their usage counts + * incremented, so we just free the list. */ __scm_destroy(scm); } diff --git a/net/core/dev.c b/net/core/dev.c index 061496a1f640..7a774ebf64e2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -439,6 +439,7 @@ static const char *const netdev_lock_name[] = { "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"}; static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; +static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)]; static inline unsigned short netdev_lock_pos(unsigned short dev_type) { @@ -460,11 +461,25 @@ static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i], netdev_lock_name[i]); } + +static inline void netdev_set_addr_lockdep_class(struct net_device *dev) +{ + int i; + + i = netdev_lock_pos(dev->type); + lockdep_set_class_and_name(&dev->addr_list_lock, + &netdev_addr_lock_key[i], + netdev_lock_name[i]); +} #else static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock, unsigned short dev_type) { } + +static inline void netdev_set_addr_lockdep_class(struct net_device *dev) +{ +} #endif /******************************************************************************* @@ -4177,10 +4192,12 @@ int dev_direct_xmit(struct sk_buff *skb, u16 queue_id) local_bh_disable(); + dev_xmit_recursion_inc(); HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_xmit_frozen_or_drv_stopped(txq)) ret = netdev_start_xmit(skb, dev, txq, false); HARD_TX_UNLOCK(dev, txq); + dev_xmit_recursion_dec(); local_bh_enable(); @@ -5584,7 +5601,7 @@ static void flush_backlog(struct work_struct *work) skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) { if (skb->dev->reg_state == NETREG_UNREGISTERING) { __skb_unlink(skb, &sd->input_pkt_queue); - kfree_skb(skb); + dev_kfree_skb_irq(skb); input_queue_head_incr(sd); } } @@ -9373,15 +9390,6 @@ void netif_tx_stop_all_queues(struct net_device *dev) } EXPORT_SYMBOL(netif_tx_stop_all_queues); -void netdev_update_lockdep_key(struct net_device *dev) -{ - lockdep_unregister_key(&dev->addr_list_lock_key); - lockdep_register_key(&dev->addr_list_lock_key); - - lockdep_set_class(&dev->addr_list_lock, &dev->addr_list_lock_key); -} -EXPORT_SYMBOL(netdev_update_lockdep_key); - /** * register_netdevice - register a network device * @dev: device to register @@ -9420,7 +9428,7 @@ int register_netdevice(struct net_device *dev) return ret; spin_lock_init(&dev->addr_list_lock); - lockdep_set_class(&dev->addr_list_lock, &dev->addr_list_lock_key); + netdev_set_addr_lockdep_class(dev); ret = dev_get_valid_name(net, dev, dev->name); if (ret < 0) @@ -9541,6 +9549,13 @@ int register_netdevice(struct net_device *dev) rcu_barrier(); dev->reg_state = NETREG_UNREGISTERED; + /* We should put the kobject that hold in + * netdev_unregister_kobject(), otherwise + * the net device cannot be freed when + * driver calls free_netdev(), because the + * kobject is being hold. + */ + kobject_put(&dev->dev.kobj); } /* * Prevent userspace races by waiting until the network @@ -9939,8 +9954,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev_net_set(dev, &init_net); - lockdep_register_key(&dev->addr_list_lock_key); - dev->gso_max_size = GSO_MAX_SIZE; dev->gso_max_segs = GSO_MAX_SEGS; dev->upper_level = 1; @@ -10028,8 +10041,6 @@ void free_netdev(struct net_device *dev) free_percpu(dev->xdp_bulkq); dev->xdp_bulkq = NULL; - lockdep_unregister_key(&dev->addr_list_lock_key); - /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED) { netdev_freemem(dev); diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index 2f949b5a1eb9..54cd568e7c2f 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -637,7 +637,7 @@ int dev_uc_sync(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return -EINVAL; - netif_addr_lock(to); + netif_addr_lock_nested(to); err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len); if (!err) __dev_set_rx_mode(to); @@ -667,7 +667,7 @@ int dev_uc_sync_multiple(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return -EINVAL; - netif_addr_lock(to); + netif_addr_lock_nested(to); err = __hw_addr_sync_multiple(&to->uc, &from->uc, to->addr_len); if (!err) __dev_set_rx_mode(to); @@ -690,8 +690,17 @@ void dev_uc_unsync(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return; + /* netif_addr_lock_bh() uses lockdep subclass 0, this is okay for two + * reasons: + * 1) This is always called without any addr_list_lock, so as the + * outermost one here, it must be 0. + * 2) This is called by some callers after unlinking the upper device, + * so the dev->lower_level becomes 1 again. + * Therefore, the subclass for 'from' is 0, for 'to' is either 1 or + * larger. + */ netif_addr_lock_bh(from); - netif_addr_lock(to); + netif_addr_lock_nested(to); __hw_addr_unsync(&to->uc, &from->uc, to->addr_len); __dev_set_rx_mode(to); netif_addr_unlock(to); @@ -858,7 +867,7 @@ int dev_mc_sync(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return -EINVAL; - netif_addr_lock(to); + netif_addr_lock_nested(to); err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len); if (!err) __dev_set_rx_mode(to); @@ -888,7 +897,7 @@ int dev_mc_sync_multiple(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return -EINVAL; - netif_addr_lock(to); + netif_addr_lock_nested(to); err = __hw_addr_sync_multiple(&to->mc, &from->mc, to->addr_len); if (!err) __dev_set_rx_mode(to); @@ -911,8 +920,9 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return; + /* See the above comments inside dev_uc_unsync(). */ netif_addr_lock_bh(from); - netif_addr_lock(to); + netif_addr_lock_nested(to); __hw_addr_unsync(&to->mc, &from->mc, to->addr_len); __dev_set_rx_mode(to); netif_addr_unlock(to); diff --git a/net/core/devlink.c b/net/core/devlink.c index 2cafbc808b09..47f14a2f25fb 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -1065,7 +1065,9 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg, devlink_sb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq); - if (err && err != -EOPNOTSUPP) { + if (err == -EOPNOTSUPP) { + err = 0; + } else if (err) { mutex_unlock(&devlink->lock); goto out; } @@ -1266,7 +1268,9 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg, devlink, devlink_sb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq); - if (err && err != -EOPNOTSUPP) { + if (err == -EOPNOTSUPP) { + err = 0; + } else if (err) { mutex_unlock(&devlink->lock); goto out; } @@ -1498,7 +1502,9 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg, devlink_sb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq); - if (err && err != -EOPNOTSUPP) { + if (err == -EOPNOTSUPP) { + err = 0; + } else if (err) { mutex_unlock(&devlink->lock); goto out; } @@ -3299,7 +3305,9 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI); - if (err && err != -EOPNOTSUPP) { + if (err == -EOPNOTSUPP) { + err = 0; + } else if (err) { mutex_unlock(&devlink->lock); goto out; } @@ -3569,7 +3577,9 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI); - if (err && err != -EOPNOTSUPP) { + if (err == -EOPNOTSUPP) { + err = 0; + } else if (err) { mutex_unlock(&devlink->lock); goto out; } @@ -4518,7 +4528,9 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->extack); mutex_unlock(&devlink->lock); - if (err && err != -EOPNOTSUPP) + if (err == -EOPNOTSUPP) + err = 0; + else if (err) break; idx++; } @@ -8567,6 +8579,7 @@ static const struct devlink_trap_group devlink_trap_group_generic[] = { DEVLINK_TRAP_GROUP(PIM), DEVLINK_TRAP_GROUP(UC_LB), DEVLINK_TRAP_GROUP(LOCAL_DELIVERY), + DEVLINK_TRAP_GROUP(EXTERNAL_DELIVERY), DEVLINK_TRAP_GROUP(IPV6), DEVLINK_TRAP_GROUP(PTP_EVENT), DEVLINK_TRAP_GROUP(PTP_GENERAL), diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 2ee7bc4c9e03..b09bebeadf0b 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -1721,3 +1721,4 @@ module_exit(exit_net_drop_monitor); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Neil Horman <nhorman@tuxdriver.com>"); MODULE_ALIAS_GENL_FAMILY("NET_DM"); +MODULE_DESCRIPTION("Monitoring code for network dropped packet alerts"); diff --git a/net/core/filter.c b/net/core/filter.c index 209482a4eaa2..82e1b5b06167 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1755,25 +1755,27 @@ BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb, u32, offset, void *, to, u32, len, u32, start_header) { u8 *end = skb_tail_pointer(skb); - u8 *net = skb_network_header(skb); - u8 *mac = skb_mac_header(skb); - u8 *ptr; + u8 *start, *ptr; - if (unlikely(offset > 0xffff || len > (end - mac))) + if (unlikely(offset > 0xffff)) goto err_clear; switch (start_header) { case BPF_HDR_START_MAC: - ptr = mac + offset; + if (unlikely(!skb_mac_header_was_set(skb))) + goto err_clear; + start = skb_mac_header(skb); break; case BPF_HDR_START_NET: - ptr = net + offset; + start = skb_network_header(skb); break; default: goto err_clear; } - if (likely(ptr >= mac && ptr + len <= end)) { + ptr = start + offset; + + if (likely(ptr + len <= end)) { memcpy(to, ptr, len); return 0; } @@ -4340,8 +4342,6 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, } break; case SO_BINDTODEVICE: - ret = -ENOPROTOOPT; -#ifdef CONFIG_NETDEVICES optlen = min_t(long, optlen, IFNAMSIZ - 1); strncpy(devname, optval, optlen); devname[optlen] = 0; @@ -4360,7 +4360,6 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, dev_put(dev); } ret = sock_bindtoindex(sk, ifindex, false); -#endif break; default: ret = -EINVAL; @@ -5854,12 +5853,16 @@ BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb) { unsigned int iphdr_len; - if (skb->protocol == cpu_to_be16(ETH_P_IP)) + switch (skb_protocol(skb, true)) { + case cpu_to_be16(ETH_P_IP): iphdr_len = sizeof(struct iphdr); - else if (skb->protocol == cpu_to_be16(ETH_P_IPV6)) + break; + case cpu_to_be16(ETH_P_IPV6): iphdr_len = sizeof(struct ipv6hdr); - else + break; + default: return 0; + } if (skb_headlen(skb) < iphdr_len) return 0; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index d02df0b6d0d9..142a8824f0a8 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -70,10 +70,10 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector, EXPORT_SYMBOL(skb_flow_dissector_init); #ifdef CONFIG_BPF_SYSCALL -int flow_dissector_bpf_prog_attach(struct net *net, struct bpf_prog *prog) +int flow_dissector_bpf_prog_attach_check(struct net *net, + struct bpf_prog *prog) { enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR; - struct bpf_prog *attached; if (net == &init_net) { /* BPF flow dissector in the root namespace overrides @@ -86,26 +86,17 @@ int flow_dissector_bpf_prog_attach(struct net *net, struct bpf_prog *prog) for_each_net(ns) { if (ns == &init_net) continue; - if (rcu_access_pointer(ns->bpf.progs[type])) + if (rcu_access_pointer(ns->bpf.run_array[type])) return -EEXIST; } } else { /* Make sure root flow dissector is not attached * when attaching to the non-root namespace. */ - if (rcu_access_pointer(init_net.bpf.progs[type])) + if (rcu_access_pointer(init_net.bpf.run_array[type])) return -EEXIST; } - attached = rcu_dereference_protected(net->bpf.progs[type], - lockdep_is_held(&netns_bpf_mutex)); - if (attached == prog) - /* The same program cannot be attached twice */ - return -EINVAL; - - rcu_assign_pointer(net->bpf.progs[type], prog); - if (attached) - bpf_prog_put(attached); return 0; } #endif /* CONFIG_BPF_SYSCALL */ @@ -903,7 +894,6 @@ bool __skb_flow_dissect(const struct net *net, struct flow_dissector_key_addrs *key_addrs; struct flow_dissector_key_tags *key_tags; struct flow_dissector_key_vlan *key_vlan; - struct bpf_prog *attached = NULL; enum flow_dissect_ret fdret; enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX; bool mpls_el = false; @@ -960,14 +950,14 @@ bool __skb_flow_dissect(const struct net *net, WARN_ON_ONCE(!net); if (net) { enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR; + struct bpf_prog_array *run_array; rcu_read_lock(); - attached = rcu_dereference(init_net.bpf.progs[type]); - - if (!attached) - attached = rcu_dereference(net->bpf.progs[type]); + run_array = rcu_dereference(init_net.bpf.run_array[type]); + if (!run_array) + run_array = rcu_dereference(net->bpf.run_array[type]); - if (attached) { + if (run_array) { struct bpf_flow_keys flow_keys; struct bpf_flow_dissector ctx = { .flow_keys = &flow_keys, @@ -975,6 +965,7 @@ bool __skb_flow_dissect(const struct net *net, .data_end = data + hlen, }; __be16 n_proto = proto; + struct bpf_prog *prog; if (skb) { ctx.skb = skb; @@ -985,7 +976,8 @@ bool __skb_flow_dissect(const struct net *net, n_proto = skb->protocol; } - ret = bpf_flow_dissect(attached, &ctx, n_proto, nhoff, + prog = READ_ONCE(run_array->items[0].prog); + ret = bpf_flow_dissect(prog, &ctx, n_proto, nhoff, hlen, flags); __skb_flow_bpf_to_target(&flow_keys, flow_dissector, target_container); diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index 0cfc35e6be28..2076219b8ba5 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -4,6 +4,7 @@ #include <net/flow_offload.h> #include <linux/rtnetlink.h> #include <linux/mutex.h> +#include <linux/rhashtable.h> struct flow_rule *flow_rule_alloc(unsigned int num_actions) { @@ -372,14 +373,15 @@ int flow_indr_dev_register(flow_indr_block_bind_cb_t *cb, void *cb_priv) } EXPORT_SYMBOL(flow_indr_dev_register); -static void __flow_block_indr_cleanup(flow_setup_cb_t *setup_cb, void *cb_priv, +static void __flow_block_indr_cleanup(void (*release)(void *cb_priv), + void *cb_priv, struct list_head *cleanup_list) { struct flow_block_cb *this, *next; list_for_each_entry_safe(this, next, &flow_block_indr_list, indr.list) { - if (this->cb == setup_cb && - this->cb_priv == cb_priv) { + if (this->release == release && + this->indr.cb_priv == cb_priv) { list_move(&this->indr.list, cleanup_list); return; } @@ -397,7 +399,7 @@ static void flow_block_indr_notify(struct list_head *cleanup_list) } void flow_indr_dev_unregister(flow_indr_block_bind_cb_t *cb, void *cb_priv, - flow_setup_cb_t *setup_cb) + void (*release)(void *cb_priv)) { struct flow_indr_dev *this, *next, *indr_dev = NULL; LIST_HEAD(cleanup_list); @@ -418,7 +420,7 @@ void flow_indr_dev_unregister(flow_indr_block_bind_cb_t *cb, void *cb_priv, return; } - __flow_block_indr_cleanup(setup_cb, cb_priv, &cleanup_list); + __flow_block_indr_cleanup(release, cb_priv, &cleanup_list); mutex_unlock(&flow_indr_block_lock); flow_block_indr_notify(&cleanup_list); @@ -429,32 +431,37 @@ EXPORT_SYMBOL(flow_indr_dev_unregister); static void flow_block_indr_init(struct flow_block_cb *flow_block, struct flow_block_offload *bo, struct net_device *dev, void *data, + void *cb_priv, void (*cleanup)(struct flow_block_cb *block_cb)) { flow_block->indr.binder_type = bo->binder_type; flow_block->indr.data = data; + flow_block->indr.cb_priv = cb_priv; flow_block->indr.dev = dev; flow_block->indr.cleanup = cleanup; } -static void __flow_block_indr_binding(struct flow_block_offload *bo, - struct net_device *dev, void *data, - void (*cleanup)(struct flow_block_cb *block_cb)) +struct flow_block_cb *flow_indr_block_cb_alloc(flow_setup_cb_t *cb, + void *cb_ident, void *cb_priv, + void (*release)(void *cb_priv), + struct flow_block_offload *bo, + struct net_device *dev, void *data, + void *indr_cb_priv, + void (*cleanup)(struct flow_block_cb *block_cb)) { struct flow_block_cb *block_cb; - list_for_each_entry(block_cb, &bo->cb_list, list) { - switch (bo->command) { - case FLOW_BLOCK_BIND: - flow_block_indr_init(block_cb, bo, dev, data, cleanup); - list_add(&block_cb->indr.list, &flow_block_indr_list); - break; - case FLOW_BLOCK_UNBIND: - list_del(&block_cb->indr.list); - break; - } - } + block_cb = flow_block_cb_alloc(cb, cb_ident, cb_priv, release); + if (IS_ERR(block_cb)) + goto out; + + flow_block_indr_init(block_cb, bo, dev, data, indr_cb_priv, cleanup); + list_add(&block_cb->indr.list, &flow_block_indr_list); + +out: + return block_cb; } +EXPORT_SYMBOL(flow_indr_block_cb_alloc); int flow_indr_dev_setup_offload(struct net_device *dev, enum tc_setup_type type, void *data, @@ -465,9 +472,8 @@ int flow_indr_dev_setup_offload(struct net_device *dev, mutex_lock(&flow_indr_block_lock); list_for_each_entry(this, &flow_block_indr_dev_list, list) - this->cb(dev, this->cb_priv, type, bo); + this->cb(dev, this->cb_priv, type, bo, data, cleanup); - __flow_block_indr_binding(bo, dev, data, cleanup); mutex_unlock(&flow_indr_block_lock); return list_empty(&bo->cb_list) ? -EOPNOTSUPP : 0; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index e353b822bb15..9de33b594ff2 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -11,6 +11,7 @@ #include <linux/if_arp.h> #include <linux/slab.h> #include <linux/sched/signal.h> +#include <linux/sched/isolation.h> #include <linux/nsproxy.h> #include <net/sock.h> #include <net/net_namespace.h> @@ -741,7 +742,7 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue, { struct rps_map *old_map, *map; cpumask_var_t mask; - int err, cpu, i; + int err, cpu, i, hk_flags; static DEFINE_MUTEX(rps_map_mutex); if (!capable(CAP_NET_ADMIN)) @@ -756,6 +757,13 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue, return err; } + hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ; + cpumask_and(mask, mask, housekeeping_cpumask(hk_flags)); + if (cpumask_empty(mask)) { + free_cpumask_var(mask); + return -EINVAL; + } + map = kzalloc(max_t(unsigned int, RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES), GFP_KERNEL); @@ -1108,7 +1116,7 @@ static ssize_t tx_timeout_show(struct netdev_queue *queue, char *buf) trans_timeout = queue->trans_timeout; spin_unlock_irq(&queue->_xmit_lock); - return sprintf(buf, "%lu", trans_timeout); + return sprintf(buf, fmt_ulong, trans_timeout); } static unsigned int get_netdev_queue_index(struct netdev_queue *queue) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2269199c5891..85a4b0101f76 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2462,7 +2462,6 @@ static int do_set_master(struct net_device *dev, int ifindex, err = ops->ndo_del_slave(upper_dev, dev); if (err) return err; - netdev_update_lockdep_key(dev); } else { return -EOPNOTSUPP; } @@ -3344,7 +3343,8 @@ replay: */ if (err < 0) { /* If device is not registered at all, free it now */ - if (dev->reg_state == NETREG_UNINITIALIZED) + if (dev->reg_state == NETREG_UNINITIALIZED || + dev->reg_state == NETREG_UNREGISTERED) free_netdev(dev); goto out; } diff --git a/net/core/scm.c b/net/core/scm.c index 875df1c2989d..8156d4fb8a39 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -280,36 +280,6 @@ void put_cmsg_scm_timestamping(struct msghdr *msg, struct scm_timestamping_inter } EXPORT_SYMBOL(put_cmsg_scm_timestamping); -static int __scm_install_fd(struct file *file, int __user *ufd, int o_flags) -{ - struct socket *sock; - int new_fd; - int error; - - error = security_file_receive(file); - if (error) - return error; - - new_fd = get_unused_fd_flags(o_flags); - if (new_fd < 0) - return new_fd; - - error = put_user(new_fd, ufd); - if (error) { - put_unused_fd(new_fd); - return error; - } - - /* Bump the usage count and install the file. */ - sock = sock_from_file(file, &error); - if (sock) { - sock_update_netprioidx(&sock->sk->sk_cgrp_data); - sock_update_classid(&sock->sk->sk_cgrp_data); - } - fd_install(new_fd, get_file(file)); - return 0; -} - static int scm_max_fds(struct msghdr *msg) { if (msg->msg_controllen <= sizeof(struct cmsghdr)) @@ -319,29 +289,29 @@ static int scm_max_fds(struct msghdr *msg) void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) { - struct cmsghdr __user *cm - = (__force struct cmsghdr __user*)msg->msg_control; - int o_flags = (msg->msg_flags & MSG_CMSG_CLOEXEC) ? O_CLOEXEC : 0; + struct cmsghdr __user *cm = + (__force struct cmsghdr __user *)msg->msg_control; + unsigned int o_flags = (msg->msg_flags & MSG_CMSG_CLOEXEC) ? O_CLOEXEC : 0; int fdmax = min_t(int, scm_max_fds(msg), scm->fp->count); int __user *cmsg_data = CMSG_USER_DATA(cm); int err = 0, i; + /* no use for FD passing from kernel space callers */ + if (WARN_ON_ONCE(!msg->msg_control_is_user)) + return; + if (msg->msg_flags & MSG_CMSG_COMPAT) { scm_detach_fds_compat(msg, scm); return; } - /* no use for FD passing from kernel space callers */ - if (WARN_ON_ONCE(!msg->msg_control_is_user)) - return; - for (i = 0; i < fdmax; i++) { - err = __scm_install_fd(scm->fp->fp[i], cmsg_data + i, o_flags); - if (err) + err = receive_fd_user(scm->fp->fp[i], cmsg_data + i, o_flags); + if (err < 0) break; } - if (i > 0) { + if (i > 0) { int cmlen = CMSG_LEN(i * sizeof(int)); err = put_user(SOL_SOCKET, &cm->cmsg_level); diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 351afbf6bfba..6a32a1fd34f8 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -683,7 +683,7 @@ static struct sk_psock *sk_psock_from_strp(struct strparser *strp) return container_of(parser, struct sk_psock, parser); } -static void sk_psock_skb_redirect(struct sk_psock *psock, struct sk_buff *skb) +static void sk_psock_skb_redirect(struct sk_buff *skb) { struct sk_psock *psock_other; struct sock *sk_other; @@ -715,12 +715,11 @@ static void sk_psock_skb_redirect(struct sk_psock *psock, struct sk_buff *skb) } } -static void sk_psock_tls_verdict_apply(struct sk_psock *psock, - struct sk_buff *skb, int verdict) +static void sk_psock_tls_verdict_apply(struct sk_buff *skb, int verdict) { switch (verdict) { case __SK_REDIRECT: - sk_psock_skb_redirect(psock, skb); + sk_psock_skb_redirect(skb); break; case __SK_PASS: case __SK_DROP: @@ -741,8 +740,8 @@ int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb) ret = sk_psock_bpf_run(psock, prog, skb); ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb)); } + sk_psock_tls_verdict_apply(skb, ret); rcu_read_unlock(); - sk_psock_tls_verdict_apply(psock, skb, ret); return ret; } EXPORT_SYMBOL_GPL(sk_psock_tls_strp_read); @@ -770,7 +769,7 @@ static void sk_psock_verdict_apply(struct sk_psock *psock, } goto out_free; case __SK_REDIRECT: - sk_psock_skb_redirect(psock, skb); + sk_psock_skb_redirect(skb); break; case __SK_DROP: /* fall-through */ @@ -782,11 +781,18 @@ out_free: static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb) { - struct sk_psock *psock = sk_psock_from_strp(strp); + struct sk_psock *psock; struct bpf_prog *prog; int ret = __SK_DROP; + struct sock *sk; rcu_read_lock(); + sk = strp->sk; + psock = sk_psock(sk); + if (unlikely(!psock)) { + kfree_skb(skb); + goto out; + } prog = READ_ONCE(psock->progs.skb_verdict); if (likely(prog)) { skb_orphan(skb); @@ -794,8 +800,9 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb) ret = sk_psock_bpf_run(psock, prog, skb); ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb)); } - rcu_read_unlock(); sk_psock_verdict_apply(psock, skb, ret); +out: + rcu_read_unlock(); } static int sk_psock_strp_read_done(struct strparser *strp, int err) diff --git a/net/core/sock.c b/net/core/sock.c index 6c4acf1f0220..8ccdcdaaa673 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -718,7 +718,7 @@ bool sk_mc_loop(struct sock *sk) return inet6_sk(sk)->mc_loop; #endif } - WARN_ON(1); + WARN_ON_ONCE(1); return true; } EXPORT_SYMBOL(sk_mc_loop); @@ -1767,6 +1767,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, cgroup_sk_alloc(&sk->sk_cgrp_data); sock_update_classid(&sk->sk_cgrp_data); sock_update_netprioidx(&sk->sk_cgrp_data); + sk_tx_queue_clear(sk); } return sk; @@ -1925,7 +1926,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) /* sk->sk_memcg will be populated at accept() time */ newsk->sk_memcg = NULL; - cgroup_sk_alloc(&newsk->sk_cgrp_data); + cgroup_sk_clone(&newsk->sk_cgrp_data); rcu_read_lock(); filter = rcu_dereference(sk->sk_filter); @@ -1972,7 +1973,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) /* * Before updating sk_refcnt, we must commit prior changes to memory - * (Documentation/RCU/rculist_nulls.txt for details) + * (Documentation/RCU/rculist_nulls.rst for details) */ smp_wmb(); refcount_set(&newsk->sk_refcnt, 2); @@ -1990,6 +1991,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) */ sk_refcnt_debug_inc(newsk); sk_set_socket(newsk, NULL); + sk_tx_queue_clear(newsk); RCU_INIT_POINTER(newsk->sk_wq, NULL); if (newsk->sk_prot->sockets_allocated) @@ -2840,6 +2842,27 @@ int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct * } EXPORT_SYMBOL(sock_no_mmap); +/* + * When a file is received (via SCM_RIGHTS, etc), we must bump the + * various sock-based usage counts. + */ +void __receive_sock(struct file *file) +{ + struct socket *sock; + int error; + + /* + * The resulting value of "error" is ignored here since we only + * need to take action when the file is a socket and testing + * "sock" for NULL is sufficient. + */ + sock = sock_from_file(file, &error); + if (sock) { + sock_update_netprioidx(&sock->sk->sk_cgrp_data); + sock_update_classid(&sock->sk->sk_cgrp_data); + } +} + ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) { ssize_t res; @@ -3033,7 +3056,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk_rx_queue_clear(sk); /* * Before updating sk_refcnt, we must commit prior changes to memory - * (Documentation/RCU/rculist_nulls.txt for details) + * (Documentation/RCU/rculist_nulls.rst for details) */ smp_wmb(); refcount_set(&sk->sk_refcnt, 1); diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 00a26cf2cfe9..0971f17e8e54 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -70,11 +70,49 @@ int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog) struct fd f; int ret; + if (attr->attach_flags || attr->replace_bpf_fd) + return -EINVAL; + f = fdget(ufd); map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); - ret = sock_map_prog_update(map, prog, attr->attach_type); + ret = sock_map_prog_update(map, prog, NULL, attr->attach_type); + fdput(f); + return ret; +} + +int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype) +{ + u32 ufd = attr->target_fd; + struct bpf_prog *prog; + struct bpf_map *map; + struct fd f; + int ret; + + if (attr->attach_flags || attr->replace_bpf_fd) + return -EINVAL; + + f = fdget(ufd); + map = __bpf_map_get(f); + if (IS_ERR(map)) + return PTR_ERR(map); + + prog = bpf_prog_get(attr->attach_bpf_fd); + if (IS_ERR(prog)) { + ret = PTR_ERR(prog); + goto put_map; + } + + if (prog->type != ptype) { + ret = -EINVAL; + goto put_prog; + } + + ret = sock_map_prog_update(map, NULL, prog, attr->attach_type); +put_prog: + bpf_prog_put(prog); +put_map: fdput(f); return ret; } @@ -424,10 +462,7 @@ static int sock_map_get_next_key(struct bpf_map *map, void *key, void *next) return 0; } -static bool sock_map_redirect_allowed(const struct sock *sk) -{ - return sk->sk_state != TCP_LISTEN; -} +static bool sock_map_redirect_allowed(const struct sock *sk); static int sock_map_update_common(struct bpf_map *map, u32 idx, struct sock *sk, u64 flags) @@ -508,6 +543,11 @@ static bool sk_is_udp(const struct sock *sk) sk->sk_protocol == IPPROTO_UDP; } +static bool sock_map_redirect_allowed(const struct sock *sk) +{ + return sk_is_tcp(sk) && sk->sk_state != TCP_LISTEN; +} + static bool sock_map_sk_is_suitable(const struct sock *sk) { return sk_is_tcp(sk) || sk_is_udp(sk); @@ -989,11 +1029,15 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr) err = -EINVAL; goto free_htab; } + err = bpf_map_charge_init(&htab->map.memory, cost); + if (err) + goto free_htab; htab->buckets = bpf_map_area_alloc(htab->buckets_num * sizeof(struct bpf_htab_bucket), htab->map.numa_node); if (!htab->buckets) { + bpf_map_charge_finish(&htab->map.memory); err = -ENOMEM; goto free_htab; } @@ -1013,6 +1057,7 @@ static void sock_hash_free(struct bpf_map *map) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct bpf_htab_bucket *bucket; + struct hlist_head unlink_list; struct bpf_htab_elem *elem; struct hlist_node *node; int i; @@ -1024,13 +1069,32 @@ static void sock_hash_free(struct bpf_map *map) synchronize_rcu(); for (i = 0; i < htab->buckets_num; i++) { bucket = sock_hash_select_bucket(htab, i); - hlist_for_each_entry_safe(elem, node, &bucket->head, node) { - hlist_del_rcu(&elem->node); + + /* We are racing with sock_hash_delete_from_link to + * enter the spin-lock critical section. Every socket on + * the list is still linked to sockhash. Since link + * exists, psock exists and holds a ref to socket. That + * lets us to grab a socket ref too. + */ + raw_spin_lock_bh(&bucket->lock); + hlist_for_each_entry(elem, &bucket->head, node) + sock_hold(elem->sk); + hlist_move_list(&bucket->head, &unlink_list); + raw_spin_unlock_bh(&bucket->lock); + + /* Process removed entries out of atomic context to + * block for socket lock before deleting the psock's + * link to sockhash. + */ + hlist_for_each_entry_safe(elem, node, &unlink_list, node) { + hlist_del(&elem->node); lock_sock(elem->sk); rcu_read_lock(); sock_map_unref(elem->sk, elem); rcu_read_unlock(); release_sock(elem->sk); + sock_put(elem->sk); + sock_hash_free_elem(htab, elem); } } @@ -1177,27 +1241,32 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map) } int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, - u32 which) + struct bpf_prog *old, u32 which) { struct sk_psock_progs *progs = sock_map_progs(map); + struct bpf_prog **pprog; if (!progs) return -EOPNOTSUPP; switch (which) { case BPF_SK_MSG_VERDICT: - psock_set_prog(&progs->msg_parser, prog); + pprog = &progs->msg_parser; break; case BPF_SK_SKB_STREAM_PARSER: - psock_set_prog(&progs->skb_parser, prog); + pprog = &progs->skb_parser; break; case BPF_SK_SKB_STREAM_VERDICT: - psock_set_prog(&progs->skb_verdict, prog); + pprog = &progs->skb_verdict; break; default: return -EOPNOTSUPP; } + if (old) + return psock_replace_prog(pprog, prog, old); + + psock_set_prog(pprog, prog); return 0; } diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index adcb3aea576d..bbdd3c7b6cb5 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -101,6 +101,7 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) more_reuse->prog = reuse->prog; more_reuse->reuseport_id = reuse->reuseport_id; more_reuse->bind_inany = reuse->bind_inany; + more_reuse->has_conns = reuse->has_conns; memcpy(more_reuse->socks, reuse->socks, reuse->num_socks * sizeof(struct sock *)); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index f93f8ace6c56..6ada114bbcca 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -274,7 +274,7 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write, ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); if (write && !ret) { if (jit_enable < 2 || - (jit_enable == 2 && bpf_dump_raw_ok())) { + (jit_enable == 2 && bpf_dump_raw_ok(current_cred()))) { *(int *)table->data = jit_enable; if (jit_enable == 2) pr_warn("bpf_jit_enable = 2 was set! NEVER use this in production, only for JIT debugging!\n"); diff --git a/net/core/xdp.c b/net/core/xdp.c index 90f44f382115..3c45f99e26d5 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -462,6 +462,7 @@ struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp) xdpf->len = totsize - metasize; xdpf->headroom = 0; xdpf->metasize = metasize; + xdpf->frame_sz = PAGE_SIZE; xdpf->mem.type = MEM_TYPE_PAGE_ORDER0; xsk_buff_free(xdp); diff --git a/net/dcb/Kconfig b/net/dcb/Kconfig index 917e6e7b1cac..efee8b9fe1d4 100644 --- a/net/dcb/Kconfig +++ b/net/dcb/Kconfig @@ -2,7 +2,7 @@ config DCB bool "Data Center Bridging support" default n - ---help--- + help This enables support for configuring Data Center Bridging (DCB) features on DCB capable Ethernet adapters via rtnetlink. Say 'Y' if you have a DCB capable Ethernet adapter which supports this diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig index f7c7495677b0..51ac2631fb48 100644 --- a/net/dccp/Kconfig +++ b/net/dccp/Kconfig @@ -2,7 +2,7 @@ menuconfig IP_DCCP tristate "The DCCP Protocol" depends on INET - ---help--- + help Datagram Congestion Control Protocol (RFC 4340) From http://www.ietf.org/rfc/rfc4340.txt: @@ -32,7 +32,7 @@ menu "DCCP Kernel Hacking" config IP_DCCP_DEBUG bool "DCCP debug messages" - ---help--- + help Only use this if you're hacking DCCP. When compiling DCCP as a module, this debugging output can be toggled diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig index 4a358e6847a8..4d7771f36eff 100644 --- a/net/dccp/ccids/Kconfig +++ b/net/dccp/ccids/Kconfig @@ -3,7 +3,7 @@ menu "DCCP CCIDs Configuration" config IP_DCCP_CCID2_DEBUG bool "CCID-2 debugging messages" - ---help--- + help Enable CCID-2 specific debugging messages. The debugging output can additionally be toggled by setting the @@ -14,7 +14,7 @@ config IP_DCCP_CCID2_DEBUG config IP_DCCP_CCID3 bool "CCID-3 (TCP-Friendly)" def_bool y if (IP_DCCP = y || IP_DCCP = m) - ---help--- + help CCID-3 denotes TCP-Friendly Rate Control (TFRC), an equation-based rate-controlled congestion control mechanism. TFRC is designed to be reasonably fair when competing for bandwidth with TCP-like flows, @@ -39,7 +39,7 @@ config IP_DCCP_CCID3 config IP_DCCP_CCID3_DEBUG bool "CCID-3 debugging messages" depends on IP_DCCP_CCID3 - ---help--- + help Enable CCID-3 specific debugging messages. The debugging output can additionally be toggled by setting the diff --git a/net/dccp/options.c b/net/dccp/options.c index 3b42f5c6a63d..9fed0ae21e63 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -56,7 +56,7 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, (dh->dccph_doff * 4); struct dccp_options_received *opt_recv = &dp->dccps_options_received; unsigned char opt, len; - unsigned char *uninitialized_var(value); + unsigned char *value; u32 elapsed_time; __be32 opt_val; int rc; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 4af8a98fe784..c13b6609474b 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1139,14 +1139,14 @@ static int __init dccp_init(void) inet_hashinfo_init(&dccp_hashinfo); rc = inet_hashinfo2_init_mod(&dccp_hashinfo); if (rc) - goto out_fail; + goto out_free_percpu; rc = -ENOBUFS; dccp_hashinfo.bind_bucket_cachep = kmem_cache_create("dccp_bind_bucket", sizeof(struct inet_bind_bucket), 0, SLAB_HWCACHE_ALIGN, NULL); if (!dccp_hashinfo.bind_bucket_cachep) - goto out_free_percpu; + goto out_free_hashinfo2; /* * Size and allocate the main established and bind bucket @@ -1242,6 +1242,8 @@ out_free_dccp_ehash: free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order); out_free_bind_bucket_cachep: kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); +out_free_hashinfo2: + inet_hashinfo2_free_mod(&dccp_hashinfo); out_free_percpu: percpu_counter_destroy(&dccp_orphan_count); out_fail: @@ -1265,6 +1267,7 @@ static void __exit dccp_fini(void) kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); dccp_ackvec_exit(); dccp_sysctl_exit(); + inet_hashinfo2_free_mod(&dccp_hashinfo); percpu_counter_destroy(&dccp_orphan_count); } diff --git a/net/decnet/Kconfig b/net/decnet/Kconfig index 8f98fb2f2ec9..24336bdb1054 100644 --- a/net/decnet/Kconfig +++ b/net/decnet/Kconfig @@ -4,7 +4,7 @@ # config DECNET tristate "DECnet Support" - ---help--- + help The DECnet networking protocol was used in many products made by Digital (now Compaq). It provides reliable stream and sequenced packet communications over which run a variety of services similar @@ -29,7 +29,7 @@ config DECNET_ROUTER bool "DECnet: router support" depends on DECNET select FIB_RULES - ---help--- + help Add support for turning your DECnet Endnode into a level 1 or 2 router. This is an experimental, but functional option. If you do say Y here, then make sure that you also say Y to "Kernel/User diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 739613070d07..d5bc6ac599ef 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -13,7 +13,7 @@ menuconfig NET_DSA select NET_SWITCHDEV select PHYLINK select NET_DEVLINK - ---help--- + help Say Y if you want to enable support for the hardware switches supported by the Distributed Switch Architecture. diff --git a/net/dsa/master.c b/net/dsa/master.c index a621367c6e8c..480a61460c23 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -327,6 +327,8 @@ static void dsa_master_reset_mtu(struct net_device *dev) rtnl_unlock(); } +static struct lock_class_key dsa_master_addr_list_lock_key; + int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) { int ret; @@ -345,6 +347,8 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) wmb(); dev->dsa_ptr = cpu_dp; + lockdep_set_class(&dev->addr_list_lock, + &dsa_master_addr_list_lock_key); ret = dsa_master_ethtool_setup(dev); if (ret) return ret; diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c index e8eaa804ccb9..d6200ff98200 100644 --- a/net/dsa/tag_edsa.c +++ b/net/dsa/tag_edsa.c @@ -13,6 +13,16 @@ #define DSA_HLEN 4 #define EDSA_HLEN 8 +#define FRAME_TYPE_TO_CPU 0x00 +#define FRAME_TYPE_FORWARD 0x03 + +#define TO_CPU_CODE_MGMT_TRAP 0x00 +#define TO_CPU_CODE_FRAME2REG 0x01 +#define TO_CPU_CODE_IGMP_MLD_TRAP 0x02 +#define TO_CPU_CODE_POLICY_TRAP 0x03 +#define TO_CPU_CODE_ARP_MIRROR 0x04 +#define TO_CPU_CODE_POLICY_MIRROR 0x05 + static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_port *dp = dsa_slave_to_port(dev); @@ -77,6 +87,8 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) { u8 *edsa_header; + int frame_type; + int code; int source_device; int source_port; @@ -91,8 +103,29 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, /* * Check that frame type is either TO_CPU or FORWARD. */ - if ((edsa_header[0] & 0xc0) != 0x00 && (edsa_header[0] & 0xc0) != 0xc0) + frame_type = edsa_header[0] >> 6; + + switch (frame_type) { + case FRAME_TYPE_TO_CPU: + code = (edsa_header[1] & 0x6) | ((edsa_header[2] >> 4) & 1); + + /* + * Mark the frame to never egress on any port of the same switch + * unless it's a trapped IGMP/MLD packet, in which case the + * bridge might want to forward it. + */ + if (code != TO_CPU_CODE_IGMP_MLD_TRAP) + skb->offload_fwd_mark = 1; + + break; + + case FRAME_TYPE_FORWARD: + skb->offload_fwd_mark = 1; + break; + + default: return NULL; + } /* * Determine source device and port. @@ -156,8 +189,6 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, 2 * ETH_ALEN); } - skb->offload_fwd_mark = 1; - return skb; } diff --git a/net/ethtool/cabletest.c b/net/ethtool/cabletest.c index 7b7a0456c15c..7194956aa09e 100644 --- a/net/ethtool/cabletest.c +++ b/net/ethtool/cabletest.c @@ -234,6 +234,14 @@ static int ethnl_act_cable_test_tdr_cfg(const struct nlattr *nest, struct nlattr *tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX + 1]; int ret; + cfg->first = 100; + cfg->step = 100; + cfg->last = MAX_CABLE_LENGTH_CM; + cfg->pair = PHY_PAIR_ALL; + + if (!nest) + return 0; + ret = nla_parse_nested(tb, ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX, nest, cable_test_tdr_act_cfg_policy, info->extack); if (ret < 0) @@ -242,17 +250,12 @@ static int ethnl_act_cable_test_tdr_cfg(const struct nlattr *nest, if (tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST]) cfg->first = nla_get_u32( tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST]); - else - cfg->first = 100; + if (tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST]) cfg->last = nla_get_u32(tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST]); - else - cfg->last = MAX_CABLE_LENGTH_CM; if (tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP]) cfg->step = nla_get_u32(tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP]); - else - cfg->step = 100; if (tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR]) { cfg->pair = nla_get_u8(tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR]); @@ -263,8 +266,6 @@ static int ethnl_act_cable_test_tdr_cfg(const struct nlattr *nest, "invalid pair parameter"); return -EINVAL; } - } else { - cfg->pair = PHY_PAIR_ALL; } if (cfg->first > MAX_CABLE_LENGTH_CM) { diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 423e640e3876..aaecfc916a4d 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -40,9 +40,11 @@ const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = { [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", [NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation", [NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial", + [NETIF_F_GSO_TUNNEL_REMCSUM_BIT] = "tx-tunnel-remcsum-segmentation", [NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation", [NETIF_F_GSO_ESP_BIT] = "tx-esp-segmentation", [NETIF_F_GSO_UDP_L4_BIT] = "tx-udp-segmentation", + [NETIF_F_GSO_FRAGLIST_BIT] = "tx-gso-list", [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", [NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp", diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index b5df90c981c2..21d5fc0f6bb3 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -2978,7 +2978,7 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input) sizeof(match->mask.ipv6.dst)); } if (memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr)) || - memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr))) { + memcmp(v6_m_spec->ip6dst, &zero_addr, sizeof(zero_addr))) { match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS); match->dissector.offset[FLOW_DISSECTOR_KEY_IPV6_ADDRS] = diff --git a/net/ethtool/linkstate.c b/net/ethtool/linkstate.c index 7f47ba89054e..afe5ac8a0f00 100644 --- a/net/ethtool/linkstate.c +++ b/net/ethtool/linkstate.c @@ -78,19 +78,18 @@ static int linkstate_prepare_data(const struct ethnl_req_info *req_base, ret = linkstate_get_sqi(dev); if (ret < 0 && ret != -EOPNOTSUPP) - return ret; - + goto out; data->sqi = ret; ret = linkstate_get_sqi_max(dev); if (ret < 0 && ret != -EOPNOTSUPP) - return ret; - + goto out; data->sqi_max = ret; + ret = 0; +out: ethnl_ops_complete(dev); - - return 0; + return ret; } static int linkstate_reply_size(const struct ethnl_req_info *req_base, diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 88fd07f47040..dd8a1c1dc07d 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -376,10 +376,17 @@ err_dev: } static int ethnl_default_dump_one(struct sk_buff *skb, struct net_device *dev, - const struct ethnl_dump_ctx *ctx) + const struct ethnl_dump_ctx *ctx, + struct netlink_callback *cb) { + void *ehdr; int ret; + ehdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + ðtool_genl_family, 0, ctx->ops->reply_cmd); + if (!ehdr) + return -EMSGSIZE; + ethnl_init_reply_data(ctx->reply_data, ctx->ops, dev); rtnl_lock(); ret = ctx->ops->prepare_data(ctx->req_info, ctx->reply_data, NULL); @@ -395,6 +402,10 @@ out: if (ctx->ops->cleanup_data) ctx->ops->cleanup_data(ctx->reply_data); ctx->reply_data->dev = NULL; + if (ret < 0) + genlmsg_cancel(skb, ehdr); + else + genlmsg_end(skb, ehdr); return ret; } @@ -411,7 +422,6 @@ static int ethnl_default_dumpit(struct sk_buff *skb, int s_idx = ctx->pos_idx; int h, idx = 0; int ret = 0; - void *ehdr; rtnl_lock(); for (h = ctx->pos_hash; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { @@ -431,26 +441,15 @@ restart_chain: dev_hold(dev); rtnl_unlock(); - ehdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - ðtool_genl_family, 0, - ctx->ops->reply_cmd); - if (!ehdr) { - dev_put(dev); - ret = -EMSGSIZE; - goto out; - } - ret = ethnl_default_dump_one(skb, dev, ctx); + ret = ethnl_default_dump_one(skb, dev, ctx, cb); dev_put(dev); if (ret < 0) { - genlmsg_cancel(skb, ehdr); if (ret == -EOPNOTSUPP) goto lock_and_cont; if (likely(skb->len)) ret = skb->len; goto out; } - genlmsg_end(skb, ehdr); lock_and_cont: rtnl_lock(); if (net->dev_base_seq != seq) { diff --git a/net/hsr/Kconfig b/net/hsr/Kconfig index 9c58f8763997..8095b034e76e 100644 --- a/net/hsr/Kconfig +++ b/net/hsr/Kconfig @@ -5,7 +5,7 @@ config HSR tristate "High-availability Seamless Redundancy (HSR)" - ---help--- + help If you say Y here, then your Linux box will be able to act as a DANH ("Doubly attached node implementing HSR"). For this to work, your Linux box needs (at least) two physical Ethernet interfaces, diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index cd99f548e440..a6f4e9f65b14 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -339,7 +339,7 @@ static void hsr_announce(struct timer_list *t) rcu_read_unlock(); } -static void hsr_del_ports(struct hsr_priv *hsr) +void hsr_del_ports(struct hsr_priv *hsr) { struct hsr_port *port; @@ -356,31 +356,12 @@ static void hsr_del_ports(struct hsr_priv *hsr) hsr_del_port(port); } -/* This has to be called after all the readers are gone. - * Otherwise we would have to check the return value of - * hsr_port_get_hsr(). - */ -static void hsr_dev_destroy(struct net_device *hsr_dev) -{ - struct hsr_priv *hsr = netdev_priv(hsr_dev); - - hsr_debugfs_term(hsr); - hsr_del_ports(hsr); - - del_timer_sync(&hsr->prune_timer); - del_timer_sync(&hsr->announce_timer); - - hsr_del_self_node(hsr); - hsr_del_nodes(&hsr->node_db); -} - static const struct net_device_ops hsr_device_ops = { .ndo_change_mtu = hsr_dev_change_mtu, .ndo_open = hsr_dev_open, .ndo_stop = hsr_dev_close, .ndo_start_xmit = hsr_dev_xmit, .ndo_fix_features = hsr_fix_features, - .ndo_uninit = hsr_dev_destroy, }; static struct device_type hsr_type = { @@ -434,6 +415,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], unsigned char multicast_spec, u8 protocol_version, struct netlink_ext_ack *extack) { + bool unregister = false; struct hsr_priv *hsr; int res; @@ -485,25 +467,27 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], if (res) goto err_unregister; + unregister = true; + res = hsr_add_port(hsr, slave[0], HSR_PT_SLAVE_A, extack); if (res) - goto err_add_slaves; + goto err_unregister; res = hsr_add_port(hsr, slave[1], HSR_PT_SLAVE_B, extack); if (res) - goto err_add_slaves; + goto err_unregister; hsr_debugfs_init(hsr, hsr_dev); mod_timer(&hsr->prune_timer, jiffies + msecs_to_jiffies(PRUNE_PERIOD)); return 0; -err_add_slaves: - unregister_netdevice(hsr_dev); err_unregister: hsr_del_ports(hsr); err_add_master: hsr_del_self_node(hsr); + if (unregister) + unregister_netdevice(hsr_dev); return res; } diff --git a/net/hsr/hsr_device.h b/net/hsr/hsr_device.h index a099d7de7e79..b8f9262ed101 100644 --- a/net/hsr/hsr_device.h +++ b/net/hsr/hsr_device.h @@ -11,6 +11,7 @@ #include <linux/netdevice.h> #include "hsr_main.h" +void hsr_del_ports(struct hsr_priv *hsr); void hsr_dev_setup(struct net_device *dev); int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], unsigned char multicast_spec, u8 protocol_version, @@ -18,5 +19,4 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], void hsr_check_carrier_and_operstate(struct hsr_priv *hsr); bool is_hsr_master(struct net_device *dev); int hsr_get_max_mtu(struct hsr_priv *hsr); - #endif /* __HSR_DEVICE_H */ diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index ed13760463de..1ea17752fffc 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -120,13 +120,18 @@ static struct sk_buff *frame_get_stripped_skb(struct hsr_frame_info *frame, return skb_clone(frame->skb_std, GFP_ATOMIC); } -static void hsr_fill_tag(struct sk_buff *skb, struct hsr_frame_info *frame, - struct hsr_port *port, u8 proto_version) +static struct sk_buff *hsr_fill_tag(struct sk_buff *skb, + struct hsr_frame_info *frame, + struct hsr_port *port, u8 proto_version) { struct hsr_ethhdr *hsr_ethhdr; int lane_id; int lsdu_size; + /* pad to minimum packet size which is 60 + 6 (HSR tag) */ + if (skb_put_padto(skb, ETH_ZLEN + HSR_HLEN)) + return NULL; + if (port->type == HSR_PT_SLAVE_A) lane_id = 0; else @@ -144,6 +149,8 @@ static void hsr_fill_tag(struct sk_buff *skb, struct hsr_frame_info *frame, hsr_ethhdr->hsr_tag.encap_proto = hsr_ethhdr->ethhdr.h_proto; hsr_ethhdr->ethhdr.h_proto = htons(proto_version ? ETH_P_HSR : ETH_P_PRP); + + return skb; } static struct sk_buff *create_tagged_skb(struct sk_buff *skb_o, @@ -172,9 +179,10 @@ static struct sk_buff *create_tagged_skb(struct sk_buff *skb_o, memmove(dst, src, movelen); skb_reset_mac_header(skb); - hsr_fill_tag(skb, frame, port, port->hsr->prot_version); - - return skb; + /* skb_put_padto free skb on error and hsr_fill_tag returns NULL in + * that case + */ + return hsr_fill_tag(skb, frame, port, port->hsr->prot_version); } /* If the original frame was an HSR tagged frame, just clone it to be sent diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 03b891904314..530de24b1fb5 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -325,7 +325,8 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb, if (port->type != node_dst->addr_B_port) return; - ether_addr_copy(eth_hdr(skb)->h_dest, node_dst->macaddress_B); + if (is_valid_ether_addr(node_dst->macaddress_B)) + ether_addr_copy(eth_hdr(skb)->h_dest, node_dst->macaddress_B); } void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port, diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c index e2564de67603..144da15f0a81 100644 --- a/net/hsr/hsr_main.c +++ b/net/hsr/hsr_main.c @@ -6,6 +6,7 @@ */ #include <linux/netdevice.h> +#include <net/rtnetlink.h> #include <linux/rculist.h> #include <linux/timer.h> #include <linux/etherdevice.h> @@ -100,8 +101,10 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event, master = hsr_port_get_hsr(port->hsr, HSR_PT_MASTER); hsr_del_port(port); if (hsr_slave_empty(master->hsr)) { - unregister_netdevice_queue(master->dev, - &list_kill); + const struct rtnl_link_ops *ops; + + ops = master->dev->rtnl_link_ops; + ops->dellink(master->dev, &list_kill); unregister_netdevice_many(&list_kill); } } @@ -144,9 +147,9 @@ static int __init hsr_init(void) static void __exit hsr_exit(void) { - unregister_netdevice_notifier(&hsr_nb); hsr_netlink_exit(); hsr_debugfs_remove_root(); + unregister_netdevice_notifier(&hsr_nb); } module_init(hsr_init); diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index 1decb25f6764..6e14b7d22639 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -83,6 +83,22 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, return hsr_dev_finalize(dev, link, multicast_spec, hsr_version, extack); } +static void hsr_dellink(struct net_device *dev, struct list_head *head) +{ + struct hsr_priv *hsr = netdev_priv(dev); + + del_timer_sync(&hsr->prune_timer); + del_timer_sync(&hsr->announce_timer); + + hsr_debugfs_term(hsr); + hsr_del_ports(hsr); + + hsr_del_self_node(hsr); + hsr_del_nodes(&hsr->node_db); + + unregister_netdevice_queue(dev, head); +} + static int hsr_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct hsr_priv *hsr = netdev_priv(dev); @@ -118,6 +134,7 @@ static struct rtnl_link_ops hsr_link_ops __read_mostly = { .priv_size = sizeof(struct hsr_priv), .setup = hsr_dev_setup, .newlink = hsr_newlink, + .dellink = hsr_dellink, .fill_info = hsr_fill_info, }; diff --git a/net/ieee802154/6lowpan/Kconfig b/net/ieee802154/6lowpan/Kconfig index d1b4655a6d43..e808e4db2678 100644 --- a/net/ieee802154/6lowpan/Kconfig +++ b/net/ieee802154/6lowpan/Kconfig @@ -2,5 +2,5 @@ config IEEE802154_6LOWPAN tristate "6lowpan support over IEEE 802.15.4" depends on 6LOWPAN - ---help--- + help IPv6 compression over IEEE 802.15.4. diff --git a/net/ieee802154/Kconfig b/net/ieee802154/Kconfig index 5dbbc2ca95b4..bcb05ba97686 100644 --- a/net/ieee802154/Kconfig +++ b/net/ieee802154/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only menuconfig IEEE802154 tristate "IEEE Std 802.15.4 Low-Rate Wireless Personal Area Networks support" - ---help--- + help IEEE Std 802.15.4 defines a low data rate, low power and low complexity short range wireless personal area networks. It was designed to organise networks of sensors, switches, etc automation @@ -15,13 +15,13 @@ if IEEE802154 config IEEE802154_NL802154_EXPERIMENTAL bool "IEEE 802.15.4 experimental netlink support" - ---help--- + help Adds experimental netlink support for nl802154. config IEEE802154_SOCKET tristate "IEEE 802.15.4 socket interface" default y - ---help--- + help Socket interface for IEEE 802.15.4. Contains DGRAM sockets interface for 802.15.4 dataframes. Also RAW socket interface to build MAC header from userspace. diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 23ba5045e3d3..e64e59b536d3 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -14,7 +14,7 @@ config IP_MULTICAST config IP_ADVANCED_ROUTER bool "IP: advanced router" - ---help--- + help If you intend to run your Linux box mostly as a router, i.e. as a computer that forwards and redistributes network packets, say Y; you will then be presented with several options that allow more precise @@ -56,7 +56,7 @@ config IP_ADVANCED_ROUTER config IP_FIB_TRIE_STATS bool "FIB TRIE statistics" depends on IP_ADVANCED_ROUTER - ---help--- + help Keep track of statistics on structure of FIB TRIE table. Useful for testing and measuring TRIE performance. @@ -64,7 +64,7 @@ config IP_MULTIPLE_TABLES bool "IP: policy routing" depends on IP_ADVANCED_ROUTER select FIB_RULES - ---help--- + help Normally, a router decides what to do with a received packet based solely on the packet's final destination address. If you say Y here, the Linux router will also be able to take the packet's source @@ -117,7 +117,7 @@ config IP_PNP config IP_PNP_DHCP bool "IP: DHCP support" depends on IP_PNP - ---help--- + help If you want your Linux box to mount its whole root file system (the one containing the directory /) from some other computer over the net via NFS and you want the IP address of your computer to be @@ -134,7 +134,7 @@ config IP_PNP_DHCP config IP_PNP_BOOTP bool "IP: BOOTP support" depends on IP_PNP - ---help--- + help If you want your Linux box to mount its whole root file system (the one containing the directory /) from some other computer over the net via NFS and you want the IP address of your computer to be @@ -163,7 +163,7 @@ config NET_IPIP tristate "IP: tunneling" select INET_TUNNEL select NET_IP_TUNNEL - ---help--- + help Tunneling means encapsulating data of one protocol type within another protocol and sending it over a channel that understands the encapsulating protocol. This particular tunneling driver implements @@ -267,7 +267,7 @@ config IP_PIMSM_V2 config SYN_COOKIES bool "IP: TCP syncookie support" - ---help--- + help Normal TCP/IP networking is open to an attack known as "SYN flooding". This denial-of-service attack prevents legitimate remote users from being able to connect to your computer during an ongoing @@ -307,7 +307,7 @@ config NET_IPVTI select INET_TUNNEL select NET_IP_TUNNEL select XFRM - ---help--- + help Tunneling means encapsulating data of one protocol type within another protocol and sending it over a channel that understands the encapsulating protocol. This can be used with xfrm mode tunnel to give @@ -323,7 +323,7 @@ config NET_FOU tristate "IP: Foo (IP protocols) over UDP" select XFRM select NET_UDP_TUNNEL - ---help--- + help Foo over UDP allows any IP protocol to be directly encapsulated over UDP include tunnels (IPIP, GRE, SIT). By encapsulating in UDP network mechanisms and optimizations for UDP (such as ECMP @@ -333,36 +333,38 @@ config NET_FOU_IP_TUNNELS bool "IP: FOU encapsulation of IP tunnels" depends on NET_IPIP || NET_IPGRE || IPV6_SIT select NET_FOU - ---help--- + help Allow configuration of FOU or GUE encapsulation for IP tunnels. When this option is enabled IP tunnels can be configured to use FOU or GUE encapsulation. config INET_AH tristate "IP: AH transformation" - select XFRM_ALGO - select CRYPTO - select CRYPTO_HMAC - select CRYPTO_MD5 - select CRYPTO_SHA1 - ---help--- - Support for IPsec AH. + select XFRM_AH + help + Support for IPsec AH (Authentication Header). + + AH can be used with various authentication algorithms. Besides + enabling AH support itself, this option enables the generic + implementations of the algorithms that RFC 8221 lists as MUST be + implemented. If you need any other algorithms, you'll need to enable + them in the crypto API. You should also enable accelerated + implementations of any needed algorithms when available. If unsure, say Y. config INET_ESP tristate "IP: ESP transformation" - select XFRM_ALGO - select CRYPTO - select CRYPTO_AUTHENC - select CRYPTO_HMAC - select CRYPTO_MD5 - select CRYPTO_CBC - select CRYPTO_SHA1 - select CRYPTO_DES - select CRYPTO_ECHAINIV - ---help--- - Support for IPsec ESP. + select XFRM_ESP + help + Support for IPsec ESP (Encapsulating Security Payload). + + ESP can be used with various encryption and authentication algorithms. + Besides enabling ESP support itself, this option enables the generic + implementations of the algorithms that RFC 8221 lists as MUST be + implemented. If you need any other algorithms, you'll need to enable + them in the crypto API. You should also enable accelerated + implementations of any needed algorithms when available. If unsure, say Y. @@ -371,7 +373,7 @@ config INET_ESP_OFFLOAD depends on INET_ESP select XFRM_OFFLOAD default n - ---help--- + help Support for ESP transformation offload. This makes sense only if this system really does IPsec and want to do it with high throughput. A typical desktop system does not @@ -395,7 +397,7 @@ config INET_IPCOMP tristate "IP: IPComp transformation" select INET_XFRM_TUNNEL select XFRM_IPCOMP - ---help--- + help Support for IP Payload Compression Protocol (IPComp) (RFC3173), typically needed for IPsec. @@ -413,7 +415,7 @@ config INET_TUNNEL config INET_DIAG tristate "INET: socket monitoring interface" default y - ---help--- + help Support for INET (TCP, DCCP, etc) socket monitoring interface used by native Linux tools such as ss. ss is included in iproute2, currently downloadable at: @@ -430,7 +432,7 @@ config INET_UDP_DIAG tristate "UDP: socket monitoring interface" depends on INET_DIAG && (IPV6 || IPV6=n) default n - ---help--- + help Support for UDP socket monitoring interface used by the ss tool. If unsure, say Y. @@ -438,7 +440,7 @@ config INET_RAW_DIAG tristate "RAW: socket monitoring interface" depends on INET_DIAG && (IPV6 || IPV6=n) default n - ---help--- + help Support for RAW socket monitoring interface used by the ss tool. If unsure, say Y. @@ -446,7 +448,7 @@ config INET_DIAG_DESTROY bool "INET: allow privileged process to administratively close sockets" depends on INET_DIAG default n - ---help--- + help Provides a SOCK_DESTROY operation that allows privileged processes (e.g., a connection manager or a network administration tool such as ss) to close sockets opened by other processes. Closing a socket in @@ -457,7 +459,7 @@ config INET_DIAG_DESTROY menuconfig TCP_CONG_ADVANCED bool "TCP: advanced congestion control" - ---help--- + help Support for selection of various TCP congestion control modules. @@ -471,7 +473,7 @@ if TCP_CONG_ADVANCED config TCP_CONG_BIC tristate "Binary Increase Congestion (BIC) control" default m - ---help--- + help BIC-TCP is a sender-side only change that ensures a linear RTT fairness under large windows while offering both scalability and bounded TCP-friendliness. The protocol combines two schemes @@ -485,7 +487,7 @@ config TCP_CONG_BIC config TCP_CONG_CUBIC tristate "CUBIC TCP" default y - ---help--- + help This is version 2.0 of BIC-TCP which uses a cubic growth function among other techniques. See http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/cubic-paper.pdf @@ -493,7 +495,7 @@ config TCP_CONG_CUBIC config TCP_CONG_WESTWOOD tristate "TCP Westwood+" default m - ---help--- + help TCP Westwood+ is a sender-side only modification of the TCP Reno protocol stack that optimizes the performance of TCP congestion control. It is based on end-to-end bandwidth estimation to set @@ -507,7 +509,7 @@ config TCP_CONG_WESTWOOD config TCP_CONG_HTCP tristate "H-TCP" default m - ---help--- + help H-TCP is a send-side only modifications of the TCP Reno protocol stack that optimizes the performance of TCP congestion control for high speed network links. It uses a @@ -518,7 +520,7 @@ config TCP_CONG_HTCP config TCP_CONG_HSTCP tristate "High Speed TCP" default n - ---help--- + help Sally Floyd's High Speed TCP (RFC 3649) congestion control. A modification to TCP's congestion control mechanism for use with large congestion windows. A table indicates how much to @@ -528,7 +530,7 @@ config TCP_CONG_HSTCP config TCP_CONG_HYBLA tristate "TCP-Hybla congestion control algorithm" default n - ---help--- + help TCP-Hybla is a sender-side only change that eliminates penalization of long-RTT, large-bandwidth connections, like when satellite legs are involved, especially when sharing a common bottleneck with normal @@ -537,7 +539,7 @@ config TCP_CONG_HYBLA config TCP_CONG_VEGAS tristate "TCP Vegas" default n - ---help--- + help TCP Vegas is a sender-side only change to TCP that anticipates the onset of congestion by estimating the bandwidth. TCP Vegas adjusts the sending rate by modifying the congestion @@ -547,7 +549,7 @@ config TCP_CONG_VEGAS config TCP_CONG_NV tristate "TCP NV" default n - ---help--- + help TCP NV is a follow up to TCP Vegas. It has been modified to deal with 10G networks, measurement noise introduced by LRO, GRO and interrupt coalescence. In addition, it will decrease its cwnd multiplicatively @@ -563,7 +565,7 @@ config TCP_CONG_NV config TCP_CONG_SCALABLE tristate "Scalable TCP" default n - ---help--- + help Scalable TCP is a sender-side only change to TCP which uses a MIMD congestion control algorithm which has some nice scaling properties, though is known to have fairness issues. @@ -572,7 +574,7 @@ config TCP_CONG_SCALABLE config TCP_CONG_LP tristate "TCP Low Priority" default n - ---help--- + help TCP Low Priority (TCP-LP), a distributed algorithm whose goal is to utilize only the excess network bandwidth as compared to the ``fair share`` of bandwidth as targeted by TCP. @@ -581,7 +583,7 @@ config TCP_CONG_LP config TCP_CONG_VENO tristate "TCP Veno" default n - ---help--- + help TCP Veno is a sender-side only enhancement of TCP to obtain better throughput over wireless networks. TCP Veno makes use of state distinguishing to circumvent the difficult judgment of the packet loss @@ -593,7 +595,7 @@ config TCP_CONG_YEAH tristate "YeAH TCP" select TCP_CONG_VEGAS default n - ---help--- + help YeAH-TCP is a sender-side high-speed enabled TCP congestion control algorithm, which uses a mixed loss/delay approach to compute the congestion window. It's design goals target high efficiency, @@ -606,7 +608,7 @@ config TCP_CONG_YEAH config TCP_CONG_ILLINOIS tristate "TCP Illinois" default n - ---help--- + help TCP-Illinois is a sender-side modification of TCP Reno for high speed long delay links. It uses round-trip-time to adjust the alpha and beta parameters to achieve a higher average @@ -618,7 +620,7 @@ config TCP_CONG_ILLINOIS config TCP_CONG_DCTCP tristate "DataCenter TCP (DCTCP)" default n - ---help--- + help DCTCP leverages Explicit Congestion Notification (ECN) in the network to provide multi-bit feedback to the end hosts. It is designed to provide: @@ -639,7 +641,7 @@ config TCP_CONG_DCTCP config TCP_CONG_CDG tristate "CAIA Delay-Gradient (CDG)" default n - ---help--- + help CAIA Delay-Gradient (CDG) is a TCP congestion control that modifies the TCP sender in order to: @@ -655,7 +657,7 @@ config TCP_CONG_CDG config TCP_CONG_BBR tristate "BBR TCP" default n - ---help--- + help BBR (Bottleneck Bandwidth and RTT) TCP congestion control aims to maximize network utilization and minimize queues. It builds an explicit @@ -734,7 +736,7 @@ config TCP_MD5SIG bool "TCP: MD5 Signature Option support (RFC2385)" select CRYPTO select CRYPTO_MD5 - ---help--- + help RFC2385 specifies a method of giving MD5 protection to TCP sessions. Its main (only?) use is to protect BGP sessions between core routers on the Internet. diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c index 0480918bfc7c..9063c6767d34 100644 --- a/net/ipv4/bpfilter/sockopt.c +++ b/net/ipv4/bpfilter/sockopt.c @@ -12,15 +12,14 @@ struct bpfilter_umh_ops bpfilter_ops; EXPORT_SYMBOL_GPL(bpfilter_ops); -static void bpfilter_umh_cleanup(struct umh_info *info) +void bpfilter_umh_cleanup(struct umd_info *info) { - mutex_lock(&bpfilter_ops.lock); - bpfilter_ops.stop = true; fput(info->pipe_to_umh); fput(info->pipe_from_umh); - info->pid = 0; - mutex_unlock(&bpfilter_ops.lock); + put_pid(info->tgid); + info->tgid = NULL; } +EXPORT_SYMBOL_GPL(bpfilter_umh_cleanup); static int bpfilter_mbox_request(struct sock *sk, int optname, char __user *optval, @@ -38,7 +37,11 @@ static int bpfilter_mbox_request(struct sock *sk, int optname, goto out; } } - if (bpfilter_ops.stop) { + if (bpfilter_ops.info.tgid && + thread_group_exited(bpfilter_ops.info.tgid)) + bpfilter_umh_cleanup(&bpfilter_ops.info); + + if (!bpfilter_ops.info.tgid) { err = bpfilter_ops.start(); if (err) goto out; @@ -69,9 +72,8 @@ int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval, static int __init bpfilter_sockopt_init(void) { mutex_init(&bpfilter_ops.lock); - bpfilter_ops.stop = true; - bpfilter_ops.info.cmdline = "bpfilter_umh"; - bpfilter_ops.info.cleanup = &bpfilter_umh_cleanup; + bpfilter_ops.info.tgid = NULL; + bpfilter_ops.info.driver_name = "bpfilter_umh"; return 0; } diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index d14133eac476..5bda5aeda579 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -361,3 +361,4 @@ module_exit(esp4_offload_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Steffen Klassert <steffen.klassert@secunet.com>"); MODULE_ALIAS_XFRM_OFFLOAD_TYPE(AF_INET, XFRM_PROTO_ESP); +MODULE_DESCRIPTION("IPV4 GSO/GRO offload support"); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index e53871e4a097..1f75dc686b6b 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1109,7 +1109,7 @@ static int fib_check_nh_v4_gw(struct net *net, struct fib_nh *nh, u32 table, if (fl4.flowi4_scope < RT_SCOPE_LINK) fl4.flowi4_scope = RT_SCOPE_LINK; - if (table) + if (table && table != RT_TABLE_MAIN) tbl = fib_get_table(net, table); if (tbl) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 248f1c1959a6..3c65f71d0e82 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1864,7 +1864,7 @@ struct fib_table *fib_trie_unmerge(struct fib_table *oldtb) while ((l = leaf_walk_rcu(&tp, key)) != NULL) { struct key_vector *local_l = NULL, *local_tp; - hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { + hlist_for_each_entry(fa, &l->leaf, fa_list) { struct fib_alias *new_fa; if (local_tb->tb_id != fa->tb_id) diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index dcc79ff54b41..abd083415f89 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -1304,3 +1304,4 @@ module_init(fou_init); module_exit(fou_fini); MODULE_AUTHOR("Tom Herbert <therbert@google.com>"); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Foo over UDP"); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 956a806649f7..e30515f89802 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -427,7 +427,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) ipcm_init(&ipc); inet->tos = ip_hdr(skb)->tos; - sk->sk_mark = mark; + ipc.sockc.mark = mark; daddr = ipc.addr = ip_hdr(skb)->saddr; saddr = fib_compute_spec_dst(skb); @@ -710,10 +710,10 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, icmp_param.skb = skb_in; icmp_param.offset = skb_network_offset(skb_in); inet_sk(sk)->tos = tos; - sk->sk_mark = mark; ipcm_init(&ipc); ipc.addr = iph->saddr; ipc.opt = &icmp_param.replyopts.opt; + ipc.sockc.mark = mark; rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark, type, code, &icmp_param); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 090d3097ee15..17206677d503 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1702,7 +1702,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, sk->sk_protocol = ip_hdr(skb)->protocol; sk->sk_bound_dev_if = arg->bound_dev_if; sk->sk_sndbuf = sysctl_wmem_default; - sk->sk_mark = fl4.flowi4_mark; + ipc.sockc.mark = fl4.flowi4_mark; err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, len, 0, &ipc, &rt, MSG_DONTWAIT); if (unlikely(err)) { diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index f4f1d11eab50..0c1f36404471 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -85,9 +85,10 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, __be32 remote, __be32 local, __be32 key) { - unsigned int hash; struct ip_tunnel *t, *cand = NULL; struct hlist_head *head; + struct net_device *ndev; + unsigned int hash; hash = ip_tunnel_hash(key, remote); head = &itn->tunnels[hash]; @@ -162,8 +163,9 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, if (t && t->dev->flags & IFF_UP) return t; - if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP) - return netdev_priv(itn->fb_tunnel_dev); + ndev = READ_ONCE(itn->fb_tunnel_dev); + if (ndev && ndev->flags & IFF_UP) + return netdev_priv(ndev); return NULL; } @@ -1259,9 +1261,9 @@ void ip_tunnel_uninit(struct net_device *dev) struct ip_tunnel_net *itn; itn = net_generic(net, tunnel->ip_tnl_net_id); - /* fb_tunnel_dev will be unregisted in net-exit call. */ - if (itn->fb_tunnel_dev != dev) - ip_tunnel_del(itn, netdev_priv(dev)); + ip_tunnel_del(itn, netdev_priv(dev)); + if (itn->fb_tunnel_dev == dev) + WRITE_ONCE(itn->fb_tunnel_dev, NULL); dst_cache_reset(&tunnel->dst_cache); } diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 181b7a2a0247..f8b419e2475c 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -844,3 +844,21 @@ void ip_tunnel_unneed_metadata(void) static_branch_dec(&ip_tunnel_metadata_cnt); } EXPORT_SYMBOL_GPL(ip_tunnel_unneed_metadata); + +/* Returns either the correct skb->protocol value, or 0 if invalid. */ +__be16 ip_tunnel_parse_protocol(const struct sk_buff *skb) +{ + if (skb_network_header(skb) >= skb->head && + (skb_network_header(skb) + sizeof(struct iphdr)) <= skb_tail_pointer(skb) && + ip_hdr(skb)->version == 4) + return htons(ETH_P_IP); + if (skb_network_header(skb) >= skb->head && + (skb_network_header(skb) + sizeof(struct ipv6hdr)) <= skb_tail_pointer(skb) && + ipv6_hdr(skb)->version == 6) + return htons(ETH_P_IPV6); + return 0; +} +EXPORT_SYMBOL(ip_tunnel_parse_protocol); + +const struct header_ops ip_tunnel_header_ops = { .parse_protocol = ip_tunnel_parse_protocol }; +EXPORT_SYMBOL(ip_tunnel_header_ops); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 1d9c8cff5ac3..460ca1099e8a 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -441,6 +441,7 @@ static const struct net_device_ops vti_netdev_ops = { static void vti_tunnel_setup(struct net_device *dev) { dev->netdev_ops = &vti_netdev_ops; + dev->header_ops = &ip_tunnel_header_ops; dev->type = ARPHRD_TUNNEL; ip_tunnel_setup(dev, vti_net_id); } diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 40fea52c8277..75d35e76bec2 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -361,6 +361,7 @@ static const struct net_device_ops ipip_netdev_ops = { static void ipip_tunnel_setup(struct net_device *dev) { dev->netdev_ops = &ipip_netdev_ops; + dev->header_ops = &ip_tunnel_header_ops; dev->type = ARPHRD_TUNNEL; dev->flags = IFF_NOARP; diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index f17b402111ce..a2f4f894be2b 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -94,7 +94,7 @@ config NF_NAT_SNMP_BASIC depends on NETFILTER_ADVANCED default NF_NAT && NF_CONNTRACK_SNMP select ASN1 - ---help--- + help This module implements an Application Layer Gateway (ALG) for SNMP payloads. In conjunction with NAT, it allows a network @@ -146,7 +146,7 @@ config IP_NF_MATCH_ECN tristate '"ecn" match support' depends on NETFILTER_ADVANCED select NETFILTER_XT_MATCH_ECN - ---help--- + help This is a backwards-compat option for the user's convenience (e.g. when running oldconfig). It selects CONFIG_NETFILTER_XT_MATCH_ECN. @@ -155,7 +155,7 @@ config IP_NF_MATCH_RPFILTER tristate '"rpfilter" reverse path filter match support' depends on NETFILTER_ADVANCED depends on IP_NF_MANGLE || IP_NF_RAW - ---help--- + help This option allows you to match packets whose replies would go out via the interface the packet came in. @@ -166,7 +166,7 @@ config IP_NF_MATCH_TTL tristate '"ttl" match support' depends on NETFILTER_ADVANCED select NETFILTER_XT_MATCH_HL - ---help--- + help This is a backwards-compat option for the user's convenience (e.g. when running oldconfig). It selects CONFIG_NETFILTER_XT_MATCH_HL. @@ -234,7 +234,7 @@ config IP_NF_TARGET_NETMAP tristate "NETMAP target support" depends on NETFILTER_ADVANCED select NETFILTER_XT_TARGET_NETMAP - ---help--- + help This is a backwards-compat option for the user's convenience (e.g. when running oldconfig). It selects CONFIG_NETFILTER_XT_TARGET_NETMAP. @@ -243,7 +243,7 @@ config IP_NF_TARGET_REDIRECT tristate "REDIRECT target support" depends on NETFILTER_ADVANCED select NETFILTER_XT_TARGET_REDIRECT - ---help--- + help This is a backwards-compat option for the user's convenience (e.g. when running oldconfig). It selects CONFIG_NETFILTER_XT_TARGET_REDIRECT. @@ -279,7 +279,7 @@ config IP_NF_TARGET_ECN tristate "ECN target support" depends on IP_NF_MANGLE depends on NETFILTER_ADVANCED - ---help--- + help This option adds a `ECN' target, which can be used in the iptables mangle table. @@ -294,7 +294,7 @@ config IP_NF_TARGET_TTL tristate '"TTL" target support' depends on NETFILTER_ADVANCED && IP_NF_MANGLE select NETFILTER_XT_TARGET_HL - ---help--- + help This is a backwards-compatible option for the user's convenience (e.g. when running oldconfig). It selects CONFIG_NETFILTER_XT_TARGET_HL. diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index c2670eaa74e6..5bf9fa06aee0 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1797,11 +1797,22 @@ out_free: return ret; } +void ipt_unregister_table_pre_exit(struct net *net, struct xt_table *table, + const struct nf_hook_ops *ops) +{ + nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); +} + +void ipt_unregister_table_exit(struct net *net, struct xt_table *table) +{ + __ipt_unregister_table(net, table); +} + void ipt_unregister_table(struct net *net, struct xt_table *table, const struct nf_hook_ops *ops) { if (ops) - nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); + ipt_unregister_table_pre_exit(net, table, ops); __ipt_unregister_table(net, table); } @@ -1958,6 +1969,8 @@ static void __exit ip_tables_fini(void) EXPORT_SYMBOL(ipt_register_table); EXPORT_SYMBOL(ipt_unregister_table); +EXPORT_SYMBOL(ipt_unregister_table_pre_exit); +EXPORT_SYMBOL(ipt_unregister_table_exit); EXPORT_SYMBOL(ipt_do_table); module_init(ip_tables_init); module_exit(ip_tables_fini); diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 748dc3ce58d3..f2984c7eef40 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -118,3 +118,4 @@ module_exit(synproxy_tg4_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_DESCRIPTION("Intercept TCP connections and establish them using syncookies"); diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 9d54b4017e50..8f7bc1ee7453 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -72,16 +72,24 @@ static int __net_init iptable_filter_net_init(struct net *net) return 0; } +static void __net_exit iptable_filter_net_pre_exit(struct net *net) +{ + if (net->ipv4.iptable_filter) + ipt_unregister_table_pre_exit(net, net->ipv4.iptable_filter, + filter_ops); +} + static void __net_exit iptable_filter_net_exit(struct net *net) { if (!net->ipv4.iptable_filter) return; - ipt_unregister_table(net, net->ipv4.iptable_filter, filter_ops); + ipt_unregister_table_exit(net, net->ipv4.iptable_filter); net->ipv4.iptable_filter = NULL; } static struct pernet_operations iptable_filter_net_ops = { .init = iptable_filter_net_init, + .pre_exit = iptable_filter_net_pre_exit, .exit = iptable_filter_net_exit, }; diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index bb9266ea3785..f703a717ab1d 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -100,15 +100,23 @@ static int __net_init iptable_mangle_table_init(struct net *net) return ret; } +static void __net_exit iptable_mangle_net_pre_exit(struct net *net) +{ + if (net->ipv4.iptable_mangle) + ipt_unregister_table_pre_exit(net, net->ipv4.iptable_mangle, + mangle_ops); +} + static void __net_exit iptable_mangle_net_exit(struct net *net) { if (!net->ipv4.iptable_mangle) return; - ipt_unregister_table(net, net->ipv4.iptable_mangle, mangle_ops); + ipt_unregister_table_exit(net, net->ipv4.iptable_mangle); net->ipv4.iptable_mangle = NULL; } static struct pernet_operations iptable_mangle_net_ops = { + .pre_exit = iptable_mangle_net_pre_exit, .exit = iptable_mangle_net_exit, }; diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index ad33687b7444..b0143b109f25 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -113,16 +113,22 @@ static int __net_init iptable_nat_table_init(struct net *net) return ret; } +static void __net_exit iptable_nat_net_pre_exit(struct net *net) +{ + if (net->ipv4.nat_table) + ipt_nat_unregister_lookups(net); +} + static void __net_exit iptable_nat_net_exit(struct net *net) { if (!net->ipv4.nat_table) return; - ipt_nat_unregister_lookups(net); - ipt_unregister_table(net, net->ipv4.nat_table, NULL); + ipt_unregister_table_exit(net, net->ipv4.nat_table); net->ipv4.nat_table = NULL; } static struct pernet_operations iptable_nat_net_ops = { + .pre_exit = iptable_nat_net_pre_exit, .exit = iptable_nat_net_exit, }; diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 69697eb4bfc6..9abfe6bf2cb9 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -67,15 +67,23 @@ static int __net_init iptable_raw_table_init(struct net *net) return ret; } +static void __net_exit iptable_raw_net_pre_exit(struct net *net) +{ + if (net->ipv4.iptable_raw) + ipt_unregister_table_pre_exit(net, net->ipv4.iptable_raw, + rawtable_ops); +} + static void __net_exit iptable_raw_net_exit(struct net *net) { if (!net->ipv4.iptable_raw) return; - ipt_unregister_table(net, net->ipv4.iptable_raw, rawtable_ops); + ipt_unregister_table_exit(net, net->ipv4.iptable_raw); net->ipv4.iptable_raw = NULL; } static struct pernet_operations iptable_raw_net_ops = { + .pre_exit = iptable_raw_net_pre_exit, .exit = iptable_raw_net_exit, }; diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index ac633c1db97e..415c1975d770 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -62,16 +62,23 @@ static int __net_init iptable_security_table_init(struct net *net) return ret; } +static void __net_exit iptable_security_net_pre_exit(struct net *net) +{ + if (net->ipv4.iptable_security) + ipt_unregister_table_pre_exit(net, net->ipv4.iptable_security, + sectbl_ops); +} + static void __net_exit iptable_security_net_exit(struct net *net) { if (!net->ipv4.iptable_security) return; - - ipt_unregister_table(net, net->ipv4.iptable_security, sectbl_ops); + ipt_unregister_table_exit(net, net->ipv4.iptable_security); net->ipv4.iptable_security = NULL; } static struct pernet_operations iptable_security_net_ops = { + .pre_exit = iptable_security_net_pre_exit, .exit = iptable_security_net_exit, }; diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c index e32e41b99f0f..aba65fe90345 100644 --- a/net/ipv4/netfilter/nf_flow_table_ipv4.c +++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c @@ -34,3 +34,4 @@ module_exit(nf_flow_ipv4_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); MODULE_ALIAS_NF_FLOWTABLE(AF_INET); +MODULE_DESCRIPTION("Netfilter flow table support"); diff --git a/net/ipv4/netfilter/nf_socket_ipv4.c b/net/ipv4/netfilter/nf_socket_ipv4.c index c94445b44d8c..2d42e4c35a20 100644 --- a/net/ipv4/netfilter/nf_socket_ipv4.c +++ b/net/ipv4/netfilter/nf_socket_ipv4.c @@ -84,11 +84,11 @@ nf_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff, struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb, const struct net_device *indev) { - __be32 uninitialized_var(daddr), uninitialized_var(saddr); - __be16 uninitialized_var(dport), uninitialized_var(sport); + __be32 daddr, saddr; + __be16 dport, sport; const struct iphdr *iph = ip_hdr(skb); struct sk_buff *data_skb = NULL; - u8 uninitialized_var(protocol); + u8 protocol; #if IS_ENABLED(CONFIG_NF_CONNTRACK) enum ip_conntrack_info ctinfo; struct nf_conn const *ct; diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c index abf89b972094..bcdb37f86a94 100644 --- a/net/ipv4/netfilter/nft_dup_ipv4.c +++ b/net/ipv4/netfilter/nft_dup_ipv4.c @@ -107,3 +107,4 @@ module_exit(nft_dup_ipv4_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "dup"); +MODULE_DESCRIPTION("IPv4 nftables packet duplication support"); diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c index ce294113dbcd..03df986217b7 100644 --- a/net/ipv4/netfilter/nft_fib_ipv4.c +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -210,3 +210,4 @@ module_exit(nft_fib4_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); MODULE_ALIAS_NFT_AF_EXPR(2, "fib"); +MODULE_DESCRIPTION("nftables fib / ip route lookup support"); diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c index 7e6fd5cde50f..e408f813f5d8 100644 --- a/net/ipv4/netfilter/nft_reject_ipv4.c +++ b/net/ipv4/netfilter/nft_reject_ipv4.c @@ -71,3 +71,4 @@ module_exit(nft_reject_ipv4_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "reject"); +MODULE_DESCRIPTION("IPv4 packet rejection for nftables"); diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 400a9f89ebdb..cc8049b100b2 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -247,12 +247,11 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh, if (nla_put_u32(skb, NHA_ID, nh->id)) goto nla_put_failure; - if (nh->is_fdb_nh && nla_put_flag(skb, NHA_FDB)) - goto nla_put_failure; - if (nh->is_group) { struct nh_group *nhg = rtnl_dereference(nh->nh_grp); + if (nhg->fdb_nh && nla_put_flag(skb, NHA_FDB)) + goto nla_put_failure; if (nla_put_nh_group(skb, nhg)) goto nla_put_failure; goto out; @@ -264,7 +263,10 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh, if (nla_put_flag(skb, NHA_BLACKHOLE)) goto nla_put_failure; goto out; - } else if (!nh->is_fdb_nh) { + } else if (nhi->fdb_nh) { + if (nla_put_flag(skb, NHA_FDB)) + goto nla_put_failure; + } else { const struct net_device *dev; dev = nhi->fib_nhc.nhc_dev; @@ -385,7 +387,7 @@ errout: } static bool valid_group_nh(struct nexthop *nh, unsigned int npaths, - struct netlink_ext_ack *extack) + bool *is_fdb, struct netlink_ext_ack *extack) { if (nh->is_group) { struct nh_group *nhg = rtnl_dereference(nh->nh_grp); @@ -398,6 +400,7 @@ static bool valid_group_nh(struct nexthop *nh, unsigned int npaths, "Multipath group can not be a nexthop within a group"); return false; } + *is_fdb = nhg->fdb_nh; } else { struct nh_info *nhi = rtnl_dereference(nh->nh_info); @@ -406,6 +409,7 @@ static bool valid_group_nh(struct nexthop *nh, unsigned int npaths, "Blackhole nexthop can not be used in a group with more than 1 path"); return false; } + *is_fdb = nhi->fdb_nh; } return true; @@ -416,12 +420,13 @@ static int nh_check_attr_fdb_group(struct nexthop *nh, u8 *nh_family, { struct nh_info *nhi; - if (!nh->is_fdb_nh) { + nhi = rtnl_dereference(nh->nh_info); + + if (!nhi->fdb_nh) { NL_SET_ERR_MSG(extack, "FDB nexthop group can only have fdb nexthops"); return -EINVAL; } - nhi = rtnl_dereference(nh->nh_info); if (*nh_family == AF_UNSPEC) { *nh_family = nhi->family; } else if (*nh_family != nhi->family) { @@ -473,19 +478,20 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[], nhg = nla_data(tb[NHA_GROUP]); for (i = 0; i < len; ++i) { struct nexthop *nh; + bool is_fdb_nh; nh = nexthop_find_by_id(net, nhg[i].id); if (!nh) { NL_SET_ERR_MSG(extack, "Invalid nexthop id"); return -EINVAL; } - if (!valid_group_nh(nh, len, extack)) + if (!valid_group_nh(nh, len, &is_fdb_nh, extack)) return -EINVAL; if (nhg_fdb && nh_check_attr_fdb_group(nh, &nh_family, extack)) return -EINVAL; - if (!nhg_fdb && nh->is_fdb_nh) { + if (!nhg_fdb && is_fdb_nh) { NL_SET_ERR_MSG(extack, "Non FDB nexthop group cannot have fdb nexthops"); return -EINVAL; } @@ -553,13 +559,13 @@ struct nexthop *nexthop_select_path(struct nexthop *nh, int hash) if (hash > atomic_read(&nhge->upper_bound)) continue; - if (nhge->nh->is_fdb_nh) + nhi = rcu_dereference(nhge->nh->nh_info); + if (nhi->fdb_nh) return nhge->nh; /* nexthops always check if it is good and does * not rely on a sysctl for this behavior */ - nhi = rcu_dereference(nhge->nh->nh_info); switch (nhi->family) { case AF_INET: if (ipv4_good_nh(&nhi->fib_nh)) @@ -624,11 +630,7 @@ int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg, struct netlink_ext_ack *extack) { struct nh_info *nhi; - - if (nh->is_fdb_nh) { - NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop"); - return -EINVAL; - } + bool is_fdb_nh; /* fib6_src is unique to a fib6_info and limits the ability to cache * routes in fib6_nh within a nexthop that is potentially shared @@ -645,10 +647,17 @@ int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg, nhg = rtnl_dereference(nh->nh_grp); if (nhg->has_v4) goto no_v4_nh; + is_fdb_nh = nhg->fdb_nh; } else { nhi = rtnl_dereference(nh->nh_info); if (nhi->family == AF_INET) goto no_v4_nh; + is_fdb_nh = nhi->fdb_nh; + } + + if (is_fdb_nh) { + NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop"); + return -EINVAL; } return 0; @@ -677,12 +686,9 @@ static int fib6_check_nh_list(struct nexthop *old, struct nexthop *new, return fib6_check_nexthop(new, NULL, extack); } -static int nexthop_check_scope(struct nexthop *nh, u8 scope, +static int nexthop_check_scope(struct nh_info *nhi, u8 scope, struct netlink_ext_ack *extack) { - struct nh_info *nhi; - - nhi = rtnl_dereference(nh->nh_info); if (scope == RT_SCOPE_HOST && nhi->fib_nhc.nhc_gw_family) { NL_SET_ERR_MSG(extack, "Route with host scope can not have a gateway"); @@ -704,29 +710,38 @@ static int nexthop_check_scope(struct nexthop *nh, u8 scope, int fib_check_nexthop(struct nexthop *nh, u8 scope, struct netlink_ext_ack *extack) { + struct nh_info *nhi; int err = 0; - if (nh->is_fdb_nh) { - NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop"); - err = -EINVAL; - goto out; - } - if (nh->is_group) { struct nh_group *nhg; + nhg = rtnl_dereference(nh->nh_grp); + if (nhg->fdb_nh) { + NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop"); + err = -EINVAL; + goto out; + } + if (scope == RT_SCOPE_HOST) { NL_SET_ERR_MSG(extack, "Route with host scope can not have multiple nexthops"); err = -EINVAL; goto out; } - nhg = rtnl_dereference(nh->nh_grp); /* all nexthops in a group have the same scope */ - err = nexthop_check_scope(nhg->nh_entries[0].nh, scope, extack); + nhi = rtnl_dereference(nhg->nh_entries[0].nh->nh_info); + err = nexthop_check_scope(nhi, scope, extack); } else { - err = nexthop_check_scope(nh, scope, extack); + nhi = rtnl_dereference(nh->nh_info); + if (nhi->fdb_nh) { + NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop"); + err = -EINVAL; + goto out; + } + err = nexthop_check_scope(nhi, scope, extack); } + out: return err; } @@ -787,6 +802,7 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge, newg->has_v4 = nhg->has_v4; newg->mpath = nhg->mpath; + newg->fdb_nh = nhg->fdb_nh; newg->num_nh = nhg->num_nh; /* copy old entries to new except the one getting removed */ @@ -1216,7 +1232,7 @@ static struct nexthop *nexthop_create_group(struct net *net, } if (cfg->nh_fdb) - nh->is_fdb_nh = 1; + nhg->fdb_nh = 1; rcu_assign_pointer(nh->nh_grp, nhg); @@ -1255,7 +1271,7 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh, goto out; } - if (nh->is_fdb_nh) + if (nhi->fdb_nh) goto out; /* sets nh_dev if successful */ @@ -1326,7 +1342,7 @@ static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg, nhi->fib_nhc.nhc_scope = RT_SCOPE_LINK; if (cfg->nh_fdb) - nh->is_fdb_nh = 1; + nhi->fdb_nh = 1; if (cfg->nh_blackhole) { nhi->reject_nh = 1; @@ -1349,7 +1365,7 @@ static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg, } /* add the entry to the device based hash */ - if (!nh->is_fdb_nh) + if (!nhi->fdb_nh) nexthop_devhash_add(net, nhi); rcu_assign_pointer(nh->nh_info, nhi); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 535427292194..df6fbefe44d4 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -786,6 +786,9 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) inet_sk_flowi_flags(sk), faddr, saddr, 0, 0, sk->sk_uid); + fl4.fl4_icmp_type = user_icmph.type; + fl4.fl4_icmp_code = user_icmph.code; + security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); if (IS_ERR(rt)) { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 1d7076b78e63..a01efa062f6b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2027,7 +2027,7 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, const struct sk_buff *hint) { struct in_device *in_dev = __in_dev_get_rcu(dev); - struct rtable *rt = (struct rtable *)hint; + struct rtable *rt = skb_rtable(hint); struct net *net = dev_net(dev); int err = -EINVAL; u32 tag = 0; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 27716e4932bc..6f0caf9a866d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1742,14 +1742,48 @@ int tcp_mmap(struct file *file, struct socket *sock, } EXPORT_SYMBOL(tcp_mmap); +static int tcp_zerocopy_vm_insert_batch(struct vm_area_struct *vma, + struct page **pages, + unsigned long pages_to_map, + unsigned long *insert_addr, + u32 *length_with_pending, + u32 *seq, + struct tcp_zerocopy_receive *zc) +{ + unsigned long pages_remaining = pages_to_map; + int bytes_mapped; + int ret; + + ret = vm_insert_pages(vma, *insert_addr, pages, &pages_remaining); + bytes_mapped = PAGE_SIZE * (pages_to_map - pages_remaining); + /* Even if vm_insert_pages fails, it may have partially succeeded in + * mapping (some but not all of the pages). + */ + *seq += bytes_mapped; + *insert_addr += bytes_mapped; + if (ret) { + /* But if vm_insert_pages did fail, we have to unroll some state + * we speculatively touched before. + */ + const int bytes_not_mapped = PAGE_SIZE * pages_remaining; + *length_with_pending -= bytes_not_mapped; + zc->recv_skip_hint += bytes_not_mapped; + } + return ret; +} + static int tcp_zerocopy_receive(struct sock *sk, struct tcp_zerocopy_receive *zc) { unsigned long address = (unsigned long)zc->address; u32 length = 0, seq, offset, zap_len; + #define PAGE_BATCH_SIZE 8 + struct page *pages[PAGE_BATCH_SIZE]; const skb_frag_t *frags = NULL; struct vm_area_struct *vma; struct sk_buff *skb = NULL; + unsigned long pg_idx = 0; + unsigned long curr_addr; struct tcp_sock *tp; int inq; int ret; @@ -1762,6 +1796,8 @@ static int tcp_zerocopy_receive(struct sock *sk, sock_rps_record_flow(sk); + tp = tcp_sk(sk); + mmap_read_lock(current->mm); vma = find_vma(current->mm, address); @@ -1771,7 +1807,6 @@ static int tcp_zerocopy_receive(struct sock *sk, } zc->length = min_t(unsigned long, zc->length, vma->vm_end - address); - tp = tcp_sk(sk); seq = tp->copied_seq; inq = tcp_inq(sk); zc->length = min_t(u32, zc->length, inq); @@ -1783,8 +1818,20 @@ static int tcp_zerocopy_receive(struct sock *sk, zc->recv_skip_hint = zc->length; } ret = 0; + curr_addr = address; while (length + PAGE_SIZE <= zc->length) { if (zc->recv_skip_hint < PAGE_SIZE) { + /* If we're here, finish the current batch. */ + if (pg_idx) { + ret = tcp_zerocopy_vm_insert_batch(vma, pages, + pg_idx, + &curr_addr, + &length, + &seq, zc); + if (ret) + goto out; + pg_idx = 0; + } if (skb) { if (zc->recv_skip_hint > 0) break; @@ -1793,7 +1840,6 @@ static int tcp_zerocopy_receive(struct sock *sk, } else { skb = tcp_recv_skb(sk, seq, &offset); } - zc->recv_skip_hint = skb->len - offset; offset -= skb_headlen(skb); if ((int)offset < 0 || skb_has_frag_list(skb)) @@ -1817,14 +1863,24 @@ static int tcp_zerocopy_receive(struct sock *sk, zc->recv_skip_hint -= remaining; break; } - ret = vm_insert_page(vma, address + length, - skb_frag_page(frags)); - if (ret) - break; + pages[pg_idx] = skb_frag_page(frags); + pg_idx++; length += PAGE_SIZE; - seq += PAGE_SIZE; zc->recv_skip_hint -= PAGE_SIZE; frags++; + if (pg_idx == PAGE_BATCH_SIZE) { + ret = tcp_zerocopy_vm_insert_batch(vma, pages, pg_idx, + &curr_addr, &length, + &seq, zc); + if (ret) + goto out; + pg_idx = 0; + } + } + if (pg_idx) { + ret = tcp_zerocopy_vm_insert_batch(vma, pages, pg_idx, + &curr_addr, &length, &seq, + zc); } out: mmap_read_unlock(current->mm); @@ -2635,6 +2691,9 @@ int tcp_disconnect(struct sock *sk, int flags) tp->window_clamp = 0; tp->delivered = 0; tp->delivered_ce = 0; + if (icsk->icsk_ca_ops->release) + icsk->icsk_ca_ops->release(sk); + memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); tcp_set_ca_state(sk, TCP_CA_Open); tp->is_sack_reneg = 0; tcp_clear_retrans(tp); @@ -3190,10 +3249,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, #ifdef CONFIG_TCP_MD5SIG case TCP_MD5SIG: case TCP_MD5SIG_EXT: - if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) - err = tp->af_specific->md5_parse(sk, optname, optval, optlen); - else - err = -EINVAL; + err = tp->af_specific->md5_parse(sk, optname, optval, optlen); break; #endif case TCP_USER_TIMEOUT: @@ -3977,11 +4033,14 @@ EXPORT_SYMBOL(tcp_md5_hash_skb_data); int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key) { + u8 keylen = READ_ONCE(key->keylen); /* paired with WRITE_ONCE() in tcp_md5_do_add */ struct scatterlist sg; - sg_init_one(&sg, key->key, key->keylen); - ahash_request_set_crypt(hp->md5_req, &sg, NULL, key->keylen); - return crypto_ahash_update(hp->md5_req); + sg_init_one(&sg, key->key, keylen); + ahash_request_set_crypt(hp->md5_req, &sg, NULL, keylen); + + /* We use data_race() because tcp_md5_do_add() might change key->key under us */ + return data_race(crypto_ahash_update(hp->md5_req)); } EXPORT_SYMBOL(tcp_md5_hash_key); diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 629aaa9a1eb9..7aa68f4aae6c 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -64,6 +64,9 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, } while (i != msg_rx->sg.end); if (unlikely(peek)) { + if (msg_rx == list_last_entry(&psock->ingress_msg, + struct sk_msg, list)) + break; msg_rx = list_next_entry(msg_rx, list); continue; } @@ -242,6 +245,9 @@ static int tcp_bpf_wait_data(struct sock *sk, struct sk_psock *psock, DEFINE_WAIT_FUNC(wait, woken_wake_function); int ret = 0; + if (sk->sk_shutdown & RCV_SHUTDOWN) + return 1; + if (!timeo) return ret; diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 3172e31987be..62878cf26d9c 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -197,7 +197,7 @@ static void tcp_reinit_congestion_control(struct sock *sk, icsk->icsk_ca_setsockopt = 1; memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); - if (sk->sk_state != TCP_CLOSE) + if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) tcp_init_congestion_control(sk); } diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 8f8eefd3a3ce..c7bf5b26bf0c 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -432,10 +432,9 @@ static void hystart_update(struct sock *sk, u32 delay) if (hystart_detect & HYSTART_DELAY) { /* obtain the minimum delay of more than sampling packets */ + if (ca->curr_rtt > delay) + ca->curr_rtt = delay; if (ca->sample_cnt < HYSTART_MIN_SAMPLES) { - if (ca->curr_rtt > delay) - ca->curr_rtt = delay; - ca->sample_cnt++; } else { if (ca->curr_rtt > ca->delay_min + diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 83330a6cb242..518f04355fbf 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -261,7 +261,8 @@ static void tcp_ecn_accept_cwr(struct sock *sk, const struct sk_buff *skb) * cwnd may be very low (even just 1 packet), so we should ACK * immediately. */ - inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW; + if (TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) + inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW; } } @@ -3487,10 +3488,8 @@ static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) } } -/* This routine deals with acks during a TLP episode. - * We mark the end of a TLP episode on receiving TLP dupack or when - * ack is after tlp_high_seq. - * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe. +/* This routine deals with acks during a TLP episode and ends an episode by + * resetting tlp_high_seq. Ref: TLP algorithm in draft-ietf-tcpm-rack */ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) { @@ -3499,7 +3498,10 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) if (before(ack, tp->tlp_high_seq)) return; - if (flag & FLAG_DSACKING_ACK) { + if (!tp->tlp_retrans) { + /* TLP of new data has been acknowledged */ + tp->tlp_high_seq = 0; + } else if (flag & FLAG_DSACKING_ACK) { /* This DSACK means original and TLP probe arrived; no loss */ tp->tlp_high_seq = 0; } else if (after(ack, tp->tlp_high_seq)) { @@ -3665,6 +3667,15 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_in_ack_event(sk, ack_ev_flags); } + /* This is a deviation from RFC3168 since it states that: + * "When the TCP data sender is ready to set the CWR bit after reducing + * the congestion window, it SHOULD set the CWR bit only on the first + * new data packet that it transmits." + * We accept CWR on pure ACKs to be more robust + * with widely-deployed TCP implementations that do this. + */ + tcp_ecn_accept_cwr(sk, skb); + /* We passed data and got it acked, remove any soft error * log. Something worked... */ @@ -4572,6 +4583,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP); + sk->sk_data_ready(sk); tcp_drop(sk, skb); return; } @@ -4605,7 +4617,11 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb, skb, &fragstolen)) { coalesce_done: - tcp_grow_window(sk, skb); + /* For non sack flows, do not grow window to force DUPACK + * and trigger fast retransmit. + */ + if (tcp_is_sack(tp)) + tcp_grow_window(sk, skb); kfree_skb_partial(skb, fragstolen); skb = NULL; goto add_sack; @@ -4689,7 +4705,11 @@ add_sack: tcp_sack_new_ofo_skb(sk, seq, end_seq); end: if (skb) { - tcp_grow_window(sk, skb); + /* For non sack flows, do not grow window to force DUPACK + * and trigger fast retransmit. + */ + if (tcp_is_sack(tp)) + tcp_grow_window(sk, skb); skb_condense(skb); skb_set_owner_r(skb, sk); } @@ -4792,8 +4812,6 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) skb_dst_drop(skb); __skb_pull(skb, tcp_hdr(skb)->doff * 4); - tcp_ecn_accept_cwr(sk, skb); - tp->rx_opt.dsack = 0; /* Queue data for delivery to the user. @@ -4812,6 +4830,7 @@ queue_and_out: sk_forced_mem_schedule(sk, skb->truesize); else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP); + sk->sk_data_ready(sk); goto drop; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ad6435ba6d72..04bfcbbfee83 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1111,9 +1111,21 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen, l3index); if (key) { - /* Pre-existing entry - just update that one. */ - memcpy(key->key, newkey, newkeylen); - key->keylen = newkeylen; + /* Pre-existing entry - just update that one. + * Note that the key might be used concurrently. + * data_race() is telling kcsan that we do not care of + * key mismatches, since changing MD5 key on live flows + * can lead to packet drops. + */ + data_race(memcpy(key->key, newkey, newkeylen)); + + /* Pairs with READ_ONCE() in tcp_md5_hash_key(). + * Also note that a reader could catch new key->keylen value + * but old key->key[], this is the reason we use __GFP_ZERO + * at sock_kmalloc() time below these lines. + */ + WRITE_ONCE(key->keylen, newkeylen); + return 0; } @@ -1129,7 +1141,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, rcu_assign_pointer(tp->md5sig_info, md5sig); } - key = sock_kmalloc(sk, sizeof(*key), gfp); + key = sock_kmalloc(sk, sizeof(*key), gfp | __GFP_ZERO); if (!key) return -ENOMEM; if (!tcp_alloc_md5sig_pool()) { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a50e1990a845..0bc05d68cd74 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -700,7 +700,8 @@ static unsigned int tcp_synack_options(const struct sock *sk, unsigned int mss, struct sk_buff *skb, struct tcp_out_options *opts, const struct tcp_md5sig_key *md5, - struct tcp_fastopen_cookie *foc) + struct tcp_fastopen_cookie *foc, + enum tcp_synack_type synack_type) { struct inet_request_sock *ireq = inet_rsk(req); unsigned int remaining = MAX_TCP_OPTION_SPACE; @@ -715,7 +716,8 @@ static unsigned int tcp_synack_options(const struct sock *sk, * rather than TS in order to fit in better with old, * buggy kernels, but that was deemed to be unnecessary. */ - ireq->tstamp_ok &= !ireq->sack_ok; + if (synack_type != TCP_SYNACK_COOKIE) + ireq->tstamp_ok &= !ireq->sack_ok; } #endif @@ -2622,6 +2624,11 @@ void tcp_send_loss_probe(struct sock *sk) int pcount; int mss = tcp_current_mss(sk); + /* At most one outstanding TLP */ + if (tp->tlp_high_seq) + goto rearm_timer; + + tp->tlp_retrans = 0; skb = tcp_send_head(sk); if (skb && tcp_snd_wnd_test(tp, skb, mss)) { pcount = tp->packets_out; @@ -2639,10 +2646,6 @@ void tcp_send_loss_probe(struct sock *sk) return; } - /* At most one outstanding TLP retransmission. */ - if (tp->tlp_high_seq) - goto rearm_timer; - if (skb_still_in_host_queue(sk, skb)) goto rearm_timer; @@ -2664,10 +2667,12 @@ void tcp_send_loss_probe(struct sock *sk) if (__tcp_retransmit_skb(sk, skb, 1)) goto rearm_timer; + tp->tlp_retrans = 1; + +probe_sent: /* Record snd_nxt for loss detection. */ tp->tlp_high_seq = tp->snd_nxt; -probe_sent: NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSPROBES); /* Reset s.t. tcp_rearm_rto will restart timer from now */ inet_csk(sk)->icsk_pending = 0; @@ -3394,7 +3399,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, #endif skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4); tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5, - foc) + sizeof(*th); + foc, synack_type) + sizeof(*th); skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1b7ebbcae497..4077d589b72e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -416,7 +416,7 @@ static struct sock *udp4_lib_lookup2(struct net *net, struct udp_hslot *hslot2, struct sk_buff *skb) { - struct sock *sk, *result; + struct sock *sk, *result, *reuseport_result; int score, badness; u32 hash = 0; @@ -426,17 +426,20 @@ static struct sock *udp4_lib_lookup2(struct net *net, score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, sdif); if (score > badness) { + reuseport_result = NULL; + if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) { hash = udp_ehashfn(net, daddr, hnum, saddr, sport); - result = reuseport_select_sock(sk, hash, skb, - sizeof(struct udphdr)); - if (result && !reuseport_has_conns(sk, false)) - return result; + reuseport_result = reuseport_select_sock(sk, hash, skb, + sizeof(struct udphdr)); + if (reuseport_result && !reuseport_has_conns(sk, false)) + return reuseport_result; } + + result = reuseport_result ? : sk; badness = score; - result = sk; } } return result; @@ -2051,7 +2054,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) /* * UDP-Lite specific tests, ignored on UDP sockets */ - if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { + if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { /* * MIB statistics other than incrementing the error count are diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 4f03aece2980..f4f19e89af5e 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -7,7 +7,7 @@ menuconfig IPV6 tristate "The IPv6 protocol" default y - ---help--- + help Support for IP version 6 (IPv6). For general information about IPv6, see @@ -23,7 +23,7 @@ if IPV6 config IPV6_ROUTER_PREF bool "IPv6: Router Preference (RFC 4191) support" - ---help--- + help Router Preference is an optional extension to the Router Advertisement message which improves the ability of hosts to pick an appropriate router, especially when the hosts @@ -34,14 +34,14 @@ config IPV6_ROUTER_PREF config IPV6_ROUTE_INFO bool "IPv6: Route Information (RFC 4191) support" depends on IPV6_ROUTER_PREF - ---help--- + help Support of Route Information. If unsure, say N. config IPV6_OPTIMISTIC_DAD bool "IPv6: Enable RFC 4429 Optimistic DAD" - ---help--- + help Support for optimistic Duplicate Address Detection. It allows for autoconfigured addresses to be used more quickly. @@ -49,29 +49,31 @@ config IPV6_OPTIMISTIC_DAD config INET6_AH tristate "IPv6: AH transformation" - select XFRM_ALGO - select CRYPTO - select CRYPTO_HMAC - select CRYPTO_MD5 - select CRYPTO_SHA1 - ---help--- - Support for IPsec AH. + select XFRM_AH + help + Support for IPsec AH (Authentication Header). + + AH can be used with various authentication algorithms. Besides + enabling AH support itself, this option enables the generic + implementations of the algorithms that RFC 8221 lists as MUST be + implemented. If you need any other algorithms, you'll need to enable + them in the crypto API. You should also enable accelerated + implementations of any needed algorithms when available. If unsure, say Y. config INET6_ESP tristate "IPv6: ESP transformation" - select XFRM_ALGO - select CRYPTO - select CRYPTO_AUTHENC - select CRYPTO_HMAC - select CRYPTO_MD5 - select CRYPTO_CBC - select CRYPTO_SHA1 - select CRYPTO_DES - select CRYPTO_ECHAINIV - ---help--- - Support for IPsec ESP. + select XFRM_ESP + help + Support for IPsec ESP (Encapsulating Security Payload). + + ESP can be used with various encryption and authentication algorithms. + Besides enabling ESP support itself, this option enables the generic + implementations of the algorithms that RFC 8221 lists as MUST be + implemented. If you need any other algorithms, you'll need to enable + them in the crypto API. You should also enable accelerated + implementations of any needed algorithms when available. If unsure, say Y. @@ -80,7 +82,7 @@ config INET6_ESP_OFFLOAD depends on INET6_ESP select XFRM_OFFLOAD default n - ---help--- + help Support for ESP transformation offload. This makes sense only if this system really does IPsec and want to do it with high throughput. A typical desktop system does not @@ -104,7 +106,7 @@ config INET6_IPCOMP tristate "IPv6: IPComp transformation" select INET6_XFRM_TUNNEL select XFRM_IPCOMP - ---help--- + help Support for IP Payload Compression Protocol (IPComp) (RFC3173), typically needed for IPsec. @@ -113,7 +115,7 @@ config INET6_IPCOMP config IPV6_MIP6 tristate "IPv6: Mobility" select XFRM - ---help--- + help Support for IPv6 Mobility described in RFC 3775. If unsure, say N. @@ -123,7 +125,7 @@ config IPV6_ILA depends on NETFILTER select DST_CACHE select LWTUNNEL - ---help--- + help Support for IPv6 Identifier Locator Addressing (ILA). ILA is a mechanism to do network virtualization without @@ -153,7 +155,7 @@ tristate "Virtual (secure) IPv6: tunneling" select IPV6_TUNNEL select NET_IP_TUNNEL select XFRM - ---help--- + help Tunneling means encapsulating data of one protocol type within another protocol and sending it over a channel that understands the encapsulating protocol. This can be used with xfrm mode tunnel to give @@ -166,7 +168,7 @@ config IPV6_SIT select NET_IP_TUNNEL select IPV6_NDISC_NODETYPE default y - ---help--- + help Tunneling means encapsulating data of one protocol type within another protocol and sending it over a channel that understands the encapsulating protocol. This driver implements encapsulation of IPv6 @@ -179,7 +181,7 @@ config IPV6_SIT_6RD bool "IPv6: IPv6 Rapid Deployment (6RD)" depends on IPV6_SIT default n - ---help--- + help IPv6 Rapid Deployment (6rd; draft-ietf-softwire-ipv6-6rd) builds upon mechanisms of 6to4 (RFC3056) to enable a service provider to rapidly deploy IPv6 unicast service to IPv4 sites to which it provides @@ -202,7 +204,7 @@ config IPV6_TUNNEL select INET6_TUNNEL select DST_CACHE select GRO_CELLS - ---help--- + help Support for IPv6-in-IPv6 and IPv4-in-IPv6 tunnels described in RFC 2473. @@ -213,7 +215,7 @@ config IPV6_GRE select IPV6_TUNNEL select NET_IP_TUNNEL depends on NET_IPGRE_DEMUX - ---help--- + help Tunneling means encapsulating data of one protocol type within another protocol and sending it over a channel that understands the encapsulating protocol. This particular tunneling driver implements @@ -238,13 +240,13 @@ config IPV6_FOU_TUNNEL config IPV6_MULTIPLE_TABLES bool "IPv6: Multiple Routing Tables" select FIB_RULES - ---help--- + help Support multiple routing tables. config IPV6_SUBTREES bool "IPv6: source address based routing" depends on IPV6_MULTIPLE_TABLES - ---help--- + help Enable routing by source address or prefix. The destination address is still the primary routing key, so mixing @@ -259,7 +261,7 @@ config IPV6_MROUTE bool "IPv6: multicast routing" depends on IPV6 select IP_MROUTE_COMMON - ---help--- + help Support for IPv6 multicast forwarding. If unsure, say N. @@ -280,7 +282,7 @@ config IPV6_MROUTE_MULTIPLE_TABLES config IPV6_PIMSM_V2 bool "IPv6: PIM-SM version 2 support" depends on IPV6_MROUTE - ---help--- + help Support for IPv6 PIM multicast routing protocol PIM-SMv2. If unsure, say N. @@ -290,7 +292,7 @@ config IPV6_SEG6_LWTUNNEL select LWTUNNEL select DST_CACHE select IPV6_MULTIPLE_TABLES - ---help--- + help Support for encapsulation of packets within an outer IPv6 header and a Segment Routing Header using the lightweight tunnels mechanism. Also enable support for advanced local @@ -304,7 +306,7 @@ config IPV6_SEG6_HMAC select CRYPTO_HMAC select CRYPTO_SHA1 select CRYPTO_SHA256 - ---help--- + help Support for HMAC signature generation and verification of SR-enabled packets. @@ -319,7 +321,7 @@ config IPV6_RPL_LWTUNNEL bool "IPv6: RPL Source Routing Header support" depends on IPV6 select LWTUNNEL - ---help--- + help Support for RFC6554 RPL Source Routing Header using the lightweight tunnels mechanism. diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 893261230ffc..dacdea7fcb62 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -183,7 +183,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) return 0; } -void ipv6_sock_ac_close(struct sock *sk) +void __ipv6_sock_ac_close(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); struct net_device *dev = NULL; @@ -191,10 +191,7 @@ void ipv6_sock_ac_close(struct sock *sk) struct net *net = sock_net(sk); int prev_index; - if (!np->ipv6_ac_list) - return; - - rtnl_lock(); + ASSERT_RTNL(); pac = np->ipv6_ac_list; np->ipv6_ac_list = NULL; @@ -211,6 +208,16 @@ void ipv6_sock_ac_close(struct sock *sk) sock_kfree_s(sk, pac, sizeof(*pac)); pac = next; } +} + +void ipv6_sock_ac_close(struct sock *sk) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + + if (!np->ipv6_ac_list) + return; + rtnl_lock(); + __ipv6_sock_ac_close(sk); rtnl_unlock(); } diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index c43592771126..52c2f063529f 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -805,10 +805,17 @@ int esp6_input_done2(struct sk_buff *skb, int err) if (x->encap) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); + int offset = skb_network_offset(skb) + sizeof(*ip6h); struct xfrm_encap_tmpl *encap = x->encap; - struct udphdr *uh = (void *)(skb_network_header(skb) + hdr_len); - struct tcphdr *th = (void *)(skb_network_header(skb) + hdr_len); - __be16 source; + u8 nexthdr = ip6h->nexthdr; + __be16 frag_off, source; + struct udphdr *uh; + struct tcphdr *th; + + offset = ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off); + uh = (void *)(skb->data + offset); + th = (void *)(skb->data + offset); + hdr_len += offset; switch (x->encap->encap_type) { case TCP_ENCAP_ESPINTCP: diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 55addea1948f..1ca516fb30e1 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -395,3 +395,4 @@ module_exit(esp6_offload_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Steffen Klassert <steffen.klassert@secunet.com>"); MODULE_ALIAS_XFRM_OFFLOAD_TYPE(AF_INET6, XFRM_PROTO_ESP); +MODULE_DESCRIPTION("IPV6 GSO/GRO offload support"); diff --git a/net/ipv6/fou6.c b/net/ipv6/fou6.c index 091f94184dc1..430518ae26fa 100644 --- a/net/ipv6/fou6.c +++ b/net/ipv6/fou6.c @@ -224,3 +224,4 @@ module_init(fou6_init); module_exit(fou6_fini); MODULE_AUTHOR("Tom Herbert <therbert@google.com>"); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Foo over UDP (IPv6)"); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index fc5000370030..9df8737ae0d3 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -566,7 +566,6 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); - sk->sk_mark = mark; np = inet6_sk(sk); if (!icmpv6_xrlim_allow(sk, type, &fl6)) @@ -583,6 +582,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, fl6.flowi6_oif = np->ucast_oif; ipcm6_init_sk(&ipc6, np); + ipc6.sockc.mark = mark; fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); dst = icmpv6_route_lookup(net, skb, sk, &fl6); @@ -751,7 +751,6 @@ static void icmpv6_echo_reply(struct sk_buff *skb) sk = icmpv6_xmit_lock(net); if (!sk) goto out_bh_enable; - sk->sk_mark = mark; np = inet6_sk(sk); if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) @@ -779,6 +778,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) ipcm6_init_sk(&ipc6, np); ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb)); + ipc6.sockc.mark = mark; if (ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), diff --git a/net/ipv6/ila/ila_main.c b/net/ipv6/ila/ila_main.c index 257d2b681246..36c58aa257e8 100644 --- a/net/ipv6/ila/ila_main.c +++ b/net/ipv6/ila/ila_main.c @@ -120,3 +120,4 @@ module_init(ila_init); module_exit(ila_fini); MODULE_AUTHOR("Tom Herbert <tom@herbertland.com>"); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IPv6: Identifier Locator Addressing (ILA)"); diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index ce4fbba4acce..73bb047e6037 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -535,7 +535,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq, int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) { - int uninitialized_var(err); + int err; struct net *net = sock_net(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct in6_flowlabel_req freq; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 781ca8c07a0d..3a57fb9ce049 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -127,6 +127,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, gre_proto == htons(ETH_P_ERSPAN2)) ? ARPHRD_ETHER : ARPHRD_IP6GRE; int score, cand_score = 4; + struct net_device *ndev; for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) { if (!ipv6_addr_equal(local, &t->parms.laddr) || @@ -238,9 +239,9 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, if (t && t->dev->flags & IFF_UP) return t; - dev = ign->fb_tunnel_dev; - if (dev && dev->flags & IFF_UP) - return netdev_priv(dev); + ndev = READ_ONCE(ign->fb_tunnel_dev); + if (ndev && ndev->flags & IFF_UP) + return netdev_priv(ndev); return NULL; } @@ -413,6 +414,8 @@ static void ip6gre_tunnel_uninit(struct net_device *dev) ip6gre_tunnel_unlink_md(ign, t); ip6gre_tunnel_unlink(ign, t); + if (ign->fb_tunnel_dev == dev) + WRITE_ONCE(ign->fb_tunnel_dev, NULL); dst_cache_reset(&t->dst_cache); dev_put(dev); } @@ -1559,17 +1562,18 @@ static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head) static int __net_init ip6gre_init_net(struct net *net) { struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + struct net_device *ndev; int err; if (!net_has_fallback_tunnels(net)) return 0; - ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0", - NET_NAME_UNKNOWN, - ip6gre_tunnel_setup); - if (!ign->fb_tunnel_dev) { + ndev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0", + NET_NAME_UNKNOWN, ip6gre_tunnel_setup); + if (!ndev) { err = -ENOMEM; goto err_alloc_dev; } + ign->fb_tunnel_dev = ndev; dev_net_set(ign->fb_tunnel_dev, net); /* FB netdevice is special: we have one, and only one per netns. * Allowing to move it to another netns is clearly unsafe. @@ -1589,7 +1593,7 @@ static int __net_init ip6gre_init_net(struct net *net) return 0; err_reg_dev: - free_netdev(ign->fb_tunnel_dev); + free_netdev(ndev); err_alloc_dev: return err; } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 821d96c720b9..a18c378ca5f4 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1846,6 +1846,7 @@ static const struct net_device_ops ip6_tnl_netdev_ops = { static void ip6_tnl_dev_setup(struct net_device *dev) { dev->netdev_ops = &ip6_tnl_netdev_ops; + dev->header_ops = &ip_tunnel_header_ops; dev->needs_free_netdev = true; dev->priv_destructor = ip6_dev_free; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 1147f647b9a0..0d964160a9dd 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -905,6 +905,7 @@ static const struct net_device_ops vti6_netdev_ops = { static void vti6_dev_setup(struct net_device *dev) { dev->netdev_ops = &vti6_netdev_ops; + dev->header_ops = &ip_tunnel_header_ops; dev->needs_free_netdev = true; dev->priv_destructor = vti6_dev_free; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 20576e87a5f7..76f9e41859a2 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -240,6 +240,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, fl6_free_socklist(sk); __ipv6_sock_mc_close(sk); + __ipv6_sock_ac_close(sk); /* * Sock is moving from IPv6 to IPv4 (sk_prot), so diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 7e12d2114158..8cd2782a31e4 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2615,6 +2615,7 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev) idev->mc_list = i->next; write_unlock_bh(&idev->lock); + ip6_mc_clear_src(i); ma_put(i); write_lock_bh(&idev->lock); } diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 0594131fa46d..262bb51a2d99 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -127,7 +127,7 @@ config IP6_NF_MATCH_HL tristate '"hl" hoplimit match support' depends on NETFILTER_ADVANCED select NETFILTER_XT_MATCH_HL - ---help--- + help This is a backwards-compat option for the user's convenience (e.g. when running oldconfig). It selects CONFIG_NETFILTER_XT_MATCH_HL. @@ -153,7 +153,7 @@ config IP6_NF_MATCH_RPFILTER tristate '"rpfilter" reverse path filter match support' depends on NETFILTER_ADVANCED depends on IP6_NF_MANGLE || IP6_NF_RAW - ---help--- + help This option allows you to match packets whose replies would go out via the interface the packet came in. @@ -183,7 +183,7 @@ config IP6_NF_TARGET_HL tristate '"HL" hoplimit target support' depends on NETFILTER_ADVANCED && IP6_NF_MANGLE select NETFILTER_XT_TARGET_HL - ---help--- + help This is a backwards-compatible option for the user's convenience (e.g. when running oldconfig). It selects CONFIG_NETFILTER_XT_TARGET_HL. diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index e27393498ecb..e96a431549bc 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1807,11 +1807,22 @@ out_free: return ret; } +void ip6t_unregister_table_pre_exit(struct net *net, struct xt_table *table, + const struct nf_hook_ops *ops) +{ + nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); +} + +void ip6t_unregister_table_exit(struct net *net, struct xt_table *table) +{ + __ip6t_unregister_table(net, table); +} + void ip6t_unregister_table(struct net *net, struct xt_table *table, const struct nf_hook_ops *ops) { if (ops) - nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); + ip6t_unregister_table_pre_exit(net, table, ops); __ip6t_unregister_table(net, table); } @@ -1969,6 +1980,8 @@ static void __exit ip6_tables_fini(void) EXPORT_SYMBOL(ip6t_register_table); EXPORT_SYMBOL(ip6t_unregister_table); +EXPORT_SYMBOL(ip6t_unregister_table_pre_exit); +EXPORT_SYMBOL(ip6t_unregister_table_exit); EXPORT_SYMBOL(ip6t_do_table); module_init(ip6_tables_init); diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index fd1f52a21bf1..d51d0c3e5fe9 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -121,3 +121,4 @@ module_exit(synproxy_tg6_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_DESCRIPTION("Intercept IPv6 TCP connections and establish them using syncookies"); diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 32667f5d5a33..88337b51ffbf 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -73,16 +73,24 @@ static int __net_init ip6table_filter_net_init(struct net *net) return 0; } +static void __net_exit ip6table_filter_net_pre_exit(struct net *net) +{ + if (net->ipv6.ip6table_filter) + ip6t_unregister_table_pre_exit(net, net->ipv6.ip6table_filter, + filter_ops); +} + static void __net_exit ip6table_filter_net_exit(struct net *net) { if (!net->ipv6.ip6table_filter) return; - ip6t_unregister_table(net, net->ipv6.ip6table_filter, filter_ops); + ip6t_unregister_table_exit(net, net->ipv6.ip6table_filter); net->ipv6.ip6table_filter = NULL; } static struct pernet_operations ip6table_filter_net_ops = { .init = ip6table_filter_net_init, + .pre_exit = ip6table_filter_net_pre_exit, .exit = ip6table_filter_net_exit, }; diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 070afb97fa2b..1a2748611e00 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -93,16 +93,24 @@ static int __net_init ip6table_mangle_table_init(struct net *net) return ret; } +static void __net_exit ip6table_mangle_net_pre_exit(struct net *net) +{ + if (net->ipv6.ip6table_mangle) + ip6t_unregister_table_pre_exit(net, net->ipv6.ip6table_mangle, + mangle_ops); +} + static void __net_exit ip6table_mangle_net_exit(struct net *net) { if (!net->ipv6.ip6table_mangle) return; - ip6t_unregister_table(net, net->ipv6.ip6table_mangle, mangle_ops); + ip6t_unregister_table_exit(net, net->ipv6.ip6table_mangle); net->ipv6.ip6table_mangle = NULL; } static struct pernet_operations ip6table_mangle_net_ops = { + .pre_exit = ip6table_mangle_net_pre_exit, .exit = ip6table_mangle_net_exit, }; diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 0f4875952efc..0a23265e3caa 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -114,16 +114,22 @@ static int __net_init ip6table_nat_table_init(struct net *net) return ret; } +static void __net_exit ip6table_nat_net_pre_exit(struct net *net) +{ + if (net->ipv6.ip6table_nat) + ip6t_nat_unregister_lookups(net); +} + static void __net_exit ip6table_nat_net_exit(struct net *net) { if (!net->ipv6.ip6table_nat) return; - ip6t_nat_unregister_lookups(net); - ip6t_unregister_table(net, net->ipv6.ip6table_nat, NULL); + ip6t_unregister_table_exit(net, net->ipv6.ip6table_nat); net->ipv6.ip6table_nat = NULL; } static struct pernet_operations ip6table_nat_net_ops = { + .pre_exit = ip6table_nat_net_pre_exit, .exit = ip6table_nat_net_exit, }; diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index a22100b1cf2c..8f9e742226f7 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -66,15 +66,23 @@ static int __net_init ip6table_raw_table_init(struct net *net) return ret; } +static void __net_exit ip6table_raw_net_pre_exit(struct net *net) +{ + if (net->ipv6.ip6table_raw) + ip6t_unregister_table_pre_exit(net, net->ipv6.ip6table_raw, + rawtable_ops); +} + static void __net_exit ip6table_raw_net_exit(struct net *net) { if (!net->ipv6.ip6table_raw) return; - ip6t_unregister_table(net, net->ipv6.ip6table_raw, rawtable_ops); + ip6t_unregister_table_exit(net, net->ipv6.ip6table_raw); net->ipv6.ip6table_raw = NULL; } static struct pernet_operations ip6table_raw_net_ops = { + .pre_exit = ip6table_raw_net_pre_exit, .exit = ip6table_raw_net_exit, }; diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index a74335fe2bd9..5e8c48fed032 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -61,15 +61,23 @@ static int __net_init ip6table_security_table_init(struct net *net) return ret; } +static void __net_exit ip6table_security_net_pre_exit(struct net *net) +{ + if (net->ipv6.ip6table_security) + ip6t_unregister_table_pre_exit(net, net->ipv6.ip6table_security, + sectbl_ops); +} + static void __net_exit ip6table_security_net_exit(struct net *net) { if (!net->ipv6.ip6table_security) return; - ip6t_unregister_table(net, net->ipv6.ip6table_security, sectbl_ops); + ip6t_unregister_table_exit(net, net->ipv6.ip6table_security); net->ipv6.ip6table_security = NULL; } static struct pernet_operations ip6table_security_net_ops = { + .pre_exit = ip6table_security_net_pre_exit, .exit = ip6table_security_net_exit, }; diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c index a8566ee12e83..667b8af2546a 100644 --- a/net/ipv6/netfilter/nf_flow_table_ipv6.c +++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c @@ -35,3 +35,4 @@ module_exit(nf_flow_ipv6_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); MODULE_ALIAS_NF_FLOWTABLE(AF_INET6); +MODULE_DESCRIPTION("Netfilter flow table IPv6 module"); diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c index b9df879c48d3..6fd54744cbc3 100644 --- a/net/ipv6/netfilter/nf_socket_ipv6.c +++ b/net/ipv6/netfilter/nf_socket_ipv6.c @@ -97,7 +97,7 @@ nf_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff, struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb, const struct net_device *indev) { - __be16 uninitialized_var(dport), uninitialized_var(sport); + __be16 dport, sport; const struct in6_addr *daddr = NULL, *saddr = NULL; struct ipv6hdr *iph = ipv6_hdr(skb); struct sk_buff *data_skb = NULL; diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c index 2af32200507d..8b5193efb1f1 100644 --- a/net/ipv6/netfilter/nft_dup_ipv6.c +++ b/net/ipv6/netfilter/nft_dup_ipv6.c @@ -105,3 +105,4 @@ module_exit(nft_dup_ipv6_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "dup"); +MODULE_DESCRIPTION("IPv6 nftables packet duplication support"); diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index 7ece86afd079..e204163c7036 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -255,3 +255,4 @@ module_exit(nft_fib6_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); MODULE_ALIAS_NFT_AF_EXPR(10, "fib"); +MODULE_DESCRIPTION("nftables fib / ipv6 route lookup support"); diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c index 680a28ce29fd..c1098a1968e1 100644 --- a/net/ipv6/netfilter/nft_reject_ipv6.c +++ b/net/ipv6/netfilter/nft_reject_ipv6.c @@ -72,3 +72,4 @@ module_exit(nft_reject_ipv6_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "reject"); +MODULE_DESCRIPTION("IPv6 packet rejection for nftables"); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 82cbb46a2a4f..4c36bd0c7930 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -431,9 +431,12 @@ void fib6_select_path(const struct net *net, struct fib6_result *res, struct fib6_info *sibling, *next_sibling; struct fib6_info *match = res->f6i; - if ((!match->fib6_nsiblings && !match->nh) || have_oif_match) + if (!match->nh && (!match->fib6_nsiblings || have_oif_match)) goto out; + if (match->nh && have_oif_match && res->nh) + return; + /* We might have already computed the hash for ICMPv6 errors. In such * case it will always be non-zero. Otherwise now is the time to do it. */ @@ -3402,7 +3405,7 @@ static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type) if ((flags & RTF_REJECT) || (dev && (dev->flags & IFF_LOOPBACK) && !(addr_type & IPV6_ADDR_LOOPBACK) && - !(flags & RTF_LOCAL))) + !(flags & (RTF_ANYCAST | RTF_LOCAL)))) return true; return false; @@ -3682,14 +3685,14 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, rt->fib6_src.plen = cfg->fc_src_len; #endif if (nh) { - if (!nexthop_get(nh)) { - NL_SET_ERR_MSG(extack, "Nexthop has been deleted"); - goto out; - } if (rt->fib6_src.plen) { NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing"); goto out; } + if (!nexthop_get(nh)) { + NL_SET_ERR_MSG(extack, "Nexthop has been deleted"); + goto out; + } rt->nh = nh; fib6_nh = nexthop_fib6_nh(rt->nh); } else { diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 1fbb4dfbb191..5e2c34c0ac97 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1421,6 +1421,7 @@ static void ipip6_tunnel_setup(struct net_device *dev) int t_hlen = tunnel->hlen + sizeof(struct iphdr); dev->netdev_ops = &ipip6_netdev_ops; + dev->header_ops = &ip_tunnel_header_ops; dev->needs_free_netdev = true; dev->priv_destructor = ipip6_dev_free; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 7d4151747340..a8d74f44056a 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -148,7 +148,7 @@ static struct sock *udp6_lib_lookup2(struct net *net, int dif, int sdif, struct udp_hslot *hslot2, struct sk_buff *skb) { - struct sock *sk, *result; + struct sock *sk, *result, *reuseport_result; int score, badness; u32 hash = 0; @@ -158,17 +158,20 @@ static struct sock *udp6_lib_lookup2(struct net *net, score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, sdif); if (score > badness) { + reuseport_result = NULL; + if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) { hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); - result = reuseport_select_sock(sk, hash, skb, - sizeof(struct udphdr)); - if (result && !reuseport_has_conns(sk, false)) - return result; + reuseport_result = reuseport_select_sock(sk, hash, skb, + sizeof(struct udphdr)); + if (reuseport_result && !reuseport_has_conns(sk, false)) + return reuseport_result; } - result = sk; + + result = reuseport_result ? : sk; badness = score; } } @@ -643,7 +646,7 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) /* * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c). */ - if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { + if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { if (up->pcrlen == 0) { /* full coverage was set */ net_dbg_ratelimited("UDPLITE6: partial coverage %d while full coverage %d requested\n", diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 19250a0c85d3..cd2e468852e7 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -105,7 +105,7 @@ static LIST_HEAD(iucv_task_queue); * The tasklet for fast delivery of iucv interrupts. */ static void iucv_tasklet_fn(unsigned long); -static DECLARE_TASKLET(iucv_tasklet, iucv_tasklet_fn,0); +static DECLARE_TASKLET_OLD(iucv_tasklet, iucv_tasklet_fn); /* * Queue of interrupt buffers for delivery via a work queue diff --git a/net/kcm/Kconfig b/net/kcm/Kconfig index bf7e970fad65..16f39f2565d9 100644 --- a/net/kcm/Kconfig +++ b/net/kcm/Kconfig @@ -5,7 +5,7 @@ config AF_KCM depends on INET select BPF_SYSCALL select STREAM_PARSER - ---help--- + help KCM (Kernel Connection Multiplexor) sockets provide a method for multiplexing messages of a message based application protocol over kernel connectons (e.g. TCP connections). diff --git a/net/key/af_key.c b/net/key/af_key.c index b67ed3a8486c..a915bc86620a 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1849,6 +1849,13 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms if (ext_hdrs[SADB_X_EXT_FILTER - 1]) { struct sadb_x_filter *xfilter = ext_hdrs[SADB_X_EXT_FILTER - 1]; + if ((xfilter->sadb_x_filter_splen >= + (sizeof(xfrm_address_t) << 3)) || + (xfilter->sadb_x_filter_dplen >= + (sizeof(xfrm_address_t) << 3))) { + mutex_unlock(&pfk->dump_lock); + return -EINVAL; + } filter = kmalloc(sizeof(*filter), GFP_KERNEL); if (filter == NULL) { mutex_unlock(&pfk->dump_lock); @@ -2400,7 +2407,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa return err; } - xp = xfrm_policy_bysel_ctx(net, DUMMY_MARK, 0, XFRM_POLICY_TYPE_MAIN, + xp = xfrm_policy_bysel_ctx(net, &dummy_mark, 0, XFRM_POLICY_TYPE_MAIN, pol->sadb_x_policy_dir - 1, &sel, pol_ctx, 1, &err); security_xfrm_policy_free(pol_ctx); @@ -2651,7 +2658,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_ return -EINVAL; delete = (hdr->sadb_msg_type == SADB_X_SPDDELETE2); - xp = xfrm_policy_byid(net, DUMMY_MARK, 0, XFRM_POLICY_TYPE_MAIN, + xp = xfrm_policy_byid(net, &dummy_mark, 0, XFRM_POLICY_TYPE_MAIN, dir, pol->sadb_x_policy_id, delete, &err); if (xp == NULL) return -ENOENT; diff --git a/net/l2tp/Kconfig b/net/l2tp/Kconfig index 655e0646895b..b7856748e960 100644 --- a/net/l2tp/Kconfig +++ b/net/l2tp/Kconfig @@ -8,7 +8,7 @@ menuconfig L2TP depends on (IPV6 || IPV6=n) depends on INET select NET_UDP_TUNNEL - ---help--- + help Layer Two Tunneling Protocol From RFC 2661 <http://www.ietf.org/rfc/rfc2661.txt>. diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 6d7ef78c88af..6434d17e6e8e 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1028,6 +1028,7 @@ static void l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, /* Queue the packet to IP for output */ skb->ignore_df = 1; + skb_dst_drop(skb); #if IS_ENABLED(CONFIG_IPV6) if (l2tp_sk_is_v6(tunnel->sock)) error = inet6_csk_xmit(tunnel->sock, skb, NULL); @@ -1099,10 +1100,6 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len goto out_unlock; } - /* Get routing info from the tunnel socket */ - skb_dst_drop(skb); - skb_dst_set(skb, sk_dst_check(sk, 0)); - inet = inet_sk(sk); fl = &inet->cork.fl; switch (tunnel->encap) { diff --git a/net/l3mdev/Kconfig b/net/l3mdev/Kconfig index de186dff8f63..2b2861e1fb7d 100644 --- a/net/l3mdev/Kconfig +++ b/net/l3mdev/Kconfig @@ -6,6 +6,6 @@ config NET_L3_MASTER_DEV bool "L3 Master device support" depends on INET || IPV6 - ---help--- + help This module provides glue between core networking code and device drivers to support L3 master devices like VRF. diff --git a/net/lapb/Kconfig b/net/lapb/Kconfig index 5b50e8d64f26..da87b47f0dff 100644 --- a/net/lapb/Kconfig +++ b/net/lapb/Kconfig @@ -5,7 +5,7 @@ config LAPB tristate "LAPB Data Link Driver" - ---help--- + help Link Access Procedure, Balanced (LAPB) is the data link layer (i.e. the lower) part of the X.25 protocol. It offers a reliable connection service to exchange data frames with one other host, and diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 54fb8d452a7b..6e53e43c1907 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -273,6 +273,10 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr) if (!sock_flag(sk, SOCK_ZAPPED)) goto out; + if (!addr->sllc_arphrd) + addr->sllc_arphrd = ARPHRD_ETHER; + if (addr->sllc_arphrd != ARPHRD_ETHER) + goto out; rc = -ENODEV; if (sk->sk_bound_dev_if) { llc->dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if); @@ -328,7 +332,9 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) if (unlikely(!sock_flag(sk, SOCK_ZAPPED) || addrlen != sizeof(*addr))) goto out; rc = -EAFNOSUPPORT; - if (unlikely(addr->sllc_family != AF_LLC)) + if (!addr->sllc_arphrd) + addr->sllc_arphrd = ARPHRD_ETHER; + if (unlikely(addr->sllc_family != AF_LLC || addr->sllc_arphrd != ARPHRD_ETHER)) goto out; dprintk("%s: binding %02X\n", __func__, addr->sllc_sap); rc = -ENODEV; @@ -336,8 +342,6 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) if (sk->sk_bound_dev_if) { llc->dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if); if (llc->dev) { - if (!addr->sllc_arphrd) - addr->sllc_arphrd = llc->dev->type; if (is_zero_ether_addr(addr->sllc_mac)) memcpy(addr->sllc_mac, llc->dev->dev_addr, IFHWADDRLEN); diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 0c93b1b7a826..cd9a9bd242ba 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -9,7 +9,7 @@ config MAC80211 select CRYPTO_GCM select CRYPTO_CMAC select CRC32 - ---help--- + help This option enables the hardware independent IEEE 802.11 networking stack. @@ -25,14 +25,14 @@ config MAC80211_RC_MINSTREL bool "Minstrel" if EXPERT select MAC80211_HAS_RC default y - ---help--- + help This option enables the 'minstrel' TX rate control algorithm choice prompt "Default rate control algorithm" depends on MAC80211_HAS_RC default MAC80211_RC_DEFAULT_MINSTREL - ---help--- + help This option selects the default rate control algorithm mac80211 will use. Note that this default can still be overridden through the ieee80211_default_rc_algo module @@ -41,7 +41,7 @@ choice config MAC80211_RC_DEFAULT_MINSTREL bool "Minstrel" depends on MAC80211_RC_MINSTREL - ---help--- + help Select Minstrel as the default rate control algorithm. @@ -60,7 +60,7 @@ comment "Some wireless drivers require a rate control algorithm" config MAC80211_MESH bool "Enable mac80211 mesh networking support" depends on MAC80211 - ---help--- + help Select this option to enable 802.11 mesh operation in mac80211 drivers that support it. 802.11 mesh connects multiple stations over (possibly multi-hop) wireless links to form a single logical @@ -71,14 +71,14 @@ config MAC80211_LEDS depends on MAC80211 depends on LEDS_CLASS select LEDS_TRIGGERS - ---help--- + help This option enables a few LED triggers for different packet receive/transmit events. config MAC80211_DEBUGFS bool "Export mac80211 internals in DebugFS" depends on MAC80211 && DEBUG_FS - ---help--- + help Select this to see extensive information about the internal state of mac80211 in debugfs. @@ -87,7 +87,7 @@ config MAC80211_DEBUGFS config MAC80211_MESSAGE_TRACING bool "Trace all mac80211 debug messages" depends on MAC80211 - ---help--- + help Select this option to have mac80211 register the mac80211_msg trace subsystem with tracepoints to collect all debugging messages, independent of @@ -100,13 +100,13 @@ config MAC80211_MESSAGE_TRACING menuconfig MAC80211_DEBUG_MENU bool "Select mac80211 debugging features" depends on MAC80211 - ---help--- + help This option collects various mac80211 debug settings. config MAC80211_NOINLINE bool "Do not inline TX/RX handlers" depends on MAC80211_DEBUG_MENU - ---help--- + help This option affects code generation in mac80211, when selected some functions are marked "noinline" to allow easier debugging of problems in the transmit and receive @@ -122,7 +122,7 @@ config MAC80211_NOINLINE config MAC80211_VERBOSE_DEBUG bool "Verbose debugging output" depends on MAC80211_DEBUG_MENU - ---help--- + help Selecting this option causes mac80211 to print out many debugging messages. It should not be selected on production systems as some of the messages are @@ -133,7 +133,7 @@ config MAC80211_VERBOSE_DEBUG config MAC80211_MLME_DEBUG bool "Verbose managed MLME output" depends on MAC80211_DEBUG_MENU - ---help--- + help Selecting this option causes mac80211 to print out debugging messages for the managed-mode MLME. It should not be selected on production systems as some @@ -144,7 +144,7 @@ config MAC80211_MLME_DEBUG config MAC80211_STA_DEBUG bool "Verbose station debugging" depends on MAC80211_DEBUG_MENU - ---help--- + help Selecting this option causes mac80211 to print out debugging messages for station addition/removal. @@ -153,7 +153,7 @@ config MAC80211_STA_DEBUG config MAC80211_HT_DEBUG bool "Verbose HT debugging" depends on MAC80211_DEBUG_MENU - ---help--- + help This option enables 802.11n High Throughput features debug tracing output. @@ -165,7 +165,7 @@ config MAC80211_HT_DEBUG config MAC80211_OCB_DEBUG bool "Verbose OCB debugging" depends on MAC80211_DEBUG_MENU - ---help--- + help Selecting this option causes mac80211 to print out very verbose OCB debugging messages. It should not be selected on production systems as those messages @@ -176,7 +176,7 @@ config MAC80211_OCB_DEBUG config MAC80211_IBSS_DEBUG bool "Verbose IBSS debugging" depends on MAC80211_DEBUG_MENU - ---help--- + help Selecting this option causes mac80211 to print out very verbose IBSS debugging messages. It should not be selected on production systems as those messages @@ -187,7 +187,7 @@ config MAC80211_IBSS_DEBUG config MAC80211_PS_DEBUG bool "Verbose powersave mode debugging" depends on MAC80211_DEBUG_MENU - ---help--- + help Selecting this option causes mac80211 to print out very verbose power save mode debugging messages (when mac80211 is an AP and has power saving stations.) @@ -200,7 +200,7 @@ config MAC80211_MPL_DEBUG bool "Verbose mesh peer link debugging" depends on MAC80211_DEBUG_MENU depends on MAC80211_MESH - ---help--- + help Selecting this option causes mac80211 to print out very verbose mesh peer link debugging messages (when mac80211 is taking part in a mesh network). @@ -213,7 +213,7 @@ config MAC80211_MPATH_DEBUG bool "Verbose mesh path debugging" depends on MAC80211_DEBUG_MENU depends on MAC80211_MESH - ---help--- + help Selecting this option causes mac80211 to print out very verbose mesh path selection debugging messages (when mac80211 is taking part in a mesh network). @@ -226,7 +226,7 @@ config MAC80211_MHWMP_DEBUG bool "Verbose mesh HWMP routing debugging" depends on MAC80211_DEBUG_MENU depends on MAC80211_MESH - ---help--- + help Selecting this option causes mac80211 to print out very verbose mesh routing (HWMP) debugging messages (when mac80211 is taking part in a mesh network). @@ -239,7 +239,7 @@ config MAC80211_MESH_SYNC_DEBUG bool "Verbose mesh synchronization debugging" depends on MAC80211_DEBUG_MENU depends on MAC80211_MESH - ---help--- + help Selecting this option causes mac80211 to print out very verbose mesh synchronization debugging messages (when mac80211 is taking part in a mesh network). @@ -250,7 +250,7 @@ config MAC80211_MESH_CSA_DEBUG bool "Verbose mesh channel switch debugging" depends on MAC80211_DEBUG_MENU depends on MAC80211_MESH - ---help--- + help Selecting this option causes mac80211 to print out very verbose mesh channel switch debugging messages (when mac80211 is taking part in a mesh network). @@ -261,7 +261,7 @@ config MAC80211_MESH_PS_DEBUG bool "Verbose mesh powersave debugging" depends on MAC80211_DEBUG_MENU depends on MAC80211_MESH - ---help--- + help Selecting this option causes mac80211 to print out very verbose mesh powersave debugging messages (when mac80211 is taking part in a mesh network). @@ -271,7 +271,7 @@ config MAC80211_MESH_PS_DEBUG config MAC80211_TDLS_DEBUG bool "Verbose TDLS debugging" depends on MAC80211_DEBUG_MENU - ---help--- + help Selecting this option causes mac80211 to print out very verbose TDLS selection debugging messages (when mac80211 is a TDLS STA). @@ -284,7 +284,7 @@ config MAC80211_DEBUG_COUNTERS bool "Extra statistics for TX/RX debugging" depends on MAC80211_DEBUG_MENU depends on MAC80211_DEBUGFS - ---help--- + help Selecting this option causes mac80211 to keep additional and very verbose statistics about TX and RX handler use as well as a few selected dot11 counters. These will be @@ -298,7 +298,7 @@ config MAC80211_DEBUG_COUNTERS config MAC80211_STA_HASH_MAX_SIZE int "Station hash table maximum size" if MAC80211_DEBUG_MENU default 0 - ---help--- + help Setting this option to a low value (e.g. 4) allows testing the hash table with collisions relatively deterministically (just connect more stations than the number selected here.) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 9b360544ad6f..1079a07e43e4 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2166,6 +2166,7 @@ static int ieee80211_leave_mesh(struct wiphy *wiphy, struct net_device *dev) ieee80211_stop_mesh(sdata); mutex_lock(&sdata->local->mtx); ieee80211_vif_release_channel(sdata); + kfree(sdata->u.mesh.ie); mutex_unlock(&sdata->local->mtx); return 0; diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 5f1ca25b6c97..e88beb3ff6db 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -617,6 +617,19 @@ int mesh_add_he_oper_ie(struct ieee80211_sub_if_data *sdata, int mesh_add_he_6ghz_cap_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { + struct ieee80211_supported_band *sband; + const struct ieee80211_sband_iftype_data *iftd; + + sband = ieee80211_get_sband(sdata); + if (!sband) + return -EINVAL; + + iftd = ieee80211_get_sband_iftype_data(sband, + NL80211_IFTYPE_MESH_POINT); + /* The device doesn't support HE in mesh mode or at all */ + if (!iftd) + return 0; + ieee80211_ie_build_he_6ghz_cap(sdata, skb); return 0; } diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index aa5150929996..02cde0fd08fe 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -1105,11 +1105,8 @@ void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata) ttl, lifetime, 0, ifmsh->preq_id++, sdata); spin_lock_bh(&mpath->state_lock); - if (mpath->flags & MESH_PATH_DELETED) { - spin_unlock_bh(&mpath->state_lock); - goto enddiscovery; - } - mod_timer(&mpath->timer, jiffies + mpath->discovery_timeout); + if (!(mpath->flags & MESH_PATH_DELETED)) + mod_timer(&mpath->timer, jiffies + mpath->discovery_timeout); spin_unlock_bh(&mpath->state_lock); enddiscovery: diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 117519bf33d6..aca608ae313f 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -521,6 +521,7 @@ static void mesh_path_free_rcu(struct mesh_table *tbl, del_timer_sync(&mpath->timer); atomic_dec(&sdata->u.mesh.mpaths); atomic_dec(&tbl->entries); + mesh_path_flush_pending(mpath); kfree_rcu(mpath, rcu); } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5820ef02a587..b2a9d47cf86d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -167,6 +167,8 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT | IEEE80211_STA_DISABLE_HE; + else + ret = 0; vht_chandef = *chandef; goto out; } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 21854a61a2b7..5c5af4b5fc08 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2396,6 +2396,7 @@ static int ieee80211_802_1x_port_control(struct ieee80211_rx_data *rx) static int ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx, __le16 fc) { + struct ieee80211_hdr *hdr = (void *)rx->skb->data; struct sk_buff *skb = rx->skb; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); @@ -2406,6 +2407,31 @@ static int ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx, __le16 fc) if (status->flag & RX_FLAG_DECRYPTED) return 0; + /* check mesh EAPOL frames first */ + if (unlikely(rx->sta && ieee80211_vif_is_mesh(&rx->sdata->vif) && + ieee80211_is_data(fc))) { + struct ieee80211s_hdr *mesh_hdr; + u16 hdr_len = ieee80211_hdrlen(fc); + u16 ethertype_offset; + __be16 ethertype; + + if (!ether_addr_equal(hdr->addr1, rx->sdata->vif.addr)) + goto drop_check; + + /* make sure fixed part of mesh header is there, also checks skb len */ + if (!pskb_may_pull(rx->skb, hdr_len + 6)) + goto drop_check; + + mesh_hdr = (struct ieee80211s_hdr *)(skb->data + hdr_len); + ethertype_offset = hdr_len + ieee80211_get_mesh_hdrlen(mesh_hdr) + + sizeof(rfc1042_header); + + if (skb_copy_bits(rx->skb, ethertype_offset, ðertype, 2) == 0 && + ethertype == rx->sdata->control_port_protocol) + return 0; + } + +drop_check: /* Drop unencrypted frames if key is set. */ if (unlikely(!ieee80211_has_protected(fc) && !ieee80211_is_any_nullfunc(fc) && @@ -4694,7 +4720,7 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, * rate_idx is MCS index, which can be [0-76] * as documented on: * - * http://wireless.kernel.org/en/developers/Documentation/ieee80211/802.11n + * https://wireless.wiki.kernel.org/en/developers/Documentation/ieee80211/802.11n * * Anything else would be some sort of driver or * hardware error. The driver should catch hardware diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index cd8487bc6fc2..af4cc5fb678e 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1923,9 +1923,7 @@ void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local, if (sta) { tx_pending = atomic_sub_return(tx_airtime, &sta->airtime[ac].aql_tx_pending); - if (WARN_ONCE(tx_pending < 0, - "STA %pM AC %d txq pending airtime underflow: %u, %u", - sta->addr, ac, tx_pending, tx_airtime)) + if (tx_pending < 0) atomic_cmpxchg(&sta->airtime[ac].aql_tx_pending, tx_pending, 0); } diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 7b1bacac39c6..cbc40b358ba2 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -639,11 +639,23 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local, u64 cookie = IEEE80211_SKB_CB(skb)->ack.cookie; struct ieee80211_sub_if_data *sdata; struct ieee80211_hdr *hdr = (void *)skb->data; + __be16 ethertype = 0; + + if (skb->len >= ETH_HLEN && skb->protocol == cpu_to_be16(ETH_P_802_3)) + skb_copy_bits(skb, 2 * ETH_ALEN, ðertype, ETH_TLEN); rcu_read_lock(); sdata = ieee80211_sdata_from_skb(local, skb); if (sdata) { - if (ieee80211_is_any_nullfunc(hdr->frame_control)) + if (ethertype == sdata->control_port_protocol || + ethertype == cpu_to_be16(ETH_P_PREAUTH)) + cfg80211_control_port_tx_status(&sdata->wdev, + cookie, + skb->data, + skb->len, + acked, + GFP_ATOMIC); + else if (ieee80211_is_any_nullfunc(hdr->frame_control)) cfg80211_probe_status(sdata->dev, hdr->addr1, cookie, acked, info->status.ack_signal, @@ -654,12 +666,8 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local, skb->data, skb->len, acked, GFP_ATOMIC); else - cfg80211_control_port_tx_status(&sdata->wdev, - cookie, - skb->data, - skb->len, - acked, - GFP_ATOMIC); + pr_warn("Unknown status report in ack skb\n"); + } rcu_read_unlock(); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index e9ce658141f5..3529d1368068 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3996,6 +3996,9 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb, skb_list_walk_safe(skb, skb, next) { skb_mark_not_on_list(skb); + if (skb->protocol == sdata->control_port_protocol) + ctrl_flags |= IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP; + skb = ieee80211_build_hdr(sdata, skb, info_flags, sta, ctrl_flags, cookie); if (IS_ERR(skb)) { @@ -4206,7 +4209,7 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata, (!sta || !test_sta_flag(sta, WLAN_STA_TDLS_PEER))) ra = sdata->u.mgd.bssid; - if (!is_valid_ether_addr(ra)) + if (is_zero_ether_addr(ra)) goto out_free; multicast = is_multicast_ether_addr(ra); @@ -4227,11 +4230,12 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata, test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state)) goto out_free; + memset(info, 0, sizeof(*info)); + if (unlikely(!multicast && skb->sk && skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS)) - ieee80211_store_ack_skb(local, skb, &info->flags, NULL); - - memset(info, 0, sizeof(*info)); + info->ack_frame_id = ieee80211_store_ack_skb(local, skb, + &info->flags, NULL); if (unlikely(sdata->control_port_protocol == ehdr->h_proto)) { if (sdata->control_port_no_encrypt) @@ -5371,7 +5375,8 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, return -EINVAL; if (proto == sdata->control_port_protocol) - ctrl_flags |= IEEE80211_TX_CTRL_PORT_CTRL_PROTO; + ctrl_flags |= IEEE80211_TX_CTRL_PORT_CTRL_PROTO | + IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP; if (unencrypted) flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 21c94094a699..dd9f5c7a1ade 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2878,6 +2878,10 @@ void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata, if (WARN_ON(!iftd)) return; + /* Check for device HE 6 GHz capability before adding element */ + if (!iftd->he_6ghz_capa.capa) + return; + cap = le16_to_cpu(iftd->he_6ghz_capa.capa); cap &= ~IEEE80211_HE_6GHZ_CAP_SM_PS; diff --git a/net/mac802154/Kconfig b/net/mac802154/Kconfig index 742624e4f7bb..901167b1e6f5 100644 --- a/net/mac802154/Kconfig +++ b/net/mac802154/Kconfig @@ -8,7 +8,7 @@ config MAC802154 select CRYPTO_CCM select CRYPTO_CTR select CRYPTO_AES - ---help--- + help This option enables the hardware independent IEEE 802.15.4 networking stack for SoftMAC devices (the ones implementing only PHY level of IEEE 802.15.4 standard). diff --git a/net/mpls/Kconfig b/net/mpls/Kconfig index d1ad69b7942a..d672ab72ab12 100644 --- a/net/mpls/Kconfig +++ b/net/mpls/Kconfig @@ -6,7 +6,7 @@ menuconfig MPLS bool "MultiProtocol Label Switching" default n - ---help--- + help MultiProtocol Label Switching routes packets through logical circuits. Originally conceived as a way of routing packets at hardware speeds (before hardware was capable of routing ipv4 packets), @@ -27,13 +27,13 @@ config MPLS_ROUTING tristate "MPLS: routing support" depends on NET_IP_TUNNEL || NET_IP_TUNNEL=n depends on PROC_SYSCTL - ---help--- + help Add support for forwarding of mpls packets. config MPLS_IPTUNNEL tristate "MPLS: IP over MPLS tunnel support" depends on LWTUNNEL && MPLS_ROUTING - ---help--- + help mpls ip tunnel support. endif # MPLS diff --git a/net/mptcp/crypto.c b/net/mptcp/crypto.c index 3d980713a9e2..82bd2b54d741 100644 --- a/net/mptcp/crypto.c +++ b/net/mptcp/crypto.c @@ -32,11 +32,8 @@ void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn) { __be32 mptcp_hashed_key[SHA256_DIGEST_WORDS]; __be64 input = cpu_to_be64(key); - struct sha256_state state; - sha256_init(&state); - sha256_update(&state, (__force u8 *)&input, sizeof(input)); - sha256_final(&state, (u8 *)mptcp_hashed_key); + sha256((__force u8 *)&input, sizeof(input), (u8 *)mptcp_hashed_key); if (token) *token = be32_to_cpu(mptcp_hashed_key[0]); @@ -47,7 +44,6 @@ void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn) void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac) { u8 input[SHA256_BLOCK_SIZE + SHA256_DIGEST_SIZE]; - struct sha256_state state; u8 key1be[8]; u8 key2be[8]; int i; @@ -67,13 +63,10 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac) memcpy(&input[SHA256_BLOCK_SIZE], msg, len); - sha256_init(&state); - sha256_update(&state, input, SHA256_BLOCK_SIZE + len); - /* emit sha256(K1 || msg) on the second input block, so we can * reuse 'input' for the last hashing */ - sha256_final(&state, &input[SHA256_BLOCK_SIZE]); + sha256(input, SHA256_BLOCK_SIZE + len, &input[SHA256_BLOCK_SIZE]); /* Prepare second part of hmac */ memset(input, 0x5C, SHA256_BLOCK_SIZE); @@ -82,9 +75,7 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac) for (i = 0; i < 8; i++) input[i + 8] ^= key2be[i]; - sha256_init(&state); - sha256_update(&state, input, SHA256_BLOCK_SIZE + SHA256_DIGEST_SIZE); - sha256_final(&state, (u8 *)hmac); + sha256(input, SHA256_BLOCK_SIZE + SHA256_DIGEST_SIZE, hmac); } #ifdef CONFIG_MPTCP_HMAC_TEST diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 01f1f4cf4902..8f940be42f98 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -273,6 +273,8 @@ static void mptcp_parse_option(const struct sk_buff *skb, if (opsize != TCPOLEN_MPTCP_RM_ADDR_BASE) break; + ptr++; + mp_opt->rm_addr = 1; mp_opt->rm_id = *ptr++; pr_debug("RM_ADDR: id=%d", mp_opt->rm_id); @@ -334,9 +336,7 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb, */ subflow->snd_isn = TCP_SKB_CB(skb)->end_seq; if (subflow->request_mptcp) { - pr_debug("local_key=%llu", subflow->local_key); opts->suboptions = OPTION_MPTCP_MPC_SYN; - opts->sndr_key = subflow->local_key; *size = TCPOLEN_MPTCP_MPC_SYN; return true; } else if (subflow->request_join) { @@ -449,9 +449,9 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, } static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow, - struct mptcp_ext *ext) + struct sk_buff *skb, struct mptcp_ext *ext) { - if (!ext->use_map) { + if (!ext->use_map || !skb->len) { /* RFC6824 requires a DSS mapping with specific values * if DATA_FIN is set but no data payload is mapped */ @@ -503,7 +503,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, opts->ext_copy = *mpext; if (skb && tcp_fin && subflow->data_fin_tx_enable) - mptcp_write_data_fin(subflow, &opts->ext_copy); + mptcp_write_data_fin(subflow, skb, &opts->ext_copy); ret = true; } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 14b253d10ccf..c0abe738e7d3 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -374,6 +374,27 @@ void mptcp_subflow_eof(struct sock *sk) sock_hold(sk); } +static void mptcp_check_for_eof(struct mptcp_sock *msk) +{ + struct mptcp_subflow_context *subflow; + struct sock *sk = (struct sock *)msk; + int receivers = 0; + + mptcp_for_each_subflow(msk, subflow) + receivers += !subflow->rx_eof; + + if (!receivers && !(sk->sk_shutdown & RCV_SHUTDOWN)) { + /* hopefully temporary hack: propagate shutdown status + * to msk, when all subflows agree on it + */ + sk->sk_shutdown |= RCV_SHUTDOWN; + + smp_mb__before_atomic(); /* SHUTDOWN must be visible first */ + set_bit(MPTCP_DATA_READY, &msk->flags); + sk->sk_data_ready(sk); + } +} + static void mptcp_stop_timer(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -1011,6 +1032,9 @@ fallback: break; } + if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags)) + mptcp_check_for_eof(msk); + if (sk->sk_shutdown & RCV_SHUTDOWN) break; @@ -1148,27 +1172,6 @@ static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu) return 0; } -static void mptcp_check_for_eof(struct mptcp_sock *msk) -{ - struct mptcp_subflow_context *subflow; - struct sock *sk = (struct sock *)msk; - int receivers = 0; - - mptcp_for_each_subflow(msk, subflow) - receivers += !subflow->rx_eof; - - if (!receivers && !(sk->sk_shutdown & RCV_SHUTDOWN)) { - /* hopefully temporary hack: propagate shutdown status - * to msk, when all subflows agree on it - */ - sk->sk_shutdown |= RCV_SHUTDOWN; - - smp_mb__before_atomic(); /* SHUTDOWN must be visible first */ - set_bit(MPTCP_DATA_READY, &msk->flags); - sk->sk_data_ready(sk); - } -} - static void mptcp_worker(struct work_struct *work) { struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work); @@ -1830,7 +1833,7 @@ do_connect: /* on successful connect, the msk state will be moved to established by * subflow_finish_connect() */ - if (!err || err == EINPROGRESS) + if (!err || err == -EINPROGRESS) mptcp_copy_inaddrs(sock->sk, ssock->sk); else inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk)); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 809687d3f410..c6eeaf3e8dcb 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -135,8 +135,6 @@ static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field) ((nib & 0xF) << 8) | field); } -#define MPTCP_PM_MAX_ADDR 4 - struct mptcp_addr_info { sa_family_t family; __be16 port; @@ -234,10 +232,7 @@ static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); - if (list_empty(&msk->rtx_queue)) - return NULL; - - return list_first_entry(&msk->rtx_queue, struct mptcp_data_frag, list); + return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list); } struct mptcp_subflow_request_sock { @@ -254,6 +249,7 @@ struct mptcp_subflow_request_sock { u64 thmac; u32 local_nonce; u32 remote_nonce; + struct mptcp_sock *msk; }; static inline struct mptcp_subflow_request_sock * diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 493b98a0825c..3838a0b3a21f 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -69,6 +69,9 @@ static void subflow_req_destructor(struct request_sock *req) pr_debug("subflow_req=%p", subflow_req); + if (subflow_req->msk) + sock_put((struct sock *)subflow_req->msk); + if (subflow_req->mp_capable) mptcp_token_destroy_request(subflow_req->token); tcp_request_sock_ops.destructor(req); @@ -86,8 +89,8 @@ static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2, } /* validate received token and create truncated hmac and nonce for SYN-ACK */ -static bool subflow_token_join_request(struct request_sock *req, - const struct sk_buff *skb) +static struct mptcp_sock *subflow_token_join_request(struct request_sock *req, + const struct sk_buff *skb) { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); u8 hmac[SHA256_DIGEST_SIZE]; @@ -97,13 +100,13 @@ static bool subflow_token_join_request(struct request_sock *req, msk = mptcp_token_get_sock(subflow_req->token); if (!msk) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINNOTOKEN); - return false; + return NULL; } local_id = mptcp_pm_get_local_id(msk, (struct sock_common *)req); if (local_id < 0) { sock_put((struct sock *)msk); - return false; + return NULL; } subflow_req->local_id = local_id; @@ -114,9 +117,7 @@ static bool subflow_token_join_request(struct request_sock *req, subflow_req->remote_nonce, hmac); subflow_req->thmac = get_unaligned_be64(hmac); - - sock_put((struct sock *)msk); - return true; + return msk; } static void subflow_init_req(struct request_sock *req, @@ -133,6 +134,7 @@ static void subflow_init_req(struct request_sock *req, subflow_req->mp_capable = 0; subflow_req->mp_join = 0; + subflow_req->msk = NULL; #ifdef CONFIG_TCP_MD5SIG /* no MPTCP if MD5SIG is enabled on this socket or we may run out of @@ -166,12 +168,9 @@ static void subflow_init_req(struct request_sock *req, subflow_req->remote_id = mp_opt.join_id; subflow_req->token = mp_opt.token; subflow_req->remote_nonce = mp_opt.nonce; - pr_debug("token=%u, remote_nonce=%u", subflow_req->token, - subflow_req->remote_nonce); - if (!subflow_token_join_request(req, skb)) { - subflow_req->mp_join = 0; - // @@ need to trigger RST - } + subflow_req->msk = subflow_token_join_request(req, skb); + pr_debug("token=%u, remote_nonce=%u msk=%p", subflow_req->token, + subflow_req->remote_nonce, subflow_req->msk); } } @@ -354,10 +353,9 @@ static bool subflow_hmac_valid(const struct request_sock *req, const struct mptcp_subflow_request_sock *subflow_req; u8 hmac[SHA256_DIGEST_SIZE]; struct mptcp_sock *msk; - bool ret; subflow_req = mptcp_subflow_rsk(req); - msk = mptcp_token_get_sock(subflow_req->token); + msk = subflow_req->msk; if (!msk) return false; @@ -365,12 +363,7 @@ static bool subflow_hmac_valid(const struct request_sock *req, subflow_req->remote_nonce, subflow_req->local_nonce, hmac); - ret = true; - if (crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN)) - ret = false; - - sock_put((struct sock *)msk); - return ret; + return !crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN); } static void mptcp_sock_destruct(struct sock *sk) @@ -393,6 +386,7 @@ static void mptcp_sock_destruct(struct sock *sk) sock_orphan(sk); } + mptcp_token_destroy(mptcp_sk(sk)->token); inet_sock_destruct(sk); } @@ -437,22 +431,25 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk); struct mptcp_subflow_request_sock *subflow_req; struct mptcp_options_received mp_opt; - bool fallback_is_fatal = false; + bool fallback, fallback_is_fatal; struct sock *new_msk = NULL; - bool fallback = false; struct sock *child; pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn); - /* we need later a valid 'mp_capable' value even when options are not - * parsed + /* After child creation we must look for 'mp_capable' even when options + * are not parsed */ mp_opt.mp_capable = 0; - if (tcp_rsk(req)->is_mptcp == 0) + + /* hopefully temporary handling for MP_JOIN+syncookie */ + subflow_req = mptcp_subflow_rsk(req); + fallback_is_fatal = subflow_req->mp_join; + fallback = !tcp_rsk(req)->is_mptcp; + if (fallback) goto create_child; /* if the sk is MP_CAPABLE, we try to fetch the client key */ - subflow_req = mptcp_subflow_rsk(req); if (subflow_req->mp_capable) { if (TCP_SKB_CB(skb)->seq != subflow_req->ssn_offset + 1) { /* here we can receive and accept an in-window, @@ -473,12 +470,11 @@ create_msk: if (!new_msk) fallback = true; } else if (subflow_req->mp_join) { - fallback_is_fatal = true; mptcp_get_options(skb, &mp_opt); if (!mp_opt.mp_join || !subflow_hmac_valid(req, &mp_opt)) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); - return NULL; + fallback = true; } } @@ -521,10 +517,12 @@ create_child: } else if (ctx->mp_join) { struct mptcp_sock *owner; - owner = mptcp_token_get_sock(ctx->token); + owner = subflow_req->msk; if (!owner) goto dispose_child; + /* move the msk reference ownership to the subflow */ + subflow_req->msk = NULL; ctx->conn = (struct sock *)owner; if (!mptcp_finish_join(child)) goto dispose_child; @@ -1052,8 +1050,10 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock) err = tcp_set_ulp(sf->sk, "mptcp"); release_sock(sf->sk); - if (err) + if (err) { + sock_release(sf); return err; + } /* the newly created socket really belongs to the owning MPTCP master * socket, even if for additional subflows the allocation is performed diff --git a/net/ncsi/Kconfig b/net/ncsi/Kconfig index 2f1e5756c03a..93309081f5a4 100644 --- a/net/ncsi/Kconfig +++ b/net/ncsi/Kconfig @@ -6,7 +6,7 @@ config NET_NCSI bool "NCSI interface support" depends on INET - ---help--- + help This module provides NCSI (Network Controller Sideband Interface) support. Enable this only if your system connects to a network device via NCSI and the ethernet driver you're using supports @@ -14,6 +14,6 @@ config NET_NCSI config NCSI_OEM_CMD_GET_MAC bool "Get NCSI OEM MAC Address" depends on NET_NCSI - ---help--- + help This allows to get MAC address from NCSI firmware and set them back to controller. diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 3a3915d2e1ea..0ffe2b8723c4 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -120,7 +120,7 @@ config NF_CONNTRACK_PROCFS bool "Supply CT list in procfs (OBSOLETE)" default y depends on PROC_FS - ---help--- + help This option enables for the list of known conntrack entries to be shown in procfs under net/netfilter/nf_conntrack. This is considered obsolete in favor of using the conntrack(8) @@ -717,7 +717,7 @@ comment "Xtables combined modules" config NETFILTER_XT_MARK tristate 'nfmark target and match support' default m if NETFILTER_ADVANCED=n - ---help--- + help This option adds the "MARK" target and "mark" match. Netfilter mark matching allows you to match packets based on the @@ -733,7 +733,7 @@ config NETFILTER_XT_CONNMARK depends on NF_CONNTRACK depends on NETFILTER_ADVANCED select NF_CONNTRACK_MARK - ---help--- + help This option adds the "CONNMARK" target and "connmark" match. Netfilter allows you to store a mark value per connection (a.k.a. @@ -760,7 +760,7 @@ config NETFILTER_XT_TARGET_AUDIT tristate "AUDIT target support" depends on AUDIT depends on NETFILTER_ADVANCED - ---help--- + help This option adds a 'AUDIT' target, which can be used to create audit records for packets dropped/accepted. @@ -770,7 +770,7 @@ config NETFILTER_XT_TARGET_CHECKSUM tristate "CHECKSUM target support" depends on IP_NF_MANGLE || IP6_NF_MANGLE depends on NETFILTER_ADVANCED - ---help--- + help This option adds a `CHECKSUM' target, which can be used in the iptables mangle table to work around buggy DHCP clients in virtualized environments. @@ -799,7 +799,7 @@ config NETFILTER_XT_TARGET_CONNMARK depends on NF_CONNTRACK depends on NETFILTER_ADVANCED select NETFILTER_XT_CONNMARK - ---help--- + help This is a backwards-compat option for the user's convenience (e.g. when running oldconfig). It selects CONFIG_NETFILTER_XT_CONNMARK (combined connmark/CONNMARK module). @@ -848,7 +848,7 @@ config NETFILTER_XT_TARGET_HL tristate '"HL" hoplimit target support' depends on IP_NF_MANGLE || IP6_NF_MANGLE depends on NETFILTER_ADVANCED - ---help--- + help This option adds the "HL" (for IPv6) and "TTL" (for IPv4) targets, which enable the user to change the hoplimit/time-to-live value of the IP header. @@ -863,7 +863,7 @@ config NETFILTER_XT_TARGET_HMARK tristate '"HMARK" target support' depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n depends on NETFILTER_ADVANCED - ---help--- + help This option adds the "HMARK" target. The target allows you to create rules in the "raw" and "mangle" tables @@ -925,7 +925,7 @@ config NETFILTER_XT_TARGET_MARK tristate '"MARK" target support' depends on NETFILTER_ADVANCED select NETFILTER_XT_MARK - ---help--- + help This is a backwards-compat option for the user's convenience (e.g. when running oldconfig). It selects CONFIG_NETFILTER_XT_MARK (combined mark/MARK module). @@ -933,7 +933,7 @@ config NETFILTER_XT_TARGET_MARK config NETFILTER_XT_NAT tristate '"SNAT and DNAT" targets support' depends on NF_NAT - ---help--- + help This option enables the SNAT and DNAT targets. To compile it as a module, choose M here. If unsure, say N. @@ -941,7 +941,7 @@ config NETFILTER_XT_NAT config NETFILTER_XT_TARGET_NETMAP tristate '"NETMAP" target support' depends on NF_NAT - ---help--- + help NETMAP is an implementation of static 1:1 NAT mapping of network addresses. It maps the network address part, while keeping the host address part intact. @@ -991,7 +991,7 @@ config NETFILTER_XT_TARGET_REDIRECT tristate "REDIRECT target support" depends on NF_NAT select NF_NAT_REDIRECT - ---help--- + help REDIRECT is a special case of NAT: all incoming connections are mapped onto the incoming interface's address, causing the packets to come to the local machine instead of passing through. This is @@ -1021,7 +1021,7 @@ config NETFILTER_XT_TARGET_TEE depends on IP6_NF_IPTABLES || !IP6_NF_IPTABLES select NF_DUP_IPV4 select NF_DUP_IPV6 if IP6_NF_IPTABLES - ---help--- + help This option adds a "TEE" target with which a packet can be cloned and this clone be rerouted to another nexthop. @@ -1073,7 +1073,7 @@ config NETFILTER_XT_TARGET_TCPMSS tristate '"TCPMSS" target support' depends on IPV6 || IPV6=n default m if NETFILTER_ADVANCED=n - ---help--- + help This option adds a `TCPMSS' target, which allows you to alter the MSS value of TCP SYN packets, to control the maximum size for that connection (usually limiting it to your outgoing interface's MTU @@ -1111,7 +1111,7 @@ comment "Xtables matches" config NETFILTER_XT_MATCH_ADDRTYPE tristate '"addrtype" address type match support' default m if NETFILTER_ADVANCED=n - ---help--- + help This option allows you to match what routing thinks of an address, eg. UNICAST, LOCAL, BROADCAST, ... @@ -1132,7 +1132,7 @@ config NETFILTER_XT_MATCH_CGROUP depends on NETFILTER_ADVANCED depends on CGROUPS select CGROUP_NET_CLASSID - ---help--- + help Socket/process control group matching allows you to match locally generated packets based on which net_cls control group processes belong to. @@ -1141,7 +1141,7 @@ config NETFILTER_XT_MATCH_CLUSTER tristate '"cluster" match support' depends on NF_CONNTRACK depends on NETFILTER_ADVANCED - ---help--- + help This option allows you to build work-load-sharing clusters of network servers/stateful firewalls without having a dedicated load-balancing router/server/switch. Basically, this match returns @@ -1179,7 +1179,7 @@ config NETFILTER_XT_MATCH_CONNLABEL select NF_CONNTRACK_LABELS depends on NF_CONNTRACK depends on NETFILTER_ADVANCED - ---help--- + help This match allows you to test and assign userspace-defined labels names to a connection. The kernel only stores bit values - mapping names to bits is done by userspace. @@ -1192,7 +1192,7 @@ config NETFILTER_XT_MATCH_CONNLIMIT depends on NF_CONNTRACK depends on NETFILTER_ADVANCED select NETFILTER_CONNCOUNT - ---help--- + help This match allows you to match against the number of parallel connections to a server per client IP address (or address block). @@ -1201,7 +1201,7 @@ config NETFILTER_XT_MATCH_CONNMARK depends on NF_CONNTRACK depends on NETFILTER_ADVANCED select NETFILTER_XT_CONNMARK - ---help--- + help This is a backwards-compat option for the user's convenience (e.g. when running oldconfig). It selects CONFIG_NETFILTER_XT_CONNMARK (combined connmark/CONNMARK module). @@ -1267,7 +1267,7 @@ config NETFILTER_XT_MATCH_DSCP config NETFILTER_XT_MATCH_ECN tristate '"ecn" match support' depends on NETFILTER_ADVANCED - ---help--- + help This option adds an "ECN" match, which allows you to match against the IPv4 and TCP header ECN fields. @@ -1310,7 +1310,7 @@ config NETFILTER_XT_MATCH_HELPER config NETFILTER_XT_MATCH_HL tristate '"hl" hoplimit/TTL match support' depends on NETFILTER_ADVANCED - ---help--- + help HL matching allows you to match packets based on the hoplimit in the IPv6 header, or the time-to-live field in the IPv4 header of the packet. @@ -1327,7 +1327,7 @@ config NETFILTER_XT_MATCH_IPCOMP config NETFILTER_XT_MATCH_IPRANGE tristate '"iprange" address range match support' depends on NETFILTER_ADVANCED - ---help--- + help This option adds a "iprange" match, which allows you to match based on an IP address range. (Normal iptables only matches on single addresses with an optional mask.) @@ -1348,7 +1348,7 @@ config NETFILTER_XT_MATCH_L2TP tristate '"l2tp" match support' depends on NETFILTER_ADVANCED default L2TP - ---help--- + help This option adds an "L2TP" match, which allows you to match against L2TP protocol header fields. @@ -1386,7 +1386,7 @@ config NETFILTER_XT_MATCH_MARK tristate '"mark" match support' depends on NETFILTER_ADVANCED select NETFILTER_XT_MARK - ---help--- + help This is a backwards-compat option for the user's convenience (e.g. when running oldconfig). It selects CONFIG_NETFILTER_XT_MARK (combined mark/MARK module). @@ -1428,7 +1428,7 @@ config NETFILTER_XT_MATCH_OSF config NETFILTER_XT_MATCH_OWNER tristate '"owner" match support' depends on NETFILTER_ADVANCED - ---help--- + help Socket owner matching allows you to match locally-generated packets based on who created the socket: the user or group. It is also possible to check whether a socket actually exists. @@ -1503,7 +1503,7 @@ config NETFILTER_XT_MATCH_REALM config NETFILTER_XT_MATCH_RECENT tristate '"recent" match support' depends on NETFILTER_ADVANCED - ---help--- + help This match is used for creating one or many lists of recently used addresses and then matching against that/those list(s). @@ -1586,7 +1586,7 @@ config NETFILTER_XT_MATCH_TCPMSS config NETFILTER_XT_MATCH_TIME tristate '"time" match support' depends on NETFILTER_ADVANCED - ---help--- + help This option adds a "time" match, which allows you to match based on the packet arrival time (at the machine which netfilter is running) on) or departure time/date (for locally generated packets). @@ -1600,7 +1600,7 @@ config NETFILTER_XT_MATCH_TIME config NETFILTER_XT_MATCH_U32 tristate '"u32" match support' depends on NETFILTER_ADVANCED - ---help--- + help u32 allows you to extract quantities of up to 4 bytes from a packet, AND them with specified masks, shift them by specified amounts and test whether the results are in any of a set of specified ranges. diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index 486959f70cf3..a8ce04a4bb72 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -326,7 +326,7 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[], set->variant = &bitmap_ip; if (!init_map_ip(set, map, first_ip, last_ip, elements, hosts, netmask)) { - kfree(map); + ip_set_free(map); return -ENOMEM; } if (tb[IPSET_ATTR_TIMEOUT]) { diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 2310a316e0af..2c625e0f49ec 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -363,7 +363,7 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[], map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long); set->variant = &bitmap_ipmac; if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { - kfree(map); + ip_set_free(map); return -ENOMEM; } if (tb[IPSET_ATTR_TIMEOUT]) { diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index e56ced66f202..7138e080def4 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -274,7 +274,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[], map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long); set->variant = &bitmap_port; if (!init_map_port(set, map, first_port, last_port)) { - kfree(map); + ip_set_free(map); return -ENOMEM; } if (tb[IPSET_ATTR_TIMEOUT]) { diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 340cb955af25..56621d6bfd29 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -460,6 +460,8 @@ ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len, for (id = 0; id < IPSET_EXT_ID_MAX; id++) { if (!add_extension(id, cadt_flags, tb)) continue; + if (align < ip_set_extensions[id].align) + align = ip_set_extensions[id].align; len = ALIGN(len, ip_set_extensions[id].align); set->offset[id] = len; set->extensions |= ip_set_extensions[id].type; diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 1ee43752d6d3..521e970be402 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -682,7 +682,7 @@ retry: } t->hregion = ip_set_alloc(ahash_sizeof_regions(htable_bits)); if (!t->hregion) { - kfree(t); + ip_set_free(t); ret = -ENOMEM; goto out; } @@ -1533,7 +1533,7 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, } t->hregion = ip_set_alloc(ahash_sizeof_regions(hbits)); if (!t->hregion) { - kfree(t); + ip_set_free(t); kfree(h); return -ENOMEM; } diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index 5b672e05d758..2c1593089ede 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -6,7 +6,7 @@ menuconfig IP_VS tristate "IP virtual server support" depends on NET && INET && NETFILTER depends on (NF_CONNTRACK || NF_CONNTRACK=n) - ---help--- + help IP Virtual Server support will let you build a high-performance virtual server based on cluster of two or more real servers. This option must be enabled for at least one of the clustered computers @@ -31,14 +31,14 @@ config IP_VS_IPV6 depends on IPV6 = y || IP_VS = IPV6 select IP6_NF_IPTABLES select NF_DEFRAG_IPV6 - ---help--- + help Add IPv6 support to IPVS. Say Y if unsure. config IP_VS_DEBUG bool "IP virtual server debugging" - ---help--- + help Say Y here if you want to get additional messages useful in debugging the IP virtual server code. You can change the debug level in /proc/sys/net/ipv4/vs/debug_level @@ -47,7 +47,7 @@ config IP_VS_TAB_BITS int "IPVS connection table size (the Nth power of 2)" range 8 20 default 12 - ---help--- + help The IPVS connection hash table uses the chaining scheme to handle hash collisions. Using a big IPVS connection hash table will greatly reduce conflicts when there are hundreds of thousands of connections @@ -78,13 +78,13 @@ comment "IPVS transport protocol load balancing support" config IP_VS_PROTO_TCP bool "TCP load balancing support" - ---help--- + help This option enables support for load balancing TCP transport protocol. Say Y if unsure. config IP_VS_PROTO_UDP bool "UDP load balancing support" - ---help--- + help This option enables support for load balancing UDP transport protocol. Say Y if unsure. @@ -93,20 +93,20 @@ config IP_VS_PROTO_AH_ESP config IP_VS_PROTO_ESP bool "ESP load balancing support" - ---help--- + help This option enables support for load balancing ESP (Encapsulation Security Payload) transport protocol. Say Y if unsure. config IP_VS_PROTO_AH bool "AH load balancing support" - ---help--- + help This option enables support for load balancing AH (Authentication Header) transport protocol. Say Y if unsure. config IP_VS_PROTO_SCTP bool "SCTP load balancing support" select LIBCRC32C - ---help--- + help This option enables support for load balancing SCTP transport protocol. Say Y if unsure. @@ -114,7 +114,7 @@ comment "IPVS scheduler" config IP_VS_RR tristate "round-robin scheduling" - ---help--- + help The robin-robin scheduling algorithm simply directs network connections to different real servers in a round-robin manner. @@ -123,7 +123,7 @@ config IP_VS_RR config IP_VS_WRR tristate "weighted round-robin scheduling" - ---help--- + help The weighted robin-robin scheduling algorithm directs network connections to different real servers based on server weights in a round-robin manner. Servers with higher weights receive @@ -136,7 +136,7 @@ config IP_VS_WRR config IP_VS_LC tristate "least-connection scheduling" - ---help--- + help The least-connection scheduling algorithm directs network connections to the server with the least number of active connections. @@ -146,7 +146,7 @@ config IP_VS_LC config IP_VS_WLC tristate "weighted least-connection scheduling" - ---help--- + help The weighted least-connection scheduling algorithm directs network connections to the server with the least active connections normalized by the server weight. @@ -156,7 +156,7 @@ config IP_VS_WLC config IP_VS_FO tristate "weighted failover scheduling" - ---help--- + help The weighted failover scheduling algorithm directs network connections to the server with the highest weight that is currently available. @@ -166,7 +166,7 @@ config IP_VS_FO config IP_VS_OVF tristate "weighted overflow scheduling" - ---help--- + help The weighted overflow scheduling algorithm directs network connections to the server with the highest weight that is currently available and overflows to the next when active @@ -177,7 +177,7 @@ config IP_VS_OVF config IP_VS_LBLC tristate "locality-based least-connection scheduling" - ---help--- + help The locality-based least-connection scheduling algorithm is for destination IP load balancing. It is usually used in cache cluster. This algorithm usually directs packet destined for an IP address to @@ -191,7 +191,7 @@ config IP_VS_LBLC config IP_VS_LBLCR tristate "locality-based least-connection with replication scheduling" - ---help--- + help The locality-based least-connection with replication scheduling algorithm is also for destination IP load balancing. It is usually used in cache cluster. It differs from the LBLC scheduling @@ -209,7 +209,7 @@ config IP_VS_LBLCR config IP_VS_DH tristate "destination hashing scheduling" - ---help--- + help The destination hashing scheduling algorithm assigns network connections to the servers through looking up a statically assigned hash table by their destination IP addresses. @@ -219,7 +219,7 @@ config IP_VS_DH config IP_VS_SH tristate "source hashing scheduling" - ---help--- + help The source hashing scheduling algorithm assigns network connections to the servers through looking up a statically assigned hash table by their source IP addresses. @@ -229,7 +229,7 @@ config IP_VS_SH config IP_VS_MH tristate "maglev hashing scheduling" - ---help--- + help The maglev consistent hashing scheduling algorithm provides the Google's Maglev hashing algorithm as a IPVS scheduler. It assigns network connections to the servers through looking up a statically @@ -248,7 +248,7 @@ config IP_VS_MH config IP_VS_SED tristate "shortest expected delay scheduling" - ---help--- + help The shortest expected delay scheduling algorithm assigns network connections to the server with the shortest expected delay. The expected delay that the job will experience is (Ci + 1) / Ui if @@ -261,7 +261,7 @@ config IP_VS_SED config IP_VS_NQ tristate "never queue scheduling" - ---help--- + help The never queue scheduling algorithm adopts a two-speed model. When there is an idle server available, the job will be sent to the idle server, instead of waiting for a fast one. When there @@ -278,7 +278,7 @@ config IP_VS_SH_TAB_BITS int "IPVS source hashing table size (the Nth power of 2)" range 4 20 default 8 - ---help--- + help The source hashing scheduler maps source IPs to destinations stored in a hash table. This table is tiled by each destination until all slots in the table are filled. When using weights to @@ -293,7 +293,7 @@ config IP_VS_MH_TAB_INDEX int "IPVS maglev hashing table index of size (the prime numbers)" range 8 17 default 12 - ---help--- + help The maglev hashing scheduler maps source IPs to destinations stored in a hash table. This table is assigned by a preference list of the positions to each destination until all slots in @@ -312,7 +312,7 @@ config IP_VS_FTP depends on IP_VS_PROTO_TCP && NF_CONNTRACK && NF_NAT && \ NF_CONNTRACK_FTP select IP_VS_NFCT - ---help--- + help FTP is a protocol that transfers IP address and/or port number in the payload. In the virtual server via Network Address Translation, the IP address and port number of real servers cannot be sent to @@ -326,7 +326,7 @@ config IP_VS_FTP config IP_VS_NFCT bool "Netfilter connection tracking" depends on NF_CONNTRACK - ---help--- + help The Netfilter connection tracking support allows the IPVS connection state to be exported to the Netfilter framework for filtering purposes. @@ -335,7 +335,7 @@ config IP_VS_PE_SIP tristate "SIP persistence engine" depends on IP_VS_PROTO_UDP depends on NF_CONNTRACK_SIP - ---help--- + help Allow persistence based on the SIP Call-ID endif # IP_VS diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 605e0f68f8bd..2b8abbfe018c 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1717,6 +1717,8 @@ static int sync_thread_backup(void *data) { struct ip_vs_sync_thread_data *tinfo = data; struct netns_ipvs *ipvs = tinfo->ipvs; + struct sock *sk = tinfo->sock->sk; + struct udp_sock *up = udp_sk(sk); int len; pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, " @@ -1724,12 +1726,14 @@ static int sync_thread_backup(void *data) ipvs->bcfg.mcast_ifn, ipvs->bcfg.syncid, tinfo->id); while (!kthread_should_stop()) { - wait_event_interruptible(*sk_sleep(tinfo->sock->sk), - !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue) - || kthread_should_stop()); + wait_event_interruptible(*sk_sleep(sk), + !skb_queue_empty_lockless(&sk->sk_receive_queue) || + !skb_queue_empty_lockless(&up->reader_queue) || + kthread_should_stop()); /* do we have data now? */ - while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) { + while (!skb_queue_empty_lockless(&sk->sk_receive_queue) || + !skb_queue_empty_lockless(&up->reader_queue)) { len = ip_vs_receive(tinfo->sock, tinfo->buf, ipvs->bcfg.sync_maxlen); if (len <= 0) { diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 79cd9dde457b..f33d72c5b06e 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2158,6 +2158,8 @@ static int nf_conntrack_update(struct net *net, struct sk_buff *skb) err = __nf_conntrack_update(net, skb, ct, ctinfo); if (err < 0) return err; + + ct = nf_ct_get(skb, &ctinfo); } return nf_confirm_cthelper(skb, ct, ctinfo); diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 9eca90414bb7..b22801f97bce 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -382,7 +382,7 @@ static int help(struct sk_buff *skb, int ret; u32 seq; int dir = CTINFO2DIR(ctinfo); - unsigned int uninitialized_var(matchlen), uninitialized_var(matchoff); + unsigned int matchlen, matchoff; struct nf_ct_ftp_master *ct_ftp_info = nfct_help_data(ct); struct nf_conntrack_expect *exp; union nf_inet_addr *daddr; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index d7bd8b1f27d5..832eabecfbdd 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -939,7 +939,8 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family) filter->mark.mask = 0xffffffff; } } else if (cda[CTA_MARK_MASK]) { - return ERR_PTR(-EINVAL); + err = -EINVAL; + goto err_filter; } #endif if (!cda[CTA_FILTER]) @@ -947,15 +948,17 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family) err = ctnetlink_parse_zone(cda[CTA_ZONE], &filter->zone); if (err < 0) - return ERR_PTR(err); + goto err_filter; err = ctnetlink_parse_filter(cda[CTA_FILTER], filter); if (err < 0) - return ERR_PTR(err); + goto err_filter; if (filter->orig_flags) { - if (!cda[CTA_TUPLE_ORIG]) - return ERR_PTR(-EINVAL); + if (!cda[CTA_TUPLE_ORIG]) { + err = -EINVAL; + goto err_filter; + } err = ctnetlink_parse_tuple_filter(cda, &filter->orig, CTA_TUPLE_ORIG, @@ -963,23 +966,32 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family) &filter->zone, filter->orig_flags); if (err < 0) - return ERR_PTR(err); + goto err_filter; } if (filter->reply_flags) { - if (!cda[CTA_TUPLE_REPLY]) - return ERR_PTR(-EINVAL); + if (!cda[CTA_TUPLE_REPLY]) { + err = -EINVAL; + goto err_filter; + } err = ctnetlink_parse_tuple_filter(cda, &filter->reply, CTA_TUPLE_REPLY, filter->family, &filter->zone, filter->orig_flags); - if (err < 0) - return ERR_PTR(err); + if (err < 0) { + err = -EINVAL; + goto err_filter; + } } return filter; + +err_filter: + kfree(filter); + + return ERR_PTR(err); } static bool ctnetlink_needs_filter(u8 family, const struct nlattr * const *cda) diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c index f108a76925dd..2b01a151eaa8 100644 --- a/net/netfilter/nf_dup_netdev.c +++ b/net/netfilter/nf_dup_netdev.c @@ -73,3 +73,4 @@ EXPORT_SYMBOL_GPL(nft_fwd_dup_netdev_offload); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_DESCRIPTION("Netfilter packet duplication support"); diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 6a3034f84ab6..b1eb5272b379 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -387,51 +387,6 @@ static void nf_flow_offload_work_gc(struct work_struct *work) queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ); } -int nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table, - flow_setup_cb_t *cb, void *cb_priv) -{ - struct flow_block *block = &flow_table->flow_block; - struct flow_block_cb *block_cb; - int err = 0; - - down_write(&flow_table->flow_block_lock); - block_cb = flow_block_cb_lookup(block, cb, cb_priv); - if (block_cb) { - err = -EEXIST; - goto unlock; - } - - block_cb = flow_block_cb_alloc(cb, cb_priv, cb_priv, NULL); - if (IS_ERR(block_cb)) { - err = PTR_ERR(block_cb); - goto unlock; - } - - list_add_tail(&block_cb->list, &block->cb_list); - -unlock: - up_write(&flow_table->flow_block_lock); - return err; -} -EXPORT_SYMBOL_GPL(nf_flow_table_offload_add_cb); - -void nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table, - flow_setup_cb_t *cb, void *cb_priv) -{ - struct flow_block *block = &flow_table->flow_block; - struct flow_block_cb *block_cb; - - down_write(&flow_table->flow_block_lock); - block_cb = flow_block_cb_lookup(block, cb, cb_priv); - if (block_cb) { - list_del(&block_cb->list); - flow_block_cb_free(block_cb); - } else { - WARN_ON(true); - } - up_write(&flow_table->flow_block_lock); -} -EXPORT_SYMBOL_GPL(nf_flow_table_offload_del_cb); static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff, __be16 port, __be16 new_port) @@ -639,3 +594,4 @@ module_exit(nf_flow_table_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_DESCRIPTION("Netfilter flow table module"); diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c index 88bedf1ff1ae..bc4126d8ef65 100644 --- a/net/netfilter/nf_flow_table_inet.c +++ b/net/netfilter/nf_flow_table_inet.c @@ -72,3 +72,4 @@ module_exit(nf_flow_inet_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); MODULE_ALIAS_NF_FLOWTABLE(1); /* NFPROTO_INET */ +MODULE_DESCRIPTION("Netfilter flow table mixed IPv4/IPv6 module"); diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index 62651e6683f6..5fff1e040168 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -950,6 +950,7 @@ static void nf_flow_table_indr_cleanup(struct flow_block_cb *block_cb) nf_flow_table_gc_cleanup(flowtable, dev); down_write(&flowtable->flow_block_lock); list_del(&block_cb->list); + list_del(&block_cb->driver_list); flow_block_cb_free(block_cb); up_write(&flowtable->flow_block_lock); } diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index b9cbe1e2453e..ebcdc8e54476 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -1237,3 +1237,4 @@ EXPORT_SYMBOL_GPL(nf_synproxy_ipv6_fini); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_DESCRIPTION("nftables SYNPROXY expression support"); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 073aa1051d43..2b3862ea0505 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -12,6 +12,7 @@ #include <linux/netlink.h> #include <linux/vmalloc.h> #include <linux/rhashtable.h> +#include <linux/audit.h> #include <linux/netfilter.h> #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nf_tables.h> @@ -188,24 +189,6 @@ static void nft_netdev_unregister_hooks(struct net *net, nf_unregister_net_hook(net, &hook->ops); } -static int nft_register_basechain_hooks(struct net *net, int family, - struct nft_base_chain *basechain) -{ - if (family == NFPROTO_NETDEV) - return nft_netdev_register_hooks(net, &basechain->hook_list); - - return nf_register_net_hook(net, &basechain->ops); -} - -static void nft_unregister_basechain_hooks(struct net *net, int family, - struct nft_base_chain *basechain) -{ - if (family == NFPROTO_NETDEV) - nft_netdev_unregister_hooks(net, &basechain->hook_list); - else - nf_unregister_net_hook(net, &basechain->ops); -} - static int nf_tables_register_hook(struct net *net, const struct nft_table *table, struct nft_chain *chain) @@ -223,7 +206,10 @@ static int nf_tables_register_hook(struct net *net, if (basechain->type->ops_register) return basechain->type->ops_register(net, ops); - return nft_register_basechain_hooks(net, table->family, basechain); + if (table->family == NFPROTO_NETDEV) + return nft_netdev_register_hooks(net, &basechain->hook_list); + + return nf_register_net_hook(net, &basechain->ops); } static void nf_tables_unregister_hook(struct net *net, @@ -242,7 +228,10 @@ static void nf_tables_unregister_hook(struct net *net, if (basechain->type->ops_unregister) return basechain->type->ops_unregister(net, ops); - nft_unregister_basechain_hooks(net, table->family, basechain); + if (table->family == NFPROTO_NETDEV) + nft_netdev_unregister_hooks(net, &basechain->hook_list); + else + nf_unregister_net_hook(net, &basechain->ops); } static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type) @@ -693,6 +682,17 @@ static void nf_tables_table_notify(const struct nft_ctx *ctx, int event) { struct sk_buff *skb; int err; + char *buf = kasprintf(GFP_KERNEL, "%s:%llu;?:0", + ctx->table->name, ctx->table->handle); + + audit_log_nfcfg(buf, + ctx->family, + ctx->table->use, + event == NFT_MSG_NEWTABLE ? + AUDIT_NFT_OP_TABLE_REGISTER : + AUDIT_NFT_OP_TABLE_UNREGISTER, + GFP_KERNEL); + kfree(buf); if (!ctx->report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) @@ -832,8 +832,7 @@ static void nft_table_disable(struct net *net, struct nft_table *table, u32 cnt) if (cnt && i++ == cnt) break; - nft_unregister_basechain_hooks(net, table->family, - nft_base_chain(chain)); + nf_tables_unregister_hook(net, table, chain); } } @@ -848,8 +847,7 @@ static int nf_tables_table_enable(struct net *net, struct nft_table *table) if (!nft_is_base_chain(chain)) continue; - err = nft_register_basechain_hooks(net, table->family, - nft_base_chain(chain)); + err = nf_tables_register_hook(net, table, chain); if (err < 0) goto err_register_hooks; @@ -894,11 +892,12 @@ static int nf_tables_updtable(struct nft_ctx *ctx) nft_trans_table_enable(trans) = false; } else if (!(flags & NFT_TABLE_F_DORMANT) && ctx->table->flags & NFT_TABLE_F_DORMANT) { + ctx->table->flags &= ~NFT_TABLE_F_DORMANT; ret = nf_tables_table_enable(ctx->net, ctx->table); - if (ret >= 0) { - ctx->table->flags &= ~NFT_TABLE_F_DORMANT; + if (ret >= 0) nft_trans_table_enable(trans) = true; - } + else + ctx->table->flags |= NFT_TABLE_F_DORMANT; } if (ret < 0) goto err; @@ -1428,6 +1427,18 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event) { struct sk_buff *skb; int err; + char *buf = kasprintf(GFP_KERNEL, "%s:%llu;%s:%llu", + ctx->table->name, ctx->table->handle, + ctx->chain->name, ctx->chain->handle); + + audit_log_nfcfg(buf, + ctx->family, + ctx->chain->use, + event == NFT_MSG_NEWCHAIN ? + AUDIT_NFT_OP_CHAIN_REGISTER : + AUDIT_NFT_OP_CHAIN_UNREGISTER, + GFP_KERNEL); + kfree(buf); if (!ctx->report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) @@ -2693,6 +2704,18 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx, { struct sk_buff *skb; int err; + char *buf = kasprintf(GFP_KERNEL, "%s:%llu;%s:%llu", + ctx->table->name, ctx->table->handle, + ctx->chain->name, ctx->chain->handle); + + audit_log_nfcfg(buf, + ctx->family, + rule->handle, + event == NFT_MSG_NEWRULE ? + AUDIT_NFT_OP_RULE_REGISTER : + AUDIT_NFT_OP_RULE_UNREGISTER, + GFP_KERNEL); + kfree(buf); if (!ctx->report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) @@ -3695,6 +3718,18 @@ static void nf_tables_set_notify(const struct nft_ctx *ctx, struct sk_buff *skb; u32 portid = ctx->portid; int err; + char *buf = kasprintf(gfp_flags, "%s:%llu;%s:%llu", + ctx->table->name, ctx->table->handle, + set->name, set->handle); + + audit_log_nfcfg(buf, + ctx->family, + set->field_count, + event == NFT_MSG_NEWSET ? + AUDIT_NFT_OP_SET_REGISTER : + AUDIT_NFT_OP_SET_UNREGISTER, + gfp_flags); + kfree(buf); if (!ctx->report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) @@ -4811,6 +4846,18 @@ static void nf_tables_setelem_notify(const struct nft_ctx *ctx, u32 portid = ctx->portid; struct sk_buff *skb; int err; + char *buf = kasprintf(GFP_KERNEL, "%s:%llu;%s:%llu", + ctx->table->name, ctx->table->handle, + set->name, set->handle); + + audit_log_nfcfg(buf, + ctx->family, + set->handle, + event == NFT_MSG_NEWSETELEM ? + AUDIT_NFT_OP_SETELEM_REGISTER : + AUDIT_NFT_OP_SETELEM_UNREGISTER, + GFP_KERNEL); + kfree(buf); if (!ctx->report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) return; @@ -5892,6 +5939,20 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) obj->ops->type->type != filter->type) goto cont; + if (reset) { + char *buf = kasprintf(GFP_ATOMIC, + "%s:%llu;?:0", + table->name, + table->handle); + + audit_log_nfcfg(buf, + family, + obj->handle, + AUDIT_NFT_OP_OBJ_RESET, + GFP_ATOMIC); + kfree(buf); + } + if (nf_tables_fill_obj_info(skb, net, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, @@ -6002,6 +6063,18 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk, if (NFNL_MSG_TYPE(nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET) reset = true; + if (reset) { + char *buf = kasprintf(GFP_ATOMIC, "%s:%llu;?:0", + table->name, table->handle); + + audit_log_nfcfg(buf, + family, + obj->handle, + AUDIT_NFT_OP_OBJ_RESET, + GFP_ATOMIC); + kfree(buf); + } + err = nf_tables_fill_obj_info(skb2, net, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0, family, table, obj, reset); @@ -6077,6 +6150,17 @@ void nft_obj_notify(struct net *net, const struct nft_table *table, { struct sk_buff *skb; int err; + char *buf = kasprintf(gfp, "%s:%llu;?:0", + table->name, table->handle); + + audit_log_nfcfg(buf, + family, + obj->handle, + event == NFT_MSG_NEWOBJ ? + AUDIT_NFT_OP_OBJ_REGISTER : + AUDIT_NFT_OP_OBJ_UNREGISTER, + gfp); + kfree(buf); if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) @@ -6550,12 +6634,22 @@ err1: return err; } +static void nft_flowtable_hook_release(struct nft_flowtable_hook *flowtable_hook) +{ + struct nft_hook *this, *next; + + list_for_each_entry_safe(this, next, &flowtable_hook->list, list) { + list_del(&this->list); + kfree(this); + } +} + static int nft_delflowtable_hook(struct nft_ctx *ctx, struct nft_flowtable *flowtable) { const struct nlattr * const *nla = ctx->nla; struct nft_flowtable_hook flowtable_hook; - struct nft_hook *this, *next, *hook; + struct nft_hook *this, *hook; struct nft_trans *trans; int err; @@ -6564,33 +6658,40 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx, if (err < 0) return err; - list_for_each_entry_safe(this, next, &flowtable_hook.list, list) { + list_for_each_entry(this, &flowtable_hook.list, list) { hook = nft_hook_list_find(&flowtable->hook_list, this); if (!hook) { err = -ENOENT; goto err_flowtable_del_hook; } hook->inactive = true; - list_del(&this->list); - kfree(this); } trans = nft_trans_alloc(ctx, NFT_MSG_DELFLOWTABLE, sizeof(struct nft_trans_flowtable)); - if (!trans) - return -ENOMEM; + if (!trans) { + err = -ENOMEM; + goto err_flowtable_del_hook; + } nft_trans_flowtable(trans) = flowtable; nft_trans_flowtable_update(trans) = true; INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans)); + nft_flowtable_hook_release(&flowtable_hook); list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; err_flowtable_del_hook: - list_for_each_entry(hook, &flowtable_hook.list, list) + list_for_each_entry(this, &flowtable_hook.list, list) { + hook = nft_hook_list_find(&flowtable->hook_list, this); + if (!hook) + break; + hook->inactive = false; + } + nft_flowtable_hook_release(&flowtable_hook); return err; } @@ -6856,6 +6957,18 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx, { struct sk_buff *skb; int err; + char *buf = kasprintf(GFP_KERNEL, "%s:%llu;%s:%llu", + flowtable->table->name, flowtable->table->handle, + flowtable->name, flowtable->handle); + + audit_log_nfcfg(buf, + ctx->family, + flowtable->hooknum, + event == NFT_MSG_NEWFLOWTABLE ? + AUDIT_NFT_OP_FLOWTABLE_REGISTER : + AUDIT_NFT_OP_FLOWTABLE_UNREGISTER, + GFP_KERNEL); + kfree(buf); if (ctx->report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) @@ -6977,6 +7090,9 @@ static void nf_tables_gen_notify(struct net *net, struct sk_buff *skb, struct sk_buff *skb2; int err; + audit_log_nfcfg("?:0;?:0", 0, net->nft.base_seq, + AUDIT_NFT_OP_GEN_REGISTER, GFP_KERNEL); + if (nlmsg_report(nlh) && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) return; diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index 185fc82c99aa..c7cf1cde46de 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -296,6 +296,7 @@ static void nft_indr_block_cleanup(struct flow_block_cb *block_cb) nft_flow_block_offload_init(&bo, dev_net(dev), FLOW_BLOCK_UNBIND, basechain, &extack); mutex_lock(&net->nft.commit_mutex); + list_del(&block_cb->driver_list); list_move(&block_cb->list, &bo.cb_list); nft_flow_offload_unbind(&bo, basechain); mutex_unlock(&net->nft.commit_mutex); diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 99127e2d95a8..5f24edf95830 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -33,6 +33,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER); +MODULE_DESCRIPTION("Netfilter messages via netlink socket"); #define nfnl_dereference_protected(id) \ rcu_dereference_protected(table[(id)].subsys, \ diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 0ba020ca38e6..f02992419850 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -689,7 +689,7 @@ nfulnl_log_packet(struct net *net, struct nfnl_log_net *log = nfnl_log_pernet(net); const struct nfnl_ct_hook *nfnl_ct = NULL; struct nf_conn *ct = NULL; - enum ip_conntrack_info uninitialized_var(ctinfo); + enum ip_conntrack_info ctinfo; if (li_user && li_user->type == NF_LOG_TYPE_ULOG) li = li_user; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 3243a31f6e82..dadfc06245a3 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -388,7 +388,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, struct net_device *indev; struct net_device *outdev; struct nf_conn *ct = NULL; - enum ip_conntrack_info uninitialized_var(ctinfo); + enum ip_conntrack_info ctinfo; struct nfnl_ct_hook *nfnl_ct; bool csum_verify; char *secdata = NULL; @@ -1168,7 +1168,7 @@ static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl, struct nfqnl_instance *queue; unsigned int verdict; struct nf_queue_entry *entry; - enum ip_conntrack_info uninitialized_var(ctinfo); + enum ip_conntrack_info ctinfo; struct nfnl_ct_hook *nfnl_ct; struct nf_conn *ct = NULL; struct nfnl_queue_net *q = nfnl_queue_pernet(net); diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index f9adca62ccb3..aa1a066cb74b 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -902,3 +902,4 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); MODULE_ALIAS_NFT_EXPR("match"); MODULE_ALIAS_NFT_EXPR("target"); +MODULE_DESCRIPTION("x_tables over nftables support"); diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c index 69d6173f91e2..7d0761fad37e 100644 --- a/net/netfilter/nft_connlimit.c +++ b/net/netfilter/nft_connlimit.c @@ -280,3 +280,4 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso"); MODULE_ALIAS_NFT_EXPR("connlimit"); MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CONNLIMIT); +MODULE_DESCRIPTION("nftables connlimit rule support"); diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index f6d4d0fa23a6..85ed461ec24e 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -303,3 +303,4 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_ALIAS_NFT_EXPR("counter"); MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_COUNTER); +MODULE_DESCRIPTION("nftables counter rule support"); diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index faea72c2df32..77258af1fce0 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -1345,3 +1345,4 @@ MODULE_ALIAS_NFT_EXPR("notrack"); MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_HELPER); MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_TIMEOUT); MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_EXPECT); +MODULE_DESCRIPTION("Netfilter nf_tables conntrack module"); diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c index c2e78c160fd7..40788b3f1071 100644 --- a/net/netfilter/nft_dup_netdev.c +++ b/net/netfilter/nft_dup_netdev.c @@ -102,3 +102,4 @@ module_exit(nft_dup_netdev_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); MODULE_ALIAS_NFT_AF_EXPR(5, "dup"); +MODULE_DESCRIPTION("nftables netdev packet duplication support"); diff --git a/net/netfilter/nft_fib_inet.c b/net/netfilter/nft_fib_inet.c index 465432e0531b..a88d44e163d1 100644 --- a/net/netfilter/nft_fib_inet.c +++ b/net/netfilter/nft_fib_inet.c @@ -76,3 +76,4 @@ module_exit(nft_fib_inet_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); MODULE_ALIAS_NFT_AF_EXPR(1, "fib"); +MODULE_DESCRIPTION("nftables fib inet support"); diff --git a/net/netfilter/nft_fib_netdev.c b/net/netfilter/nft_fib_netdev.c index a2e726ae7f07..3f3478abd845 100644 --- a/net/netfilter/nft_fib_netdev.c +++ b/net/netfilter/nft_fib_netdev.c @@ -85,3 +85,4 @@ module_exit(nft_fib_netdev_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo M. Bermudo Garay <pablombg@gmail.com>"); MODULE_ALIAS_NFT_AF_EXPR(5, "fib"); +MODULE_DESCRIPTION("nftables netdev fib lookups support"); diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index b70b48996801..3b9b97aa4b32 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -286,3 +286,4 @@ module_exit(nft_flow_offload_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); MODULE_ALIAS_NFT_EXPR("flow_offload"); +MODULE_DESCRIPTION("nftables hardware flow offload module"); diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index b836d550b919..96371d878e7e 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -248,3 +248,4 @@ module_exit(nft_hash_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Laura Garcia <nevola@gmail.com>"); MODULE_ALIAS_NFT_EXPR("hash"); +MODULE_DESCRIPTION("Netfilter nftables hash module"); diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index 35b67d7e3694..0e2c315c3b5e 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -372,3 +372,4 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_ALIAS_NFT_EXPR("limit"); MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_LIMIT); +MODULE_DESCRIPTION("nftables limit expression support"); diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index fe4831f2258f..57899454a530 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -298,3 +298,4 @@ module_exit(nft_log_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_ALIAS_NFT_EXPR("log"); +MODULE_DESCRIPTION("Netfilter nf_tables log module"); diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index bc9fd98c5d6d..71390b727040 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -305,3 +305,4 @@ module_exit(nft_masq_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>"); MODULE_ALIAS_NFT_EXPR("masq"); +MODULE_DESCRIPTION("Netfilter nftables masquerade expression support"); diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 23a7bfd10521..4bcf33b049c4 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -402,3 +402,4 @@ module_exit(nft_nat_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>"); MODULE_ALIAS_NFT_EXPR("nat"); +MODULE_DESCRIPTION("Network Address Translation support"); diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c index 48edb9d5f012..f1fc824f9737 100644 --- a/net/netfilter/nft_numgen.c +++ b/net/netfilter/nft_numgen.c @@ -217,3 +217,4 @@ module_exit(nft_ng_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Laura Garcia <nevola@gmail.com>"); MODULE_ALIAS_NFT_EXPR("numgen"); +MODULE_DESCRIPTION("nftables number generator module"); diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index bfd18d2b65a2..5f9207a9f485 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -252,3 +252,4 @@ module_exit(nft_objref_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); MODULE_ALIAS_NFT_EXPR("objref"); +MODULE_DESCRIPTION("nftables stateful object reference module"); diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c index b42247aa48a9..c261d57a666a 100644 --- a/net/netfilter/nft_osf.c +++ b/net/netfilter/nft_osf.c @@ -149,3 +149,4 @@ module_exit(nft_osf_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Fernando Fernandez <ffmancera@riseup.net>"); MODULE_ALIAS_NFT_EXPR("osf"); +MODULE_DESCRIPTION("nftables passive OS fingerprint support"); diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c index 5ece0a6aa8c3..23265d757acb 100644 --- a/net/netfilter/nft_queue.c +++ b/net/netfilter/nft_queue.c @@ -216,3 +216,4 @@ module_exit(nft_queue_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Eric Leblond <eric@regit.org>"); MODULE_ALIAS_NFT_EXPR("queue"); +MODULE_DESCRIPTION("Netfilter nftables queue module"); diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index 4413690591f2..0363f533a42b 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -254,3 +254,4 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); MODULE_ALIAS_NFT_EXPR("quota"); MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_QUOTA); +MODULE_DESCRIPTION("Netfilter nftables quota module"); diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index 5b779171565c..2056051c0af0 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -292,3 +292,4 @@ module_exit(nft_redir_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>"); MODULE_ALIAS_NFT_EXPR("redir"); +MODULE_DESCRIPTION("Netfilter nftables redirect support"); diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c index 00f865fb80ca..86eafbb0fdd0 100644 --- a/net/netfilter/nft_reject.c +++ b/net/netfilter/nft_reject.c @@ -119,3 +119,4 @@ EXPORT_SYMBOL_GPL(nft_reject_icmpv6_code); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_DESCRIPTION("Netfilter x_tables over nftables module"); diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c index f41f414b72d1..cf8f2646e93c 100644 --- a/net/netfilter/nft_reject_inet.c +++ b/net/netfilter/nft_reject_inet.c @@ -149,3 +149,4 @@ module_exit(nft_reject_inet_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_ALIAS_NFT_AF_EXPR(1, "reject"); +MODULE_DESCRIPTION("Netfilter nftables reject inet support"); diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 8b5acc6910fd..8c04388296b0 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1242,7 +1242,9 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); } - if (!*this_cpu_ptr(m->scratch) || bsize_max > m->bsize_max) { + if (!*get_cpu_ptr(m->scratch) || bsize_max > m->bsize_max) { + put_cpu_ptr(m->scratch); + err = pipapo_realloc_scratch(m, bsize_max); if (err) return err; @@ -1250,6 +1252,8 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, this_cpu_write(nft_pipapo_scratch_index, false); m->bsize_max = bsize_max; + } else { + put_cpu_ptr(m->scratch); } *ext2 = &e->ext; diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 62f416bc0579..b6aad3fc46c3 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -271,12 +271,14 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, if (nft_rbtree_interval_start(new)) { if (nft_rbtree_interval_end(rbe) && - nft_set_elem_active(&rbe->ext, genmask)) + nft_set_elem_active(&rbe->ext, genmask) && + !nft_set_elem_expired(&rbe->ext)) overlap = false; } else { overlap = nft_rbtree_interval_end(rbe) && nft_set_elem_active(&rbe->ext, - genmask); + genmask) && + !nft_set_elem_expired(&rbe->ext); } } else if (d > 0) { p = &parent->rb_right; @@ -284,9 +286,11 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, if (nft_rbtree_interval_end(new)) { overlap = nft_rbtree_interval_end(rbe) && nft_set_elem_active(&rbe->ext, - genmask); + genmask) && + !nft_set_elem_expired(&rbe->ext); } else if (nft_rbtree_interval_end(rbe) && - nft_set_elem_active(&rbe->ext, genmask)) { + nft_set_elem_active(&rbe->ext, genmask) && + !nft_set_elem_expired(&rbe->ext)) { overlap = true; } } else { @@ -294,15 +298,18 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, nft_rbtree_interval_start(new)) { p = &parent->rb_left; - if (nft_set_elem_active(&rbe->ext, genmask)) + if (nft_set_elem_active(&rbe->ext, genmask) && + !nft_set_elem_expired(&rbe->ext)) overlap = false; } else if (nft_rbtree_interval_start(rbe) && nft_rbtree_interval_end(new)) { p = &parent->rb_right; - if (nft_set_elem_active(&rbe->ext, genmask)) + if (nft_set_elem_active(&rbe->ext, genmask) && + !nft_set_elem_expired(&rbe->ext)) overlap = false; - } else if (nft_set_elem_active(&rbe->ext, genmask)) { + } else if (nft_set_elem_active(&rbe->ext, genmask) && + !nft_set_elem_expired(&rbe->ext)) { *ext = &rbe->ext; return -EEXIST; } else { diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c index e2c1fc608841..4fda8b3f1762 100644 --- a/net/netfilter/nft_synproxy.c +++ b/net/netfilter/nft_synproxy.c @@ -388,3 +388,4 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Fernando Fernandez <ffmancera@riseup.net>"); MODULE_ALIAS_NFT_EXPR("synproxy"); MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_SYNPROXY); +MODULE_DESCRIPTION("nftables SYNPROXY expression support"); diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index 30be5787fbde..d3eb953d0333 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -719,3 +719,4 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); MODULE_ALIAS_NFT_EXPR("tunnel"); MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_TUNNEL); +MODULE_DESCRIPTION("nftables tunnel expression support"); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 99a468be4a59..9ad8f3ff66f5 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1410,7 +1410,8 @@ xt_replace_table(struct xt_table *table, audit_log_nfcfg(table->name, table->af, private->number, !private->number ? AUDIT_XT_OP_REGISTER : - AUDIT_XT_OP_REPLACE); + AUDIT_XT_OP_REPLACE, + GFP_KERNEL); return private; } EXPORT_SYMBOL_GPL(xt_replace_table); @@ -1473,7 +1474,7 @@ void *xt_unregister_table(struct xt_table *table) list_del(&table->list); mutex_unlock(&xt[table->af].mutex); audit_log_nfcfg(table->name, table->af, private->number, - AUDIT_XT_OP_UNREGISTER); + AUDIT_XT_OP_UNREGISTER, GFP_KERNEL); kfree(table); return private; diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c index a8e5f6c8db7a..b4f7bbc3f3ca 100644 --- a/net/netfilter/xt_nat.c +++ b/net/netfilter/xt_nat.c @@ -244,3 +244,4 @@ MODULE_ALIAS("ipt_SNAT"); MODULE_ALIAS("ipt_DNAT"); MODULE_ALIAS("ip6t_SNAT"); MODULE_ALIAS("ip6t_DNAT"); +MODULE_DESCRIPTION("SNAT and DNAT targets support"); diff --git a/net/netlabel/Kconfig b/net/netlabel/Kconfig index 07b03c306f28..4383ac29693e 100644 --- a/net/netlabel/Kconfig +++ b/net/netlabel/Kconfig @@ -8,7 +8,7 @@ config NETLABEL depends on SECURITY select CRC_CCITT if IPV6 default n - ---help--- + help NetLabel provides support for explicit network packet labeling protocols such as CIPSO and RIPSO. For more information see Documentation/netlabel as well as the NetLabel SourceForge project diff --git a/net/netlink/Kconfig b/net/netlink/Kconfig index 20f967974da0..1039d4f2ce11 100644 --- a/net/netlink/Kconfig +++ b/net/netlink/Kconfig @@ -6,6 +6,6 @@ config NETLINK_DIAG tristate "NETLINK: socket monitoring interface" default n - ---help--- + help Support for NETLINK socket monitoring interface used by the ss tool. If unsure, say Y. diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 6c19b91bbb86..9395ee8a868d 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -351,22 +351,11 @@ int genl_register_family(struct genl_family *family) start = end = GENL_ID_VFS_DQUOT; } - if (family->maxattr && !family->parallel_ops) { - family->attrbuf = kmalloc_array(family->maxattr + 1, - sizeof(struct nlattr *), - GFP_KERNEL); - if (family->attrbuf == NULL) { - err = -ENOMEM; - goto errout_locked; - } - } else - family->attrbuf = NULL; - family->id = idr_alloc_cyclic(&genl_fam_idr, family, start, end + 1, GFP_KERNEL); if (family->id < 0) { err = family->id; - goto errout_free; + goto errout_locked; } err = genl_validate_assign_mc_groups(family); @@ -385,8 +374,6 @@ int genl_register_family(struct genl_family *family) errout_remove: idr_remove(&genl_fam_idr, family->id); -errout_free: - kfree(family->attrbuf); errout_locked: genl_unlock_all(); return err; @@ -419,8 +406,6 @@ int genl_unregister_family(const struct genl_family *family) atomic_read(&genl_sk_destructing_cnt) == 0); genl_unlock(); - kfree(family->attrbuf); - genl_ctrl_event(CTRL_CMD_DELFAMILY, family, NULL, 0); return 0; @@ -474,8 +459,7 @@ genl_family_rcv_msg_attrs_parse(const struct genl_family *family, struct netlink_ext_ack *extack, const struct genl_ops *ops, int hdrlen, - enum genl_validate_flags no_strict_flag, - bool parallel) + enum genl_validate_flags no_strict_flag) { enum netlink_validation validate = ops->validate & no_strict_flag ? NL_VALIDATE_LIBERAL : @@ -486,31 +470,23 @@ genl_family_rcv_msg_attrs_parse(const struct genl_family *family, if (!family->maxattr) return NULL; - if (parallel) { - attrbuf = kmalloc_array(family->maxattr + 1, - sizeof(struct nlattr *), GFP_KERNEL); - if (!attrbuf) - return ERR_PTR(-ENOMEM); - } else { - attrbuf = family->attrbuf; - } + attrbuf = kmalloc_array(family->maxattr + 1, + sizeof(struct nlattr *), GFP_KERNEL); + if (!attrbuf) + return ERR_PTR(-ENOMEM); err = __nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr, family->policy, validate, extack); if (err) { - if (parallel) - kfree(attrbuf); + kfree(attrbuf); return ERR_PTR(err); } return attrbuf; } -static void genl_family_rcv_msg_attrs_free(const struct genl_family *family, - struct nlattr **attrbuf, - bool parallel) +static void genl_family_rcv_msg_attrs_free(struct nlattr **attrbuf) { - if (parallel) - kfree(attrbuf); + kfree(attrbuf); } struct genl_start_context { @@ -537,15 +513,14 @@ static int genl_start(struct netlink_callback *cb) attrs = genl_family_rcv_msg_attrs_parse(ctx->family, ctx->nlh, ctx->extack, ops, ctx->hdrlen, - GENL_DONT_VALIDATE_DUMP_STRICT, - true); + GENL_DONT_VALIDATE_DUMP_STRICT); if (IS_ERR(attrs)) return PTR_ERR(attrs); no_attrs: info = genl_dumpit_info_alloc(); if (!info) { - kfree(attrs); + genl_family_rcv_msg_attrs_free(attrs); return -ENOMEM; } info->family = ctx->family; @@ -562,7 +537,7 @@ no_attrs: } if (rc) { - kfree(attrs); + genl_family_rcv_msg_attrs_free(info->attrs); genl_dumpit_info_free(info); cb->data = NULL; } @@ -591,7 +566,7 @@ static int genl_lock_done(struct netlink_callback *cb) rc = ops->done(cb); genl_unlock(); } - genl_family_rcv_msg_attrs_free(info->family, info->attrs, false); + genl_family_rcv_msg_attrs_free(info->attrs); genl_dumpit_info_free(info); return rc; } @@ -604,7 +579,7 @@ static int genl_parallel_done(struct netlink_callback *cb) if (ops->done) rc = ops->done(cb); - genl_family_rcv_msg_attrs_free(info->family, info->attrs, true); + genl_family_rcv_msg_attrs_free(info->attrs); genl_dumpit_info_free(info); return rc; } @@ -671,8 +646,7 @@ static int genl_family_rcv_msg_doit(const struct genl_family *family, attrbuf = genl_family_rcv_msg_attrs_parse(family, nlh, extack, ops, hdrlen, - GENL_DONT_VALIDATE_STRICT, - family->parallel_ops); + GENL_DONT_VALIDATE_STRICT); if (IS_ERR(attrbuf)) return PTR_ERR(attrbuf); @@ -698,7 +672,7 @@ static int genl_family_rcv_msg_doit(const struct genl_family *family, family->post_doit(ops, skb, &info); out: - genl_family_rcv_msg_attrs_free(family, attrbuf, family->parallel_ops); + genl_family_rcv_msg_attrs_free(attrbuf); return err; } @@ -1170,60 +1144,11 @@ static struct genl_family genl_ctrl __ro_after_init = { .netnsok = true, }; -static int genl_bind(struct net *net, int group) -{ - struct genl_family *f; - int err = -ENOENT; - unsigned int id; - - down_read(&cb_lock); - - idr_for_each_entry(&genl_fam_idr, f, id) { - if (group >= f->mcgrp_offset && - group < f->mcgrp_offset + f->n_mcgrps) { - int fam_grp = group - f->mcgrp_offset; - - if (!f->netnsok && net != &init_net) - err = -ENOENT; - else if (f->mcast_bind) - err = f->mcast_bind(net, fam_grp); - else - err = 0; - break; - } - } - up_read(&cb_lock); - - return err; -} - -static void genl_unbind(struct net *net, int group) -{ - struct genl_family *f; - unsigned int id; - - down_read(&cb_lock); - - idr_for_each_entry(&genl_fam_idr, f, id) { - if (group >= f->mcgrp_offset && - group < f->mcgrp_offset + f->n_mcgrps) { - int fam_grp = group - f->mcgrp_offset; - - if (f->mcast_unbind) - f->mcast_unbind(net, fam_grp); - break; - } - } - up_read(&cb_lock); -} - static int __net_init genl_pernet_init(struct net *net) { struct netlink_kernel_cfg cfg = { .input = genl_rcv, .flags = NL_CFG_F_NONROOT_RECV, - .bind = genl_bind, - .unbind = genl_unbind, }; /* we'll bump the group number right afterwards */ diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index eccc7d366e17..f90ef6934b8f 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -70,6 +70,7 @@ static const struct proto_ops nr_proto_ops; * separate class since they always nest. */ static struct lock_class_key nr_netdev_xmit_lock_key; +static struct lock_class_key nr_netdev_addr_lock_key; static void nr_set_lockdep_one(struct net_device *dev, struct netdev_queue *txq, @@ -80,6 +81,7 @@ static void nr_set_lockdep_one(struct net_device *dev, static void nr_set_lockdep_key(struct net_device *dev) { + lockdep_set_class(&dev->addr_list_lock, &nr_netdev_addr_lock_key); netdev_for_each_tx_queue(dev, nr_set_lockdep_one, NULL); } diff --git a/net/nfc/hci/Kconfig b/net/nfc/hci/Kconfig index 4822d6f46947..9500b8a27475 100644 --- a/net/nfc/hci/Kconfig +++ b/net/nfc/hci/Kconfig @@ -13,6 +13,6 @@ config NFC_SHDLC select CRC_CCITT bool "SHDLC link layer for HCI based NFC drivers" default n - ---help--- + help Say yes if you use an NFC HCI driver that requires SHDLC link layer. If unsure, say N here. diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 7cd524884304..78ea8c94dcba 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -1228,10 +1228,13 @@ int nci_register_device(struct nci_dev *ndev) rc = nfc_register_device(ndev->nfc_dev); if (rc) - goto destroy_rx_wq_exit; + goto destroy_tx_wq_exit; goto exit; +destroy_tx_wq_exit: + destroy_workqueue(ndev->tx_wq); + destroy_rx_wq_exit: destroy_workqueue(ndev->rx_wq); diff --git a/net/nsh/Kconfig b/net/nsh/Kconfig index 19af948ab6f0..84f2a2823417 100644 --- a/net/nsh/Kconfig +++ b/net/nsh/Kconfig @@ -2,7 +2,7 @@ menuconfig NET_NSH tristate "Network Service Header (NSH) protocol" default n - ---help--- + help Network Service Header is an implementation of Service Function Chaining (RFC 7665). The current implementation in Linux supports only MD type 1 and only with the openvswitch module. diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index 22d7d5604b4c..15bd287f5cbd 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -15,7 +15,7 @@ config OPENVSWITCH select NET_MPLS_GSO select DST_CACHE select NET_NSH - ---help--- + help Open vSwitch is a multilayer Ethernet switch targeted at virtualized environments. In addition to supporting a variety of features expected in a traditional hardware switch, it enables fine-grained @@ -43,7 +43,7 @@ config OPENVSWITCH_GRE depends on OPENVSWITCH depends on NET_IPGRE default OPENVSWITCH - ---help--- + help If you say Y here, then the Open vSwitch will be able create GRE vport. @@ -56,7 +56,7 @@ config OPENVSWITCH_VXLAN depends on OPENVSWITCH depends on VXLAN default OPENVSWITCH - ---help--- + help If you say Y here, then the Open vSwitch will be able create vxlan vport. Say N to exclude this support and reduce the binary size. @@ -68,7 +68,7 @@ config OPENVSWITCH_GENEVE depends on OPENVSWITCH depends on GENEVE default OPENVSWITCH - ---help--- + help If you say Y here, then the Open vSwitch will be able create geneve vport. Say N to exclude this support and reduce the binary size. diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index fc0efd8833c8..2611657f40ca 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -1169,9 +1169,10 @@ static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb, struct sw_flow_key *key, const struct nlattr *attr, bool last) { + struct ovs_skb_cb *ovs_cb = OVS_CB(skb); const struct nlattr *actions, *cpl_arg; + int len, max_len, rem = nla_len(attr); const struct check_pkt_len_arg *arg; - int rem = nla_len(attr); bool clone_flow_key; /* The first netlink attribute in 'attr' is always @@ -1180,7 +1181,11 @@ static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb, cpl_arg = nla_data(attr); arg = nla_data(cpl_arg); - if (skb->len <= arg->pkt_len) { + len = ovs_cb->mru ? ovs_cb->mru + skb->mac_len : skb->len; + max_len = arg->pkt_len; + + if ((skb_is_gso(skb) && skb_gso_validate_mac_len(skb, max_len)) || + len <= max_len) { /* Second netlink attribute in 'attr' is always * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL'. */ diff --git a/net/packet/Kconfig b/net/packet/Kconfig index b4abad135294..2997382d597c 100644 --- a/net/packet/Kconfig +++ b/net/packet/Kconfig @@ -5,7 +5,7 @@ config PACKET tristate "Packet socket" - ---help--- + help The Packet protocol is used by applications which communicate directly with network devices without an intermediate network protocol implemented in the kernel, e.g. tcpdump. If you want them @@ -20,6 +20,6 @@ config PACKET_DIAG tristate "Packet: sockets monitoring interface" depends on PACKET default n - ---help--- + help Support for PF_PACKET sockets monitoring interface used by the ss tool. If unsure, say Y. diff --git a/net/qrtr/Kconfig b/net/qrtr/Kconfig index f362ca316015..b4020b84760f 100644 --- a/net/qrtr/Kconfig +++ b/net/qrtr/Kconfig @@ -4,7 +4,7 @@ config QRTR tristate "Qualcomm IPC Router support" - ---help--- + help Say Y if you intend to use Qualcomm IPC router protocol. The protocol is used to communicate with services provided by other hardware blocks in the system. @@ -17,13 +17,13 @@ if QRTR config QRTR_SMD tristate "SMD IPC Router channels" depends on RPMSG || (COMPILE_TEST && RPMSG=n) - ---help--- + help Say Y here to support SMD based ipcrouter channels. SMD is the most common transport for IPC Router. config QRTR_TUN tristate "TUN device for Qualcomm IPC Router" - ---help--- + help Say Y here to expose a character device that allows user space to implement endpoints of QRTR, for purpose of tunneling data to other hosts or testing purposes. diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 2d8d6131bc5f..300a104b9a0f 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -166,6 +166,7 @@ static void __qrtr_node_release(struct kref *kref) { struct qrtr_node *node = container_of(kref, struct qrtr_node, ref); struct radix_tree_iter iter; + struct qrtr_tx_flow *flow; unsigned long flags; void __rcu **slot; @@ -181,8 +182,9 @@ static void __qrtr_node_release(struct kref *kref) /* Free tx flow counters */ radix_tree_for_each_slot(slot, &node->qrtr_tx_flow, &iter, 0) { + flow = *slot; radix_tree_iter_delete(&node->qrtr_tx_flow, &iter, slot); - kfree(*slot); + kfree(flow); } kfree(node); } @@ -427,7 +429,7 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) unsigned int ver; size_t hdrlen; - if (len & 3) + if (len == 0 || len & 3) return -EINVAL; skb = netdev_alloc_skb(NULL, len); @@ -441,6 +443,8 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) switch (ver) { case QRTR_PROTO_VER_1: + if (len < sizeof(*v1)) + goto err; v1 = data; hdrlen = sizeof(*v1); @@ -454,6 +458,8 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) size = le32_to_cpu(v1->size); break; case QRTR_PROTO_VER_2: + if (len < sizeof(*v2)) + goto err; v2 = data; hdrlen = sizeof(*v2) + v2->optlen; @@ -1174,6 +1180,7 @@ static int qrtr_release(struct socket *sock) sk->sk_state_change(sk); sock_set_flag(sk, SOCK_DEAD); + sock_orphan(sk); sock->sk = NULL; if (!sock_flag(sk, SOCK_ZAPPED)) diff --git a/net/rds/Kconfig b/net/rds/Kconfig index c64e154bc18f..75cd696963b2 100644 --- a/net/rds/Kconfig +++ b/net/rds/Kconfig @@ -3,14 +3,14 @@ config RDS tristate "The Reliable Datagram Sockets Protocol" depends on INET - ---help--- + help The RDS (Reliable Datagram Sockets) protocol provides reliable, sequenced delivery of datagrams over Infiniband or TCP. config RDS_RDMA tristate "RDS over Infiniband" depends on RDS && INFINIBAND && INFINIBAND_ADDR_TRANS - ---help--- + help Allow RDS to use Infiniband as a transport. This transport supports RDMA operations. @@ -18,7 +18,7 @@ config RDS_TCP tristate "RDS over TCP" depends on RDS depends on IPV6 || !IPV6 - ---help--- + help Allow RDS to use TCP as a transport. This transport does not support RDMA operations. diff --git a/net/rds/connection.c b/net/rds/connection.c index ed7f2133acc2..f2fcab182095 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -905,6 +905,17 @@ void rds_conn_path_connect_if_down(struct rds_conn_path *cp) } EXPORT_SYMBOL_GPL(rds_conn_path_connect_if_down); +/* Check connectivity of all paths + */ +void rds_check_all_paths(struct rds_connection *conn) +{ + int i = 0; + + do { + rds_conn_path_connect_if_down(&conn->c_path[i]); + } while (++i < conn->c_npaths); +} + void rds_conn_connect_if_down(struct rds_connection *conn) { WARN_ON(conn->c_trans->t_mp_capable); diff --git a/net/rds/ib.h b/net/rds/ib.h index 5ae069d39eab..8dfff43cf07f 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -264,7 +264,13 @@ struct rds_ib_device { int *vector_load; }; -#define ibdev_to_node(ibdev) dev_to_node((ibdev)->dev.parent) +static inline int ibdev_to_node(struct ib_device *ibdev) +{ + struct device *parent; + + parent = ibdev->dev.parent; + return parent ? dev_to_node(parent) : NUMA_NO_NODE; +} #define rdsibdev_to_node(rdsibdev) ibdev_to_node(rdsibdev->dev) /* bits for i_ack_flags */ diff --git a/net/rds/rds.h b/net/rds/rds.h index 6019b0c004a9..106e862996b9 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -778,6 +778,7 @@ void rds_conn_drop(struct rds_connection *conn); void rds_conn_path_drop(struct rds_conn_path *cpath, bool destroy); void rds_conn_connect_if_down(struct rds_connection *conn); void rds_conn_path_connect_if_down(struct rds_conn_path *cp); +void rds_check_all_paths(struct rds_connection *conn); void rds_for_each_conn_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens, @@ -823,6 +824,12 @@ rds_conn_path_up(struct rds_conn_path *cp) } static inline int +rds_conn_path_down(struct rds_conn_path *cp) +{ + return atomic_read(&cp->cp_state) == RDS_CONN_DOWN; +} + +static inline int rds_conn_up(struct rds_connection *conn) { WARN_ON(conn->c_trans->t_mp_capable); diff --git a/net/rds/recv.c b/net/rds/recv.c index c8404971d5ab..aba4afe4dfed 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -450,12 +450,13 @@ static int rds_still_queued(struct rds_sock *rs, struct rds_incoming *inc, int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msghdr) { struct rds_notifier *notifier; - struct rds_rdma_notify cmsg = { 0 }; /* fill holes with zero */ + struct rds_rdma_notify cmsg; unsigned int count = 0, max_messages = ~0U; unsigned long flags; LIST_HEAD(copy); int err = 0; + memset(&cmsg, 0, sizeof(cmsg)); /* fill holes with zero */ /* put_cmsg copies to user space and thus may sleep. We can't do this * with rs_lock held, so first grab as many notifications as we can stuff diff --git a/net/rds/send.c b/net/rds/send.c index 68e2bdb08fd0..9a529a01cdc6 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1340,7 +1340,8 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) goto out; } - rds_conn_path_connect_if_down(cpath); + if (rds_conn_path_down(cpath)) + rds_check_all_paths(conn); ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs); if (ret) { diff --git a/net/rds/transport.c b/net/rds/transport.c index 46f709a4b577..f8001ec80867 100644 --- a/net/rds/transport.c +++ b/net/rds/transport.c @@ -38,6 +38,12 @@ #include "rds.h" #include "loop.h" +static char * const rds_trans_modules[] = { + [RDS_TRANS_IB] = "rds_rdma", + [RDS_TRANS_GAP] = NULL, + [RDS_TRANS_TCP] = "rds_tcp", +}; + static struct rds_transport *transports[RDS_TRANS_COUNT]; static DECLARE_RWSEM(rds_trans_sem); @@ -110,18 +116,20 @@ struct rds_transport *rds_trans_get(int t_type) { struct rds_transport *ret = NULL; struct rds_transport *trans; - unsigned int i; down_read(&rds_trans_sem); - for (i = 0; i < RDS_TRANS_COUNT; i++) { - trans = transports[i]; - - if (trans && trans->t_type == t_type && - (!trans->t_owner || try_module_get(trans->t_owner))) { - ret = trans; - break; - } + trans = transports[t_type]; + if (!trans) { + up_read(&rds_trans_sem); + if (rds_trans_modules[t_type]) + request_module(rds_trans_modules[t_type]); + down_read(&rds_trans_sem); + trans = transports[t_type]; } + if (trans && trans->t_type == t_type && + (!trans->t_owner || try_module_get(trans->t_owner))) + ret = trans; + up_read(&rds_trans_sem); return ret; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index e7a872207b46..ce85656ac9c1 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -71,6 +71,7 @@ ax25_address rose_callsign; * separate class since they always nest. */ static struct lock_class_key rose_netdev_xmit_lock_key; +static struct lock_class_key rose_netdev_addr_lock_key; static void rose_set_lockdep_one(struct net_device *dev, struct netdev_queue *txq, @@ -81,6 +82,7 @@ static void rose_set_lockdep_one(struct net_device *dev, static void rose_set_lockdep_key(struct net_device *dev) { + lockdep_set_class(&dev->addr_list_lock, &rose_netdev_addr_lock_key); netdev_for_each_tx_queue(dev, rose_set_lockdep_one, NULL); } diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 9fe264bec70c..9a2139ebd67d 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -810,100 +810,6 @@ static inline bool rxrpc_is_client_call(const struct rxrpc_call *call) } /* - * Transition a call to the complete state. - */ -static inline bool __rxrpc_set_call_completion(struct rxrpc_call *call, - enum rxrpc_call_completion compl, - u32 abort_code, - int error) -{ - if (call->state < RXRPC_CALL_COMPLETE) { - call->abort_code = abort_code; - call->error = error; - call->completion = compl, - call->state = RXRPC_CALL_COMPLETE; - trace_rxrpc_call_complete(call); - wake_up(&call->waitq); - return true; - } - return false; -} - -static inline bool rxrpc_set_call_completion(struct rxrpc_call *call, - enum rxrpc_call_completion compl, - u32 abort_code, - int error) -{ - bool ret; - - write_lock_bh(&call->state_lock); - ret = __rxrpc_set_call_completion(call, compl, abort_code, error); - write_unlock_bh(&call->state_lock); - return ret; -} - -/* - * Record that a call successfully completed. - */ -static inline bool __rxrpc_call_completed(struct rxrpc_call *call) -{ - return __rxrpc_set_call_completion(call, RXRPC_CALL_SUCCEEDED, 0, 0); -} - -static inline bool rxrpc_call_completed(struct rxrpc_call *call) -{ - bool ret; - - write_lock_bh(&call->state_lock); - ret = __rxrpc_call_completed(call); - write_unlock_bh(&call->state_lock); - return ret; -} - -/* - * Record that a call is locally aborted. - */ -static inline bool __rxrpc_abort_call(const char *why, struct rxrpc_call *call, - rxrpc_seq_t seq, - u32 abort_code, int error) -{ - trace_rxrpc_abort(call->debug_id, why, call->cid, call->call_id, seq, - abort_code, error); - return __rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED, - abort_code, error); -} - -static inline bool rxrpc_abort_call(const char *why, struct rxrpc_call *call, - rxrpc_seq_t seq, u32 abort_code, int error) -{ - bool ret; - - write_lock_bh(&call->state_lock); - ret = __rxrpc_abort_call(why, call, seq, abort_code, error); - write_unlock_bh(&call->state_lock); - return ret; -} - -/* - * Abort a call due to a protocol error. - */ -static inline bool __rxrpc_abort_eproto(struct rxrpc_call *call, - struct sk_buff *skb, - const char *eproto_why, - const char *why, - u32 abort_code) -{ - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - - trace_rxrpc_rx_eproto(call, sp->hdr.serial, eproto_why); - return rxrpc_abort_call(why, call, sp->hdr.seq, abort_code, -EPROTO); -} - -#define rxrpc_abort_eproto(call, skb, eproto_why, abort_why, abort_code) \ - __rxrpc_abort_eproto((call), (skb), tracepoint_string(eproto_why), \ - (abort_why), (abort_code)) - -/* * conn_client.c */ extern unsigned int rxrpc_max_client_connections; @@ -1101,9 +1007,34 @@ extern const struct seq_operations rxrpc_peer_seq_ops; * recvmsg.c */ void rxrpc_notify_socket(struct rxrpc_call *); +bool __rxrpc_set_call_completion(struct rxrpc_call *, enum rxrpc_call_completion, u32, int); +bool rxrpc_set_call_completion(struct rxrpc_call *, enum rxrpc_call_completion, u32, int); +bool __rxrpc_call_completed(struct rxrpc_call *); +bool rxrpc_call_completed(struct rxrpc_call *); +bool __rxrpc_abort_call(const char *, struct rxrpc_call *, rxrpc_seq_t, u32, int); +bool rxrpc_abort_call(const char *, struct rxrpc_call *, rxrpc_seq_t, u32, int); int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int); /* + * Abort a call due to a protocol error. + */ +static inline bool __rxrpc_abort_eproto(struct rxrpc_call *call, + struct sk_buff *skb, + const char *eproto_why, + const char *why, + u32 abort_code) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + trace_rxrpc_rx_eproto(call, sp->hdr.serial, eproto_why); + return rxrpc_abort_call(why, call, sp->hdr.seq, abort_code, -EPROTO); +} + +#define rxrpc_abort_eproto(call, skb, eproto_why, abort_why, abort_code) \ + __rxrpc_abort_eproto((call), (skb), tracepoint_string(eproto_why), \ + (abort_why), (abort_code)) + +/* * rtt.c */ void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace, diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index b7611cc159e5..032ed76c0166 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -22,6 +22,11 @@ #include <net/ip.h> #include "ar-internal.h" +static void rxrpc_dummy_notify(struct sock *sk, struct rxrpc_call *call, + unsigned long user_call_ID) +{ +} + /* * Preallocate a single service call, connection and peer and, if possible, * give them a user ID and attach the user's side of the ID to them. @@ -228,6 +233,8 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) if (rx->discard_new_call) { _debug("discard %lx", call->user_call_ID); rx->discard_new_call(call, call->user_call_ID); + if (call->notify_rx) + call->notify_rx = rxrpc_dummy_notify; rxrpc_put_call(call, rxrpc_call_put_kernel); } rxrpc_call_completed(call); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 2a65ac41055f..6be2672a65ea 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -248,7 +248,18 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) if (anno_type != RXRPC_TX_ANNO_RETRANS) continue; + /* We need to reset the retransmission state, but we need to do + * so before we drop the lock as a new ACK/NAK may come in and + * confuse things + */ + annotation &= ~RXRPC_TX_ANNO_MASK; + annotation |= RXRPC_TX_ANNO_UNACK | RXRPC_TX_ANNO_RESENT; + call->rxtx_annotations[ix] = annotation; + skb = call->rxtx_buffer[ix]; + if (!skb) + continue; + rxrpc_get_skb(skb, rxrpc_skb_got); spin_unlock_bh(&call->lock); @@ -262,24 +273,6 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) rxrpc_free_skb(skb, rxrpc_skb_freed); spin_lock_bh(&call->lock); - - /* We need to clear the retransmit state, but there are two - * things we need to be aware of: A new ACK/NAK might have been - * received and the packet might have been hard-ACK'd (in which - * case it will no longer be in the buffer). - */ - if (after(seq, call->tx_hard_ack)) { - annotation = call->rxtx_annotations[ix]; - anno_type = annotation & RXRPC_TX_ANNO_MASK; - if (anno_type == RXRPC_TX_ANNO_RETRANS || - anno_type == RXRPC_TX_ANNO_NAK) { - annotation &= ~RXRPC_TX_ANNO_MASK; - annotation |= RXRPC_TX_ANNO_UNACK; - } - annotation |= RXRPC_TX_ANNO_RESENT; - call->rxtx_annotations[ix] = annotation; - } - if (after(call->tx_hard_ack, seq)) seq = call->tx_hard_ack; } @@ -320,7 +313,6 @@ recheck_state: if (call->state == RXRPC_CALL_COMPLETE) { del_timer_sync(&call->timer); - rxrpc_notify_socket(call); goto out_put; } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index f07970207b54..38a46167523f 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -288,7 +288,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, */ ret = rxrpc_connect_call(rx, call, cp, srx, gfp); if (ret < 0) - goto error; + goto error_attached_to_socket; trace_rxrpc_call(call->debug_id, rxrpc_call_connected, atomic_read(&call->usage), here, NULL); @@ -308,18 +308,29 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, error_dup_user_ID: write_unlock(&rx->call_lock); release_sock(&rx->sk); - ret = -EEXIST; - -error: __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, - RX_CALL_DEAD, ret); + RX_CALL_DEAD, -EEXIST); trace_rxrpc_call(call->debug_id, rxrpc_call_error, - atomic_read(&call->usage), here, ERR_PTR(ret)); + atomic_read(&call->usage), here, ERR_PTR(-EEXIST)); rxrpc_release_call(rx, call); mutex_unlock(&call->user_mutex); rxrpc_put_call(call, rxrpc_call_put); - _leave(" = %d", ret); - return ERR_PTR(ret); + _leave(" = -EEXIST"); + return ERR_PTR(-EEXIST); + + /* We got an error, but the call is attached to the socket and is in + * need of release. However, we might now race with recvmsg() when + * completing the call queues it. Return 0 from sys_sendmsg() and + * leave the error to recvmsg() to deal with. + */ +error_attached_to_socket: + trace_rxrpc_call(call->debug_id, rxrpc_call_error, + atomic_read(&call->usage), here, ERR_PTR(ret)); + set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); + __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, + RX_CALL_DEAD, ret); + _leave(" = c=%08x [err]", call->debug_id); + return call; } /* diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 06fcff2ebbba..447f55ca6886 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -173,10 +173,9 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, else trace_rxrpc_rx_abort(call, serial, conn->abort_code); - if (rxrpc_set_call_completion(call, compl, - conn->abort_code, - conn->error)) - rxrpc_notify_socket(call); + rxrpc_set_call_completion(call, compl, + conn->abort_code, + conn->error); } } diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 19e141eeed17..8cbe0bf20ed5 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -212,9 +212,11 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) call->peer->cong_cwnd = call->cong_cwnd; - spin_lock_bh(&conn->params.peer->lock); - hlist_del_rcu(&call->error_link); - spin_unlock_bh(&conn->params.peer->lock); + if (!hlist_unhashed(&call->error_link)) { + spin_lock_bh(&call->peer->lock); + hlist_del_rcu(&call->error_link); + spin_unlock_bh(&call->peer->lock); + } if (rxrpc_is_client_call(call)) return rxrpc_disconnect_client_call(call); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 3be4177baf70..767579328a06 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -275,7 +275,6 @@ static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, case RXRPC_CALL_SERVER_AWAIT_ACK: __rxrpc_call_completed(call); - rxrpc_notify_socket(call); state = call->state; break; @@ -723,13 +722,12 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU), rwind, ntohl(ackinfo->jumbo_max)); + if (rwind > RXRPC_RXTX_BUFF_SIZE - 1) + rwind = RXRPC_RXTX_BUFF_SIZE - 1; if (call->tx_winsize != rwind) { - if (rwind > RXRPC_RXTX_BUFF_SIZE - 1) - rwind = RXRPC_RXTX_BUFF_SIZE - 1; if (rwind > call->tx_winsize) wake = true; - trace_rxrpc_rx_rwind_change(call, sp->hdr.serial, - ntohl(ackinfo->rwind), wake); + trace_rxrpc_rx_rwind_change(call, sp->hdr.serial, rwind, wake); call->tx_winsize = rwind; } @@ -1013,9 +1011,8 @@ static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb) _proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code); - if (rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, - abort_code, -ECONNABORTED)) - rxrpc_notify_socket(call); + rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, + abort_code, -ECONNABORTED); } /* @@ -1102,7 +1099,6 @@ static void rxrpc_input_implicit_end_call(struct rxrpc_sock *rx, spin_lock(&rx->incoming_lock); __rxrpc_disconnect_call(conn, call); spin_unlock(&rx->incoming_lock); - rxrpc_notify_socket(call); } /* diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 112e490ebbcd..a852f46d5234 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -292,9 +292,7 @@ static void rxrpc_distribute_error(struct rxrpc_peer *peer, int error, hlist_for_each_entry_rcu(call, &peer->error_targets, error_link) { rxrpc_see_call(call); - if (call->state < RXRPC_CALL_COMPLETE && - rxrpc_set_call_completion(call, compl, 0, -error)) - rxrpc_notify_socket(call); + rxrpc_set_call_completion(call, compl, 0, -error); } } diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 8578c39ec839..efecc5a8f67d 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -59,6 +59,85 @@ void rxrpc_notify_socket(struct rxrpc_call *call) } /* + * Transition a call to the complete state. + */ +bool __rxrpc_set_call_completion(struct rxrpc_call *call, + enum rxrpc_call_completion compl, + u32 abort_code, + int error) +{ + if (call->state < RXRPC_CALL_COMPLETE) { + call->abort_code = abort_code; + call->error = error; + call->completion = compl, + call->state = RXRPC_CALL_COMPLETE; + trace_rxrpc_call_complete(call); + wake_up(&call->waitq); + rxrpc_notify_socket(call); + return true; + } + return false; +} + +bool rxrpc_set_call_completion(struct rxrpc_call *call, + enum rxrpc_call_completion compl, + u32 abort_code, + int error) +{ + bool ret = false; + + if (call->state < RXRPC_CALL_COMPLETE) { + write_lock_bh(&call->state_lock); + ret = __rxrpc_set_call_completion(call, compl, abort_code, error); + write_unlock_bh(&call->state_lock); + } + return ret; +} + +/* + * Record that a call successfully completed. + */ +bool __rxrpc_call_completed(struct rxrpc_call *call) +{ + return __rxrpc_set_call_completion(call, RXRPC_CALL_SUCCEEDED, 0, 0); +} + +bool rxrpc_call_completed(struct rxrpc_call *call) +{ + bool ret = false; + + if (call->state < RXRPC_CALL_COMPLETE) { + write_lock_bh(&call->state_lock); + ret = __rxrpc_call_completed(call); + write_unlock_bh(&call->state_lock); + } + return ret; +} + +/* + * Record that a call is locally aborted. + */ +bool __rxrpc_abort_call(const char *why, struct rxrpc_call *call, + rxrpc_seq_t seq, u32 abort_code, int error) +{ + trace_rxrpc_abort(call->debug_id, why, call->cid, call->call_id, seq, + abort_code, error); + return __rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED, + abort_code, error); +} + +bool rxrpc_abort_call(const char *why, struct rxrpc_call *call, + rxrpc_seq_t seq, u32 abort_code, int error) +{ + bool ret; + + write_lock_bh(&call->state_lock); + ret = __rxrpc_abort_call(why, call, seq, abort_code, error); + write_unlock_bh(&call->state_lock); + return ret; +} + +/* * Pass a call terminating message to userspace. */ static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg) @@ -464,7 +543,7 @@ try_again: list_empty(&rx->recvmsg_q) && rx->sk.sk_state != RXRPC_SERVER_LISTENING) { release_sock(&rx->sk); - return -ENODATA; + return -EAGAIN; } if (list_empty(&rx->recvmsg_q)) { @@ -541,7 +620,7 @@ try_again: goto error_unlock_call; } - if (msg->msg_name) { + if (msg->msg_name && call->peer) { struct sockaddr_rxrpc *srx = msg->msg_name; size_t len = sizeof(call->peer->srx); diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 5e9c43d4a314..f3f6da6e4ad2 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -261,10 +261,8 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, case -ENETUNREACH: case -EHOSTUNREACH: case -ECONNREFUSED: - rxrpc_set_call_completion(call, - RXRPC_CALL_LOCAL_ERROR, + rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 0, ret); - rxrpc_notify_socket(call); goto out; } _debug("need instant resend %d", ret); @@ -306,7 +304,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, /* this should be in poll */ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); - if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) + if (sk->sk_shutdown & SEND_SHUTDOWN) return -EPIPE; more = msg->msg_flags & MSG_MORE; @@ -683,6 +681,9 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) if (IS_ERR(call)) return PTR_ERR(call); /* ... and we have the call lock. */ + ret = 0; + if (READ_ONCE(call->state) == RXRPC_CALL_COMPLETE) + goto out_put_unlock; } else { switch (READ_ONCE(call->state)) { case RXRPC_CALL_UNINITIALISED: diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 2f20073f4f84..84badf00647e 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -6,7 +6,7 @@ menuconfig NET_SCHED bool "QoS and/or fair queueing" select NET_SCH_FIFO - ---help--- + help When the kernel has several packets to send out over a network device, it has to decide which ones to send first, which ones to delay, and which ones to drop. This is the job of the queueing @@ -47,7 +47,7 @@ comment "Queueing/Scheduling" config NET_SCH_CBQ tristate "Class Based Queueing (CBQ)" - ---help--- + help Say Y here if you want to use the Class-Based Queueing (CBQ) packet scheduling algorithm. This algorithm classifies the waiting packets into a tree-like hierarchy of classes; the leaves of this tree are @@ -64,7 +64,7 @@ config NET_SCH_CBQ config NET_SCH_HTB tristate "Hierarchical Token Bucket (HTB)" - ---help--- + help Say Y here if you want to use the Hierarchical Token Buckets (HTB) packet scheduling algorithm. See <http://luxik.cdi.cz/~devik/qos/htb/> for complete manual and @@ -78,7 +78,7 @@ config NET_SCH_HTB config NET_SCH_HFSC tristate "Hierarchical Fair Service Curve (HFSC)" - ---help--- + help Say Y here if you want to use the Hierarchical Fair Service Curve (HFSC) packet scheduling algorithm. @@ -88,7 +88,7 @@ config NET_SCH_HFSC config NET_SCH_ATM tristate "ATM Virtual Circuits (ATM)" depends on ATM - ---help--- + help Say Y here if you want to use the ATM pseudo-scheduler. This provides a framework for invoking classifiers, which in turn select classes of this queuing discipline. Each class maps @@ -101,7 +101,7 @@ config NET_SCH_ATM config NET_SCH_PRIO tristate "Multi Band Priority Queueing (PRIO)" - ---help--- + help Say Y here if you want to use an n-band priority queue packet scheduler. @@ -110,7 +110,7 @@ config NET_SCH_PRIO config NET_SCH_MULTIQ tristate "Hardware Multiqueue-aware Multi Band Queuing (MULTIQ)" - ---help--- + help Say Y here if you want to use an n-band queue packet scheduler to support devices that have multiple hardware transmit queues. @@ -119,7 +119,7 @@ config NET_SCH_MULTIQ config NET_SCH_RED tristate "Random Early Detection (RED)" - ---help--- + help Say Y here if you want to use the Random Early Detection (RED) packet scheduling algorithm. @@ -130,7 +130,7 @@ config NET_SCH_RED config NET_SCH_SFB tristate "Stochastic Fair Blue (SFB)" - ---help--- + help Say Y here if you want to use the Stochastic Fair Blue (SFB) packet scheduling algorithm. @@ -141,7 +141,7 @@ config NET_SCH_SFB config NET_SCH_SFQ tristate "Stochastic Fairness Queueing (SFQ)" - ---help--- + help Say Y here if you want to use the Stochastic Fairness Queueing (SFQ) packet scheduling algorithm. @@ -152,7 +152,7 @@ config NET_SCH_SFQ config NET_SCH_TEQL tristate "True Link Equalizer (TEQL)" - ---help--- + help Say Y here if you want to use the True Link Equalizer (TLE) packet scheduling algorithm. This queueing discipline allows the combination of several physical devices into one virtual device. @@ -164,7 +164,7 @@ config NET_SCH_TEQL config NET_SCH_TBF tristate "Token Bucket Filter (TBF)" - ---help--- + help Say Y here if you want to use the Token Bucket Filter (TBF) packet scheduling algorithm. @@ -175,7 +175,7 @@ config NET_SCH_TBF config NET_SCH_CBS tristate "Credit Based Shaper (CBS)" - ---help--- + help Say Y here if you want to use the Credit Based Shaper (CBS) packet scheduling algorithm. @@ -208,7 +208,7 @@ config NET_SCH_TAPRIO config NET_SCH_GRED tristate "Generic Random Early Detection (GRED)" - ---help--- + help Say Y here if you want to use the Generic Random Early Detection (GRED) packet scheduling algorithm for some of your network devices (see the top of <file:net/sched/sch_red.c> for details and @@ -219,7 +219,7 @@ config NET_SCH_GRED config NET_SCH_DSMARK tristate "Differentiated Services marker (DSMARK)" - ---help--- + help Say Y if you want to schedule packets according to the Differentiated Services architecture proposed in RFC 2475. Technical information on this method, with pointers to associated @@ -230,7 +230,7 @@ config NET_SCH_DSMARK config NET_SCH_NETEM tristate "Network emulator (NETEM)" - ---help--- + help Say Y if you want to emulate network delay, loss, and packet re-ordering. This is often useful to simulate networks when testing applications or protocols. @@ -384,7 +384,7 @@ config NET_SCH_INGRESS depends on NET_CLS_ACT select NET_INGRESS select NET_EGRESS - ---help--- + help Say Y here if you want to use classifiers for incoming and/or outgoing packets. This qdisc doesn't do anything else besides running classifiers, which can also have actions attached to them. In case of outgoing packets, @@ -398,7 +398,7 @@ config NET_SCH_INGRESS config NET_SCH_PLUG tristate "Plug network traffic until release (PLUG)" - ---help--- + help This queuing discipline allows userspace to plug/unplug a network output queue, using the netlink interface. When it receives an @@ -441,7 +441,7 @@ config NET_SCH_ETS menuconfig NET_SCH_DEFAULT bool "Allow override default queue discipline" - ---help--- + help Support for selection of default queuing discipline. Nearly all users can safely say no here, and the default @@ -492,7 +492,7 @@ config NET_CLS config NET_CLS_BASIC tristate "Elementary classification (BASIC)" select NET_CLS - ---help--- + help Say Y here if you want to be able to classify packets using only extended matches and actions. @@ -502,7 +502,7 @@ config NET_CLS_BASIC config NET_CLS_TCINDEX tristate "Traffic-Control Index (TCINDEX)" select NET_CLS - ---help--- + help Say Y here if you want to be able to classify packets based on traffic control indices. You will want this feature if you want to implement Differentiated Services together with DSMARK. @@ -515,7 +515,7 @@ config NET_CLS_ROUTE4 depends on INET select IP_ROUTE_CLASSID select NET_CLS - ---help--- + help If you say Y here, you will be able to classify packets according to the route table entry they matched. @@ -525,7 +525,7 @@ config NET_CLS_ROUTE4 config NET_CLS_FW tristate "Netfilter mark (FW)" select NET_CLS - ---help--- + help If you say Y here, you will be able to classify packets according to netfilter/firewall marks. @@ -535,7 +535,7 @@ config NET_CLS_FW config NET_CLS_U32 tristate "Universal 32bit comparisons w/ hashing (U32)" select NET_CLS - ---help--- + help Say Y here to be able to classify packets using a universal 32bit pieces based comparison scheme. @@ -545,20 +545,20 @@ config NET_CLS_U32 config CLS_U32_PERF bool "Performance counters support" depends on NET_CLS_U32 - ---help--- + help Say Y here to make u32 gather additional statistics useful for fine tuning u32 classifiers. config CLS_U32_MARK bool "Netfilter marks support" depends on NET_CLS_U32 - ---help--- + help Say Y here to be able to use netfilter marks as u32 key. config NET_CLS_RSVP tristate "IPv4 Resource Reservation Protocol (RSVP)" select NET_CLS - ---help--- + help The Resource Reservation Protocol (RSVP) permits end systems to request a minimum and maximum data flow rate for a connection; this is important for real time data such as streaming sound or video. @@ -572,7 +572,7 @@ config NET_CLS_RSVP config NET_CLS_RSVP6 tristate "IPv6 Resource Reservation Protocol (RSVP6)" select NET_CLS - ---help--- + help The Resource Reservation Protocol (RSVP) permits end systems to request a minimum and maximum data flow rate for a connection; this is important for real time data such as streaming sound or video. @@ -586,7 +586,7 @@ config NET_CLS_RSVP6 config NET_CLS_FLOW tristate "Flow classifier" select NET_CLS - ---help--- + help If you say Y here, you will be able to classify packets based on a configurable combination of packet keys. This is mostly useful in combination with SFQ. @@ -599,7 +599,7 @@ config NET_CLS_CGROUP select NET_CLS select CGROUP_NET_CLASSID depends on CGROUPS - ---help--- + help Say Y here if you want to classify packets based on the control cgroup of their process. @@ -609,7 +609,7 @@ config NET_CLS_CGROUP config NET_CLS_BPF tristate "BPF-based classifier" select NET_CLS - ---help--- + help If you say Y here, you will be able to classify packets based on programmable BPF (JIT'ed) filters as an alternative to ematches. @@ -619,7 +619,7 @@ config NET_CLS_BPF config NET_CLS_FLOWER tristate "Flower classifier" select NET_CLS - ---help--- + help If you say Y here, you will be able to classify packets based on a configurable combination of packet keys and masks. @@ -629,7 +629,7 @@ config NET_CLS_FLOWER config NET_CLS_MATCHALL tristate "Match-all classifier" select NET_CLS - ---help--- + help If you say Y here, you will be able to classify packets based on nothing. Every packet will match. @@ -639,7 +639,7 @@ config NET_CLS_MATCHALL config NET_EMATCH bool "Extended Matches" select NET_CLS - ---help--- + help Say Y here if you want to use extended matches on top of classifiers and select the extended matches below. @@ -653,7 +653,7 @@ config NET_EMATCH_STACK int "Stack size" depends on NET_EMATCH default "32" - ---help--- + help Size of the local stack variable used while evaluating the tree of ematches. Limits the depth of the tree, i.e. the number of encapsulated precedences. Every level requires 4 bytes of additional @@ -662,7 +662,7 @@ config NET_EMATCH_STACK config NET_EMATCH_CMP tristate "Simple packet data comparison" depends on NET_EMATCH - ---help--- + help Say Y here if you want to be able to classify packets based on simple packet data comparisons for 8, 16, and 32bit values. @@ -672,7 +672,7 @@ config NET_EMATCH_CMP config NET_EMATCH_NBYTE tristate "Multi byte comparison" depends on NET_EMATCH - ---help--- + help Say Y here if you want to be able to classify packets based on multiple byte comparisons mainly useful for IPv6 address comparisons. @@ -682,7 +682,7 @@ config NET_EMATCH_NBYTE config NET_EMATCH_U32 tristate "U32 key" depends on NET_EMATCH - ---help--- + help Say Y here if you want to be able to classify packets using the famous u32 key in combination with logic relations. @@ -692,7 +692,7 @@ config NET_EMATCH_U32 config NET_EMATCH_META tristate "Metadata" depends on NET_EMATCH - ---help--- + help Say Y here if you want to be able to classify packets based on metadata such as load average, netfilter attributes, socket attributes and routing decisions. @@ -707,7 +707,7 @@ config NET_EMATCH_TEXT select TEXTSEARCH_KMP select TEXTSEARCH_BM select TEXTSEARCH_FSM - ---help--- + help Say Y here if you want to be able to classify packets based on textsearch comparisons. @@ -717,7 +717,7 @@ config NET_EMATCH_TEXT config NET_EMATCH_CANID tristate "CAN Identifier" depends on NET_EMATCH && (CAN=y || CAN=m) - ---help--- + help Say Y here if you want to be able to classify CAN frames based on CAN Identifier. @@ -727,7 +727,7 @@ config NET_EMATCH_CANID config NET_EMATCH_IPSET tristate "IPset" depends on NET_EMATCH && IP_SET - ---help--- + help Say Y here if you want to be able to classify packets based on ipset membership. @@ -737,7 +737,7 @@ config NET_EMATCH_IPSET config NET_EMATCH_IPT tristate "IPtables Matches" depends on NET_EMATCH && NETFILTER && NETFILTER_XTABLES - ---help--- + help Say Y here to be able to classify packets based on iptables matches. Current supported match is "policy" which allows packet classification @@ -749,7 +749,7 @@ config NET_EMATCH_IPT config NET_CLS_ACT bool "Actions" select NET_CLS - ---help--- + help Say Y here if you want to use traffic control actions. Actions get attached to classifiers and are invoked after a successful classification. They are used to overwrite the classification @@ -761,7 +761,7 @@ config NET_CLS_ACT config NET_ACT_POLICE tristate "Traffic Policing" depends on NET_CLS_ACT - ---help--- + help Say Y here if you want to do traffic policing, i.e. strict bandwidth limiting. This action replaces the existing policing module. @@ -772,7 +772,7 @@ config NET_ACT_POLICE config NET_ACT_GACT tristate "Generic actions" depends on NET_CLS_ACT - ---help--- + help Say Y here to take generic actions such as dropping and accepting packets. @@ -782,13 +782,13 @@ config NET_ACT_GACT config GACT_PROB bool "Probability support" depends on NET_ACT_GACT - ---help--- + help Say Y here to use the generic action randomly or deterministically. config NET_ACT_MIRRED tristate "Redirecting and Mirroring" depends on NET_CLS_ACT - ---help--- + help Say Y here to allow packets to be mirrored or redirected to other devices. @@ -799,7 +799,7 @@ config NET_ACT_SAMPLE tristate "Traffic Sampling" depends on NET_CLS_ACT select PSAMPLE - ---help--- + help Say Y here to allow packet sampling tc action. The packet sample action consists of statistically choosing packets and sampling them using the psample module. @@ -810,7 +810,7 @@ config NET_ACT_SAMPLE config NET_ACT_IPT tristate "IPtables targets" depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES - ---help--- + help Say Y here to be able to invoke iptables targets after successful classification. @@ -820,7 +820,7 @@ config NET_ACT_IPT config NET_ACT_NAT tristate "Stateless NAT" depends on NET_CLS_ACT - ---help--- + help Say Y here to do stateless NAT on IPv4 packets. You should use netfilter for NAT unless you know what you are doing. @@ -830,7 +830,7 @@ config NET_ACT_NAT config NET_ACT_PEDIT tristate "Packet Editing" depends on NET_CLS_ACT - ---help--- + help Say Y here if you want to mangle the content of packets. To compile this code as a module, choose M here: the @@ -839,7 +839,7 @@ config NET_ACT_PEDIT config NET_ACT_SIMP tristate "Simple Example (Debug)" depends on NET_CLS_ACT - ---help--- + help Say Y here to add a simple action for demonstration purposes. It is meant as an example and for debugging purposes. It will print a configured policy string followed by the packet count @@ -853,7 +853,7 @@ config NET_ACT_SIMP config NET_ACT_SKBEDIT tristate "SKB Editing" depends on NET_CLS_ACT - ---help--- + help Say Y here to change skb priority or queue_mapping settings. If unsure, say N. @@ -865,7 +865,7 @@ config NET_ACT_CSUM tristate "Checksum Updating" depends on NET_CLS_ACT && INET select LIBCRC32C - ---help--- + help Say Y here to update some common checksum after some direct packet alterations. @@ -886,7 +886,7 @@ config NET_ACT_MPLS config NET_ACT_VLAN tristate "Vlan manipulation" depends on NET_CLS_ACT - ---help--- + help Say Y here to push or pop vlan headers. If unsure, say N. @@ -897,7 +897,7 @@ config NET_ACT_VLAN config NET_ACT_BPF tristate "BPF based action" depends on NET_CLS_ACT - ---help--- + help Say Y here to execute BPF code on packets. The BPF code will decide if the packet should be dropped or not. @@ -910,7 +910,7 @@ config NET_ACT_CONNMARK tristate "Netfilter Connection Mark Retriever" depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES depends on NF_CONNTRACK && NF_CONNTRACK_MARK - ---help--- + help Say Y here to allow retrieving of conn mark If unsure, say N. @@ -938,7 +938,7 @@ config NET_ACT_CTINFO config NET_ACT_SKBMOD tristate "skb data modification action" depends on NET_CLS_ACT - ---help--- + help Say Y here to allow modification of skb data If unsure, say N. @@ -950,7 +950,7 @@ config NET_ACT_IFE tristate "Inter-FE action based on IETF ForCES InterFE LFB" depends on NET_CLS_ACT select NET_IFE - ---help--- + help Say Y here to allow for sourcing and terminating metadata For details refer to netdev01 paper: "Distributing Linux Traffic Control Classifier-Action Subsystem" @@ -962,7 +962,7 @@ config NET_ACT_IFE config NET_ACT_TUNNEL_KEY tristate "IP tunnel metadata manipulation" depends on NET_CLS_ACT - ---help--- + help Say Y here to set/release ip tunnel metadata. If unsure, say N. diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 43a243081e7d..f901421b0634 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -43,17 +43,20 @@ static int tcf_connmark_act(struct sk_buff *skb, const struct tc_action *a, tcf_lastuse_update(&ca->tcf_tm); bstats_update(&ca->tcf_bstats, skb); - if (skb->protocol == htons(ETH_P_IP)) { + switch (skb_protocol(skb, true)) { + case htons(ETH_P_IP): if (skb->len < sizeof(struct iphdr)) goto out; proto = NFPROTO_IPV4; - } else if (skb->protocol == htons(ETH_P_IPV6)) { + break; + case htons(ETH_P_IPV6): if (skb->len < sizeof(struct ipv6hdr)) goto out; proto = NFPROTO_IPV6; - } else { + break; + default: goto out; } diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index cb8608f0a77a..c60674cf25c4 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -587,7 +587,7 @@ static int tcf_csum_act(struct sk_buff *skb, const struct tc_action *a, goto drop; update_flags = params->update_flags; - protocol = tc_skb_protocol(skb); + protocol = skb_protocol(skb, false); again: switch (protocol) { case cpu_to_be16(ETH_P_IP): diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index e29f0f45d688..6ed1652d1e26 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -624,7 +624,7 @@ static u8 tcf_ct_skb_nf_family(struct sk_buff *skb) { u8 family = NFPROTO_UNSPEC; - switch (skb->protocol) { + switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): family = NFPROTO_IPV4; break; @@ -673,9 +673,10 @@ static int tcf_ct_ipv6_is_fragment(struct sk_buff *skb, bool *frag) } static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, - u8 family, u16 zone) + u8 family, u16 zone, bool *defrag) { enum ip_conntrack_info ctinfo; + struct qdisc_skb_cb cb; struct nf_conn *ct; int err = 0; bool frag; @@ -693,6 +694,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, return err; skb_get(skb); + cb = *qdisc_skb_cb(skb); if (family == NFPROTO_IPV4) { enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone; @@ -703,6 +705,9 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, local_bh_enable(); if (err && err != -EINPROGRESS) goto out_free; + + if (!err) + *defrag = true; } else { /* NFPROTO_IPV6 */ #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; @@ -711,12 +716,16 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, err = nf_ct_frag6_gather(net, skb, user); if (err && err != -EINPROGRESS) goto out_free; + + if (!err) + *defrag = true; #else err = -EOPNOTSUPP; goto out_free; #endif } + *qdisc_skb_cb(skb) = cb; skb_clear_hash(skb); skb->ignore_df = 1; return err; @@ -748,6 +757,7 @@ static int ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, const struct nf_nat_range2 *range, enum nf_nat_manip_type maniptype) { + __be16 proto = skb_protocol(skb, true); int hooknum, err = NF_ACCEPT; /* See HOOK2MANIP(). */ @@ -759,14 +769,13 @@ static int ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, switch (ctinfo) { case IP_CT_RELATED: case IP_CT_RELATED_REPLY: - if (skb->protocol == htons(ETH_P_IP) && + if (proto == htons(ETH_P_IP) && ip_hdr(skb)->protocol == IPPROTO_ICMP) { if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, hooknum)) err = NF_DROP; goto out; - } else if (IS_ENABLED(CONFIG_IPV6) && - skb->protocol == htons(ETH_P_IPV6)) { + } else if (IS_ENABLED(CONFIG_IPV6) && proto == htons(ETH_P_IPV6)) { __be16 frag_off; u8 nexthdr = ipv6_hdr(skb)->nexthdr; int hdrlen = ipv6_skip_exthdr(skb, @@ -914,6 +923,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, int nh_ofs, err, retval; struct tcf_ct_params *p; bool skip_add = false; + bool defrag = false; struct nf_conn *ct; u8 family; @@ -925,6 +935,8 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, force = p->ct_action & TCA_CT_ACT_FORCE; tmpl = p->tmpl; + tcf_lastuse_update(&c->tcf_tm); + if (clear) { ct = nf_ct_get(skb, &ctinfo); if (ct) { @@ -944,7 +956,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, */ nh_ofs = skb_network_offset(skb); skb_pull_rcsum(skb, nh_ofs); - err = tcf_ct_handle_fragments(net, skb, family, p->zone); + err = tcf_ct_handle_fragments(net, skb, family, p->zone, &defrag); if (err == -EINPROGRESS) { retval = TC_ACT_STOLEN; goto out; @@ -1012,6 +1024,8 @@ out_push: out: tcf_action_update_bstats(&c->common, skb); + if (defrag) + qdisc_skb_cb(skb)->pkt_len = skb->len; return retval; drop: @@ -1529,10 +1543,10 @@ static int __init ct_init_module(void) return 0; -err_tbl_init: - destroy_workqueue(act_ct_wq); err_register: tcf_ct_flow_tables_uninit(); +err_tbl_init: + destroy_workqueue(act_ct_wq); return err; } @@ -1543,17 +1557,6 @@ static void __exit ct_cleanup_module(void) destroy_workqueue(act_ct_wq); } -void tcf_ct_flow_table_restore_skb(struct sk_buff *skb, unsigned long cookie) -{ - enum ip_conntrack_info ctinfo = cookie & NFCT_INFOMASK; - struct nf_conn *ct; - - ct = (struct nf_conn *)(cookie & NFCT_PTRMASK); - nf_conntrack_get(&ct->ct_general); - nf_ct_set(skb, ct, ctinfo); -} -EXPORT_SYMBOL_GPL(tcf_ct_flow_table_restore_skb); - module_init(ct_init_module); module_exit(ct_cleanup_module); MODULE_AUTHOR("Paul Blakey <paulb@mellanox.com>"); @@ -1561,4 +1564,3 @@ MODULE_AUTHOR("Yossi Kuperman <yossiku@mellanox.com>"); MODULE_AUTHOR("Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>"); MODULE_DESCRIPTION("Connection tracking action"); MODULE_LICENSE("GPL v2"); - diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c index 19649623493b..b5042f3ea079 100644 --- a/net/sched/act_ctinfo.c +++ b/net/sched/act_ctinfo.c @@ -96,19 +96,22 @@ static int tcf_ctinfo_act(struct sk_buff *skb, const struct tc_action *a, action = READ_ONCE(ca->tcf_action); wlen = skb_network_offset(skb); - if (tc_skb_protocol(skb) == htons(ETH_P_IP)) { + switch (skb_protocol(skb, true)) { + case htons(ETH_P_IP): wlen += sizeof(struct iphdr); if (!pskb_may_pull(skb, wlen)) goto out; proto = NFPROTO_IPV4; - } else if (tc_skb_protocol(skb) == htons(ETH_P_IPV6)) { + break; + case htons(ETH_P_IPV6): wlen += sizeof(struct ipv6hdr); if (!pskb_may_pull(skb, wlen)) goto out; proto = NFPROTO_IPV6; - } else { + break; + default: goto out; } diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c index 9c628591f452..323ae7f6315d 100644 --- a/net/sched/act_gate.c +++ b/net/sched/act_gate.c @@ -32,7 +32,7 @@ static ktime_t gate_get_time(struct tcf_gate *gact) return KTIME_MAX; } -static int gate_get_start_time(struct tcf_gate *gact, ktime_t *start) +static void gate_get_start_time(struct tcf_gate *gact, ktime_t *start) { struct tcf_gate_params *param = &gact->param; ktime_t now, base, cycle; @@ -43,18 +43,13 @@ static int gate_get_start_time(struct tcf_gate *gact, ktime_t *start) if (ktime_after(base, now)) { *start = base; - return 0; + return; } cycle = param->tcfg_cycletime; - /* cycle time should not be zero */ - if (!cycle) - return -EFAULT; - n = div64_u64(ktime_sub_ns(now, base), cycle); *start = ktime_add_ns(base, (n + 1) * cycle); - return 0; } static void gate_start_timer(struct tcf_gate *gact, ktime_t start) @@ -277,6 +272,27 @@ release_list: return err; } +static void gate_setup_timer(struct tcf_gate *gact, u64 basetime, + enum tk_offsets tko, s32 clockid, + bool do_init) +{ + if (!do_init) { + if (basetime == gact->param.tcfg_basetime && + tko == gact->tk_offset && + clockid == gact->param.tcfg_clockid) + return; + + spin_unlock_bh(&gact->tcf_lock); + hrtimer_cancel(&gact->hitimer); + spin_lock_bh(&gact->tcf_lock); + } + gact->param.tcfg_basetime = basetime; + gact->param.tcfg_clockid = clockid; + gact->tk_offset = tko; + hrtimer_init(&gact->hitimer, clockid, HRTIMER_MODE_ABS_SOFT); + gact->hitimer.function = gate_timer_func; +} + static int tcf_gate_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, @@ -287,12 +303,12 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, enum tk_offsets tk_offset = TK_OFFS_TAI; struct nlattr *tb[TCA_GATE_MAX + 1]; struct tcf_chain *goto_ch = NULL; + u64 cycletime = 0, basetime = 0; struct tcf_gate_params *p; s32 clockid = CLOCK_TAI; struct tcf_gate *gact; struct tc_gate *parm; int ret = 0, err; - u64 basetime = 0; u32 gflags = 0; s32 prio = -1; ktime_t start; @@ -308,6 +324,27 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, if (!tb[TCA_GATE_PARMS]) return -EINVAL; + if (tb[TCA_GATE_CLOCKID]) { + clockid = nla_get_s32(tb[TCA_GATE_CLOCKID]); + switch (clockid) { + case CLOCK_REALTIME: + tk_offset = TK_OFFS_REAL; + break; + case CLOCK_MONOTONIC: + tk_offset = TK_OFFS_MAX; + break; + case CLOCK_BOOTTIME: + tk_offset = TK_OFFS_BOOT; + break; + case CLOCK_TAI: + tk_offset = TK_OFFS_TAI; + break; + default: + NL_SET_ERR_MSG(extack, "Invalid 'clockid'"); + return -EINVAL; + } + } + parm = nla_data(tb[TCA_GATE_PARMS]); index = parm->index; @@ -331,10 +368,6 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, tcf_idr_release(*a, bind); return -EEXIST; } - if (ret == ACT_P_CREATED) { - to_gate(*a)->param.tcfg_clockid = -1; - INIT_LIST_HEAD(&(to_gate(*a)->param.entries)); - } if (tb[TCA_GATE_PRIORITY]) prio = nla_get_s32(tb[TCA_GATE_PRIORITY]); @@ -345,41 +378,19 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, if (tb[TCA_GATE_FLAGS]) gflags = nla_get_u32(tb[TCA_GATE_FLAGS]); - if (tb[TCA_GATE_CLOCKID]) { - clockid = nla_get_s32(tb[TCA_GATE_CLOCKID]); - switch (clockid) { - case CLOCK_REALTIME: - tk_offset = TK_OFFS_REAL; - break; - case CLOCK_MONOTONIC: - tk_offset = TK_OFFS_MAX; - break; - case CLOCK_BOOTTIME: - tk_offset = TK_OFFS_BOOT; - break; - case CLOCK_TAI: - tk_offset = TK_OFFS_TAI; - break; - default: - NL_SET_ERR_MSG(extack, "Invalid 'clockid'"); - goto release_idr; - } - } + gact = to_gate(*a); + if (ret == ACT_P_CREATED) + INIT_LIST_HEAD(&gact->param.entries); err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); if (err < 0) goto release_idr; - gact = to_gate(*a); - spin_lock_bh(&gact->tcf_lock); p = &gact->param; - if (tb[TCA_GATE_CYCLE_TIME]) { - p->tcfg_cycletime = nla_get_u64(tb[TCA_GATE_CYCLE_TIME]); - if (!p->tcfg_cycletime_ext) - goto chain_put; - } + if (tb[TCA_GATE_CYCLE_TIME]) + cycletime = nla_get_u64(tb[TCA_GATE_CYCLE_TIME]); if (tb[TCA_GATE_ENTRY_LIST]) { err = parse_gate_list(tb[TCA_GATE_ENTRY_LIST], p, extack); @@ -387,35 +398,29 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, goto chain_put; } - if (!p->tcfg_cycletime) { + if (!cycletime) { struct tcfg_gate_entry *entry; ktime_t cycle = 0; list_for_each_entry(entry, &p->entries, list) cycle = ktime_add_ns(cycle, entry->interval); - p->tcfg_cycletime = cycle; + cycletime = cycle; + if (!cycletime) { + err = -EINVAL; + goto chain_put; + } } + p->tcfg_cycletime = cycletime; if (tb[TCA_GATE_CYCLE_TIME_EXT]) p->tcfg_cycletime_ext = nla_get_u64(tb[TCA_GATE_CYCLE_TIME_EXT]); + gate_setup_timer(gact, basetime, tk_offset, clockid, + ret == ACT_P_CREATED); p->tcfg_priority = prio; - p->tcfg_basetime = basetime; - p->tcfg_clockid = clockid; p->tcfg_flags = gflags; - - gact->tk_offset = tk_offset; - hrtimer_init(&gact->hitimer, clockid, HRTIMER_MODE_ABS_SOFT); - gact->hitimer.function = gate_timer_func; - - err = gate_get_start_time(gact, &start); - if (err < 0) { - NL_SET_ERR_MSG(extack, - "Internal error: failed get start time"); - release_entry_list(&p->entries); - goto chain_put; - } + gate_get_start_time(gact, &start); gact->current_close_time = start; gact->current_gate_status = GATE_ACT_GATE_OPEN | GATE_ACT_PENDING; @@ -443,6 +448,13 @@ chain_put: if (goto_ch) tcf_chain_put_by_act(goto_ch); release_idr: + /* action is not inserted in any list: it's safe to init hitimer + * without taking tcf_lock. + */ + if (ret == ACT_P_CREATED) + gate_setup_timer(gact, gact->param.tcfg_basetime, + gact->tk_offset, gact->param.tcfg_clockid, + true); tcf_idr_release(*a, bind); return err; } @@ -453,9 +465,7 @@ static void tcf_gate_cleanup(struct tc_action *a) struct tcf_gate_params *p; p = &gact->param; - if (p->tcfg_clockid != -1) - hrtimer_cancel(&gact->hitimer); - + hrtimer_cancel(&gact->hitimer); release_entry_list(&p->entries); } diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index be3f215cd027..8118e2640979 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -82,7 +82,7 @@ static int tcf_mpls_act(struct sk_buff *skb, const struct tc_action *a, goto drop; break; case TCA_MPLS_ACT_PUSH: - new_lse = tcf_mpls_get_lse(NULL, p, !eth_p_mpls(skb->protocol)); + new_lse = tcf_mpls_get_lse(NULL, p, !eth_p_mpls(skb_protocol(skb, true))); if (skb_mpls_push(skb, new_lse, p->tcfm_proto, mac_len, skb->dev && skb->dev->type == ARPHRD_ETHER)) goto drop; diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index b125b2be4467..b2b3faa57294 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -41,7 +41,7 @@ static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a, if (params->flags & SKBEDIT_F_INHERITDSFIELD) { int wlen = skb_network_offset(skb); - switch (tc_skb_protocol(skb)) { + switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): wlen += sizeof(struct iphdr); if (!pskb_may_pull(skb, wlen)) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index a00a203b2ef5..4619cb3cb0a8 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -20,7 +20,6 @@ #include <linux/kmod.h> #include <linux/slab.h> #include <linux/idr.h> -#include <linux/rhashtable.h> #include <linux/jhash.h> #include <linux/rculist.h> #include <net/net_namespace.h> @@ -652,6 +651,7 @@ static void tc_block_indr_cleanup(struct flow_block_cb *block_cb) &block->flow_block, tcf_block_shared(block), &extack); down_write(&block->cb_lock); + list_del(&block_cb->driver_list); list_move(&block_cb->list, &bo.cb_list); up_write(&block->cb_lock); rtnl_lock(); @@ -671,25 +671,29 @@ static int tcf_block_offload_cmd(struct tcf_block *block, struct netlink_ext_ack *extack) { struct flow_block_offload bo = {}; - int err; tcf_block_offload_init(&bo, dev, command, ei->binder_type, &block->flow_block, tcf_block_shared(block), extack); - if (dev->netdev_ops->ndo_setup_tc) + if (dev->netdev_ops->ndo_setup_tc) { + int err; + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo); - else - err = flow_indr_dev_setup_offload(dev, TC_SETUP_BLOCK, block, - &bo, tc_block_indr_cleanup); + if (err < 0) { + if (err != -EOPNOTSUPP) + NL_SET_ERR_MSG(extack, "Driver ndo_setup_tc failed"); + return err; + } - if (err < 0) { - if (err != -EOPNOTSUPP) - NL_SET_ERR_MSG(extack, "Driver ndo_setup_tc failed"); - return err; + return tcf_block_setup(block, &bo); } - return tcf_block_setup(block, &bo); + flow_indr_dev_setup_offload(dev, TC_SETUP_BLOCK, block, &bo, + tc_block_indr_cleanup); + tcf_block_setup(block, &bo); + + return -EOPNOTSUPP; } static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q, @@ -1533,7 +1537,7 @@ static inline int __tcf_classify(struct sk_buff *skb, reclassify: #endif for (; tp; tp = rcu_dereference_bh(tp->next)) { - __be16 protocol = tc_skb_protocol(skb); + __be16 protocol = skb_protocol(skb, false); int err; if (tp->protocol != protocol && diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 80ae7b9fa90a..87398af2715a 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -80,7 +80,7 @@ static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow) if (dst) return ntohl(dst); - return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); + return addr_fold(skb_dst(skb)) ^ (__force u16)skb_protocol(skb, true); } static u32 flow_get_proto(const struct sk_buff *skb, @@ -104,7 +104,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb, if (flow->ports.ports) return ntohs(flow->ports.dst); - return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); + return addr_fold(skb_dst(skb)) ^ (__force u16)skb_protocol(skb, true); } static u32 flow_get_iif(const struct sk_buff *skb) @@ -151,7 +151,7 @@ static u32 flow_get_nfct(const struct sk_buff *skb) static u32 flow_get_nfct_src(const struct sk_buff *skb, const struct flow_keys *flow) { - switch (tc_skb_protocol(skb)) { + switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): return ntohl(CTTUPLE(skb, src.u3.ip)); case htons(ETH_P_IPV6): @@ -164,7 +164,7 @@ fallback: static u32 flow_get_nfct_dst(const struct sk_buff *skb, const struct flow_keys *flow) { - switch (tc_skb_protocol(skb)) { + switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): return ntohl(CTTUPLE(skb, dst.u3.ip)); case htons(ETH_P_IPV6): @@ -225,7 +225,7 @@ static u32 flow_get_skgid(const struct sk_buff *skb) static u32 flow_get_vlan_tag(const struct sk_buff *skb) { - u16 uninitialized_var(tag); + u16 tag; if (vlan_get_tag(skb, &tag) < 0) return 0; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index b2da37286082..e30bd969fc48 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -313,7 +313,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, /* skb_flow_dissect() does not set n_proto in case an unknown * protocol, so do it rather here. */ - skb_key.basic.n_proto = skb->protocol; + skb_key.basic.n_proto = skb_protocol(skb, false); skb_flow_dissect_tunnel_info(skb, &mask->dissector, &skb_key); skb_flow_dissect_ct(skb, &mask->dissector, &skb_key, fl_ct_info_to_flower_map, diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c index df00566d327d..c95cf86fb431 100644 --- a/net/sched/em_ipset.c +++ b/net/sched/em_ipset.c @@ -59,7 +59,7 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em, }; int ret, network_offset; - switch (tc_skb_protocol(skb)) { + switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): state.pf = NFPROTO_IPV4; if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) diff --git a/net/sched/em_ipt.c b/net/sched/em_ipt.c index 18755d29fd15..3650117da47f 100644 --- a/net/sched/em_ipt.c +++ b/net/sched/em_ipt.c @@ -212,7 +212,7 @@ static int em_ipt_match(struct sk_buff *skb, struct tcf_ematch *em, struct nf_hook_state state; int ret; - switch (tc_skb_protocol(skb)) { + switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) return 0; diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index d99966a55c84..46254968d390 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -195,7 +195,7 @@ META_COLLECTOR(int_priority) META_COLLECTOR(int_protocol) { /* Let userspace take care of the byte ordering */ - dst->value = tc_skb_protocol(skb); + dst->value = skb_protocol(skb, false); } META_COLLECTOR(int_pkttype) diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index ee12ca9f55b4..1c281cc81f57 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -553,16 +553,16 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt, if (!p->link.q) p->link.q = &noop_qdisc; pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q); + p->link.vcc = NULL; + p->link.sock = NULL; + p->link.common.classid = sch->handle; + p->link.ref = 1; err = tcf_block_get(&p->link.block, &p->link.filter_list, sch, extack); if (err) return err; - p->link.vcc = NULL; - p->link.sock = NULL; - p->link.common.classid = sch->handle; - p->link.ref = 1; tasklet_init(&p->task, sch_atm_dequeue, (unsigned long)sch); return 0; } diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 60f8ae578819..c7b587897585 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -592,7 +592,7 @@ static bool cake_update_flowkeys(struct flow_keys *keys, bool rev = !skb->_nfct, upd = false; __be32 ip; - if (tc_skb_protocol(skb) != htons(ETH_P_IP)) + if (skb_protocol(skb, true) != htons(ETH_P_IP)) return false; if (!nf_ct_get_tuple_skb(&tuple, skb)) @@ -1551,32 +1551,51 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free) return idx + (tin << 16); } -static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash) +static u8 cake_handle_diffserv(struct sk_buff *skb, bool wash) { - int wlen = skb_network_offset(skb); + const int offset = skb_network_offset(skb); + u16 *buf, buf_; u8 dscp; - switch (tc_skb_protocol(skb)) { + switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): - wlen += sizeof(struct iphdr); - if (!pskb_may_pull(skb, wlen) || - skb_try_make_writable(skb, wlen)) + buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_); + if (unlikely(!buf)) return 0; - dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2; - if (wash && dscp) + /* ToS is in the second byte of iphdr */ + dscp = ipv4_get_dsfield((struct iphdr *)buf) >> 2; + + if (wash && dscp) { + const int wlen = offset + sizeof(struct iphdr); + + if (!pskb_may_pull(skb, wlen) || + skb_try_make_writable(skb, wlen)) + return 0; + ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0); + } + return dscp; case htons(ETH_P_IPV6): - wlen += sizeof(struct ipv6hdr); - if (!pskb_may_pull(skb, wlen) || - skb_try_make_writable(skb, wlen)) + buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_); + if (unlikely(!buf)) return 0; - dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2; - if (wash && dscp) + /* Traffic class is in the first and second bytes of ipv6hdr */ + dscp = ipv6_get_dsfield((struct ipv6hdr *)buf) >> 2; + + if (wash && dscp) { + const int wlen = offset + sizeof(struct ipv6hdr); + + if (!pskb_may_pull(skb, wlen) || + skb_try_make_writable(skb, wlen)) + return 0; + ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0); + } + return dscp; case htons(ETH_P_ARP): @@ -1593,14 +1612,17 @@ static struct cake_tin_data *cake_select_tin(struct Qdisc *sch, { struct cake_sched_data *q = qdisc_priv(sch); u32 tin, mark; + bool wash; u8 dscp; /* Tin selection: Default to diffserv-based selection, allow overriding - * using firewall marks or skb->priority. + * using firewall marks or skb->priority. Call DSCP parsing early if + * wash is enabled, otherwise defer to below to skip unneeded parsing. */ - dscp = cake_handle_diffserv(skb, - q->rate_flags & CAKE_FLAG_WASH); mark = (skb->mark & q->fwmark_mask) >> q->fwmark_shft; + wash = !!(q->rate_flags & CAKE_FLAG_WASH); + if (wash) + dscp = cake_handle_diffserv(skb, wash); if (q->tin_mode == CAKE_DIFFSERV_BESTEFFORT) tin = 0; @@ -1614,6 +1636,8 @@ static struct cake_tin_data *cake_select_tin(struct Qdisc *sch, tin = q->tin_order[TC_H_MIN(skb->priority) - 1]; else { + if (!wash) + dscp = cake_handle_diffserv(skb, wash); tin = q->tin_index[dscp]; if (unlikely(tin >= q->tin_cnt)) @@ -1668,7 +1692,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, { struct cake_sched_data *q = qdisc_priv(sch); int len = qdisc_pkt_len(skb); - int uninitialized_var(ret); + int ret; struct sk_buff *ack = NULL; ktime_t now = ktime_get(); struct cake_tin_data *b; @@ -2691,7 +2715,7 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt, qdisc_watchdog_init(&q->watchdog, sch); if (opt) { - int err = cake_change(sch, opt, extack); + err = cake_change(sch, opt, extack); if (err) return err; @@ -3008,7 +3032,7 @@ static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl, PUT_STAT_S32(BLUE_TIMER_US, ktime_to_us( ktime_sub(now, - flow->cvars.blue_timer))); + flow->cvars.blue_timer))); } if (flow->cvars.dropping) { PUT_STAT_S32(DROP_NEXT_US, diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 39b427dc7512..ce4519358106 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -360,7 +360,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct cbq_sched_data *q = qdisc_priv(sch); - int uninitialized_var(ret); + int ret; struct cbq_class *cl = cbq_classify(skb, sch, &ret); #ifdef CONFIG_NET_CLS_ACT diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 05605b30bef3..2b88710994d7 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -210,7 +210,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (p->set_tc_index) { int wlen = skb_network_offset(skb); - switch (tc_skb_protocol(skb)) { + switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): wlen += sizeof(struct iphdr); if (!pskb_may_pull(skb, wlen) || @@ -303,7 +303,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) index = skb->tc_index & (p->indices - 1); pr_debug("index %d->%d\n", skb->tc_index, index); - switch (tc_skb_protocol(skb)) { + switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): ipv4_change_dsfield(ip_hdr(skb), p->mv[index].mask, p->mv[index].value); @@ -320,7 +320,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) */ if (p->mv[index].mask != 0xff || p->mv[index].value) pr_warn("%s: unsupported protocol %d\n", - __func__, ntohs(tc_skb_protocol(skb))); + __func__, ntohs(skb_protocol(skb, true))); break; } diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 8f06a808c59a..2fb76fc0cc31 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -1075,3 +1075,4 @@ module_init(fq_module_init) module_exit(fq_module_exit) MODULE_AUTHOR("Eric Dumazet"); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Fair Queue Packet Scheduler"); diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 436160be9c18..985d5208f563 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -187,7 +187,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct fq_codel_sched_data *q = qdisc_priv(sch); unsigned int idx, prev_backlog, prev_qlen; struct fq_codel_flow *flow; - int uninitialized_var(ret); + int ret; unsigned int pkt_len; bool memory_limited; @@ -721,3 +721,4 @@ module_init(fq_codel_module_init) module_exit(fq_codel_module_exit) MODULE_AUTHOR("Eric Dumazet"); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Fair Queue CoDel discipline"); diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index fb760cee824e..4d307f17e084 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -130,7 +130,7 @@ static int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, { struct fq_pie_sched_data *q = qdisc_priv(sch); struct fq_pie_flow *sel_flow; - int uninitialized_var(ret); + int ret; u8 memory_limited = false; u8 enqueue = false; u32 pkt_len; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index b19a0021a0bd..265a61d011df 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -464,6 +464,7 @@ void __netdev_watchdog_up(struct net_device *dev) dev_hold(dev); } } +EXPORT_SYMBOL_GPL(__netdev_watchdog_up); static void dev_watchdog_up(struct net_device *dev) { diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 433f2190960f..92ad4115e473 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1533,7 +1533,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { unsigned int len = qdisc_pkt_len(skb); struct hfsc_class *cl; - int uninitialized_var(err); + int err; bool first; cl = hfsc_classify(skb, sch, &err); diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index be35f03b657b..420ede875322 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -721,3 +721,4 @@ module_exit(hhf_module_exit) MODULE_AUTHOR("Terry Lam"); MODULE_AUTHOR("Nandita Dukkipati"); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Heavy-Hitter Filter (HHF)"); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 8184c87da8be..6feab225b4ba 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -579,7 +579,7 @@ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl) static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { - int uninitialized_var(ret); + int ret; unsigned int len = qdisc_pkt_len(skb); struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = htb_classify(skb, sch, &ret); diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 5a6def5e4e6d..15f400dcb400 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -349,7 +349,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) unsigned int hash, dropped; sfq_index x, qlen; struct sfq_slot *slot; - int uninitialized_var(ret); + int ret; struct sk_buff *head; int delta; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 689ef6f3ded8..2f1f0a378408 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -239,7 +239,7 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, char haddr[MAX_ADDR_LEN]; neigh_ha_snapshot(haddr, n, dev); - err = dev_hard_header(skb, dev, ntohs(tc_skb_protocol(skb)), + err = dev_hard_header(skb, dev, ntohs(skb_protocol(skb, false)), haddr, NULL, skb->len); if (err < 0) diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig index 68934438ee19..39d7fa9569f8 100644 --- a/net/sctp/Kconfig +++ b/net/sctp/Kconfig @@ -11,7 +11,7 @@ menuconfig IP_SCTP select CRYPTO_HMAC select CRYPTO_SHA1 select LIBCRC32C - ---help--- + help Stream Control Transmission Protocol From RFC 2960 <http://www.ietf.org/rfc/rfc2960.txt>. diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 72315137d7e7..8d735461fa19 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1565,12 +1565,15 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len) int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc, enum sctp_scope scope, gfp_t gfp) { + struct sock *sk = asoc->base.sk; int flags; /* Use scoping rules to determine the subset of addresses from * the endpoint. */ - flags = (PF_INET6 == asoc->base.sk->sk_family) ? SCTP_ADDR6_ALLOWED : 0; + flags = (PF_INET6 == sk->sk_family) ? SCTP_ADDR6_ALLOWED : 0; + if (!inet_v6_ipv6only(sk)) + flags |= SCTP_ADDR4_ALLOWED; if (asoc->peer.ipv4_address) flags |= SCTP_ADDR4_PEERSUPP; if (asoc->peer.ipv6_address) diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 53bc61537f44..701c5a4e441d 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -461,6 +461,7 @@ static int sctp_copy_one_addr(struct net *net, struct sctp_bind_addr *dest, * well as the remote peer. */ if ((((AF_INET == addr->sa.sa_family) && + (flags & SCTP_ADDR4_ALLOWED) && (flags & SCTP_ADDR4_PEERSUPP))) || (((AF_INET6 == addr->sa.sa_family) && (flags & SCTP_ADDR6_ALLOWED) && diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 092d1afdee0d..cde29f3c7fb3 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -148,7 +148,8 @@ int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp, * sock as well as the remote peer. */ if (addr->a.sa.sa_family == AF_INET && - !(copy_flags & SCTP_ADDR4_PEERSUPP)) + (!(copy_flags & SCTP_ADDR4_ALLOWED) || + !(copy_flags & SCTP_ADDR4_PEERSUPP))) continue; if (addr->a.sa.sa_family == AF_INET6 && (!(copy_flags & SCTP_ADDR6_ALLOWED) || diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 67f7e71f9129..bda2536dd740 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -22,17 +22,11 @@ #include <net/sctp/sm.h> #include <net/sctp/stream_sched.h> -/* Migrates chunks from stream queues to new stream queues if needed, - * but not across associations. Also, removes those chunks to streams - * higher than the new max. - */ -static void sctp_stream_outq_migrate(struct sctp_stream *stream, - struct sctp_stream *new, __u16 outcnt) +static void sctp_stream_shrink_out(struct sctp_stream *stream, __u16 outcnt) { struct sctp_association *asoc; struct sctp_chunk *ch, *temp; struct sctp_outq *outq; - int i; asoc = container_of(stream, struct sctp_association, stream); outq = &asoc->outqueue; @@ -56,6 +50,19 @@ static void sctp_stream_outq_migrate(struct sctp_stream *stream, sctp_chunk_free(ch); } +} + +/* Migrates chunks from stream queues to new stream queues if needed, + * but not across associations. Also, removes those chunks to streams + * higher than the new max. + */ +static void sctp_stream_outq_migrate(struct sctp_stream *stream, + struct sctp_stream *new, __u16 outcnt) +{ + int i; + + if (stream->outcnt > outcnt) + sctp_stream_shrink_out(stream, outcnt); if (new) { /* Here we actually move the old ext stuff into the new @@ -1037,11 +1044,13 @@ struct sctp_chunk *sctp_process_strreset_resp( nums = ntohs(addstrm->number_of_streams); number = stream->outcnt - nums; - if (result == SCTP_STRRESET_PERFORMED) + if (result == SCTP_STRRESET_PERFORMED) { for (i = number; i < stream->outcnt; i++) SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN; - else + } else { + sctp_stream_shrink_out(stream, number); stream->outcnt = number; + } *evp = sctp_ulpevent_make_stream_change_event(asoc, flags, 0, nums, GFP_ATOMIC); diff --git a/net/smc/Kconfig b/net/smc/Kconfig index f54a70b8da82..1ab3c5a2c5ad 100644 --- a/net/smc/Kconfig +++ b/net/smc/Kconfig @@ -2,7 +2,7 @@ config SMC tristate "SMC socket protocol family" depends on INET && INFINIBAND - ---help--- + help SMC-R provides a "sockets over RDMA" solution making use of RDMA over Converged Ethernet (RoCE) technology to upgrade AF_INET TCP connections transparently. @@ -14,7 +14,7 @@ config SMC config SMC_DIAG tristate "SMC: socket monitoring interface" depends on SMC - ---help--- + help Support for SMC socket monitoring interface used by tools such as smcss. diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 903321543838..1163d51196da 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -126,8 +126,10 @@ EXPORT_SYMBOL_GPL(smc_proto6); static void smc_restore_fallback_changes(struct smc_sock *smc) { - smc->clcsock->file->private_data = smc->sk.sk_socket; - smc->clcsock->file = NULL; + if (smc->clcsock->file) { /* non-accepted sockets have no file yet */ + smc->clcsock->file->private_data = smc->sk.sk_socket; + smc->clcsock->file = NULL; + } } static int __smc_release(struct smc_sock *smc) @@ -352,7 +354,7 @@ static int smcr_lgr_reg_rmbs(struct smc_link *link, */ mutex_lock(&lgr->llc_conf_mutex); for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (lgr->lnk[i].state != SMC_LNK_ACTIVE) + if (!smc_link_active(&lgr->lnk[i])) continue; rc = smcr_link_reg_rmb(&lgr->lnk[i], rmb_desc); if (rc) @@ -632,7 +634,9 @@ static int smc_connect_rdma(struct smc_sock *smc, for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { struct smc_link *l = &smc->conn.lgr->lnk[i]; - if (l->peer_qpn == ntoh24(aclc->qpn)) { + if (l->peer_qpn == ntoh24(aclc->qpn) && + !memcmp(l->peer_gid, &aclc->lcl.gid, SMC_GID_SIZE) && + !memcmp(l->peer_mac, &aclc->lcl.mac, sizeof(l->peer_mac))) { link = l; break; } diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index a47e8855e045..ce468ff62a19 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -66,9 +66,13 @@ int smc_cdc_get_free_slot(struct smc_connection *conn, rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf, wr_rdma_buf, (struct smc_wr_tx_pend_priv **)pend); - if (conn->killed) + if (conn->killed) { /* abnormal termination */ + if (!rc) + smc_wr_tx_put_slot(link, + (struct smc_wr_tx_pend_priv *)pend); rc = -EPIPE; + } return rc; } diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index d5627df24215..779f4142a11d 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -27,6 +27,7 @@ #define SMCR_CLC_ACCEPT_CONFIRM_LEN 68 #define SMCD_CLC_ACCEPT_CONFIRM_LEN 48 +#define SMC_CLC_RECV_BUF_LEN 100 /* eye catcher "SMCR" EBCDIC for CLC messages */ static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'}; @@ -36,7 +37,7 @@ static const char SMCD_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xc4'}; /* check if received message has a correct header length and contains valid * heading and trailing eyecatchers */ -static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm) +static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm, bool check_trl) { struct smc_clc_msg_proposal_prefix *pclc_prfx; struct smc_clc_msg_accept_confirm *clc; @@ -49,12 +50,9 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm) return false; switch (clcm->type) { case SMC_CLC_PROPOSAL: - if (clcm->path != SMC_TYPE_R && clcm->path != SMC_TYPE_D && - clcm->path != SMC_TYPE_B) - return false; pclc = (struct smc_clc_msg_proposal *)clcm; pclc_prfx = smc_clc_proposal_get_prefix(pclc); - if (ntohs(pclc->hdr.length) != + if (ntohs(pclc->hdr.length) < sizeof(*pclc) + ntohs(pclc->iparea_offset) + sizeof(*pclc_prfx) + pclc_prfx->ipv6_prefixes_cnt * @@ -86,7 +84,8 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm) default: return false; } - if (memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) && + if (check_trl && + memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) && memcmp(trl->eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER))) return false; return true; @@ -276,7 +275,8 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, struct msghdr msg = {NULL, 0}; int reason_code = 0; struct kvec vec = {buf, buflen}; - int len, datlen; + int len, datlen, recvlen; + bool check_trl = true; int krflags; /* peek the first few bytes to determine length of data to receive @@ -320,10 +320,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, } datlen = ntohs(clcm->length); if ((len < sizeof(struct smc_clc_msg_hdr)) || - (datlen > buflen) || - (clcm->version != SMC_CLC_V1) || - (clcm->path != SMC_TYPE_R && clcm->path != SMC_TYPE_D && - clcm->path != SMC_TYPE_B) || + (clcm->version < SMC_CLC_V1) || ((clcm->type != SMC_CLC_DECLINE) && (clcm->type != expected_type))) { smc->sk.sk_err = EPROTO; @@ -331,16 +328,38 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, goto out; } + if (clcm->type == SMC_CLC_PROPOSAL && clcm->path == SMC_TYPE_N) + reason_code = SMC_CLC_DECL_VERSMISMAT; /* just V2 offered */ + /* receive the complete CLC message */ memset(&msg, 0, sizeof(struct msghdr)); - iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, datlen); + if (datlen > buflen) { + check_trl = false; + recvlen = buflen; + } else { + recvlen = datlen; + } + iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, recvlen); krflags = MSG_WAITALL; len = sock_recvmsg(smc->clcsock, &msg, krflags); - if (len < datlen || !smc_clc_msg_hdr_valid(clcm)) { + if (len < recvlen || !smc_clc_msg_hdr_valid(clcm, check_trl)) { smc->sk.sk_err = EPROTO; reason_code = -EPROTO; goto out; } + datlen -= len; + while (datlen) { + u8 tmp[SMC_CLC_RECV_BUF_LEN]; + + vec.iov_base = &tmp; + vec.iov_len = SMC_CLC_RECV_BUF_LEN; + /* receive remaining proposal message */ + recvlen = datlen > SMC_CLC_RECV_BUF_LEN ? + SMC_CLC_RECV_BUF_LEN : datlen; + iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, recvlen); + len = sock_recvmsg(smc->clcsock, &msg, krflags); + datlen -= len; + } if (clcm->type == SMC_CLC_DECLINE) { struct smc_clc_msg_decline *dclc; diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 465876701b75..76c2b150d040 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -25,6 +25,7 @@ #define SMC_CLC_V1 0x1 /* SMC version */ #define SMC_TYPE_R 0 /* SMC-R only */ #define SMC_TYPE_D 1 /* SMC-D only */ +#define SMC_TYPE_N 2 /* neither SMC-R nor SMC-D */ #define SMC_TYPE_B 3 /* SMC-R and SMC-D */ #define CLC_WAIT_TIME (6 * HZ) /* max. wait time on clcsock */ #define CLC_WAIT_TIME_SHORT HZ /* short wait time on clcsock */ @@ -46,6 +47,7 @@ #define SMC_CLC_DECL_ISMVLANERR 0x03090000 /* err to reg vlan id on ism dev */ #define SMC_CLC_DECL_NOACTLINK 0x030a0000 /* no active smc-r link in lgr */ #define SMC_CLC_DECL_NOSRVLINK 0x030b0000 /* SMC-R link from srv not found */ +#define SMC_CLC_DECL_VERSMISMAT 0x030c0000 /* SMC version mismatch */ #define SMC_CLC_DECL_SYNCERR 0x04000000 /* synchronization error */ #define SMC_CLC_DECL_PEERDECL 0x05000000 /* peer declined during handshake */ #define SMC_CLC_DECL_INTERR 0x09990000 /* internal error */ diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 7964a21e5e6f..f82a2e599917 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -15,6 +15,7 @@ #include <linux/workqueue.h> #include <linux/wait.h> #include <linux/reboot.h> +#include <linux/mutex.h> #include <net/tcp.h> #include <net/sock.h> #include <rdma/ib_verbs.h> @@ -44,18 +45,10 @@ static struct smc_lgr_list smc_lgr_list = { /* established link groups */ static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */ static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted); -struct smc_ib_up_work { - struct work_struct work; - struct smc_link_group *lgr; - struct smc_ib_device *smcibdev; - u8 ibport; -}; - static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb, struct smc_buf_desc *buf_desc); static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft); -static void smc_link_up_work(struct work_struct *work); static void smc_link_down_work(struct work_struct *work); /* return head of link group list and its lock for a given link group */ @@ -247,7 +240,8 @@ static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr) if (smc_link_usable(lnk)) lnk->state = SMC_LNK_INACTIVE; } - wake_up_interruptible_all(&lgr->llc_waiter); + wake_up_all(&lgr->llc_msg_waiter); + wake_up_all(&lgr->llc_flow_waiter); } static void smc_lgr_free(struct smc_link_group *lgr); @@ -324,7 +318,6 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, get_device(&ini->ib_dev->ibdev->dev); atomic_inc(&ini->ib_dev->lnk_cnt); - lnk->state = SMC_LNK_ACTIVATING; lnk->link_id = smcr_next_link_id(lgr); lnk->lgr = lgr; lnk->link_idx = link_idx; @@ -360,6 +353,7 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, rc = smc_wr_create_link(lnk); if (rc) goto destroy_qp; + lnk->state = SMC_LNK_ACTIVATING; return 0; destroy_qp: @@ -450,7 +444,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) } smc->conn.lgr = lgr; spin_lock_bh(lgr_lock); - list_add(&lgr->list, lgr_list); + list_add_tail(&lgr->list, lgr_list); spin_unlock_bh(lgr_lock); return 0; @@ -548,8 +542,7 @@ struct smc_link *smc_switch_conns(struct smc_link_group *lgr, smc_wr_wakeup_tx_wait(from_lnk); for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (lgr->lnk[i].state != SMC_LNK_ACTIVE || - i == from_lnk->link_idx) + if (!smc_link_active(&lgr->lnk[i]) || i == from_lnk->link_idx) continue; if (is_dev_err && from_lnk->smcibdev == lgr->lnk[i].smcibdev && from_lnk->ibport == lgr->lnk[i].ibport) { @@ -1104,66 +1097,23 @@ static void smc_conn_abort_work(struct work_struct *work) sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */ } -/* link is up - establish alternate link if applicable */ -static void smcr_link_up(struct smc_link_group *lgr, - struct smc_ib_device *smcibdev, u8 ibport) -{ - struct smc_link *link = NULL; - - if (list_empty(&lgr->list) || - lgr->type == SMC_LGR_SYMMETRIC || - lgr->type == SMC_LGR_ASYMMETRIC_PEER) - return; - - if (lgr->role == SMC_SERV) { - /* trigger local add link processing */ - link = smc_llc_usable_link(lgr); - if (!link) - return; - smc_llc_srv_add_link_local(link); - } else { - /* invite server to start add link processing */ - u8 gid[SMC_GID_SIZE]; - - if (smc_ib_determine_gid(smcibdev, ibport, lgr->vlan_id, gid, - NULL)) - return; - if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) { - /* some other llc task is ongoing */ - wait_event_interruptible_timeout(lgr->llc_waiter, - (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE), - SMC_LLC_WAIT_TIME); - } - if (list_empty(&lgr->list) || - !smc_ib_port_active(smcibdev, ibport)) - return; /* lgr or device no longer active */ - link = smc_llc_usable_link(lgr); - if (!link) - return; - smc_llc_send_add_link(link, smcibdev->mac[ibport - 1], gid, - NULL, SMC_LLC_REQ); - } -} - void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport) { - struct smc_ib_up_work *ib_work; struct smc_link_group *lgr, *n; list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) { + struct smc_link *link; + if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id, SMC_MAX_PNETID_LEN) || lgr->type == SMC_LGR_SYMMETRIC || lgr->type == SMC_LGR_ASYMMETRIC_PEER) continue; - ib_work = kmalloc(sizeof(*ib_work), GFP_KERNEL); - if (!ib_work) - continue; - INIT_WORK(&ib_work->work, smc_link_up_work); - ib_work->lgr = lgr; - ib_work->smcibdev = smcibdev; - ib_work->ibport = ibport; - schedule_work(&ib_work->work); + + /* trigger local add link processing */ + link = smc_llc_usable_link(lgr); + if (link) + smc_llc_add_link_local(link); } } @@ -1195,13 +1145,19 @@ static void smcr_link_down(struct smc_link *lnk) if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) { /* another llc task is ongoing */ mutex_unlock(&lgr->llc_conf_mutex); - wait_event_interruptible_timeout(lgr->llc_waiter, - (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE), + wait_event_timeout(lgr->llc_flow_waiter, + (list_empty(&lgr->list) || + lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE), SMC_LLC_WAIT_TIME); mutex_lock(&lgr->llc_conf_mutex); } - smc_llc_send_delete_link(to_lnk, del_link_id, SMC_LLC_REQ, true, - SMC_LLC_DEL_LOST_PATH); + if (!list_empty(&lgr->list)) { + smc_llc_send_delete_link(to_lnk, del_link_id, + SMC_LLC_REQ, true, + SMC_LLC_DEL_LOST_PATH); + smcr_link_clear(lnk, true); + } + wake_up(&lgr->llc_flow_waiter); /* wake up next waiter */ } } @@ -1240,20 +1196,6 @@ void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport) } } -static void smc_link_up_work(struct work_struct *work) -{ - struct smc_ib_up_work *ib_work = container_of(work, - struct smc_ib_up_work, - work); - struct smc_link_group *lgr = ib_work->lgr; - - if (list_empty(&lgr->list)) - goto out; - smcr_link_up(lgr, ib_work->smcibdev, ib_work->ibport); -out: - kfree(ib_work); -} - static void smc_link_down_work(struct work_struct *work) { struct smc_link *link = container_of(work, struct smc_link, @@ -1262,7 +1204,7 @@ static void smc_link_down_work(struct work_struct *work) if (list_empty(&lgr->list)) return; - wake_up_interruptible_all(&lgr->llc_waiter); + wake_up_all(&lgr->llc_msg_waiter); mutex_lock(&lgr->llc_conf_mutex); smcr_link_down(link); mutex_unlock(&lgr->llc_conf_mutex); @@ -1326,7 +1268,7 @@ static bool smcr_lgr_match(struct smc_link_group *lgr, return false; for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (lgr->lnk[i].state != SMC_LNK_ACTIVE) + if (!smc_link_active(&lgr->lnk[i])) continue; if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) && !memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) && @@ -1369,7 +1311,7 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) && !lgr->sync_err && lgr->vlan_id == ini->vlan_id && - (role == SMC_CLNT || + (role == SMC_CLNT || ini->is_smcd || lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) { /* link group found */ ini->cln_first_contact = SMC_REUSE_CONTACT; @@ -1774,14 +1716,14 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn) { - if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk)) + if (!conn->lgr || conn->lgr->is_smcd || !smc_link_active(conn->lnk)) return; smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE); } void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn) { - if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk)) + if (!conn->lgr || conn->lgr->is_smcd || !smc_link_active(conn->lnk)) return; smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE); } @@ -1793,7 +1735,7 @@ void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn) if (!conn->lgr || conn->lgr->is_smcd) return; for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (!smc_link_usable(&conn->lgr->lnk[i])) + if (!smc_link_active(&conn->lgr->lnk[i])) continue; smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc, DMA_FROM_DEVICE); @@ -1807,7 +1749,7 @@ void smc_rmb_sync_sg_for_device(struct smc_connection *conn) if (!conn->lgr || conn->lgr->is_smcd) return; for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (!smc_link_usable(&conn->lgr->lnk[i])) + if (!smc_link_active(&conn->lgr->lnk[i])) continue; smc_ib_sync_sg_for_device(&conn->lgr->lnk[i], conn->rmb_desc, DMA_FROM_DEVICE); @@ -1830,8 +1772,12 @@ int smc_buf_create(struct smc_sock *smc, bool is_smcd) return rc; /* create rmb */ rc = __smc_buf_create(smc, is_smcd, true); - if (rc) + if (rc) { + mutex_lock(&smc->conn.lgr->sndbufs_lock); + list_del(&smc->conn.sndbuf_desc->list); + mutex_unlock(&smc->conn.lgr->sndbufs_lock); smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc); + } return rc; } @@ -1955,20 +1901,20 @@ static void smc_core_going_away(void) struct smc_ib_device *smcibdev; struct smcd_dev *smcd; - spin_lock(&smc_ib_devices.lock); + mutex_lock(&smc_ib_devices.mutex); list_for_each_entry(smcibdev, &smc_ib_devices.list, list) { int i; for (i = 0; i < SMC_MAX_PORTS; i++) set_bit(i, smcibdev->ports_going_away); } - spin_unlock(&smc_ib_devices.lock); + mutex_unlock(&smc_ib_devices.mutex); - spin_lock(&smcd_dev_list.lock); + mutex_lock(&smcd_dev_list.mutex); list_for_each_entry(smcd, &smcd_dev_list.list, list) { smcd->going_away = 1; } - spin_unlock(&smcd_dev_list.lock); + mutex_unlock(&smcd_dev_list.mutex); } /* Clean up all SMC link groups */ @@ -1980,10 +1926,10 @@ static void smc_lgrs_shutdown(void) smc_smcr_terminate_all(NULL); - spin_lock(&smcd_dev_list.lock); + mutex_lock(&smcd_dev_list.mutex); list_for_each_entry(smcd, &smcd_dev_list.list, list) smc_smcd_terminate_all(smcd); - spin_unlock(&smcd_dev_list.lock); + mutex_unlock(&smcd_dev_list.mutex); } static int smc_core_reboot_event(struct notifier_block *this, diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 86d160f0d187..1c4d5439d0ff 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -262,8 +262,10 @@ struct smc_link_group { struct work_struct llc_del_link_work; struct work_struct llc_event_work; /* llc event worker */ - wait_queue_head_t llc_waiter; + wait_queue_head_t llc_flow_waiter; /* w4 next llc event */ + wait_queue_head_t llc_msg_waiter; + /* w4 next llc msg */ struct smc_llc_flow llc_flow_lcl; /* llc local control field */ struct smc_llc_flow llc_flow_rmt; @@ -347,6 +349,11 @@ static inline bool smc_link_usable(struct smc_link *lnk) return true; } +static inline bool smc_link_active(struct smc_link *lnk) +{ + return lnk->state == SMC_LNK_ACTIVE; +} + struct smc_sock; struct smc_clc_msg_accept_confirm; struct smc_clc_msg_local; diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 562a52d01ad1..1c314dbdc7fa 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -16,6 +16,7 @@ #include <linux/workqueue.h> #include <linux/scatterlist.h> #include <linux/wait.h> +#include <linux/mutex.h> #include <rdma/ib_verbs.h> #include <rdma/ib_cache.h> @@ -33,7 +34,7 @@ #define SMC_QP_RNR_RETRY 7 /* 7: infinite */ struct smc_ib_devices smc_ib_devices = { /* smc-registered ib devices */ - .lock = __SPIN_LOCK_UNLOCKED(smc_ib_devices.lock), + .mutex = __MUTEX_INITIALIZER(smc_ib_devices.mutex), .list = LIST_HEAD_INIT(smc_ib_devices.list), }; @@ -505,6 +506,10 @@ long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev) int cqe_size_order, smc_order; long rc; + mutex_lock(&smcibdev->mutex); + rc = 0; + if (smcibdev->initialized) + goto out; /* the calculated number of cq entries fits to mlx5 cq allocation */ cqe_size_order = cache_line_size() == 128 ? 7 : 6; smc_order = MAX_ORDER - cqe_size_order - 1; @@ -516,7 +521,7 @@ long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev) rc = PTR_ERR_OR_ZERO(smcibdev->roce_cq_send); if (IS_ERR(smcibdev->roce_cq_send)) { smcibdev->roce_cq_send = NULL; - return rc; + goto out; } smcibdev->roce_cq_recv = ib_create_cq(smcibdev->ibdev, smc_wr_rx_cq_handler, NULL, @@ -528,21 +533,26 @@ long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev) } smc_wr_add_dev(smcibdev); smcibdev->initialized = 1; - return rc; + goto out; err: ib_destroy_cq(smcibdev->roce_cq_send); +out: + mutex_unlock(&smcibdev->mutex); return rc; } static void smc_ib_cleanup_per_ibdev(struct smc_ib_device *smcibdev) { + mutex_lock(&smcibdev->mutex); if (!smcibdev->initialized) - return; + goto out; smcibdev->initialized = 0; ib_destroy_cq(smcibdev->roce_cq_recv); ib_destroy_cq(smcibdev->roce_cq_send); smc_wr_remove_dev(smcibdev); +out: + mutex_unlock(&smcibdev->mutex); } static struct ib_client smc_ib_client; @@ -565,9 +575,10 @@ static int smc_ib_add_dev(struct ib_device *ibdev) INIT_WORK(&smcibdev->port_event_work, smc_ib_port_event_work); atomic_set(&smcibdev->lnk_cnt, 0); init_waitqueue_head(&smcibdev->lnks_deleted); - spin_lock(&smc_ib_devices.lock); + mutex_init(&smcibdev->mutex); + mutex_lock(&smc_ib_devices.mutex); list_add_tail(&smcibdev->list, &smc_ib_devices.list); - spin_unlock(&smc_ib_devices.lock); + mutex_unlock(&smc_ib_devices.mutex); ib_set_client_data(ibdev, &smc_ib_client, smcibdev); INIT_IB_EVENT_HANDLER(&smcibdev->event_handler, smcibdev->ibdev, smc_ib_global_event_handler); @@ -602,9 +613,9 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data) { struct smc_ib_device *smcibdev = client_data; - spin_lock(&smc_ib_devices.lock); + mutex_lock(&smc_ib_devices.mutex); list_del_init(&smcibdev->list); /* remove from smc_ib_devices */ - spin_unlock(&smc_ib_devices.lock); + mutex_unlock(&smc_ib_devices.mutex); pr_warn_ratelimited("smc: removing ib device %s\n", smcibdev->ibdev->name); smc_smcr_terminate_all(smcibdev); diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index e6a696ae15f3..2ce481187dd0 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -14,6 +14,7 @@ #include <linux/interrupt.h> #include <linux/if_ether.h> +#include <linux/mutex.h> #include <linux/wait.h> #include <rdma/ib_verbs.h> #include <net/smc.h> @@ -25,7 +26,7 @@ struct smc_ib_devices { /* list of smc ib devices definition */ struct list_head list; - spinlock_t lock; /* protects list of smc ib devices */ + struct mutex mutex; /* protects list of smc ib devices */ }; extern struct smc_ib_devices smc_ib_devices; /* list of smc ib devices */ @@ -51,6 +52,7 @@ struct smc_ib_device { /* ib-device infos for smc */ DECLARE_BITMAP(ports_going_away, SMC_MAX_PORTS); atomic_t lnk_cnt; /* number of links on ibdev */ wait_queue_head_t lnks_deleted; /* wait 4 removal of all links*/ + struct mutex mutex; /* protect dev setup+cleanup */ }; struct smc_buf_desc; diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index 91f85fc09fb8..998c525de785 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -7,6 +7,7 @@ */ #include <linux/spinlock.h> +#include <linux/mutex.h> #include <linux/slab.h> #include <asm/page.h> @@ -17,7 +18,7 @@ struct smcd_dev_list smcd_dev_list = { .list = LIST_HEAD_INIT(smcd_dev_list.list), - .lock = __SPIN_LOCK_UNLOCKED(smcd_dev_list.lock) + .mutex = __MUTEX_INITIALIZER(smcd_dev_list.mutex) }; /* Test if an ISM communication is possible. */ @@ -317,9 +318,9 @@ EXPORT_SYMBOL_GPL(smcd_alloc_dev); int smcd_register_dev(struct smcd_dev *smcd) { - spin_lock(&smcd_dev_list.lock); + mutex_lock(&smcd_dev_list.mutex); list_add_tail(&smcd->list, &smcd_dev_list.list); - spin_unlock(&smcd_dev_list.lock); + mutex_unlock(&smcd_dev_list.mutex); pr_warn_ratelimited("smc: adding smcd device %s with pnetid %.16s%s\n", dev_name(&smcd->dev), smcd->pnetid, @@ -333,9 +334,9 @@ void smcd_unregister_dev(struct smcd_dev *smcd) { pr_warn_ratelimited("smc: removing smcd device %s\n", dev_name(&smcd->dev)); - spin_lock(&smcd_dev_list.lock); + mutex_lock(&smcd_dev_list.mutex); list_del_init(&smcd->list); - spin_unlock(&smcd_dev_list.lock); + mutex_unlock(&smcd_dev_list.mutex); smcd->going_away = 1; smc_smcd_terminate_all(smcd); flush_workqueue(smcd->event_wq); diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h index 4da946cbfa29..81cc4537efd3 100644 --- a/net/smc/smc_ism.h +++ b/net/smc/smc_ism.h @@ -10,12 +10,13 @@ #define SMCD_ISM_H #include <linux/uio.h> +#include <linux/mutex.h> #include "smc.h" struct smcd_dev_list { /* List of SMCD devices */ struct list_head list; - spinlock_t lock; /* Protects list of devices */ + struct mutex mutex; /* Protects list of devices */ }; extern struct smcd_dev_list smcd_dev_list; /* list of smcd devices */ diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 391237b601fe..df5b0a6ea848 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -186,6 +186,26 @@ static inline void smc_llc_flow_qentry_set(struct smc_llc_flow *flow, flow->qentry = qentry; } +static void smc_llc_flow_parallel(struct smc_link_group *lgr, u8 flow_type, + struct smc_llc_qentry *qentry) +{ + u8 msg_type = qentry->msg.raw.hdr.common.type; + + if ((msg_type == SMC_LLC_ADD_LINK || msg_type == SMC_LLC_DELETE_LINK) && + flow_type != msg_type && !lgr->delayed_event) { + lgr->delayed_event = qentry; + return; + } + /* drop parallel or already-in-progress llc requests */ + if (flow_type != msg_type) + pr_warn_once("smc: SMC-R lg %*phN dropped parallel " + "LLC msg: msg %d flow %d role %d\n", + SMC_LGR_ID_SIZE, &lgr->id, + qentry->msg.raw.hdr.common.type, + flow_type, lgr->role); + kfree(qentry); +} + /* try to start a new llc flow, initiated by an incoming llc msg */ static bool smc_llc_flow_start(struct smc_llc_flow *flow, struct smc_llc_qentry *qentry) @@ -195,14 +215,7 @@ static bool smc_llc_flow_start(struct smc_llc_flow *flow, spin_lock_bh(&lgr->llc_flow_lock); if (flow->type) { /* a flow is already active */ - if ((qentry->msg.raw.hdr.common.type == SMC_LLC_ADD_LINK || - qentry->msg.raw.hdr.common.type == SMC_LLC_DELETE_LINK) && - !lgr->delayed_event) { - lgr->delayed_event = qentry; - } else { - /* forget this llc request */ - kfree(qentry); - } + smc_llc_flow_parallel(lgr, flow->type, qentry); spin_unlock_bh(&lgr->llc_flow_lock); return false; } @@ -222,8 +235,8 @@ static bool smc_llc_flow_start(struct smc_llc_flow *flow, } if (qentry == lgr->delayed_event) lgr->delayed_event = NULL; - spin_unlock_bh(&lgr->llc_flow_lock); smc_llc_flow_qentry_set(flow, qentry); + spin_unlock_bh(&lgr->llc_flow_lock); return true; } @@ -251,11 +264,11 @@ again: return 0; } spin_unlock_bh(&lgr->llc_flow_lock); - rc = wait_event_interruptible_timeout(lgr->llc_waiter, - (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE && - (lgr->llc_flow_rmt.type == SMC_LLC_FLOW_NONE || - lgr->llc_flow_rmt.type == allowed_remote)), - SMC_LLC_WAIT_TIME); + rc = wait_event_timeout(lgr->llc_flow_waiter, (list_empty(&lgr->list) || + (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE && + (lgr->llc_flow_rmt.type == SMC_LLC_FLOW_NONE || + lgr->llc_flow_rmt.type == allowed_remote))), + SMC_LLC_WAIT_TIME * 10); if (!rc) return -ETIMEDOUT; goto again; @@ -272,7 +285,7 @@ void smc_llc_flow_stop(struct smc_link_group *lgr, struct smc_llc_flow *flow) flow == &lgr->llc_flow_lcl) schedule_work(&lgr->llc_event_work); else - wake_up_interruptible(&lgr->llc_waiter); + wake_up(&lgr->llc_flow_waiter); } /* lnk is optional and used for early wakeup when link goes down, useful in @@ -283,26 +296,32 @@ struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr, int time_out, u8 exp_msg) { struct smc_llc_flow *flow = &lgr->llc_flow_lcl; + u8 rcv_msg; - wait_event_interruptible_timeout(lgr->llc_waiter, - (flow->qentry || - (lnk && !smc_link_usable(lnk)) || - list_empty(&lgr->list)), - time_out); + wait_event_timeout(lgr->llc_msg_waiter, + (flow->qentry || + (lnk && !smc_link_usable(lnk)) || + list_empty(&lgr->list)), + time_out); if (!flow->qentry || (lnk && !smc_link_usable(lnk)) || list_empty(&lgr->list)) { smc_llc_flow_qentry_del(flow); goto out; } - if (exp_msg && flow->qentry->msg.raw.hdr.common.type != exp_msg) { + rcv_msg = flow->qentry->msg.raw.hdr.common.type; + if (exp_msg && rcv_msg != exp_msg) { if (exp_msg == SMC_LLC_ADD_LINK && - flow->qentry->msg.raw.hdr.common.type == - SMC_LLC_DELETE_LINK) { + rcv_msg == SMC_LLC_DELETE_LINK) { /* flow_start will delay the unexpected msg */ smc_llc_flow_start(&lgr->llc_flow_lcl, smc_llc_flow_qentry_clr(flow)); return NULL; } + pr_warn_once("smc: SMC-R lg %*phN dropped unexpected LLC msg: " + "msg %d exp %d flow %d role %d flags %x\n", + SMC_LGR_ID_SIZE, &lgr->id, rcv_msg, exp_msg, + flow->type, lgr->role, + flow->qentry->msg.raw.hdr.flags); smc_llc_flow_qentry_del(flow); } out: @@ -409,7 +428,7 @@ static int smc_llc_send_confirm_rkey(struct smc_link *send_link, rtok_ix = 1; for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { link = &send_link->lgr->lnk[i]; - if (link->state == SMC_LNK_ACTIVE && link != send_link) { + if (smc_link_active(link) && link != send_link) { rkeyllc->rtoken[rtok_ix].link_id = link->link_id; rkeyllc->rtoken[rtok_ix].rmb_key = htonl(rmb_desc->mr_rx[link->link_idx]->rkey); @@ -876,6 +895,36 @@ out: return rc; } +/* as an SMC client, invite server to start the add_link processing */ +static void smc_llc_cli_add_link_invite(struct smc_link *link, + struct smc_llc_qentry *qentry) +{ + struct smc_link_group *lgr = smc_get_lgr(link); + struct smc_init_info ini; + + if (lgr->type == SMC_LGR_SYMMETRIC || + lgr->type == SMC_LGR_ASYMMETRIC_PEER) + goto out; + + ini.vlan_id = lgr->vlan_id; + smc_pnet_find_alt_roce(lgr, &ini, link->smcibdev); + if (!ini.ib_dev) + goto out; + + smc_llc_send_add_link(link, ini.ib_dev->mac[ini.ib_port - 1], + ini.ib_gid, NULL, SMC_LLC_REQ); +out: + kfree(qentry); +} + +static bool smc_llc_is_local_add_link(union smc_llc_msg *llc) +{ + if (llc->raw.hdr.common.type == SMC_LLC_ADD_LINK && + !llc->add_link.qp_mtu && !llc->add_link.link_num) + return true; + return false; +} + static void smc_llc_process_cli_add_link(struct smc_link_group *lgr) { struct smc_llc_qentry *qentry; @@ -883,7 +932,10 @@ static void smc_llc_process_cli_add_link(struct smc_link_group *lgr) qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl); mutex_lock(&lgr->llc_conf_mutex); - smc_llc_cli_add_link(qentry->link, qentry); + if (smc_llc_is_local_add_link(&qentry->msg)) + smc_llc_cli_add_link_invite(qentry->link, qentry); + else + smc_llc_cli_add_link(qentry->link, qentry); mutex_unlock(&lgr->llc_conf_mutex); } @@ -892,7 +944,7 @@ static int smc_llc_active_link_count(struct smc_link_group *lgr) int i, link_count = 0; for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (!smc_link_usable(&lgr->lnk[i])) + if (!smc_link_active(&lgr->lnk[i])) continue; link_count++; } @@ -1032,12 +1084,14 @@ static int smc_llc_srv_conf_link(struct smc_link *link, if (rc) return -ENOLINK; /* receive CONFIRM LINK response over the RoCE fabric */ - qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_FIRST_TIME, - SMC_LLC_CONFIRM_LINK); - if (!qentry) { + qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_FIRST_TIME, 0); + if (!qentry || + qentry->msg.raw.hdr.common.type != SMC_LLC_CONFIRM_LINK) { /* send DELETE LINK */ smc_llc_send_delete_link(link, link_new->link_id, SMC_LLC_REQ, false, SMC_LLC_DEL_LOST_PATH); + if (qentry) + smc_llc_flow_qentry_del(&lgr->llc_flow_lcl); return -ENOLINK; } smc_llc_save_peer_uid(qentry); @@ -1139,14 +1193,14 @@ static void smc_llc_process_srv_add_link(struct smc_link_group *lgr) mutex_unlock(&lgr->llc_conf_mutex); } -/* enqueue a local add_link req to trigger a new add_link flow, only as SERV */ -void smc_llc_srv_add_link_local(struct smc_link *link) +/* enqueue a local add_link req to trigger a new add_link flow */ +void smc_llc_add_link_local(struct smc_link *link) { struct smc_llc_msg_add_link add_llc = {0}; add_llc.hd.length = sizeof(add_llc); add_llc.hd.common.type = SMC_LLC_ADD_LINK; - /* no dev and port needed, we as server ignore client data anyway */ + /* no dev and port needed */ smc_llc_enqueue(link, (union smc_llc_msg *)&add_llc); } @@ -1222,8 +1276,8 @@ static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr) smc_llc_send_message(lnk, &qentry->msg); /* response */ if (smc_link_downing(&lnk_del->state)) { - smc_switch_conns(lgr, lnk_del, false); - smc_wr_tx_wait_no_pending_sends(lnk_del); + if (smc_switch_conns(lgr, lnk_del, false)) + smc_wr_tx_wait_no_pending_sends(lnk_del); } smcr_link_clear(lnk_del, true); @@ -1297,8 +1351,8 @@ static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr) goto out; /* asymmetric link already deleted */ if (smc_link_downing(&lnk_del->state)) { - smc_switch_conns(lgr, lnk_del, false); - smc_wr_tx_wait_no_pending_sends(lnk_del); + if (smc_switch_conns(lgr, lnk_del, false)) + smc_wr_tx_wait_no_pending_sends(lnk_del); } if (!list_empty(&lgr->list)) { /* qentry is either a request from peer (send it back to @@ -1326,7 +1380,7 @@ static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr) if (lgr->type == SMC_LGR_SINGLE && !list_empty(&lgr->list)) { /* trigger setup of asymm alt link */ - smc_llc_srv_add_link_local(lnk); + smc_llc_add_link_local(lnk); } out: mutex_unlock(&lgr->llc_conf_mutex); @@ -1455,11 +1509,22 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry) if (list_empty(&lgr->list)) goto out; /* lgr is terminating */ if (lgr->role == SMC_CLNT) { - if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_ADD_LINK) { + if (smc_llc_is_local_add_link(llc)) { + if (lgr->llc_flow_lcl.type == + SMC_LLC_FLOW_ADD_LINK) + break; /* add_link in progress */ + if (smc_llc_flow_start(&lgr->llc_flow_lcl, + qentry)) { + schedule_work(&lgr->llc_add_link_work); + } + return; + } + if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_ADD_LINK && + !lgr->llc_flow_lcl.qentry) { /* a flow is waiting for this message */ smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, qentry); - wake_up_interruptible(&lgr->llc_waiter); + wake_up(&lgr->llc_msg_waiter); } else if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) { schedule_work(&lgr->llc_add_link_work); @@ -1474,33 +1539,18 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry) if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) { /* a flow is waiting for this message */ smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, qentry); - wake_up_interruptible(&lgr->llc_waiter); + wake_up(&lgr->llc_msg_waiter); return; } break; case SMC_LLC_DELETE_LINK: - if (lgr->role == SMC_CLNT) { - /* server requests to delete this link, send response */ - if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) { - /* DEL LINK REQ during ADD LINK SEQ */ - smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, - qentry); - wake_up_interruptible(&lgr->llc_waiter); - } else if (smc_llc_flow_start(&lgr->llc_flow_lcl, - qentry)) { - schedule_work(&lgr->llc_del_link_work); - } - } else { - if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_ADD_LINK && - !lgr->llc_flow_lcl.qentry) { - /* DEL LINK REQ during ADD LINK SEQ */ - smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, - qentry); - wake_up_interruptible(&lgr->llc_waiter); - } else if (smc_llc_flow_start(&lgr->llc_flow_lcl, - qentry)) { - schedule_work(&lgr->llc_del_link_work); - } + if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_ADD_LINK && + !lgr->llc_flow_lcl.qentry) { + /* DEL LINK REQ during ADD LINK SEQ */ + smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, qentry); + wake_up(&lgr->llc_msg_waiter); + } else if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) { + schedule_work(&lgr->llc_del_link_work); } return; case SMC_LLC_CONFIRM_RKEY: @@ -1566,23 +1616,30 @@ again: static void smc_llc_rx_response(struct smc_link *link, struct smc_llc_qentry *qentry) { + enum smc_llc_flowtype flowtype = link->lgr->llc_flow_lcl.type; + struct smc_llc_flow *flow = &link->lgr->llc_flow_lcl; u8 llc_type = qentry->msg.raw.hdr.common.type; switch (llc_type) { case SMC_LLC_TEST_LINK: - if (link->state == SMC_LNK_ACTIVE) + if (smc_link_active(link)) complete(&link->llc_testlink_resp); break; case SMC_LLC_ADD_LINK: - case SMC_LLC_DELETE_LINK: - case SMC_LLC_CONFIRM_LINK: case SMC_LLC_ADD_LINK_CONT: + case SMC_LLC_CONFIRM_LINK: + if (flowtype != SMC_LLC_FLOW_ADD_LINK || flow->qentry) + break; /* drop out-of-flow response */ + goto assign; + case SMC_LLC_DELETE_LINK: + if (flowtype != SMC_LLC_FLOW_DEL_LINK || flow->qentry) + break; /* drop out-of-flow response */ + goto assign; case SMC_LLC_CONFIRM_RKEY: case SMC_LLC_DELETE_RKEY: - /* assign responses to the local flow, we requested them */ - smc_llc_flow_qentry_set(&link->lgr->llc_flow_lcl, qentry); - wake_up_interruptible(&link->lgr->llc_waiter); - return; + if (flowtype != SMC_LLC_FLOW_RKEY || flow->qentry) + break; /* drop out-of-flow response */ + goto assign; case SMC_LLC_CONFIRM_RKEY_CONT: /* not used because max links is 3 */ break; @@ -1591,6 +1648,11 @@ static void smc_llc_rx_response(struct smc_link *link, break; } kfree(qentry); + return; +assign: + /* assign responses to the local flow, we requested them */ + smc_llc_flow_qentry_set(&link->lgr->llc_flow_lcl, qentry); + wake_up(&link->lgr->llc_msg_waiter); } static void smc_llc_enqueue(struct smc_link *link, union smc_llc_msg *llc) @@ -1616,7 +1678,7 @@ static void smc_llc_enqueue(struct smc_link *link, union smc_llc_msg *llc) spin_lock_irqsave(&lgr->llc_event_q_lock, flags); list_add_tail(&qentry->list, &lgr->llc_event_q); spin_unlock_irqrestore(&lgr->llc_event_q_lock, flags); - schedule_work(&link->lgr->llc_event_work); + schedule_work(&lgr->llc_event_work); } /* copy received msg and add it to the event queue */ @@ -1644,7 +1706,7 @@ static void smc_llc_testlink_work(struct work_struct *work) u8 user_data[16] = { 0 }; int rc; - if (link->state != SMC_LNK_ACTIVE) + if (!smc_link_active(link)) return; /* don't reschedule worker */ expire_time = link->wr_rx_tstamp + link->llc_testlink_time; if (time_is_after_jiffies(expire_time)) { @@ -1656,7 +1718,7 @@ static void smc_llc_testlink_work(struct work_struct *work) /* receive TEST LINK response over RoCE fabric */ rc = wait_for_completion_interruptible_timeout(&link->llc_testlink_resp, SMC_LLC_WAIT_TIME); - if (link->state != SMC_LNK_ACTIVE) + if (!smc_link_active(link)) return; /* link state changed */ if (rc <= 0) { smcr_link_down_cond_sched(link); @@ -1677,7 +1739,8 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc) INIT_LIST_HEAD(&lgr->llc_event_q); spin_lock_init(&lgr->llc_event_q_lock); spin_lock_init(&lgr->llc_flow_lock); - init_waitqueue_head(&lgr->llc_waiter); + init_waitqueue_head(&lgr->llc_flow_waiter); + init_waitqueue_head(&lgr->llc_msg_waiter); mutex_init(&lgr->llc_conf_mutex); lgr->llc_testlink_time = net->ipv4.sysctl_tcp_keepalive_time; } @@ -1686,7 +1749,8 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc) void smc_llc_lgr_clear(struct smc_link_group *lgr) { smc_llc_event_flush(lgr); - wake_up_interruptible_all(&lgr->llc_waiter); + wake_up_all(&lgr->llc_flow_waiter); + wake_up_all(&lgr->llc_msg_waiter); cancel_work_sync(&lgr->llc_event_work); cancel_work_sync(&lgr->llc_add_link_work); cancel_work_sync(&lgr->llc_del_link_work); diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h index a5d2fe3eea61..cc00a2ec4e92 100644 --- a/net/smc/smc_llc.h +++ b/net/smc/smc_llc.h @@ -103,7 +103,7 @@ void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn); int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry); int smc_llc_srv_add_link(struct smc_link *link); -void smc_llc_srv_add_link_local(struct smc_link *link); +void smc_llc_add_link_local(struct smc_link *link); int smc_llc_init(void) __init; #endif /* SMC_LLC_H */ diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 014d91b9778e..30e5fac7034e 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -12,6 +12,7 @@ #include <linux/module.h> #include <linux/list.h> #include <linux/ctype.h> +#include <linux/mutex.h> #include <net/netlink.h> #include <net/genetlink.h> @@ -129,7 +130,7 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) return rc; /* remove ib devices */ - spin_lock(&smc_ib_devices.lock); + mutex_lock(&smc_ib_devices.mutex); list_for_each_entry(ibdev, &smc_ib_devices.list, list) { for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) { if (ibdev->pnetid_by_user[ibport] && @@ -149,9 +150,9 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) } } } - spin_unlock(&smc_ib_devices.lock); + mutex_unlock(&smc_ib_devices.mutex); /* remove smcd devices */ - spin_lock(&smcd_dev_list.lock); + mutex_lock(&smcd_dev_list.mutex); list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) { if (smcd_dev->pnetid_by_user && (!pnet_name || @@ -165,7 +166,7 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) rc = 0; } } - spin_unlock(&smcd_dev_list.lock); + mutex_unlock(&smcd_dev_list.mutex); return rc; } @@ -240,14 +241,14 @@ static bool smc_pnet_apply_ib(struct smc_ib_device *ib_dev, u8 ib_port, u8 pnet_null[SMC_MAX_PNETID_LEN] = {0}; bool applied = false; - spin_lock(&smc_ib_devices.lock); + mutex_lock(&smc_ib_devices.mutex); if (smc_pnet_match(ib_dev->pnetid[ib_port - 1], pnet_null)) { memcpy(ib_dev->pnetid[ib_port - 1], pnet_name, SMC_MAX_PNETID_LEN); ib_dev->pnetid_by_user[ib_port - 1] = true; applied = true; } - spin_unlock(&smc_ib_devices.lock); + mutex_unlock(&smc_ib_devices.mutex); return applied; } @@ -258,13 +259,13 @@ static bool smc_pnet_apply_smcd(struct smcd_dev *smcd_dev, char *pnet_name) u8 pnet_null[SMC_MAX_PNETID_LEN] = {0}; bool applied = false; - spin_lock(&smcd_dev_list.lock); + mutex_lock(&smcd_dev_list.mutex); if (smc_pnet_match(smcd_dev->pnetid, pnet_null)) { memcpy(smcd_dev->pnetid, pnet_name, SMC_MAX_PNETID_LEN); smcd_dev->pnetid_by_user = true; applied = true; } - spin_unlock(&smcd_dev_list.lock); + mutex_unlock(&smcd_dev_list.mutex); return applied; } @@ -300,7 +301,7 @@ static struct smc_ib_device *smc_pnet_find_ib(char *ib_name) { struct smc_ib_device *ibdev; - spin_lock(&smc_ib_devices.lock); + mutex_lock(&smc_ib_devices.mutex); list_for_each_entry(ibdev, &smc_ib_devices.list, list) { if (!strncmp(ibdev->ibdev->name, ib_name, sizeof(ibdev->ibdev->name)) || @@ -311,7 +312,7 @@ static struct smc_ib_device *smc_pnet_find_ib(char *ib_name) } ibdev = NULL; out: - spin_unlock(&smc_ib_devices.lock); + mutex_unlock(&smc_ib_devices.mutex); return ibdev; } @@ -320,7 +321,7 @@ static struct smcd_dev *smc_pnet_find_smcd(char *smcd_name) { struct smcd_dev *smcd_dev; - spin_lock(&smcd_dev_list.lock); + mutex_lock(&smcd_dev_list.mutex); list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) { if (!strncmp(dev_name(&smcd_dev->dev), smcd_name, IB_DEVICE_NAME_MAX - 1)) @@ -328,7 +329,7 @@ static struct smcd_dev *smc_pnet_find_smcd(char *smcd_name) } smcd_dev = NULL; out: - spin_unlock(&smcd_dev_list.lock); + mutex_unlock(&smcd_dev_list.mutex); return smcd_dev; } @@ -825,7 +826,7 @@ static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id, int i; ini->ib_dev = NULL; - spin_lock(&smc_ib_devices.lock); + mutex_lock(&smc_ib_devices.mutex); list_for_each_entry(ibdev, &smc_ib_devices.list, list) { if (ibdev == known_dev) continue; @@ -844,7 +845,7 @@ static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id, } } out: - spin_unlock(&smc_ib_devices.lock); + mutex_unlock(&smc_ib_devices.mutex); } /* find alternate roce device with same pnet_id and vlan_id */ @@ -863,7 +864,7 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev, { struct smc_ib_device *ibdev; - spin_lock(&smc_ib_devices.lock); + mutex_lock(&smc_ib_devices.mutex); list_for_each_entry(ibdev, &smc_ib_devices.list, list) { struct net_device *ndev; int i; @@ -888,7 +889,7 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev, } } } - spin_unlock(&smc_ib_devices.lock); + mutex_unlock(&smc_ib_devices.mutex); } /* Determine the corresponding IB device port based on the hardware PNETID. @@ -924,7 +925,7 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) return; /* pnetid could not be determined */ - spin_lock(&smcd_dev_list.lock); + mutex_lock(&smcd_dev_list.mutex); list_for_each_entry(ismdev, &smcd_dev_list.list, list) { if (smc_pnet_match(ismdev->pnetid, ndev_pnetid) && !ismdev->going_away) { @@ -932,7 +933,7 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, break; } } - spin_unlock(&smcd_dev_list.lock); + mutex_unlock(&smcd_dev_list.mutex); } /* PNET table analysis for a given sock: diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 7239ba9b99dc..1e23cdd41eb1 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -169,6 +169,8 @@ void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context) static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx) { *idx = link->wr_tx_cnt; + if (!smc_link_usable(link)) + return -ENOLINK; for_each_clear_bit(*idx, link->wr_tx_mask, link->wr_tx_cnt) { if (!test_and_set_bit(*idx, link->wr_tx_mask)) return 0; @@ -560,15 +562,15 @@ void smc_wr_free_link(struct smc_link *lnk) { struct ib_device *ibdev; + if (!lnk->smcibdev) + return; + ibdev = lnk->smcibdev->ibdev; + if (smc_wr_tx_wait_no_pending_sends(lnk)) memset(lnk->wr_tx_mask, 0, BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask)); - if (!lnk->smcibdev) - return; - ibdev = lnk->smcibdev->ibdev; - if (lnk->wr_rx_dma_addr) { ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr, SMC_WR_BUF_SIZE * lnk->wr_rx_cnt, diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 39e14d5edaf1..e9d0953522f0 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -1317,6 +1317,7 @@ rpc_gssd_dummy_populate(struct dentry *root, struct rpc_pipe *pipe_data) q.len = strlen(gssd_dummy_clnt_dir[0].name); clnt_dentry = d_hash_and_lookup(gssd_dentry, &q); if (!clnt_dentry) { + __rpc_depopulate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1); pipe_dentry = ERR_PTR(-ENOENT); goto out; } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 5c4ec9386f81..d5805fa1d066 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -44,6 +44,7 @@ #include <net/tcp.h> #include <net/tcp_states.h> #include <linux/uaccess.h> +#include <linux/highmem.h> #include <asm/ioctls.h> #include <linux/sunrpc/types.h> @@ -563,7 +564,7 @@ static int svc_udp_sendto(struct svc_rqst *rqstp) .msg_control = cmh, .msg_controllen = sizeof(buffer), }; - unsigned int uninitialized_var(sent); + unsigned int sent; int err; svc_udp_release_rqst(rqstp); @@ -1080,7 +1081,7 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp) struct msghdr msg = { .msg_flags = 0, }; - unsigned int uninitialized_var(sent); + unsigned int sent; int err; svc_tcp_release_rqst(rqstp); diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 6f7d82fb1eb0..be11d672b5b9 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -1118,6 +1118,7 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, base = 0; } else { base -= buf->head[0].iov_len; + subbuf->head[0].iov_base = buf->head[0].iov_base; subbuf->head[0].iov_len = 0; } @@ -1130,6 +1131,8 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, base = 0; } else { base -= buf->page_len; + subbuf->pages = buf->pages; + subbuf->page_base = 0; subbuf->page_len = 0; } @@ -1141,6 +1144,7 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, base = 0; } else { base -= buf->tail[0].iov_len; + subbuf->tail[0].iov_base = buf->tail[0].iov_base; subbuf->tail[0].iov_len = 0; } diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index ef997880e17a..b647562a26dd 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -367,7 +367,7 @@ static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc) trace_xprtrdma_wc_fastreg(wc, frwr); /* The MR will get recycled when the associated req is retransmitted */ - rpcrdma_flush_disconnect(cq, wc); + rpcrdma_flush_disconnect(cq->cq_context, wc); } /** @@ -452,7 +452,7 @@ static void frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc) trace_xprtrdma_wc_li(wc, frwr); __frwr_release_mr(wc, mr); - rpcrdma_flush_disconnect(cq, wc); + rpcrdma_flush_disconnect(cq->cq_context, wc); } /** @@ -474,7 +474,7 @@ static void frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc) __frwr_release_mr(wc, mr); complete(&frwr->fr_linv_done); - rpcrdma_flush_disconnect(cq, wc); + rpcrdma_flush_disconnect(cq->cq_context, wc); } /** @@ -582,7 +582,7 @@ static void frwr_wc_localinv_done(struct ib_cq *cq, struct ib_wc *wc) smp_rmb(); rpcrdma_complete_rqst(rep); - rpcrdma_flush_disconnect(cq, wc); + rpcrdma_flush_disconnect(cq->cq_context, wc); } /** diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 2081c8fbfa48..453bacc99907 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -71,7 +71,7 @@ static unsigned int rpcrdma_max_call_header_size(unsigned int maxsegs) size = RPCRDMA_HDRLEN_MIN; /* Maximum Read list size */ - size = maxsegs * rpcrdma_readchunk_maxsz * sizeof(__be32); + size += maxsegs * rpcrdma_readchunk_maxsz * sizeof(__be32); /* Minimal Read chunk size */ size += sizeof(__be32); /* segment count */ @@ -94,7 +94,7 @@ static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs) size = RPCRDMA_HDRLEN_MIN; /* Maximum Write list size */ - size = sizeof(__be32); /* segment count */ + size += sizeof(__be32); /* segment count */ size += maxsegs * rpcrdma_segment_maxsz * sizeof(__be32); size += sizeof(__be32); /* list discriminator */ @@ -1349,8 +1349,7 @@ rpcrdma_decode_error(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep, be32_to_cpup(p), be32_to_cpu(rep->rr_xid)); } - r_xprt->rx_stats.bad_reply_count++; - return -EREMOTEIO; + return -EIO; } /* Perform XID lookup, reconstruction of the RPC reply, and @@ -1387,13 +1386,11 @@ out: spin_unlock(&xprt->queue_lock); return; -/* If the incoming reply terminated a pending RPC, the next - * RPC call will post a replacement receive buffer as it is - * being marshaled. - */ out_badheader: trace_xprtrdma_reply_hdr(rep); r_xprt->rx_stats.bad_reply_count++; + rqst->rq_task->tk_status = status; + status = 0; goto out; } diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 0c4af7f5e241..053c8ab1265a 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -242,13 +242,18 @@ xprt_rdma_connect_worker(struct work_struct *work) rc = rpcrdma_xprt_connect(r_xprt); xprt_clear_connecting(xprt); - if (r_xprt->rx_ep && r_xprt->rx_ep->re_connect_status > 0) { + if (!rc) { xprt->connect_cookie++; xprt->stat.connect_count++; xprt->stat.connect_time += (long)jiffies - xprt->stat.connect_start; xprt_set_connected(xprt); rc = -EAGAIN; + } else { + /* Force a call to xprt_rdma_close to clean up */ + spin_lock(&xprt->transport_lock); + set_bit(XPRT_CLOSE_WAIT, &xprt->state); + spin_unlock(&xprt->transport_lock); } xprt_wake_pending_tasks(xprt, rc); } diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 2ae348377806..75c646743df3 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -84,7 +84,8 @@ static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep); static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt); static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt); -static int rpcrdma_ep_destroy(struct rpcrdma_ep *ep); +static void rpcrdma_ep_get(struct rpcrdma_ep *ep); +static int rpcrdma_ep_put(struct rpcrdma_ep *ep); static struct rpcrdma_regbuf * rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction, gfp_t flags); @@ -97,7 +98,8 @@ static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb); */ static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt) { - struct rdma_cm_id *id = r_xprt->rx_ep->re_id; + struct rpcrdma_ep *ep = r_xprt->rx_ep; + struct rdma_cm_id *id = ep->re_id; /* Flush Receives, then wait for deferred Reply work * to complete. @@ -108,6 +110,8 @@ static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt) * local invalidations. */ ib_drain_sq(id->qp); + + rpcrdma_ep_put(ep); } /** @@ -126,23 +130,27 @@ static void rpcrdma_qp_event_handler(struct ib_event *event, void *context) trace_xprtrdma_qp_event(ep, event); } +/* Ensure xprt_force_disconnect() is invoked exactly once when a + * connection is closed or lost. (The important thing is it needs + * to be invoked "at least" once). + */ +static void rpcrdma_force_disconnect(struct rpcrdma_ep *ep) +{ + if (atomic_add_unless(&ep->re_force_disconnect, 1, 1)) + xprt_force_disconnect(ep->re_xprt); +} + /** * rpcrdma_flush_disconnect - Disconnect on flushed completion - * @cq: completion queue + * @r_xprt: transport to disconnect * @wc: work completion entry * * Must be called in process context. */ -void rpcrdma_flush_disconnect(struct ib_cq *cq, struct ib_wc *wc) +void rpcrdma_flush_disconnect(struct rpcrdma_xprt *r_xprt, struct ib_wc *wc) { - struct rpcrdma_xprt *r_xprt = cq->cq_context; - struct rpc_xprt *xprt = &r_xprt->rx_xprt; - - if (wc->status != IB_WC_SUCCESS && - r_xprt->rx_ep->re_connect_status == 1) { - r_xprt->rx_ep->re_connect_status = -ECONNABORTED; - xprt_force_disconnect(xprt); - } + if (wc->status != IB_WC_SUCCESS) + rpcrdma_force_disconnect(r_xprt->rx_ep); } /** @@ -156,11 +164,12 @@ static void rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) struct ib_cqe *cqe = wc->wr_cqe; struct rpcrdma_sendctx *sc = container_of(cqe, struct rpcrdma_sendctx, sc_cqe); + struct rpcrdma_xprt *r_xprt = cq->cq_context; /* WARNING: Only wr_cqe and status are reliable at this point */ trace_xprtrdma_wc_send(sc, wc); - rpcrdma_sendctx_put_locked((struct rpcrdma_xprt *)cq->cq_context, sc); - rpcrdma_flush_disconnect(cq, wc); + rpcrdma_sendctx_put_locked(r_xprt, sc); + rpcrdma_flush_disconnect(r_xprt, wc); } /** @@ -195,7 +204,7 @@ static void rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) return; out_flushed: - rpcrdma_flush_disconnect(cq, wc); + rpcrdma_flush_disconnect(r_xprt, wc); rpcrdma_rep_destroy(rep); } @@ -239,7 +248,6 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) { struct sockaddr *sap = (struct sockaddr *)&id->route.addr.dst_addr; struct rpcrdma_ep *ep = id->context; - struct rpc_xprt *xprt = ep->re_xprt; might_sleep(); @@ -263,10 +271,9 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) /* fall through */ case RDMA_CM_EVENT_ADDR_CHANGE: ep->re_connect_status = -ENODEV; - xprt_force_disconnect(xprt); goto disconnected; case RDMA_CM_EVENT_ESTABLISHED: - kref_get(&ep->re_kref); + rpcrdma_ep_get(ep); ep->re_connect_status = 1; rpcrdma_update_cm_private(ep, &event->param.conn); trace_xprtrdma_inline_thresh(ep); @@ -274,22 +281,24 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) break; case RDMA_CM_EVENT_CONNECT_ERROR: ep->re_connect_status = -ENOTCONN; - goto disconnected; + goto wake_connect_worker; case RDMA_CM_EVENT_UNREACHABLE: ep->re_connect_status = -ENETUNREACH; - goto disconnected; + goto wake_connect_worker; case RDMA_CM_EVENT_REJECTED: dprintk("rpcrdma: connection to %pISpc rejected: %s\n", sap, rdma_reject_msg(id, event->status)); ep->re_connect_status = -ECONNREFUSED; if (event->status == IB_CM_REJ_STALE_CONN) - ep->re_connect_status = -EAGAIN; - goto disconnected; + ep->re_connect_status = -ENOTCONN; +wake_connect_worker: + wake_up_all(&ep->re_connect_wait); + return 0; case RDMA_CM_EVENT_DISCONNECTED: ep->re_connect_status = -ECONNABORTED; disconnected: - xprt_force_disconnect(xprt); - return rpcrdma_ep_destroy(ep); + rpcrdma_force_disconnect(ep); + return rpcrdma_ep_put(ep); default: break; } @@ -345,7 +354,7 @@ out: return ERR_PTR(rc); } -static void rpcrdma_ep_put(struct kref *kref) +static void rpcrdma_ep_destroy(struct kref *kref) { struct rpcrdma_ep *ep = container_of(kref, struct rpcrdma_ep, re_kref); @@ -369,13 +378,18 @@ static void rpcrdma_ep_put(struct kref *kref) module_put(THIS_MODULE); } +static noinline void rpcrdma_ep_get(struct rpcrdma_ep *ep) +{ + kref_get(&ep->re_kref); +} + /* Returns: * %0 if @ep still has a positive kref count, or * %1 if @ep was destroyed successfully. */ -static int rpcrdma_ep_destroy(struct rpcrdma_ep *ep) +static noinline int rpcrdma_ep_put(struct rpcrdma_ep *ep) { - return kref_put(&ep->re_kref, rpcrdma_ep_put); + return kref_put(&ep->re_kref, rpcrdma_ep_destroy); } static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) @@ -388,14 +402,14 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) ep = kzalloc(sizeof(*ep), GFP_NOFS); if (!ep) - return -EAGAIN; + return -ENOTCONN; ep->re_xprt = &r_xprt->rx_xprt; kref_init(&ep->re_kref); id = rpcrdma_create_id(r_xprt, ep); if (IS_ERR(id)) { - rc = PTR_ERR(id); - goto out_free; + kfree(ep); + return PTR_ERR(id); } __module_get(THIS_MODULE); device = id->device; @@ -492,11 +506,8 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) return 0; out_destroy: - rpcrdma_ep_destroy(ep); + rpcrdma_ep_put(ep); rdma_destroy_id(id); -out_free: - kfree(ep); - r_xprt->rx_ep = NULL; return rc; } @@ -512,22 +523,19 @@ int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt) struct rpcrdma_ep *ep; int rc; -retry: - rpcrdma_xprt_disconnect(r_xprt); rc = rpcrdma_ep_create(r_xprt); if (rc) return rc; ep = r_xprt->rx_ep; - ep->re_connect_status = 0; xprt_clear_connected(xprt); - rpcrdma_reset_cwnd(r_xprt); - rpcrdma_post_recvs(r_xprt, true); - rc = rpcrdma_sendctxs_create(r_xprt); - if (rc) - goto out; + /* Bump the ep's reference count while there are + * outstanding Receives. + */ + rpcrdma_ep_get(ep); + rpcrdma_post_recvs(r_xprt, true); rc = rdma_connect(ep->re_id, &ep->re_remote_cma); if (rc) @@ -538,22 +546,24 @@ retry: wait_event_interruptible(ep->re_connect_wait, ep->re_connect_status != 0); if (ep->re_connect_status <= 0) { - if (ep->re_connect_status == -EAGAIN) - goto retry; rc = ep->re_connect_status; goto out; } + rc = rpcrdma_sendctxs_create(r_xprt); + if (rc) { + rc = -ENOTCONN; + goto out; + } + rc = rpcrdma_reqs_setup(r_xprt); if (rc) { - rpcrdma_xprt_disconnect(r_xprt); + rc = -ENOTCONN; goto out; } rpcrdma_mrs_create(r_xprt); out: - if (rc) - ep->re_connect_status = rc; trace_xprtrdma_connect(r_xprt, rc); return rc; } @@ -587,7 +597,7 @@ void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt) rpcrdma_mrs_destroy(r_xprt); rpcrdma_sendctxs_destroy(r_xprt); - if (rpcrdma_ep_destroy(ep)) + if (rpcrdma_ep_put(ep)) rdma_destroy_id(id); r_xprt->rx_ep = NULL; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 0a16fdb09b2c..43974ef39a50 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -82,6 +82,7 @@ struct rpcrdma_ep { unsigned int re_max_inline_recv; int re_async_rc; int re_connect_status; + atomic_t re_force_disconnect; struct ib_qp_init_attr re_attr; wait_queue_head_t re_connect_wait; struct rpc_xprt *re_xprt; @@ -446,7 +447,7 @@ extern unsigned int xprt_rdma_memreg_strategy; /* * Endpoint calls - xprtrdma/verbs.c */ -void rpcrdma_flush_disconnect(struct ib_cq *cq, struct ib_wc *wc); +void rpcrdma_flush_disconnect(struct rpcrdma_xprt *r_xprt, struct ib_wc *wc); int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt); void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 914508ea9b84..c57aef829403 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -496,8 +496,8 @@ xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg, int flags, struct rpc_rqst *req) { struct xdr_buf *buf = &req->rq_private_buf; - size_t want, uninitialized_var(read); - ssize_t uninitialized_var(ret); + size_t want, read; + ssize_t ret; xs_read_header(transport, buf); @@ -844,7 +844,7 @@ static int xs_local_send_request(struct rpc_rqst *req) struct msghdr msg = { .msg_flags = XS_SENDMSG_FLAGS, }; - unsigned int uninitialized_var(sent); + unsigned int sent; int status; /* Close the stream if the previous transmission was incomplete */ @@ -915,7 +915,7 @@ static int xs_udp_send_request(struct rpc_rqst *req) .msg_namelen = xprt->addrlen, .msg_flags = XS_SENDMSG_FLAGS, }; - unsigned int uninitialized_var(sent); + unsigned int sent; int status; xs_pktdump("packet data:", @@ -999,7 +999,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req) .msg_flags = XS_SENDMSG_FLAGS, }; bool vm_wait = false; - unsigned int uninitialized_var(sent); + unsigned int sent; int status; /* Close the stream if the previous transmission was incomplete */ diff --git a/net/switchdev/Kconfig b/net/switchdev/Kconfig index 50f21a657007..18a2d980e11d 100644 --- a/net/switchdev/Kconfig +++ b/net/switchdev/Kconfig @@ -6,7 +6,7 @@ config NET_SWITCHDEV bool "Switch (and switch-ish) device support" depends on INET - ---help--- + help This module provides glue between core networking code and device drivers in order to support hardware switch chips in very generic meaning of the word "switch". This include devices supporting L2/L3 but diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig index 716b61a701a8..9dd780215eef 100644 --- a/net/tipc/Kconfig +++ b/net/tipc/Kconfig @@ -6,7 +6,7 @@ menuconfig TIPC tristate "The TIPC Protocol" depends on INET - ---help--- + help The Transparent Inter Process Communication (TIPC) protocol is specially designed for intra cluster communication. This protocol originates from Ericsson where it has been used in carrier grade @@ -55,6 +55,6 @@ config TIPC_DIAG tristate "TIPC: socket monitoring interface" depends on TIPC default y - ---help--- + help Support for TIPC socket monitoring interface used by ss tool. If unsure, say Y. diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 34ca7b789eba..e366ec9a7e4d 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -316,7 +316,6 @@ static int tipc_enable_bearer(struct net *net, const char *name, b->domain = disc_domain; b->net_plane = bearer_id + 'A'; b->priority = prio; - test_and_set_bit_lock(0, &b->up); refcount_set(&b->refcnt, 1); res = tipc_disc_create(net, b, &b->bcast_addr, &skb); @@ -326,6 +325,7 @@ static int tipc_enable_bearer(struct net *net, const char *name, goto rejected; } + test_and_set_bit_lock(0, &b->up); rcu_assign_pointer(tn->bearer_list[bearer_id], b); if (skb) tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr); diff --git a/net/tipc/link.c b/net/tipc/link.c index ee3b8d0576b8..d40f8e5b7683 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -827,11 +827,11 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) state |= l->bc_rcvlink->rcv_unacked; state |= l->rcv_unacked; state |= !skb_queue_empty(&l->transmq); - state |= !skb_queue_empty(&l->deferdq); probe = mstate->probing; probe |= l->silent_intv_cnt; if (probe || mstate->monitoring) l->silent_intv_cnt++; + probe |= !skb_queue_empty(&l->deferdq); if (l->snd_nxt == l->checkpoint) { tipc_link_update_cwin(l, 0, 0); probe = true; @@ -921,6 +921,21 @@ static void link_prepare_wakeup(struct tipc_link *l) } +/** + * tipc_link_set_skb_retransmit_time - set the time at which retransmission of + * the given skb should be next attempted + * @skb: skb to set a future retransmission time for + * @l: link the skb will be transmitted on + */ +static void tipc_link_set_skb_retransmit_time(struct sk_buff *skb, + struct tipc_link *l) +{ + if (link_is_bc_sndlink(l)) + TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM; + else + TIPC_SKB_CB(skb)->nxt_retr = TIPC_UC_RETR_TIME; +} + void tipc_link_reset(struct tipc_link *l) { struct sk_buff_head list; @@ -1036,9 +1051,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, return -ENOBUFS; } __skb_queue_tail(transmq, skb); - /* next retransmit attempt */ - if (link_is_bc_sndlink(l)) - TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM; + tipc_link_set_skb_retransmit_time(skb, l); __skb_queue_tail(xmitq, _skb); TIPC_SKB_CB(skb)->ackers = l->ackers; l->rcv_unacked = 0; @@ -1139,9 +1152,7 @@ static void tipc_link_advance_backlog(struct tipc_link *l, if (unlikely(skb == l->backlog[imp].target_bskb)) l->backlog[imp].target_bskb = NULL; __skb_queue_tail(&l->transmq, skb); - /* next retransmit attempt */ - if (link_is_bc_sndlink(l)) - TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM; + tipc_link_set_skb_retransmit_time(skb, l); __skb_queue_tail(xmitq, _skb); TIPC_SKB_CB(skb)->ackers = l->ackers; @@ -1584,8 +1595,7 @@ release: /* retransmit skb if unrestricted*/ if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr)) continue; - TIPC_SKB_CB(skb)->nxt_retr = (is_uc) ? - TIPC_UC_RETR_TIME : TIPC_BC_RETR_LIM; + tipc_link_set_skb_retransmit_time(skb, l); _skb = pskb_copy(skb, GFP_ATOMIC); if (!_skb) continue; diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 046e4cb3acea..01b64869a173 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -238,14 +238,14 @@ int tipc_msg_append(struct tipc_msg *_hdr, struct msghdr *m, int dlen, hdr = buf_msg(skb); curr = msg_blocks(hdr); mlen = msg_size(hdr); - cpy = min_t(int, rem, mss - mlen); + cpy = min_t(size_t, rem, mss - mlen); if (cpy != copy_from_iter(skb->data + mlen, cpy, &m->msg_iter)) return -EFAULT; msg_set_size(hdr, mlen + cpy); skb_put(skb, cpy); rem -= cpy; total += msg_blocks(hdr) - curr; - } while (rem); + } while (rem > 0); return total - accounted; } diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 26123f4177fd..a94f38333698 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1574,7 +1574,8 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen) break; send = min_t(size_t, dlen - sent, TIPC_MAX_USER_MSG_SIZE); blocks = tsk->snd_backlog; - if (tsk->oneway++ >= tsk->nagle_start && send <= maxnagle) { + if (tsk->oneway++ >= tsk->nagle_start && maxnagle && + send <= maxnagle) { rc = tipc_msg_append(hdr, m, send, maxnagle, txq); if (unlikely(rc < 0)) break; diff --git a/net/tls/Kconfig b/net/tls/Kconfig index 61ec78521a60..fa0724fd84b4 100644 --- a/net/tls/Kconfig +++ b/net/tls/Kconfig @@ -11,7 +11,7 @@ config TLS select STREAM_PARSER select NET_SOCK_MSG default n - ---help--- + help Enable kernel support for TLS protocol. This allows symmetric encryption handling of the TLS protocol to be done in-kernel. diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 24f64bc0de18..710bd44eaa49 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -670,7 +670,7 @@ static int tls_push_record(struct sock *sk, int flags, struct tls_prot_info *prot = &tls_ctx->prot_info; struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct tls_rec *rec = ctx->open_rec, *tmp = NULL; - u32 i, split_point, uninitialized_var(orig_end); + u32 i, split_point, orig_end; struct sk_msg *msg_pl, *msg_en; struct aead_request *req; bool split; diff --git a/net/unix/Kconfig b/net/unix/Kconfig index a23a5cca9753..b6c4282899ec 100644 --- a/net/unix/Kconfig +++ b/net/unix/Kconfig @@ -5,7 +5,7 @@ config UNIX tristate "Unix domain sockets" - ---help--- + help If you say Y here, you will include support for Unix domain sockets; sockets are the standard Unix mechanism for establishing and accessing network connections. Many commonly used programs such as @@ -29,6 +29,6 @@ config UNIX_DIAG tristate "UNIX: socket monitoring interface" depends on UNIX default n - ---help--- + help Support for UNIX socket monitoring interface used by the ss tool. If unsure, say Y. diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index dfbaf6bd8b1c..2700a63ab095 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -22,7 +22,7 @@ #include <net/af_vsock.h> static struct workqueue_struct *virtio_vsock_workqueue; -static struct virtio_vsock *the_virtio_vsock; +static struct virtio_vsock __rcu *the_virtio_vsock; static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */ struct virtio_vsock { diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 813e93644ae7..faf74850a1b5 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -25,13 +25,13 @@ config CFG80211 # using a different algorithm, though right now they shouldn't # (this is here rather than below to allow it to be a module) select CRYPTO_SHA256 if CFG80211_USE_KERNEL_REGDB_KEYS - ---help--- + help cfg80211 is the Linux wireless LAN (802.11) configuration API. Enable this if you have a wireless device. For more information refer to documentation on the wireless wiki: - http://wireless.kernel.org/en/developers/Documentation/cfg80211 + https://wireless.wiki.kernel.org/en/developers/Documentation/cfg80211 When built as a module it will be called cfg80211. @@ -71,7 +71,7 @@ config CFG80211_CERTIFICATION_ONUS bool "cfg80211 certification onus" depends on EXPERT default n - ---help--- + help You should disable this option unless you are both capable and willing to ensure your system will remain regulatory compliant with the features available under this option. @@ -124,7 +124,7 @@ config CFG80211_EXTRA_REGDB_KEYDIR config CFG80211_REG_CELLULAR_HINTS bool "cfg80211 regulatory support for cellular base station hints" depends on CFG80211_CERTIFICATION_ONUS - ---help--- + help This option enables support for parsing regulatory hints from cellular base stations. If enabled and at least one driver claims support for parsing cellular base station hints the @@ -137,7 +137,7 @@ config CFG80211_REG_CELLULAR_HINTS config CFG80211_REG_RELAX_NO_IR bool "cfg80211 support for NO_IR relaxation" depends on CFG80211_CERTIFICATION_ONUS - ---help--- + help This option enables support for relaxation of the NO_IR flag for situations that certain regulatory bodies have provided clarifications on how relaxation can occur. This feature has an inherent dependency on @@ -171,7 +171,7 @@ config CFG80211_DEFAULT_PS config CFG80211_DEBUGFS bool "cfg80211 DebugFS entries" depends on DEBUG_FS - ---help--- + help You can enable this if you want debugfs entries for cfg80211. If unsure, say N. @@ -228,7 +228,7 @@ config LIB80211_DEBUG bool "lib80211 debugging messages" depends on LIB80211 default n - ---help--- + help You can enable this if you want verbose debugging messages from lib80211. diff --git a/net/wireless/core.c b/net/wireless/core.c index f0226ae9561c..c623d9bf5096 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -497,6 +497,8 @@ use_default_name: INIT_WORK(&rdev->propagate_radar_detect_wk, cfg80211_propagate_radar_detect_wk); INIT_WORK(&rdev->propagate_cac_done_wk, cfg80211_propagate_cac_done_wk); + INIT_WORK(&rdev->mgmt_registrations_update_wk, + cfg80211_mgmt_registrations_update_wk); #ifdef CONFIG_CFG80211_DEFAULT_PS rdev->wiphy.flags |= WIPHY_FLAG_PS_ON_BY_DEFAULT; @@ -1047,6 +1049,7 @@ void wiphy_unregister(struct wiphy *wiphy) flush_work(&rdev->sched_scan_stop_wk); flush_work(&rdev->propagate_radar_detect_wk); flush_work(&rdev->propagate_cac_done_wk); + flush_work(&rdev->mgmt_registrations_update_wk); #ifdef CONFIG_PM if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup) @@ -1108,7 +1111,6 @@ static void __cfg80211_unregister_wdev(struct wireless_dev *wdev, bool sync) rdev->devlist_generation++; cfg80211_mlme_purge_registrations(wdev); - flush_work(&wdev->mgmt_registrations_update_wk); switch (wdev->iftype) { case NL80211_IFTYPE_P2P_DEVICE: @@ -1253,8 +1255,6 @@ void cfg80211_init_wdev(struct cfg80211_registered_device *rdev, spin_lock_init(&wdev->event_lock); INIT_LIST_HEAD(&wdev->mgmt_registrations); spin_lock_init(&wdev->mgmt_registrations_lock); - INIT_WORK(&wdev->mgmt_registrations_update_wk, - cfg80211_mgmt_registrations_update_wk); INIT_LIST_HEAD(&wdev->pmsr_list); spin_lock_init(&wdev->pmsr_lock); INIT_WORK(&wdev->pmsr_free_wk, cfg80211_pmsr_free_wk); diff --git a/net/wireless/core.h b/net/wireless/core.h index e0e5b3ee9699..67b0389fca4d 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -99,6 +99,8 @@ struct cfg80211_registered_device { struct cfg80211_chan_def cac_done_chandef; struct work_struct propagate_cac_done_wk; + struct work_struct mgmt_registrations_update_wk; + /* must be last because of the way we do wiphy_priv(), * and it should at least be aligned to NETDEV_ALIGN */ struct wiphy wiphy __aligned(NETDEV_ALIGN); diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 189334314cba..a6c61a2e6569 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -440,9 +440,15 @@ static void cfg80211_mgmt_registrations_update(struct wireless_dev *wdev) ASSERT_RTNL(); + spin_lock_bh(&wdev->mgmt_registrations_lock); + if (!wdev->mgmt_registrations_need_update) { + spin_unlock_bh(&wdev->mgmt_registrations_lock); + return; + } + rcu_read_lock(); list_for_each_entry_rcu(tmp, &rdev->wiphy.wdev_list, list) { - list_for_each_entry_rcu(reg, &tmp->mgmt_registrations, list) { + list_for_each_entry(reg, &tmp->mgmt_registrations, list) { u32 mask = BIT(le16_to_cpu(reg->frame_type) >> 4); u32 mcast_mask = 0; @@ -460,16 +466,23 @@ static void cfg80211_mgmt_registrations_update(struct wireless_dev *wdev) } rcu_read_unlock(); + wdev->mgmt_registrations_need_update = 0; + spin_unlock_bh(&wdev->mgmt_registrations_lock); + rdev_update_mgmt_frame_registrations(rdev, wdev, &upd); } void cfg80211_mgmt_registrations_update_wk(struct work_struct *wk) { - struct wireless_dev *wdev = container_of(wk, struct wireless_dev, - mgmt_registrations_update_wk); + struct cfg80211_registered_device *rdev; + struct wireless_dev *wdev; + + rdev = container_of(wk, struct cfg80211_registered_device, + mgmt_registrations_update_wk); rtnl_lock(); - cfg80211_mgmt_registrations_update(wdev); + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) + cfg80211_mgmt_registrations_update(wdev); rtnl_unlock(); } @@ -557,6 +570,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, nreg->multicast_rx = multicast_rx; list_add(&nreg->list, &wdev->mgmt_registrations); } + wdev->mgmt_registrations_need_update = 1; spin_unlock_bh(&wdev->mgmt_registrations_lock); cfg80211_mgmt_registrations_update(wdev); @@ -585,7 +599,8 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid) list_del(®->list); kfree(reg); - schedule_work(&wdev->mgmt_registrations_update_wk); + wdev->mgmt_registrations_need_update = 1; + schedule_work(&rdev->mgmt_registrations_update_wk); } spin_unlock_bh(&wdev->mgmt_registrations_lock); @@ -608,6 +623,7 @@ void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev) list_del(®->list); kfree(reg); } + wdev->mgmt_registrations_need_update = 1; spin_unlock_bh(&wdev->mgmt_registrations_lock); cfg80211_mgmt_registrations_update(wdev); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 263ae395ad44..7fbca0854265 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5016,7 +5016,8 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) err = nl80211_parse_he_obss_pd( info->attrs[NL80211_ATTR_HE_OBSS_PD], ¶ms.he_obss_pd); - goto out; + if (err) + goto out; } if (info->attrs[NL80211_ATTR_HE_BSS_COLOR]) { @@ -5024,7 +5025,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) info->attrs[NL80211_ATTR_HE_BSS_COLOR], ¶ms.he_bss_color); if (err) - return err; + goto out; } nl80211_calculate_ap_params(¶ms); @@ -13265,13 +13266,13 @@ static int nl80211_vendor_cmd(struct sk_buff *skb, struct genl_info *info) if (!wdev_running(wdev)) return -ENETDOWN; } - - if (!vcmd->doit) - return -EOPNOTSUPP; } else { wdev = NULL; } + if (!vcmd->doit) + return -EOPNOTSUPP; + if (info->attrs[NL80211_ATTR_VENDOR_DATA]) { data = nla_data(info->attrs[NL80211_ATTR_VENDOR_DATA]); len = nla_len(info->attrs[NL80211_ATTR_VENDOR_DATA]); diff --git a/net/x25/Kconfig b/net/x25/Kconfig index 9f0d58b0b90b..e3ed23245a82 100644 --- a/net/x25/Kconfig +++ b/net/x25/Kconfig @@ -5,7 +5,7 @@ config X25 tristate "CCITT X.25 Packet Layer" - ---help--- + help X.25 is a set of standardized network protocols, similar in scope to frame relay; the one physical line from your box to the X.25 network entry point can carry several logical point-to-point connections diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index b6c0f08bd80d..3700266229f6 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -352,10 +352,8 @@ static int xsk_generic_xmit(struct sock *sk) len = desc.len; skb = sock_alloc_send_skb(sk, len, 1, &err); - if (unlikely(!skb)) { - err = -EAGAIN; + if (unlikely(!skb)) goto out; - } skb_put(skb, len); addr = desc.addr; diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 540ed75e4482..08b80669f649 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -2,9 +2,6 @@ #include <net/xsk_buff_pool.h> #include <net/xdp_sock.h> -#include <linux/dma-direct.h> -#include <linux/dma-noncoherent.h> -#include <linux/swiotlb.h> #include "xsk_queue.h" @@ -55,7 +52,6 @@ struct xsk_buff_pool *xp_create(struct page **pages, u32 nr_pages, u32 chunks, pool->free_heads_cnt = chunks; pool->headroom = headroom; pool->chunk_size = chunk_size; - pool->cheap_dma = true; pool->unaligned = unaligned; pool->frame_len = chunk_size - headroom - XDP_PACKET_HEADROOM; INIT_LIST_HEAD(&pool->free_list); @@ -125,48 +121,6 @@ static void xp_check_dma_contiguity(struct xsk_buff_pool *pool) } } -static bool __maybe_unused xp_check_swiotlb_dma(struct xsk_buff_pool *pool) -{ -#if defined(CONFIG_SWIOTLB) - phys_addr_t paddr; - u32 i; - - for (i = 0; i < pool->dma_pages_cnt; i++) { - paddr = dma_to_phys(pool->dev, pool->dma_pages[i]); - if (is_swiotlb_buffer(paddr)) - return false; - } -#endif - return true; -} - -static bool xp_check_cheap_dma(struct xsk_buff_pool *pool) -{ -#if defined(CONFIG_HAS_DMA) - const struct dma_map_ops *ops = get_dma_ops(pool->dev); - - if (ops) { - return !ops->sync_single_for_cpu && - !ops->sync_single_for_device; - } - - if (!dma_is_direct(ops)) - return false; - - if (!xp_check_swiotlb_dma(pool)) - return false; - - if (!dev_is_dma_coherent(pool->dev)) { -#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ - defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \ - defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) - return false; -#endif - } -#endif - return true; -} - int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev, unsigned long attrs, struct page **pages, u32 nr_pages) { @@ -180,6 +134,7 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev, pool->dev = dev; pool->dma_pages_cnt = nr_pages; + pool->dma_need_sync = false; for (i = 0; i < pool->dma_pages_cnt; i++) { dma = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, @@ -188,14 +143,13 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev, xp_dma_unmap(pool, attrs); return -ENOMEM; } + if (dma_need_sync(dev, dma)) + pool->dma_need_sync = true; pool->dma_pages[i] = dma; } if (pool->unaligned) xp_check_dma_contiguity(pool); - - pool->dev = dev; - pool->cheap_dma = xp_check_cheap_dma(pool); return 0; } EXPORT_SYMBOL(xp_dma_map); @@ -280,7 +234,7 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool) xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM; xskb->xdp.data_meta = xskb->xdp.data; - if (!pool->cheap_dma) { + if (pool->dma_need_sync) { dma_sync_single_range_for_device(pool->dev, xskb->dma, 0, pool->frame_len, DMA_BIDIRECTIONAL); diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index b7fd9c838416..5b9a5ab48111 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -22,7 +22,7 @@ if INET config XFRM_USER tristate "Transformation user configuration interface" select XFRM_ALGO - ---help--- + help Support for Transformation(XFRM) user configuration interface like IPsec used by native Linux tools. @@ -31,7 +31,7 @@ config XFRM_USER config XFRM_INTERFACE tristate "Transformation virtual interface" depends on XFRM && IPV6 - ---help--- + help This provides a virtual interface to route IPsec traffic. If unsure, say N. @@ -39,7 +39,7 @@ config XFRM_INTERFACE config XFRM_SUB_POLICY bool "Transformation sub policy support" depends on XFRM - ---help--- + help Support sub policy for developers. By using sub policy with main one, two policies can be applied to the same packet at once. Policy which lives shorter time in kernel should be a sub. @@ -49,7 +49,7 @@ config XFRM_SUB_POLICY config XFRM_MIGRATE bool "Transformation migrate database" depends on XFRM - ---help--- + help A feature to update locator(s) of a given IPsec security association dynamically. This feature is required, for instance, in a Mobile IPv6 environment with IPsec configuration @@ -60,13 +60,37 @@ config XFRM_MIGRATE config XFRM_STATISTICS bool "Transformation statistics" depends on XFRM && PROC_FS - ---help--- + help This statistics is not a SNMP/MIB specification but shows statistics about transformation error (or almost error) factor at packet processing for developer. If unsure, say N. +# This option selects XFRM_ALGO along with the AH authentication algorithms that +# RFC 8221 lists as MUST be implemented. +config XFRM_AH + tristate + select XFRM_ALGO + select CRYPTO + select CRYPTO_HMAC + select CRYPTO_SHA256 + +# This option selects XFRM_ALGO along with the ESP encryption and authentication +# algorithms that RFC 8221 lists as MUST be implemented. +config XFRM_ESP + tristate + select XFRM_ALGO + select CRYPTO + select CRYPTO_AES + select CRYPTO_AUTHENC + select CRYPTO_CBC + select CRYPTO_ECHAINIV + select CRYPTO_GCM + select CRYPTO_HMAC + select CRYPTO_SEQIV + select CRYPTO_SHA256 + config XFRM_IPCOMP tristate select XFRM_ALGO @@ -76,7 +100,7 @@ config XFRM_IPCOMP config NET_KEY tristate "PF_KEY sockets" select XFRM_ALGO - ---help--- + help PF_KEYv2 socket family, compatible to KAME ones. They are required if you are going to use IPsec tools ported from KAME. @@ -87,7 +111,7 @@ config NET_KEY_MIGRATE bool "PF_KEY MIGRATE" depends on NET_KEY select XFRM_MIGRATE - ---help--- + help Add a PF_KEY MIGRATE message to PF_KEYv2 socket family. The PF_KEY MIGRATE message is used to dynamically update locator(s) of a given IPsec security association. diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c index 100e29682b48..827ccdf2db57 100644 --- a/net/xfrm/espintcp.c +++ b/net/xfrm/espintcp.c @@ -15,6 +15,7 @@ static void handle_nonesp(struct espintcp_ctx *ctx, struct sk_buff *skb, { if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf || !sk_rmem_schedule(sk, skb, skb->truesize)) { + XFRM_INC_STATS(sock_net(sk), LINUX_MIB_XFRMINERROR); kfree_skb(skb); return; } @@ -49,23 +50,51 @@ static void espintcp_rcv(struct strparser *strp, struct sk_buff *skb) struct espintcp_ctx *ctx = container_of(strp, struct espintcp_ctx, strp); struct strp_msg *rxm = strp_msg(skb); + int len = rxm->full_len - 2; u32 nonesp_marker; int err; + /* keepalive packet? */ + if (unlikely(len == 1)) { + u8 data; + + err = skb_copy_bits(skb, rxm->offset + 2, &data, 1); + if (err < 0) { + XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINHDRERROR); + kfree_skb(skb); + return; + } + + if (data == 0xff) { + kfree_skb(skb); + return; + } + } + + /* drop other short messages */ + if (unlikely(len <= sizeof(nonesp_marker))) { + XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINHDRERROR); + kfree_skb(skb); + return; + } + err = skb_copy_bits(skb, rxm->offset + 2, &nonesp_marker, sizeof(nonesp_marker)); if (err < 0) { + XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINHDRERROR); kfree_skb(skb); return; } /* remove header, leave non-ESP marker/SPI */ if (!__pskb_pull(skb, rxm->offset + 2)) { + XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINERROR); kfree_skb(skb); return; } if (pskb_trim(skb, rxm->full_len - 2) != 0) { + XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINERROR); kfree_skb(skb); return; } @@ -91,7 +120,7 @@ static int espintcp_parse(struct strparser *strp, struct sk_buff *skb) return err; len = be16_to_cpu(blen); - if (len < 6) + if (len < 2) return -EINVAL; return len; @@ -109,8 +138,11 @@ static int espintcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, flags |= nonblock ? MSG_DONTWAIT : 0; skb = __skb_recv_datagram(sk, &ctx->ike_queue, flags, &off, &err); - if (!skb) + if (!skb) { + if (err == -EAGAIN && sk->sk_shutdown & RCV_SHUTDOWN) + return 0; return err; + } copied = len; if (copied > skb->len) @@ -213,7 +245,7 @@ retry: return 0; } -static int espintcp_push_msgs(struct sock *sk) +static int espintcp_push_msgs(struct sock *sk, int flags) { struct espintcp_ctx *ctx = espintcp_getctx(sk); struct espintcp_msg *emsg = &ctx->partial; @@ -227,12 +259,12 @@ static int espintcp_push_msgs(struct sock *sk) ctx->tx_running = 1; if (emsg->skb) - err = espintcp_sendskb_locked(sk, emsg, 0); + err = espintcp_sendskb_locked(sk, emsg, flags); else - err = espintcp_sendskmsg_locked(sk, emsg, 0); + err = espintcp_sendskmsg_locked(sk, emsg, flags); if (err == -EAGAIN) { ctx->tx_running = 0; - return 0; + return flags & MSG_DONTWAIT ? -EAGAIN : 0; } if (!err) memset(emsg, 0, sizeof(*emsg)); @@ -257,7 +289,7 @@ int espintcp_push_skb(struct sock *sk, struct sk_buff *skb) offset = skb_transport_offset(skb); len = skb->len - offset; - espintcp_push_msgs(sk); + espintcp_push_msgs(sk, 0); if (emsg->len) { kfree_skb(skb); @@ -270,7 +302,7 @@ int espintcp_push_skb(struct sock *sk, struct sk_buff *skb) emsg->len = len; emsg->skb = skb; - espintcp_push_msgs(sk); + espintcp_push_msgs(sk, 0); return 0; } @@ -287,7 +319,7 @@ static int espintcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) char buf[2] = {0}; int err, end; - if (msg->msg_flags) + if (msg->msg_flags & ~MSG_DONTWAIT) return -EOPNOTSUPP; if (size > MAX_ESPINTCP_MSG) @@ -298,9 +330,10 @@ static int espintcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) lock_sock(sk); - err = espintcp_push_msgs(sk); + err = espintcp_push_msgs(sk, msg->msg_flags & MSG_DONTWAIT); if (err < 0) { - err = -ENOBUFS; + if (err != -EAGAIN || !(msg->msg_flags & MSG_DONTWAIT)) + err = -ENOBUFS; goto unlock; } @@ -337,10 +370,9 @@ static int espintcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) tcp_rate_check_app_limited(sk); - err = espintcp_push_msgs(sk); + err = espintcp_push_msgs(sk, msg->msg_flags & MSG_DONTWAIT); /* this message could be partially sent, keep it */ - if (err < 0) - goto unlock; + release_sock(sk); return size; @@ -374,7 +406,7 @@ static void espintcp_tx_work(struct work_struct *work) lock_sock(sk); if (!ctx->tx_running) - espintcp_push_msgs(sk); + espintcp_push_msgs(sk, 0); release_sock(sk); } diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index f50d1f97cf8e..626096bd0d29 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -108,7 +108,7 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur struct xfrm_offload *xo = xfrm_offload(skb); struct sec_path *sp; - if (!xo) + if (!xo || (xo->flags & XFRM_XMIT)) return skb; if (!(features & NETIF_F_HW_ESP)) @@ -129,6 +129,8 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur return skb; } + xo->flags |= XFRM_XMIT; + if (skb_is_gso(skb)) { struct net_device *dev = skb->dev; diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index c407ecbc5d46..b615729812e5 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -37,6 +37,7 @@ #include <net/ip.h> #include <net/ipv6.h> #include <net/ip6_route.h> +#include <net/ip_tunnels.h> #include <net/addrconf.h> #include <net/xfrm.h> #include <net/net_namespace.h> @@ -581,6 +582,7 @@ static const struct net_device_ops xfrmi_netdev_ops = { static void xfrmi_dev_setup(struct net_device *dev) { dev->netdev_ops = &xfrmi_netdev_ops; + dev->header_ops = &ip_tunnel_header_ops; dev->type = ARPHRD_NONE; dev->mtu = ETH_DATA_LEN; dev->min_mtu = ETH_MIN_MTU; diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index e4c23f69f69f..a7ab19353313 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -574,16 +574,12 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb) switch (x->outer_mode.family) { case AF_INET: memset(IPCB(skb), 0, sizeof(*IPCB(skb))); -#ifdef CONFIG_NETFILTER IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; -#endif break; case AF_INET6: memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); -#ifdef CONFIG_NETFILTER IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; -#endif break; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 564aa6492e7c..19c5e0fa3f44 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -39,7 +39,7 @@ #ifdef CONFIG_XFRM_STATISTICS #include <net/snmp.h> #endif -#ifdef CONFIG_INET_ESPINTCP +#ifdef CONFIG_XFRM_ESPINTCP #include <net/espintcp.h> #endif @@ -1433,14 +1433,10 @@ static void xfrm_policy_requeue(struct xfrm_policy *old, spin_unlock_bh(&pq->hold_queue.lock); } -static bool xfrm_policy_mark_match(struct xfrm_policy *policy, - struct xfrm_policy *pol) +static inline bool xfrm_policy_mark_match(const struct xfrm_mark *mark, + struct xfrm_policy *pol) { - if (policy->mark.v == pol->mark.v && - policy->priority == pol->priority) - return true; - - return false; + return mark->v == pol->mark.v && mark->m == pol->mark.m; } static u32 xfrm_pol_bin_key(const void *data, u32 len, u32 seed) @@ -1503,7 +1499,7 @@ static void xfrm_policy_insert_inexact_list(struct hlist_head *chain, if (pol->type == policy->type && pol->if_id == policy->if_id && !selector_cmp(&pol->selector, &policy->selector) && - xfrm_policy_mark_match(policy, pol) && + xfrm_policy_mark_match(&policy->mark, pol) && xfrm_sec_ctx_match(pol->security, policy->security) && !WARN_ON(delpol)) { delpol = pol; @@ -1538,7 +1534,7 @@ static struct xfrm_policy *xfrm_policy_insert_list(struct hlist_head *chain, if (pol->type == policy->type && pol->if_id == policy->if_id && !selector_cmp(&pol->selector, &policy->selector) && - xfrm_policy_mark_match(policy, pol) && + xfrm_policy_mark_match(&policy->mark, pol) && xfrm_sec_ctx_match(pol->security, policy->security) && !WARN_ON(delpol)) { if (excl) @@ -1610,9 +1606,8 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) EXPORT_SYMBOL(xfrm_policy_insert); static struct xfrm_policy * -__xfrm_policy_bysel_ctx(struct hlist_head *chain, u32 mark, u32 if_id, - u8 type, int dir, - struct xfrm_selector *sel, +__xfrm_policy_bysel_ctx(struct hlist_head *chain, const struct xfrm_mark *mark, + u32 if_id, u8 type, int dir, struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx) { struct xfrm_policy *pol; @@ -1623,7 +1618,7 @@ __xfrm_policy_bysel_ctx(struct hlist_head *chain, u32 mark, u32 if_id, hlist_for_each_entry(pol, chain, bydst) { if (pol->type == type && pol->if_id == if_id && - (mark & pol->mark.m) == pol->mark.v && + xfrm_policy_mark_match(mark, pol) && !selector_cmp(sel, &pol->selector) && xfrm_sec_ctx_match(ctx, pol->security)) return pol; @@ -1632,11 +1627,10 @@ __xfrm_policy_bysel_ctx(struct hlist_head *chain, u32 mark, u32 if_id, return NULL; } -struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id, - u8 type, int dir, - struct xfrm_selector *sel, - struct xfrm_sec_ctx *ctx, int delete, - int *err) +struct xfrm_policy * +xfrm_policy_bysel_ctx(struct net *net, const struct xfrm_mark *mark, u32 if_id, + u8 type, int dir, struct xfrm_selector *sel, + struct xfrm_sec_ctx *ctx, int delete, int *err) { struct xfrm_pol_inexact_bin *bin = NULL; struct xfrm_policy *pol, *ret = NULL; @@ -1703,9 +1697,9 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id, } EXPORT_SYMBOL(xfrm_policy_bysel_ctx); -struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u32 if_id, - u8 type, int dir, u32 id, int delete, - int *err) +struct xfrm_policy * +xfrm_policy_byid(struct net *net, const struct xfrm_mark *mark, u32 if_id, + u8 type, int dir, u32 id, int delete, int *err) { struct xfrm_policy *pol, *ret; struct hlist_head *chain; @@ -1720,8 +1714,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u32 if_id, ret = NULL; hlist_for_each_entry(pol, chain, byidx) { if (pol->type == type && pol->index == id && - pol->if_id == if_id && - (mark & pol->mark.m) == pol->mark.v) { + pol->if_id == if_id && xfrm_policy_mark_match(mark, pol)) { xfrm_pol_hold(pol); if (delete) { *err = security_xfrm_policy_delete( @@ -4156,7 +4149,7 @@ void __init xfrm_init(void) seqcount_init(&xfrm_policy_hash_generation); xfrm_input_init(); -#ifdef CONFIG_INET_ESPINTCP +#ifdef CONFIG_XFRM_ESPINTCP espintcp_init(); #endif diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index e6cfaa680ef3..fbb7d9d06478 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1863,7 +1863,6 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, struct km_event c; int delete; struct xfrm_mark m; - u32 mark = xfrm_mark_get(attrs, &m); u32 if_id = 0; p = nlmsg_data(nlh); @@ -1880,8 +1879,11 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (attrs[XFRMA_IF_ID]) if_id = nla_get_u32(attrs[XFRMA_IF_ID]); + xfrm_mark_get(attrs, &m); + if (p->index) - xp = xfrm_policy_byid(net, mark, if_id, type, p->dir, p->index, delete, &err); + xp = xfrm_policy_byid(net, &m, if_id, type, p->dir, + p->index, delete, &err); else { struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_sec_ctx *ctx; @@ -1898,8 +1900,8 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; } - xp = xfrm_policy_bysel_ctx(net, mark, if_id, type, p->dir, &p->sel, - ctx, delete, &err); + xp = xfrm_policy_bysel_ctx(net, &m, if_id, type, p->dir, + &p->sel, ctx, delete, &err); security_xfrm_policy_free(ctx); } if (xp == NULL) @@ -2166,7 +2168,6 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, u8 type = XFRM_POLICY_TYPE_MAIN; int err = -ENOENT; struct xfrm_mark m; - u32 mark = xfrm_mark_get(attrs, &m); u32 if_id = 0; err = copy_from_user_policy_type(&type, attrs); @@ -2180,8 +2181,11 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, if (attrs[XFRMA_IF_ID]) if_id = nla_get_u32(attrs[XFRMA_IF_ID]); + xfrm_mark_get(attrs, &m); + if (p->index) - xp = xfrm_policy_byid(net, mark, if_id, type, p->dir, p->index, 0, &err); + xp = xfrm_policy_byid(net, &m, if_id, type, p->dir, p->index, + 0, &err); else { struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_sec_ctx *ctx; @@ -2198,7 +2202,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; } - xp = xfrm_policy_bysel_ctx(net, mark, if_id, type, p->dir, + xp = xfrm_policy_bysel_ctx(net, &m, if_id, type, p->dir, &p->sel, ctx, 0, &err); security_xfrm_policy_free(ctx); } |
