diff options
Diffstat (limited to 'net')
50 files changed, 960 insertions, 232 deletions
diff --git a/net/atm/lec.c b/net/atm/lec.c index ad4f829193f0..a0311493b01b 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -726,9 +726,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg) struct lec_priv *priv; if (arg < 0) - i = 0; - else - i = arg; + arg = 0; if (arg >= MAX_LEC_ITF) return -EINVAL; i = array_index_nospec(arg, MAX_LEC_ITF); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 3d9175f130b3..b81bf53c5ac4 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -4381,6 +4381,9 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status, return; } + /* If we reach this point this event matches the last command sent */ + hci_dev_clear_flag(hdev, HCI_CMD_PENDING); + /* If the command succeeded and there's still more commands in * this request the request is not yet complete. */ @@ -4491,6 +4494,8 @@ static void hci_cmd_work(struct work_struct *work) hdev->sent_cmd = skb_clone(skb, GFP_KERNEL); if (hdev->sent_cmd) { + if (hci_req_status_pend(hdev)) + hci_dev_set_flag(hdev, HCI_CMD_PENDING); atomic_dec(&hdev->cmd_cnt); hci_send_frame(hdev, skb); if (test_bit(HCI_RESET, &hdev->flags)) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 66b631ab0d35..9e4fcf406d9c 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3404,6 +3404,12 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb, hci_req_cmd_complete(hdev, *opcode, *status, req_complete, req_complete_skb); + if (hci_dev_test_flag(hdev, HCI_CMD_PENDING)) { + bt_dev_err(hdev, + "unexpected event for opcode 0x%4.4x", *opcode); + return; + } + if (atomic_read(&hdev->cmd_cnt) && !skb_queue_empty(&hdev->cmd_q)) queue_work(hdev->workqueue, &hdev->cmd_work); } @@ -3511,6 +3517,12 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb, hci_req_cmd_complete(hdev, *opcode, ev->status, req_complete, req_complete_skb); + if (hci_dev_test_flag(hdev, HCI_CMD_PENDING)) { + bt_dev_err(hdev, + "unexpected event for opcode 0x%4.4x", *opcode); + return; + } + if (atomic_read(&hdev->cmd_cnt) && !skb_queue_empty(&hdev->cmd_q)) queue_work(hdev->workqueue, &hdev->cmd_work); } diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index ca73d36cc149..e9a95ed65491 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -46,6 +46,11 @@ void hci_req_purge(struct hci_request *req) skb_queue_purge(&req->cmd_q); } +bool hci_req_status_pend(struct hci_dev *hdev) +{ + return hdev->req_status == HCI_REQ_PEND; +} + static int req_run(struct hci_request *req, hci_req_complete_t complete, hci_req_complete_skb_t complete_skb) { diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 692cc8b13368..55b2050cc9ff 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -37,6 +37,7 @@ struct hci_request { void hci_req_init(struct hci_request *req, struct hci_dev *hdev); void hci_req_purge(struct hci_request *req); +bool hci_req_status_pend(struct hci_dev *hdev); int hci_req_run(struct hci_request *req, hci_req_complete_t complete); int hci_req_run_skb(struct hci_request *req, hci_req_complete_skb_t complete); void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, diff --git a/net/core/dev.c b/net/core/dev.c index 22f2640f559a..108ac8137b9b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4987,7 +4987,8 @@ static int __netif_receive_skb_one_core(struct sk_buff *skb, bool pfmemalloc) ret = __netif_receive_skb_core(skb, pfmemalloc, &pt_prev); if (pt_prev) - ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); + ret = INDIRECT_CALL_INET(pt_prev->func, ipv6_rcv, ip_rcv, skb, + skb->dev, pt_prev, orig_dev); return ret; } @@ -5033,7 +5034,8 @@ static inline void __netif_receive_skb_list_ptype(struct list_head *head, else list_for_each_entry_safe(skb, next, head, list) { skb_list_del_init(skb); - pt_prev->func(skb, skb->dev, pt_prev, orig_dev); + INDIRECT_CALL_INET(pt_prev->func, ipv6_rcv, ip_rcv, skb, + skb->dev, pt_prev, orig_dev); } } diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index c0734028c7dc..cf855352a440 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -17,6 +17,17 @@ menuconfig NET_DSA if NET_DSA +# tagging formats +config NET_DSA_TAG_8021Q + tristate "Tag driver for switches using custom 802.1Q VLAN headers" + select VLAN_8021Q + help + Unlike the other tagging protocols, the 802.1Q config option simply + provides helpers for other tagging implementations that might rely on + VLAN in one way or another. It is not a complete solution. + + Drivers which use these helpers should select this as dependency. + config NET_DSA_TAG_BRCM_COMMON tristate default n @@ -91,6 +102,15 @@ config NET_DSA_TAG_LAN9303 Say Y or M if you want to enable support for tagging frames for the SMSC/Microchip LAN9303 family of switches. +config NET_DSA_TAG_SJA1105 + tristate "Tag driver for NXP SJA1105 switches" + select NET_DSA_TAG_8021Q + help + Say Y or M if you want to enable support for tagging frames with the + NXP SJA1105 switch family. Both the native tagging protocol (which + is only for link-local traffic) as well as non-native tagging (based + on a custom 802.1Q VLAN header) are available. + config NET_DSA_TAG_TRAILER tristate "Tag driver for switches using a trailer tag" help diff --git a/net/dsa/Makefile b/net/dsa/Makefile index 8a737b6ee94c..c342f54715ba 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_NET_DSA) += dsa_core.o dsa_core-y += dsa.o dsa2.o master.o port.o slave.o switch.o # tagging formats +obj-$(CONFIG_NET_DSA_TAG_8021Q) += tag_8021q.o obj-$(CONFIG_NET_DSA_TAG_BRCM_COMMON) += tag_brcm.o obj-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o obj-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o @@ -12,4 +13,5 @@ obj-$(CONFIG_NET_DSA_TAG_KSZ_COMMON) += tag_ksz.o obj-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o obj-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o obj-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o +obj-$(CONFIG_NET_DSA_TAG_SJA1105) += tag_sja1105.o obj-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index bbc9f56e89b9..3b5f434cad3f 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -371,14 +371,14 @@ static int dsa_switch_setup(struct dsa_switch *ds) if (err) return err; - err = ds->ops->setup(ds); - if (err < 0) - return err; - err = dsa_switch_register_notifier(ds); if (err) return err; + err = ds->ops->setup(ds); + if (err < 0) + return err; + if (!ds->slave_mii_bus && ds->ops->phy_read) { ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev); if (!ds->slave_mii_bus) @@ -586,6 +586,7 @@ static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master) } dp->type = DSA_PORT_TYPE_CPU; + dp->filter = tag_ops->filter; dp->rcv = tag_ops->rcv; dp->tag_ops = tag_ops; dp->master = master; diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index b434f5ff55ab..8f1222324646 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -174,6 +174,8 @@ int dsa_slave_resume(struct net_device *slave_dev); int dsa_slave_register_notifier(void); void dsa_slave_unregister_notifier(void); +void *dsa_defer_xmit(struct sk_buff *skb, struct net_device *dev); + static inline struct dsa_port *dsa_slave_to_port(const struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); diff --git a/net/dsa/port.c b/net/dsa/port.c index 1ed287b2badd..ed8ba9daa3ba 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -389,6 +389,7 @@ int dsa_port_vid_add(struct dsa_port *dp, u16 vid, u16 flags) trans.ph_prepare = false; return dsa_port_vlan_add(dp, &vlan, &trans); } +EXPORT_SYMBOL(dsa_port_vid_add); int dsa_port_vid_del(struct dsa_port *dp, u16 vid) { @@ -400,6 +401,7 @@ int dsa_port_vid_del(struct dsa_port *dp, u16 vid) return dsa_port_vlan_del(dp, &vlan); } +EXPORT_SYMBOL(dsa_port_vid_del); static struct phy_device *dsa_port_get_phy_device(struct dsa_port *dp) { diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 8ad9bf957da1..316bce9e0fbf 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -120,6 +120,9 @@ static int dsa_slave_close(struct net_device *dev) struct net_device *master = dsa_slave_to_master(dev); struct dsa_port *dp = dsa_slave_to_port(dev); + cancel_work_sync(&dp->xmit_work); + skb_queue_purge(&dp->xmit_queue); + phylink_stop(dp->pl); dsa_port_disable(dp); @@ -430,6 +433,24 @@ static void dsa_skb_tx_timestamp(struct dsa_slave_priv *p, kfree_skb(clone); } +netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev) +{ + /* SKB for netpoll still need to be mangled with the protocol-specific + * tag to be successfully transmitted + */ + if (unlikely(netpoll_tx_running(dev))) + return dsa_slave_netpoll_send_skb(dev, skb); + + /* Queue the SKB for transmission on the parent interface, but + * do not modify its EtherType + */ + skb->dev = dsa_slave_to_master(dev); + dev_queue_xmit(skb); + + return NETDEV_TX_OK; +} +EXPORT_SYMBOL_GPL(dsa_enqueue_skb); + static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); @@ -452,23 +473,37 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) */ nskb = p->xmit(skb, dev); if (!nskb) { - kfree_skb(skb); + if (!DSA_SKB_CB(skb)->deferred_xmit) + kfree_skb(skb); return NETDEV_TX_OK; } - /* SKB for netpoll still need to be mangled with the protocol-specific - * tag to be successfully transmitted - */ - if (unlikely(netpoll_tx_running(dev))) - return dsa_slave_netpoll_send_skb(dev, nskb); + return dsa_enqueue_skb(nskb, dev); +} - /* Queue the SKB for transmission on the parent interface, but - * do not modify its EtherType - */ - nskb->dev = dsa_slave_to_master(dev); - dev_queue_xmit(nskb); +void *dsa_defer_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); - return NETDEV_TX_OK; + DSA_SKB_CB(skb)->deferred_xmit = true; + + skb_queue_tail(&dp->xmit_queue, skb); + schedule_work(&dp->xmit_work); + return NULL; +} +EXPORT_SYMBOL_GPL(dsa_defer_xmit); + +static void dsa_port_xmit_work(struct work_struct *work) +{ + struct dsa_port *dp = container_of(work, struct dsa_port, xmit_work); + struct dsa_switch *ds = dp->ds; + struct sk_buff *skb; + + if (unlikely(!ds->ops->port_deferred_xmit)) + return; + + while ((skb = skb_dequeue(&dp->xmit_queue)) != NULL) + ds->ops->port_deferred_xmit(ds, dp->index, skb); } /* ethtool operations *******************************************************/ @@ -778,27 +813,25 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev, struct dsa_mall_tc_entry *mall_tc_entry; __be16 protocol = cls->common.protocol; struct dsa_switch *ds = dp->ds; - struct net_device *to_dev; - const struct tc_action *a; + struct flow_action_entry *act; struct dsa_port *to_dp; int err = -EOPNOTSUPP; if (!ds->ops->port_mirror_add) return err; - if (!tcf_exts_has_one_action(cls->exts)) + if (!flow_offload_has_one_action(&cls->rule->action)) return err; - a = tcf_exts_first_action(cls->exts); + act = &cls->rule->action.entries[0]; - if (is_tcf_mirred_egress_mirror(a) && protocol == htons(ETH_P_ALL)) { + if (act->id == FLOW_ACTION_MIRRED && protocol == htons(ETH_P_ALL)) { struct dsa_mall_mirror_tc_entry *mirror; - to_dev = tcf_mirred_dev(a); - if (!to_dev) + if (!act->dev) return -EINVAL; - if (!dsa_slave_dev_check(to_dev)) + if (!dsa_slave_dev_check(act->dev)) return -EOPNOTSUPP; mall_tc_entry = kzalloc(sizeof(*mall_tc_entry), GFP_KERNEL); @@ -809,7 +842,7 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev, mall_tc_entry->type = DSA_PORT_MALL_MIRROR; mirror = &mall_tc_entry->mirror; - to_dp = dsa_slave_to_port(to_dev); + to_dp = dsa_slave_to_port(act->dev); mirror->to_local_port = to_dp->index; mirror->ingress = ingress; @@ -1320,6 +1353,9 @@ int dsa_slave_suspend(struct net_device *slave_dev) if (!netif_running(slave_dev)) return 0; + cancel_work_sync(&dp->xmit_work); + skb_queue_purge(&dp->xmit_queue); + netif_device_detach(slave_dev); rtnl_lock(); @@ -1407,6 +1443,8 @@ int dsa_slave_create(struct dsa_port *port) } p->dp = port; INIT_LIST_HEAD(&p->mall_tc_list); + INIT_WORK(&port->xmit_work, dsa_port_xmit_work); + skb_queue_head_init(&port->xmit_queue); p->xmit = cpu_dp->tag_ops->xmit; port->slave = slave_dev; diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c new file mode 100644 index 000000000000..8ae48c7e1e76 --- /dev/null +++ b/net/dsa/tag_8021q.c @@ -0,0 +1,222 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com> + * + * This module is not a complete tagger implementation. It only provides + * primitives for taggers that rely on 802.1Q VLAN tags to use. The + * dsa_8021q_netdev_ops is registered for API compliance and not used + * directly by callers. + */ +#include <linux/if_bridge.h> +#include <linux/if_vlan.h> + +#include "dsa_priv.h" + +/* Allocating two VLAN tags per port - one for the RX VID and + * the other for the TX VID - see below + */ +#define DSA_8021Q_VID_RANGE (DSA_MAX_SWITCHES * DSA_MAX_PORTS) +#define DSA_8021Q_VID_BASE (VLAN_N_VID - 2 * DSA_8021Q_VID_RANGE - 1) +#define DSA_8021Q_RX_VID_BASE (DSA_8021Q_VID_BASE) +#define DSA_8021Q_TX_VID_BASE (DSA_8021Q_VID_BASE + DSA_8021Q_VID_RANGE) + +/* Returns the VID to be inserted into the frame from xmit for switch steering + * instructions on egress. Encodes switch ID and port ID. + */ +u16 dsa_8021q_tx_vid(struct dsa_switch *ds, int port) +{ + return DSA_8021Q_TX_VID_BASE + (DSA_MAX_PORTS * ds->index) + port; +} +EXPORT_SYMBOL_GPL(dsa_8021q_tx_vid); + +/* Returns the VID that will be installed as pvid for this switch port, sent as + * tagged egress towards the CPU port and decoded by the rcv function. + */ +u16 dsa_8021q_rx_vid(struct dsa_switch *ds, int port) +{ + return DSA_8021Q_RX_VID_BASE + (DSA_MAX_PORTS * ds->index) + port; +} +EXPORT_SYMBOL_GPL(dsa_8021q_rx_vid); + +/* Returns the decoded switch ID from the RX VID. */ +int dsa_8021q_rx_switch_id(u16 vid) +{ + return ((vid - DSA_8021Q_RX_VID_BASE) / DSA_MAX_PORTS); +} +EXPORT_SYMBOL_GPL(dsa_8021q_rx_switch_id); + +/* Returns the decoded port ID from the RX VID. */ +int dsa_8021q_rx_source_port(u16 vid) +{ + return ((vid - DSA_8021Q_RX_VID_BASE) % DSA_MAX_PORTS); +} +EXPORT_SYMBOL_GPL(dsa_8021q_rx_source_port); + +/* RX VLAN tagging (left) and TX VLAN tagging (right) setup shown for a single + * front-panel switch port (here swp0). + * + * Port identification through VLAN (802.1Q) tags has different requirements + * for it to work effectively: + * - On RX (ingress from network): each front-panel port must have a pvid + * that uniquely identifies it, and the egress of this pvid must be tagged + * towards the CPU port, so that software can recover the source port based + * on the VID in the frame. But this would only work for standalone ports; + * if bridged, this VLAN setup would break autonomous forwarding and would + * force all switched traffic to pass through the CPU. So we must also make + * the other front-panel ports members of this VID we're adding, albeit + * we're not making it their PVID (they'll still have their own). + * By the way - just because we're installing the same VID in multiple + * switch ports doesn't mean that they'll start to talk to one another, even + * while not bridged: the final forwarding decision is still an AND between + * the L2 forwarding information (which is limiting forwarding in this case) + * and the VLAN-based restrictions (of which there are none in this case, + * since all ports are members). + * - On TX (ingress from CPU and towards network) we are faced with a problem. + * If we were to tag traffic (from within DSA) with the port's pvid, all + * would be well, assuming the switch ports were standalone. Frames would + * have no choice but to be directed towards the correct front-panel port. + * But because we also want the RX VLAN to not break bridging, then + * inevitably that means that we have to give them a choice (of what + * front-panel port to go out on), and therefore we cannot steer traffic + * based on the RX VID. So what we do is simply install one more VID on the + * front-panel and CPU ports, and profit off of the fact that steering will + * work just by virtue of the fact that there is only one other port that's + * a member of the VID we're tagging the traffic with - the desired one. + * + * So at the end, each front-panel port will have one RX VID (also the PVID), + * the RX VID of all other front-panel ports, and one TX VID. Whereas the CPU + * port will have the RX and TX VIDs of all front-panel ports, and on top of + * that, is also tagged-input and tagged-output (VLAN trunk). + * + * CPU port CPU port + * +-------------+-----+-------------+ +-------------+-----+-------------+ + * | RX VID | | | | TX VID | | | + * | of swp0 | | | | of swp0 | | | + * | +-----+ | | +-----+ | + * | ^ T | | | Tagged | + * | | | | | ingress | + * | +-------+---+---+-------+ | | +-----------+ | + * | | | | | | | | Untagged | + * | | U v U v U v | | v egress | + * | +-----+ +-----+ +-----+ +-----+ | | +-----+ +-----+ +-----+ +-----+ | + * | | | | | | | | | | | | | | | | | | | | + * | |PVID | | | | | | | | | | | | | | | | | | + * +-+-----+-+-----+-+-----+-+-----+-+ +-+-----+-+-----+-+-----+-+-----+-+ + * swp0 swp1 swp2 swp3 swp0 swp1 swp2 swp3 + */ +int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int port, bool enabled) +{ + int upstream = dsa_upstream_port(ds, port); + struct dsa_port *dp = &ds->ports[port]; + struct dsa_port *upstream_dp = &ds->ports[upstream]; + u16 rx_vid = dsa_8021q_rx_vid(ds, port); + u16 tx_vid = dsa_8021q_tx_vid(ds, port); + int i, err; + + /* The CPU port is implicitly configured by + * configuring the front-panel ports + */ + if (!dsa_is_user_port(ds, port)) + return 0; + + /* Add this user port's RX VID to the membership list of all others + * (including itself). This is so that bridging will not be hindered. + * L2 forwarding rules still take precedence when there are no VLAN + * restrictions, so there are no concerns about leaking traffic. + */ + for (i = 0; i < ds->num_ports; i++) { + struct dsa_port *other_dp = &ds->ports[i]; + u16 flags; + + if (i == upstream) + /* CPU port needs to see this port's RX VID + * as tagged egress. + */ + flags = 0; + else if (i == port) + /* The RX VID is pvid on this port */ + flags = BRIDGE_VLAN_INFO_UNTAGGED | + BRIDGE_VLAN_INFO_PVID; + else + /* The RX VID is a regular VLAN on all others */ + flags = BRIDGE_VLAN_INFO_UNTAGGED; + + if (enabled) + err = dsa_port_vid_add(other_dp, rx_vid, flags); + else + err = dsa_port_vid_del(other_dp, rx_vid); + if (err) { + dev_err(ds->dev, "Failed to apply RX VID %d to port %d: %d\n", + rx_vid, port, err); + return err; + } + } + /* Finally apply the TX VID on this port and on the CPU port */ + if (enabled) + err = dsa_port_vid_add(dp, tx_vid, BRIDGE_VLAN_INFO_UNTAGGED); + else + err = dsa_port_vid_del(dp, tx_vid); + if (err) { + dev_err(ds->dev, "Failed to apply TX VID %d on port %d: %d\n", + tx_vid, port, err); + return err; + } + if (enabled) + err = dsa_port_vid_add(upstream_dp, tx_vid, 0); + else + err = dsa_port_vid_del(upstream_dp, tx_vid); + if (err) { + dev_err(ds->dev, "Failed to apply TX VID %d on port %d: %d\n", + tx_vid, upstream, err); + return err; + } + + return 0; +} +EXPORT_SYMBOL_GPL(dsa_port_setup_8021q_tagging); + +struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, + u16 tpid, u16 tci) +{ + /* skb->data points at skb_mac_header, which + * is fine for vlan_insert_tag. + */ + return vlan_insert_tag(skb, htons(tpid), tci); +} +EXPORT_SYMBOL_GPL(dsa_8021q_xmit); + +struct sk_buff *dsa_8021q_rcv(struct sk_buff *skb, struct net_device *netdev, + struct packet_type *pt, u16 *tpid, u16 *tci) +{ + struct vlan_ethhdr *tag; + + if (unlikely(!pskb_may_pull(skb, VLAN_HLEN))) + return NULL; + + tag = vlan_eth_hdr(skb); + *tpid = ntohs(tag->h_vlan_proto); + *tci = ntohs(tag->h_vlan_TCI); + + /* skb->data points in the middle of the VLAN tag, + * after tpid and before tci. This is because so far, + * ETH_HLEN (DMAC, SMAC, EtherType) bytes were pulled. + * There are 2 bytes of VLAN tag left in skb->data, and upper + * layers expect the 'real' EtherType to be consumed as well. + * Coincidentally, a VLAN header is also of the same size as + * the number of bytes that need to be pulled. + */ + skb_pull_rcsum(skb, VLAN_HLEN); + + return skb; +} +EXPORT_SYMBOL_GPL(dsa_8021q_rcv); + +static const struct dsa_device_ops dsa_8021q_netdev_ops = { + .name = "8021q", + .proto = DSA_TAG_PROTO_8021Q, + .overhead = VLAN_HLEN, +}; + +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_8021Q); + +module_dsa_tag_driver(dsa_8021q_netdev_ops); diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c new file mode 100644 index 000000000000..969402c7dbf1 --- /dev/null +++ b/net/dsa/tag_sja1105.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com> + */ +#include <linux/if_vlan.h> +#include <linux/dsa/sja1105.h> +#include <linux/dsa/8021q.h> +#include <linux/packing.h> +#include "dsa_priv.h" + +/* Similar to is_link_local_ether_addr(hdr->h_dest) but also covers PTP */ +static inline bool sja1105_is_link_local(const struct sk_buff *skb) +{ + const struct ethhdr *hdr = eth_hdr(skb); + u64 dmac = ether_addr_to_u64(hdr->h_dest); + + if ((dmac & SJA1105_LINKLOCAL_FILTER_A_MASK) == + SJA1105_LINKLOCAL_FILTER_A) + return true; + if ((dmac & SJA1105_LINKLOCAL_FILTER_B_MASK) == + SJA1105_LINKLOCAL_FILTER_B) + return true; + return false; +} + +/* This is the first time the tagger sees the frame on RX. + * Figure out if we can decode it, and if we can, annotate skb->cb with how we + * plan to do that, so we don't need to check again in the rcv function. + */ +static bool sja1105_filter(const struct sk_buff *skb, struct net_device *dev) +{ + if (sja1105_is_link_local(skb)) { + SJA1105_SKB_CB(skb)->type = SJA1105_FRAME_TYPE_LINK_LOCAL; + return true; + } + if (!dsa_port_is_vlan_filtering(dev->dsa_ptr)) { + SJA1105_SKB_CB(skb)->type = SJA1105_FRAME_TYPE_NORMAL; + return true; + } + return false; +} + +static struct sk_buff *sja1105_xmit(struct sk_buff *skb, + struct net_device *netdev) +{ + struct dsa_port *dp = dsa_slave_to_port(netdev); + struct dsa_switch *ds = dp->ds; + u16 tx_vid = dsa_8021q_tx_vid(ds, dp->index); + u8 pcp = skb->priority; + + /* Transmitting management traffic does not rely upon switch tagging, + * but instead SPI-installed management routes. Part 2 of this + * is the .port_deferred_xmit driver callback. + */ + if (unlikely(sja1105_is_link_local(skb))) + return dsa_defer_xmit(skb, netdev); + + /* If we are under a vlan_filtering bridge, IP termination on + * switch ports based on 802.1Q tags is simply too brittle to + * be passable. So just defer to the dsa_slave_notag_xmit + * implementation. + */ + if (dsa_port_is_vlan_filtering(dp)) + return skb; + + return dsa_8021q_xmit(skb, netdev, ETH_P_SJA1105, + ((pcp << VLAN_PRIO_SHIFT) | tx_vid)); +} + +static struct sk_buff *sja1105_rcv(struct sk_buff *skb, + struct net_device *netdev, + struct packet_type *pt) +{ + struct ethhdr *hdr = eth_hdr(skb); + u64 source_port, switch_id; + struct sk_buff *nskb; + u16 tpid, vid, tci; + bool is_tagged; + + nskb = dsa_8021q_rcv(skb, netdev, pt, &tpid, &tci); + is_tagged = (nskb && tpid == ETH_P_SJA1105); + + skb->priority = (tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; + vid = tci & VLAN_VID_MASK; + + skb->offload_fwd_mark = 1; + + if (SJA1105_SKB_CB(skb)->type == SJA1105_FRAME_TYPE_LINK_LOCAL) { + /* Management traffic path. Switch embeds the switch ID and + * port ID into bytes of the destination MAC, courtesy of + * the incl_srcpt options. + */ + source_port = hdr->h_dest[3]; + switch_id = hdr->h_dest[4]; + /* Clear the DMAC bytes that were mangled by the switch */ + hdr->h_dest[3] = 0; + hdr->h_dest[4] = 0; + } else { + /* Normal traffic path. */ + source_port = dsa_8021q_rx_source_port(vid); + switch_id = dsa_8021q_rx_switch_id(vid); + } + + skb->dev = dsa_master_find_slave(netdev, switch_id, source_port); + if (!skb->dev) { + netdev_warn(netdev, "Couldn't decode source port\n"); + return NULL; + } + + /* Delete/overwrite fake VLAN header, DSA expects to not find + * it there, see dsa_switch_rcv: skb_push(skb, ETH_HLEN). + */ + if (is_tagged) + memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - VLAN_HLEN, + ETH_HLEN - VLAN_HLEN); + + return skb; +} + +static struct dsa_device_ops sja1105_netdev_ops = { + .name = "sja1105", + .proto = DSA_TAG_PROTO_SJA1105, + .xmit = sja1105_xmit, + .rcv = sja1105_rcv, + .filter = sja1105_filter, + .overhead = VLAN_HLEN, +}; + +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_SJA1105); + +module_dsa_tag_driver(sja1105_netdev_ops); diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 0f9863dc4d44..fddcee38c1da 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -185,8 +185,12 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) * at all, so we check here whether one of those tagging * variants has been configured on the receiving interface, * and if so, set skb->protocol without looking at the packet. + * The DSA tagging protocol may be able to decode some but not all + * traffic (for example only for management). In that case give it the + * option to filter the packets from which it can decode source port + * information. */ - if (unlikely(netdev_uses_dsa(dev))) + if (unlikely(netdev_uses_dsa(dev)) && dsa_can_decode(skb, dev)) return htons(ETH_P_XDSA); if (likely(eth_proto_is_802_3(eth->h_proto))) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 71c2165a2ce3..d3da6a10f86f 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -159,12 +159,12 @@ static void rt_fibinfo_free(struct rtable __rcu **rtp) dst_release_immediate(&rt->dst); } -static void free_nh_exceptions(struct fib_nh *nh) +static void free_nh_exceptions(struct fib_nh_common *nhc) { struct fnhe_hash_bucket *hash; int i; - hash = rcu_dereference_protected(nh->nh_exceptions, 1); + hash = rcu_dereference_protected(nhc->nhc_exceptions, 1); if (!hash) return; for (i = 0; i < FNHE_HASH_SIZE; i++) { @@ -212,6 +212,9 @@ void fib_nh_common_release(struct fib_nh_common *nhc) dev_put(nhc->nhc_dev); lwtstate_put(nhc->nhc_lwtstate); + rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output); + rt_fibinfo_free(&nhc->nhc_rth_input); + free_nh_exceptions(nhc); } EXPORT_SYMBOL_GPL(fib_nh_common_release); @@ -222,9 +225,6 @@ void fib_nh_release(struct net *net, struct fib_nh *fib_nh) net->ipv4.fib_num_tclassid_users--; #endif fib_nh_common_release(&fib_nh->nh_common); - free_nh_exceptions(fib_nh); - rt_fibinfo_free_cpus(fib_nh->nh_pcpu_rth_output); - rt_fibinfo_free(&fib_nh->nh_rth_input); } /* Release a nexthop info record */ @@ -491,23 +491,35 @@ int fib_nh_common_init(struct fib_nh_common *nhc, struct nlattr *encap, u16 encap_type, void *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack) { + int err; + + nhc->nhc_pcpu_rth_output = alloc_percpu_gfp(struct rtable __rcu *, + gfp_flags); + if (!nhc->nhc_pcpu_rth_output) + return -ENOMEM; + if (encap) { struct lwtunnel_state *lwtstate; - int err; if (encap_type == LWTUNNEL_ENCAP_NONE) { NL_SET_ERR_MSG(extack, "LWT encap type not specified"); - return -EINVAL; + err = -EINVAL; + goto lwt_failure; } err = lwtunnel_build_state(encap_type, encap, nhc->nhc_family, cfg, &lwtstate, extack); if (err) - return err; + goto lwt_failure; nhc->nhc_lwtstate = lwtstate_get(lwtstate); } return 0; + +lwt_failure: + rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output); + nhc->nhc_pcpu_rth_output = NULL; + return err; } EXPORT_SYMBOL_GPL(fib_nh_common_init); @@ -515,18 +527,14 @@ int fib_nh_init(struct net *net, struct fib_nh *nh, struct fib_config *cfg, int nh_weight, struct netlink_ext_ack *extack) { - int err = -ENOMEM; + int err; nh->fib_nh_family = AF_INET; - nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *); - if (!nh->nh_pcpu_rth_output) - goto err_out; - err = fib_nh_common_init(&nh->nh_common, cfg->fc_encap, cfg->fc_encap_type, cfg, GFP_KERNEL, extack); if (err) - goto init_failure; + return err; nh->fib_nh_oif = cfg->fc_oif; nh->fib_nh_gw_family = cfg->fc_gw_family; @@ -546,12 +554,6 @@ int fib_nh_init(struct net *net, struct fib_nh *nh, nh->fib_nh_weight = nh_weight; #endif return 0; - -init_failure: - rt_fibinfo_free_cpus(nh->nh_pcpu_rth_output); - nh->nh_pcpu_rth_output = NULL; -err_out: - return err; } #ifdef CONFIG_IP_ROUTE_MULTIPATH @@ -1711,12 +1713,12 @@ static int call_fib_nh_notifiers(struct fib_nh *nh, * - if the new MTU is greater than the PMTU, don't make any change * - otherwise, unlock and set PMTU */ -static void nh_update_mtu(struct fib_nh *nh, u32 new, u32 orig) +static void nh_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig) { struct fnhe_hash_bucket *bucket; int i; - bucket = rcu_dereference_protected(nh->nh_exceptions, 1); + bucket = rcu_dereference_protected(nhc->nhc_exceptions, 1); if (!bucket) return; @@ -1747,7 +1749,7 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) hlist_for_each_entry(nh, head, nh_hash) { if (nh->fib_nh_dev == dev) - nh_update_mtu(nh, dev->mtu, orig_mtu); + nh_update_mtu(&nh->nh_common, dev->mtu, orig_mtu); } } diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 1132d6d1796a..ed97724c5e33 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -130,6 +130,7 @@ #include <linux/inetdevice.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> +#include <linux/indirect_call_wrapper.h> #include <net/snmp.h> #include <net/ip.h> @@ -188,6 +189,8 @@ bool ip_call_ra_chain(struct sk_buff *skb) return false; } +INDIRECT_CALLABLE_DECLARE(int udp_rcv(struct sk_buff *)); +INDIRECT_CALLABLE_DECLARE(int tcp_v4_rcv(struct sk_buff *)); void ip_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int protocol) { const struct net_protocol *ipprot; @@ -205,7 +208,8 @@ resubmit: } nf_reset(skb); } - ret = ipprot->handler(skb); + ret = INDIRECT_CALL_2(ipprot->handler, tcp_v4_rcv, udp_rcv, + skb); if (ret < 0) { protocol = -ret; goto resubmit; @@ -305,6 +309,8 @@ drop: return true; } +INDIRECT_CALLABLE_DECLARE(int udp_v4_early_demux(struct sk_buff *)); +INDIRECT_CALLABLE_DECLARE(int tcp_v4_early_demux(struct sk_buff *)); static int ip_rcv_finish_core(struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *dev) { @@ -322,7 +328,8 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk, ipprot = rcu_dereference(inet_protos[protocol]); if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) { - err = edemux(skb); + err = INDIRECT_CALL_2(edemux, tcp_v4_early_demux, + udp_v4_early_demux, skb); if (unlikely(err)) goto drop_error; /* must reload iph, skb->head might have changed */ diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index 4e6b53ab6c33..7875c98072eb 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -631,4 +631,4 @@ module_exit(fini); MODULE_AUTHOR("Jing Min Zhao <zhaojingmin@users.sourceforge.net>"); MODULE_DESCRIPTION("H.323 NAT helper"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("ip_nat_h323"); +MODULE_ALIAS_NF_NAT_HELPER("h323"); diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index 68b4d450391b..e17b4ee7604c 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -37,7 +37,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP"); -MODULE_ALIAS("ip_nat_pptp"); +MODULE_ALIAS_NF_NAT_HELPER("pptp"); static void pptp_nat_expected(struct nf_conn *ct, struct nf_conntrack_expect *exp) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 795aed6e4720..11ddc276776e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -643,8 +643,9 @@ static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnh } } -static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, - u32 pmtu, bool lock, unsigned long expires) +static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr, + __be32 gw, u32 pmtu, bool lock, + unsigned long expires) { struct fnhe_hash_bucket *hash; struct fib_nh_exception *fnhe; @@ -653,17 +654,17 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, unsigned int i; int depth; - genid = fnhe_genid(dev_net(nh->fib_nh_dev)); + genid = fnhe_genid(dev_net(nhc->nhc_dev)); hval = fnhe_hashfun(daddr); spin_lock_bh(&fnhe_lock); - hash = rcu_dereference(nh->nh_exceptions); + hash = rcu_dereference(nhc->nhc_exceptions); if (!hash) { hash = kcalloc(FNHE_HASH_SIZE, sizeof(*hash), GFP_ATOMIC); if (!hash) goto out_unlock; - rcu_assign_pointer(nh->nh_exceptions, hash); + rcu_assign_pointer(nhc->nhc_exceptions, hash); } hash += hval; @@ -715,13 +716,13 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, * stale, so anyone caching it rechecks if this exception * applies to them. */ - rt = rcu_dereference(nh->nh_rth_input); + rt = rcu_dereference(nhc->nhc_rth_input); if (rt) rt->dst.obsolete = DST_OBSOLETE_KILL; for_each_possible_cpu(i) { struct rtable __rcu **prt; - prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i); + prt = per_cpu_ptr(nhc->nhc_pcpu_rth_output, i); rt = rcu_dereference(*prt); if (rt) rt->dst.obsolete = DST_OBSOLETE_KILL; @@ -788,10 +789,8 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow } else { if (fib_lookup(net, fl4, &res, 0) == 0) { struct fib_nh_common *nhc = FIB_RES_NHC(res); - struct fib_nh *nh; - nh = container_of(nhc, struct fib_nh, nh_common); - update_or_create_fnhe(nh, fl4->daddr, new_gw, + update_or_create_fnhe(nhc, fl4->daddr, new_gw, 0, false, jiffies + ip_rt_gc_timeout); } @@ -1039,10 +1038,8 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) rcu_read_lock(); if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) { struct fib_nh_common *nhc = FIB_RES_NHC(res); - struct fib_nh *nh; - nh = container_of(nhc, struct fib_nh, nh_common); - update_or_create_fnhe(nh, fl4->daddr, 0, mtu, lock, + update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock, jiffies + ip_rt_mtu_expires); } rcu_read_unlock(); @@ -1328,7 +1325,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) return mtu - lwtunnel_headroom(dst->lwtstate, mtu); } -static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) +static void ip_del_fnhe(struct fib_nh_common *nhc, __be32 daddr) { struct fnhe_hash_bucket *hash; struct fib_nh_exception *fnhe, __rcu **fnhe_p; @@ -1336,7 +1333,7 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) spin_lock_bh(&fnhe_lock); - hash = rcu_dereference_protected(nh->nh_exceptions, + hash = rcu_dereference_protected(nhc->nhc_exceptions, lockdep_is_held(&fnhe_lock)); hash += hval; @@ -1362,9 +1359,10 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) spin_unlock_bh(&fnhe_lock); } -static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) +static struct fib_nh_exception *find_exception(struct fib_nh_common *nhc, + __be32 daddr) { - struct fnhe_hash_bucket *hash = rcu_dereference(nh->nh_exceptions); + struct fnhe_hash_bucket *hash = rcu_dereference(nhc->nhc_exceptions); struct fib_nh_exception *fnhe; u32 hval; @@ -1378,7 +1376,7 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) if (fnhe->fnhe_daddr == daddr) { if (fnhe->fnhe_expires && time_after(jiffies, fnhe->fnhe_expires)) { - ip_del_fnhe(nh, daddr); + ip_del_fnhe(nhc, daddr); break; } return fnhe; @@ -1405,10 +1403,9 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr) mtu = fi->fib_mtu; if (likely(!mtu)) { - struct fib_nh *nh = container_of(nhc, struct fib_nh, nh_common); struct fib_nh_exception *fnhe; - fnhe = find_exception(nh, daddr); + fnhe = find_exception(nhc, daddr); if (fnhe && !time_after_eq(jiffies, fnhe->fnhe_expires)) mtu = fnhe->fnhe_pmtu; } @@ -1469,15 +1466,15 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, return ret; } -static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt) +static bool rt_cache_route(struct fib_nh_common *nhc, struct rtable *rt) { struct rtable *orig, *prev, **p; bool ret = true; if (rt_is_input_route(rt)) { - p = (struct rtable **)&nh->nh_rth_input; + p = (struct rtable **)&nhc->nhc_rth_input; } else { - p = (struct rtable **)raw_cpu_ptr(nh->nh_pcpu_rth_output); + p = (struct rtable **)raw_cpu_ptr(nhc->nhc_pcpu_rth_output); } orig = *p; @@ -1574,7 +1571,6 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, if (fi) { struct fib_nh_common *nhc = FIB_RES_NHC(*res); - struct fib_nh *nh; if (nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK) { rt->rt_gw_family = nhc->nhc_gw_family; @@ -1587,15 +1583,19 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, ip_dst_init_metrics(&rt->dst, fi->fib_metrics); - nh = container_of(nhc, struct fib_nh, nh_common); #ifdef CONFIG_IP_ROUTE_CLASSID - rt->dst.tclassid = nh->nh_tclassid; + { + struct fib_nh *nh; + + nh = container_of(nhc, struct fib_nh, nh_common); + rt->dst.tclassid = nh->nh_tclassid; + } #endif - rt->dst.lwtstate = lwtstate_get(nh->fib_nh_lws); + rt->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate); if (unlikely(fnhe)) cached = rt_bind_exception(rt, fnhe, daddr, do_cache); else if (do_cache) - cached = rt_cache_route(nh, rt); + cached = rt_cache_route(nhc, rt); if (unlikely(!cached)) { /* Routes we intend to cache in nexthop exception or * FIB nexthop have the DST_NOCACHE bit clear. @@ -1756,7 +1756,6 @@ static int __mkroute_input(struct sk_buff *skb, struct net_device *dev = nhc->nhc_dev; struct fib_nh_exception *fnhe; struct rtable *rth; - struct fib_nh *nh; int err; struct in_device *out_dev; bool do_cache; @@ -1804,13 +1803,12 @@ static int __mkroute_input(struct sk_buff *skb, } } - nh = container_of(nhc, struct fib_nh, nh_common); - fnhe = find_exception(nh, daddr); + fnhe = find_exception(nhc, daddr); if (do_cache) { if (fnhe) rth = rcu_dereference(fnhe->fnhe_rth_input); else - rth = rcu_dereference(nh->nh_rth_input); + rth = rcu_dereference(nhc->nhc_rth_input); if (rt_cache_valid(rth)) { skb_dst_set_noref(skb, &rth->dst); goto out; @@ -2105,10 +2103,8 @@ local_input: if (res->fi) { if (!itag) { struct fib_nh_common *nhc = FIB_RES_NHC(*res); - struct fib_nh *nh; - nh = container_of(nhc, struct fib_nh, nh_common); - rth = rcu_dereference(nh->nh_rth_input); + rth = rcu_dereference(nhc->nhc_rth_input); if (rt_cache_valid(rth)) { skb_dst_set_noref(skb, &rth->dst); err = 0; @@ -2139,7 +2135,6 @@ local_input: if (do_cache) { struct fib_nh_common *nhc = FIB_RES_NHC(*res); - struct fib_nh *nh; rth->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate); if (lwtunnel_input_redirect(rth->dst.lwtstate)) { @@ -2148,8 +2143,7 @@ local_input: rth->dst.input = lwtunnel_input; } - nh = container_of(nhc, struct fib_nh, nh_common); - if (unlikely(!rt_cache_route(nh, rth))) + if (unlikely(!rt_cache_route(nhc, rth))) rt_add_uncached_list(rth); } skb_dst_set(skb, &rth->dst); @@ -2321,10 +2315,9 @@ static struct rtable *__mkroute_output(const struct fib_result *res, do_cache &= fi != NULL; if (fi) { struct fib_nh_common *nhc = FIB_RES_NHC(*res); - struct fib_nh *nh = container_of(nhc, struct fib_nh, nh_common); struct rtable __rcu **prth; - fnhe = find_exception(nh, fl4->daddr); + fnhe = find_exception(nhc, fl4->daddr); if (!do_cache) goto add; if (fnhe) { @@ -2337,7 +2330,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, do_cache = false; goto add; } - prth = raw_cpu_ptr(nh->nh_pcpu_rth_output); + prth = raw_cpu_ptr(nhc->nhc_pcpu_rth_output); } rth = rcu_dereference(*prth); if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst)) diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index c7ed2b6d5a1d..b50b1af1f530 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -29,6 +29,7 @@ #include <linux/icmpv6.h> #include <linux/mroute6.h> #include <linux/slab.h> +#include <linux/indirect_call_wrapper.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> @@ -47,6 +48,8 @@ #include <net/inet_ecn.h> #include <net/dst_metadata.h> +INDIRECT_CALLABLE_DECLARE(void udp_v6_early_demux(struct sk_buff *)); +INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *)); static void ip6_rcv_finish_core(struct net *net, struct sock *sk, struct sk_buff *skb) { @@ -57,7 +60,8 @@ static void ip6_rcv_finish_core(struct net *net, struct sock *sk, ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]); if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) - edemux(skb); + INDIRECT_CALL_2(edemux, tcp_v6_early_demux, + udp_v6_early_demux, skb); } if (!skb_valid_dst(skb)) ip6_route_input(skb); @@ -316,6 +320,9 @@ void ipv6_list_rcv(struct list_head *head, struct packet_type *pt, ip6_sublist_rcv(&sublist, curr_dev, curr_net); } +INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *)); +INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *)); + /* * Deliver the packet to the host */ @@ -391,7 +398,8 @@ resubmit_final: !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard; - ret = ipprot->handler(skb); + ret = INDIRECT_CALL_2(ipprot->handler, tcp_v6_rcv, udpv6_rcv, + skb); if (ret > 0) { if (ipprot->flags & INET6_PROTO_FINAL) { /* Not an extension header, most likely UDP diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 82018bdce863..beaf28456301 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -43,6 +43,7 @@ #include <linux/ipv6.h> #include <linux/icmpv6.h> #include <linux/random.h> +#include <linux/indirect_call_wrapper.h> #include <net/tcp.h> #include <net/ndisc.h> @@ -1435,7 +1436,7 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, skb->tstamp || skb_hwtstamps(skb)->hwtstamp; } -static int tcp_v6_rcv(struct sk_buff *skb) +INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) { struct sk_buff *skb_to_free; int sdif = inet6_sdif(skb); @@ -1654,7 +1655,7 @@ do_time_wait: goto discard_it; } -static void tcp_v6_early_demux(struct sk_buff *skb) +INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb) { const struct ipv6hdr *hdr; const struct tcphdr *th; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 2464fba569b4..07fa579dfb96 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -36,6 +36,7 @@ #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/uaccess.h> +#include <linux/indirect_call_wrapper.h> #include <net/addrconf.h> #include <net/ndisc.h> @@ -980,7 +981,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net, return NULL; } -static void udp_v6_early_demux(struct sk_buff *skb) +INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); const struct udphdr *uh; @@ -1021,7 +1022,7 @@ static void udp_v6_early_demux(struct sk_buff *skb) } } -static __inline__ int udpv6_rcv(struct sk_buff *skb) +INDIRECT_CALLABLE_SCOPE int udpv6_rcv(struct sk_buff *skb) { return __udp6_lib_rcv(skb, &udp_table, IPPROTO_UDP); } diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index f2681ec5b5f6..dbec6fca0d9e 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -28,11 +28,13 @@ static unsigned int master_timeout __read_mostly = 300; static char *ts_algo = "kmp"; +#define HELPER_NAME "amanda" + MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>"); MODULE_DESCRIPTION("Amanda connection tracking module"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ip_conntrack_amanda"); -MODULE_ALIAS_NFCT_HELPER("amanda"); +MODULE_ALIAS_NFCT_HELPER(HELPER_NAME); module_param(master_timeout, uint, 0600); MODULE_PARM_DESC(master_timeout, "timeout for the master connection"); @@ -179,13 +181,14 @@ static const struct nf_conntrack_expect_policy amanda_exp_policy = { static struct nf_conntrack_helper amanda_helper[2] __read_mostly = { { - .name = "amanda", + .name = HELPER_NAME, .me = THIS_MODULE, .help = amanda_help, .tuple.src.l3num = AF_INET, .tuple.src.u.udp.port = cpu_to_be16(10080), .tuple.dst.protonum = IPPROTO_UDP, .expect_policy = &amanda_exp_policy, + .nat_mod_name = NF_NAT_HELPER_NAME(HELPER_NAME), }, { .name = "amanda", @@ -195,6 +198,7 @@ static struct nf_conntrack_helper amanda_helper[2] __read_mostly = { .tuple.src.u.udp.port = cpu_to_be16(10080), .tuple.dst.protonum = IPPROTO_UDP, .expect_policy = &amanda_exp_policy, + .nat_mod_name = NF_NAT_HELPER_NAME(HELPER_NAME), }, }; diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index a11c304fb771..32aeac1c4760 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -29,11 +29,13 @@ #include <net/netfilter/nf_conntrack_helper.h> #include <linux/netfilter/nf_conntrack_ftp.h> +#define HELPER_NAME "ftp" + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>"); MODULE_DESCRIPTION("ftp connection tracking helper"); MODULE_ALIAS("ip_conntrack_ftp"); -MODULE_ALIAS_NFCT_HELPER("ftp"); +MODULE_ALIAS_NFCT_HELPER(HELPER_NAME); /* This is slow, but it's simple. --RR */ static char *ftp_buffer; @@ -588,12 +590,14 @@ static int __init nf_conntrack_ftp_init(void) /* FIXME should be configurable whether IPv4 and IPv6 FTP connections are tracked or not - YK */ for (i = 0; i < ports_c; i++) { - nf_ct_helper_init(&ftp[2 * i], AF_INET, IPPROTO_TCP, "ftp", - FTP_PORT, ports[i], ports[i], &ftp_exp_policy, - 0, help, nf_ct_ftp_from_nlattr, THIS_MODULE); - nf_ct_helper_init(&ftp[2 * i + 1], AF_INET6, IPPROTO_TCP, "ftp", - FTP_PORT, ports[i], ports[i], &ftp_exp_policy, - 0, help, nf_ct_ftp_from_nlattr, THIS_MODULE); + nf_ct_helper_init(&ftp[2 * i], AF_INET, IPPROTO_TCP, + HELPER_NAME, FTP_PORT, ports[i], ports[i], + &ftp_exp_policy, 0, help, + nf_ct_ftp_from_nlattr, THIS_MODULE); + nf_ct_helper_init(&ftp[2 * i + 1], AF_INET6, IPPROTO_TCP, + HELPER_NAME, FTP_PORT, ports[i], ports[i], + &ftp_exp_policy, 0, help, + nf_ct_ftp_from_nlattr, THIS_MODULE); } ret = nf_conntrack_helpers_register(ftp, ports_c * 2); diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 274baf1dab87..918df7f71c8f 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -42,6 +42,9 @@ module_param_named(nf_conntrack_helper, nf_ct_auto_assign_helper, bool, 0644); MODULE_PARM_DESC(nf_conntrack_helper, "Enable automatic conntrack helper assignment (default 0)"); +static DEFINE_MUTEX(nf_ct_nat_helpers_mutex); +static struct list_head nf_ct_nat_helpers __read_mostly; + /* Stupid hash, but collision free for the default registrations of the * helpers currently in the kernel. */ static unsigned int helper_hash(const struct nf_conntrack_tuple *tuple) @@ -130,6 +133,70 @@ void nf_conntrack_helper_put(struct nf_conntrack_helper *helper) } EXPORT_SYMBOL_GPL(nf_conntrack_helper_put); +static struct nf_conntrack_nat_helper * +nf_conntrack_nat_helper_find(const char *mod_name) +{ + struct nf_conntrack_nat_helper *cur; + bool found = false; + + list_for_each_entry_rcu(cur, &nf_ct_nat_helpers, list) { + if (!strcmp(cur->mod_name, mod_name)) { + found = true; + break; + } + } + return found ? cur : NULL; +} + +int +nf_nat_helper_try_module_get(const char *name, u16 l3num, u8 protonum) +{ + struct nf_conntrack_helper *h; + struct nf_conntrack_nat_helper *nat; + char mod_name[NF_CT_HELPER_NAME_LEN]; + int ret = 0; + + rcu_read_lock(); + h = __nf_conntrack_helper_find(name, l3num, protonum); + if (!h) { + rcu_read_unlock(); + return -ENOENT; + } + + nat = nf_conntrack_nat_helper_find(h->nat_mod_name); + if (!nat) { + snprintf(mod_name, sizeof(mod_name), "%s", h->nat_mod_name); + rcu_read_unlock(); + request_module(mod_name); + + rcu_read_lock(); + nat = nf_conntrack_nat_helper_find(mod_name); + if (!nat) { + rcu_read_unlock(); + return -ENOENT; + } + } + + if (!try_module_get(nat->module)) + ret = -ENOENT; + + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL_GPL(nf_nat_helper_try_module_get); + +void nf_nat_helper_put(struct nf_conntrack_helper *helper) +{ + struct nf_conntrack_nat_helper *nat; + + nat = nf_conntrack_nat_helper_find(helper->nat_mod_name); + if (WARN_ON_ONCE(!nat)) + return; + + module_put(nat->module); +} +EXPORT_SYMBOL_GPL(nf_nat_helper_put); + struct nf_conn_help * nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp) { @@ -430,6 +497,8 @@ void nf_ct_helper_init(struct nf_conntrack_helper *helper, helper->help = help; helper->from_nlattr = from_nlattr; helper->me = module; + snprintf(helper->nat_mod_name, sizeof(helper->nat_mod_name), + NF_NAT_HELPER_PREFIX "%s", name); if (spec_port == default_port) snprintf(helper->name, sizeof(helper->name), "%s", name); @@ -466,6 +535,22 @@ void nf_conntrack_helpers_unregister(struct nf_conntrack_helper *helper, } EXPORT_SYMBOL_GPL(nf_conntrack_helpers_unregister); +void nf_nat_helper_register(struct nf_conntrack_nat_helper *nat) +{ + mutex_lock(&nf_ct_nat_helpers_mutex); + list_add_rcu(&nat->list, &nf_ct_nat_helpers); + mutex_unlock(&nf_ct_nat_helpers_mutex); +} +EXPORT_SYMBOL_GPL(nf_nat_helper_register); + +void nf_nat_helper_unregister(struct nf_conntrack_nat_helper *nat) +{ + mutex_lock(&nf_ct_nat_helpers_mutex); + list_del_rcu(&nat->list); + mutex_unlock(&nf_ct_nat_helpers_mutex); +} +EXPORT_SYMBOL_GPL(nf_nat_helper_unregister); + static const struct nf_ct_ext_type helper_extend = { .len = sizeof(struct nf_conn_help), .align = __alignof__(struct nf_conn_help), @@ -493,6 +578,7 @@ int nf_conntrack_helper_init(void) goto out_extend; } + INIT_LIST_HEAD(&nf_ct_nat_helpers); return 0; out_extend: kvfree(nf_ct_helper_hash); diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 4099f4d79bae..79e5014b3b0d 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -42,11 +42,13 @@ unsigned int (*nf_nat_irc_hook)(struct sk_buff *skb, struct nf_conntrack_expect *exp) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_irc_hook); +#define HELPER_NAME "irc" + MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_DESCRIPTION("IRC (DCC) connection tracking helper"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ip_conntrack_irc"); -MODULE_ALIAS_NFCT_HELPER("irc"); +MODULE_ALIAS_NFCT_HELPER(HELPER_NAME); module_param_array(ports, ushort, &ports_c, 0400); MODULE_PARM_DESC(ports, "port numbers of IRC servers"); @@ -259,7 +261,7 @@ static int __init nf_conntrack_irc_init(void) ports[ports_c++] = IRC_PORT; for (i = 0; i < ports_c; i++) { - nf_ct_helper_init(&irc[i], AF_INET, IPPROTO_TCP, "irc", + nf_ct_helper_init(&irc[i], AF_INET, IPPROTO_TCP, HELPER_NAME, IRC_PORT, ports[i], i, &irc_exp_policy, 0, help, NULL, THIS_MODULE); } diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index 5072ff96ab33..83306648dd0f 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -30,10 +30,12 @@ #include <net/netfilter/nf_conntrack_expect.h> #include <linux/netfilter/nf_conntrack_sane.h> +#define HELPER_NAME "sane" + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Michal Schmidt <mschmidt@redhat.com>"); MODULE_DESCRIPTION("SANE connection tracking helper"); -MODULE_ALIAS_NFCT_HELPER("sane"); +MODULE_ALIAS_NFCT_HELPER(HELPER_NAME); static char *sane_buffer; @@ -195,12 +197,12 @@ static int __init nf_conntrack_sane_init(void) /* FIXME should be configurable whether IPv4 and IPv6 connections are tracked or not - YK */ for (i = 0; i < ports_c; i++) { - nf_ct_helper_init(&sane[2 * i], AF_INET, IPPROTO_TCP, "sane", - SANE_PORT, ports[i], ports[i], + nf_ct_helper_init(&sane[2 * i], AF_INET, IPPROTO_TCP, + HELPER_NAME, SANE_PORT, ports[i], ports[i], &sane_exp_policy, 0, help, NULL, THIS_MODULE); - nf_ct_helper_init(&sane[2 * i + 1], AF_INET6, IPPROTO_TCP, "sane", - SANE_PORT, ports[i], ports[i], + nf_ct_helper_init(&sane[2 * i + 1], AF_INET6, IPPROTO_TCP, + HELPER_NAME, SANE_PORT, ports[i], ports[i], &sane_exp_policy, 0, help, NULL, THIS_MODULE); } diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index d5454d1031a3..c30c883c370b 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -30,11 +30,13 @@ #include <net/netfilter/nf_conntrack_zones.h> #include <linux/netfilter/nf_conntrack_sip.h> +#define HELPER_NAME "sip" + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>"); MODULE_DESCRIPTION("SIP connection tracking helper"); MODULE_ALIAS("ip_conntrack_sip"); -MODULE_ALIAS_NFCT_HELPER("sip"); +MODULE_ALIAS_NFCT_HELPER(HELPER_NAME); #define MAX_PORTS 8 static unsigned short ports[MAX_PORTS]; @@ -1669,21 +1671,21 @@ static int __init nf_conntrack_sip_init(void) ports[ports_c++] = SIP_PORT; for (i = 0; i < ports_c; i++) { - nf_ct_helper_init(&sip[4 * i], AF_INET, IPPROTO_UDP, "sip", - SIP_PORT, ports[i], i, sip_exp_policy, - SIP_EXPECT_MAX, sip_help_udp, + nf_ct_helper_init(&sip[4 * i], AF_INET, IPPROTO_UDP, + HELPER_NAME, SIP_PORT, ports[i], i, + sip_exp_policy, SIP_EXPECT_MAX, sip_help_udp, NULL, THIS_MODULE); - nf_ct_helper_init(&sip[4 * i + 1], AF_INET, IPPROTO_TCP, "sip", - SIP_PORT, ports[i], i, sip_exp_policy, - SIP_EXPECT_MAX, sip_help_tcp, + nf_ct_helper_init(&sip[4 * i + 1], AF_INET, IPPROTO_TCP, + HELPER_NAME, SIP_PORT, ports[i], i, + sip_exp_policy, SIP_EXPECT_MAX, sip_help_tcp, NULL, THIS_MODULE); - nf_ct_helper_init(&sip[4 * i + 2], AF_INET6, IPPROTO_UDP, "sip", - SIP_PORT, ports[i], i, sip_exp_policy, - SIP_EXPECT_MAX, sip_help_udp, + nf_ct_helper_init(&sip[4 * i + 2], AF_INET6, IPPROTO_UDP, + HELPER_NAME, SIP_PORT, ports[i], i, + sip_exp_policy, SIP_EXPECT_MAX, sip_help_udp, NULL, THIS_MODULE); - nf_ct_helper_init(&sip[4 * i + 3], AF_INET6, IPPROTO_TCP, "sip", - SIP_PORT, ports[i], i, sip_exp_policy, - SIP_EXPECT_MAX, sip_help_tcp, + nf_ct_helper_init(&sip[4 * i + 3], AF_INET6, IPPROTO_TCP, + HELPER_NAME, SIP_PORT, ports[i], i, + sip_exp_policy, SIP_EXPECT_MAX, sip_help_tcp, NULL, THIS_MODULE); } diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index c2ae14c720b4..e0d392cb3075 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -511,6 +511,8 @@ static void nf_conntrack_standalone_fini_proc(struct net *net) /* Log invalid packets of a given protocol */ static int log_invalid_proto_min __read_mostly; static int log_invalid_proto_max __read_mostly = 255; +static int zero; +static int one = 1; /* size the user *wants to set */ static unsigned int nf_conntrack_htable_size_user __read_mostly; @@ -624,9 +626,11 @@ static struct ctl_table nf_ct_sysctl_table[] = { [NF_SYSCTL_CT_CHECKSUM] = { .procname = "nf_conntrack_checksum", .data = &init_net.ct.sysctl_checksum, - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, }, [NF_SYSCTL_CT_LOG_INVALID] = { .procname = "nf_conntrack_log_invalid", @@ -647,33 +651,41 @@ static struct ctl_table nf_ct_sysctl_table[] = { [NF_SYSCTL_CT_ACCT] = { .procname = "nf_conntrack_acct", .data = &init_net.ct.sysctl_acct, - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, }, [NF_SYSCTL_CT_HELPER] = { .procname = "nf_conntrack_helper", .data = &init_net.ct.sysctl_auto_assign_helper, - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, }, #ifdef CONFIG_NF_CONNTRACK_EVENTS [NF_SYSCTL_CT_EVENTS] = { .procname = "nf_conntrack_events", .data = &init_net.ct.sysctl_events, - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, }, #endif #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP [NF_SYSCTL_CT_TIMESTAMP] = { .procname = "nf_conntrack_timestamp", .data = &init_net.ct.sysctl_tstamp, - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, }, #endif [NF_SYSCTL_CT_PROTO_TIMEOUT_GENERIC] = { @@ -744,15 +756,19 @@ static struct ctl_table nf_ct_sysctl_table[] = { }, [NF_SYSCTL_CT_PROTO_TCP_LOOSE] = { .procname = "nf_conntrack_tcp_loose", - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, }, [NF_SYSCTL_CT_PROTO_TCP_LIBERAL] = { .procname = "nf_conntrack_tcp_be_liberal", - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, }, [NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS] = { .procname = "nf_conntrack_tcp_max_retrans", @@ -887,7 +903,9 @@ static struct ctl_table nf_ct_sysctl_table[] = { .procname = "nf_conntrack_dccp_loose", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, }, #endif #ifdef CONFIG_NF_CT_PROTO_GRE diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c index 548b673b3625..6977cb91ae9a 100644 --- a/net/netfilter/nf_conntrack_tftp.c +++ b/net/netfilter/nf_conntrack_tftp.c @@ -20,11 +20,13 @@ #include <net/netfilter/nf_conntrack_helper.h> #include <linux/netfilter/nf_conntrack_tftp.h> +#define HELPER_NAME "tftp" + MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>"); MODULE_DESCRIPTION("TFTP connection tracking helper"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ip_conntrack_tftp"); -MODULE_ALIAS_NFCT_HELPER("tftp"); +MODULE_ALIAS_NFCT_HELPER(HELPER_NAME); #define MAX_PORTS 8 static unsigned short ports[MAX_PORTS]; @@ -119,12 +121,14 @@ static int __init nf_conntrack_tftp_init(void) ports[ports_c++] = TFTP_PORT; for (i = 0; i < ports_c; i++) { - nf_ct_helper_init(&tftp[2 * i], AF_INET, IPPROTO_UDP, "tftp", - TFTP_PORT, ports[i], i, &tftp_exp_policy, - 0, tftp_help, NULL, THIS_MODULE); - nf_ct_helper_init(&tftp[2 * i + 1], AF_INET6, IPPROTO_UDP, "tftp", - TFTP_PORT, ports[i], i, &tftp_exp_policy, - 0, tftp_help, NULL, THIS_MODULE); + nf_ct_helper_init(&tftp[2 * i], AF_INET, IPPROTO_UDP, + HELPER_NAME, TFTP_PORT, ports[i], i, + &tftp_exp_policy, 0, tftp_help, NULL, + THIS_MODULE); + nf_ct_helper_init(&tftp[2 * i + 1], AF_INET6, IPPROTO_UDP, + HELPER_NAME, TFTP_PORT, ports[i], i, + &tftp_exp_policy, 0, tftp_help, NULL, + THIS_MODULE); } ret = nf_conntrack_helpers_register(tftp, ports_c * 2); diff --git a/net/netfilter/nf_nat_amanda.c b/net/netfilter/nf_nat_amanda.c index e4d61a7a5258..4e59416ea709 100644 --- a/net/netfilter/nf_nat_amanda.c +++ b/net/netfilter/nf_nat_amanda.c @@ -19,10 +19,15 @@ #include <net/netfilter/nf_nat_helper.h> #include <linux/netfilter/nf_conntrack_amanda.h> +#define NAT_HELPER_NAME "amanda" + MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>"); MODULE_DESCRIPTION("Amanda NAT helper"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("ip_nat_amanda"); +MODULE_ALIAS_NF_NAT_HELPER(NAT_HELPER_NAME); + +static struct nf_conntrack_nat_helper nat_helper_amanda = + NF_CT_NAT_HELPER_INIT(NAT_HELPER_NAME); static unsigned int help(struct sk_buff *skb, enum ip_conntrack_info ctinfo, @@ -74,6 +79,7 @@ static unsigned int help(struct sk_buff *skb, static void __exit nf_nat_amanda_fini(void) { + nf_nat_helper_unregister(&nat_helper_amanda); RCU_INIT_POINTER(nf_nat_amanda_hook, NULL); synchronize_rcu(); } @@ -81,6 +87,7 @@ static void __exit nf_nat_amanda_fini(void) static int __init nf_nat_amanda_init(void) { BUG_ON(nf_nat_amanda_hook != NULL); + nf_nat_helper_register(&nat_helper_amanda); RCU_INIT_POINTER(nf_nat_amanda_hook, help); return 0; } diff --git a/net/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c index 5063cbf1689c..0ea6b1bc52de 100644 --- a/net/netfilter/nf_nat_ftp.c +++ b/net/netfilter/nf_nat_ftp.c @@ -21,13 +21,18 @@ #include <net/netfilter/nf_conntrack_expect.h> #include <linux/netfilter/nf_conntrack_ftp.h> +#define NAT_HELPER_NAME "ftp" + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>"); MODULE_DESCRIPTION("ftp NAT helper"); -MODULE_ALIAS("ip_nat_ftp"); +MODULE_ALIAS_NF_NAT_HELPER(NAT_HELPER_NAME); /* FIXME: Time out? --RR */ +static struct nf_conntrack_nat_helper nat_helper_ftp = + NF_CT_NAT_HELPER_INIT(NAT_HELPER_NAME); + static int nf_nat_ftp_fmt_cmd(struct nf_conn *ct, enum nf_ct_ftp_type type, char *buffer, size_t buflen, union nf_inet_addr *addr, u16 port) @@ -124,6 +129,7 @@ out: static void __exit nf_nat_ftp_fini(void) { + nf_nat_helper_unregister(&nat_helper_ftp); RCU_INIT_POINTER(nf_nat_ftp_hook, NULL); synchronize_rcu(); } @@ -131,6 +137,7 @@ static void __exit nf_nat_ftp_fini(void) static int __init nf_nat_ftp_init(void) { BUG_ON(nf_nat_ftp_hook != NULL); + nf_nat_helper_register(&nat_helper_ftp); RCU_INIT_POINTER(nf_nat_ftp_hook, nf_nat_ftp); return 0; } diff --git a/net/netfilter/nf_nat_irc.c b/net/netfilter/nf_nat_irc.c index 3aa35a43100d..d87cbe5e03ec 100644 --- a/net/netfilter/nf_nat_irc.c +++ b/net/netfilter/nf_nat_irc.c @@ -23,10 +23,15 @@ #include <net/netfilter/nf_conntrack_expect.h> #include <linux/netfilter/nf_conntrack_irc.h> +#define NAT_HELPER_NAME "irc" + MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); MODULE_DESCRIPTION("IRC (DCC) NAT helper"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("ip_nat_irc"); +MODULE_ALIAS_NF_NAT_HELPER(NAT_HELPER_NAME); + +static struct nf_conntrack_nat_helper nat_helper_irc = + NF_CT_NAT_HELPER_INIT(NAT_HELPER_NAME); static unsigned int help(struct sk_buff *skb, enum ip_conntrack_info ctinfo, @@ -96,6 +101,7 @@ static unsigned int help(struct sk_buff *skb, static void __exit nf_nat_irc_fini(void) { + nf_nat_helper_unregister(&nat_helper_irc); RCU_INIT_POINTER(nf_nat_irc_hook, NULL); synchronize_rcu(); } @@ -103,6 +109,7 @@ static void __exit nf_nat_irc_fini(void) static int __init nf_nat_irc_init(void) { BUG_ON(nf_nat_irc_hook != NULL); + nf_nat_helper_register(&nat_helper_irc); RCU_INIT_POINTER(nf_nat_irc_hook, help); return 0; } diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c index aa1be643d7a0..464387b3600f 100644 --- a/net/netfilter/nf_nat_sip.c +++ b/net/netfilter/nf_nat_sip.c @@ -24,11 +24,15 @@ #include <net/netfilter/nf_conntrack_seqadj.h> #include <linux/netfilter/nf_conntrack_sip.h> +#define NAT_HELPER_NAME "sip" + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>"); MODULE_DESCRIPTION("SIP NAT helper"); -MODULE_ALIAS("ip_nat_sip"); +MODULE_ALIAS_NF_NAT_HELPER(NAT_HELPER_NAME); +static struct nf_conntrack_nat_helper nat_helper_sip = + NF_CT_NAT_HELPER_INIT(NAT_HELPER_NAME); static unsigned int mangle_packet(struct sk_buff *skb, unsigned int protoff, unsigned int dataoff, @@ -656,8 +660,8 @@ static struct nf_ct_helper_expectfn sip_nat = { static void __exit nf_nat_sip_fini(void) { + nf_nat_helper_unregister(&nat_helper_sip); RCU_INIT_POINTER(nf_nat_sip_hooks, NULL); - nf_ct_helper_expectfn_unregister(&sip_nat); synchronize_rcu(); } @@ -675,6 +679,7 @@ static const struct nf_nat_sip_hooks sip_hooks = { static int __init nf_nat_sip_init(void) { BUG_ON(nf_nat_sip_hooks != NULL); + nf_nat_helper_register(&nat_helper_sip); RCU_INIT_POINTER(nf_nat_sip_hooks, &sip_hooks); nf_ct_helper_expectfn_register(&sip_nat); return 0; diff --git a/net/netfilter/nf_nat_tftp.c b/net/netfilter/nf_nat_tftp.c index 7f67e1d5310d..e633b3863e33 100644 --- a/net/netfilter/nf_nat_tftp.c +++ b/net/netfilter/nf_nat_tftp.c @@ -13,10 +13,15 @@ #include <net/netfilter/nf_nat_helper.h> #include <linux/netfilter/nf_conntrack_tftp.h> +#define NAT_HELPER_NAME "tftp" + MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>"); MODULE_DESCRIPTION("TFTP NAT helper"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("ip_nat_tftp"); +MODULE_ALIAS_NF_NAT_HELPER(NAT_HELPER_NAME); + +static struct nf_conntrack_nat_helper nat_helper_tftp = + NF_CT_NAT_HELPER_INIT(NAT_HELPER_NAME); static unsigned int help(struct sk_buff *skb, enum ip_conntrack_info ctinfo, @@ -37,6 +42,7 @@ static unsigned int help(struct sk_buff *skb, static void __exit nf_nat_tftp_fini(void) { + nf_nat_helper_unregister(&nat_helper_tftp); RCU_INIT_POINTER(nf_nat_tftp_hook, NULL); synchronize_rcu(); } @@ -44,6 +50,7 @@ static void __exit nf_nat_tftp_fini(void) static int __init nf_nat_tftp_init(void) { BUG_ON(nf_nat_tftp_hook != NULL); + nf_nat_helper_register(&nat_helper_tftp); RCU_INIT_POINTER(nf_nat_tftp_hook, help); return 0; } diff --git a/net/netfilter/nf_tables_set_core.c b/net/netfilter/nf_tables_set_core.c index 814789644bd3..a9fce8d10051 100644 --- a/net/netfilter/nf_tables_set_core.c +++ b/net/netfilter/nf_tables_set_core.c @@ -1,4 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/module.h> #include <net/netfilter/nf_tables_core.h> static int __init nf_tables_set_module_init(void) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index b422b74bfe08..f043936763f3 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -178,6 +178,11 @@ static void nft_ct_get_eval(const struct nft_expr *expr, return; } #endif + case NFT_CT_ID: + if (!nf_ct_is_confirmed(ct)) + goto err; + *dest = nf_ct_get_id(ct); + return; default: break; } @@ -479,6 +484,9 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, len = sizeof(u16); break; #endif + case NFT_CT_ID: + len = sizeof(u32); + break; default: return -EOPNOTSUPP; } diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index e461007558e8..8394560aa695 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -28,6 +28,23 @@ struct nft_dynset { struct nft_set_binding binding; }; +static int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src) +{ + int err; + + if (src->ops->clone) { + dst->ops = src->ops; + err = src->ops->clone(dst, src); + if (err < 0) + return err; + } else { + memcpy(dst, src, src->ops->size); + } + + __module_get(src->ops->type->owner); + return 0; +} + static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr, struct nft_regs *regs) { diff --git a/net/netfilter/xt_connlabel.c b/net/netfilter/xt_connlabel.c index 4fa4efd24353..893374ac3758 100644 --- a/net/netfilter/xt_connlabel.c +++ b/net/netfilter/xt_connlabel.c @@ -15,7 +15,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); -MODULE_DESCRIPTION("Xtables: add/match connection trackling labels"); +MODULE_DESCRIPTION("Xtables: add/match connection tracking labels"); MODULE_ALIAS("ipt_connlabel"); MODULE_ALIAS("ip6t_connlabel"); diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 8d86e39d6280..a30536b17ee1 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -288,8 +288,7 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg, size = 16; } /* FIXME: don't use vmalloc() here or anywhere else -HW */ - hinfo = vmalloc(sizeof(struct xt_hashlimit_htable) + - sizeof(struct hlist_head) * size); + hinfo = vmalloc(struct_size(hinfo, hash, size)); if (hinfo == NULL) return -ENOMEM; *out_hinfo = hinfo; diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index bded32144619..333ec5f298fe 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1307,6 +1307,7 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name, { struct nf_conntrack_helper *helper; struct nf_conn_help *help; + int ret = 0; helper = nf_conntrack_helper_try_module_get(name, info->family, key->ip.proto); @@ -1321,13 +1322,21 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name, return -ENOMEM; } +#ifdef CONFIG_NF_NAT_NEEDED + if (info->nat) { + ret = nf_nat_helper_try_module_get(name, info->family, + key->ip.proto); + if (ret) { + nf_conntrack_helper_put(helper); + OVS_NLERR(log, "Failed to load \"%s\" NAT helper, error: %d", + name, ret); + return ret; + } + } +#endif rcu_assign_pointer(help->helper, helper); info->helper = helper; - - if (info->nat) - request_module("ip_nat_%s", name); - - return 0; + return ret; } #if IS_ENABLED(CONFIG_NF_NAT) @@ -1801,8 +1810,13 @@ void ovs_ct_free_action(const struct nlattr *a) static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info) { - if (ct_info->helper) + if (ct_info->helper) { +#ifdef CONFIG_NF_NAT_NEEDED + if (ct_info->nat) + nf_nat_helper_put(ct_info->helper); +#endif nf_conntrack_helper_put(ct_info->helper); + } if (ct_info->ct) { if (ct_info->timeout[0]) nf_ct_destroy_timeout(ct_info->ct); @@ -2161,6 +2175,10 @@ static int ovs_ct_limit_cmd_get(struct sk_buff *skb, struct genl_info *info) return PTR_ERR(reply); nla_reply = nla_nest_start_noflag(reply, OVS_CT_LIMIT_ATTR_ZONE_LIMIT); + if (!nla_reply) { + err = -EMSGSIZE; + goto exit_err; + } if (a[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]) { err = ovs_ct_limit_get_zone_limit( diff --git a/net/sched/act_police.c b/net/sched/act_police.c index b48e40c69ad0..61731944742a 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -22,42 +22,7 @@ #include <net/act_api.h> #include <net/netlink.h> #include <net/pkt_cls.h> - -struct tcf_police_params { - int tcfp_result; - u32 tcfp_ewma_rate; - s64 tcfp_burst; - u32 tcfp_mtu; - s64 tcfp_mtu_ptoks; - struct psched_ratecfg rate; - bool rate_present; - struct psched_ratecfg peak; - bool peak_present; - struct rcu_head rcu; -}; - -struct tcf_police { - struct tc_action common; - struct tcf_police_params __rcu *params; - - spinlock_t tcfp_lock ____cacheline_aligned_in_smp; - s64 tcfp_toks; - s64 tcfp_ptoks; - s64 tcfp_t_c; -}; - -#define to_police(pc) ((struct tcf_police *)pc) - -/* old policer structure from before tc actions */ -struct tc_police_compat { - u32 index; - int action; - u32 limit; - u32 burst; - u32 mtu; - struct tc_ratespec rate; - struct tc_ratespec peakrate; -}; +#include <net/tc_act/tc_police.h> /* Each policer is serialized by its individual spinlock */ @@ -317,6 +282,20 @@ static void tcf_police_cleanup(struct tc_action *a) kfree_rcu(p, rcu); } +static void tcf_police_stats_update(struct tc_action *a, + u64 bytes, u32 packets, + u64 lastuse, bool hw) +{ + struct tcf_police *police = to_police(a); + struct tcf_t *tm = &police->tcf_tm; + + _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); + if (hw) + _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw), + bytes, packets); + tm->lastuse = max_t(u64, tm->lastuse, lastuse); +} + static int tcf_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { @@ -380,6 +359,7 @@ static struct tc_action_ops act_police_ops = { .kind = "police", .id = TCA_ID_POLICE, .owner = THIS_MODULE, + .stats_update = tcf_police_stats_update, .act = tcf_police_act, .dump = tcf_police_dump, .init = tcf_police_init, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 263c2ec082c9..d4699156974a 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -37,6 +37,8 @@ #include <net/tc_act/tc_tunnel_key.h> #include <net/tc_act/tc_csum.h> #include <net/tc_act/tc_gact.h> +#include <net/tc_act/tc_police.h> +#include <net/tc_act/tc_sample.h> #include <net/tc_act/tc_skbedit.h> extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; @@ -3257,6 +3259,18 @@ int tc_setup_flow_action(struct flow_action *flow_action, } else if (is_tcf_skbedit_mark(act)) { entry->id = FLOW_ACTION_MARK; entry->mark = tcf_skbedit_mark(act); + } else if (is_tcf_sample(act)) { + entry->id = FLOW_ACTION_SAMPLE; + entry->sample.psample_group = + tcf_sample_psample_group(act); + entry->sample.trunc_size = tcf_sample_trunc_size(act); + entry->sample.truncate = tcf_sample_truncate(act); + entry->sample.rate = tcf_sample_rate(act); + } else if (is_tcf_police(act)) { + entry->id = FLOW_ACTION_POLICE; + entry->police.burst = tcf_police_tcfp_burst(act); + entry->police.rate_bytes_ps = + tcf_police_rate_bytes_ps(act); } else { goto err_out; } diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 9bcf499cce0c..ce7ff286ccb8 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -157,7 +157,7 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, skip_sw = prog && tc_skip_sw(prog->gen_flags); obj = prog ?: oldprog; - tc_cls_common_offload_init(&cls_bpf.common, tp, obj->gen_flags, + tc_cls_common_offload_init(&cls_bpf.common, tp, obj->gen_flags, block, extack); cls_bpf.command = TC_CLSBPF_OFFLOAD; cls_bpf.exts = &obj->exts; @@ -227,7 +227,8 @@ static void cls_bpf_offload_update_stats(struct tcf_proto *tp, struct tcf_block *block = tp->chain->block; struct tc_cls_bpf_offload cls_bpf = {}; - tc_cls_common_offload_init(&cls_bpf.common, tp, prog->gen_flags, NULL); + tc_cls_common_offload_init(&cls_bpf.common, tp, prog->gen_flags, block, + NULL); cls_bpf.command = TC_CLSBPF_STATS; cls_bpf.exts = &prog->exts; cls_bpf.prog = prog->filter; @@ -669,7 +670,7 @@ static int cls_bpf_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, continue; tc_cls_common_offload_init(&cls_bpf.common, tp, prog->gen_flags, - extack); + block, extack); cls_bpf.command = TC_CLSBPF_OFFLOAD; cls_bpf.exts = &prog->exts; cls_bpf.prog = add ? prog->filter : NULL; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index f6685fc53119..3cb372b0e933 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -389,7 +389,8 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f, if (!rtnl_held) rtnl_lock(); - tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack); + tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, block, + extack); cls_flower.command = TC_CLSFLOWER_DESTROY; cls_flower.cookie = (unsigned long) f; @@ -422,7 +423,8 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, goto errout; } - tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack); + tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, block, + extack); cls_flower.command = TC_CLSFLOWER_REPLACE; cls_flower.cookie = (unsigned long) f; cls_flower.rule->match.dissector = &f->mask->dissector; @@ -478,7 +480,8 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f, if (!rtnl_held) rtnl_lock(); - tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, NULL); + tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, block, + NULL); cls_flower.command = TC_CLSFLOWER_STATS; cls_flower.cookie = (unsigned long) f; cls_flower.classid = f->res.classid; @@ -1757,7 +1760,7 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, } tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, - extack); + block, extack); cls_flower.command = add ? TC_CLSFLOWER_REPLACE : TC_CLSFLOWER_DESTROY; cls_flower.cookie = (unsigned long)f; diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 46982b4ea70a..820938fa09ed 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -71,7 +71,8 @@ static void mall_destroy_hw_filter(struct tcf_proto *tp, struct tc_cls_matchall_offload cls_mall = {}; struct tcf_block *block = tp->chain->block; - tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, extack); + tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, block, + extack); cls_mall.command = TC_CLSMATCHALL_DESTROY; cls_mall.cookie = cookie; @@ -89,12 +90,30 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, bool skip_sw = tc_skip_sw(head->flags); int err; - tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, extack); + cls_mall.rule = flow_rule_alloc(tcf_exts_num_actions(&head->exts)); + if (!cls_mall.rule) + return -ENOMEM; + + tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, block, + extack); cls_mall.command = TC_CLSMATCHALL_REPLACE; - cls_mall.exts = &head->exts; cls_mall.cookie = cookie; + err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts); + if (err) { + kfree(cls_mall.rule); + mall_destroy_hw_filter(tp, head, cookie, NULL); + if (skip_sw) + NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action"); + else + err = 0; + + return err; + } + err = tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, skip_sw); + kfree(cls_mall.rule); + if (err < 0) { mall_destroy_hw_filter(tp, head, cookie, NULL); return err; @@ -272,13 +291,28 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, if (tc_skip_hw(head->flags)) return 0; - tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, extack); + cls_mall.rule = flow_rule_alloc(tcf_exts_num_actions(&head->exts)); + if (!cls_mall.rule) + return -ENOMEM; + + tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, block, + extack); cls_mall.command = add ? TC_CLSMATCHALL_REPLACE : TC_CLSMATCHALL_DESTROY; - cls_mall.exts = &head->exts; cls_mall.cookie = (unsigned long)head; + err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts); + if (err) { + kfree(cls_mall.rule); + if (add && tc_skip_sw(head->flags)) { + NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action"); + return err; + } + } + err = cb(TC_SETUP_CLSMATCHALL, &cls_mall, cb_priv); + kfree(cls_mall.rule); + if (err) { if (add && tc_skip_sw(head->flags)) return err; @@ -290,6 +324,24 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, return 0; } +static void mall_stats_hw_filter(struct tcf_proto *tp, + struct cls_mall_head *head, + unsigned long cookie) +{ + struct tc_cls_matchall_offload cls_mall = {}; + struct tcf_block *block = tp->chain->block; + + tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, block, + NULL); + cls_mall.command = TC_CLSMATCHALL_STATS; + cls_mall.cookie = cookie; + + tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false); + + tcf_exts_stats_update(&head->exts, cls_mall.stats.bytes, + cls_mall.stats.pkts, cls_mall.stats.lastused); +} + static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { @@ -301,6 +353,9 @@ static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh, if (!head) return skb->len; + if (!tc_skip_hw(head->flags)) + mall_stats_hw_filter(tp, head, (unsigned long)head); + t->tcm_handle = head->handle; nest = nla_nest_start_noflag(skb, TCA_OPTIONS); diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 4b8710a266cc..2feed0ffa269 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -485,7 +485,8 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, struct tcf_block *block = tp->chain->block; struct tc_cls_u32_offload cls_u32 = {}; - tc_cls_common_offload_init(&cls_u32.common, tp, h->flags, extack); + tc_cls_common_offload_init(&cls_u32.common, tp, h->flags, block, + extack); cls_u32.command = TC_CLSU32_DELETE_HNODE; cls_u32.hnode.divisor = h->divisor; cls_u32.hnode.handle = h->handle; @@ -503,7 +504,7 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, bool offloaded = false; int err; - tc_cls_common_offload_init(&cls_u32.common, tp, flags, extack); + tc_cls_common_offload_init(&cls_u32.common, tp, flags, block, extack); cls_u32.command = TC_CLSU32_NEW_HNODE; cls_u32.hnode.divisor = h->divisor; cls_u32.hnode.handle = h->handle; @@ -529,7 +530,8 @@ static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, struct tcf_block *block = tp->chain->block; struct tc_cls_u32_offload cls_u32 = {}; - tc_cls_common_offload_init(&cls_u32.common, tp, n->flags, extack); + tc_cls_common_offload_init(&cls_u32.common, tp, n->flags, block, + extack); cls_u32.command = TC_CLSU32_DELETE_KNODE; cls_u32.knode.handle = n->handle; @@ -546,7 +548,7 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, bool skip_sw = tc_skip_sw(flags); int err; - tc_cls_common_offload_init(&cls_u32.common, tp, flags, extack); + tc_cls_common_offload_init(&cls_u32.common, tp, flags, block, extack); cls_u32.command = TC_CLSU32_REPLACE_KNODE; cls_u32.knode.handle = n->handle; cls_u32.knode.fshift = n->fshift; @@ -1170,10 +1172,12 @@ static int u32_reoffload_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, bool add, tc_setup_cb_t *cb, void *cb_priv, struct netlink_ext_ack *extack) { + struct tcf_block *block = tp->chain->block; struct tc_cls_u32_offload cls_u32 = {}; int err; - tc_cls_common_offload_init(&cls_u32.common, tp, ht->flags, extack); + tc_cls_common_offload_init(&cls_u32.common, tp, ht->flags, block, + extack); cls_u32.command = add ? TC_CLSU32_NEW_HNODE : TC_CLSU32_DELETE_HNODE; cls_u32.hnode.divisor = ht->divisor; cls_u32.hnode.handle = ht->handle; @@ -1195,7 +1199,8 @@ static int u32_reoffload_knode(struct tcf_proto *tp, struct tc_u_knode *n, struct tc_cls_u32_offload cls_u32 = {}; int err; - tc_cls_common_offload_init(&cls_u32.common, tp, n->flags, extack); + tc_cls_common_offload_init(&cls_u32.common, tp, n->flags, block, + extack); cls_u32.command = add ? TC_CLSU32_REPLACE_KNODE : TC_CLSU32_DELETE_KNODE; cls_u32.knode.handle = n->handle; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index d27d9bc9d010..909370049fca 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -165,7 +165,8 @@ struct htb_sched { /* non shaped skbs; let them go directly thru */ struct qdisc_skb_head direct_queue; - long direct_pkts; + u32 direct_pkts; + u32 overlimits; struct qdisc_watchdog watchdog; @@ -533,8 +534,10 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff) if (new_mode == cl->cmode) return; - if (new_mode == HTB_CANT_SEND) + if (new_mode == HTB_CANT_SEND) { cl->overlimits++; + q->overlimits++; + } if (cl->prio_activity) { /* not necessary: speed optimization */ if (cl->cmode != HTB_CANT_SEND) @@ -937,7 +940,6 @@ ok: goto ok; } } - qdisc_qstats_overlimit(sch); if (likely(next_event > q->now)) qdisc_watchdog_schedule_ns(&q->watchdog, next_event); else @@ -1048,6 +1050,7 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) struct nlattr *nest; struct tc_htb_glob gopt; + sch->qstats.overlimits = q->overlimits; /* Its safe to not acquire qdisc lock. As we hold RTNL, * no change can happen on the qdisc parameters. */ diff --git a/net/socket.c b/net/socket.c index a180e1a9ff23..472fbefa5d9b 100644 --- a/net/socket.c +++ b/net/socket.c @@ -90,6 +90,7 @@ #include <linux/slab.h> #include <linux/xattr.h> #include <linux/nospec.h> +#include <linux/indirect_call_wrapper.h> #include <linux/uaccess.h> #include <asm/unistd.h> @@ -108,6 +109,13 @@ #include <net/busy_poll.h> #include <linux/errqueue.h> +/* proto_ops for ipv4 and ipv6 use the same {recv,send}msg function */ +#if IS_ENABLED(CONFIG_INET) +#define INDIRECT_CALL_INET4(f, f1, ...) INDIRECT_CALL_1(f, f1, __VA_ARGS__) +#else +#define INDIRECT_CALL_INET4(f, f1, ...) f(__VA_ARGS__) +#endif + #ifdef CONFIG_NET_RX_BUSY_POLL unsigned int sysctl_net_busy_read __read_mostly; unsigned int sysctl_net_busy_poll __read_mostly; @@ -645,10 +653,12 @@ EXPORT_SYMBOL(__sock_tx_timestamp); * Sends @msg through @sock, passing through LSM. * Returns the number of bytes sent, or an error code. */ - +INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *, + size_t)); static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg) { - int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg)); + int ret = INDIRECT_CALL_INET4(sock->ops->sendmsg, inet_sendmsg, sock, + msg, msg_data_left(msg)); BUG_ON(ret == -EIOCBQUEUED); return ret; } @@ -874,11 +884,13 @@ EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops); * Receives @msg from @sock, passing through LSM. Returns the total number * of bytes received, or an error. */ - +INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *, + size_t , int )); static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, int flags) { - return sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags); + return INDIRECT_CALL_INET4(sock->ops->recvmsg, inet_recvmsg, sock, msg, + msg_data_left(msg), flags); } int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags) |