diff options
Diffstat (limited to 'net')
224 files changed, 6126 insertions, 3895 deletions
diff --git a/net/6lowpan/ndisc.c b/net/6lowpan/ndisc.c index c40b98f7743c..868d28583c0a 100644 --- a/net/6lowpan/ndisc.c +++ b/net/6lowpan/ndisc.c @@ -20,9 +20,8 @@ static int lowpan_ndisc_parse_802154_options(const struct net_device *dev, switch (nd_opt->nd_opt_len) { case NDISC_802154_SHORT_ADDR_LENGTH: if (ndopts->nd_802154_opt_array[nd_opt->nd_opt_type]) - ND_PRINTK(2, warn, - "%s: duplicated short addr ND6 option found: type=%d\n", - __func__, nd_opt->nd_opt_type); + net_dbg_ratelimited("%s: duplicated short addr ND6 option found: type=%d\n", + __func__, nd_opt->nd_opt_type); else ndopts->nd_802154_opt_array[nd_opt->nd_opt_type] = nd_opt; return 1; @@ -63,8 +62,7 @@ static void lowpan_ndisc_802154_update(struct neighbour *n, u32 flags, lladdr_short = __ndisc_opt_addr_data(ndopts->nd_802154_opts_src_lladdr, IEEE802154_SHORT_ADDR_LEN, 0); if (!lladdr_short) { - ND_PRINTK(2, warn, - "NA: invalid short link-layer address length\n"); + net_dbg_ratelimited("NA: invalid short link-layer address length\n"); return; } } @@ -75,8 +73,7 @@ static void lowpan_ndisc_802154_update(struct neighbour *n, u32 flags, lladdr_short = __ndisc_opt_addr_data(ndopts->nd_802154_opts_tgt_lladdr, IEEE802154_SHORT_ADDR_LEN, 0); if (!lladdr_short) { - ND_PRINTK(2, warn, - "NA: invalid short link-layer address length\n"); + net_dbg_ratelimited("NA: invalid short link-layer address length\n"); return; } } @@ -209,9 +206,8 @@ static void lowpan_ndisc_prefix_rcv_add_addr(struct net *net, sllao, tokenized, valid_lft, prefered_lft); if (err) - ND_PRINTK(2, warn, - "RA: could not add a short address based address for prefix: %pI6c\n", - &pinfo->prefix); + net_dbg_ratelimited("RA: could not add a short address based address for prefix: %pI6c\n", + &pinfo->prefix); } } #endif diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c index 9c1241292d1d..01787fb6a7bc 100644 --- a/net/appletalk/atalk_proc.c +++ b/net/appletalk/atalk_proc.c @@ -181,7 +181,7 @@ static int 
atalk_seq_socket_show(struct seq_file *seq, void *v) sk_wmem_alloc_get(s), sk_rmem_alloc_get(s), s->sk_state, - from_kuid_munged(seq_user_ns(seq), sock_i_uid(s))); + from_kuid_munged(seq_user_ns(seq), sk_uid(s))); out: return 0; } diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 6ad2f72f53f4..ee9bf84c88a7 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -815,7 +815,7 @@ static int bt_seq_show(struct seq_file *seq, void *v) refcount_read(&sk->sk_refcnt), sk_rmem_alloc_get(sk), sk_wmem_alloc_get(sk), - from_kuid(seq_user_ns(seq), sock_i_uid(sk)), + from_kuid(seq_user_ns(seq), sk_uid(sk)), sock_i_ino(sk), bt->parent ? sock_i_ino(bt->parent) : 0LU); diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 3c2c98eecc62..34e89bb5f384 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -413,7 +413,7 @@ static int iso_connect_bis(struct sock *sk) sk->sk_state = BT_CONNECT; } else { sk->sk_state = BT_CONNECT; - iso_sock_set_timer(sk, sk->sk_sndtimeo); + iso_sock_set_timer(sk, READ_ONCE(sk->sk_sndtimeo)); } release_sock(sk); @@ -503,7 +503,7 @@ static int iso_connect_cis(struct sock *sk) sk->sk_state = BT_CONNECT; } else { sk->sk_state = BT_CONNECT; - iso_sock_set_timer(sk, sk->sk_sndtimeo); + iso_sock_set_timer(sk, READ_ONCE(sk->sk_sndtimeo)); } release_sock(sk); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 5aa55fa69594..113656489db5 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -255,7 +255,7 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, err = l2cap_chan_connect(chan, la.l2_psm, __le16_to_cpu(la.l2_cid), &la.l2_bdaddr, la.l2_bdaddr_type, - sk->sk_sndtimeo); + READ_ONCE(sk->sk_sndtimeo)); if (err) return err; @@ -1725,7 +1725,7 @@ static long l2cap_sock_get_sndtimeo_cb(struct l2cap_chan *chan) { struct sock *sk = chan->data; - return sk->sk_sndtimeo; + return READ_ONCE(sk->sk_sndtimeo); } static struct pid 
*l2cap_sock_get_peer_pid_cb(struct l2cap_chan *chan) diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 2945d27e75dc..d382d980fd9a 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -338,7 +338,7 @@ static int sco_connect(struct sock *sk) hcon = hci_connect_sco(hdev, type, &sco_pi(sk)->dst, sco_pi(sk)->setting, &sco_pi(sk)->codec, - sk->sk_sndtimeo); + READ_ONCE(sk->sk_sndtimeo)); if (IS_ERR(hcon)) { err = PTR_ERR(hcon); goto unlock; @@ -367,7 +367,7 @@ static int sco_connect(struct sock *sk) sk->sk_state = BT_CONNECTED; } else { sk->sk_state = BT_CONNECT; - sco_sock_set_timer(sk, sk->sk_sndtimeo); + sco_sock_set_timer(sk, READ_ONCE(sk->sk_sndtimeo)); } release_sock(sk); diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c index 20139fa1be1f..06b604cf9d58 100644 --- a/net/caif/cfctrl.c +++ b/net/caif/cfctrl.c @@ -351,17 +351,154 @@ int cfctrl_cancel_req(struct cflayer *layr, struct cflayer *adap_layer) return found; } +static int cfctrl_link_setup(struct cfctrl *cfctrl, struct cfpkt *pkt, u8 cmdrsp) +{ + u8 len; + u8 linkid = 0; + enum cfctrl_srv serv; + enum cfctrl_srv servtype; + u8 endpoint; + u8 physlinkid; + u8 prio; + u8 tmp; + u8 *cp; + int i; + struct cfctrl_link_param linkparam; + struct cfctrl_request_info rsp, *req; + + memset(&linkparam, 0, sizeof(linkparam)); + + tmp = cfpkt_extr_head_u8(pkt); + + serv = tmp & CFCTRL_SRV_MASK; + linkparam.linktype = serv; + + servtype = tmp >> 4; + linkparam.chtype = servtype; + + tmp = cfpkt_extr_head_u8(pkt); + physlinkid = tmp & 0x07; + prio = tmp >> 3; + + linkparam.priority = prio; + linkparam.phyid = physlinkid; + endpoint = cfpkt_extr_head_u8(pkt); + linkparam.endpoint = endpoint & 0x03; + + switch (serv) { + case CFCTRL_SRV_VEI: + case CFCTRL_SRV_DBG: + if (CFCTRL_ERR_BIT & cmdrsp) + break; + /* Link ID */ + linkid = cfpkt_extr_head_u8(pkt); + break; + case CFCTRL_SRV_VIDEO: + tmp = cfpkt_extr_head_u8(pkt); + linkparam.u.video.connid = tmp; + if (CFCTRL_ERR_BIT & cmdrsp) + break; + /* Link ID 
*/ + linkid = cfpkt_extr_head_u8(pkt); + break; + + case CFCTRL_SRV_DATAGRAM: + linkparam.u.datagram.connid = cfpkt_extr_head_u32(pkt); + if (CFCTRL_ERR_BIT & cmdrsp) + break; + /* Link ID */ + linkid = cfpkt_extr_head_u8(pkt); + break; + case CFCTRL_SRV_RFM: + /* Construct a frame, convert + * DatagramConnectionID + * to network format long and copy it out... + */ + linkparam.u.rfm.connid = cfpkt_extr_head_u32(pkt); + cp = (u8 *) linkparam.u.rfm.volume; + for (tmp = cfpkt_extr_head_u8(pkt); + cfpkt_more(pkt) && tmp != '\0'; + tmp = cfpkt_extr_head_u8(pkt)) + *cp++ = tmp; + *cp = '\0'; + + if (CFCTRL_ERR_BIT & cmdrsp) + break; + /* Link ID */ + linkid = cfpkt_extr_head_u8(pkt); + + break; + case CFCTRL_SRV_UTIL: + /* Construct a frame, convert + * DatagramConnectionID + * to network format long and copy it out... + */ + /* Fifosize KB */ + linkparam.u.utility.fifosize_kb = cfpkt_extr_head_u16(pkt); + /* Fifosize bufs */ + linkparam.u.utility.fifosize_bufs = cfpkt_extr_head_u16(pkt); + /* name */ + cp = (u8 *) linkparam.u.utility.name; + caif_assert(sizeof(linkparam.u.utility.name) + >= UTILITY_NAME_LENGTH); + for (i = 0; i < UTILITY_NAME_LENGTH && cfpkt_more(pkt); i++) { + tmp = cfpkt_extr_head_u8(pkt); + *cp++ = tmp; + } + /* Length */ + len = cfpkt_extr_head_u8(pkt); + linkparam.u.utility.paramlen = len; + /* Param Data */ + cp = linkparam.u.utility.params; + while (cfpkt_more(pkt) && len--) { + tmp = cfpkt_extr_head_u8(pkt); + *cp++ = tmp; + } + if (CFCTRL_ERR_BIT & cmdrsp) + break; + /* Link ID */ + linkid = cfpkt_extr_head_u8(pkt); + /* Length */ + len = cfpkt_extr_head_u8(pkt); + /* Param Data */ + cfpkt_extr_head(pkt, NULL, len); + break; + default: + pr_warn("Request setup, invalid type (%d)\n", serv); + return -1; + } + + rsp.cmd = CFCTRL_CMD_LINK_SETUP; + rsp.param = linkparam; + spin_lock_bh(&cfctrl->info_list_lock); + req = cfctrl_remove_req(cfctrl, &rsp); + + if (CFCTRL_ERR_BIT == (CFCTRL_ERR_BIT & cmdrsp) || + cfpkt_erroneous(pkt)) { + pr_err("Invalid 
O/E bit or parse error " + "on CAIF control channel\n"); + cfctrl->res.reject_rsp(cfctrl->serv.layer.up, 0, + req ? req->client_layer : NULL); + } else { + cfctrl->res.linksetup_rsp(cfctrl->serv.layer.up, linkid, + serv, physlinkid, + req ? req->client_layer : NULL); + } + + kfree(req); + + spin_unlock_bh(&cfctrl->info_list_lock); + + return 0; +} + static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) { u8 cmdrsp; u8 cmd; - int ret = -1; - u8 len; - u8 param[255]; + int ret = 0; u8 linkid = 0; struct cfctrl *cfctrl = container_obj(layer); - struct cfctrl_request_info rsp, *req; - cmdrsp = cfpkt_extr_head_u8(pkt); cmd = cmdrsp & CFCTRL_CMD_MASK; @@ -374,150 +511,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) switch (cmd) { case CFCTRL_CMD_LINK_SETUP: - { - enum cfctrl_srv serv; - enum cfctrl_srv servtype; - u8 endpoint; - u8 physlinkid; - u8 prio; - u8 tmp; - u8 *cp; - int i; - struct cfctrl_link_param linkparam; - memset(&linkparam, 0, sizeof(linkparam)); - - tmp = cfpkt_extr_head_u8(pkt); - - serv = tmp & CFCTRL_SRV_MASK; - linkparam.linktype = serv; - - servtype = tmp >> 4; - linkparam.chtype = servtype; - - tmp = cfpkt_extr_head_u8(pkt); - physlinkid = tmp & 0x07; - prio = tmp >> 3; - - linkparam.priority = prio; - linkparam.phyid = physlinkid; - endpoint = cfpkt_extr_head_u8(pkt); - linkparam.endpoint = endpoint & 0x03; - - switch (serv) { - case CFCTRL_SRV_VEI: - case CFCTRL_SRV_DBG: - if (CFCTRL_ERR_BIT & cmdrsp) - break; - /* Link ID */ - linkid = cfpkt_extr_head_u8(pkt); - break; - case CFCTRL_SRV_VIDEO: - tmp = cfpkt_extr_head_u8(pkt); - linkparam.u.video.connid = tmp; - if (CFCTRL_ERR_BIT & cmdrsp) - break; - /* Link ID */ - linkid = cfpkt_extr_head_u8(pkt); - break; - - case CFCTRL_SRV_DATAGRAM: - linkparam.u.datagram.connid = - cfpkt_extr_head_u32(pkt); - if (CFCTRL_ERR_BIT & cmdrsp) - break; - /* Link ID */ - linkid = cfpkt_extr_head_u8(pkt); - break; - case CFCTRL_SRV_RFM: - /* Construct a frame, convert - * 
DatagramConnectionID - * to network format long and copy it out... - */ - linkparam.u.rfm.connid = - cfpkt_extr_head_u32(pkt); - cp = (u8 *) linkparam.u.rfm.volume; - for (tmp = cfpkt_extr_head_u8(pkt); - cfpkt_more(pkt) && tmp != '\0'; - tmp = cfpkt_extr_head_u8(pkt)) - *cp++ = tmp; - *cp = '\0'; - - if (CFCTRL_ERR_BIT & cmdrsp) - break; - /* Link ID */ - linkid = cfpkt_extr_head_u8(pkt); - - break; - case CFCTRL_SRV_UTIL: - /* Construct a frame, convert - * DatagramConnectionID - * to network format long and copy it out... - */ - /* Fifosize KB */ - linkparam.u.utility.fifosize_kb = - cfpkt_extr_head_u16(pkt); - /* Fifosize bufs */ - linkparam.u.utility.fifosize_bufs = - cfpkt_extr_head_u16(pkt); - /* name */ - cp = (u8 *) linkparam.u.utility.name; - caif_assert(sizeof(linkparam.u.utility.name) - >= UTILITY_NAME_LENGTH); - for (i = 0; - i < UTILITY_NAME_LENGTH - && cfpkt_more(pkt); i++) { - tmp = cfpkt_extr_head_u8(pkt); - *cp++ = tmp; - } - /* Length */ - len = cfpkt_extr_head_u8(pkt); - linkparam.u.utility.paramlen = len; - /* Param Data */ - cp = linkparam.u.utility.params; - while (cfpkt_more(pkt) && len--) { - tmp = cfpkt_extr_head_u8(pkt); - *cp++ = tmp; - } - if (CFCTRL_ERR_BIT & cmdrsp) - break; - /* Link ID */ - linkid = cfpkt_extr_head_u8(pkt); - /* Length */ - len = cfpkt_extr_head_u8(pkt); - /* Param Data */ - cfpkt_extr_head(pkt, &param, len); - break; - default: - pr_warn("Request setup, invalid type (%d)\n", - serv); - goto error; - } - - rsp.cmd = cmd; - rsp.param = linkparam; - spin_lock_bh(&cfctrl->info_list_lock); - req = cfctrl_remove_req(cfctrl, &rsp); - - if (CFCTRL_ERR_BIT == (CFCTRL_ERR_BIT & cmdrsp) || - cfpkt_erroneous(pkt)) { - pr_err("Invalid O/E bit or parse error " - "on CAIF control channel\n"); - cfctrl->res.reject_rsp(cfctrl->serv.layer.up, - 0, - req ? req->client_layer - : NULL); - } else { - cfctrl->res.linksetup_rsp(cfctrl->serv. - layer.up, linkid, - serv, physlinkid, - req ? 
req-> - client_layer : NULL); - } - - kfree(req); - - spin_unlock_bh(&cfctrl->info_list_lock); - } + ret = cfctrl_link_setup(cfctrl, pkt, cmdrsp); break; case CFCTRL_CMD_LINK_DESTROY: linkid = cfpkt_extr_head_u8(pkt); @@ -544,9 +538,9 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) break; default: pr_err("Unrecognized Control Frame\n"); + ret = -1; goto error; } - ret = 0; error: cfpkt_destroy(pkt); return ret; diff --git a/net/can/af_can.c b/net/can/af_can.c index 4aab7033c933..b2387a46794a 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -683,7 +683,7 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev, pr_warn_once("PF_CAN: dropped non conform CAN skbuff: dev type %d, len %d\n", dev->type, skb->len); - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_CAN_RX_INVALID_FRAME); return NET_RX_DROP; } @@ -698,7 +698,7 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev, pr_warn_once("PF_CAN: dropped non conform CAN FD skbuff: dev type %d, len %d\n", dev->type, skb->len); - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_CANFD_RX_INVALID_FRAME); return NET_RX_DROP; } @@ -713,7 +713,7 @@ static int canxl_rcv(struct sk_buff *skb, struct net_device *dev, pr_warn_once("PF_CAN: dropped non conform CAN XL skbuff: dev type %d, len %d\n", dev->type, skb->len); - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_CANXL_RX_INVALID_FRAME); return NET_RX_DROP; } diff --git a/net/can/bcm.c b/net/can/bcm.c index 6bc1cc4c94c5..5e690a2377e4 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -359,6 +359,7 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head, unsigned int datalen = head->nframes * op->cfsiz; int err; unsigned int *pflags; + enum skb_drop_reason reason; skb = alloc_skb(sizeof(*head) + datalen, gfp_any()); if (!skb) @@ -413,11 +414,11 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head, addr->can_family = AF_CAN; addr->can_ifindex = op->rx_ifindex; - 
err = sock_queue_rcv_skb(sk, skb); + err = sock_queue_rcv_skb_reason(sk, skb, &reason); if (err < 0) { struct bcm_sock *bo = bcm_sk(sk); - kfree_skb(skb); + sk_skb_reason_drop(sk, skb, reason); /* don't care about overflows in this statistic */ bo->dropped_usr_msgs++; } diff --git a/net/can/isotp.c b/net/can/isotp.c index 1efa377f002e..dee1412b3c9c 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -278,6 +278,7 @@ static int isotp_send_fc(struct sock *sk, int ae, u8 flowstatus) static void isotp_rcv_skb(struct sk_buff *skb, struct sock *sk) { struct sockaddr_can *addr = (struct sockaddr_can *)skb->cb; + enum skb_drop_reason reason; BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct sockaddr_can)); @@ -285,8 +286,8 @@ static void isotp_rcv_skb(struct sk_buff *skb, struct sock *sk) addr->can_family = AF_CAN; addr->can_ifindex = skb->dev->ifindex; - if (sock_queue_rcv_skb(sk, skb) < 0) - kfree_skb(skb); + if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) + sk_skb_reason_drop(sk, skb, reason); } static u8 padlen(u8 datalen) diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index 6fefe7a68761..3d8b588822f9 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -311,6 +311,7 @@ static void j1939_sk_recv_one(struct j1939_sock *jsk, struct sk_buff *oskb) { const struct j1939_sk_buff_cb *oskcb = j1939_skb_to_cb(oskb); struct j1939_sk_buff_cb *skcb; + enum skb_drop_reason reason; struct sk_buff *skb; if (oskb->sk == &jsk->sk) @@ -331,8 +332,8 @@ static void j1939_sk_recv_one(struct j1939_sock *jsk, struct sk_buff *oskb) if (skb->sk) skcb->msg_flags |= MSG_DONTROUTE; - if (sock_queue_rcv_skb(&jsk->sk, skb) < 0) - kfree_skb(skb); + if (sock_queue_rcv_skb_reason(&jsk->sk, skb, &reason) < 0) + sk_skb_reason_drop(&jsk->sk, skb, reason); } bool j1939_sk_recv_match(struct j1939_priv *priv, struct j1939_sk_buff_cb *skcb) diff --git a/net/can/raw.c b/net/can/raw.c index 020f21430b1d..76b867d21def 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -129,6 +129,7 
@@ static void raw_rcv(struct sk_buff *oskb, void *data) { struct sock *sk = (struct sock *)data; struct raw_sock *ro = raw_sk(sk); + enum skb_drop_reason reason; struct sockaddr_can *addr; struct sk_buff *skb; unsigned int *pflags; @@ -205,8 +206,8 @@ static void raw_rcv(struct sk_buff *oskb, void *data) if (oskb->sk == sk) *pflags |= MSG_CONFIRM; - if (sock_queue_rcv_skb(sk, skb) < 0) - kfree_skb(skb); + if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) + sk_skb_reason_drop(sk, skb, reason); } static int raw_enable_filters(struct net *net, struct net_device *dev, diff --git a/net/core/dev.c b/net/core/dev.c index be97c440ecd5..e365b099484e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1267,33 +1267,31 @@ struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) EXPORT_SYMBOL(dev_getfirstbyhwtype); /** - * __dev_get_by_flags - find any device with given flags - * @net: the applicable net namespace - * @if_flags: IFF_* values - * @mask: bitmask of bits in if_flags to check + * dev_get_by_flags_rcu - find any device with given flags + * @net: the applicable net namespace + * @if_flags: IFF_* values + * @mask: bitmask of bits in if_flags to check + * + * Search for any interface with the given flags. * - * Search for any interface with the given flags. Returns NULL if a device - * is not found or a pointer to the device. Must be called inside - * rtnl_lock(), and result refcount is unchanged. + * Context: rcu_read_lock() must be held. + * Returns: NULL if a device is not found or a pointer to the device. 
*/ - -struct net_device *__dev_get_by_flags(struct net *net, unsigned short if_flags, - unsigned short mask) +struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags, + unsigned short mask) { - struct net_device *dev, *ret; - - ASSERT_RTNL(); + struct net_device *dev; - ret = NULL; - for_each_netdev(net, dev) { - if (((dev->flags ^ if_flags) & mask) == 0) { - ret = dev; - break; + for_each_netdev_rcu(net, dev) { + if (((READ_ONCE(dev->flags) ^ if_flags) & mask) == 0) { + dev_hold(dev); + return dev; } } - return ret; + + return NULL; } -EXPORT_SYMBOL(__dev_get_by_flags); +EXPORT_IPV6_MOD(dev_get_by_flags_rcu); /** * dev_valid_name - check if name is okay for network device @@ -3179,7 +3177,6 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) if (dev->reg_state == NETREG_REGISTERED || dev->reg_state == NETREG_UNREGISTERING) { - ASSERT_RTNL(); netdev_ops_assert_locked(dev); rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues, @@ -3229,7 +3226,6 @@ int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq) return -EINVAL; if (dev->reg_state == NETREG_REGISTERED) { - ASSERT_RTNL(); netdev_ops_assert_locked(dev); rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues, @@ -4028,7 +4024,10 @@ static void qdisc_pkt_len_init(struct sk_buff *skb) unsigned int hdr_len; /* mac layer + network layer */ - hdr_len = skb_transport_offset(skb); + if (!skb->encapsulation) + hdr_len = skb_transport_offset(skb); + else + hdr_len = skb_inner_transport_offset(skb); /* + transport layer */ if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { @@ -5939,8 +5938,6 @@ check_vlan_id: } if (pt_prev) { - if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) - goto drop; *ppt_prev = pt_prev; } else { drop: @@ -6926,6 +6923,43 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer) return HRTIMER_NORESTART; } +static void napi_stop_kthread(struct napi_struct *napi) +{ + unsigned long val, 
new; + + /* Wait until the napi STATE_THREADED is unset. */ + while (true) { + val = READ_ONCE(napi->state); + + /* If napi kthread own this napi or the napi is idle, + * STATE_THREADED can be unset here. + */ + if ((val & NAPIF_STATE_SCHED_THREADED) || + !(val & NAPIF_STATE_SCHED)) { + new = val & (~NAPIF_STATE_THREADED); + } else { + msleep(20); + continue; + } + + if (try_cmpxchg(&napi->state, &val, new)) + break; + } + + /* Once STATE_THREADED is unset, wait for SCHED_THREADED to be unset by + * the kthread. + */ + while (true) { + if (!test_bit(NAPIF_STATE_SCHED_THREADED, &napi->state)) + break; + + msleep(20); + } + + kthread_stop(napi->thread); + napi->thread = NULL; +} + int dev_set_threaded(struct net_device *dev, bool threaded) { struct napi_struct *napi; @@ -6961,8 +6995,12 @@ int dev_set_threaded(struct net_device *dev, bool threaded) * softirq mode will happen in the next round of napi_schedule(). * This should not cause hiccups/stalls to the live traffic. */ - list_for_each_entry(napi, &dev->napi_list, dev_list) - assign_bit(NAPI_STATE_THREADED, &napi->state, threaded); + list_for_each_entry(napi, &dev->napi_list, dev_list) { + if (!threaded && napi->thread) + napi_stop_kthread(napi); + else + assign_bit(NAPI_STATE_THREADED, &napi->state, threaded); + } return err; } @@ -10730,12 +10768,14 @@ sync_lower: * *before* calling udp_tunnel_get_rx_info, * but *after* calling udp_tunnel_drop_rx_info. 
*/ + udp_tunnel_nic_lock(dev); if (features & NETIF_F_RX_UDP_TUNNEL_PORT) { dev->features = features; udp_tunnel_get_rx_info(dev); } else { udp_tunnel_drop_rx_info(dev); } + udp_tunnel_nic_unlock(dev); } if (diff & NETIF_F_HW_VLAN_CTAG_FILTER) { @@ -11715,7 +11755,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->priv_len = sizeof_priv; - ref_tracker_dir_init(&dev->refcnt_tracker, 128, name); + ref_tracker_dir_init(&dev->refcnt_tracker, 128, "netdev"); #ifdef CONFIG_PCPU_DEV_REFCNT dev->pcpu_refcnt = alloc_percpu(int); if (!dev->pcpu_refcnt) @@ -11937,21 +11977,8 @@ static void netdev_rss_contexts_free(struct net_device *dev) mutex_lock(&dev->ethtool->rss_lock); xa_for_each(&dev->ethtool->rss_ctx, context, ctx) { - struct ethtool_rxfh_param rxfh; - - rxfh.indir = ethtool_rxfh_context_indir(ctx); - rxfh.key = ethtool_rxfh_context_key(ctx); - rxfh.hfunc = ctx->hfunc; - rxfh.input_xfrm = ctx->input_xfrm; - rxfh.rss_context = context; - rxfh.rss_delete = true; - xa_erase(&dev->ethtool->rss_ctx, context); - if (dev->ethtool_ops->create_rxfh_context) - dev->ethtool_ops->remove_rxfh_context(dev, ctx, - context, NULL); - else - dev->ethtool_ops->set_rxfh(dev, &rxfh, NULL); + dev->ethtool_ops->remove_rxfh_context(dev, ctx, context, NULL); kfree(ctx); } xa_destroy(&dev->ethtool->rss_ctx); diff --git a/net/core/dst.c b/net/core/dst.c index 795ca07e28a4..e2de8b68c41d 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -145,12 +145,12 @@ void dst_dev_put(struct dst_entry *dst) { struct net_device *dev = dst->dev; - dst->obsolete = DST_OBSOLETE_DEAD; + WRITE_ONCE(dst->obsolete, DST_OBSOLETE_DEAD); if (dst->ops->ifdown) dst->ops->ifdown(dst, dev); - dst->input = dst_discard; - dst->output = dst_discard_out; - dst->dev = blackhole_netdev; + WRITE_ONCE(dst->input, dst_discard); + WRITE_ONCE(dst->output, dst_discard_out); + WRITE_ONCE(dst->dev, blackhole_netdev); netdev_ref_replace(dev, blackhole_netdev, &dst->dev_tracker, GFP_ATOMIC); } @@ -263,7 
+263,7 @@ unsigned int dst_blackhole_mtu(const struct dst_entry *dst) { unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); - return mtu ? : dst->dev->mtu; + return mtu ? : dst_dev(dst)->mtu; } EXPORT_SYMBOL_GPL(dst_blackhole_mtu); diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c index 93a04d18e505..9ab4902324e1 100644 --- a/net/core/dst_cache.c +++ b/net/core/dst_cache.c @@ -52,7 +52,7 @@ static struct dst_entry *dst_cache_per_cpu_get(struct dst_cache *dst_cache, if (unlikely(!time_after(idst->refresh_ts, READ_ONCE(dst_cache->reset_ts)) || - (dst->obsolete && !dst->ops->check(dst, idst->cookie)))) { + (READ_ONCE(dst->obsolete) && !dst->ops->check(dst, idst->cookie)))) { dst_cache_per_cpu_dst_set(idst, NULL, 0); dst_release(dst); goto fail; diff --git a/net/core/hotdata.c b/net/core/hotdata.c index 0bc893d5f07b..95d0a4df1006 100644 --- a/net/core/hotdata.c +++ b/net/core/hotdata.c @@ -2,7 +2,9 @@ #include <linux/cache.h> #include <linux/jiffies.h> #include <linux/list.h> +#include <net/aligned_data.h> #include <net/hotdata.h> +#include <net/ip.h> #include <net/proto_memory.h> struct net_hotdata net_hotdata __cacheline_aligned = { @@ -22,3 +24,6 @@ struct net_hotdata net_hotdata __cacheline_aligned = { .sysctl_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE }; EXPORT_SYMBOL(net_hotdata); + +struct net_aligned_data net_aligned_data; +EXPORT_IPV6_MOD(net_aligned_data); diff --git a/net/core/ieee8021q_helpers.c b/net/core/ieee8021q_helpers.c index 759a9b9f3f89..669b357b73b2 100644 --- a/net/core/ieee8021q_helpers.c +++ b/net/core/ieee8021q_helpers.c @@ -7,6 +7,11 @@ #include <net/dscp.h> #include <net/ieee8021q.h> +/* verify that table covers all 8 traffic types */ +#define TT_MAP_SIZE_OK(tbl) \ + compiletime_assert(ARRAY_SIZE(tbl) == IEEE8021Q_TT_MAX, \ + #tbl " size mismatch") + /* The following arrays map Traffic Types (TT) to traffic classes (TC) for * different number of queues as shown in the example provided by * IEEE 802.1Q-2022 in Annex I "I.3 Traffic type to 
traffic class mapping" and @@ -101,51 +106,28 @@ int ieee8021q_tt_to_tc(enum ieee8021q_traffic_type tt, unsigned int num_queues) switch (num_queues) { case 8: - compiletime_assert(ARRAY_SIZE(ieee8021q_8queue_tt_tc_map) != - IEEE8021Q_TT_MAX - 1, - "ieee8021q_8queue_tt_tc_map != max - 1"); + TT_MAP_SIZE_OK(ieee8021q_8queue_tt_tc_map); return ieee8021q_8queue_tt_tc_map[tt]; case 7: - compiletime_assert(ARRAY_SIZE(ieee8021q_7queue_tt_tc_map) != - IEEE8021Q_TT_MAX - 1, - "ieee8021q_7queue_tt_tc_map != max - 1"); - + TT_MAP_SIZE_OK(ieee8021q_7queue_tt_tc_map); return ieee8021q_7queue_tt_tc_map[tt]; case 6: - compiletime_assert(ARRAY_SIZE(ieee8021q_6queue_tt_tc_map) != - IEEE8021Q_TT_MAX - 1, - "ieee8021q_6queue_tt_tc_map != max - 1"); - + TT_MAP_SIZE_OK(ieee8021q_6queue_tt_tc_map); return ieee8021q_6queue_tt_tc_map[tt]; case 5: - compiletime_assert(ARRAY_SIZE(ieee8021q_5queue_tt_tc_map) != - IEEE8021Q_TT_MAX - 1, - "ieee8021q_5queue_tt_tc_map != max - 1"); - + TT_MAP_SIZE_OK(ieee8021q_5queue_tt_tc_map); return ieee8021q_5queue_tt_tc_map[tt]; case 4: - compiletime_assert(ARRAY_SIZE(ieee8021q_4queue_tt_tc_map) != - IEEE8021Q_TT_MAX - 1, - "ieee8021q_4queue_tt_tc_map != max - 1"); - + TT_MAP_SIZE_OK(ieee8021q_4queue_tt_tc_map); return ieee8021q_4queue_tt_tc_map[tt]; case 3: - compiletime_assert(ARRAY_SIZE(ieee8021q_3queue_tt_tc_map) != - IEEE8021Q_TT_MAX - 1, - "ieee8021q_3queue_tt_tc_map != max - 1"); - + TT_MAP_SIZE_OK(ieee8021q_3queue_tt_tc_map); return ieee8021q_3queue_tt_tc_map[tt]; case 2: - compiletime_assert(ARRAY_SIZE(ieee8021q_2queue_tt_tc_map) != - IEEE8021Q_TT_MAX - 1, - "ieee8021q_2queue_tt_tc_map != max - 1"); - + TT_MAP_SIZE_OK(ieee8021q_2queue_tt_tc_map); return ieee8021q_2queue_tt_tc_map[tt]; case 1: - compiletime_assert(ARRAY_SIZE(ieee8021q_1queue_tt_tc_map) != - IEEE8021Q_TT_MAX - 1, - "ieee8021q_1queue_tt_tc_map != max - 1"); - + TT_MAP_SIZE_OK(ieee8021q_1queue_tt_tc_map); return ieee8021q_1queue_tt_tc_map[tt]; } diff --git a/net/core/neighbour.c 
b/net/core/neighbour.c index 49dce9a82295..d1de7f292eea 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -54,7 +54,8 @@ static void __neigh_notify(struct neighbour *n, int type, int flags, u32 pid); static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid); static int pneigh_ifdown_and_unlock(struct neigh_table *tbl, - struct net_device *dev); + struct net_device *dev, + bool skip_perm); #ifdef CONFIG_PROC_FS static const struct seq_operations neigh_stat_seq_ops; @@ -153,11 +154,12 @@ static void neigh_update_gc_list(struct neighbour *n) if (n->dead) goto out; - /* remove from the gc list if new state is permanent or if neighbor - * is externally learned; otherwise entry should be on the gc list + /* remove from the gc list if new state is permanent or if neighbor is + * externally learned / validated; otherwise entry should be on the gc + * list */ exempt_from_gc = n->nud_state & NUD_PERMANENT || - n->flags & NTF_EXT_LEARNED; + n->flags & (NTF_EXT_LEARNED | NTF_EXT_VALIDATED); on_gc_list = !list_empty(&n->gc_list); if (exempt_from_gc && on_gc_list) { @@ -204,6 +206,7 @@ static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify, ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0; ndm_flags |= (flags & NEIGH_UPDATE_F_MANAGED) ? NTF_MANAGED : 0; + ndm_flags |= (flags & NEIGH_UPDATE_F_EXT_VALIDATED) ? 
NTF_EXT_VALIDATED : 0; if ((old_flags ^ ndm_flags) & NTF_EXT_LEARNED) { if (ndm_flags & NTF_EXT_LEARNED) @@ -221,6 +224,14 @@ static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify, *notify = 1; *managed_update = true; } + if ((old_flags ^ ndm_flags) & NTF_EXT_VALIDATED) { + if (ndm_flags & NTF_EXT_VALIDATED) + neigh->flags |= NTF_EXT_VALIDATED; + else + neigh->flags &= ~NTF_EXT_VALIDATED; + *notify = 1; + *gc_update = true; + } } bool neigh_remove_one(struct neighbour *n) @@ -378,7 +389,9 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev, dev_head = neigh_get_dev_table(dev, tbl->family); hlist_for_each_entry_safe(n, tmp, dev_head, dev_list) { - if (skip_perm && n->nud_state & NUD_PERMANENT) + if (skip_perm && + (n->nud_state & NUD_PERMANENT || + n->flags & NTF_EXT_VALIDATED)) continue; hlist_del_rcu(&n->hash); @@ -423,7 +436,7 @@ static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev, { write_lock_bh(&tbl->lock); neigh_flush_dev(tbl, dev, skip_perm); - pneigh_ifdown_and_unlock(tbl, dev); + pneigh_ifdown_and_unlock(tbl, dev, skip_perm); pneigh_queue_purge(&tbl->proxy_queue, dev ? 
dev_net(dev) : NULL, tbl->family); if (skb_queue_empty_lockless(&tbl->proxy_queue)) @@ -803,7 +816,8 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey, } static int pneigh_ifdown_and_unlock(struct neigh_table *tbl, - struct net_device *dev) + struct net_device *dev, + bool skip_perm) { struct pneigh_entry *n, **np, *freelist = NULL; u32 h; @@ -811,12 +825,15 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl, for (h = 0; h <= PNEIGH_HASHMASK; h++) { np = &tbl->phash_buckets[h]; while ((n = *np) != NULL) { + if (skip_perm && n->permanent) + goto skip; if (!dev || n->dev == dev) { *np = n->next; n->next = freelist; freelist = n; continue; } +skip: np = &n->next; } } @@ -937,7 +954,8 @@ static void neigh_periodic_work(struct work_struct *work) state = n->nud_state; if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) || - (n->flags & NTF_EXT_LEARNED)) { + (n->flags & + (NTF_EXT_LEARNED | NTF_EXT_VALIDATED))) { write_unlock(&n->lock); continue; } @@ -1090,9 +1108,15 @@ static void neigh_timer_handler(struct timer_list *t) if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) && atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) { - WRITE_ONCE(neigh->nud_state, NUD_FAILED); + if (neigh->nud_state == NUD_PROBE && + neigh->flags & NTF_EXT_VALIDATED) { + WRITE_ONCE(neigh->nud_state, NUD_STALE); + neigh->updated = jiffies; + } else { + WRITE_ONCE(neigh->nud_state, NUD_FAILED); + neigh_invalidate(neigh); + } notify = 1; - neigh_invalidate(neigh); goto out; } @@ -1240,6 +1264,8 @@ static void neigh_update_hhs(struct neighbour *neigh) NTF_ROUTER flag. NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as a router. + NEIGH_UPDATE_F_EXT_VALIDATED means that the entry will not be removed + or invalidated. Caller MUST hold reference count on the entry. */ @@ -1402,7 +1428,8 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, * we can reinject the packet there. 
*/ n2 = NULL; - if (dst && dst->obsolete != DST_OBSOLETE_DEAD) { + if (dst && + READ_ONCE(dst->obsolete) != DST_OBSOLETE_DEAD) { n2 = dst_neigh_lookup_skb(dst, skb); if (n2) n1 = n2; @@ -1974,7 +2001,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, if (ndm_flags & NTF_PROXY) { struct pneigh_entry *pn; - if (ndm_flags & NTF_MANAGED) { + if (ndm_flags & (NTF_MANAGED | NTF_EXT_VALIDATED)) { NL_SET_ERR_MSG(extack, "Invalid NTF_* flag combination"); goto out; } @@ -1983,6 +2010,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, pn = pneigh_lookup(tbl, net, dst, dev, 1); if (pn) { pn->flags = ndm_flags; + pn->permanent = !!(ndm->ndm_state & NUD_PERMANENT); if (protocol) pn->protocol = protocol; err = 0; @@ -2004,7 +2032,8 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, if (neigh == NULL) { bool ndm_permanent = ndm->ndm_state & NUD_PERMANENT; bool exempt_from_gc = ndm_permanent || - ndm_flags & NTF_EXT_LEARNED; + ndm_flags & (NTF_EXT_LEARNED | + NTF_EXT_VALIDATED); if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { err = -ENOENT; @@ -2015,10 +2044,27 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, err = -EINVAL; goto out; } + if (ndm_flags & NTF_EXT_VALIDATED) { + u8 state = ndm->ndm_state; + + /* NTF_USE and NTF_MANAGED will result in the neighbor + * being created with an invalid state (NUD_NONE). 
+ */ + if (ndm_flags & (NTF_USE | NTF_MANAGED)) + state = NUD_NONE; + + if (!(state & NUD_VALID)) { + NL_SET_ERR_MSG(extack, + "Cannot create externally validated neighbor with an invalid state"); + err = -EINVAL; + goto out; + } + } neigh = ___neigh_create(tbl, dst, dev, ndm_flags & - (NTF_EXT_LEARNED | NTF_MANAGED), + (NTF_EXT_LEARNED | NTF_MANAGED | + NTF_EXT_VALIDATED), exempt_from_gc, true); if (IS_ERR(neigh)) { err = PTR_ERR(neigh); @@ -2030,6 +2076,24 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, neigh_release(neigh); goto out; } + if (ndm_flags & NTF_EXT_VALIDATED) { + u8 state = ndm->ndm_state; + + /* NTF_USE and NTF_MANAGED do not update the existing + * state other than clearing it if it was + * NUD_PERMANENT. + */ + if (ndm_flags & (NTF_USE | NTF_MANAGED)) + state = READ_ONCE(neigh->nud_state) & ~NUD_PERMANENT; + + if (!(state & NUD_VALID)) { + NL_SET_ERR_MSG(extack, + "Cannot mark neighbor as externally validated with an invalid state"); + err = -EINVAL; + neigh_release(neigh); + goto out; + } + } if (!(nlh->nlmsg_flags & NLM_F_REPLACE)) flags &= ~(NEIGH_UPDATE_F_OVERRIDE | @@ -2046,13 +2110,13 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, flags |= NEIGH_UPDATE_F_MANAGED; if (ndm_flags & NTF_USE) flags |= NEIGH_UPDATE_F_USE; + if (ndm_flags & NTF_EXT_VALIDATED) + flags |= NEIGH_UPDATE_F_EXT_VALIDATED; err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags, NETLINK_CB(skb).portid, extack); - if (!err && ndm_flags & (NTF_USE | NTF_MANAGED)) { + if (!err && ndm_flags & (NTF_USE | NTF_MANAGED)) neigh_event_send(neigh, NULL); - err = 0; - } neigh_release(neigh); out: return err; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 1ace0cd01adc..8f897e2c8b4f 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -641,12 +641,6 @@ static ssize_t phys_port_id_show(struct device *dev, struct netdev_phys_item_id ppid; ssize_t ret; - /* The check is also done in dev_get_phys_port_id; this helps 
returning - * early without hitting the locking section below. - */ - if (!netdev->netdev_ops->ndo_get_phys_port_id) - return -EOPNOTSUPP; - ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); if (ret) return ret; @@ -668,13 +662,6 @@ static ssize_t phys_port_name_show(struct device *dev, char name[IFNAMSIZ]; ssize_t ret; - /* The checks are also done in dev_get_phys_port_name; this helps - * returning early without hitting the locking section below. - */ - if (!netdev->netdev_ops->ndo_get_phys_port_name && - !netdev->devlink_port) - return -EOPNOTSUPP; - ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); if (ret) return ret; @@ -696,14 +683,6 @@ static ssize_t phys_switch_id_show(struct device *dev, struct netdev_phys_item_id ppid = { }; ssize_t ret; - /* The checks are also done in dev_get_phys_port_name; this helps - * returning early without hitting the locking section below. This works - * because recurse is false when calling dev_get_port_parent_id. - */ - if (!netdev->netdev_ops->ndo_get_port_parent_id && - !netdev->devlink_port) - return -EOPNOTSUPP; - ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); if (ret) return ret; @@ -718,6 +697,40 @@ static ssize_t phys_switch_id_show(struct device *dev, } static DEVICE_ATTR_RO(phys_switch_id); +static struct attribute *netdev_phys_attrs[] __ro_after_init = { + &dev_attr_phys_port_id.attr, + &dev_attr_phys_port_name.attr, + &dev_attr_phys_switch_id.attr, + NULL, +}; + +static umode_t netdev_phys_is_visible(struct kobject *kobj, + struct attribute *attr, int index) +{ + struct device *dev = kobj_to_dev(kobj); + struct net_device *netdev = to_net_dev(dev); + + if (attr == &dev_attr_phys_port_id.attr) { + if (!netdev->netdev_ops->ndo_get_phys_port_id) + return 0; + } else if (attr == &dev_attr_phys_port_name.attr) { + if (!netdev->netdev_ops->ndo_get_phys_port_name && + !netdev->devlink_port) + return 0; + } else if (attr == &dev_attr_phys_switch_id.attr) { + if 
(!netdev->netdev_ops->ndo_get_port_parent_id && + !netdev->devlink_port) + return 0; + } + + return attr->mode; +} + +static const struct attribute_group netdev_phys_group = { + .attrs = netdev_phys_attrs, + .is_visible = netdev_phys_is_visible, +}; + static ssize_t threaded_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -783,9 +796,6 @@ static struct attribute *net_class_attrs[] __ro_after_init = { &dev_attr_tx_queue_len.attr, &dev_attr_gro_flush_timeout.attr, &dev_attr_napi_defer_hard_irqs.attr, - &dev_attr_phys_port_id.attr, - &dev_attr_phys_port_name.attr, - &dev_attr_phys_switch_id.attr, &dev_attr_proto_down.attr, &dev_attr_carrier_up_count.attr, &dev_attr_carrier_down_count.attr, @@ -1200,12 +1210,21 @@ static int rx_queue_default_mask(struct net_device *dev, struct netdev_rx_queue *queue) { #if IS_ENABLED(CONFIG_RPS) && IS_ENABLED(CONFIG_SYSCTL) - struct cpumask *rps_default_mask = READ_ONCE(dev_net(dev)->core.rps_default_mask); + struct cpumask *rps_default_mask; + int res = 0; + mutex_lock(&rps_default_mask_mutex); + + rps_default_mask = dev_net(dev)->core.rps_default_mask; if (rps_default_mask && !cpumask_empty(rps_default_mask)) - return netdev_rx_queue_set_rps_mask(queue, rps_default_mask); -#endif + res = netdev_rx_queue_set_rps_mask(queue, rps_default_mask); + + mutex_unlock(&rps_default_mask_mutex); + + return res; +#else return 0; +#endif } static int rx_queue_add_kobject(struct net_device *dev, int index) @@ -2328,6 +2347,7 @@ int netdev_register_kobject(struct net_device *ndev) groups++; *groups++ = &netstat_group; + *groups++ = &netdev_phys_group; if (wireless_group_needed(ndev)) *groups++ = &wireless_group; diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h index 8a5b04c2699a..e938f25e8e86 100644 --- a/net/core/net-sysfs.h +++ b/net/core/net-sysfs.h @@ -11,4 +11,6 @@ int netdev_queue_update_kobjects(struct net_device *net, int netdev_change_owner(struct net_device *, const struct net *net_old, const struct net 
*net_new); +extern struct mutex rps_default_mask_mutex; + #endif diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index ae54f26709ca..f58ef920a3a1 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -19,9 +19,9 @@ #include <linux/net_namespace.h> #include <linux/sched/task.h> #include <linux/uidgid.h> -#include <linux/cookie.h> #include <linux/proc_fs.h> +#include <net/aligned_data.h> #include <net/sock.h> #include <net/netlink.h> #include <net/net_namespace.h> @@ -64,8 +64,6 @@ DECLARE_RWSEM(pernet_ops_rwsem); static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS; -DEFINE_COOKIE(net_cookie); - static struct net_generic *net_alloc_generic(void) { unsigned int gen_ptrs = READ_ONCE(max_gen_ptrs); @@ -319,10 +317,10 @@ int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp) if (refcount_read(&net->ns.count) == 0) return NETNSA_NSID_NOT_ASSIGNED; - spin_lock_bh(&net->nsid_lock); + spin_lock(&net->nsid_lock); id = __peernet2id(net, peer); if (id >= 0) { - spin_unlock_bh(&net->nsid_lock); + spin_unlock(&net->nsid_lock); return id; } @@ -332,12 +330,12 @@ int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp) * just been idr_remove()'d from there in cleanup_net(). 
*/ if (!maybe_get_net(peer)) { - spin_unlock_bh(&net->nsid_lock); + spin_unlock(&net->nsid_lock); return NETNSA_NSID_NOT_ASSIGNED; } id = alloc_netid(net, peer, -1); - spin_unlock_bh(&net->nsid_lock); + spin_unlock(&net->nsid_lock); put_net(peer); if (id < 0) @@ -403,8 +401,8 @@ static __net_init void preinit_net(struct net *net, struct user_namespace *user_ { refcount_set(&net->passive, 1); refcount_set(&net->ns.count, 1); - ref_tracker_dir_init(&net->refcnt_tracker, 128, "net refcnt"); - ref_tracker_dir_init(&net->notrefcnt_tracker, 128, "net notrefcnt"); + ref_tracker_dir_init(&net->refcnt_tracker, 128, "net_refcnt"); + ref_tracker_dir_init(&net->notrefcnt_tracker, 128, "net_notrefcnt"); get_random_bytes(&net->hash_mix, sizeof(u32)); net->dev_base_seq = 1; @@ -434,9 +432,7 @@ static __net_init int setup_net(struct net *net) LIST_HEAD(net_exit_list); int error = 0; - preempt_disable(); - net->net_cookie = gen_cookie_next(&net_cookie); - preempt_enable(); + net->net_cookie = atomic64_inc_return(&net_aligned_data.net_cookie); list_for_each_entry(ops, &pernet_list, list) { error = ops_init(ops, net); @@ -628,20 +624,20 @@ static void unhash_nsid(struct net *net, struct net *last) for_each_net(tmp) { int id; - spin_lock_bh(&tmp->nsid_lock); + spin_lock(&tmp->nsid_lock); id = __peernet2id(tmp, net); if (id >= 0) idr_remove(&tmp->netns_ids, id); - spin_unlock_bh(&tmp->nsid_lock); + spin_unlock(&tmp->nsid_lock); if (id >= 0) rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL, GFP_KERNEL); if (tmp == last) break; } - spin_lock_bh(&net->nsid_lock); + spin_lock(&net->nsid_lock); idr_destroy(&net->netns_ids); - spin_unlock_bh(&net->nsid_lock); + spin_unlock(&net->nsid_lock); } static LLIST_HEAD(cleanup_list); @@ -791,12 +787,40 @@ struct net *get_net_ns_by_pid(pid_t pid) } EXPORT_SYMBOL_GPL(get_net_ns_by_pid); +#ifdef CONFIG_NET_NS_REFCNT_TRACKER +static void net_ns_net_debugfs(struct net *net) +{ + ref_tracker_dir_symlink(&net->refcnt_tracker, "netns-%llx-%u-refcnt", + 
net->net_cookie, net->ns.inum); + ref_tracker_dir_symlink(&net->notrefcnt_tracker, "netns-%llx-%u-notrefcnt", + net->net_cookie, net->ns.inum); +} + +static int __init init_net_debugfs(void) +{ + ref_tracker_dir_debugfs(&init_net.refcnt_tracker); + ref_tracker_dir_debugfs(&init_net.notrefcnt_tracker); + net_ns_net_debugfs(&init_net); + return 0; +} +late_initcall(init_net_debugfs); +#else +static void net_ns_net_debugfs(struct net *net) +{ +} +#endif + static __net_init int net_ns_net_init(struct net *net) { + int ret; + #ifdef CONFIG_NET_NS net->ns.ops = &netns_operations; #endif - return ns_alloc_inum(&net->ns); + ret = ns_alloc_inum(&net->ns); + if (!ret) + net_ns_net_debugfs(net); + return ret; } static __net_exit void net_ns_net_exit(struct net *net) @@ -852,9 +876,9 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh, return PTR_ERR(peer); } - spin_lock_bh(&net->nsid_lock); + spin_lock(&net->nsid_lock); if (__peernet2id(net, peer) >= 0) { - spin_unlock_bh(&net->nsid_lock); + spin_unlock(&net->nsid_lock); err = -EEXIST; NL_SET_BAD_ATTR(extack, nla); NL_SET_ERR_MSG(extack, @@ -863,7 +887,7 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh, } err = alloc_netid(net, peer, nsid); - spin_unlock_bh(&net->nsid_lock); + spin_unlock(&net->nsid_lock); if (err >= 0) { rtnl_net_notifyid(net, RTM_NEWNSID, err, NETLINK_CB(skb).portid, nlh, GFP_KERNEL); diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index d22f0919821e..dff66d8fb325 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -21,7 +21,9 @@ static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state struct cgroup_cls_state *task_cls_state(struct task_struct *p) { return css_cls_state(task_css_check(p, net_cls_cgrp_id, - rcu_read_lock_bh_held())); + rcu_read_lock_held() || + rcu_read_lock_bh_held() || + rcu_read_lock_trace_held())); } EXPORT_SYMBOL_GPL(task_cls_state); diff --git 
a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c index d126f10197bf..3bf1151d8061 100644 --- a/net/core/netdev_rx_queue.c +++ b/net/core/netdev_rx_queue.c @@ -97,14 +97,12 @@ int __net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, if (!netdev_need_ops_lock(dev)) return -EOPNOTSUPP; - if (rxq_idx >= dev->real_num_rx_queues) - return -EINVAL; - rxq_idx = array_index_nospec(rxq_idx, dev->real_num_rx_queues); - if (rxq_idx >= dev->real_num_rx_queues) { NL_SET_ERR_MSG(extack, "rx queue index out of range"); return -ERANGE; } + rxq_idx = array_index_nospec(rxq_idx, dev->real_num_rx_queues); + if (dev->cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) { NL_SET_ERR_MSG(extack, "tcp-data-split is disabled"); return -EINVAL; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 6ad84d4a2b46..a1da97b5b30b 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -58,13 +58,6 @@ static void zap_completion_queue(void); static unsigned int carrier_timeout = 4; module_param(carrier_timeout, uint, 0644); -#define np_info(np, fmt, ...) \ - pr_info("%s: " fmt, np->name, ##__VA_ARGS__) -#define np_err(np, fmt, ...) \ - pr_err("%s: " fmt, np->name, ##__VA_ARGS__) -#define np_notice(np, fmt, ...) 
\ - pr_notice("%s: " fmt, np->name, ##__VA_ARGS__) - static netdev_tx_t netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) @@ -379,6 +372,31 @@ out: return ret; } +static void netpoll_udp_checksum(struct netpoll *np, struct sk_buff *skb, + int len) +{ + struct udphdr *udph; + int udp_len; + + udp_len = len + sizeof(struct udphdr); + udph = udp_hdr(skb); + + /* check needs to be set, since it will be consumed in csum_partial */ + udph->check = 0; + if (np->ipv6) + udph->check = csum_ipv6_magic(&np->local_ip.in6, + &np->remote_ip.in6, + udp_len, IPPROTO_UDP, + csum_partial(udph, udp_len, 0)); + else + udph->check = csum_tcpudp_magic(np->local_ip.ip, + np->remote_ip.ip, + udp_len, IPPROTO_UDP, + csum_partial(udph, udp_len, 0)); + if (udph->check == 0) + udph->check = CSUM_MANGLED_0; +} + netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) { unsigned long flags; @@ -396,24 +414,101 @@ netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) } EXPORT_SYMBOL(netpoll_send_skb); +static void push_ipv6(struct netpoll *np, struct sk_buff *skb, int len) +{ + struct ipv6hdr *ip6h; + + skb_push(skb, sizeof(struct ipv6hdr)); + skb_reset_network_header(skb); + ip6h = ipv6_hdr(skb); + + /* ip6h->version = 6; ip6h->priority = 0; */ + *(unsigned char *)ip6h = 0x60; + ip6h->flow_lbl[0] = 0; + ip6h->flow_lbl[1] = 0; + ip6h->flow_lbl[2] = 0; + + ip6h->payload_len = htons(sizeof(struct udphdr) + len); + ip6h->nexthdr = IPPROTO_UDP; + ip6h->hop_limit = 32; + ip6h->saddr = np->local_ip.in6; + ip6h->daddr = np->remote_ip.in6; + + skb->protocol = htons(ETH_P_IPV6); +} + +static void push_ipv4(struct netpoll *np, struct sk_buff *skb, int len) +{ + static atomic_t ip_ident; + struct iphdr *iph; + int ip_len; + + ip_len = len + sizeof(struct udphdr) + sizeof(struct iphdr); + + skb_push(skb, sizeof(struct iphdr)); + skb_reset_network_header(skb); + iph = ip_hdr(skb); + + /* iph->version = 4; iph->ihl = 5; */ + *(unsigned char 
*)iph = 0x45; + iph->tos = 0; + put_unaligned(htons(ip_len), &iph->tot_len); + iph->id = htons(atomic_inc_return(&ip_ident)); + iph->frag_off = 0; + iph->ttl = 64; + iph->protocol = IPPROTO_UDP; + iph->check = 0; + put_unaligned(np->local_ip.ip, &iph->saddr); + put_unaligned(np->remote_ip.ip, &iph->daddr); + iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); + skb->protocol = htons(ETH_P_IP); +} + +static void push_udp(struct netpoll *np, struct sk_buff *skb, int len) +{ + struct udphdr *udph; + int udp_len; + + udp_len = len + sizeof(struct udphdr); + + skb_push(skb, sizeof(struct udphdr)); + skb_reset_transport_header(skb); + + udph = udp_hdr(skb); + udph->source = htons(np->local_port); + udph->dest = htons(np->remote_port); + udph->len = htons(udp_len); + + netpoll_udp_checksum(np, skb, len); +} + +static void push_eth(struct netpoll *np, struct sk_buff *skb) +{ + struct ethhdr *eth; + + eth = skb_push(skb, ETH_HLEN); + skb_reset_mac_header(skb); + ether_addr_copy(eth->h_source, np->dev->dev_addr); + ether_addr_copy(eth->h_dest, np->remote_mac); + if (np->ipv6) + eth->h_proto = htons(ETH_P_IPV6); + else + eth->h_proto = htons(ETH_P_IP); +} + int netpoll_send_udp(struct netpoll *np, const char *msg, int len) { int total_len, ip_len, udp_len; struct sk_buff *skb; - struct udphdr *udph; - struct iphdr *iph; - struct ethhdr *eth; - static atomic_t ip_ident; - struct ipv6hdr *ip6h; if (!IS_ENABLED(CONFIG_PREEMPT_RT)) WARN_ON_ONCE(!irqs_disabled()); - udp_len = len + sizeof(*udph); + udp_len = len + sizeof(struct udphdr); if (np->ipv6) - ip_len = udp_len + sizeof(*ip6h); + ip_len = udp_len + sizeof(struct ipv6hdr); else - ip_len = udp_len + sizeof(*iph); + ip_len = udp_len + sizeof(struct iphdr); total_len = ip_len + LL_RESERVED_SPACE(np->dev); @@ -425,117 +520,18 @@ int netpoll_send_udp(struct netpoll *np, const char *msg, int len) skb_copy_to_linear_data(skb, msg, len); skb_put(skb, len); - skb_push(skb, sizeof(*udph)); - skb_reset_transport_header(skb); - 
udph = udp_hdr(skb); - udph->source = htons(np->local_port); - udph->dest = htons(np->remote_port); - udph->len = htons(udp_len); - - udph->check = 0; - if (np->ipv6) { - udph->check = csum_ipv6_magic(&np->local_ip.in6, - &np->remote_ip.in6, - udp_len, IPPROTO_UDP, - csum_partial(udph, udp_len, 0)); - if (udph->check == 0) - udph->check = CSUM_MANGLED_0; - - skb_push(skb, sizeof(*ip6h)); - skb_reset_network_header(skb); - ip6h = ipv6_hdr(skb); - - /* ip6h->version = 6; ip6h->priority = 0; */ - *(unsigned char *)ip6h = 0x60; - ip6h->flow_lbl[0] = 0; - ip6h->flow_lbl[1] = 0; - ip6h->flow_lbl[2] = 0; - - ip6h->payload_len = htons(sizeof(struct udphdr) + len); - ip6h->nexthdr = IPPROTO_UDP; - ip6h->hop_limit = 32; - ip6h->saddr = np->local_ip.in6; - ip6h->daddr = np->remote_ip.in6; - - eth = skb_push(skb, ETH_HLEN); - skb_reset_mac_header(skb); - skb->protocol = eth->h_proto = htons(ETH_P_IPV6); - } else { - udph->check = csum_tcpudp_magic(np->local_ip.ip, - np->remote_ip.ip, - udp_len, IPPROTO_UDP, - csum_partial(udph, udp_len, 0)); - if (udph->check == 0) - udph->check = CSUM_MANGLED_0; - - skb_push(skb, sizeof(*iph)); - skb_reset_network_header(skb); - iph = ip_hdr(skb); - - /* iph->version = 4; iph->ihl = 5; */ - *(unsigned char *)iph = 0x45; - iph->tos = 0; - put_unaligned(htons(ip_len), &(iph->tot_len)); - iph->id = htons(atomic_inc_return(&ip_ident)); - iph->frag_off = 0; - iph->ttl = 64; - iph->protocol = IPPROTO_UDP; - iph->check = 0; - put_unaligned(np->local_ip.ip, &(iph->saddr)); - put_unaligned(np->remote_ip.ip, &(iph->daddr)); - iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); - - eth = skb_push(skb, ETH_HLEN); - skb_reset_mac_header(skb); - skb->protocol = eth->h_proto = htons(ETH_P_IP); - } - - ether_addr_copy(eth->h_source, np->dev->dev_addr); - ether_addr_copy(eth->h_dest, np->remote_mac); - + push_udp(np, skb, len); + if (np->ipv6) + push_ipv6(np, skb, len); + else + push_ipv4(np, skb, len); + push_eth(np, skb); skb->dev = np->dev; return 
(int)netpoll_send_skb(np, skb); } EXPORT_SYMBOL(netpoll_send_udp); -void netpoll_print_options(struct netpoll *np) -{ - np_info(np, "local port %d\n", np->local_port); - if (np->ipv6) - np_info(np, "local IPv6 address %pI6c\n", &np->local_ip.in6); - else - np_info(np, "local IPv4 address %pI4\n", &np->local_ip.ip); - np_info(np, "interface name '%s'\n", np->dev_name); - np_info(np, "local ethernet address '%pM'\n", np->dev_mac); - np_info(np, "remote port %d\n", np->remote_port); - if (np->ipv6) - np_info(np, "remote IPv6 address %pI6c\n", &np->remote_ip.in6); - else - np_info(np, "remote IPv4 address %pI4\n", &np->remote_ip.ip); - np_info(np, "remote ethernet address %pM\n", np->remote_mac); -} -EXPORT_SYMBOL(netpoll_print_options); - -static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr) -{ - const char *end; - - if (!strchr(str, ':') && - in4_pton(str, -1, (void *)addr, -1, &end) > 0) { - if (!*end) - return 0; - } - if (in6_pton(str, -1, addr->in6.s6_addr, -1, &end) > 0) { -#if IS_ENABLED(CONFIG_IPV6) - if (!*end) - return 1; -#else - return -1; -#endif - } - return -1; -} static void skb_pool_flush(struct netpoll *np) { @@ -546,95 +542,6 @@ static void skb_pool_flush(struct netpoll *np) skb_queue_purge_reason(skb_pool, SKB_CONSUMED); } -int netpoll_parse_options(struct netpoll *np, char *opt) -{ - char *cur=opt, *delim; - int ipv6; - bool ipversion_set = false; - - if (*cur != '@') { - if ((delim = strchr(cur, '@')) == NULL) - goto parse_failed; - *delim = 0; - if (kstrtou16(cur, 10, &np->local_port)) - goto parse_failed; - cur = delim; - } - cur++; - - if (*cur != '/') { - ipversion_set = true; - if ((delim = strchr(cur, '/')) == NULL) - goto parse_failed; - *delim = 0; - ipv6 = netpoll_parse_ip_addr(cur, &np->local_ip); - if (ipv6 < 0) - goto parse_failed; - else - np->ipv6 = (bool)ipv6; - cur = delim; - } - cur++; - - if (*cur != ',') { - /* parse out dev_name or dev_mac */ - if ((delim = strchr(cur, ',')) == NULL) - goto parse_failed; - 
*delim = 0; - - np->dev_name[0] = '\0'; - eth_broadcast_addr(np->dev_mac); - if (!strchr(cur, ':')) - strscpy(np->dev_name, cur, sizeof(np->dev_name)); - else if (!mac_pton(cur, np->dev_mac)) - goto parse_failed; - - cur = delim; - } - cur++; - - if (*cur != '@') { - /* dst port */ - if ((delim = strchr(cur, '@')) == NULL) - goto parse_failed; - *delim = 0; - if (*cur == ' ' || *cur == '\t') - np_info(np, "warning: whitespace is not allowed\n"); - if (kstrtou16(cur, 10, &np->remote_port)) - goto parse_failed; - cur = delim; - } - cur++; - - /* dst ip */ - if ((delim = strchr(cur, '/')) == NULL) - goto parse_failed; - *delim = 0; - ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip); - if (ipv6 < 0) - goto parse_failed; - else if (ipversion_set && np->ipv6 != (bool)ipv6) - goto parse_failed; - else - np->ipv6 = (bool)ipv6; - cur = delim + 1; - - if (*cur != 0) { - /* MAC address */ - if (!mac_pton(cur, np->remote_mac)) - goto parse_failed; - } - - netpoll_print_options(np); - - return 0; - - parse_failed: - np_info(np, "couldn't parse config at '%s'!\n", cur); - return -1; -} -EXPORT_SYMBOL(netpoll_parse_options); - static void refill_skbs_work_handler(struct work_struct *work) { struct netpoll *np = @@ -716,13 +623,97 @@ static char *egress_dev(struct netpoll *np, char *buf) return buf; } +static void netpoll_wait_carrier(struct netpoll *np, struct net_device *ndev, + unsigned int timeout) +{ + unsigned long atmost; + + atmost = jiffies + timeout * HZ; + while (!netif_carrier_ok(ndev)) { + if (time_after(jiffies, atmost)) { + np_notice(np, "timeout waiting for carrier\n"); + break; + } + msleep(1); + } +} + +/* + * Take the IPv6 from ndev and populate local_ip structure in netpoll + */ +static int netpoll_take_ipv6(struct netpoll *np, struct net_device *ndev) +{ + char buf[MAC_ADDR_STR_LEN + 1]; + int err = -EDESTADDRREQ; + struct inet6_dev *idev; + + if (!IS_ENABLED(CONFIG_IPV6)) { + np_err(np, "IPv6 is not supported %s, aborting\n", + egress_dev(np, buf)); + return 
-EINVAL; + } + + idev = __in6_dev_get(ndev); + if (idev) { + struct inet6_ifaddr *ifp; + + read_lock_bh(&idev->lock); + list_for_each_entry(ifp, &idev->addr_list, if_list) { + if (!!(ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL) != + !!(ipv6_addr_type(&np->remote_ip.in6) & IPV6_ADDR_LINKLOCAL)) + continue; + /* Got the IP, let's return */ + np->local_ip.in6 = ifp->addr; + err = 0; + break; + } + read_unlock_bh(&idev->lock); + } + if (err) { + np_err(np, "no IPv6 address for %s, aborting\n", + egress_dev(np, buf)); + return err; + } + + np_info(np, "local IPv6 %pI6c\n", &np->local_ip.in6); + return 0; +} + +/* + * Take the IPv4 from ndev and populate local_ip structure in netpoll + */ +static int netpoll_take_ipv4(struct netpoll *np, struct net_device *ndev) +{ + char buf[MAC_ADDR_STR_LEN + 1]; + const struct in_ifaddr *ifa; + struct in_device *in_dev; + + in_dev = __in_dev_get_rtnl(ndev); + if (!in_dev) { + np_err(np, "no IP address for %s, aborting\n", + egress_dev(np, buf)); + return -EDESTADDRREQ; + } + + ifa = rtnl_dereference(in_dev->ifa_list); + if (!ifa) { + np_err(np, "no IP address for %s, aborting\n", + egress_dev(np, buf)); + return -EDESTADDRREQ; + } + + np->local_ip.ip = ifa->ifa_local; + np_info(np, "local IP %pI4\n", &np->local_ip.ip); + + return 0; +} + int netpoll_setup(struct netpoll *np) { struct net *net = current->nsproxy->net_ns; char buf[MAC_ADDR_STR_LEN + 1]; struct net_device *ndev = NULL; bool ip_overwritten = false; - struct in_device *in_dev; int err; rtnl_lock(); @@ -746,85 +737,31 @@ int netpoll_setup(struct netpoll *np) } if (!netif_running(ndev)) { - unsigned long atmost; - np_info(np, "device %s not up yet, forcing it\n", egress_dev(np, buf)); err = dev_open(ndev, NULL); - if (err) { np_err(np, "failed to open %s\n", ndev->name); goto put; } rtnl_unlock(); - atmost = jiffies + carrier_timeout * HZ; - while (!netif_carrier_ok(ndev)) { - if (time_after(jiffies, atmost)) { - np_notice(np, "timeout waiting for carrier\n"); - break; 
- } - msleep(1); - } - + netpoll_wait_carrier(np, ndev, carrier_timeout); rtnl_lock(); } if (!np->local_ip.ip) { if (!np->ipv6) { - const struct in_ifaddr *ifa; - - in_dev = __in_dev_get_rtnl(ndev); - if (!in_dev) - goto put_noaddr; - - ifa = rtnl_dereference(in_dev->ifa_list); - if (!ifa) { -put_noaddr: - np_err(np, "no IP address for %s, aborting\n", - egress_dev(np, buf)); - err = -EDESTADDRREQ; + err = netpoll_take_ipv4(np, ndev); + if (err) goto put; - } - - np->local_ip.ip = ifa->ifa_local; - ip_overwritten = true; - np_info(np, "local IP %pI4\n", &np->local_ip.ip); } else { -#if IS_ENABLED(CONFIG_IPV6) - struct inet6_dev *idev; - - err = -EDESTADDRREQ; - idev = __in6_dev_get(ndev); - if (idev) { - struct inet6_ifaddr *ifp; - - read_lock_bh(&idev->lock); - list_for_each_entry(ifp, &idev->addr_list, if_list) { - if (!!(ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL) != - !!(ipv6_addr_type(&np->remote_ip.in6) & IPV6_ADDR_LINKLOCAL)) - continue; - np->local_ip.in6 = ifp->addr; - ip_overwritten = true; - err = 0; - break; - } - read_unlock_bh(&idev->lock); - } - if (err) { - np_err(np, "no IPv6 address for %s, aborting\n", - egress_dev(np, buf)); + err = netpoll_take_ipv6(np, ndev); + if (err) goto put; - } else - np_info(np, "local IPv6 %pI6c\n", &np->local_ip.in6); -#else - np_err(np, "IPv6 is not supported %s, aborting\n", - egress_dev(np, buf)); - err = -EINVAL; - goto put; -#endif } + ip_overwritten = true; } err = __netpoll_setup(np, ndev); @@ -863,7 +800,7 @@ static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head) kfree(npinfo); } -void __netpoll_cleanup(struct netpoll *np) +static void __netpoll_cleanup(struct netpoll *np) { struct netpoll_info *npinfo; @@ -885,7 +822,6 @@ void __netpoll_cleanup(struct netpoll *np) skb_pool_flush(np); } -EXPORT_SYMBOL_GPL(__netpoll_cleanup); void __netpoll_free(struct netpoll *np) { diff --git a/net/core/page_pool.c b/net/core/page_pool.c index ba7cf3e3c32f..05e2e22a8f7c 100644 --- a/net/core/page_pool.c +++ 
b/net/core/page_pool.c @@ -371,7 +371,7 @@ struct page_pool *page_pool_create(const struct page_pool_params *params) } EXPORT_SYMBOL(page_pool_create); -static void page_pool_return_page(struct page_pool *pool, netmem_ref netmem); +static void page_pool_return_netmem(struct page_pool *pool, netmem_ref netmem); static noinline netmem_ref page_pool_refill_alloc_cache(struct page_pool *pool) { @@ -409,7 +409,7 @@ static noinline netmem_ref page_pool_refill_alloc_cache(struct page_pool *pool) * (2) break out to fallthrough to alloc_pages_node. * This limit stress on page buddy alloactor. */ - page_pool_return_page(pool, netmem); + page_pool_return_netmem(pool, netmem); alloc_stat_inc(pool, waive); netmem = 0; break; @@ -544,8 +544,8 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool, } /* slow path */ -static noinline netmem_ref __page_pool_alloc_pages_slow(struct page_pool *pool, - gfp_t gfp) +static noinline netmem_ref __page_pool_alloc_netmems_slow(struct page_pool *pool, + gfp_t gfp) { const int bulk = PP_ALLOC_CACHE_REFILL; unsigned int pp_order = pool->p.order; @@ -615,7 +615,7 @@ netmem_ref page_pool_alloc_netmems(struct page_pool *pool, gfp_t gfp) if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops) netmem = pool->mp_ops->alloc_netmems(pool, gfp); else - netmem = __page_pool_alloc_pages_slow(pool, gfp); + netmem = __page_pool_alloc_netmems_slow(pool, gfp); return netmem; } EXPORT_SYMBOL(page_pool_alloc_netmems); @@ -673,8 +673,8 @@ void page_pool_clear_pp_info(netmem_ref netmem) netmem_set_pp(netmem, NULL); } -static __always_inline void __page_pool_release_page_dma(struct page_pool *pool, - netmem_ref netmem) +static __always_inline void __page_pool_release_netmem_dma(struct page_pool *pool, + netmem_ref netmem) { struct page *old, *page = netmem_to_page(netmem); unsigned long id; @@ -712,7 +712,7 @@ static __always_inline void __page_pool_release_page_dma(struct page_pool *pool, * a regular page (that will eventually 
be returned to the normal * page-allocator via put_page). */ -void page_pool_return_page(struct page_pool *pool, netmem_ref netmem) +static void page_pool_return_netmem(struct page_pool *pool, netmem_ref netmem) { int count; bool put; @@ -721,7 +721,7 @@ void page_pool_return_page(struct page_pool *pool, netmem_ref netmem) if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops) put = pool->mp_ops->release_netmem(pool, netmem); else - __page_pool_release_page_dma(pool, netmem); + __page_pool_release_netmem_dma(pool, netmem); /* This may be the last page returned, releasing the pool, so * it is not safe to reference pool afterwards. @@ -826,7 +826,7 @@ __page_pool_put_page(struct page_pool *pool, netmem_ref netmem, * will be invoking put_page. */ recycle_stat_inc(pool, released_refcnt); - page_pool_return_page(pool, netmem); + page_pool_return_netmem(pool, netmem); return 0; } @@ -869,7 +869,7 @@ void page_pool_put_unrefed_netmem(struct page_pool *pool, netmem_ref netmem, if (netmem && !page_pool_recycle_in_ring(pool, netmem)) { /* Cache full, fallback to free pages */ recycle_stat_inc(pool, ring_full); - page_pool_return_page(pool, netmem); + page_pool_return_netmem(pool, netmem); } } EXPORT_SYMBOL(page_pool_put_unrefed_netmem); @@ -912,7 +912,7 @@ static void page_pool_recycle_ring_bulk(struct page_pool *pool, * since put_page() with refcnt == 1 can be an expensive operation. 
*/ for (; i < bulk_len; i++) - page_pool_return_page(pool, bulk[i]); + page_pool_return_netmem(pool, bulk[i]); } /** @@ -995,7 +995,7 @@ static netmem_ref page_pool_drain_frag(struct page_pool *pool, return netmem; } - page_pool_return_page(pool, netmem); + page_pool_return_netmem(pool, netmem); return 0; } @@ -1009,7 +1009,7 @@ static void page_pool_free_frag(struct page_pool *pool) if (!netmem || page_pool_unref_netmem(netmem, drain_count)) return; - page_pool_return_page(pool, netmem); + page_pool_return_netmem(pool, netmem); } netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool, @@ -1076,7 +1076,7 @@ static void page_pool_empty_ring(struct page_pool *pool) pr_crit("%s() page_pool refcnt %d violation\n", __func__, netmem_ref_count(netmem)); - page_pool_return_page(pool, netmem); + page_pool_return_netmem(pool, netmem); } } @@ -1109,7 +1109,7 @@ static void page_pool_empty_alloc_cache_once(struct page_pool *pool) */ while (pool->alloc.count) { netmem = pool->alloc.cache[--pool->alloc.count]; - page_pool_return_page(pool, netmem); + page_pool_return_netmem(pool, netmem); } } @@ -1136,7 +1136,7 @@ static void page_pool_scrub(struct page_pool *pool) } xa_for_each(&pool->dma_mapped, id, ptr) - __page_pool_release_page_dma(pool, page_to_netmem(ptr)); + __page_pool_release_netmem_dma(pool, page_to_netmem((struct page *)ptr)); } /* No more consumers should exist, but producers could still @@ -1253,7 +1253,7 @@ void page_pool_update_nid(struct page_pool *pool, int new_nid) /* Flush pool alloc cache, as refill will check NUMA node */ while (pool->alloc.count) { netmem = pool->alloc.cache[--pool->alloc.count]; - page_pool_return_page(pool, netmem); + page_pool_return_netmem(pool, netmem); } } EXPORT_SYMBOL(page_pool_update_nid); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index c57692eb8da9..a9555bfc372f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1026,9 +1026,11 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry 
*dst, u32 id, .rta_error = error, .rta_id = id, }; + unsigned long delta; if (dst) { - ci.rta_lastuse = jiffies_delta_to_clock_t(jiffies - dst->lastuse); + delta = jiffies - READ_ONCE(dst->lastuse); + ci.rta_lastuse = jiffies_delta_to_clock_t(delta); ci.rta_used = dst->__use; ci.rta_clntref = rcuref_read(&dst->__rcuref); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d6420b74ea9c..ee0274417948 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -384,8 +384,7 @@ static inline void __finalize_skb_around(struct sk_buff *skb, void *data, skb_set_kcov_handle(skb, kcov_common_handle()); } -static inline void *__slab_build_skb(struct sk_buff *skb, void *data, - unsigned int *size) +static inline void *__slab_build_skb(void *data, unsigned int *size) { void *resized; @@ -418,7 +417,7 @@ struct sk_buff *slab_build_skb(void *data) return NULL; memset(skb, 0, offsetof(struct sk_buff, tail)); - data = __slab_build_skb(skb, data, &size); + data = __slab_build_skb(data, &size); __finalize_skb_around(skb, data, size); return skb; @@ -435,7 +434,7 @@ static void __build_skb_around(struct sk_buff *skb, void *data, * using slab buffer should use slab_build_skb() instead. */ if (WARN_ONCE(size == 0, "Use slab_build_skb() instead")) - data = __slab_build_skb(skb, data, &size); + data = __slab_build_skb(data, &size); __finalize_skb_around(skb, data, size); } @@ -3060,10 +3059,8 @@ static bool spd_can_coalesce(const struct splice_pipe_desc *spd, /* * Fill page/offset/length into spd, if it can hold more pages. 
*/ -static bool spd_fill_page(struct splice_pipe_desc *spd, - struct pipe_inode_info *pipe, struct page *page, - unsigned int *len, unsigned int offset, - bool linear, +static bool spd_fill_page(struct splice_pipe_desc *spd, struct page *page, + unsigned int *len, unsigned int offset, bool linear, struct sock *sk) { if (unlikely(spd->nr_pages == MAX_SKB_FRAGS)) @@ -3091,8 +3088,7 @@ static bool __splice_segment(struct page *page, unsigned int poff, unsigned int plen, unsigned int *off, unsigned int *len, struct splice_pipe_desc *spd, bool linear, - struct sock *sk, - struct pipe_inode_info *pipe) + struct sock *sk) { if (!*len) return true; @@ -3111,8 +3107,7 @@ static bool __splice_segment(struct page *page, unsigned int poff, do { unsigned int flen = min(*len, plen); - if (spd_fill_page(spd, pipe, page, &flen, poff, - linear, sk)) + if (spd_fill_page(spd, page, &flen, poff, linear, sk)) return true; poff += flen; plen -= flen; @@ -3130,8 +3125,8 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, unsigned int *offset, unsigned int *len, struct splice_pipe_desc *spd, struct sock *sk) { - int seg; struct sk_buff *iter; + int seg; /* map the linear part : * If skb->head_frag is set, this 'linear' part is backed by a @@ -3143,7 +3138,7 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, skb_headlen(skb), offset, len, spd, skb_head_is_locked(skb), - sk, pipe)) + sk)) return true; /* @@ -3160,7 +3155,7 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, if (__splice_segment(skb_frag_page(f), skb_frag_off(f), skb_frag_size(f), - offset, len, spd, false, sk, pipe)) + offset, len, spd, false, sk)) return true; } @@ -3235,6 +3230,7 @@ typedef int (*sendmsg_func)(struct sock *sk, struct msghdr *msg); static int __skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len, sendmsg_func sendmsg, int flags) { + int more_hint = sk_is_tcp(sk) ? 
MSG_MORE : 0; unsigned int orig_len = len; struct sk_buff *head = skb; unsigned short fragidx; @@ -3252,6 +3248,8 @@ do_frag_list: kv.iov_len = slen; memset(&msg, 0, sizeof(msg)); msg.msg_flags = MSG_DONTWAIT | flags; + if (slen < len) + msg.msg_flags |= more_hint; iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &kv, 1, slen); ret = INDIRECT_CALL_2(sendmsg, sendmsg_locked, @@ -3292,6 +3290,8 @@ do_frag_list: flags, }; + if (slen < len) + msg.msg_flags |= more_hint; bvec_set_page(&bvec, skb_frag_page(frag), slen, skb_frag_off(frag) + offset); iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, @@ -6763,8 +6763,7 @@ static int pskb_carve(struct sk_buff *skb, const u32 off, gfp_t gfp); /* carve out the first eat bytes from skb's frag_list. May recurse into * pskb_carve() */ -static int pskb_carve_frag_list(struct sk_buff *skb, - struct skb_shared_info *shinfo, int eat, +static int pskb_carve_frag_list(struct skb_shared_info *shinfo, int eat, gfp_t gfp_mask) { struct sk_buff *list = shinfo->frag_list; @@ -6869,7 +6868,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off, skb_clone_fraglist(skb); /* split line is in frag list */ - if (k == 0 && pskb_carve_frag_list(skb, shinfo, off - pos, gfp_mask)) { + if (k == 0 && pskb_carve_frag_list(shinfo, off - pos, gfp_mask)) { /* skb_frag_unref() is not needed here as shinfo->nr_frags = 0. */ if (skb_has_frag_list(skb)) kfree_skb_list(skb_shinfo(skb)->frag_list); @@ -7234,7 +7233,6 @@ static void skb_splice_csum_page(struct sk_buff *skb, struct page *page, * @skb: The buffer to add pages to * @iter: Iterator representing the pages to be added * @maxsize: Maximum amount of pages to be added - * @gfp: Allocation flags * * This is a common helper function for supporting MSG_SPLICE_PAGES. It * extracts pages from an iterator and adds them to the socket buffer if @@ -7245,7 +7243,7 @@ static void skb_splice_csum_page(struct sk_buff *skb, struct page *page, * insufficient space in the buffer to transfer anything. 
*/ ssize_t skb_splice_from_iter(struct sk_buff *skb, struct iov_iter *iter, - ssize_t maxsize, gfp_t gfp) + ssize_t maxsize) { size_t frag_limit = READ_ONCE(net_hotdata.sysctl_max_skb_frags); struct page *pages[8], **ppages = pages; diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 34c51eb1a14f..83c78379932e 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -656,6 +656,13 @@ static void sk_psock_backlog(struct work_struct *work) bool ingress; int ret; + /* If sk is quickly removed from the map and then added back, the old + * psock should not be scheduled, because there are now two psocks + * pointing to the same sk. + */ + if (!sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) + return; + /* Increment the psock refcnt to synchronize with close(fd) path in * sock_map_close(), ensuring we wait for backlog thread completion * before sk_socket freed. If refcnt increment fails, it indicates diff --git a/net/core/sock.c b/net/core/sock.c index 3b409bc8ef6d..8b7623c7d547 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -602,7 +602,7 @@ struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie) { struct dst_entry *dst = __sk_dst_get(sk); - if (dst && dst->obsolete && + if (dst && READ_ONCE(dst->obsolete) && INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check, dst, cookie) == NULL) { sk_tx_queue_clear(sk); @@ -620,7 +620,7 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie) { struct dst_entry *dst = sk_dst_get(sk); - if (dst && dst->obsolete && + if (dst && READ_ONCE(dst->obsolete) && INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check, dst, cookie) == NULL) { sk_dst_reset(sk); @@ -818,12 +818,10 @@ EXPORT_SYMBOL(sock_set_priority); void sock_set_sndtimeo(struct sock *sk, s64 secs) { - lock_sock(sk); if (secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1) WRITE_ONCE(sk->sk_sndtimeo, secs * HZ); else WRITE_ONCE(sk->sk_sndtimeo, MAX_SCHEDULE_TIMEOUT); - release_sock(sk); } EXPORT_SYMBOL(sock_set_sndtimeo); @@ -837,14 +835,6 
@@ static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns) } } -void sock_enable_timestamps(struct sock *sk) -{ - lock_sock(sk); - __sock_set_timestamps(sk, true, false, true); - release_sock(sk); -} -EXPORT_SYMBOL(sock_enable_timestamps); - void sock_set_timestamp(struct sock *sk, int optname, bool valbool) { switch (optname) { @@ -1295,6 +1285,14 @@ int sk_setsockopt(struct sock *sk, int level, int optname, case SO_DEVMEM_DONTNEED: return sock_devmem_dontneed(sk, optval, optlen); #endif + case SO_SNDTIMEO_OLD: + case SO_SNDTIMEO_NEW: + return sock_set_timeout(&sk->sk_sndtimeo, optval, + optlen, optname == SO_SNDTIMEO_OLD); + case SO_RCVTIMEO_OLD: + case SO_RCVTIMEO_NEW: + return sock_set_timeout(&sk->sk_rcvtimeo, optval, + optlen, optname == SO_RCVTIMEO_OLD); } sockopt_lock_sock(sk); @@ -1450,18 +1448,6 @@ set_sndbuf: WRITE_ONCE(sk->sk_rcvlowat, val ? : 1); break; } - case SO_RCVTIMEO_OLD: - case SO_RCVTIMEO_NEW: - ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, - optlen, optname == SO_RCVTIMEO_OLD); - break; - - case SO_SNDTIMEO_OLD: - case SO_SNDTIMEO_NEW: - ret = sock_set_timeout(&sk->sk_sndtimeo, optval, - optlen, optname == SO_SNDTIMEO_OLD); - break; - case SO_ATTACH_FILTER: { struct sock_fprog fprog; @@ -2602,8 +2588,8 @@ static u32 sk_dst_gso_max_size(struct sock *sk, struct dst_entry *dst) !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)); #endif /* pairs with the WRITE_ONCE() in netif_set_gso(_ipv4)_max_size() */ - max_size = is_ipv6 ? READ_ONCE(dst->dev->gso_max_size) : - READ_ONCE(dst->dev->gso_ipv4_max_size); + max_size = is_ipv6 ? 
READ_ONCE(dst_dev(dst)->gso_max_size) : + READ_ONCE(dst_dev(dst)->gso_ipv4_max_size); if (max_size > GSO_LEGACY_MAX_SIZE && !sk_is_tcp(sk)) max_size = GSO_LEGACY_MAX_SIZE; @@ -2614,7 +2600,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) { u32 max_segs = 1; - sk->sk_route_caps = dst->dev->features; + sk->sk_route_caps = dst_dev(dst)->features; if (sk_is_tcp(sk)) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -2632,7 +2618,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; sk->sk_gso_max_size = sk_dst_gso_max_size(sk, dst); /* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */ - max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), 1); + max_segs = max_t(u32, READ_ONCE(dst_dev(dst)->gso_max_segs), 1); } } sk->sk_gso_max_segs = max_segs; @@ -2788,17 +2774,6 @@ void sock_pfree(struct sk_buff *skb) EXPORT_SYMBOL(sock_pfree); #endif /* CONFIG_INET */ -kuid_t sock_i_uid(struct sock *sk) -{ - kuid_t uid; - - read_lock_bh(&sk->sk_callback_lock); - uid = sk->sk_socket ? 
SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID; - read_unlock_bh(&sk->sk_callback_lock); - return uid; -} -EXPORT_SYMBOL(sock_i_uid); - unsigned long __sock_i_ino(struct sock *sk) { unsigned long ino; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 5dbb2c6f371d..8cf04b57ade1 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -28,6 +28,7 @@ #include <net/rps.h> #include "dev.h" +#include "net-sysfs.h" static int int_3600 = 3600; static int min_sndbuf = SOCK_MIN_SNDBUF; @@ -96,50 +97,40 @@ free_buf: #ifdef CONFIG_RPS -static struct cpumask *rps_default_mask_cow_alloc(struct net *net) -{ - struct cpumask *rps_default_mask; - - if (net->core.rps_default_mask) - return net->core.rps_default_mask; - - rps_default_mask = kzalloc(cpumask_size(), GFP_KERNEL); - if (!rps_default_mask) - return NULL; - - /* pairs with READ_ONCE in rx_queue_default_mask() */ - WRITE_ONCE(net->core.rps_default_mask, rps_default_mask); - return rps_default_mask; -} +DEFINE_MUTEX(rps_default_mask_mutex); static int rps_default_mask_sysctl(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = (struct net *)table->data; + struct cpumask *mask; int err = 0; - rtnl_lock(); + mutex_lock(&rps_default_mask_mutex); + mask = net->core.rps_default_mask; if (write) { - struct cpumask *rps_default_mask = rps_default_mask_cow_alloc(net); - + if (!mask) { + mask = kzalloc(cpumask_size(), GFP_KERNEL); + net->core.rps_default_mask = mask; + } err = -ENOMEM; - if (!rps_default_mask) + if (!mask) goto done; - err = cpumask_parse(buffer, rps_default_mask); + err = cpumask_parse(buffer, mask); if (err) goto done; - err = rps_cpumask_housekeeping(rps_default_mask); + err = rps_cpumask_housekeeping(mask); if (err) goto done; } else { err = dump_cpumask(buffer, lenp, ppos, - net->core.rps_default_mask ? 
: cpu_none_mask); + mask ?: cpu_none_mask); } done: - rtnl_unlock(); + mutex_unlock(&rps_default_mask_mutex); return err; } diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c index e340d955cf3b..c50436433c18 100644 --- a/net/devlink/netlink_gen.c +++ b/net/devlink/netlink_gen.c @@ -45,6 +45,11 @@ const struct nla_policy devlink_dl_port_function_nl_policy[DEVLINK_PORT_FN_ATTR_ [DEVLINK_PORT_FN_ATTR_CAPS] = NLA_POLICY_BITFIELD32(15), }; +const struct nla_policy devlink_dl_rate_tc_bws_nl_policy[DEVLINK_ATTR_RATE_TC_BW + 1] = { + [DEVLINK_ATTR_RATE_TC_INDEX] = NLA_POLICY_MAX(NLA_U8, DEVLINK_RATE_TC_INDEX_MAX), + [DEVLINK_ATTR_RATE_TC_BW] = { .type = NLA_U32, }, +}; + const struct nla_policy devlink_dl_selftest_id_nl_policy[DEVLINK_ATTR_SELFTEST_ID_FLASH + 1] = { [DEVLINK_ATTR_SELFTEST_ID_FLASH] = { .type = NLA_FLAG, }, }; @@ -523,7 +528,7 @@ static const struct nla_policy devlink_rate_get_dump_nl_policy[DEVLINK_ATTR_DEV_ }; /* DEVLINK_CMD_RATE_SET - do */ -static const struct nla_policy devlink_rate_set_nl_policy[DEVLINK_ATTR_RATE_TX_WEIGHT + 1] = { +static const struct nla_policy devlink_rate_set_nl_policy[DEVLINK_ATTR_RATE_TC_BWS + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING, }, @@ -532,10 +537,11 @@ static const struct nla_policy devlink_rate_set_nl_policy[DEVLINK_ATTR_RATE_TX_W [DEVLINK_ATTR_RATE_TX_PRIORITY] = { .type = NLA_U32, }, [DEVLINK_ATTR_RATE_TX_WEIGHT] = { .type = NLA_U32, }, [DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_RATE_TC_BWS] = NLA_POLICY_NESTED(devlink_dl_rate_tc_bws_nl_policy), }; /* DEVLINK_CMD_RATE_NEW - do */ -static const struct nla_policy devlink_rate_new_nl_policy[DEVLINK_ATTR_RATE_TX_WEIGHT + 1] = { +static const struct nla_policy devlink_rate_new_nl_policy[DEVLINK_ATTR_RATE_TC_BWS + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, 
[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING, }, @@ -544,6 +550,7 @@ static const struct nla_policy devlink_rate_new_nl_policy[DEVLINK_ATTR_RATE_TX_W [DEVLINK_ATTR_RATE_TX_PRIORITY] = { .type = NLA_U32, }, [DEVLINK_ATTR_RATE_TX_WEIGHT] = { .type = NLA_U32, }, [DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_RATE_TC_BWS] = NLA_POLICY_NESTED(devlink_dl_rate_tc_bws_nl_policy), }; /* DEVLINK_CMD_RATE_DEL - do */ @@ -1191,7 +1198,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_rate_set_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_rate_set_nl_policy, - .maxattr = DEVLINK_ATTR_RATE_TX_WEIGHT, + .maxattr = DEVLINK_ATTR_RATE_TC_BWS, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -1201,7 +1208,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_rate_new_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_rate_new_nl_policy, - .maxattr = DEVLINK_ATTR_RATE_TX_WEIGHT, + .maxattr = DEVLINK_ATTR_RATE_TC_BWS, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { diff --git a/net/devlink/netlink_gen.h b/net/devlink/netlink_gen.h index 8f2bd50ddf5e..fb733b5d4ff1 100644 --- a/net/devlink/netlink_gen.h +++ b/net/devlink/netlink_gen.h @@ -13,6 +13,7 @@ /* Common nested types */ extern const struct nla_policy devlink_dl_port_function_nl_policy[DEVLINK_PORT_FN_ATTR_CAPS + 1]; +extern const struct nla_policy devlink_dl_rate_tc_bws_nl_policy[DEVLINK_ATTR_RATE_TC_BW + 1]; extern const struct nla_policy devlink_dl_selftest_id_nl_policy[DEVLINK_ATTR_SELFTEST_ID_FLASH + 1]; /* Ops table for devlink */ diff --git a/net/devlink/param.c b/net/devlink/param.c index b29abf8d3ed4..41dcc86cfd94 100644 --- a/net/devlink/param.c +++ b/net/devlink/param.c @@ -92,6 +92,16 @@ static const struct devlink_param devlink_param_generic[] = { .name = DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_NAME, .type = DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_TYPE, 
}, + { + .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_PHC, + .name = DEVLINK_PARAM_GENERIC_ENABLE_PHC_NAME, + .type = DEVLINK_PARAM_GENERIC_ENABLE_PHC_TYPE, + }, + { + .id = DEVLINK_PARAM_GENERIC_ID_CLOCK_ID, + .name = DEVLINK_PARAM_GENERIC_CLOCK_ID_NAME, + .type = DEVLINK_PARAM_GENERIC_CLOCK_ID_TYPE, + }, }; static int devlink_param_generic_verify(const struct devlink_param *param) @@ -195,6 +205,11 @@ devlink_nl_param_value_fill_one(struct sk_buff *msg, if (nla_put_u32(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, val.vu32)) goto value_nest_cancel; break; + case DEVLINK_PARAM_TYPE_U64: + if (devlink_nl_put_u64(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, + val.vu64)) + goto value_nest_cancel; + break; case DEVLINK_PARAM_TYPE_STRING: if (nla_put_string(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, val.vstr)) @@ -429,6 +444,11 @@ devlink_param_value_get_from_info(const struct devlink_param *param, return -EINVAL; value->vu32 = nla_get_u32(param_data); break; + case DEVLINK_PARAM_TYPE_U64: + if (nla_len(param_data) != sizeof(u64)) + return -EINVAL; + value->vu64 = nla_get_u64(param_data); + break; case DEVLINK_PARAM_TYPE_STRING: len = strnlen(nla_data(param_data), nla_len(param_data)); if (len == nla_len(param_data) || diff --git a/net/devlink/rate.c b/net/devlink/rate.c index 8828ffaf6cbc..d39300a9b3d4 100644 --- a/net/devlink/rate.c +++ b/net/devlink/rate.c @@ -80,6 +80,29 @@ devlink_rate_get_from_info(struct devlink *devlink, struct genl_info *info) return ERR_PTR(-EINVAL); } +static int devlink_rate_put_tc_bws(struct sk_buff *msg, u32 *tc_bw) +{ + struct nlattr *nla_tc_bw; + int i; + + for (i = 0; i < DEVLINK_RATE_TCS_MAX; i++) { + nla_tc_bw = nla_nest_start(msg, DEVLINK_ATTR_RATE_TC_BWS); + if (!nla_tc_bw) + return -EMSGSIZE; + + if (nla_put_u8(msg, DEVLINK_ATTR_RATE_TC_INDEX, i) || + nla_put_u32(msg, DEVLINK_ATTR_RATE_TC_BW, tc_bw[i])) + goto nla_put_failure; + + nla_nest_end(msg, nla_tc_bw); + } + return 0; + +nla_put_failure: + nla_nest_cancel(msg, nla_tc_bw); + return -EMSGSIZE; +} + static 
int devlink_nl_rate_fill(struct sk_buff *msg, struct devlink_rate *devlink_rate, enum devlink_command cmd, u32 portid, u32 seq, @@ -129,6 +152,9 @@ static int devlink_nl_rate_fill(struct sk_buff *msg, devlink_rate->parent->name)) goto nla_put_failure; + if (devlink_rate_put_tc_bws(msg, devlink_rate->tc_bw)) + goto nla_put_failure; + genlmsg_end(msg, hdr); return 0; @@ -316,6 +342,87 @@ devlink_nl_rate_parent_node_set(struct devlink_rate *devlink_rate, return 0; } +static int devlink_nl_rate_tc_bw_parse(struct nlattr *parent_nest, u32 *tc_bw, + unsigned long *bitmap, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[DEVLINK_ATTR_MAX + 1]; + u8 tc_index; + int err; + + err = nla_parse_nested(tb, DEVLINK_ATTR_MAX, parent_nest, + devlink_dl_rate_tc_bws_nl_policy, extack); + if (err) + return err; + + if (!tb[DEVLINK_ATTR_RATE_TC_INDEX]) { + NL_SET_ERR_ATTR_MISS(extack, parent_nest, + DEVLINK_ATTR_RATE_TC_INDEX); + return -EINVAL; + } + + tc_index = nla_get_u8(tb[DEVLINK_ATTR_RATE_TC_INDEX]); + + if (!tb[DEVLINK_ATTR_RATE_TC_BW]) { + NL_SET_ERR_ATTR_MISS(extack, parent_nest, + DEVLINK_ATTR_RATE_TC_BW); + return -EINVAL; + } + + if (test_and_set_bit(tc_index, bitmap)) { + NL_SET_ERR_MSG_FMT(extack, + "Duplicate traffic class index specified (%u)", + tc_index); + return -EINVAL; + } + + tc_bw[tc_index] = nla_get_u32(tb[DEVLINK_ATTR_RATE_TC_BW]); + + return 0; +} + +static int devlink_nl_rate_tc_bw_set(struct devlink_rate *devlink_rate, + struct genl_info *info) +{ + DECLARE_BITMAP(bitmap, DEVLINK_RATE_TCS_MAX) = {}; + struct devlink *devlink = devlink_rate->devlink; + const struct devlink_ops *ops = devlink->ops; + u32 tc_bw[DEVLINK_RATE_TCS_MAX] = {}; + int rem, err = -EOPNOTSUPP, i; + struct nlattr *attr; + + nlmsg_for_each_attr_type(attr, DEVLINK_ATTR_RATE_TC_BWS, info->nlhdr, + GENL_HDRLEN, rem) { + err = devlink_nl_rate_tc_bw_parse(attr, tc_bw, bitmap, + info->extack); + if (err) + return err; + } + + for (i = 0; i < DEVLINK_RATE_TCS_MAX; i++) { + if 
(!test_bit(i, bitmap)) { + NL_SET_ERR_MSG_FMT(info->extack, + "Bandwidth values must be specified for all %u traffic classes", + DEVLINK_RATE_TCS_MAX); + return -EINVAL; + } + } + + if (devlink_rate_is_leaf(devlink_rate)) + err = ops->rate_leaf_tc_bw_set(devlink_rate, devlink_rate->priv, + tc_bw, info->extack); + else if (devlink_rate_is_node(devlink_rate)) + err = ops->rate_node_tc_bw_set(devlink_rate, devlink_rate->priv, + tc_bw, info->extack); + + if (err) + return err; + + memcpy(devlink_rate->tc_bw, tc_bw, sizeof(tc_bw)); + + return 0; +} + static int devlink_nl_rate_set(struct devlink_rate *devlink_rate, const struct devlink_ops *ops, struct genl_info *info) @@ -388,6 +495,12 @@ static int devlink_nl_rate_set(struct devlink_rate *devlink_rate, return err; } + if (attrs[DEVLINK_ATTR_RATE_TC_BWS]) { + err = devlink_nl_rate_tc_bw_set(devlink_rate, info); + if (err) + return err; + } + return 0; } @@ -423,6 +536,13 @@ static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops, "TX weight set isn't supported for the leafs"); return false; } + if (attrs[DEVLINK_ATTR_RATE_TC_BWS] && + !ops->rate_leaf_tc_bw_set) { + NL_SET_ERR_MSG_ATTR(info->extack, + attrs[DEVLINK_ATTR_RATE_TC_BWS], + "TC bandwidth set isn't supported for the leafs"); + return false; + } } else if (type == DEVLINK_RATE_TYPE_NODE) { if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_node_tx_share_set) { NL_SET_ERR_MSG(info->extack, "TX share set isn't supported for the nodes"); @@ -449,6 +569,13 @@ static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops, "TX weight set isn't supported for the nodes"); return false; } + if (attrs[DEVLINK_ATTR_RATE_TC_BWS] && + !ops->rate_node_tc_bw_set) { + NL_SET_ERR_MSG_ATTR(info->extack, + attrs[DEVLINK_ATTR_RATE_TC_BWS], + "TC bandwidth set isn't supported for the nodes"); + return false; + } } else { WARN(1, "Unknown type of rate object"); return false; diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 2dfe9063613f..869cbe57162f 
100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -42,12 +42,24 @@ config NET_DSA_TAG_BRCM Broadcom switches which place the tag after the MAC source address. config NET_DSA_TAG_BRCM_LEGACY - tristate "Tag driver for Broadcom legacy switches using in-frame headers" + tristate "Tag driver for BCM63xx legacy switches using in-frame headers" select NET_DSA_TAG_BRCM_COMMON help Say Y if you want to enable support for tagging frames for the - Broadcom legacy switches which place the tag after the MAC source + BCM63xx legacy switches which place the tag after the MAC source address. + This tag is used in BCM63xx legacy switches which work without the + original FCS and length before the tag insertion. + +config NET_DSA_TAG_BRCM_LEGACY_FCS + tristate "Tag driver for BCM53xx legacy switches using in-frame headers" + select NET_DSA_TAG_BRCM_COMMON + help + Say Y if you want to enable support for tagging frames for the + BCM53xx legacy switches which place the tag after the MAC source + address. + This tag is used in BCM53xx legacy switches which expect original + FCS and length before the tag insertion to be present. 
config NET_DSA_TAG_BRCM_PREPEND tristate "Tag driver for Broadcom switches using prepended headers" diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index fe75821623a4..26bb657ceac3 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -15,6 +15,7 @@ #define BRCM_NAME "brcm" #define BRCM_LEGACY_NAME "brcm-legacy" +#define BRCM_LEGACY_FCS_NAME "brcm-legacy-fcs" #define BRCM_PREPEND_NAME "brcm-prepend" /* Legacy Broadcom tag (6 bytes) */ @@ -32,6 +33,10 @@ #define BRCM_LEG_MULTICAST (1 << 5) #define BRCM_LEG_EGRESS (2 << 5) #define BRCM_LEG_INGRESS (3 << 5) +#define BRCM_LEG_LEN_HI(x) (((x) >> 8) & 0x7) + +/* 4th byte in the tag */ +#define BRCM_LEG_LEN_LO(x) ((x) & 0xff) /* 6th byte in the tag */ #define BRCM_LEG_PORT_ID (0xf) @@ -212,6 +217,41 @@ DSA_TAG_DRIVER(brcm_netdev_ops); MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_BRCM, BRCM_NAME); #endif +#if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_LEGACY) || \ + IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_LEGACY_FCS) +static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb, + struct net_device *dev) +{ + int len = BRCM_LEG_TAG_LEN; + int source_port; + u8 *brcm_tag; + + if (unlikely(!pskb_may_pull(skb, BRCM_LEG_TAG_LEN + VLAN_HLEN))) + return NULL; + + brcm_tag = dsa_etype_header_pos_rx(skb); + + source_port = brcm_tag[5] & BRCM_LEG_PORT_ID; + + skb->dev = dsa_conduit_find_user(dev, 0, source_port); + if (!skb->dev) + return NULL; + + /* VLAN tag is added by BCM63xx internal switch */ + if (netdev_uses_dsa(skb->dev)) + len += VLAN_HLEN; + + /* Remove Broadcom tag and update checksum */ + skb_pull_rcsum(skb, len); + + dsa_default_offload_fwd_mark(skb); + + dsa_strip_etype_header(skb, len); + + return skb; +} +#endif /* CONFIG_NET_DSA_TAG_BRCM_LEGACY || CONFIG_NET_DSA_TAG_BRCM_LEGACY_FCS */ + #if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_LEGACY) static struct sk_buff *brcm_leg_tag_xmit(struct sk_buff *skb, struct net_device *dev) @@ -250,49 +290,77 @@ static struct sk_buff *brcm_leg_tag_xmit(struct sk_buff *skb, return skb; } 
-static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb, - struct net_device *dev) +static const struct dsa_device_ops brcm_legacy_netdev_ops = { + .name = BRCM_LEGACY_NAME, + .proto = DSA_TAG_PROTO_BRCM_LEGACY, + .xmit = brcm_leg_tag_xmit, + .rcv = brcm_leg_tag_rcv, + .needed_headroom = BRCM_LEG_TAG_LEN, +}; + +DSA_TAG_DRIVER(brcm_legacy_netdev_ops); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_BRCM_LEGACY, BRCM_LEGACY_NAME); +#endif /* CONFIG_NET_DSA_TAG_BRCM_LEGACY */ + +#if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_LEGACY_FCS) +static struct sk_buff *brcm_leg_fcs_tag_xmit(struct sk_buff *skb, + struct net_device *dev) { - int len = BRCM_LEG_TAG_LEN; - int source_port; + struct dsa_port *dp = dsa_user_to_port(dev); + unsigned int fcs_len; + __le32 fcs_val; u8 *brcm_tag; - if (unlikely(!pskb_may_pull(skb, BRCM_LEG_TAG_LEN + VLAN_HLEN))) + /* The Ethernet switch we are interfaced with needs packets to be at + * least 64 bytes (including FCS) otherwise they will be discarded when + * they enter the switch port logic. When Broadcom tags are enabled, we + * need to make sure that packets are at least 70 bytes (including FCS + * and tag) because the length verification is done after the Broadcom + * tag is stripped off the ingress packet. + * + * Let dsa_user_xmit() free the SKB. 
+ */ + if (__skb_put_padto(skb, ETH_ZLEN + BRCM_LEG_TAG_LEN, false)) return NULL; - brcm_tag = dsa_etype_header_pos_rx(skb); + fcs_len = skb->len; + fcs_val = cpu_to_le32(crc32_le(~0, skb->data, fcs_len) ^ ~0); - source_port = brcm_tag[5] & BRCM_LEG_PORT_ID; + skb_push(skb, BRCM_LEG_TAG_LEN); - skb->dev = dsa_conduit_find_user(dev, 0, source_port); - if (!skb->dev) - return NULL; + dsa_alloc_etype_header(skb, BRCM_LEG_TAG_LEN); - /* VLAN tag is added by BCM63xx internal switch */ - if (netdev_uses_dsa(skb->dev)) - len += VLAN_HLEN; + brcm_tag = skb->data + 2 * ETH_ALEN; - /* Remove Broadcom tag and update checksum */ - skb_pull_rcsum(skb, len); + /* Broadcom tag type */ + brcm_tag[0] = BRCM_LEG_TYPE_HI; + brcm_tag[1] = BRCM_LEG_TYPE_LO; - dsa_default_offload_fwd_mark(skb); + /* Broadcom tag value */ + brcm_tag[2] = BRCM_LEG_EGRESS | BRCM_LEG_LEN_HI(fcs_len); + brcm_tag[3] = BRCM_LEG_LEN_LO(fcs_len); + brcm_tag[4] = 0; + brcm_tag[5] = dp->index & BRCM_LEG_PORT_ID; - dsa_strip_etype_header(skb, len); + /* Original FCS value */ + if (__skb_pad(skb, ETH_FCS_LEN, false)) + return NULL; + skb_put_data(skb, &fcs_val, ETH_FCS_LEN); return skb; } -static const struct dsa_device_ops brcm_legacy_netdev_ops = { - .name = BRCM_LEGACY_NAME, - .proto = DSA_TAG_PROTO_BRCM_LEGACY, - .xmit = brcm_leg_tag_xmit, +static const struct dsa_device_ops brcm_legacy_fcs_netdev_ops = { + .name = BRCM_LEGACY_FCS_NAME, + .proto = DSA_TAG_PROTO_BRCM_LEGACY_FCS, + .xmit = brcm_leg_fcs_tag_xmit, .rcv = brcm_leg_tag_rcv, .needed_headroom = BRCM_LEG_TAG_LEN, }; -DSA_TAG_DRIVER(brcm_legacy_netdev_ops); -MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_BRCM_LEGACY, BRCM_LEGACY_NAME); -#endif /* CONFIG_NET_DSA_TAG_BRCM_LEGACY */ +DSA_TAG_DRIVER(brcm_legacy_fcs_netdev_ops); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_BRCM_LEGACY_FCS, BRCM_LEGACY_FCS_NAME); +#endif /* CONFIG_NET_DSA_TAG_BRCM_LEGACY_FCS */ #if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_PREPEND) static struct sk_buff *brcm_tag_xmit_prepend(struct sk_buff 
*skb, @@ -328,6 +396,9 @@ static struct dsa_tag_driver *dsa_tag_driver_array[] = { #if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_LEGACY) &DSA_TAG_DRIVER_NAME(brcm_legacy_netdev_ops), #endif +#if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_LEGACY_FCS) + &DSA_TAG_DRIVER_NAME(brcm_legacy_fcs_netdev_ops), +#endif #if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_PREPEND) &DSA_TAG_DRIVER_NAME(brcm_prepend_netdev_ops), #endif diff --git a/net/ethtool/common.c b/net/ethtool/common.c index eb253e0fd61b..d62dc56f2f5b 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -707,7 +707,9 @@ static u32 ethtool_get_max_rxfh_channel(struct net_device *dev) if (!rxfh.indir) return U32_MAX; + mutex_lock(&dev->ethtool->rss_lock); ret = dev->ethtool_ops->get_rxfh(dev, &rxfh); + mutex_unlock(&dev->ethtool->rss_lock); if (ret) { current_max = U32_MAX; goto out_free; diff --git a/net/ethtool/common.h b/net/ethtool/common.h index b4683d286a5a..c41db1595621 100644 --- a/net/ethtool/common.h +++ b/net/ethtool/common.h @@ -74,4 +74,12 @@ int ethtool_get_module_eeprom_call(struct net_device *dev, bool __ethtool_dev_mm_supported(struct net_device *dev); +#if IS_ENABLED(CONFIG_ETHTOOL_NETLINK) +void ethtool_rss_notify(struct net_device *dev, u32 rss_context); +#else +static inline void ethtool_rss_notify(struct net_device *dev, u32 rss_context) +{ +} +#endif + #endif /* _ETHTOOL_COMMON_H */ diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 71c828d0bf31..cccb4694f5e1 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -617,8 +617,8 @@ static int ethtool_set_link_ksettings(struct net_device *dev, err = dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings); if (err >= 0) { - ethtool_notify(dev, ETHTOOL_MSG_LINKINFO_NTF, NULL); - ethtool_notify(dev, ETHTOOL_MSG_LINKMODES_NTF, NULL); + ethtool_notify(dev, ETHTOOL_MSG_LINKINFO_NTF); + ethtool_notify(dev, ETHTOOL_MSG_LINKMODES_NTF); } return err; } @@ -708,8 +708,8 @@ static int ethtool_set_settings(struct net_device *dev, void __user 
*useraddr) __ETHTOOL_LINK_MODE_MASK_NU32; ret = dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings); if (ret >= 0) { - ethtool_notify(dev, ETHTOOL_MSG_LINKINFO_NTF, NULL); - ethtool_notify(dev, ETHTOOL_MSG_LINKMODES_NTF, NULL); + ethtool_notify(dev, ETHTOOL_MSG_LINKINFO_NTF); + ethtool_notify(dev, ETHTOOL_MSG_LINKMODES_NTF); } return ret; } @@ -981,6 +981,7 @@ static int ethtool_rxnfc_copy_to_user(void __user *useraddr, static bool flow_type_hashable(u32 flow_type) { switch (flow_type) { + case ETHER_FLOW: case TCP_V4_FLOW: case UDP_V4_FLOW: case SCTP_V4_FLOW: @@ -1037,22 +1038,21 @@ static int ethtool_check_xfrm_rxfh(u32 input_xfrm, u64 rxfh) static int ethtool_check_flow_types(struct net_device *dev, u32 input_xfrm) { const struct ethtool_ops *ops = dev->ethtool_ops; - struct ethtool_rxnfc info = { - .cmd = ETHTOOL_GRXFH, - }; int err; u32 i; for (i = 0; i < __FLOW_TYPE_COUNT; i++) { + struct ethtool_rxfh_fields fields = { + .flow_type = i, + }; + if (!flow_type_hashable(i)) continue; - info.flow_type = i; - err = ops->get_rxnfc(dev, &info, NULL); - if (err) + if (ops->get_rxfh_fields(dev, &fields)) continue; - err = ethtool_check_xfrm_rxfh(input_xfrm, info.data); + err = ethtool_check_xfrm_rxfh(input_xfrm, fields.data); if (err) return err; } @@ -1060,6 +1060,89 @@ static int ethtool_check_flow_types(struct net_device *dev, u32 input_xfrm) return 0; } +static noinline_for_stack int +ethtool_set_rxfh_fields(struct net_device *dev, u32 cmd, void __user *useraddr) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + struct ethtool_rxfh_fields fields = {}; + struct ethtool_rxnfc info; + size_t info_size = sizeof(info); + int rc; + + if (!ops->set_rxfh_fields) + return -EOPNOTSUPP; + + rc = ethtool_rxnfc_copy_struct(cmd, &info, &info_size, useraddr); + if (rc) + return rc; + + if (info.flow_type & FLOW_RSS && info.rss_context && + !ops->rxfh_per_ctx_fields) + return -EINVAL; + + mutex_lock(&dev->ethtool->rss_lock); + if (ops->get_rxfh) { + struct 
ethtool_rxfh_param rxfh = {}; + + rc = ops->get_rxfh(dev, &rxfh); + if (rc) + goto exit_unlock; + + rc = ethtool_check_xfrm_rxfh(rxfh.input_xfrm, info.data); + if (rc) + goto exit_unlock; + } + + fields.data = info.data; + fields.flow_type = info.flow_type & ~FLOW_RSS; + if (info.flow_type & FLOW_RSS) + fields.rss_context = info.rss_context; + + rc = ops->set_rxfh_fields(dev, &fields, NULL); +exit_unlock: + mutex_unlock(&dev->ethtool->rss_lock); + if (rc) + return rc; + + ethtool_rss_notify(dev, fields.rss_context); + return 0; +} + +static noinline_for_stack int +ethtool_get_rxfh_fields(struct net_device *dev, u32 cmd, void __user *useraddr) +{ + struct ethtool_rxnfc info; + size_t info_size = sizeof(info); + const struct ethtool_ops *ops = dev->ethtool_ops; + struct ethtool_rxfh_fields fields = {}; + int ret; + + if (!ops->get_rxfh_fields) + return -EOPNOTSUPP; + + ret = ethtool_rxnfc_copy_struct(cmd, &info, &info_size, useraddr); + if (ret) + return ret; + + if (info.flow_type & FLOW_RSS && info.rss_context && + !ops->rxfh_per_ctx_fields) + return -EINVAL; + + fields.flow_type = info.flow_type & ~FLOW_RSS; + if (info.flow_type & FLOW_RSS) + fields.rss_context = info.rss_context; + + mutex_lock(&dev->ethtool->rss_lock); + ret = ops->get_rxfh_fields(dev, &fields); + mutex_unlock(&dev->ethtool->rss_lock); + if (ret < 0) + return ret; + + info.data = fields.data; + + return ethtool_rxnfc_copy_to_user(useraddr, &info, info_size, NULL); +} + static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, u32 cmd, void __user *useraddr) { @@ -1088,18 +1171,6 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, return -EINVAL; } - if (cmd == ETHTOOL_SRXFH && ops->get_rxfh) { - struct ethtool_rxfh_param rxfh = {}; - - rc = ops->get_rxfh(dev, &rxfh); - if (rc) - return rc; - - rc = ethtool_check_xfrm_rxfh(rxfh.input_xfrm, info.data); - if (rc) - return rc; - } - rc = ops->set_rxnfc(dev, &info); if (rc) return rc; @@ -1209,7 +1280,9 @@ static 
noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, if (!rxfh.indir) return -ENOMEM; + mutex_lock(&dev->ethtool->rss_lock); ret = dev->ethtool_ops->get_rxfh(dev, &rxfh); + mutex_unlock(&dev->ethtool->rss_lock); if (ret) goto out; if (copy_to_user(useraddr + @@ -1274,9 +1347,11 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, } rxfh_dev.hfunc = ETH_RSS_HASH_NO_CHANGE; + + mutex_lock(&dev->ethtool->rss_lock); ret = ops->set_rxfh(dev, &rxfh_dev, extack); if (ret) - goto out; + goto out_unlock; /* indicate whether rxfh was set to default */ if (user_size == 0) @@ -1284,6 +1359,8 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, else dev->priv_flags |= IFF_RXFH_CONFIGURED; +out_unlock: + mutex_unlock(&dev->ethtool->rss_lock); out: kfree(rxfh_dev.indir); return ret; @@ -1319,8 +1396,7 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev, if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd32) return -EINVAL; /* Most drivers don't handle rss_context, check it's 0 as well */ - if (rxfh.rss_context && !(ops->cap_rss_ctx_supported || - ops->create_rxfh_context)) + if (rxfh.rss_context && !ops->create_rxfh_context) return -EOPNOTSUPP; rxfh.indir_size = rxfh_dev.indir_size; @@ -1344,6 +1420,7 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev, if (user_key_size) rxfh_dev.key = rss_config + indir_bytes; + mutex_lock(&dev->ethtool->rss_lock); if (rxfh.rss_context) { ctx = xa_load(&dev->ethtool->rss_ctx, rxfh.rss_context); if (!ctx) { @@ -1389,6 +1466,7 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev, ret = -EFAULT; } out: + mutex_unlock(&dev->ethtool->rss_lock); kfree(rss_config); return ret; @@ -1440,12 +1518,12 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, struct netlink_ext_ack *extack = NULL; struct ethtool_rxnfc rx_rings; struct ethtool_rxfh rxfh; - bool locked = false; /* dev->ethtool->rss_lock taken */ 
bool create = false; + bool mod = false; u8 *rss_config; int ret; - if (!ops->get_rxnfc || !ops->set_rxfh) + if (!ops->get_rxnfc || !ops->get_rxfh_fields || !ops->set_rxfh) return -EOPNOTSUPP; if (ops->get_rxfh_indir_size) @@ -1460,8 +1538,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd32) return -EINVAL; /* Most drivers don't handle rss_context, check it's 0 as well */ - if (rxfh.rss_context && !(ops->cap_rss_ctx_supported || - ops->create_rxfh_context)) + if (rxfh.rss_context && !ops->create_rxfh_context) return -EOPNOTSUPP; /* Check input data transformation capabilities */ if (rxfh.input_xfrm && rxfh.input_xfrm != RXH_XFRM_SYM_XOR && @@ -1489,10 +1566,6 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, rxfh.input_xfrm == RXH_XFRM_NO_CHANGE)) return -EINVAL; - ret = ethtool_check_flow_types(dev, rxfh.input_xfrm); - if (ret) - return ret; - indir_bytes = dev_indir_size * sizeof(rxfh_dev.indir[0]); /* Check settings which may be global rather than per RSS-context */ @@ -1509,7 +1582,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, rx_rings.cmd = ETHTOOL_GRXRINGS; ret = ops->get_rxnfc(dev, &rx_rings, NULL); if (ret) - goto out; + goto out_free; /* rxfh.indir_size == 0 means reset the indir table to default (master * context) or delete the context (other RSS contexts). 
@@ -1525,7 +1598,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, &rx_rings, rxfh.indir_size); if (ret) - goto out; + goto out_free; } else if (rxfh.indir_size == 0) { if (rxfh.rss_context == 0) { u32 *indir; @@ -1547,87 +1620,78 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, useraddr + rss_cfg_offset + user_indir_len, rxfh.key_size)) { ret = -EFAULT; - goto out; + goto out_free; } } - if (rxfh.rss_context) { - mutex_lock(&dev->ethtool->rss_lock); - locked = true; - } + mutex_lock(&dev->ethtool->rss_lock); + + ret = ethtool_check_flow_types(dev, rxfh.input_xfrm); + if (ret) + goto out_unlock; if (rxfh.rss_context && rxfh_dev.rss_delete) { ret = ethtool_check_rss_ctx_busy(dev, rxfh.rss_context); if (ret) - goto out; + goto out_unlock; } if (create) { + u32 limit, ctx_id; + if (rxfh_dev.rss_delete) { ret = -EINVAL; - goto out; + goto out_unlock; } ctx = ethtool_rxfh_ctx_alloc(ops, dev_indir_size, dev_key_size); if (!ctx) { ret = -ENOMEM; - goto out; + goto out_unlock; } - if (ops->create_rxfh_context) { - u32 limit = ops->rxfh_max_num_contexts ?: U32_MAX; - u32 ctx_id; - - /* driver uses new API, core allocates ID */ - ret = xa_alloc(&dev->ethtool->rss_ctx, &ctx_id, ctx, - XA_LIMIT(1, limit - 1), - GFP_KERNEL_ACCOUNT); - if (ret < 0) { - kfree(ctx); - goto out; - } - WARN_ON(!ctx_id); /* can't happen */ - rxfh.rss_context = ctx_id; + limit = ops->rxfh_max_num_contexts ?: U32_MAX; + ret = xa_alloc(&dev->ethtool->rss_ctx, &ctx_id, ctx, + XA_LIMIT(1, limit - 1), GFP_KERNEL_ACCOUNT); + if (ret < 0) { + kfree(ctx); + goto out_unlock; } + WARN_ON(!ctx_id); /* can't happen */ + rxfh.rss_context = ctx_id; } else if (rxfh.rss_context) { ctx = xa_load(&dev->ethtool->rss_ctx, rxfh.rss_context); if (!ctx) { ret = -ENOENT; - goto out; + goto out_unlock; } } rxfh_dev.hfunc = rxfh.hfunc; rxfh_dev.rss_context = rxfh.rss_context; rxfh_dev.input_xfrm = rxfh.input_xfrm; - if (rxfh.rss_context && ops->create_rxfh_context) { - if 
(create) { - ret = ops->create_rxfh_context(dev, ctx, &rxfh_dev, - extack); - /* Make sure driver populates defaults */ - WARN_ON_ONCE(!ret && !rxfh_dev.key && - ops->rxfh_per_ctx_key && - !memchr_inv(ethtool_rxfh_context_key(ctx), - 0, ctx->key_size)); - } else if (rxfh_dev.rss_delete) { - ret = ops->remove_rxfh_context(dev, ctx, - rxfh.rss_context, - extack); - } else { - ret = ops->modify_rxfh_context(dev, ctx, &rxfh_dev, - extack); - } - } else { + if (!rxfh.rss_context) { ret = ops->set_rxfh(dev, &rxfh_dev, extack); + } else if (create) { + ret = ops->create_rxfh_context(dev, ctx, &rxfh_dev, extack); + /* Make sure driver populates defaults */ + WARN_ON_ONCE(!ret && !rxfh_dev.key && ops->rxfh_per_ctx_key && + !memchr_inv(ethtool_rxfh_context_key(ctx), 0, + ctx->key_size)); + } else if (rxfh_dev.rss_delete) { + ret = ops->remove_rxfh_context(dev, ctx, rxfh.rss_context, + extack); + } else { + ret = ops->modify_rxfh_context(dev, ctx, &rxfh_dev, extack); } if (ret) { if (create) { /* failed to create, free our new tracking entry */ - if (ops->create_rxfh_context) - xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context); + xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context); kfree(ctx); } - goto out; + goto out_unlock; } + mod = !create && !rxfh_dev.rss_delete; if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, rss_context), &rxfh_dev.rss_context, sizeof(rxfh_dev.rss_context))) @@ -1641,36 +1705,6 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, dev->priv_flags |= IFF_RXFH_CONFIGURED; } /* Update rss_ctx tracking */ - if (create && !ops->create_rxfh_context) { - /* driver uses old API, it chose context ID */ - if (WARN_ON(xa_load(&dev->ethtool->rss_ctx, rxfh_dev.rss_context))) { - /* context ID reused, our tracking is screwed */ - kfree(ctx); - goto out; - } - /* Allocate the exact ID the driver gave us */ - if (xa_is_err(xa_store(&dev->ethtool->rss_ctx, rxfh_dev.rss_context, - ctx, GFP_KERNEL))) { - kfree(ctx); - goto out; - } - - /* 
Fetch the defaults for the old API, in the new API drivers - * should write defaults into ctx themselves. - */ - rxfh_dev.indir = (u32 *)rss_config; - rxfh_dev.indir_size = dev_indir_size; - - rxfh_dev.key = rss_config + indir_bytes; - rxfh_dev.key_size = dev_key_size; - - ret = ops->get_rxfh(dev, &rxfh_dev); - if (WARN_ON(ret)) { - xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context); - kfree(ctx); - goto out; - } - } if (rxfh_dev.rss_delete) { WARN_ON(xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context) != ctx); kfree(ctx); @@ -1693,10 +1727,12 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, ctx->input_xfrm = rxfh_dev.input_xfrm; } -out: - if (locked) - mutex_unlock(&dev->ethtool->rss_lock); +out_unlock: + mutex_unlock(&dev->ethtool->rss_lock); +out_free: kfree(rss_config); + if (mod) + ethtool_rss_notify(dev, rxfh.rss_context); return ret; } @@ -1808,7 +1844,7 @@ static int ethtool_set_wol(struct net_device *dev, char __user *useraddr) return ret; dev->ethtool->wol_enabled = !!wol.wolopts; - ethtool_notify(dev, ETHTOOL_MSG_WOL_NTF, NULL); + ethtool_notify(dev, ETHTOOL_MSG_WOL_NTF); return 0; } @@ -1884,7 +1920,7 @@ static int ethtool_set_eee(struct net_device *dev, char __user *useraddr) eee_to_keee(&keee, &eee); ret = dev->ethtool_ops->set_eee(dev, &keee); if (!ret) - ethtool_notify(dev, ETHTOOL_MSG_EEE_NTF, NULL); + ethtool_notify(dev, ETHTOOL_MSG_EEE_NTF); return ret; } @@ -2124,7 +2160,7 @@ static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev, ret = dev->ethtool_ops->set_coalesce(dev, &coalesce, &kernel_coalesce, NULL); if (!ret) - ethtool_notify(dev, ETHTOOL_MSG_COALESCE_NTF, NULL); + ethtool_notify(dev, ETHTOOL_MSG_COALESCE_NTF); return ret; } @@ -2168,7 +2204,7 @@ static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr) ret = dev->ethtool_ops->set_ringparam(dev, &ringparam, &kernel_ringparam, NULL); if (!ret) - ethtool_notify(dev, ETHTOOL_MSG_RINGS_NTF, NULL); + ethtool_notify(dev, 
ETHTOOL_MSG_RINGS_NTF); return ret; } @@ -2235,7 +2271,7 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev, ret = dev->ethtool_ops->set_channels(dev, &channels); if (!ret) - ethtool_notify(dev, ETHTOOL_MSG_CHANNELS_NTF, NULL); + ethtool_notify(dev, ETHTOOL_MSG_CHANNELS_NTF); return ret; } @@ -2266,7 +2302,7 @@ static int ethtool_set_pauseparam(struct net_device *dev, void __user *useraddr) ret = dev->ethtool_ops->set_pauseparam(dev, &pauseparam); if (!ret) - ethtool_notify(dev, ETHTOOL_MSG_PAUSE_NTF, NULL); + ethtool_notify(dev, ETHTOOL_MSG_PAUSE_NTF); return ret; } @@ -3268,7 +3304,7 @@ __dev_ethtool(struct net *net, struct ifreq *ifr, void __user *useraddr, rc = ethtool_set_value_void(dev, useraddr, dev->ethtool_ops->set_msglevel); if (!rc) - ethtool_notify(dev, ETHTOOL_MSG_DEBUG_NTF, NULL); + ethtool_notify(dev, ETHTOOL_MSG_DEBUG_NTF); break; case ETHTOOL_GEEE: rc = ethtool_get_eee(dev, useraddr); @@ -3332,20 +3368,24 @@ __dev_ethtool(struct net *net, struct ifreq *ifr, void __user *useraddr, rc = ethtool_get_value(dev, useraddr, ethcmd, dev->ethtool_ops->get_priv_flags); if (!rc) - ethtool_notify(dev, ETHTOOL_MSG_PRIVFLAGS_NTF, NULL); + ethtool_notify(dev, ETHTOOL_MSG_PRIVFLAGS_NTF); break; case ETHTOOL_SPFLAGS: rc = ethtool_set_value(dev, useraddr, dev->ethtool_ops->set_priv_flags); break; case ETHTOOL_GRXFH: + rc = ethtool_get_rxfh_fields(dev, ethcmd, useraddr); + break; + case ETHTOOL_SRXFH: + rc = ethtool_set_rxfh_fields(dev, ethcmd, useraddr); + break; case ETHTOOL_GRXRINGS: case ETHTOOL_GRXCLSRLCNT: case ETHTOOL_GRXCLSRULE: case ETHTOOL_GRXCLSRLALL: rc = ethtool_get_rxnfc(dev, ethcmd, useraddr); break; - case ETHTOOL_SRXFH: case ETHTOOL_SRXCLSRLDEL: case ETHTOOL_SRXCLSRLINS: rc = ethtool_set_rxnfc(dev, ethcmd, useraddr); diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 9de828df46cd..b1f8999c1adc 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -455,10 +455,15 @@ static int 
ethnl_default_parse(struct ethnl_req_info *req_info, if (request_ops->parse_request) { ret = request_ops->parse_request(req_info, tb, info->extack); if (ret < 0) - return ret; + goto err_dev; } return 0; + +err_dev: + netdev_put(req_info->dev, &req_info->dev_tracker); + req_info->dev = NULL; + return ret; } /** @@ -508,7 +513,7 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info) ret = ethnl_default_parse(req_info, info, ops, !ops->allow_nodev_do); if (ret < 0) - goto err_dev; + goto err_free; ethnl_init_reply_data(reply_data, ops, req_info->dev); rtnl_lock(); @@ -554,6 +559,7 @@ err_cleanup: ops->cleanup_data(reply_data); err_dev: netdev_put(req_info->dev, &req_info->dev_tracker); +err_free: kfree(reply_data); kfree(req_info); return ret; @@ -656,6 +662,8 @@ static int ethnl_default_start(struct netlink_callback *cb) } ret = ethnl_default_parse(req_info, &info->info, ops, false); + if (ret < 0) + goto free_reply_data; if (req_info->dev) { /* We ignore device specification in dump requests but as the * same parser as for non-dump (doit) requests is used, it @@ -664,8 +672,6 @@ static int ethnl_default_start(struct netlink_callback *cb) netdev_put(req_info->dev, &req_info->dev_tracker); req_info->dev = NULL; } - if (ret < 0) - goto free_reply_data; ctx->ops = ops; ctx->req_info = req_info; @@ -714,13 +720,13 @@ static int ethnl_perphy_start(struct netlink_callback *cb) * the dev's ifindex, .dumpit() will grab and release the netdev itself. 
*/ ret = ethnl_default_parse(req_info, &info->info, ops, false); + if (ret < 0) + goto free_reply_data; if (req_info->dev) { phy_ctx->ifindex = req_info->dev->ifindex; netdev_put(req_info->dev, &req_info->dev_tracker); req_info->dev = NULL; } - if (ret < 0) - goto free_reply_data; ctx->ops = ops; ctx->req_info = req_info; @@ -863,8 +869,8 @@ static int ethnl_default_done(struct netlink_callback *cb) static int ethnl_default_set_doit(struct sk_buff *skb, struct genl_info *info) { const struct ethnl_request_ops *ops; - struct ethnl_req_info req_info = {}; const u8 cmd = info->genlhdr->cmd; + struct ethnl_req_info *req_info; struct net_device *dev; int ret; @@ -874,20 +880,22 @@ static int ethnl_default_set_doit(struct sk_buff *skb, struct genl_info *info) if (GENL_REQ_ATTR_CHECK(info, ops->hdr_attr)) return -EINVAL; - ret = ethnl_parse_header_dev_get(&req_info, info->attrs[ops->hdr_attr], - genl_info_net(info), info->extack, - true); + req_info = kzalloc(ops->req_info_size, GFP_KERNEL); + if (!req_info) + return -ENOMEM; + + ret = ethnl_default_parse(req_info, info, ops, true); if (ret < 0) - return ret; + goto out_free_req; if (ops->set_validate) { - ret = ops->set_validate(&req_info, info); + ret = ops->set_validate(req_info, info); /* 0 means nothing to do */ if (ret <= 0) goto out_dev; } - dev = req_info.dev; + dev = req_info->dev; rtnl_lock(); netdev_lock_ops(dev); @@ -902,14 +910,14 @@ static int ethnl_default_set_doit(struct sk_buff *skb, struct genl_info *info) if (ret < 0) goto out_free_cfg; - ret = ops->set(&req_info, info); + ret = ops->set(req_info, info); if (ret < 0) goto out_ops; swap(dev->cfg, dev->cfg_pending); if (!ret) goto out_ops; - ethtool_notify(dev, ops->set_ntf_cmd, NULL); + ethnl_notify(dev, ops->set_ntf_cmd, req_info); ret = 0; out_ops: @@ -921,7 +929,9 @@ out_tie_cfg: netdev_unlock_ops(dev); rtnl_unlock(); out_dev: - ethnl_parse_header_dev_put(&req_info); + ethnl_parse_header_dev_put(req_info); +out_free_req: + kfree(req_info); return ret; 
} @@ -942,11 +952,12 @@ ethnl_default_notify_ops[ETHTOOL_MSG_KERNEL_MAX + 1] = { [ETHTOOL_MSG_MODULE_NTF] = &ethnl_module_request_ops, [ETHTOOL_MSG_PLCA_NTF] = &ethnl_plca_cfg_request_ops, [ETHTOOL_MSG_MM_NTF] = &ethnl_mm_request_ops, + [ETHTOOL_MSG_RSS_NTF] = &ethnl_rss_request_ops, }; /* default notification handler */ static void ethnl_default_notify(struct net_device *dev, unsigned int cmd, - const void *data) + const struct ethnl_req_info *orig_req_info) { struct ethnl_reply_data *reply_data; const struct ethnl_request_ops *ops; @@ -975,6 +986,11 @@ static void ethnl_default_notify(struct net_device *dev, unsigned int cmd, req_info->dev = dev; req_info->flags |= ETHTOOL_FLAG_COMPACT_BITSETS; + if (orig_req_info) { + req_info->phy_index = orig_req_info->phy_index; + memcpy(&req_info[1], &orig_req_info[1], + ops->req_info_size - sizeof(*req_info)); + } netdev_ops_assert_locked(dev); @@ -1025,7 +1041,7 @@ err_rep: /* notifications */ typedef void (*ethnl_notify_handler_t)(struct net_device *dev, unsigned int cmd, - const void *data); + const struct ethnl_req_info *req_info); static const ethnl_notify_handler_t ethnl_notify_handlers[] = { [ETHTOOL_MSG_LINKINFO_NTF] = ethnl_default_notify, @@ -1043,9 +1059,11 @@ static const ethnl_notify_handler_t ethnl_notify_handlers[] = { [ETHTOOL_MSG_MODULE_NTF] = ethnl_default_notify, [ETHTOOL_MSG_PLCA_NTF] = ethnl_default_notify, [ETHTOOL_MSG_MM_NTF] = ethnl_default_notify, + [ETHTOOL_MSG_RSS_NTF] = ethnl_default_notify, }; -void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data) +void ethnl_notify(struct net_device *dev, unsigned int cmd, + const struct ethnl_req_info *req_info) { if (unlikely(!ethnl_ok)) return; @@ -1053,18 +1071,23 @@ void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data) if (likely(cmd < ARRAY_SIZE(ethnl_notify_handlers) && ethnl_notify_handlers[cmd])) - ethnl_notify_handlers[cmd](dev, cmd, data); + ethnl_notify_handlers[cmd](dev, cmd, req_info); else WARN_ONCE(1,
"notification %u not implemented (dev=%s)\n", cmd, netdev_name(dev)); } + +void ethtool_notify(struct net_device *dev, unsigned int cmd) +{ + ethnl_notify(dev, cmd, NULL); +} EXPORT_SYMBOL(ethtool_notify); static void ethnl_notify_features(struct netdev_notifier_info *info) { struct net_device *dev = netdev_notifier_info_to_dev(info); - ethtool_notify(dev, ETHTOOL_MSG_FEATURES_NTF, NULL); + ethtool_notify(dev, ETHTOOL_MSG_FEATURES_NTF); } static int ethnl_netdev_event(struct notifier_block *this, unsigned long event, diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 91b953924af3..94a7eb402022 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -23,6 +23,8 @@ void *ethnl_dump_put(struct sk_buff *skb, struct netlink_callback *cb, u8 cmd); void *ethnl_bcastmsg_put(struct sk_buff *skb, u8 cmd); void *ethnl_unicast_put(struct sk_buff *skb, u32 portid, u32 seq, u8 cmd); int ethnl_multicast(struct sk_buff *skb, struct net_device *dev); +void ethnl_notify(struct net_device *dev, unsigned int cmd, + const struct ethnl_req_info *req_info); /** * ethnl_strz_size() - calculate attribute length for fixed size string @@ -337,6 +339,8 @@ int ethnl_sock_priv_set(struct sk_buff *skb, struct net_device *dev, u32 portid, * header is already filled on entry, the rest up to @repdata_offset * is zero initialized. This callback should only modify type specific * request info by parsed attributes from request message. + * Called for both GET and SET. Information parsed for SET will + * be conveyed to the req_info used during NTF generation. * @prepare_data: * Retrieve and prepare data needed to compose a reply message. Calls to * ethtool_ops handlers are limited to this callback. 
Common reply data @@ -463,7 +467,7 @@ extern const struct nla_policy ethnl_channels_set_policy[ETHTOOL_A_CHANNELS_COMB extern const struct nla_policy ethnl_coalesce_get_policy[ETHTOOL_A_COALESCE_HEADER + 1]; extern const struct nla_policy ethnl_coalesce_set_policy[ETHTOOL_A_COALESCE_MAX + 1]; extern const struct nla_policy ethnl_pause_get_policy[ETHTOOL_A_PAUSE_STATS_SRC + 1]; -extern const struct nla_policy ethnl_pause_set_policy[ETHTOOL_A_PAUSE_TX + 1]; +extern const struct nla_policy ethnl_pause_set_policy[ETHTOOL_A_PAUSE_STATS_SRC + 1]; extern const struct nla_policy ethnl_eee_get_policy[ETHTOOL_A_EEE_HEADER + 1]; extern const struct nla_policy ethnl_eee_set_policy[ETHTOOL_A_EEE_TX_LPI_TIMER + 1]; extern const struct nla_policy ethnl_tsinfo_get_policy[ETHTOOL_A_TSINFO_MAX + 1]; diff --git a/net/ethtool/pause.c b/net/ethtool/pause.c index f7c847aeb1a2..0f9af1e66548 100644 --- a/net/ethtool/pause.c +++ b/net/ethtool/pause.c @@ -168,6 +168,7 @@ const struct nla_policy ethnl_pause_set_policy[] = { [ETHTOOL_A_PAUSE_AUTONEG] = { .type = NLA_U8 }, [ETHTOOL_A_PAUSE_RX] = { .type = NLA_U8 }, [ETHTOOL_A_PAUSE_TX] = { .type = NLA_U8 }, + [ETHTOOL_A_PAUSE_STATS_SRC] = { .type = NLA_REJECT }, }; static int diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c index 4f6b99eab2a6..24def9c9dd54 100644 --- a/net/ethtool/pse-pd.c +++ b/net/ethtool/pse-pd.c @@ -11,6 +11,7 @@ #include "netlink.h" #include <linux/ethtool_netlink.h> #include <linux/ethtool.h> +#include <linux/export.h> #include <linux/phy.h> struct pse_req_info { @@ -83,6 +84,8 @@ static int pse_reply_size(const struct ethnl_req_info *req_base, const struct ethtool_pse_control_status *st = &data->status; int len = 0; + if (st->pw_d_id) + len += nla_total_size(sizeof(u32)); /* _PSE_PW_D_ID */ if (st->podl_admin_state > 0) len += nla_total_size(sizeof(u32)); /* _PODL_PSE_ADMIN_STATE */ if (st->podl_pw_status > 0) @@ -109,6 +112,9 @@ static int pse_reply_size(const struct ethnl_req_info *req_base, len += 
st->c33_pw_limit_nb_ranges * (nla_total_size(0) + nla_total_size(sizeof(u32)) * 2); + if (st->prio_max) + /* _PSE_PRIO_MAX + _PSE_PRIO */ + len += nla_total_size(sizeof(u32)) * 2; return len; } @@ -148,6 +154,11 @@ static int pse_fill_reply(struct sk_buff *skb, const struct pse_reply_data *data = PSE_REPDATA(reply_base); const struct ethtool_pse_control_status *st = &data->status; + if (st->pw_d_id && + nla_put_u32(skb, ETHTOOL_A_PSE_PW_D_ID, + st->pw_d_id)) + return -EMSGSIZE; + if (st->podl_admin_state > 0 && nla_put_u32(skb, ETHTOOL_A_PODL_PSE_ADMIN_STATE, st->podl_admin_state)) @@ -198,6 +209,11 @@ static int pse_fill_reply(struct sk_buff *skb, pse_put_pw_limit_ranges(skb, st)) return -EMSGSIZE; + if (st->prio_max && + (nla_put_u32(skb, ETHTOOL_A_PSE_PRIO_MAX, st->prio_max) || + nla_put_u32(skb, ETHTOOL_A_PSE_PRIO, st->prio))) + return -EMSGSIZE; + return 0; } @@ -219,6 +235,7 @@ const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1] = { NLA_POLICY_RANGE(NLA_U32, ETHTOOL_C33_PSE_ADMIN_STATE_DISABLED, ETHTOOL_C33_PSE_ADMIN_STATE_ENABLED), [ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT] = { .type = NLA_U32 }, + [ETHTOOL_A_PSE_PRIO] = { .type = NLA_U32 }, }; static int @@ -267,6 +284,15 @@ ethnl_set_pse(struct ethnl_req_info *req_info, struct genl_info *info) if (ret) return ret; + if (tb[ETHTOOL_A_PSE_PRIO]) { + unsigned int prio; + + prio = nla_get_u32(tb[ETHTOOL_A_PSE_PRIO]); + ret = pse_ethtool_set_prio(phydev->psec, info->extack, prio); + if (ret) + return ret; + } + if (tb[ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT]) { unsigned int pw_limit; @@ -315,3 +341,42 @@ const struct ethnl_request_ops ethnl_pse_request_ops = { .set = ethnl_set_pse, /* PSE has no notification */ }; + +void ethnl_pse_send_ntf(struct net_device *netdev, unsigned long notifs) +{ + void *reply_payload; + struct sk_buff *skb; + int reply_len; + int ret; + + ASSERT_RTNL(); + + if (!netdev || !notifs) + return; + + reply_len = ethnl_reply_header_size() + + nla_total_size(sizeof(u32)); /* 
_PSE_NTF_EVENTS */ + + skb = genlmsg_new(reply_len, GFP_KERNEL); + if (!skb) + return; + + reply_payload = ethnl_bcastmsg_put(skb, ETHTOOL_MSG_PSE_NTF); + if (!reply_payload) + goto err_skb; + + ret = ethnl_fill_reply_header(skb, netdev, ETHTOOL_A_PSE_NTF_HEADER); + if (ret < 0) + goto err_skb; + + if (nla_put_uint(skb, ETHTOOL_A_PSE_NTF_EVENTS, notifs)) + goto err_skb; + + genlmsg_end(skb, reply_payload); + ethnl_multicast(skb, netdev); + return; + +err_skb: + nlmsg_free(skb); +} +EXPORT_SYMBOL_GPL(ethnl_pse_send_ntf); diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index 6d9b1769896b..41ab9fc67652 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -12,6 +12,7 @@ struct rss_req_info { struct rss_reply_data { struct ethnl_reply_data base; + bool has_flow_hash; bool no_key_fields; u32 indir_size; u32 hkey_size; @@ -19,6 +20,37 @@ struct rss_reply_data { u32 input_xfrm; u32 *indir_table; u8 *hkey; + int flow_hash[__ETHTOOL_A_FLOW_CNT]; +}; + +static const u8 ethtool_rxfh_ft_nl2ioctl[] = { + [ETHTOOL_A_FLOW_ETHER] = ETHER_FLOW, + [ETHTOOL_A_FLOW_IP4] = IPV4_FLOW, + [ETHTOOL_A_FLOW_IP6] = IPV6_FLOW, + [ETHTOOL_A_FLOW_TCP4] = TCP_V4_FLOW, + [ETHTOOL_A_FLOW_UDP4] = UDP_V4_FLOW, + [ETHTOOL_A_FLOW_SCTP4] = SCTP_V4_FLOW, + [ETHTOOL_A_FLOW_AH_ESP4] = AH_ESP_V4_FLOW, + [ETHTOOL_A_FLOW_TCP6] = TCP_V6_FLOW, + [ETHTOOL_A_FLOW_UDP6] = UDP_V6_FLOW, + [ETHTOOL_A_FLOW_SCTP6] = SCTP_V6_FLOW, + [ETHTOOL_A_FLOW_AH_ESP6] = AH_ESP_V6_FLOW, + [ETHTOOL_A_FLOW_AH4] = AH_V4_FLOW, + [ETHTOOL_A_FLOW_ESP4] = ESP_V4_FLOW, + [ETHTOOL_A_FLOW_AH6] = AH_V6_FLOW, + [ETHTOOL_A_FLOW_ESP6] = ESP_V6_FLOW, + [ETHTOOL_A_FLOW_GTPU4] = GTPU_V4_FLOW, + [ETHTOOL_A_FLOW_GTPU6] = GTPU_V6_FLOW, + [ETHTOOL_A_FLOW_GTPC4] = GTPC_V4_FLOW, + [ETHTOOL_A_FLOW_GTPC6] = GTPC_V6_FLOW, + [ETHTOOL_A_FLOW_GTPC_TEID4] = GTPC_TEID_V4_FLOW, + [ETHTOOL_A_FLOW_GTPC_TEID6] = GTPC_TEID_V6_FLOW, + [ETHTOOL_A_FLOW_GTPU_EH4] = GTPU_EH_V4_FLOW, + [ETHTOOL_A_FLOW_GTPU_EH6] = GTPU_EH_V6_FLOW, + [ETHTOOL_A_FLOW_GTPU_UL4] = 
GTPU_UL_V4_FLOW, + [ETHTOOL_A_FLOW_GTPU_UL6] = GTPU_UL_V6_FLOW, + [ETHTOOL_A_FLOW_GTPU_DL4] = GTPU_DL_V4_FLOW, + [ETHTOOL_A_FLOW_GTPU_DL6] = GTPU_DL_V6_FLOW, }; #define RSS_REQINFO(__req_base) \ @@ -49,6 +81,37 @@ rss_parse_request(struct ethnl_req_info *req_info, struct nlattr **tb, return 0; } +static void +rss_prepare_flow_hash(const struct rss_req_info *req, struct net_device *dev, + struct rss_reply_data *data, const struct genl_info *info) +{ + int i; + + data->has_flow_hash = false; + + if (!dev->ethtool_ops->get_rxfh_fields) + return; + if (req->rss_context && !dev->ethtool_ops->rxfh_per_ctx_fields) + return; + + mutex_lock(&dev->ethtool->rss_lock); + for (i = 1; i < __ETHTOOL_A_FLOW_CNT; i++) { + struct ethtool_rxfh_fields fields = { + .flow_type = ethtool_rxfh_ft_nl2ioctl[i], + .rss_context = req->rss_context, + }; + + if (dev->ethtool_ops->get_rxfh_fields(dev, &fields)) { + data->flow_hash[i] = -1; /* Unsupported */ + continue; + } + + data->flow_hash[i] = fields.data; + data->has_flow_hash = true; + } + mutex_unlock(&dev->ethtool->rss_lock); +} + static int rss_prepare_get(const struct rss_req_info *request, struct net_device *dev, struct rss_reply_data *data, const struct genl_info *info) @@ -64,6 +127,7 @@ rss_prepare_get(const struct rss_req_info *request, struct net_device *dev, ret = ethnl_ops_begin(dev); if (ret < 0) return ret; + mutex_lock(&dev->ethtool->rss_lock); data->indir_size = 0; data->hkey_size = 0; @@ -77,7 +141,7 @@ rss_prepare_get(const struct rss_req_info *request, struct net_device *dev, rss_config = kzalloc(total_size, GFP_KERNEL); if (!rss_config) { ret = -ENOMEM; - goto out_ops; + goto out_unlock; } if (data->indir_size) @@ -92,11 +156,12 @@ rss_prepare_get(const struct rss_req_info *request, struct net_device *dev, ret = ops->get_rxfh(dev, &rxfh); if (ret) - goto out_ops; + goto out_unlock; data->hfunc = rxfh.hfunc; data->input_xfrm = rxfh.input_xfrm; -out_ops: +out_unlock: + mutex_unlock(&dev->ethtool->rss_lock); 
ethnl_ops_complete(dev); return ret; } @@ -108,12 +173,16 @@ rss_prepare_ctx(const struct rss_req_info *request, struct net_device *dev, struct ethtool_rxfh_context *ctx; u32 total_size, indir_bytes; u8 *rss_config; + int ret; data->no_key_fields = !dev->ethtool_ops->rxfh_per_ctx_key; + mutex_lock(&dev->ethtool->rss_lock); ctx = xa_load(&dev->ethtool->rss_ctx, request->rss_context); - if (!ctx) - return -ENOENT; + if (!ctx) { + ret = -ENOENT; + goto out_unlock; + } data->indir_size = ctx->indir_size; data->hkey_size = ctx->key_size; @@ -123,8 +192,10 @@ rss_prepare_ctx(const struct rss_req_info *request, struct net_device *dev, indir_bytes = data->indir_size * sizeof(u32); total_size = indir_bytes + data->hkey_size; rss_config = kzalloc(total_size, GFP_KERNEL); - if (!rss_config) - return -ENOMEM; + if (!rss_config) { + ret = -ENOMEM; + goto out_unlock; + } data->indir_table = (u32 *)rss_config; memcpy(data->indir_table, ethtool_rxfh_context_indir(ctx), indir_bytes); @@ -135,7 +206,21 @@ rss_prepare_ctx(const struct rss_req_info *request, struct net_device *dev, data->hkey_size); } - return 0; + ret = 0; +out_unlock: + mutex_unlock(&dev->ethtool->rss_lock); + return ret; +} + +static int +rss_prepare(const struct rss_req_info *request, struct net_device *dev, + struct rss_reply_data *data, const struct genl_info *info) +{ + rss_prepare_flow_hash(request, dev, data, info); + + if (request->rss_context) + return rss_prepare_ctx(request, dev, data, info); + return rss_prepare_get(request, dev, data, info); } static int @@ -153,14 +238,10 @@ rss_prepare_data(const struct ethnl_req_info *req_base, return -EOPNOTSUPP; /* Some drivers don't handle rss_context */ - if (request->rss_context) { - if (!ops->cap_rss_ctx_supported && !ops->create_rxfh_context) - return -EOPNOTSUPP; - - return rss_prepare_ctx(request, dev, data, info); - } + if (request->rss_context && !ops->create_rxfh_context) + return -EOPNOTSUPP; - return rss_prepare_get(request, dev, data, info); + return 
rss_prepare(request, dev, data, info); } static int @@ -174,7 +255,10 @@ rss_reply_size(const struct ethnl_req_info *req_base, nla_total_size(sizeof(u32)) + /* _RSS_HFUNC */ nla_total_size(sizeof(u32)) + /* _RSS_INPUT_XFRM */ nla_total_size(sizeof(u32) * data->indir_size) + /* _RSS_INDIR */ - nla_total_size(data->hkey_size); /* _RSS_HKEY */ + nla_total_size(data->hkey_size) + /* _RSS_HKEY */ + nla_total_size(0) + /* _RSS_FLOW_HASH */ + nla_total_size(sizeof(u32)) * ETHTOOL_A_FLOW_MAX + + 0; return len; } @@ -195,17 +279,34 @@ rss_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, sizeof(u32) * data->indir_size, data->indir_table))) return -EMSGSIZE; - if (data->no_key_fields) - return 0; - - if ((data->hfunc && - nla_put_u32(skb, ETHTOOL_A_RSS_HFUNC, data->hfunc)) || - (data->input_xfrm && - nla_put_u32(skb, ETHTOOL_A_RSS_INPUT_XFRM, data->input_xfrm)) || - (data->hkey_size && - nla_put(skb, ETHTOOL_A_RSS_HKEY, data->hkey_size, data->hkey))) + if (!data->no_key_fields && + ((data->hfunc && + nla_put_u32(skb, ETHTOOL_A_RSS_HFUNC, data->hfunc)) || + (data->input_xfrm && + nla_put_u32(skb, ETHTOOL_A_RSS_INPUT_XFRM, data->input_xfrm)) || + (data->hkey_size && + nla_put(skb, ETHTOOL_A_RSS_HKEY, data->hkey_size, data->hkey)))) return -EMSGSIZE; + if (data->has_flow_hash) { + struct nlattr *nest; + int i; + + nest = nla_nest_start(skb, ETHTOOL_A_RSS_FLOW_HASH); + if (!nest) + return -EMSGSIZE; + + for (i = 1; i < __ETHTOOL_A_FLOW_CNT; i++) { + if (data->flow_hash[i] >= 0 && + nla_put_uint(skb, i, data->flow_hash[i])) { + nla_nest_cancel(skb, nest); + return -EMSGSIZE; + } + } + + nla_nest_end(skb, nest); + } + return 0; } @@ -284,11 +385,7 @@ rss_dump_one_ctx(struct sk_buff *skb, struct netlink_callback *cb, if (ret < 0) goto err_cancel; - /* Context 0 is not currently storred or cached in the XArray */ - if (!rss_context) - ret = rss_prepare_get(&req, dev, &data, info); - else - ret = rss_prepare_ctx(&req, dev, &data, info); + ret = rss_prepare(&req, 
dev, &data, info); if (ret) goto err_cancel; @@ -358,6 +455,17 @@ int ethnl_rss_dumpit(struct sk_buff *skb, struct netlink_callback *cb) return ret; } +/* RSS_NTF */ + +void ethtool_rss_notify(struct net_device *dev, u32 rss_context) +{ + struct rss_req_info req_info = { + .rss_context = rss_context, + }; + + ethnl_notify(dev, ETHTOOL_MSG_RSS_NTF, &req_info.base); +} + const struct ethnl_request_ops ethnl_rss_request_ops = { .request_cmd = ETHTOOL_MSG_RSS_GET, .reply_cmd = ETHTOOL_MSG_RSS_GET_REPLY, diff --git a/net/handshake/tlshd.c b/net/handshake/tlshd.c index d6f52839827e..081093dfd553 100644 --- a/net/handshake/tlshd.c +++ b/net/handshake/tlshd.c @@ -230,6 +230,12 @@ static int tls_handshake_accept(struct handshake_req *req, if (ret < 0) goto out_cancel; } + if (treq->th_keyring) { + ret = nla_put_u32(msg, HANDSHAKE_A_ACCEPT_KEYRING, + treq->th_keyring); + if (ret < 0) + goto out_cancel; + } ret = nla_put_u32(msg, HANDSHAKE_A_ACCEPT_AUTH_MODE, treq->th_auth_mode); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index a648fff71ea7..c0440d61cf2f 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -966,6 +966,7 @@ static int arp_is_multicast(const void *pkey) static int arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { + enum skb_drop_reason drop_reason; const struct arphdr *arp; /* do not tweak dropwatch on an ARP we will ignore */ @@ -979,12 +980,15 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev, goto out_of_mem; /* ARP header, plus 2 device addresses, plus 2 IP addresses. 
*/ - if (!pskb_may_pull(skb, arp_hdr_len(dev))) + drop_reason = pskb_may_pull_reason(skb, arp_hdr_len(dev)); + if (drop_reason != SKB_NOT_DROPPED_YET) goto freeskb; arp = arp_hdr(skb); - if (arp->ar_hln != dev->addr_len || arp->ar_pln != 4) + if (arp->ar_hln != dev->addr_len || arp->ar_pln != 4) { + drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; goto freeskb; + } memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); @@ -996,7 +1000,7 @@ consumeskb: consume_skb(skb); return NET_RX_SUCCESS; freeskb: - kfree_skb(skb); + kfree_skb_reason(skb, drop_reason); out_of_mem: return NET_RX_DROP; } diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 4b5bc6eb52e7..c2b2cda1a7e5 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -109,7 +109,7 @@ void ip4_datagram_release_cb(struct sock *sk) rcu_read_lock(); dst = __sk_dst_get(sk); - if (!dst || !dst->obsolete || dst->ops->check(dst, 0)) { + if (!dst || !READ_ONCE(dst->obsolete) || dst->ops->check(dst, 0)) { rcu_read_unlock(); return; } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index d643bd1a0d9d..a2f04992f579 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -625,11 +625,6 @@ int fib_nh_common_init(struct net *net, struct fib_nh_common *nhc, if (encap) { struct lwtunnel_state *lwtstate; - if (encap_type == LWTUNNEL_ENCAP_NONE) { - NL_SET_ERR_MSG(extack, "LWT encap type not specified"); - err = -EINVAL; - goto lwt_failure; - } err = lwtunnel_build_state(net, encap_type, encap, nhc->nhc_family, cfg, &lwtstate, extack); @@ -1640,8 +1635,7 @@ int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nhc, nla_put_u32(skb, RTA_OIF, nhc->nhc_dev->ifindex)) goto nla_put_failure; - if (nhc->nhc_lwtstate && - lwtunnel_fill_encap(skb, nhc->nhc_lwtstate, + if (lwtunnel_fill_encap(skb, nhc->nhc_lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0) goto nla_put_failure; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 717cb7d3607a..2ffe73ea644f 100644 --- a/net/ipv4/icmp.c +++ 
b/net/ipv4/icmp.c @@ -311,18 +311,20 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, { struct dst_entry *dst = &rt->dst; struct inet_peer *peer; + struct net_device *dev; bool rc = true; if (!apply_ratelimit) return true; /* No rate limit on loopback */ - if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) + dev = dst_dev(dst); + if (dev && (dev->flags & IFF_LOOPBACK)) goto out; rcu_read_lock(); peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, - l3mdev_master_ifindex_rcu(dst->dev)); + l3mdev_master_ifindex_rcu(dev)); rc = inet_peer_xrlim_allow(peer, READ_ONCE(net->ipv4.sysctl_icmp_ratelimit)); rcu_read_unlock(); @@ -466,13 +468,13 @@ out_bh_enable: */ static struct net_device *icmp_get_route_lookup_dev(struct sk_buff *skb) { - struct net_device *route_lookup_dev = NULL; + struct net_device *dev = skb->dev; + const struct dst_entry *dst; - if (skb->dev) - route_lookup_dev = skb->dev; - else if (skb_dst(skb)) - route_lookup_dev = skb_dst(skb)->dev; - return route_lookup_dev; + if (dev) + return dev; + dst = skb_dst(skb); + return dst ? dst_dev(dst) : NULL; } static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4, @@ -869,7 +871,7 @@ static enum skb_drop_reason icmp_unreach(struct sk_buff *skb) struct net *net; u32 info = 0; - net = dev_net_rcu(skb_dst(skb)->dev); + net = skb_dst_dev_net_rcu(skb); /* * Incomplete header ? @@ -1012,7 +1014,7 @@ static enum skb_drop_reason icmp_echo(struct sk_buff *skb) struct icmp_bxm icmp_param; struct net *net; - net = dev_net_rcu(skb_dst(skb)->dev); + net = skb_dst_dev_net_rcu(skb); /* should there be an ICMP stat for ignored echos? 
*/ if (READ_ONCE(net->ipv4.sysctl_icmp_echo_ignore_all)) return SKB_NOT_DROPPED_YET; @@ -1182,7 +1184,7 @@ static enum skb_drop_reason icmp_timestamp(struct sk_buff *skb) return SKB_NOT_DROPPED_YET; out_err: - __ICMP_INC_STATS(dev_net_rcu(skb_dst(skb)->dev), ICMP_MIB_INERRORS); + __ICMP_INC_STATS(skb_dst_dev_net_rcu(skb), ICMP_MIB_INERRORS); return SKB_DROP_REASON_PKT_TOO_SMALL; } diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index d1769034b643..7182f1419c2a 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -427,7 +427,7 @@ static int igmpv3_sendpack(struct sk_buff *skb) pig->csum = ip_compute_csum(igmp_hdr(skb), igmplen); - return ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); + return ip_local_out(skb_dst_dev_net(skb), skb->sk, skb); } static int grec_size(struct ip_mc_list *pmc, int type, int gdel, int sdel) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 6906bedad19a..1e2df51427fe 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -168,7 +168,7 @@ static bool inet_use_bhash2_on_bind(const struct sock *sk) } static bool inet_bind_conflict(const struct sock *sk, struct sock *sk2, - kuid_t sk_uid, bool relax, + kuid_t uid, bool relax, bool reuseport_cb_ok, bool reuseport_ok) { int bound_dev_if2; @@ -185,12 +185,12 @@ static bool inet_bind_conflict(const struct sock *sk, struct sock *sk2, if (!relax || (!reuseport_ok && sk->sk_reuseport && sk2->sk_reuseport && reuseport_cb_ok && (sk2->sk_state == TCP_TIME_WAIT || - uid_eq(sk_uid, sock_i_uid(sk2))))) + uid_eq(uid, sk_uid(sk2))))) return true; } else if (!reuseport_ok || !sk->sk_reuseport || !sk2->sk_reuseport || !reuseport_cb_ok || (sk2->sk_state != TCP_TIME_WAIT && - !uid_eq(sk_uid, sock_i_uid(sk2)))) { + !uid_eq(uid, sk_uid(sk2)))) { return true; } } @@ -198,7 +198,7 @@ static bool inet_bind_conflict(const struct sock *sk, struct sock *sk2, } static bool __inet_bhash2_conflict(const struct sock *sk, struct sock *sk2, - kuid_t 
sk_uid, bool relax, + kuid_t uid, bool relax, bool reuseport_cb_ok, bool reuseport_ok) { if (ipv6_only_sock(sk2)) { @@ -211,20 +211,20 @@ static bool __inet_bhash2_conflict(const struct sock *sk, struct sock *sk2, #endif } - return inet_bind_conflict(sk, sk2, sk_uid, relax, + return inet_bind_conflict(sk, sk2, uid, relax, reuseport_cb_ok, reuseport_ok); } static bool inet_bhash2_conflict(const struct sock *sk, const struct inet_bind2_bucket *tb2, - kuid_t sk_uid, + kuid_t uid, bool relax, bool reuseport_cb_ok, bool reuseport_ok) { struct sock *sk2; sk_for_each_bound(sk2, &tb2->owners) { - if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax, + if (__inet_bhash2_conflict(sk, sk2, uid, relax, reuseport_cb_ok, reuseport_ok)) return true; } @@ -242,8 +242,8 @@ static int inet_csk_bind_conflict(const struct sock *sk, const struct inet_bind2_bucket *tb2, /* may be null */ bool relax, bool reuseport_ok) { - kuid_t uid = sock_i_uid((struct sock *)sk); struct sock_reuseport *reuseport_cb; + kuid_t uid = sk_uid(sk); bool reuseport_cb_ok; struct sock *sk2; @@ -287,11 +287,11 @@ static int inet_csk_bind_conflict(const struct sock *sk, static bool inet_bhash2_addr_any_conflict(const struct sock *sk, int port, int l3mdev, bool relax, bool reuseport_ok) { - kuid_t uid = sock_i_uid((struct sock *)sk); const struct net *net = sock_net(sk); struct sock_reuseport *reuseport_cb; struct inet_bind_hashbucket *head2; struct inet_bind2_bucket *tb2; + kuid_t uid = sk_uid(sk); bool conflict = false; bool reuseport_cb_ok; @@ -425,15 +425,13 @@ success: static inline int sk_reuseport_match(struct inet_bind_bucket *tb, struct sock *sk) { - kuid_t uid = sock_i_uid(sk); - if (tb->fastreuseport <= 0) return 0; if (!sk->sk_reuseport) return 0; if (rcu_access_pointer(sk->sk_reuseport_cb)) return 0; - if (!uid_eq(tb->fastuid, uid)) + if (!uid_eq(tb->fastuid, sk_uid(sk))) return 0; /* We only need to check the rcv_saddr if this tb was once marked * without fastreuseport and then was reset, as we can 
only know that @@ -458,14 +456,13 @@ static inline int sk_reuseport_match(struct inet_bind_bucket *tb, void inet_csk_update_fastreuse(struct inet_bind_bucket *tb, struct sock *sk) { - kuid_t uid = sock_i_uid(sk); bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN; if (hlist_empty(&tb->bhash2)) { tb->fastreuse = reuse; if (sk->sk_reuseport) { tb->fastreuseport = FASTREUSEPORT_ANY; - tb->fastuid = uid; + tb->fastuid = sk_uid(sk); tb->fast_rcv_saddr = sk->sk_rcv_saddr; tb->fast_ipv6_only = ipv6_only_sock(sk); tb->fast_sk_family = sk->sk_family; @@ -492,7 +489,7 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb, */ if (!sk_reuseport_match(tb, sk)) { tb->fastreuseport = FASTREUSEPORT_STRICT; - tb->fastuid = uid; + tb->fastuid = sk_uid(sk); tb->fast_rcv_saddr = sk->sk_rcv_saddr; tb->fast_ipv6_only = ipv6_only_sock(sk); tb->fast_sk_family = sk->sk_family; @@ -812,7 +809,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, ireq->ir_loc_addr, ireq->ir_rmt_port, - htons(ireq->ir_num), sk->sk_uid); + htons(ireq->ir_num), sk_uid(sk)); security_req_classify_flow(req, flowi4_to_flowi_common(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) @@ -849,7 +846,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? 
opt->opt.faddr : ireq->ir_rmt_addr, ireq->ir_loc_addr, ireq->ir_rmt_port, - htons(ireq->ir_num), sk->sk_uid); + htons(ireq->ir_num), sk_uid(sk)); security_req_classify_flow(req, flowi4_to_flowi_common(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) @@ -887,15 +884,6 @@ static void syn_ack_recalc(struct request_sock *req, req->num_timeout >= rskq_defer_accept - 1; } -int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req) -{ - int err = req->rsk_ops->rtx_syn_ack(parent, req); - - if (!err) - req->num_retrans++; - return err; -} - static struct request_sock * reqsk_alloc_noprof(const struct request_sock_ops *ops, struct sock *sk_listener, bool attach_listener) @@ -1135,7 +1123,7 @@ static void reqsk_timer_handler(struct timer_list *t) req->rsk_ops->syn_ack_timeout(req); if (!expire && (!resend || - !inet_rtx_syn_ack(sk_listener, req) || + !tcp_rtx_synack(sk_listener, req) || inet_rsk(req)->acked)) { if (req->num_timeout++ == 0) atomic_dec(&queue->young); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 1d1d6ad53f4c..2fa53b16fe77 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -181,7 +181,7 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, goto errout; #endif - r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk)); + r->idiag_uid = from_kuid_munged(user_ns, sk_uid(sk)); r->idiag_inode = sock_i_ino(sk); memset(&inet_sockopt, 0, sizeof(inet_sockopt)); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 77a0b52b2eab..ceeeec9b7290 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -721,8 +721,8 @@ static int inet_reuseport_add_sock(struct sock *sk, { struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash; const struct hlist_nulls_node *node; + kuid_t uid = sk_uid(sk); struct sock *sk2; - kuid_t uid = sock_i_uid(sk); sk_nulls_for_each_rcu(sk2, node, &ilb->nulls_head) { if (sk2 != sk && @@ -730,7 +730,7 @@ static int 
inet_reuseport_add_sock(struct sock *sk, ipv6_only_sock(sk2) == ipv6_only_sock(sk) && sk2->sk_bound_dev_if == sk->sk_bound_dev_if && inet_csk(sk2)->icsk_bind_hash == tb && - sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) && + sk2->sk_reuseport && uid_eq(uid, sk_uid(sk2)) && inet_rcv_saddr_equal(sk, sk2, false)) return reuseport_add_sock(sk, sk2, inet_rcv_saddr_any(sk)); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 64b3fb3208af..b2584cce90ae 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -476,7 +476,7 @@ out_fail: /* Process an incoming IP datagram fragment. */ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user) { - struct net_device *dev = skb->dev ? : skb_dst(skb)->dev; + struct net_device *dev = skb->dev ? : skb_dst_dev(skb); int vif = l3mdev_master_ifindex_rcu(dev); struct ipq *qp; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 5a49eb99e5c4..fc323994b1fa 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -226,6 +226,12 @@ resubmit: static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { + if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) { + __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS); + kfree_skb_reason(skb, SKB_DROP_REASON_NOMEM); + return 0; + } + skb_clear_delivery_time(skb); __skb_pull(skb, skb_network_header_len(skb)); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index a2705d454fd6..10a1d182fd84 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -116,7 +116,7 @@ int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) skb->protocol = htons(ETH_P_IP); return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, - net, sk, skb, NULL, skb_dst(skb)->dev, + net, sk, skb, NULL, skb_dst_dev(skb), dst_output); } @@ -199,7 +199,7 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s { struct dst_entry *dst = skb_dst(skb); struct rtable *rt = dst_rtable(dst); - struct net_device *dev = 
dst->dev; + struct net_device *dev = dst_dev(dst); unsigned int hh_len = LL_RESERVED_SPACE(dev); struct neighbour *neigh; bool is_v6gw = false; @@ -425,7 +425,7 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb) int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev; + struct net_device *dev = skb_dst_dev(skb), *indev = skb->dev; skb->dev = dev; skb->protocol = htons(ETH_P_IP); @@ -1222,8 +1222,7 @@ alloc_new_skb: if (WARN_ON_ONCE(copy > msg->msg_iter.count)) goto error; - err = skb_splice_from_iter(skb, &msg->msg_iter, copy, - sk->sk_allocation); + err = skb_splice_from_iter(skb, &msg->msg_iter, copy); if (err < 0) goto error; copy = err; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 678b8f96e3e9..aaeb5d16f0c9 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -668,7 +668,7 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, ip_tunnel_adj_headroom(dev, headroom); iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl, - df, !net_eq(tunnel->net, dev_net(dev))); + df, !net_eq(tunnel->net, dev_net(dev)), 0); return; tx_error: DEV_STATS_INC(dev, tx_errors); @@ -857,7 +857,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, ip_tunnel_adj_headroom(dev, max_headroom); iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl, - df, !net_eq(tunnel->net, dev_net(dev))); + df, !net_eq(tunnel->net, dev_net(dev)), 0); return; #if IS_ENABLED(CONFIG_IPV6) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index f65d2f727381..cc9915543637 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -49,7 +49,8 @@ EXPORT_SYMBOL(ip6tun_encaps); void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 proto, - __u8 tos, __u8 ttl, __be16 df, bool xnet) + __u8 tos, __u8 ttl, __be16 df, bool xnet, + u16 ipcb_flags) { 
int pkt_len = skb->len - skb_inner_network_offset(skb); struct net *net = dev_net(rt->dst.dev); @@ -62,6 +63,7 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, skb_clear_hash_if_not_l4(skb); skb_dst_set(skb, &rt->dst); memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + IPCB(skb)->flags = ipcb_flags; /* Push down and install the IP header. */ skb_push(skb, sizeof(struct iphdr)); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 686e4f3d83aa..95b6bb78fcd2 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -229,7 +229,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, goto tx_error_icmp; } - tdev = dst->dev; + tdev = dst_dev(dst); if (tdev == dev) { dst_release(dst); @@ -259,7 +259,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, xmit: skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev))); skb_dst_set(skb, dst); - skb->dev = skb_dst(skb)->dev; + skb->dev = skb_dst_dev(skb); err = dst_output(tunnel->net, skb->sk, skb); if (net_xmit_eval(err) == 0) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index c56b6fe6f0d7..22a7889876c1 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -274,9 +274,9 @@ static int __init ic_open_devs(void) /* wait for a carrier on at least one device */ start = jiffies; - next_msg = start + msecs_to_jiffies(20000); + next_msg = start + secs_to_jiffies(20); while (time_before(jiffies, start + - msecs_to_jiffies(carrier_timeout * 1000))) { + secs_to_jiffies(carrier_timeout))) { int wait, elapsed; rtnl_lock(); @@ -295,7 +295,7 @@ static int __init ic_open_devs(void) elapsed = jiffies_to_msecs(jiffies - start); wait = (carrier_timeout * 1000 - elapsed + 500) / 1000; pr_info("Waiting up to %d more seconds for network.\n", wait); - next_msg = jiffies + msecs_to_jiffies(20000); + next_msg = jiffies + secs_to_jiffies(20); } have_carrier: diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a7d09ae9d761..3a2044e6033d 100644 --- 
a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1853,20 +1853,19 @@ static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt, /* Processing handlers for ipmr_forward, under rcu_read_lock() */ -static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, - int in_vifi, struct sk_buff *skb, int vifi) +static int ipmr_prepare_xmit(struct net *net, struct mr_table *mrt, + struct sk_buff *skb, int vifi) { const struct iphdr *iph = ip_hdr(skb); struct vif_device *vif = &mrt->vif_table[vifi]; struct net_device *vif_dev; - struct net_device *dev; struct rtable *rt; struct flowi4 fl4; int encap = 0; vif_dev = vif_dev_read(vif); if (!vif_dev) - goto out_free; + return -1; if (vif->flags & VIFF_REGISTER) { WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1); @@ -1874,12 +1873,9 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, DEV_STATS_ADD(vif_dev, tx_bytes, skb->len); DEV_STATS_INC(vif_dev, tx_packets); ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT); - goto out_free; + return -1; } - if (ipmr_forward_offloaded(skb, mrt, in_vifi, vifi)) - goto out_free; - if (vif->flags & VIFF_TUNNEL) { rt = ip_route_output_ports(net, &fl4, NULL, vif->remote, vif->local, @@ -1887,7 +1883,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, IPPROTO_IPIP, iph->tos & INET_DSCP_MASK, vif->link); if (IS_ERR(rt)) - goto out_free; + return -1; encap = sizeof(struct iphdr); } else { rt = ip_route_output_ports(net, &fl4, NULL, iph->daddr, 0, @@ -1895,11 +1891,9 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, IPPROTO_IPIP, iph->tos & INET_DSCP_MASK, vif->link); if (IS_ERR(rt)) - goto out_free; + return -1; } - dev = rt->dst.dev; - if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) { /* Do not fragment multicasts. 
Alas, IPv4 does not * allow to send ICMP, so that packets will disappear @@ -1907,14 +1901,14 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, */ IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); ip_rt_put(rt); - goto out_free; + return -1; } - encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len; + encap += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len; if (skb_cow(skb, encap)) { ip_rt_put(rt); - goto out_free; + return -1; } WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1); @@ -1934,6 +1928,22 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, DEV_STATS_ADD(vif_dev, tx_bytes, skb->len); } + return 0; +} + +static void ipmr_queue_fwd_xmit(struct net *net, struct mr_table *mrt, + int in_vifi, struct sk_buff *skb, int vifi) +{ + struct rtable *rt; + + if (ipmr_forward_offloaded(skb, mrt, in_vifi, vifi)) + goto out_free; + + if (ipmr_prepare_xmit(net, mrt, skb, vifi)) + goto out_free; + + rt = skb_rtable(skb); + IPCB(skb)->flags |= IPSKB_FORWARDED; /* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally @@ -1947,7 +1957,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, * result in receiving multiple packets. 
*/ NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, - net, NULL, skb, skb->dev, dev, + net, NULL, skb, skb->dev, rt->dst.dev, ipmr_forward_finish); return; @@ -1955,6 +1965,19 @@ out_free: kfree_skb(skb); } +static void ipmr_queue_output_xmit(struct net *net, struct mr_table *mrt, + struct sk_buff *skb, int vifi) +{ + if (ipmr_prepare_xmit(net, mrt, skb, vifi)) + goto out_free; + + ip_mc_output(net, NULL, skb); + return; + +out_free: + kfree_skb(skb); +} + /* Called with mrt_lock or rcu_read_lock() */ static int ipmr_find_vif(const struct mr_table *mrt, struct net_device *dev) { @@ -2065,8 +2088,8 @@ forward: struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) - ipmr_queue_xmit(net, mrt, true_vifi, - skb2, psend); + ipmr_queue_fwd_xmit(net, mrt, true_vifi, + skb2, psend); } psend = ct; } @@ -2077,10 +2100,10 @@ last_forward: struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) - ipmr_queue_xmit(net, mrt, true_vifi, skb2, - psend); + ipmr_queue_fwd_xmit(net, mrt, true_vifi, skb2, + psend); } else { - ipmr_queue_xmit(net, mrt, true_vifi, skb, psend); + ipmr_queue_fwd_xmit(net, mrt, true_vifi, skb, psend); return; } } @@ -2214,6 +2237,110 @@ dont_forward: return 0; } +static void ip_mr_output_finish(struct net *net, struct mr_table *mrt, + struct net_device *dev, struct sk_buff *skb, + struct mfc_cache *c) +{ + int psend = -1; + int ct; + + atomic_long_inc(&c->_c.mfc_un.res.pkt); + atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes); + WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies); + + /* Forward the frame */ + if (c->mfc_origin == htonl(INADDR_ANY) && + c->mfc_mcastgrp == htonl(INADDR_ANY)) { + if (ip_hdr(skb)->ttl > + c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) { + /* It's an (*,*) entry and the packet is not coming from + * the upstream: forward the packet to the upstream + * only. 
+ */ + psend = c->_c.mfc_parent; + goto last_xmit; + } + goto dont_xmit; + } + + for (ct = c->_c.mfc_un.res.maxvif - 1; + ct >= c->_c.mfc_un.res.minvif; ct--) { + if (ip_hdr(skb)->ttl > c->_c.mfc_un.res.ttls[ct]) { + if (psend != -1) { + struct sk_buff *skb2; + + skb2 = skb_clone(skb, GFP_ATOMIC); + if (skb2) + ipmr_queue_output_xmit(net, mrt, + skb2, psend); + } + psend = ct; + } + } + +last_xmit: + if (psend != -1) { + ipmr_queue_output_xmit(net, mrt, skb, psend); + return; + } + +dont_xmit: + kfree_skb(skb); +} + +/* Multicast packets for forwarding arrive here + * Called with rcu_read_lock(); + */ +int ip_mr_output(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct rtable *rt = skb_rtable(skb); + struct mfc_cache *cache; + struct net_device *dev; + struct mr_table *mrt; + int vif; + + guard(rcu)(); + + dev = rt->dst.dev; + + if (IPCB(skb)->flags & IPSKB_FORWARDED) + goto mc_output; + if (!(IPCB(skb)->flags & IPSKB_MCROUTE)) + goto mc_output; + + skb->dev = dev; + + mrt = ipmr_rt_fib_lookup(net, skb); + if (IS_ERR(mrt)) + goto mc_output; + + cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); + if (!cache) { + vif = ipmr_find_vif(mrt, dev); + if (vif >= 0) + cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr, + vif); + } + + /* No usable cache entry */ + if (!cache) { + vif = ipmr_find_vif(mrt, dev); + if (vif >= 0) + return ipmr_cache_unresolved(mrt, vif, skb, dev); + goto mc_output; + } + + vif = cache->_c.mfc_parent; + if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) + goto mc_output; + + ip_mr_output_finish(net, mrt, dev, skb, cache); + return 0; + +mc_output: + return ip_mc_output(net, sk, skb); +} + #ifdef CONFIG_IP_PIMSM_V1 /* Handle IGMP messages of PIMv1 */ int pim_rcv_v1(struct sk_buff *skb) diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 08bc3f2c0078..0565f001120d 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -20,12 +20,12 @@ /* route_me_harder function, used by iptable_nat, 
iptable_mangle + ip_queue */ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned int addr_type) { + struct net_device *dev = skb_dst_dev(skb); const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; struct flowi4 fl4 = {}; __be32 saddr = iph->saddr; __u8 flags; - struct net_device *dev = skb_dst(skb)->dev; struct flow_keys flkeys; unsigned int hh_len; @@ -74,7 +74,7 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un #endif /* Change in oif may mean change in hh_len. */ - hh_len = skb_dst(skb)->dev->hard_header_len; + hh_len = skb_dst_dev(skb)->hard_header_len; if (skb_headroom(skb) < hh_len && pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)), 0, GFP_ATOMIC)) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 4397e89d3123..e808801ab9b8 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -985,8 +985,7 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh, break; } - if (nhi->fib_nhc.nhc_lwtstate && - lwtunnel_fill_encap(skb, nhi->fib_nhc.nhc_lwtstate, + if (lwtunnel_fill_encap(skb, nhi->fib_nhc.nhc_lwtstate, NHA_ENCAP, NHA_ENCAP_TYPE) < 0) goto nla_put_failure; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index c14baa6589c7..031df4c19fcc 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -781,7 +781,7 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, ipc.tos & INET_DSCP_MASK, scope, sk->sk_protocol, inet_sk_flowi_flags(sk), faddr, - saddr, 0, 0, sk->sk_uid); + saddr, 0, 0, sk_uid(sk)); fl4.fl4_icmp_type = user_icmph.type; fl4.fl4_icmp_code = user_icmph.code; @@ -1116,7 +1116,7 @@ static void ping_v4_format_sock(struct sock *sp, struct seq_file *f, sk_wmem_alloc_get(sp), sk_rmem_alloc_get(sp), 0, 0L, 0, - from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)), + from_kuid_munged(seq_user_ns(f), sk_uid(sp)), 0, sock_i_ino(sp), refcount_read(&sp->sk_refcnt), sp, 
atomic_read(&sp->sk_drops)); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 6aace4d55733..1d2c89d63cc7 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -610,7 +610,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) hdrincl ? ipc.protocol : sk->sk_protocol, inet_sk_flowi_flags(sk) | (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), - daddr, saddr, 0, 0, sk->sk_uid); + daddr, saddr, 0, 0, sk_uid(sk)); fl4.fl4_icmp_type = 0; fl4.fl4_icmp_code = 0; @@ -1043,7 +1043,7 @@ static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) sk_wmem_alloc_get(sp), sk_rmem_alloc_get(sp), 0, 0L, 0, - from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), + from_kuid_munged(seq_user_ns(seq), sk_uid(sp)), 0, sock_i_ino(sp), refcount_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index fccb05fb3a79..64ba377cd6cc 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -413,7 +413,7 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr) { const struct rtable *rt = container_of(dst, struct rtable, dst); - struct net_device *dev = dst->dev; + struct net_device *dev = dst_dev(dst); struct neighbour *n; rcu_read_lock(); @@ -440,7 +440,7 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr) { const struct rtable *rt = container_of(dst, struct rtable, dst); - struct net_device *dev = dst->dev; + struct net_device *dev = dst_dev(dst); const __be32 *pkey = daddr; if (rt->rt_gw_family == AF_INET) { @@ -556,7 +556,8 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) inet_test_bit(HDRINCL, sk) ? 
IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk), - daddr, inet->inet_saddr, 0, 0, sk->sk_uid); + daddr, inet->inet_saddr, 0, 0, + sk_uid(sk)); rcu_read_unlock(); } @@ -716,7 +717,7 @@ static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr, */ rt = rcu_dereference(nhc->nhc_rth_input); if (rt) - rt->dst.obsolete = DST_OBSOLETE_KILL; + WRITE_ONCE(rt->dst.obsolete, DST_OBSOLETE_KILL); for_each_possible_cpu(i) { struct rtable __rcu **prt; @@ -724,7 +725,7 @@ static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr, prt = per_cpu_ptr(nhc->nhc_pcpu_rth_output, i); rt = rcu_dereference(*prt); if (rt) - rt->dst.obsolete = DST_OBSOLETE_KILL; + WRITE_ONCE(rt->dst.obsolete, DST_OBSOLETE_KILL); } } @@ -796,7 +797,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow jiffies + ip_rt_gc_timeout); } if (kill_route) - rt->dst.obsolete = DST_OBSOLETE_KILL; + WRITE_ONCE(rt->dst.obsolete, DST_OBSOLETE_KILL); call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); } neigh_release(n); @@ -841,9 +842,9 @@ static void ipv4_negative_advice(struct sock *sk, { struct rtable *rt = dst_rtable(dst); - if ((dst->obsolete > 0) || + if ((READ_ONCE(dst->obsolete) > 0) || (rt->rt_flags & RTCF_REDIRECTED) || - rt->dst.expires) + READ_ONCE(rt->dst.expires)) sk_dst_reset(sk); } @@ -1025,14 +1026,15 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) return; rcu_read_lock(); - net = dev_net_rcu(dst->dev); + net = dev_net_rcu(dst_dev(dst)); if (mtu < net->ipv4.ip_rt_min_pmtu) { lock = true; mtu = min(old_mtu, net->ipv4.ip_rt_min_pmtu); } if (rt->rt_pmtu == mtu && !lock && - time_before(jiffies, dst->expires - net->ipv4.ip_rt_mtu_expires / 2)) + time_before(jiffies, READ_ONCE(dst->expires) - + net->ipv4.ip_rt_mtu_expires / 2)) goto out; if (fib_lookup(net, fl4, &res, 0) == 0) { @@ -1135,7 +1137,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) __build_flow_key(net, &fl4, sk, iph, 
0, 0, 0, 0, 0); rt = dst_rtable(odst); - if (odst->obsolete && !odst->ops->check(odst, 0)) { + if (READ_ONCE(odst->obsolete) && !odst->ops->check(odst, 0)) { rt = ip_route_output_flow(sock_net(sk), &fl4, sk); if (IS_ERR(rt)) goto out; @@ -1210,7 +1212,8 @@ INDIRECT_CALLABLE_SCOPE struct dst_entry *ipv4_dst_check(struct dst_entry *dst, * this is indicated by setting obsolete to DST_OBSOLETE_KILL or * DST_OBSOLETE_DEAD. */ - if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt)) + if (READ_ONCE(dst->obsolete) != DST_OBSOLETE_FORCE_CHK || + rt_is_expired(rt)) return NULL; return dst; } @@ -1323,7 +1326,7 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst) struct net *net; rcu_read_lock(); - net = dev_net_rcu(dst->dev); + net = dev_net_rcu(dst_dev(dst)); advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size, net->ipv4.ip_rt_min_advmss); rcu_read_unlock(); @@ -1570,7 +1573,7 @@ void rt_flush_dev(struct net_device *dev) static bool rt_cache_valid(const struct rtable *rt) { return rt && - rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK && + READ_ONCE(rt->dst.obsolete) == DST_OBSOLETE_FORCE_CHK && !rt_is_expired(rt); } @@ -1684,8 +1687,8 @@ struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt) else if (rt->rt_gw_family == AF_INET6) new_rt->rt_gw6 = rt->rt_gw6; - new_rt->dst.input = rt->dst.input; - new_rt->dst.output = rt->dst.output; + new_rt->dst.input = READ_ONCE(rt->dst.input); + new_rt->dst.output = READ_ONCE(rt->dst.output); new_rt->dst.error = rt->dst.error; new_rt->dst.lastuse = jiffies; new_rt->dst.lwtstate = lwtstate_get(rt->dst.lwtstate); @@ -2660,7 +2663,7 @@ add: if (IN_DEV_MFORWARD(in_dev) && !ipv4_is_local_multicast(fl4->daddr)) { rth->dst.input = ip_mr_input; - rth->dst.output = ip_mc_output; + rth->dst.output = ip_mr_output; } } #endif @@ -2977,8 +2980,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, if (rt->dst.dev && nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) goto nla_put_failure; - 
if (rt->dst.lwtstate && - lwtunnel_fill_encap(skb, rt->dst.lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0) + if (lwtunnel_fill_encap(skb, rt->dst.lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0) goto nla_put_failure; #ifdef CONFIG_IP_ROUTE_CLASSID if (rt->dst.tclassid && @@ -3009,7 +3011,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, } } - expires = rt->dst.expires; + expires = READ_ONCE(rt->dst.expires); if (expires) { unsigned long now = jiffies; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 5459a78b9809..eb0819463fae 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -454,7 +454,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ip_sock_rt_tos(sk), ip_sock_rt_scope(sk), IPPROTO_TCP, inet_sk_flowi_flags(sk), opt->srr ? opt->faddr : ireq->ir_rmt_addr, - ireq->ir_loc_addr, th->source, th->dest, sk->sk_uid); + ireq->ir_loc_addr, th->source, th->dest, + sk_uid(sk)); security_req_classify_flow(req, flowi4_to_flowi_common(&fl4)); rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 461a9ab540af..31149a0ac849 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -302,8 +302,6 @@ EXPORT_PER_CPU_SYMBOL_GPL(tcp_tw_isn); long sysctl_tcp_mem[3] __read_mostly; EXPORT_IPV6_MOD(sysctl_tcp_mem); -atomic_long_t tcp_memory_allocated ____cacheline_aligned_in_smp; /* Current allocated memory. 
*/ -EXPORT_IPV6_MOD(tcp_memory_allocated); DEFINE_PER_CPU(int, tcp_memory_per_cpu_fw_alloc); EXPORT_PER_CPU_SYMBOL_GPL(tcp_memory_per_cpu_fw_alloc); @@ -1297,8 +1295,7 @@ new_segment: if (!copy) goto wait_for_space; - err = skb_splice_from_iter(skb, &msg->msg_iter, copy, - sk->sk_allocation); + err = skb_splice_from_iter(skb, &msg->msg_iter, copy); if (err < 0) { if (err == -EMSGSIZE) { tcp_mark_push(tp, skb); @@ -5053,9 +5050,8 @@ static void __init tcp_struct_check(void) CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, reordering); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, notsent_lowat); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, gso_segs); - CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, lost_skb_hint); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, retransmit_skb_hint); - CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_tx, 40); + CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_tx, 32); /* TXRX read-mostly hotpath cache lines */ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, tsoffset); @@ -5243,6 +5239,6 @@ void __init tcp_init(void) tcp_v4_init(); tcp_metrics_init(); BUG_ON(tcp_register_congestion_control(&tcp_reno) != 0); - tcp_tasklet_init(); + tcp_tsq_work_init(); mptcp_init(); } diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 5107121c5e37..f1884f0c9e52 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -559,6 +559,7 @@ bool tcp_fastopen_active_should_disable(struct sock *sk) void tcp_fastopen_active_disable_ofo_check(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + struct net_device *dev; struct dst_entry *dst; struct sk_buff *skb; @@ -576,7 +577,8 @@ void tcp_fastopen_active_disable_ofo_check(struct sock *sk) } else if (tp->syn_fastopen_ch && atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times)) { dst = sk_dst_get(sk); - if (!(dst && dst->dev && (dst->dev->flags & 
IFF_LOOPBACK))) + dev = dst ? dst_dev(dst) : NULL; + if (!(dev && (dev->flags & IFF_LOOPBACK))) atomic_set(&sock_net(sk)->ipv4.tfo_active_disable_times, 0); dst_release(dst); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 68bc79eb9019..9b03c44c12b8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1451,11 +1451,6 @@ static u8 tcp_sacktag_one(struct sock *sk, tp->sacked_out += pcount; /* Out-of-order packets delivered */ state->sack_delivered += pcount; - - /* Lost marker hint past SACKed? Tweak RFC3517 cnt */ - if (tp->lost_skb_hint && - before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq)) - tp->lost_cnt_hint += pcount; } /* D-SACK. We can detect redundant retransmission in S|R and plain R @@ -1496,9 +1491,6 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev, tcp_skb_timestamp_us(skb)); tcp_rate_skb_delivered(sk, skb, state->rate); - if (skb == tp->lost_skb_hint) - tp->lost_cnt_hint += pcount; - TCP_SKB_CB(prev)->end_seq += shifted; TCP_SKB_CB(skb)->seq += shifted; @@ -1531,10 +1523,6 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev, if (skb == tp->retransmit_skb_hint) tp->retransmit_skb_hint = prev; - if (skb == tp->lost_skb_hint) { - tp->lost_skb_hint = prev; - tp->lost_cnt_hint -= tcp_skb_pcount(prev); - } TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags; TCP_SKB_CB(prev)->eor = TCP_SKB_CB(skb)->eor; @@ -2151,12 +2139,6 @@ static inline void tcp_init_undo(struct tcp_sock *tp) tp->undo_retrans = -1; } -static bool tcp_is_rack(const struct sock *sk) -{ - return READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) & - TCP_RACK_LOSS_DETECTION; -} - /* If we detect SACK reneging, forget all SACK information * and reset tags completely, otherwise preserve SACKs. If receiver * dropped its ofo queue, we will know this due to reneging detection. 
@@ -2182,8 +2164,7 @@ static void tcp_timeout_mark_lost(struct sock *sk) skb_rbtree_walk_from(skb) { if (is_reneg) TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; - else if (tcp_is_rack(sk) && skb != head && - tcp_rack_skb_timeout(tp, skb, 0) > 0) + else if (skb != head && tcp_rack_skb_timeout(tp, skb, 0) > 0) continue; /* Don't mark recently sent ones lost yet */ tcp_mark_skb_lost(sk, skb); } @@ -2264,22 +2245,6 @@ static bool tcp_check_sack_reneging(struct sock *sk, int *ack_flag) return false; } -/* Heurestics to calculate number of duplicate ACKs. There's no dupACKs - * counter when SACK is enabled (without SACK, sacked_out is used for - * that purpose). - * - * With reordering, holes may still be in flight, so RFC3517 recovery - * uses pure sacked_out (total number of SACKed segments) even though - * it violates the RFC that uses duplicate ACKs, often these are equal - * but when e.g. out-of-window ACKs or packet duplication occurs, - * they differ. Since neither occurs due to loss, TCP should really - * ignore them. - */ -static inline int tcp_dupack_heuristics(const struct tcp_sock *tp) -{ - return tp->sacked_out + 1; -} - /* Linux NewReno/SACK/ECN state machine. * -------------------------------------- * @@ -2332,13 +2297,7 @@ static inline int tcp_dupack_heuristics(const struct tcp_sock *tp) * * If the receiver supports SACK: * - * RFC6675/3517: It is the conventional algorithm. A packet is - * considered lost if the number of higher sequence packets - * SACKed is greater than or equal the DUPACK thoreshold - * (reordering). This is implemented in tcp_mark_head_lost and - * tcp_update_scoreboard. - * - * RACK (draft-ietf-tcpm-rack-01): it is a newer algorithm + * RACK (RFC8985): RACK is a newer loss detection algorithm * (2017-) that checks timing instead of counting DUPACKs. 
* Essentially a packet is considered lost if it's not S/ACKed * after RTT + reordering_window, where both metrics are @@ -2353,8 +2312,8 @@ static inline int tcp_dupack_heuristics(const struct tcp_sock *tp) * is lost (NewReno). This heuristics are the same in NewReno * and SACK. * - * Really tricky (and requiring careful tuning) part of algorithm - * is hidden in functions tcp_time_to_recover() and tcp_xmit_retransmit_queue(). + * The really tricky (and requiring careful tuning) part of the algorithm + * is hidden in the RACK code in tcp_recovery.c and tcp_xmit_retransmit_queue(). * The first determines the moment _when_ we should reduce CWND and, * hence, slow down forward transmission. In fact, it determines the moment * when we decide that hole is caused by loss, rather than by a reorder. @@ -2377,83 +2336,10 @@ static inline int tcp_dupack_heuristics(const struct tcp_sock *tp) * Main question: may we further continue forward transmission * with the same cwnd? */ -static bool tcp_time_to_recover(struct sock *sk, int flag) -{ - struct tcp_sock *tp = tcp_sk(sk); - - /* Trick#1: The loss is proven. */ - if (tp->lost_out) - return true; - - /* Not-A-Trick#2 : Classic rule... */ - if (!tcp_is_rack(sk) && tcp_dupack_heuristics(tp) > tp->reordering) - return true; - - return false; -} - -/* Detect loss in event "A" above by marking head of queue up as lost. - * For RFC3517 SACK, a segment is considered lost if it - * has at least tp->reordering SACKed seqments above it; "packets" refers to - * the maximum SACKed segments to pass before reaching this limit. - */ -static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; - int cnt; - /* Use SACK to deduce losses of new sequences sent during recovery */ - const u32 loss_high = tp->snd_nxt; - - WARN_ON(packets > tp->packets_out); - skb = tp->lost_skb_hint; - if (skb) { - /* Head already handled? 
*/ - if (mark_head && after(TCP_SKB_CB(skb)->seq, tp->snd_una)) - return; - cnt = tp->lost_cnt_hint; - } else { - skb = tcp_rtx_queue_head(sk); - cnt = 0; - } - - skb_rbtree_walk_from(skb) { - /* TODO: do this better */ - /* this is not the most efficient way to do this... */ - tp->lost_skb_hint = skb; - tp->lost_cnt_hint = cnt; - - if (after(TCP_SKB_CB(skb)->end_seq, loss_high)) - break; - - if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) - cnt += tcp_skb_pcount(skb); - - if (cnt > packets) - break; - - if (!(TCP_SKB_CB(skb)->sacked & TCPCB_LOST)) - tcp_mark_skb_lost(sk, skb); - - if (mark_head) - break; - } - tcp_verify_left_out(tp); -} - -/* Account newly detected lost packet(s) */ - -static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit) +static bool tcp_time_to_recover(const struct tcp_sock *tp) { - struct tcp_sock *tp = tcp_sk(sk); - - if (tcp_is_sack(tp)) { - int sacked_upto = tp->sacked_out - tp->reordering; - if (sacked_upto >= 0) - tcp_mark_head_lost(sk, sacked_upto, 0); - else if (fast_rexmit) - tcp_mark_head_lost(sk, 1, 1); - } + /* Has loss detection marked at least one packet lost? */ + return tp->lost_out != 0; } static bool tcp_tsopt_ecr_before(const struct tcp_sock *tp, u32 when) @@ -2894,8 +2780,6 @@ void tcp_simple_retransmit(struct sock *sk) tcp_mark_skb_lost(sk, skb); } - tcp_clear_retrans_hints_partial(tp); - if (!tp->lost_out) return; @@ -3003,17 +2887,8 @@ static void tcp_process_loss(struct sock *sk, int flag, int num_dupack, *rexmit = REXMIT_LOST; } -static bool tcp_force_fast_retransmit(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - - return after(tcp_highest_sack_seq(tp), - tp->snd_una + tp->reordering * tp->mss_cache); -} - /* Undo during fast recovery after partial ACK. 
*/ -static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una, - bool *do_lost) +static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una) { struct tcp_sock *tp = tcp_sk(sk); @@ -3038,9 +2913,6 @@ static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una, tcp_undo_cwnd_reduction(sk, true); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO); tcp_try_keep_open(sk); - } else { - /* Partial ACK arrived. Force fast retransmit. */ - *do_lost = tcp_force_fast_retransmit(sk); } return false; } @@ -3054,7 +2926,7 @@ static void tcp_identify_packet_loss(struct sock *sk, int *ack_flag) if (unlikely(tcp_is_reno(tp))) { tcp_newreno_mark_lost(sk, *ack_flag & FLAG_SND_UNA_ADVANCED); - } else if (tcp_is_rack(sk)) { + } else { u32 prior_retrans = tp->retrans_out; if (tcp_rack_mark_lost(sk)) @@ -3081,10 +2953,8 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - int fast_rexmit = 0, flag = *ack_flag; + int flag = *ack_flag; bool ece_ack = flag & FLAG_ECE; - bool do_lost = num_dupack || ((flag & FLAG_DATA_SACKED) && - tcp_force_fast_retransmit(sk)); if (!tp->packets_out && tp->sacked_out) tp->sacked_out = 0; @@ -3133,7 +3003,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, if (!(flag & FLAG_SND_UNA_ADVANCED)) { if (tcp_is_reno(tp)) tcp_add_reno_sack(sk, num_dupack, ece_ack); - } else if (tcp_try_undo_partial(sk, prior_snd_una, &do_lost)) + } else if (tcp_try_undo_partial(sk, prior_snd_una)) return; if (tcp_try_undo_dsack(sk)) @@ -3141,7 +3011,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, tcp_identify_packet_loss(sk, ack_flag); if (icsk->icsk_ca_state != TCP_CA_Recovery) { - if (!tcp_time_to_recover(sk, flag)) + if (!tcp_time_to_recover(tp)) return; /* Undo reverts the recovery state. If loss is evident, * starts a new recovery (e.g. 
reordering then loss); @@ -3170,7 +3040,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, tcp_try_undo_dsack(sk); tcp_identify_packet_loss(sk, ack_flag); - if (!tcp_time_to_recover(sk, flag)) { + if (!tcp_time_to_recover(tp)) { tcp_try_to_open(sk, flag); return; } @@ -3188,11 +3058,8 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, /* Otherwise enter Recovery state */ tcp_enter_recovery(sk, ece_ack); - fast_rexmit = 1; } - if (!tcp_is_rack(sk) && do_lost) - tcp_update_scoreboard(sk, fast_rexmit); *rexmit = REXMIT_LOST; } @@ -3448,8 +3315,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb, next = skb_rb_next(skb); if (unlikely(skb == tp->retransmit_skb_hint)) tp->retransmit_skb_hint = NULL; - if (unlikely(skb == tp->lost_skb_hint)) - tp->lost_skb_hint = NULL; tcp_highest_sack_replace(sk, skb, next); tcp_rtx_queue_unlink_and_free(skb, sk); } @@ -3507,14 +3372,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb, if (flag & FLAG_RETRANS_DATA_ACKED) flag &= ~FLAG_ORIG_SACK_ACKED; } else { - int delta; - /* Non-retransmitted hole got filled? That's reordering */ if (before(reord, prior_fack)) tcp_check_sack_reordering(sk, reord, 0); - - delta = prior_sacked - tp->sacked_out; - tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta); } } else if (skb && rtt_update && sack_rtt_us >= 0 && sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp, @@ -3854,7 +3714,7 @@ static int tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) } /* This routine deals with acks during a TLP episode and ends an episode by - * resetting tlp_high_seq. Ref: TLP algorithm in draft-ietf-tcpm-rack + * resetting tlp_high_seq. 
Ref: TLP algorithm in RFC8985 */ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) { @@ -4985,8 +4845,9 @@ static void tcp_ofo_queue(struct sock *sk) if (before(TCP_SKB_CB(skb)->seq, dsack_high)) { __u32 dsack = dsack_high; + if (before(TCP_SKB_CB(skb)->end_seq, dsack_high)) - dsack_high = TCP_SKB_CB(skb)->end_seq; + dsack = TCP_SKB_CB(skb)->end_seq; tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack); } p = rb_next(p); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 6a14f9e6fef6..a847d894ace3 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -59,6 +59,7 @@ #include <linux/slab.h> #include <linux/sched.h> +#include <net/aligned_data.h> #include <net/net_namespace.h> #include <net/icmp.h> #include <net/inet_hashtables.h> @@ -787,7 +788,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb, arg.iov[0].iov_base = (unsigned char *)&rep; arg.iov[0].iov_len = sizeof(rep.th); - net = sk ? sock_net(sk) : dev_net_rcu(skb_dst(skb)->dev); + net = sk ? 
sock_net(sk) : skb_dst_dev_net_rcu(skb); /* Invalid TCP option size or twice included auth */ if (tcp_parse_auth_options(tcp_hdr(skb), &md5_hash_location, &aoh)) @@ -1703,7 +1704,6 @@ static struct dst_entry *tcp_v4_route_req(const struct sock *sk, struct request_sock_ops tcp_request_sock_ops __read_mostly = { .family = PF_INET, .obj_size = sizeof(struct tcp_request_sock), - .rtx_syn_ack = tcp_rtx_synack, .send_ack = tcp_v4_reqsk_send_ack, .destructor = tcp_v4_reqsk_destructor, .send_reset = tcp_v4_send_reset, @@ -2896,7 +2896,7 @@ static void get_openreq4(const struct request_sock *req, jiffies_delta_to_clock_t(delta), req->num_timeout, from_kuid_munged(seq_user_ns(f), - sock_i_uid(req->rsk_listener)), + sk_uid(req->rsk_listener)), 0, /* non standard timer */ 0, /* open_requests have no inode */ 0, @@ -2954,7 +2954,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) timer_active, jiffies_delta_to_clock_t(timer_expires - jiffies), icsk->icsk_retransmits, - from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)), + from_kuid_munged(seq_user_ns(f), sk_uid(sk)), icsk->icsk_probes_out, sock_i_ino(sk), refcount_read(&sk->sk_refcnt), sk, @@ -3246,9 +3246,9 @@ static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v) const struct request_sock *req = v; uid = from_kuid_munged(seq_user_ns(seq), - sock_i_uid(req->rsk_listener)); + sk_uid(req->rsk_listener)); } else { - uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)); + uid = from_kuid_munged(seq_user_ns(seq), sk_uid(sk)); } meta.seq = seq; @@ -3391,7 +3391,7 @@ struct proto tcp_prot = { .sockets_allocated = &tcp_sockets_allocated, .orphan_count = &tcp_orphan_count, - .memory_allocated = &tcp_memory_allocated, + .memory_allocated = &net_aligned_data.tcp_memory_allocated, .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc, .memory_pressure = &tcp_memory_pressure, diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 4251670e328c..03c068ea27b6 100644 --- a/net/ipv4/tcp_metrics.c +++ 
b/net/ipv4/tcp_metrics.c @@ -166,11 +166,11 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, unsigned int hash) { struct tcp_metrics_block *tm; - struct net *net; bool reclaim = false; + struct net *net; spin_lock_bh(&tcp_metrics_lock); - net = dev_net_rcu(dst->dev); + net = dev_net_rcu(dst_dev(dst)); /* While waiting for the spin-lock the cache might have been populated * with this entry and so we have to check again. @@ -273,7 +273,7 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, return NULL; } - net = dev_net_rcu(dst->dev); + net = dev_net_rcu(dst_dev(dst)); hash ^= net_hash_mix(net); hash = hash_32(hash, tcp_metrics_hash_log); @@ -318,7 +318,7 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, else return NULL; - net = dev_net_rcu(dst->dev); + net = dev_net_rcu(dst_dev(dst)); hash ^= net_hash_mix(net); hash = hash_32(hash, tcp_metrics_hash_log); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 43d7852ce07e..2994c9222c9c 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -726,7 +726,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, LINUX_MIB_TCPACKSKIPPEDSYNRECV, &tcp_rsk(req)->last_oow_ack_time) && - !inet_rtx_syn_ack(sk, req)) { + !tcp_rtx_synack(sk, req)) { unsigned long expires = jiffies; expires += reqsk_timeout(req, TCP_RTO_MAX); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3ac8d2d17e1f..b616776e3354 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1066,15 +1066,15 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb * needs to be reallocated in a driver. * The invariant being skb->truesize subtracted from sk->sk_wmem_alloc * - * Since transmit from skb destructor is forbidden, we use a tasklet + * Since transmit from skb destructor is forbidden, we use a BH work item * to process all sockets that eventually need to send more skbs. 
- * We use one tasklet per cpu, with its own queue of sockets. + * We use one work item per cpu, with its own queue of sockets. */ -struct tsq_tasklet { - struct tasklet_struct tasklet; +struct tsq_work { + struct work_struct work; struct list_head head; /* queue of tcp sockets */ }; -static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet); +static DEFINE_PER_CPU(struct tsq_work, tsq_work); static void tcp_tsq_write(struct sock *sk) { @@ -1104,14 +1104,14 @@ static void tcp_tsq_handler(struct sock *sk) bh_unlock_sock(sk); } /* - * One tasklet per cpu tries to send more skbs. - * We run in tasklet context but need to disable irqs when + * One work item per cpu tries to send more skbs. + * We run in BH context but need to disable irqs when * transferring tsq->head because tcp_wfree() might * interrupt us (non NAPI drivers) */ -static void tcp_tasklet_func(struct tasklet_struct *t) +static void tcp_tsq_workfn(struct work_struct *work) { - struct tsq_tasklet *tsq = from_tasklet(tsq, t, tasklet); + struct tsq_work *tsq = container_of(work, struct tsq_work, work); LIST_HEAD(list); unsigned long flags; struct list_head *q, *n; @@ -1181,15 +1181,15 @@ void tcp_release_cb(struct sock *sk) } EXPORT_IPV6_MOD(tcp_release_cb); -void __init tcp_tasklet_init(void) +void __init tcp_tsq_work_init(void) { int i; for_each_possible_cpu(i) { - struct tsq_tasklet *tsq = &per_cpu(tsq_tasklet, i); + struct tsq_work *tsq = &per_cpu(tsq_work, i); INIT_LIST_HEAD(&tsq->head); - tasklet_setup(&tsq->tasklet, tcp_tasklet_func); + INIT_WORK(&tsq->work, tcp_tsq_workfn); } } @@ -1203,11 +1203,11 @@ void tcp_wfree(struct sk_buff *skb) struct sock *sk = skb->sk; struct tcp_sock *tp = tcp_sk(sk); unsigned long flags, nval, oval; - struct tsq_tasklet *tsq; + struct tsq_work *tsq; bool empty; /* Keep one reference on sk_wmem_alloc. 
- * Will be released by sk_free() from here or tcp_tasklet_func() + * Will be released by sk_free() from here or tcp_tsq_workfn() */ WARN_ON(refcount_sub_and_test(skb->truesize - 1, &sk->sk_wmem_alloc)); @@ -1229,13 +1229,13 @@ void tcp_wfree(struct sk_buff *skb) nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED; } while (!try_cmpxchg(&sk->sk_tsq_flags, &oval, nval)); - /* queue this socket to tasklet queue */ + /* queue this socket to BH workqueue */ local_irq_save(flags); - tsq = this_cpu_ptr(&tsq_tasklet); + tsq = this_cpu_ptr(&tsq_work); empty = list_empty(&tsq->head); list_add(&tp->tsq_node, &tsq->head); if (empty) - tasklet_schedule(&tsq->tasklet); + queue_work(system_bh_wq, &tsq->work); local_irq_restore(flags); return; out: @@ -1554,11 +1554,6 @@ static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int de if (tcp_is_reno(tp) && decr > 0) tp->sacked_out -= min_t(u32, tp->sacked_out, decr); - if (tp->lost_skb_hint && - before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) && - (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) - tp->lost_cnt_hint -= decr; - tcp_verify_left_out(tp); } @@ -2639,7 +2634,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, if (refcount_read(&sk->sk_wmem_alloc) > limit) { /* Always send skb if rtx queue is empty or has one skb. * No need to wait for TX completion to call us back, - * after softirq/tasklet schedule. + * after softirq schedule. * This helps when TX completions are delayed too much. 
*/ if (tcp_rtx_queue_empty_or_single_skb(sk)) @@ -3252,7 +3247,6 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) TCP_SKB_CB(skb)->eor = TCP_SKB_CB(next_skb)->eor; /* changed transmit queue under us so clear hints */ - tcp_clear_retrans_hints_partial(tp); if (next_skb == tp->retransmit_skb_hint) tp->retransmit_skb_hint = skb; @@ -4431,6 +4425,7 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) tcp_sk_rw(sk)->total_retrans++; } trace_tcp_retransmit_synack(sk, req); + req->num_retrans++; } return res; } diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index bba10110fbbc..c52fd3254b6e 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -35,7 +35,7 @@ s32 tcp_rack_skb_timeout(struct tcp_sock *tp, struct sk_buff *skb, u32 reo_wnd) tcp_stamp_us_delta(tp->tcp_mstamp, tcp_skb_timestamp_us(skb)); } -/* RACK loss detection (IETF draft draft-ietf-tcpm-rack-01): +/* RACK loss detection (IETF RFC8985): * * Marks a packet lost, if some packet sent later has been (s)acked. * The underlying idea is similar to the traditional dupthresh and FACK diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index bb37e24b97a7..a207877270fb 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -478,7 +478,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req) * regular retransmit because if the child socket has been accepted * it's not good to give up too easily. 
*/ - inet_rtx_syn_ack(sk, req); + tcp_rtx_synack(sk, req); req->num_timeout++; tcp_update_rto_stats(sk); if (!tp->retrans_stamp) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index dde52b8050b8..49f43c54cfb0 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -127,8 +127,6 @@ struct udp_table udp_table __read_mostly; long sysctl_udp_mem[3] __read_mostly; EXPORT_IPV6_MOD(sysctl_udp_mem); -atomic_long_t udp_memory_allocated ____cacheline_aligned_in_smp; -EXPORT_IPV6_MOD(udp_memory_allocated); DEFINE_PER_CPU(int, udp_memory_per_cpu_fw_alloc); EXPORT_PER_CPU_SYMBOL_GPL(udp_memory_per_cpu_fw_alloc); @@ -145,8 +143,8 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, unsigned long *bitmap, struct sock *sk, unsigned int log) { + kuid_t uid = sk_uid(sk); struct sock *sk2; - kuid_t uid = sock_i_uid(sk); sk_for_each(sk2, &hslot->head) { if (net_eq(sock_net(sk2), net) && @@ -158,7 +156,7 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, inet_rcv_saddr_equal(sk, sk2, true)) { if (sk2->sk_reuseport && sk->sk_reuseport && !rcu_access_pointer(sk->sk_reuseport_cb) && - uid_eq(uid, sock_i_uid(sk2))) { + uid_eq(uid, sk_uid(sk2))) { if (!bitmap) return 0; } else { @@ -180,8 +178,8 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num, struct udp_hslot *hslot2, struct sock *sk) { + kuid_t uid = sk_uid(sk); struct sock *sk2; - kuid_t uid = sock_i_uid(sk); int res = 0; spin_lock(&hslot2->lock); @@ -195,7 +193,7 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num, inet_rcv_saddr_equal(sk, sk2, true)) { if (sk2->sk_reuseport && sk->sk_reuseport && !rcu_access_pointer(sk->sk_reuseport_cb) && - uid_eq(uid, sock_i_uid(sk2))) { + uid_eq(uid, sk_uid(sk2))) { res = 0; } else { res = 1; @@ -210,7 +208,7 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num, static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot) { struct net *net = sock_net(sk); - kuid_t uid = sock_i_uid(sk); + kuid_t uid = sk_uid(sk); struct sock *sk2; 
sk_for_each(sk2, &hslot->head) { @@ -220,7 +218,7 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot) ipv6_only_sock(sk2) == ipv6_only_sock(sk) && (udp_sk(sk2)->udp_port_hash == udp_sk(sk)->udp_port_hash) && (sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && - sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) && + sk2->sk_reuseport && uid_eq(uid, sk_uid(sk2)) && inet_rcv_saddr_equal(sk, sk2, false)) { return reuseport_add_sock(sk, sk2, inet_rcv_saddr_any(sk)); @@ -1445,7 +1443,8 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) flowi4_init_output(fl4, ipc.oif, ipc.sockc.mark, ipc.tos & INET_DSCP_MASK, scope, sk->sk_protocol, flow_flags, faddr, saddr, - dport, inet->inet_sport, sk->sk_uid); + dport, inet->inet_sport, + sk_uid(sk)); security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); rt = ip_route_output_flow(net, fl4, sk); @@ -3234,7 +3233,7 @@ struct proto udp_prot = { #ifdef CONFIG_BPF_SYSCALL .psock_update_sk_prot = udp_bpf_update_proto, #endif - .memory_allocated = &udp_memory_allocated, + .memory_allocated = &net_aligned_data.udp_memory_allocated, .per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc, .sysctl_mem = sysctl_udp_mem, @@ -3386,7 +3385,7 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f, sk_wmem_alloc_get(sp), udp_rqueue_get(sp), 0, 0L, 0, - from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)), + from_kuid_munged(seq_user_ns(f), sk_uid(sp)), 0, sock_i_ino(sp), refcount_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); @@ -3629,7 +3628,7 @@ static int bpf_iter_udp_seq_show(struct seq_file *seq, void *v) goto unlock; } - uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)); + uid = from_kuid_munged(seq_user_ns(seq), sk_uid(sk)); meta.seq = seq; prog = bpf_iter_get_info(&meta, false); ret = udp_prog_seq_show(prog, &meta, v, uid, state->bucket); diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index e1ff3a375996..c7142213fc21 100644 --- a/net/ipv4/udp_impl.h +++ 
b/net/ipv4/udp_impl.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _UDP4_IMPL_H #define _UDP4_IMPL_H +#include <net/aligned_data.h> #include <net/udp.h> #include <net/udplite.h> #include <net/protocol.h> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 85b5aa82d7d7..75c489edc438 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -44,7 +44,7 @@ struct udp_tunnel_type_entry { DEFINE_STATIC_CALL(udp_tunnel_gro_rcv, dummy_gro_rcv); static DEFINE_STATIC_KEY_FALSE(udp_tunnel_static_call); -static struct mutex udp_tunnel_gro_type_lock; +static DEFINE_MUTEX(udp_tunnel_gro_type_lock); static struct udp_tunnel_type_entry udp_tunnel_gro_types[UDP_MAX_TUNNEL_TYPES]; static unsigned int udp_tunnel_gro_type_nr; static DEFINE_SPINLOCK(udp_tunnel_gro_lock); @@ -144,11 +144,6 @@ out: } EXPORT_SYMBOL_GPL(udp_tunnel_update_gro_rcv); -static void udp_tunnel_gro_init(void) -{ - mutex_init(&udp_tunnel_gro_type_lock); -} - static struct sk_buff *udp_tunnel_gro_rcv(struct sock *sk, struct list_head *head, struct sk_buff *skb) @@ -165,8 +160,6 @@ static struct sk_buff *udp_tunnel_gro_rcv(struct sock *sk, #else -static void udp_tunnel_gro_init(void) {} - static struct sk_buff *udp_tunnel_gro_rcv(struct sock *sk, struct list_head *head, struct sk_buff *skb) @@ -1000,6 +993,5 @@ int __init udpv4_offload_init(void) }, }; - udp_tunnel_gro_init(); return inet_add_offload(&net_hotdata.udpv4_offload, IPPROTO_UDP); } diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c index 2326548997d3..fce945f23069 100644 --- a/net/ipv4/udp_tunnel_core.c +++ b/net/ipv4/udp_tunnel_core.c @@ -134,15 +134,17 @@ void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type) struct udp_tunnel_info ti; struct net_device *dev; + ASSERT_RTNL(); + ti.type = type; ti.sa_family = sk->sk_family; ti.port = inet_sk(sk)->inet_sport; - rcu_read_lock(); - for_each_netdev_rcu(net, dev) { + for_each_netdev(net, dev) { + udp_tunnel_nic_lock(dev); 
udp_tunnel_nic_add_port(dev, &ti); + udp_tunnel_nic_unlock(dev); } - rcu_read_unlock(); } EXPORT_SYMBOL_GPL(udp_tunnel_notify_add_rx_port); @@ -154,22 +156,24 @@ void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type) struct udp_tunnel_info ti; struct net_device *dev; + ASSERT_RTNL(); + ti.type = type; ti.sa_family = sk->sk_family; ti.port = inet_sk(sk)->inet_sport; - rcu_read_lock(); - for_each_netdev_rcu(net, dev) { + for_each_netdev(net, dev) { + udp_tunnel_nic_lock(dev); udp_tunnel_nic_del_port(dev, &ti); + udp_tunnel_nic_unlock(dev); } - rcu_read_unlock(); } EXPORT_SYMBOL_GPL(udp_tunnel_notify_del_rx_port); void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, - bool xnet, bool nocheck) + bool xnet, bool nocheck, u16 ipcb_flags) { struct udphdr *uh; @@ -185,7 +189,8 @@ void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb udp_set_csum(nocheck, skb, src, dst, skb->len); - iptunnel_xmit(sk, rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, xnet); + iptunnel_xmit(sk, rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, xnet, + ipcb_flags); } EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb); diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c index b6d2d16189c0..ff66db48453c 100644 --- a/net/ipv4/udp_tunnel_nic.c +++ b/net/ipv4/udp_tunnel_nic.c @@ -29,6 +29,7 @@ struct udp_tunnel_nic_table_entry { * struct udp_tunnel_nic - UDP tunnel port offload state * @work: async work for talking to hardware from process context * @dev: netdev pointer + * @lock: protects all fields * @need_sync: at least one port start changed * @need_replay: space was freed, we need a replay of all ports * @work_pending: @work is currently scheduled @@ -41,6 +42,8 @@ struct udp_tunnel_nic { struct net_device *dev; + struct mutex lock; + u8 need_sync:1; u8 need_replay:1; u8 work_pending:1; @@ -298,22 +301,11 @@ 
__udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn) static void udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn) { - const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; - bool may_sleep; - if (!utn->need_sync) return; - /* Drivers which sleep in the callback need to update from - * the workqueue, if we come from the tunnel driver's notification. - */ - may_sleep = info->flags & UDP_TUNNEL_NIC_INFO_MAY_SLEEP; - if (!may_sleep) - __udp_tunnel_nic_device_sync(dev, utn); - if (may_sleep || utn->need_replay) { - queue_work(udp_tunnel_nic_workqueue, &utn->work); - utn->work_pending = 1; - } + queue_work(udp_tunnel_nic_workqueue, &utn->work); + utn->work_pending = 1; } static bool @@ -554,12 +546,12 @@ static void __udp_tunnel_nic_reset_ntf(struct net_device *dev) struct udp_tunnel_nic *utn; unsigned int i, j; - ASSERT_RTNL(); - utn = dev->udp_tunnel_nic; if (!utn) return; + mutex_lock(&utn->lock); + utn->need_sync = false; for (i = 0; i < utn->n_tables; i++) for (j = 0; j < info->tables[i].n_entries; j++) { @@ -569,7 +561,7 @@ static void __udp_tunnel_nic_reset_ntf(struct net_device *dev) entry->flags &= ~(UDP_TUNNEL_NIC_ENTRY_DEL | UDP_TUNNEL_NIC_ENTRY_OP_FAIL); - /* We don't release rtnl across ops */ + /* We don't release utn lock across ops */ WARN_ON(entry->flags & UDP_TUNNEL_NIC_ENTRY_FROZEN); if (!entry->use_cnt) continue; @@ -579,6 +571,8 @@ static void __udp_tunnel_nic_reset_ntf(struct net_device *dev) } __udp_tunnel_nic_device_sync(dev, utn); + + mutex_unlock(&utn->lock); } static size_t @@ -643,6 +637,33 @@ err_cancel: return -EMSGSIZE; } +static void __udp_tunnel_nic_assert_locked(struct net_device *dev) +{ + struct udp_tunnel_nic *utn; + + utn = dev->udp_tunnel_nic; + if (utn) + lockdep_assert_held(&utn->lock); +} + +static void __udp_tunnel_nic_lock(struct net_device *dev) +{ + struct udp_tunnel_nic *utn; + + utn = dev->udp_tunnel_nic; + if (utn) + mutex_lock(&utn->lock); +} + +static 
void __udp_tunnel_nic_unlock(struct net_device *dev) +{ + struct udp_tunnel_nic *utn; + + utn = dev->udp_tunnel_nic; + if (utn) + mutex_unlock(&utn->lock); +} + static const struct udp_tunnel_nic_ops __udp_tunnel_nic_ops = { .get_port = __udp_tunnel_nic_get_port, .set_port_priv = __udp_tunnel_nic_set_port_priv, @@ -651,6 +672,9 @@ static const struct udp_tunnel_nic_ops __udp_tunnel_nic_ops = { .reset_ntf = __udp_tunnel_nic_reset_ntf, .dump_size = __udp_tunnel_nic_dump_size, .dump_write = __udp_tunnel_nic_dump_write, + .assert_locked = __udp_tunnel_nic_assert_locked, + .lock = __udp_tunnel_nic_lock, + .unlock = __udp_tunnel_nic_unlock, }; static void @@ -710,11 +734,15 @@ static void udp_tunnel_nic_device_sync_work(struct work_struct *work) container_of(work, struct udp_tunnel_nic, work); rtnl_lock(); + mutex_lock(&utn->lock); + utn->work_pending = 0; __udp_tunnel_nic_device_sync(utn->dev, utn); if (utn->need_replay) udp_tunnel_nic_replay(utn->dev, utn); + + mutex_unlock(&utn->lock); rtnl_unlock(); } @@ -730,6 +758,7 @@ udp_tunnel_nic_alloc(const struct udp_tunnel_nic_info *info, return NULL; utn->n_tables = n_tables; INIT_WORK(&utn->work, udp_tunnel_nic_device_sync_work); + mutex_init(&utn->lock); for (i = 0; i < n_tables; i++) { utn->entries[i] = kcalloc(info->tables[i].n_entries, @@ -821,8 +850,11 @@ static int udp_tunnel_nic_register(struct net_device *dev) dev_hold(dev); dev->udp_tunnel_nic = utn; - if (!(info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY)) + if (!(info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY)) { + udp_tunnel_nic_lock(dev); udp_tunnel_get_rx_info(dev); + udp_tunnel_nic_unlock(dev); + } return 0; } @@ -832,6 +864,8 @@ udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn) { const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; + udp_tunnel_nic_lock(dev); + /* For a shared table remove this dev from the list of sharing devices * and if there are other devices just detach. 
*/ @@ -841,8 +875,10 @@ udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn) list_for_each_entry(node, &info->shared->devices, list) if (node->dev == dev) break; - if (list_entry_is_head(node, &info->shared->devices, list)) + if (list_entry_is_head(node, &info->shared->devices, list)) { + udp_tunnel_nic_unlock(dev); return; + } list_del(&node->list); kfree(node); @@ -852,6 +888,7 @@ udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn) if (first) { udp_tunnel_drop_rx_info(dev); utn->dev = first->dev; + udp_tunnel_nic_unlock(dev); goto release_dev; } @@ -862,6 +899,7 @@ udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn) * from the work which we will boot immediately. */ udp_tunnel_nic_flush(dev, utn); + udp_tunnel_nic_unlock(dev); /* Wait for the work to be done using the state, netdev core will * retry unregister until we give up our reference on this device. @@ -910,12 +948,16 @@ udp_tunnel_nic_netdevice_event(struct notifier_block *unused, return NOTIFY_DONE; if (event == NETDEV_UP) { + udp_tunnel_nic_lock(dev); WARN_ON(!udp_tunnel_nic_is_empty(dev, utn)); udp_tunnel_get_rx_info(dev); + udp_tunnel_nic_unlock(dev); return NOTIFY_OK; } if (event == NETDEV_GOING_DOWN) { + udp_tunnel_nic_lock(dev); udp_tunnel_nic_flush(dev, utn); + udp_tunnel_nic_unlock(dev); return NOTIFY_OK; } diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index af37af3ab727..d3e621a11a1a 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -60,7 +60,7 @@ struct proto udplite_prot = { .rehash = udp_v4_rehash, .get_port = udp_v4_get_port, - .memory_allocated = &udp_memory_allocated, + .memory_allocated = &net_aligned_data.udp_memory_allocated, .per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc, .sysctl_mem = sysctl_udp_mem, diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 3cff51ba72bb..0ae67d537499 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -31,7 +31,7 @@ static int 
__xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb) int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb) { return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, - net, sk, skb, skb->dev, skb_dst(skb)->dev, + net, sk, skb, skb->dev, skb_dst_dev(skb), __xfrm4_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 870a0bd6c2ba..c85b1db74b1a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2229,32 +2229,29 @@ errdad: in6_ifa_put(ifp); } -/* Join to solicited addr multicast group. - * caller must hold RTNL */ +/* Join to solicited addr multicast group. */ void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr) { struct in6_addr maddr; - if (dev->flags&(IFF_LOOPBACK|IFF_NOARP)) + if (READ_ONCE(dev->flags) & (IFF_LOOPBACK | IFF_NOARP)) return; addrconf_addr_solict_mult(addr, &maddr); ipv6_dev_mc_inc(dev, &maddr); } -/* caller must hold RTNL */ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr) { struct in6_addr maddr; - if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP)) + if (READ_ONCE(idev->dev->flags) & (IFF_LOOPBACK | IFF_NOARP)) return; addrconf_addr_solict_mult(addr, &maddr); __ipv6_dev_mc_dec(idev, &maddr); } -/* caller must hold RTNL */ static void addrconf_join_anycast(struct inet6_ifaddr *ifp) { struct in6_addr addr; @@ -2267,7 +2264,6 @@ static void addrconf_join_anycast(struct inet6_ifaddr *ifp) __ipv6_dev_ac_inc(ifp->idev, &addr); } -/* caller must hold RTNL */ static void addrconf_leave_anycast(struct inet6_ifaddr *ifp) { struct in6_addr addr; @@ -3208,7 +3204,7 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr, } } -#if IS_ENABLED(CONFIG_IPV6_SIT) || IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE) +#if IS_ENABLED(CONFIG_IPV6_SIT) || IS_ENABLED(CONFIG_NET_IPGRE) static void add_v4_addrs(struct inet6_dev *idev) { struct in6_addr addr; @@ -3463,6 +3459,7 @@ static void 
addrconf_dev_config(struct net_device *dev) (dev->type != ARPHRD_IEEE1394) && (dev->type != ARPHRD_TUNNEL6) && (dev->type != ARPHRD_6LOWPAN) && + (dev->type != ARPHRD_IP6GRE) && (dev->type != ARPHRD_TUNNEL) && (dev->type != ARPHRD_NONE) && (dev->type != ARPHRD_RAWIP)) { @@ -3518,7 +3515,7 @@ static void addrconf_sit_config(struct net_device *dev) } #endif -#if IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE) +#if IS_ENABLED(CONFIG_NET_IPGRE) static void addrconf_gre_config(struct net_device *dev) { struct inet6_dev *idev; @@ -3534,7 +3531,7 @@ static void addrconf_gre_config(struct net_device *dev) * which is in EUI64 mode (as __ipv6_isatap_ifid() would fail in this * case). Such devices fall back to add_v4_addrs() instead. */ - if (!(dev->type == ARPHRD_IPGRE && *(__be32 *)dev->dev_addr == 0 && + if (!(*(__be32 *)dev->dev_addr == 0 && idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64)) { addrconf_addr_gen(idev, true); return; @@ -3552,8 +3549,7 @@ static void addrconf_init_auto_addrs(struct net_device *dev) addrconf_sit_config(dev); break; #endif -#if IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE) - case ARPHRD_IP6GRE: +#if IS_ENABLED(CONFIG_NET_IPGRE) case ARPHRD_IPGRE: addrconf_gre_config(dev); break; @@ -3860,7 +3856,7 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister) * Do not dev_put! */ if (unregister) { - idev->dead = 1; + WRITE_ONCE(idev->dead, 1); /* protected by rtnl_lock */ RCU_INIT_POINTER(dev->ip6_ptr, NULL); diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index fb63ffbcfc64..567efd626ab4 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -20,12 +20,6 @@ #include <linux/netlink.h> #include <linux/rtnetlink.h> -#if 0 -#define ADDRLABEL(x...) printk(x) -#else -#define ADDRLABEL(x...) do { ; } while (0) -#endif - /* * Policy Table */ @@ -150,8 +144,8 @@ u32 ipv6_addr_label(struct net *net, label = p ? 
p->label : IPV6_ADDR_LABEL_DEFAULT; rcu_read_unlock(); - ADDRLABEL(KERN_DEBUG "%s(addr=%pI6, type=%d, ifindex=%d) => %08x\n", - __func__, addr, type, ifindex, label); + net_dbg_ratelimited("%s(addr=%pI6, type=%d, ifindex=%d) => %08x\n", __func__, addr, type, + ifindex, label); return label; } @@ -164,8 +158,8 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix, struct ip6addrlbl_entry *newp; int addrtype; - ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u)\n", - __func__, prefix, prefixlen, ifindex, (unsigned int)label); + net_dbg_ratelimited("%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u)\n", __func__, + prefix, prefixlen, ifindex, (unsigned int)label); addrtype = ipv6_addr_type(prefix) & (IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK); @@ -207,8 +201,7 @@ static int __ip6addrlbl_add(struct net *net, struct ip6addrlbl_entry *newp, struct hlist_node *n; int ret = 0; - ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", __func__, newp, - replace); + net_dbg_ratelimited("%s(newp=%p, replace=%d)\n", __func__, newp, replace); hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) { if (p->prefixlen == newp->prefixlen && @@ -247,9 +240,8 @@ static int ip6addrlbl_add(struct net *net, struct ip6addrlbl_entry *newp; int ret = 0; - ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n", - __func__, prefix, prefixlen, ifindex, (unsigned int)label, - replace); + net_dbg_ratelimited("%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n", + __func__, prefix, prefixlen, ifindex, (unsigned int)label, replace); newp = ip6addrlbl_alloc(prefix, prefixlen, ifindex, label); if (IS_ERR(newp)) @@ -271,8 +263,8 @@ static int __ip6addrlbl_del(struct net *net, struct hlist_node *n; int ret = -ESRCH; - ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", - __func__, prefix, prefixlen, ifindex); + net_dbg_ratelimited("%s(prefix=%pI6, 
prefixlen=%d, ifindex=%d)\n", __func__, prefix, + prefixlen, ifindex); hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) { if (p->prefixlen == prefixlen && @@ -294,8 +286,8 @@ static int ip6addrlbl_del(struct net *net, struct in6_addr prefix_buf; int ret; - ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", - __func__, prefix, prefixlen, ifindex); + net_dbg_ratelimited("%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", __func__, prefix, + prefixlen, ifindex); ipv6_addr_prefix(&prefix_buf, prefix, prefixlen); spin_lock(&net->ipv6.ip6addrlbl_table.lock); @@ -312,8 +304,6 @@ static int __net_init ip6addrlbl_net_init(struct net *net) int err; int i; - ADDRLABEL(KERN_DEBUG "%s\n", __func__); - spin_lock_init(&net->ipv6.ip6addrlbl_table.lock); INIT_HLIST_HEAD(&net->ipv6.ip6addrlbl_table.head); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index acaff1296783..1992621e3f3f 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -842,7 +842,7 @@ int inet6_sk_rebuild_header(struct sock *sk) fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; - fl6.flowi6_uid = sk->sk_uid; + fl6.flowi6_uid = sk_uid(sk); security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); rcu_read_lock(); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 21e01695b48c..53cf68e0242b 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -47,6 +47,9 @@ static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE]; static DEFINE_SPINLOCK(acaddr_hash_lock); +#define ac_dereference(a, idev) \ + rcu_dereference_protected(a, lockdep_is_held(&(idev)->lock)) + static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr); static u32 inet6_acaddr_hash(const struct net *net, @@ -64,14 +67,11 @@ static u32 inet6_acaddr_hash(const struct net *net, int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) { struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6_ac_socklist 
*pac = NULL; + struct net *net = sock_net(sk); struct net_device *dev = NULL; struct inet6_dev *idev; - struct ipv6_ac_socklist *pac; - struct net *net = sock_net(sk); - int ishost = !net->ipv6.devconf_all->forwarding; - int err = 0; - - ASSERT_RTNL(); + int err = 0, ishost; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; @@ -79,32 +79,43 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) return -EINVAL; if (ifindex) - dev = __dev_get_by_index(net, ifindex); + dev = dev_get_by_index(net, ifindex); - if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE)) - return -EINVAL; + if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE)) { + err = -EINVAL; + goto error; + } pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL); - if (!pac) - return -ENOMEM; + if (!pac) { + err = -ENOMEM; + goto error; + } + pac->acl_next = NULL; pac->acl_addr = *addr; + ishost = !READ_ONCE(net->ipv6.devconf_all->forwarding); + if (ifindex == 0) { struct rt6_info *rt; + rcu_read_lock(); rt = rt6_lookup(net, addr, NULL, 0, NULL, 0); if (rt) { - dev = rt->dst.dev; + dev = dst_dev(&rt->dst); + dev_hold(dev); ip6_rt_put(rt); } else if (ishost) { + rcu_read_unlock(); err = -EADDRNOTAVAIL; goto error; } else { /* router, no matching interface: just pick one */ - dev = __dev_get_by_flags(net, IFF_UP, - IFF_UP | IFF_LOOPBACK); + dev = dev_get_by_flags_rcu(net, IFF_UP, + IFF_UP | IFF_LOOPBACK); } + rcu_read_unlock(); } if (!dev) { @@ -112,7 +123,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) goto error; } - idev = __in6_dev_get(dev); + idev = in6_dev_get(dev); if (!idev) { if (ifindex) err = -ENODEV; @@ -120,8 +131,9 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) err = -EADDRNOTAVAIL; goto error; } + /* reset ishost, now that we have a specific device */ - ishost = !idev->cnf.forwarding; + ishost = !READ_ONCE(idev->cnf.forwarding); 
pac->acl_ifindex = dev->ifindex; @@ -134,7 +146,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) if (ishost) err = -EADDRNOTAVAIL; if (err) - goto error; + goto error_idev; } err = __ipv6_dev_ac_inc(idev, addr); @@ -144,7 +156,11 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) pac = NULL; } +error_idev: + in6_dev_put(idev); error: + dev_put(dev); + if (pac) sock_kfree_s(sk, pac, sizeof(*pac)); return err; @@ -155,12 +171,10 @@ error: */ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) { - struct ipv6_pinfo *np = inet6_sk(sk); - struct net_device *dev; struct ipv6_ac_socklist *pac, *prev_pac; + struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); - - ASSERT_RTNL(); + struct net_device *dev; prev_pac = NULL; for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) { @@ -176,9 +190,11 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) else np->ipv6_ac_list = pac->acl_next; - dev = __dev_get_by_index(net, pac->acl_ifindex); - if (dev) + dev = dev_get_by_index(net, pac->acl_ifindex); + if (dev) { ipv6_dev_ac_dec(dev, &pac->acl_addr); + dev_put(dev); + } sock_kfree_s(sk, pac, sizeof(*pac)); return 0; @@ -187,21 +203,20 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) void __ipv6_sock_ac_close(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); + struct net *net = sock_net(sk); struct net_device *dev = NULL; struct ipv6_ac_socklist *pac; - struct net *net = sock_net(sk); - int prev_index; + int prev_index = 0; - ASSERT_RTNL(); pac = np->ipv6_ac_list; np->ipv6_ac_list = NULL; - prev_index = 0; while (pac) { struct ipv6_ac_socklist *next = pac->acl_next; if (pac->acl_ifindex != prev_index) { - dev = __dev_get_by_index(net, pac->acl_ifindex); + dev_put(dev); + dev = dev_get_by_index(net, pac->acl_ifindex); prev_index = pac->acl_ifindex; } if (dev) @@ -209,6 +224,8 @@ void 
__ipv6_sock_ac_close(struct sock *sk) sock_kfree_s(sk, pac, sizeof(*pac)); pac = next; } + + dev_put(dev); } void ipv6_sock_ac_close(struct sock *sk) @@ -217,9 +234,8 @@ void ipv6_sock_ac_close(struct sock *sk) if (!np->ipv6_ac_list) return; - rtnl_lock(); + __ipv6_sock_ac_close(sk); - rtnl_unlock(); } static void ipv6_add_acaddr_hash(struct net *net, struct ifacaddr6 *aca) @@ -319,16 +335,14 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) struct net *net; int err; - ASSERT_RTNL(); - write_lock_bh(&idev->lock); if (idev->dead) { err = -ENODEV; goto out; } - for (aca = rtnl_dereference(idev->ac_list); aca; - aca = rtnl_dereference(aca->aca_next)) { + for (aca = ac_dereference(idev->ac_list, idev); aca; + aca = ac_dereference(aca->aca_next, idev)) { if (ipv6_addr_equal(&aca->aca_addr, addr)) { aca->aca_users++; err = 0; @@ -380,12 +394,10 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) { struct ifacaddr6 *aca, *prev_aca; - ASSERT_RTNL(); - write_lock_bh(&idev->lock); prev_aca = NULL; - for (aca = rtnl_dereference(idev->ac_list); aca; - aca = rtnl_dereference(aca->aca_next)) { + for (aca = ac_dereference(idev->ac_list, idev); aca; + aca = ac_dereference(aca->aca_next, idev)) { if (ipv6_addr_equal(&aca->aca_addr, addr)) break; prev_aca = aca; @@ -414,14 +426,18 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) return 0; } -/* called with rtnl_lock() */ static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr) { - struct inet6_dev *idev = __in6_dev_get(dev); + struct inet6_dev *idev = in6_dev_get(dev); + int err; if (!idev) return -ENODEV; - return __ipv6_dev_ac_dec(idev, addr); + + err = __ipv6_dev_ac_dec(idev, addr); + in6_dev_put(idev); + + return err; } void ipv6_ac_destroy_dev(struct inet6_dev *idev) @@ -429,7 +445,7 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev) struct ifacaddr6 *aca; write_lock_bh(&idev->lock); - while ((aca = 
rtnl_dereference(idev->ac_list)) != NULL) { + while ((aca = ac_dereference(idev->ac_list, idev)) != NULL) { rcu_assign_pointer(idev->ac_list, aca->aca_next); write_unlock_bh(&idev->lock); diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c index a247bb93908b..df1986973430 100644 --- a/net/ipv6/calipso.c +++ b/net/ipv6/calipso.c @@ -32,7 +32,7 @@ #include <linux/unaligned.h> #include <linux/crc-ccitt.h> -/* Maximium size of the calipso option including +/* Maximum size of the calipso option including * the two-byte TLV header. */ #define CALIPSO_OPT_LEN_MAX (2 + 252) @@ -42,13 +42,13 @@ */ #define CALIPSO_HDR_LEN (2 + 8) -/* Maximium size of the calipso option including +/* Maximum size of the calipso option including * the two-byte TLV header and upto 3 bytes of * leading pad and 7 bytes of trailing pad. */ #define CALIPSO_OPT_LEN_MAX_WITH_PAD (3 + CALIPSO_OPT_LEN_MAX + 7) - /* Maximium size of u32 aligned buffer required to hold calipso + /* Maximum size of u32 aligned buffer required to hold calipso * option. Max of 3 initial pad bytes starting from buffer + 3. * i.e. the worst case is when the previous tlv finishes on 4n + 3. 
*/ diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index fff78496803d..972bf0426d59 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -53,7 +53,7 @@ static void ip6_datagram_flow_key_init(struct flowi6 *fl6, fl6->fl6_dport = inet->inet_dport; fl6->fl6_sport = inet->inet_sport; fl6->flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label); - fl6->flowi6_uid = sk->sk_uid; + fl6->flowi6_uid = sk_uid(sk); if (!oif) oif = np->sticky_pktinfo.ipi6_ifindex; @@ -127,7 +127,7 @@ void ip6_datagram_release_cb(struct sock *sk) rcu_read_lock(); dst = __sk_dst_get(sk); - if (!dst || !dst->obsolete || + if (!dst || !READ_ONCE(dst->obsolete) || dst->ops->check(dst, inet6_sk(sk)->dst_cookie)) { rcu_read_unlock(); return; @@ -1064,7 +1064,7 @@ void __ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp, sk_wmem_alloc_get(sp), rqueue, 0, 0L, 0, - from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), + from_kuid_munged(seq_user_ns(seq), sk_uid(sp)), 0, sock_i_ino(sp), refcount_read(&sp->sk_refcnt), sp, diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 457de0745a33..d1ef9644f826 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -306,7 +306,7 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + ((skb_transport_header(skb)[1] + 1) << 3)))) { - __IP6_INC_STATS(dev_net(dst->dev), idev, + __IP6_INC_STATS(dev_net(dst_dev(dst)), idev, IPSTATS_MIB_INHDRERRORS); fail_and_free: kfree_skb(skb); @@ -460,7 +460,7 @@ looped_back: return -1; } - if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) { + if (skb_dst_dev(skb)->flags & IFF_LOOPBACK) { if (ipv6_hdr(skb)->hop_limit <= 1) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); icmpv6_send(skb, ICMPV6_TIME_EXCEED, @@ -621,7 +621,7 @@ looped_back: return -1; } - if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) { + if (skb_dst_dev(skb)->flags & IFF_LOOPBACK) { if (ipv6_hdr(skb)->hop_limit <= 1) { 
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); icmpv6_send(skb, ICMPV6_TIME_EXCEED, @@ -783,7 +783,7 @@ looped_back: kfree_skb(skb); return -1; } - if (!ipv6_chk_home_addr(dev_net(skb_dst(skb)->dev), addr)) { + if (!ipv6_chk_home_addr(skb_dst_dev_net(skb), addr)) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; @@ -809,7 +809,7 @@ looped_back: return -1; } - if (skb_dst(skb)->dev->flags&IFF_LOOPBACK) { + if (skb_dst_dev(skb)->flags & IFF_LOOPBACK) { if (ipv6_hdr(skb)->hop_limit <= 1) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 3fd19a84b358..44550957fd4e 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -196,6 +196,7 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, struct flowi6 *fl6, bool apply_ratelimit) { struct net *net = sock_net(sk); + struct net_device *dev; struct dst_entry *dst; bool res = false; @@ -208,10 +209,11 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, * this lookup should be more aggressive (not longer than timeout). 
*/ dst = ip6_route_output(net, sk, fl6); + dev = dst_dev(dst); if (dst->error) { IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); - } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) { + } else if (dev && (dev->flags & IFF_LOOPBACK)) { res = true; } else { struct rt6_info *rt = dst_rt6_info(dst); diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index 7d574f5132e2..7bb9edc5c28c 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -70,7 +70,7 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb) */ memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_oif = orig_dst->dev->ifindex; + fl6.flowi6_oif = dst_dev(orig_dst)->ifindex; fl6.flowi6_iif = LOOPBACK_IFINDEX; fl6.daddr = *rt6_nexthop(dst_rt6_info(orig_dst), &ip6h->daddr); diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 8f500eaf33cf..333e43434dd7 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -45,7 +45,7 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk, fl6->flowi6_mark = ireq->ir_mark; fl6->fl6_dport = ireq->ir_rmt_port; fl6->fl6_sport = htons(ireq->ir_num); - fl6->flowi6_uid = sk->sk_uid; + fl6->flowi6_uid = sk_uid(sk); security_req_classify_flow(req, flowi6_to_flowi_common(fl6)); dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p); @@ -79,7 +79,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk, fl6->flowi6_mark = sk->sk_mark; fl6->fl6_sport = inet->inet_sport; fl6->fl6_dport = inet->inet_dport; - fl6->flowi6_uid = sk->sk_uid; + fl6->flowi6_uid = sk_uid(sk); security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6)); rcu_read_lock(); diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c index a84d332f952f..9553a3200081 100644 --- a/net/ipv6/ioam6.c +++ b/net/ipv6/ioam6.c @@ -696,6 +696,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, struct ioam6_schema *sc, u8 sclen, bool is_input) { + struct net_device *dev = skb_dst_dev(skb); 
struct timespec64 ts; ktime_t tstamp; u64 raw64; @@ -712,7 +713,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, if (is_input) byte--; - raw32 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id; + raw32 = dev_net(dev)->ipv6.sysctl.ioam6_id; *(__be32 *)data = cpu_to_be32((byte << 24) | raw32); data += sizeof(__be32); @@ -728,10 +729,10 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, *(__be16 *)data = cpu_to_be16(raw16); data += sizeof(__be16); - if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) + if (dev->flags & IFF_LOOPBACK) raw16 = IOAM6_U16_UNAVAILABLE; else - raw16 = (__force u16)READ_ONCE(__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id); + raw16 = (__force u16)READ_ONCE(__in6_dev_get(dev)->cnf.ioam6_id); *(__be16 *)data = cpu_to_be16(raw16); data += sizeof(__be16); @@ -783,10 +784,10 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, struct Qdisc *qdisc; __u32 qlen, backlog; - if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) { + if (dev->flags & IFF_LOOPBACK) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); } else { - queue = skb_get_tx_queue(skb_dst(skb)->dev, skb); + queue = skb_get_tx_queue(dev, skb); qdisc = rcu_dereference(queue->qdisc); qdisc_qstats_qlen_backlog(qdisc, &qlen, &backlog); @@ -807,7 +808,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, if (is_input) byte--; - raw64 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id_wide; + raw64 = dev_net(dev)->ipv6.sysctl.ioam6_id_wide; *(__be64 *)data = cpu_to_be64(((u64)byte << 56) | raw64); data += sizeof(__be64); @@ -823,10 +824,10 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, *(__be32 *)data = cpu_to_be32(raw32); data += sizeof(__be32); - if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) + if (dev->flags & IFF_LOOPBACK) raw32 = IOAM6_U32_UNAVAILABLE; else - raw32 = READ_ONCE(__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id_wide); + raw32 = READ_ONCE(__in6_dev_get(dev)->cnf.ioam6_id_wide); *(__be32 *)data = cpu_to_be32(raw32); data += sizeof(__be32); 
diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c index 40df8bdfaacd..1fe7894f14dd 100644 --- a/net/ipv6/ioam6_iptunnel.c +++ b/net/ipv6/ioam6_iptunnel.c @@ -335,7 +335,7 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb, if (has_tunsrc) memcpy(&hdr->saddr, tunsrc, sizeof(*tunsrc)); else - ipv6_dev_get_saddr(net, dst->dev, &hdr->daddr, + ipv6_dev_get_saddr(net, dst_dev(dst), &hdr->daddr, IPV6_PREFER_SRC_PUBLIC, &hdr->saddr); skb_postpush_rcsum(skb, hdr, len); @@ -442,7 +442,7 @@ do_encap: dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr); local_bh_enable(); - err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); + err = skb_cow_head(skb, LL_RESERVED_SPACE(dst_dev(dst))); if (unlikely(err)) goto drop; } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 93578b2ec35f..7272d7e0fc36 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -963,8 +963,7 @@ insert_above: } static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh, - const struct fib6_info *match, - const struct fib6_table *table) + const struct fib6_info *match) { int cpu; @@ -999,21 +998,15 @@ static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh, rcu_read_unlock(); } -struct fib6_nh_pcpu_arg { - struct fib6_info *from; - const struct fib6_table *table; -}; - static int fib6_nh_drop_pcpu_from(struct fib6_nh *nh, void *_arg) { - struct fib6_nh_pcpu_arg *arg = _arg; + struct fib6_info *arg = _arg; - __fib6_drop_pcpu_from(nh, arg->from, arg->table); + __fib6_drop_pcpu_from(nh, arg); return 0; } -static void fib6_drop_pcpu_from(struct fib6_info *f6i, - const struct fib6_table *table) +static void fib6_drop_pcpu_from(struct fib6_info *f6i) { /* Make sure rt6_make_pcpu_route() wont add other percpu routes * while we are cleaning them here. 
@@ -1022,19 +1015,14 @@ static void fib6_drop_pcpu_from(struct fib6_info *f6i, mb(); /* paired with the cmpxchg() in rt6_make_pcpu_route() */ if (f6i->nh) { - struct fib6_nh_pcpu_arg arg = { - .from = f6i, - .table = table - }; - rcu_read_lock(); - nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_drop_pcpu_from, &arg); + nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_drop_pcpu_from, f6i); rcu_read_unlock(); } else { struct fib6_nh *fib6_nh; fib6_nh = f6i->fib6_nh; - __fib6_drop_pcpu_from(fib6_nh, f6i, table); + __fib6_drop_pcpu_from(fib6_nh, f6i); } } @@ -1045,7 +1033,7 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn, /* Flush all cached dst in exception table */ rt6_flush_exceptions(rt); - fib6_drop_pcpu_from(rt, table); + fib6_drop_pcpu_from(rt); if (rt->nh) { spin_lock(&rt->nh->lock); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 2dc9dcffe2ca..a1210fd6404e 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1085,9 +1085,11 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, htonl(atomic_fetch_inc(&t->o_seqno))); /* TooBig packet may have updated dst->dev's mtu */ - if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu) - dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu, false); - + if (!t->parms.collect_md && dst) { + mtu = READ_ONCE(dst_dev(dst)->mtu); + if (dst_mtu(dst) > mtu) + dst->ops->update_pmtu(dst, NULL, skb, mtu, false); + } err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, NEXTHDR_GRE); if (err != 0) { diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 39da6a7ce5f1..168ec07e31cc 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -187,7 +187,9 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, * arrived via the sending interface (ethX), because of the * nature of scoping architecture. --yoshfuji */ - IP6CB(skb)->iif = skb_valid_dst(skb) ? 
ip6_dst_idev(skb_dst(skb))->dev->ifindex : dev->ifindex; + IP6CB(skb)->iif = skb_valid_dst(skb) ? + ip6_dst_idev(skb_dst(skb))->dev->ifindex : + dev->ifindex; if (unlikely(!pskb_may_pull(skb, sizeof(*hdr)))) goto err; @@ -476,6 +478,13 @@ discard: static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { + if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) { + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INDISCARDS); + kfree_skb_reason(skb, SKB_DROP_REASON_NOMEM); + return 0; + } + skb_clear_delivery_time(skb); ip6_protocol_deliver_rcu(net, skb, 0, false); @@ -499,38 +508,32 @@ EXPORT_SYMBOL_GPL(ip6_input); int ip6_mc_input(struct sk_buff *skb) { + struct net_device *dev = skb->dev; int sdif = inet6_sdif(skb); const struct ipv6hdr *hdr; - struct net_device *dev; bool deliver; - __IP6_UPD_PO_STATS(dev_net(skb_dst(skb)->dev), - __in6_dev_get_safely(skb->dev), IPSTATS_MIB_INMCAST, - skb->len); + __IP6_UPD_PO_STATS(skb_dst_dev_net_rcu(skb), + __in6_dev_get_safely(dev), IPSTATS_MIB_INMCAST, + skb->len); /* skb->dev passed may be master dev for vrfs. 
*/ if (sdif) { - rcu_read_lock(); - dev = dev_get_by_index_rcu(dev_net(skb->dev), sdif); + dev = dev_get_by_index_rcu(dev_net_rcu(dev), sdif); if (!dev) { - rcu_read_unlock(); kfree_skb(skb); return -ENODEV; } - } else { - dev = skb->dev; } hdr = ipv6_hdr(skb); deliver = ipv6_chk_mcast_addr(dev, &hdr->daddr, NULL); - if (sdif) - rcu_read_unlock(); #ifdef CONFIG_IPV6_MROUTE /* * IPv6 multicast router mode is now supported ;) */ - if (atomic_read(&dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding) && + if (atomic_read(&dev_net_rcu(skb->dev)->ipv6.devconf_all->mc_forwarding) && !(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL)) && likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) { @@ -571,22 +574,21 @@ int ip6_mc_input(struct sk_buff *skb) /* unknown RA - process it normally */ } - if (deliver) + if (deliver) { skb2 = skb_clone(skb, GFP_ATOMIC); - else { + } else { skb2 = skb; skb = NULL; } - if (skb2) { + if (skb2) ip6_mr_input(skb2); - } } out: #endif - if (likely(deliver)) + if (likely(deliver)) { ip6_input(skb); - else { + } else { /* discard */ kfree_skb(skb); } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 7bd29a9ff0db..fcc20c7250eb 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -60,7 +60,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - struct net_device *dev = dst->dev; + struct net_device *dev = dst_dev(dst); struct inet6_dev *idev = ip6_dst_idev(dst); unsigned int hh_len = LL_RESERVED_SPACE(dev); const struct in6_addr *daddr, *nexthop; @@ -232,8 +232,9 @@ static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *s int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev; - struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); + struct dst_entry *dst = skb_dst(skb); + struct net_device *dev = dst_dev(dst), *indev = 
skb->dev; + struct inet6_dev *idev = ip6_dst_idev(dst); skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; @@ -271,7 +272,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, const struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *first_hop = &fl6->daddr; struct dst_entry *dst = skb_dst(skb); - struct net_device *dev = dst->dev; + struct net_device *dev = dst_dev(dst); struct inet6_dev *idev = ip6_dst_idev(dst); struct hop_jumbo_hdr *hop_jumbo; int hoplen = sizeof(*hop_jumbo); @@ -503,7 +504,8 @@ int ip6_forward(struct sk_buff *skb) struct dst_entry *dst = skb_dst(skb); struct ipv6hdr *hdr = ipv6_hdr(skb); struct inet6_skb_parm *opt = IP6CB(skb); - struct net *net = dev_net(dst->dev); + struct net *net = dev_net(dst_dev(dst)); + struct net_device *dev; struct inet6_dev *idev; SKB_DR(reason); u32 mtu; @@ -591,12 +593,12 @@ int ip6_forward(struct sk_buff *skb) goto drop; } dst = skb_dst(skb); - + dev = dst_dev(dst); /* IPv6 specs say nothing about it, but it is clear that we cannot send redirects to source routed frames. We don't send redirects to frames decapsulated from IPsec. 
*/ - if (IP6CB(skb)->iif == dst->dev->ifindex && + if (IP6CB(skb)->iif == dev->ifindex && opt->srcrt == 0 && !skb_sec_path(skb)) { struct in6_addr *target = NULL; struct inet_peer *peer; @@ -644,7 +646,7 @@ int ip6_forward(struct sk_buff *skb) if (ip6_pkt_too_big(skb, mtu)) { /* Again, force OUTPUT device used as source address */ - skb->dev = dst->dev; + skb->dev = dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS); __IP6_INC_STATS(net, ip6_dst_idev(dst), @@ -653,7 +655,7 @@ int ip6_forward(struct sk_buff *skb) return -EMSGSIZE; } - if (skb_cow(skb, dst->dev->hard_header_len)) { + if (skb_cow(skb, dev->hard_header_len)) { __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS); goto drop; @@ -666,7 +668,7 @@ int ip6_forward(struct sk_buff *skb) hdr->hop_limit--; return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, - net, NULL, skb, skb->dev, dst->dev, + net, NULL, skb, skb->dev, dev, ip6_forward_finish); error: @@ -1093,7 +1095,7 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk, #ifdef CONFIG_IPV6_SUBTREES ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) || #endif - (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) { + (fl6->flowi6_oif && fl6->flowi6_oif != dst_dev(dst)->ifindex)) { dst_release(dst); dst = NULL; } @@ -1760,8 +1762,7 @@ alloc_new_skb: if (WARN_ON_ONCE(copy > msg->msg_iter.count)) goto error; - err = skb_splice_from_iter(skb, &msg->msg_iter, copy, - sk->sk_allocation); + err = skb_splice_from_iter(skb, &msg->msg_iter, copy); if (err < 0) goto error; copy = err; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 894d3158a6f0..3262e81223df 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -632,7 +632,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } else { if (ip_route_input(skb2, eiph->daddr, eiph->saddr, ip4h_dscp(eiph), skb2->dev) || - skb_dst(skb2)->dev->type != ARPHRD_TUNNEL6) + skb_dst_dev(skb2)->type != 
ARPHRD_TUNNEL6) goto out; } @@ -1179,7 +1179,7 @@ route_lookup: ndst = dst; } - tdev = dst->dev; + tdev = dst_dev(dst); if (tdev == dev) { DEV_STATS_INC(dev, collisions); @@ -1255,7 +1255,7 @@ route_lookup: /* Calculate max headroom for all the headers and adjust * needed_headroom if necessary. */ - max_headroom = LL_RESERVED_SPACE(dst->dev) + sizeof(struct ipv6hdr) + max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr) + dst->header_len + t->hlen; if (max_headroom > READ_ONCE(dev->needed_headroom)) WRITE_ONCE(dev->needed_headroom, max_headroom); @@ -1278,7 +1278,7 @@ route_lookup: ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; ipv6h->daddr = fl6->daddr; - ip6tunnel_xmit(NULL, skb, dev); + ip6tunnel_xmit(NULL, skb, dev, 0); return 0; tx_err_link_failure: DEV_STATS_INC(dev, tx_carrier_errors); @@ -1562,11 +1562,22 @@ static void ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p) netdev_state_change(t->dev); } -static void ip6_tnl0_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p) +static int ip6_tnl0_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p, + bool strict) { - /* for default tnl0 device allow to change only the proto */ + /* For the default ip6tnl0 device, allow changing only the protocol + * (the IP6_TNL_F_CAP_PER_PACKET flag is set on ip6tnl0, and all other + * parameters are 0). 
+ */ + if (strict && + (!ipv6_addr_any(&p->laddr) || !ipv6_addr_any(&p->raddr) || + p->flags != t->parms.flags || p->hop_limit || p->encap_limit || + p->flowinfo || p->link || p->fwmark || p->collect_md)) + return -EINVAL; + t->parms.proto = p->proto; netdev_state_change(t->dev); + return 0; } static void @@ -1680,7 +1691,7 @@ ip6_tnl_siocdevprivate(struct net_device *dev, struct ifreq *ifr, } else t = netdev_priv(dev); if (dev == ip6n->fb_tnl_dev) - ip6_tnl0_update(t, &p1); + ip6_tnl0_update(t, &p1, false); else ip6_tnl_update(t, &p1); } @@ -2053,8 +2064,28 @@ static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[], struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); struct ip_tunnel_encap ipencap; - if (dev == ip6n->fb_tnl_dev) - return -EINVAL; + if (dev == ip6n->fb_tnl_dev) { + if (ip_tunnel_netlink_encap_parms(data, &ipencap)) { + /* iproute2 always sets TUNNEL_ENCAP_FLAG_CSUM6, so + * let's ignore this flag. + */ + ipencap.flags &= ~TUNNEL_ENCAP_FLAG_CSUM6; + if (memchr_inv(&ipencap, 0, sizeof(ipencap))) { + NL_SET_ERR_MSG(extack, + "Only protocol can be changed for fallback tunnel, not encap params"); + return -EINVAL; + } + } + + ip6_tnl_netlink_parms(data, &p); + if (ip6_tnl0_update(t, &p, true) < 0) { + NL_SET_ERR_MSG(extack, + "Only protocol can be changed for fallback tunnel"); + return -EINVAL; + } + + return 0; + } if (ip_tunnel_netlink_encap_parms(data, &ipencap)) { int err = ip6_tnl_encap_setup(t, &ipencap); diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index c99053189ea8..0ff547a4bff7 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -74,13 +74,14 @@ error: } EXPORT_SYMBOL_GPL(udp_sock_create6); -int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, - struct net_device *dev, - const struct in6_addr *saddr, - const struct in6_addr *daddr, - __u8 prio, __u8 ttl, __be32 label, - __be16 src_port, __be16 dst_port, bool nocheck) +void 
udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, + struct net_device *dev, + const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u8 prio, __u8 ttl, __be32 label, + __be16 src_port, __be16 dst_port, bool nocheck, + u16 ip6cb_flags) { struct udphdr *uh; struct ipv6hdr *ip6h; @@ -108,8 +109,7 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, ip6h->daddr = *daddr; ip6h->saddr = *saddr; - ip6tunnel_xmit(sk, skb, dev); - return 0; + ip6tunnel_xmit(sk, skb, dev, ip6cb_flags); } EXPORT_SYMBOL_GPL(udp_tunnel6_xmit_skb); @@ -168,7 +168,7 @@ struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb, netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr); return ERR_PTR(-ENETUNREACH); } - if (dst->dev == dev) { /* is this necessary? */ + if (dst_dev(dst) == dev) { /* is this necessary? */ netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr); dst_release(dst); return ERR_PTR(-ELOOP); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 40464a88bca6..ad5290be4dd6 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -497,7 +497,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) (const struct in6_addr *)&x->id.daddr)) goto tx_err_link_failure; - tdev = dst->dev; + tdev = dst_dev(dst); if (tdev == dev) { DEV_STATS_INC(dev, collisions); @@ -529,7 +529,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) xmit: skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev))); skb_dst_set(skb, dst); - skb->dev = skb_dst(skb)->dev; + skb->dev = dst_dev(dst); err = dst_output(t->net, skb->sk, skb); if (net_xmit_eval(err) == 0) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 9db31e5b998c..e047a4680ab0 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2035,8 +2035,8 @@ static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct * Processing handlers for ip6mr_forward */ -static int ip6mr_forward2(struct net *net, struct mr_table *mrt, - 
struct sk_buff *skb, int vifi) +static int ip6mr_prepare_xmit(struct net *net, struct mr_table *mrt, + struct sk_buff *skb, int vifi) { struct vif_device *vif = &mrt->vif_table[vifi]; struct net_device *vif_dev; @@ -2046,7 +2046,7 @@ static int ip6mr_forward2(struct net *net, struct mr_table *mrt, vif_dev = vif_dev_read(vif); if (!vif_dev) - goto out_free; + return -1; #ifdef CONFIG_IPV6_PIMSM_V2 if (vif->flags & MIFF_REGISTER) { @@ -2055,7 +2055,7 @@ static int ip6mr_forward2(struct net *net, struct mr_table *mrt, DEV_STATS_ADD(vif_dev, tx_bytes, skb->len); DEV_STATS_INC(vif_dev, tx_packets); ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT); - goto out_free; + return -1; } #endif @@ -2069,7 +2069,7 @@ static int ip6mr_forward2(struct net *net, struct mr_table *mrt, dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { dst_release(dst); - goto out_free; + return -1; } skb_dst_drop(skb); @@ -2093,20 +2093,43 @@ static int ip6mr_forward2(struct net *net, struct mr_table *mrt, /* We are about to write */ /* XXX: extension headers? 
*/ if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(vif_dev))) - goto out_free; + return -1; ipv6h = ipv6_hdr(skb); ipv6h->hop_limit--; + return 0; +} + +static void ip6mr_forward2(struct net *net, struct mr_table *mrt, + struct sk_buff *skb, int vifi) +{ + struct net_device *indev = skb->dev; + + if (ip6mr_prepare_xmit(net, mrt, skb, vifi)) + goto out_free; IP6CB(skb)->flags |= IP6SKB_FORWARDED; - return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, - net, NULL, skb, skb->dev, vif_dev, - ip6mr_forward2_finish); + NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, + net, NULL, skb, indev, skb->dev, + ip6mr_forward2_finish); + return; + +out_free: + kfree_skb(skb); +} + +static void ip6mr_output2(struct net *net, struct mr_table *mrt, + struct sk_buff *skb, int vifi) +{ + if (ip6mr_prepare_xmit(net, mrt, skb, vifi)) + goto out_free; + + ip6_output(net, NULL, skb); + return; out_free: kfree_skb(skb); - return 0; } /* Called with rcu_read_lock() */ @@ -2221,6 +2244,56 @@ dont_forward: kfree_skb(skb); } +/* Called under rcu_read_lock() */ +static void ip6_mr_output_finish(struct net *net, struct mr_table *mrt, + struct net_device *dev, struct sk_buff *skb, + struct mfc6_cache *c) +{ + int psend = -1; + int ct; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + atomic_long_inc(&c->_c.mfc_un.res.pkt); + atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes); + WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies); + + /* Forward the frame */ + if (ipv6_addr_any(&c->mf6c_origin) && + ipv6_addr_any(&c->mf6c_mcastgrp)) { + if (ipv6_hdr(skb)->hop_limit > + c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) { + /* It's an (*,*) entry and the packet is not coming from + * the upstream: forward the packet to the upstream + * only. 
+ */ + psend = c->_c.mfc_parent; + goto last_forward; + } + goto dont_forward; + } + for (ct = c->_c.mfc_un.res.maxvif - 1; + ct >= c->_c.mfc_un.res.minvif; ct--) { + if (ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) { + if (psend != -1) { + struct sk_buff *skb2; + + skb2 = skb_clone(skb, GFP_ATOMIC); + if (skb2) + ip6mr_output2(net, mrt, skb2, psend); + } + psend = ct; + } + } +last_forward: + if (psend != -1) { + ip6mr_output2(net, mrt, skb, psend); + return; + } + +dont_forward: + kfree_skb(skb); +} /* * Multicast packets for forwarding arrive here @@ -2228,21 +2301,20 @@ dont_forward: int ip6_mr_input(struct sk_buff *skb) { + struct net_device *dev = skb->dev; + struct net *net = dev_net_rcu(dev); struct mfc6_cache *cache; - struct net *net = dev_net(skb->dev); struct mr_table *mrt; struct flowi6 fl6 = { - .flowi6_iif = skb->dev->ifindex, + .flowi6_iif = dev->ifindex, .flowi6_mark = skb->mark, }; int err; - struct net_device *dev; /* skb->dev passed in is the master dev for vrfs. * Get the proper interface that does have a vif associated with it. 
*/ - dev = skb->dev; - if (netif_is_l3_master(skb->dev)) { + if (netif_is_l3_master(dev)) { dev = dev_get_by_index_rcu(net, IPCB(skb)->iif); if (!dev) { kfree_skb(skb); @@ -2288,6 +2360,61 @@ int ip6_mr_input(struct sk_buff *skb) return 0; } +int ip6_mr_output(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct net_device *dev = skb_dst(skb)->dev; + struct flowi6 fl6 = (struct flowi6) { + .flowi6_iif = LOOPBACK_IFINDEX, + .flowi6_mark = skb->mark, + }; + struct mfc6_cache *cache; + struct mr_table *mrt; + int err; + int vif; + + guard(rcu)(); + + if (IP6CB(skb)->flags & IP6SKB_FORWARDED) + goto ip6_output; + if (!(IP6CB(skb)->flags & IP6SKB_MCROUTE)) + goto ip6_output; + + err = ip6mr_fib_lookup(net, &fl6, &mrt); + if (err < 0) { + kfree_skb(skb); + return err; + } + + cache = ip6mr_cache_find(mrt, + &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); + if (!cache) { + vif = ip6mr_find_vif(mrt, dev); + if (vif >= 0) + cache = ip6mr_cache_find_any(mrt, + &ipv6_hdr(skb)->daddr, + vif); + } + + /* No usable cache entry */ + if (!cache) { + vif = ip6mr_find_vif(mrt, dev); + if (vif >= 0) + return ip6mr_cache_unresolved(mrt, vif, skb, dev); + goto ip6_output; + } + + /* Wrong interface */ + vif = cache->_c.mfc_parent; + if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) + goto ip6_output; + + ip6_mr_output_finish(net, mrt, dev, skb, cache); + return 0; + +ip6_output: + return ip6_output(net, sk, skb); +} + int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, u32 portid) { diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 1e225e6489ea..e66ec623972e 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -117,26 +117,6 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk, return opt; } -static bool setsockopt_needs_rtnl(int optname) -{ - switch (optname) { - case IPV6_ADDRFORM: - case IPV6_ADD_MEMBERSHIP: - case IPV6_DROP_MEMBERSHIP: - case IPV6_JOIN_ANYCAST: - case IPV6_LEAVE_ANYCAST: - 
case MCAST_JOIN_GROUP: - case MCAST_LEAVE_GROUP: - case MCAST_JOIN_SOURCE_GROUP: - case MCAST_LEAVE_SOURCE_GROUP: - case MCAST_BLOCK_SOURCE: - case MCAST_UNBLOCK_SOURCE: - case MCAST_MSFILTER: - return true; - } - return false; -} - static int copy_group_source_from_sockptr(struct group_source_req *greqs, sockptr_t optval, int optlen) { @@ -395,9 +375,8 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, { struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); - int val, valbool; int retv = -ENOPROTOOPT; - bool needs_rtnl = setsockopt_needs_rtnl(optname); + int val, valbool; if (sockptr_is_null(optval)) val = 0; @@ -562,8 +541,7 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, return 0; } } - if (needs_rtnl) - rtnl_lock(); + sockopt_lock_sock(sk); /* Another thread has converted the socket into IPv4 with @@ -969,8 +947,6 @@ done: unlock: sockopt_release_sock(sk); - if (needs_rtnl) - rtnl_unlock(); return retv; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 65831b4fee1f..6c875721d423 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -108,9 +108,9 @@ static int __ipv6_dev_mc_inc(struct net_device *dev, int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF; int sysctl_mld_qrv __read_mostly = MLD_QRV_DEFAULT; -/* - * socket join on multicast group - */ +#define mc_assert_locked(idev) \ + lockdep_assert_held(&(idev)->mc_lock) + #define mc_dereference(e, idev) \ rcu_dereference_protected(e, lockdep_is_held(&(idev)->mc_lock)) @@ -169,17 +169,18 @@ static int unsolicited_report_interval(struct inet6_dev *idev) return iv > 0 ? 
iv : 1; } +/* + * socket join on multicast group + */ static int __ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr, unsigned int mode) { - struct net_device *dev = NULL; - struct ipv6_mc_socklist *mc_lst; struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6_mc_socklist *mc_lst; struct net *net = sock_net(sk); + struct net_device *dev = NULL; int err; - ASSERT_RTNL(); - if (!ipv6_addr_is_multicast(addr)) return -EINVAL; @@ -199,13 +200,18 @@ static int __ipv6_sock_mc_join(struct sock *sk, int ifindex, if (ifindex == 0) { struct rt6_info *rt; + + rcu_read_lock(); rt = rt6_lookup(net, addr, NULL, 0, NULL, 0); if (rt) { - dev = rt->dst.dev; + dev = dst_dev(&rt->dst); + dev_hold(dev); ip6_rt_put(rt); } - } else - dev = __dev_get_by_index(net, ifindex); + rcu_read_unlock(); + } else { + dev = dev_get_by_index(net, ifindex); + } if (!dev) { sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); @@ -216,12 +222,11 @@ static int __ipv6_sock_mc_join(struct sock *sk, int ifindex, mc_lst->sfmode = mode; RCU_INIT_POINTER(mc_lst->sflist, NULL); - /* - * now add/increase the group membership on the device - */ - + /* now add/increase the group membership on the device */ err = __ipv6_dev_mc_inc(dev, addr, mode); + dev_put(dev); + if (err) { sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); return err; @@ -248,14 +253,36 @@ int ipv6_sock_mc_join_ssm(struct sock *sk, int ifindex, /* * socket leave on multicast group */ +static void __ipv6_sock_mc_drop(struct sock *sk, struct ipv6_mc_socklist *mc_lst) +{ + struct net *net = sock_net(sk); + struct net_device *dev; + + dev = dev_get_by_index(net, mc_lst->ifindex); + if (dev) { + struct inet6_dev *idev = in6_dev_get(dev); + + ip6_mc_leave_src(sk, mc_lst, idev); + + if (idev) { + __ipv6_dev_mc_dec(idev, &mc_lst->addr); + in6_dev_put(idev); + } + + dev_put(dev); + } else { + ip6_mc_leave_src(sk, mc_lst, NULL); + } + + atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); + kfree_rcu(mc_lst, rcu); +} + int ipv6_sock_mc_drop(struct sock 
*sk, int ifindex, const struct in6_addr *addr) { struct ipv6_pinfo *np = inet6_sk(sk); - struct ipv6_mc_socklist *mc_lst; struct ipv6_mc_socklist __rcu **lnk; - struct net *net = sock_net(sk); - - ASSERT_RTNL(); + struct ipv6_mc_socklist *mc_lst; if (!ipv6_addr_is_multicast(addr)) return -EINVAL; @@ -265,23 +292,8 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) lnk = &mc_lst->next) { if ((ifindex == 0 || mc_lst->ifindex == ifindex) && ipv6_addr_equal(&mc_lst->addr, addr)) { - struct net_device *dev; - *lnk = mc_lst->next; - - dev = __dev_get_by_index(net, mc_lst->ifindex); - if (dev) { - struct inet6_dev *idev = __in6_dev_get(dev); - - ip6_mc_leave_src(sk, mc_lst, idev); - if (idev) - __ipv6_dev_mc_dec(idev, &mc_lst->addr); - } else { - ip6_mc_leave_src(sk, mc_lst, NULL); - } - - atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); - kfree_rcu(mc_lst, rcu); + __ipv6_sock_mc_drop(sk, mc_lst); return 0; } } @@ -290,31 +302,36 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) } EXPORT_SYMBOL(ipv6_sock_mc_drop); -static struct inet6_dev *ip6_mc_find_dev_rtnl(struct net *net, - const struct in6_addr *group, - int ifindex) +static struct inet6_dev *ip6_mc_find_dev(struct net *net, + const struct in6_addr *group, + int ifindex) { struct net_device *dev = NULL; - struct inet6_dev *idev = NULL; + struct inet6_dev *idev; if (ifindex == 0) { - struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, NULL, 0); + struct rt6_info *rt; + rcu_read_lock(); + rt = rt6_lookup(net, group, NULL, 0, NULL, 0); if (rt) { - dev = rt->dst.dev; + dev = dst_dev(&rt->dst); + dev_hold(dev); ip6_rt_put(rt); } + rcu_read_unlock(); } else { - dev = __dev_get_by_index(net, ifindex); + dev = dev_get_by_index(net, ifindex); } - if (!dev) return NULL; - idev = __in6_dev_get(dev); + + idev = in6_dev_get(dev); + dev_put(dev); + if (!idev) return NULL; - if (idev->dead) - return NULL; + return idev; } @@ -322,28 +339,10 @@ void 
__ipv6_sock_mc_close(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_mc_socklist *mc_lst; - struct net *net = sock_net(sk); - - ASSERT_RTNL(); while ((mc_lst = sock_dereference(np->ipv6_mc_list, sk)) != NULL) { - struct net_device *dev; - np->ipv6_mc_list = mc_lst->next; - - dev = __dev_get_by_index(net, mc_lst->ifindex); - if (dev) { - struct inet6_dev *idev = __in6_dev_get(dev); - - ip6_mc_leave_src(sk, mc_lst, idev); - if (idev) - __ipv6_dev_mc_dec(idev, &mc_lst->addr); - } else { - ip6_mc_leave_src(sk, mc_lst, NULL); - } - - atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); - kfree_rcu(mc_lst, rcu); + __ipv6_sock_mc_drop(sk, mc_lst); } } @@ -354,24 +353,22 @@ void ipv6_sock_mc_close(struct sock *sk) if (!rcu_access_pointer(np->ipv6_mc_list)) return; - rtnl_lock(); lock_sock(sk); __ipv6_sock_mc_close(sk); release_sock(sk); - rtnl_unlock(); } int ip6_mc_source(int add, int omode, struct sock *sk, - struct group_source_req *pgsr) + struct group_source_req *pgsr) { + struct ipv6_pinfo *inet6 = inet6_sk(sk); struct in6_addr *source, *group; + struct net *net = sock_net(sk); struct ipv6_mc_socklist *pmc; - struct inet6_dev *idev; - struct ipv6_pinfo *inet6 = inet6_sk(sk); struct ip6_sf_socklist *psl; - struct net *net = sock_net(sk); - int i, j, rv; + struct inet6_dev *idev; int leavegroup = 0; + int i, j, rv; int err; source = &((struct sockaddr_in6 *)&pgsr->gsr_source)->sin6_addr; @@ -380,13 +377,19 @@ int ip6_mc_source(int add, int omode, struct sock *sk, if (!ipv6_addr_is_multicast(group)) return -EINVAL; - idev = ip6_mc_find_dev_rtnl(net, group, pgsr->gsr_interface); + idev = ip6_mc_find_dev(net, group, pgsr->gsr_interface); if (!idev) return -ENODEV; + mutex_lock(&idev->mc_lock); + + if (idev->dead) { + err = -ENODEV; + goto done; + } + err = -EADDRNOTAVAIL; - mutex_lock(&idev->mc_lock); for_each_pmc_socklock(inet6, sk, pmc) { if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface) continue; @@ -483,6 +486,7 @@ int ip6_mc_source(int 
add, int omode, struct sock *sk, ip6_mc_add_src(idev, group, omode, 1, source, 1); done: mutex_unlock(&idev->mc_lock); + in6_dev_put(idev); if (leavegroup) err = ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group); return err; @@ -491,12 +495,12 @@ done: int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, struct sockaddr_storage *list) { - const struct in6_addr *group; - struct ipv6_mc_socklist *pmc; - struct inet6_dev *idev; struct ipv6_pinfo *inet6 = inet6_sk(sk); struct ip6_sf_socklist *newpsl, *psl; struct net *net = sock_net(sk); + const struct in6_addr *group; + struct ipv6_mc_socklist *pmc; + struct inet6_dev *idev; int leavegroup = 0; int i, err; @@ -508,10 +512,17 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, gsf->gf_fmode != MCAST_EXCLUDE) return -EINVAL; - idev = ip6_mc_find_dev_rtnl(net, group, gsf->gf_interface); + idev = ip6_mc_find_dev(net, group, gsf->gf_interface); if (!idev) return -ENODEV; + mutex_lock(&idev->mc_lock); + + if (idev->dead) { + err = -ENODEV; + goto done; + } + err = 0; if (gsf->gf_fmode == MCAST_INCLUDE && gsf->gf_numsrc == 0) { @@ -544,24 +555,19 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, psin6 = (struct sockaddr_in6 *)list; newpsl->sl_addr[i] = psin6->sin6_addr; } - mutex_lock(&idev->mc_lock); + err = ip6_mc_add_src(idev, group, gsf->gf_fmode, newpsl->sl_count, newpsl->sl_addr, 0); if (err) { - mutex_unlock(&idev->mc_lock); sock_kfree_s(sk, newpsl, struct_size(newpsl, sl_addr, newpsl->sl_max)); goto done; } - mutex_unlock(&idev->mc_lock); } else { newpsl = NULL; - mutex_lock(&idev->mc_lock); ip6_mc_add_src(idev, group, gsf->gf_fmode, 0, NULL, 0); - mutex_unlock(&idev->mc_lock); } - mutex_lock(&idev->mc_lock); psl = sock_dereference(pmc->sflist, sk); if (psl) { ip6_mc_del_src(idev, group, pmc->sfmode, @@ -571,12 +577,14 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, } else { ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0); } + 
rcu_assign_pointer(pmc->sflist, newpsl); - mutex_unlock(&idev->mc_lock); kfree_rcu(psl, rcu); pmc->sfmode = gsf->gf_fmode; err = 0; done: + mutex_unlock(&idev->mc_lock); + in6_dev_put(idev); if (leavegroup) err = ipv6_sock_mc_drop(sk, gsf->gf_interface, group); return err; @@ -597,10 +605,6 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, if (!ipv6_addr_is_multicast(group)) return -EINVAL; - /* changes to the ipv6_mc_list require the socket lock and - * rtnl lock. We have the socket lock, so reading the list is safe. - */ - for_each_pmc_socklock(inet6, sk, pmc) { if (pmc->ifindex != gsf->gf_interface) continue; @@ -668,12 +672,13 @@ bool inet6_mc_check(const struct sock *sk, const struct in6_addr *mc_addr, return rv; } -/* called with mc_lock */ static void igmp6_group_added(struct ifmcaddr6 *mc) { struct net_device *dev = mc->idev->dev; char buf[MAX_ADDR_LEN]; + mc_assert_locked(mc->idev); + if (IPV6_ADDR_MC_SCOPE(&mc->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL) return; @@ -703,12 +708,13 @@ static void igmp6_group_added(struct ifmcaddr6 *mc) mld_ifc_event(mc->idev); } -/* called with mc_lock */ static void igmp6_group_dropped(struct ifmcaddr6 *mc) { struct net_device *dev = mc->idev->dev; char buf[MAX_ADDR_LEN]; + mc_assert_locked(mc->idev); + if (IPV6_ADDR_MC_SCOPE(&mc->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL) return; @@ -729,14 +735,13 @@ static void igmp6_group_dropped(struct ifmcaddr6 *mc) refcount_dec(&mc->mca_refcnt); } -/* - * deleted ifmcaddr6 manipulation - * called with mc_lock - */ +/* deleted ifmcaddr6 manipulation */ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) { struct ifmcaddr6 *pmc; + mc_assert_locked(idev); + /* this is an "ifmcaddr6" for convenience; only the fields below * are actually used. In particular, the refcnt and users are not * used for management of the delete list. 
Using the same structure @@ -770,13 +775,14 @@ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) rcu_assign_pointer(idev->mc_tomb, pmc); } -/* called with mc_lock */ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) { struct ip6_sf_list *psf, *sources, *tomb; struct in6_addr *pmca = &im->mca_addr; struct ifmcaddr6 *pmc, *pmc_prev; + mc_assert_locked(idev); + pmc_prev = NULL; for_each_mc_tomb(idev, pmc) { if (ipv6_addr_equal(&pmc->mca_addr, pmca)) @@ -813,11 +819,12 @@ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) } } -/* called with mc_lock */ static void mld_clear_delrec(struct inet6_dev *idev) { struct ifmcaddr6 *pmc, *nextpmc; + mc_assert_locked(idev); + pmc = mc_dereference(idev->mc_tomb, idev); RCU_INIT_POINTER(idev->mc_tomb, NULL); @@ -861,11 +868,6 @@ static void mld_clear_report(struct inet6_dev *idev) spin_unlock_bh(&idev->mc_report_lock); } -static void mca_get(struct ifmcaddr6 *mc) -{ - refcount_inc(&mc->mca_refcnt); -} - static void ma_put(struct ifmcaddr6 *mc) { if (refcount_dec_and_test(&mc->mca_refcnt)) { @@ -874,13 +876,14 @@ static void ma_put(struct ifmcaddr6 *mc) } } -/* called with mc_lock */ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev, const struct in6_addr *addr, unsigned int mode) { struct ifmcaddr6 *mc; + mc_assert_locked(idev); + mc = kzalloc(sizeof(*mc), GFP_KERNEL); if (!mc) return NULL; @@ -945,23 +948,22 @@ error: static int __ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr, unsigned int mode) { - struct ifmcaddr6 *mc; struct inet6_dev *idev; - - ASSERT_RTNL(); + struct ifmcaddr6 *mc; /* we need to take a reference on idev */ idev = in6_dev_get(dev); - if (!idev) return -EINVAL; - if (idev->dead) { + mutex_lock(&idev->mc_lock); + + if (READ_ONCE(idev->dead)) { + mutex_unlock(&idev->mc_lock); in6_dev_put(idev); return -ENODEV; } - mutex_lock(&idev->mc_lock); for_each_mc_mclock(idev, mc) { if (ipv6_addr_equal(&mc->mca_addr, addr)) { 
mc->mca_users++; @@ -982,13 +984,11 @@ static int __ipv6_dev_mc_inc(struct net_device *dev, rcu_assign_pointer(mc->next, idev->mc_list); rcu_assign_pointer(idev->mc_list, mc); - mca_get(mc); - mld_del_delrec(idev, mc); igmp6_group_added(mc); inet6_ifmcaddr_notify(dev, mc, RTM_NEWMULTICAST); mutex_unlock(&idev->mc_lock); - ma_put(mc); + return 0; } @@ -1005,9 +1005,8 @@ int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr) { struct ifmcaddr6 *ma, __rcu **map; - ASSERT_RTNL(); - mutex_lock(&idev->mc_lock); + for (map = &idev->mc_list; (ma = mc_dereference(*map, idev)); map = &ma->next) { @@ -1038,13 +1037,12 @@ int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr) struct inet6_dev *idev; int err; - ASSERT_RTNL(); - - idev = __in6_dev_get(dev); + idev = in6_dev_get(dev); if (!idev) - err = -ENODEV; - else - err = __ipv6_dev_mc_dec(idev, addr); + return -ENODEV; + + err = __ipv6_dev_mc_dec(idev, addr); + in6_dev_put(idev); return err; } @@ -1091,46 +1089,51 @@ unlock: return rv; } -/* called with mc_lock */ static void mld_gq_start_work(struct inet6_dev *idev) { unsigned long tv = get_random_u32_below(idev->mc_maxdelay); + mc_assert_locked(idev); + idev->mc_gq_running = 1; if (!mod_delayed_work(mld_wq, &idev->mc_gq_work, tv + 2)) in6_dev_hold(idev); } -/* called with mc_lock */ static void mld_gq_stop_work(struct inet6_dev *idev) { + mc_assert_locked(idev); + idev->mc_gq_running = 0; if (cancel_delayed_work(&idev->mc_gq_work)) __in6_dev_put(idev); } -/* called with mc_lock */ static void mld_ifc_start_work(struct inet6_dev *idev, unsigned long delay) { unsigned long tv = get_random_u32_below(delay); + mc_assert_locked(idev); + if (!mod_delayed_work(mld_wq, &idev->mc_ifc_work, tv + 2)) in6_dev_hold(idev); } -/* called with mc_lock */ static void mld_ifc_stop_work(struct inet6_dev *idev) { + mc_assert_locked(idev); + idev->mc_ifc_count = 0; if (cancel_delayed_work(&idev->mc_ifc_work)) __in6_dev_put(idev); } -/* called with mc_lock 
*/ static void mld_dad_start_work(struct inet6_dev *idev, unsigned long delay) { unsigned long tv = get_random_u32_below(delay); + mc_assert_locked(idev); + if (!mod_delayed_work(mld_wq, &idev->mc_dad_work, tv + 2)) in6_dev_hold(idev); } @@ -1155,14 +1158,13 @@ static void mld_report_stop_work(struct inet6_dev *idev) __in6_dev_put(idev); } -/* - * IGMP handling (alias multicast ICMPv6 messages) - * called with mc_lock - */ +/* IGMP handling (alias multicast ICMPv6 messages) */ static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime) { unsigned long delay = resptime; + mc_assert_locked(ma->idev); + /* Do not start work for these addresses */ if (ipv6_addr_is_ll_all_nodes(&ma->mca_addr) || IPV6_ADDR_MC_SCOPE(&ma->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL) @@ -1181,15 +1183,15 @@ static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime) ma->mca_flags |= MAF_TIMER_RUNNING; } -/* mark EXCLUDE-mode sources - * called with mc_lock - */ +/* mark EXCLUDE-mode sources */ static bool mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs, const struct in6_addr *srcs) { struct ip6_sf_list *psf; int i, scount; + mc_assert_locked(pmc->idev); + scount = 0; for_each_psf_mclock(pmc, psf) { if (scount == nsrcs) @@ -1212,13 +1214,14 @@ static bool mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs, return true; } -/* called with mc_lock */ static bool mld_marksources(struct ifmcaddr6 *pmc, int nsrcs, const struct in6_addr *srcs) { struct ip6_sf_list *psf; int i, scount; + mc_assert_locked(pmc->idev); + if (pmc->mca_sfmode == MCAST_EXCLUDE) return mld_xmarksources(pmc, nsrcs, srcs); @@ -1913,7 +1916,6 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc, #define AVAILABLE(skb) ((skb) ? 
skb_availroom(skb) : 0) -/* called with mc_lock */ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted, int crsend) @@ -1927,6 +1929,8 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, struct mld2_report *pmr; unsigned int mtu; + mc_assert_locked(idev); + if (pmc->mca_flags & MAF_NOREPORT) return skb; @@ -2045,12 +2049,13 @@ empty_source: return skb; } -/* called with mc_lock */ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc) { struct sk_buff *skb = NULL; int type; + mc_assert_locked(idev); + if (!pmc) { for_each_mc_mclock(idev, pmc) { if (pmc->mca_flags & MAF_NOREPORT) @@ -2072,10 +2077,7 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc) mld_sendpack(skb); } -/* - * remove zero-count source records from a source filter list - * called with mc_lock - */ +/* remove zero-count source records from a source filter list */ static void mld_clear_zeros(struct ip6_sf_list __rcu **ppsf, struct inet6_dev *idev) { struct ip6_sf_list *psf_prev, *psf_next, *psf; @@ -2099,7 +2101,6 @@ static void mld_clear_zeros(struct ip6_sf_list __rcu **ppsf, struct inet6_dev *i } } -/* called with mc_lock */ static void mld_send_cr(struct inet6_dev *idev) { struct ifmcaddr6 *pmc, *pmc_prev, *pmc_next; @@ -2263,13 +2264,14 @@ err_out: goto out; } -/* called with mc_lock */ static void mld_send_initial_cr(struct inet6_dev *idev) { - struct sk_buff *skb; struct ifmcaddr6 *pmc; + struct sk_buff *skb; int type; + mc_assert_locked(idev); + if (mld_in_v1_mode(idev)) return; @@ -2316,13 +2318,14 @@ static void mld_dad_work(struct work_struct *work) in6_dev_put(idev); } -/* called with mc_lock */ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode, - const struct in6_addr *psfsrc) + const struct in6_addr *psfsrc) { struct ip6_sf_list *psf, *psf_prev; int rv = 0; + mc_assert_locked(pmc->idev); + psf_prev = NULL; for_each_psf_mclock(pmc, psf) { if 
(ipv6_addr_equal(&psf->sf_addr, psfsrc)) @@ -2359,7 +2362,6 @@ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode, return rv; } -/* called with mc_lock */ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca, int sfmode, int sfcount, const struct in6_addr *psfsrc, int delta) @@ -2371,6 +2373,8 @@ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca, if (!idev) return -ENODEV; + mc_assert_locked(idev); + for_each_mc_mclock(idev, pmc) { if (ipv6_addr_equal(pmca, &pmc->mca_addr)) break; @@ -2412,15 +2416,14 @@ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca, return err; } -/* - * Add multicast single-source filter to the interface list - * called with mc_lock - */ +/* Add multicast single-source filter to the interface list */ static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode, - const struct in6_addr *psfsrc) + const struct in6_addr *psfsrc) { struct ip6_sf_list *psf, *psf_prev; + mc_assert_locked(pmc->idev); + psf_prev = NULL; for_each_psf_mclock(pmc, psf) { if (ipv6_addr_equal(&psf->sf_addr, psfsrc)) @@ -2443,11 +2446,12 @@ static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode, return 0; } -/* called with mc_lock */ static void sf_markstate(struct ifmcaddr6 *pmc) { - struct ip6_sf_list *psf; int mca_xcount = pmc->mca_sfcount[MCAST_EXCLUDE]; + struct ip6_sf_list *psf; + + mc_assert_locked(pmc->idev); for_each_psf_mclock(pmc, psf) { if (pmc->mca_sfcount[MCAST_EXCLUDE]) { @@ -2460,14 +2464,15 @@ static void sf_markstate(struct ifmcaddr6 *pmc) } } -/* called with mc_lock */ static int sf_setstate(struct ifmcaddr6 *pmc) { - struct ip6_sf_list *psf, *dpsf; int mca_xcount = pmc->mca_sfcount[MCAST_EXCLUDE]; + struct ip6_sf_list *psf, *dpsf; int qrv = pmc->idev->mc_qrv; int new_in, rv; + mc_assert_locked(pmc->idev); + rv = 0; for_each_psf_mclock(pmc, psf) { if (pmc->mca_sfcount[MCAST_EXCLUDE]) { @@ -2526,10 +2531,7 @@ static int sf_setstate(struct ifmcaddr6 *pmc) 
return rv; } -/* - * Add multicast source filter list to the interface list - * called with mc_lock - */ +/* Add multicast source filter list to the interface list */ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca, int sfmode, int sfcount, const struct in6_addr *psfsrc, int delta) @@ -2541,6 +2543,8 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca, if (!idev) return -ENODEV; + mc_assert_locked(idev); + for_each_mc_mclock(idev, pmc) { if (ipv6_addr_equal(pmca, &pmc->mca_addr)) break; @@ -2588,11 +2592,12 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca, return err; } -/* called with mc_lock */ static void ip6_mc_clear_src(struct ifmcaddr6 *pmc) { struct ip6_sf_list *psf, *nextpsf; + mc_assert_locked(pmc->idev); + for (psf = mc_dereference(pmc->mca_tomb, pmc->idev); psf; psf = nextpsf) { @@ -2613,11 +2618,12 @@ static void ip6_mc_clear_src(struct ifmcaddr6 *pmc) WRITE_ONCE(pmc->mca_sfcount[MCAST_EXCLUDE], 1); } -/* called with mc_lock */ static void igmp6_join_group(struct ifmcaddr6 *ma) { unsigned long delay; + mc_assert_locked(ma->idev); + if (ma->mca_flags & MAF_NOREPORT) return; @@ -2664,9 +2670,10 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, return err; } -/* called with mc_lock */ static void igmp6_leave_group(struct ifmcaddr6 *ma) { + mc_assert_locked(ma->idev); + if (mld_in_v1_mode(ma->idev)) { if (ma->mca_flags & MAF_LAST_REPORTER) { igmp6_send(&ma->mca_addr, ma->idev->dev, @@ -2711,9 +2718,10 @@ static void mld_ifc_work(struct work_struct *work) in6_dev_put(idev); } -/* called with mc_lock */ static void mld_ifc_event(struct inet6_dev *idev) { + mc_assert_locked(idev); + if (mld_in_v1_mode(idev)) return; @@ -2868,8 +2876,6 @@ static void ipv6_mc_rejoin_groups(struct inet6_dev *idev) { struct ifmcaddr6 *pmc; - ASSERT_RTNL(); - mutex_lock(&idev->mc_lock); if (mld_in_v1_mode(idev)) { for_each_mc_mclock(idev, pmc) diff --git 
a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index ecb5c4b8518f..d4c5876e1771 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -243,9 +243,8 @@ struct ndisc_options *ndisc_parse_options(const struct net_device *dev, case ND_OPT_NONCE: case ND_OPT_REDIRECT_HDR: if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) { - ND_PRINTK(2, warn, - "%s: duplicated ND6 option found: type=%d\n", - __func__, nd_opt->nd_opt_type); + net_dbg_ratelimited("%s: duplicated ND6 option found: type=%d\n", + __func__, nd_opt->nd_opt_type); } else { ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt; } @@ -275,11 +274,8 @@ struct ndisc_options *ndisc_parse_options(const struct net_device *dev, * to accommodate future extension to the * protocol. */ - ND_PRINTK(2, notice, - "%s: ignored unsupported option; type=%d, len=%d\n", - __func__, - nd_opt->nd_opt_type, - nd_opt->nd_opt_len); + net_dbg_ratelimited("%s: ignored unsupported option; type=%d, len=%d\n", + __func__, nd_opt->nd_opt_type, nd_opt->nd_opt_len); } next_opt: opt_len -= l; @@ -377,24 +373,25 @@ static int ndisc_constructor(struct neighbour *neigh) static int pndisc_constructor(struct pneigh_entry *n) { struct in6_addr *addr = (struct in6_addr *)&n->key; - struct in6_addr maddr; struct net_device *dev = n->dev; + struct in6_addr maddr; - if (!dev || !__in6_dev_get(dev)) + if (!dev) return -EINVAL; + addrconf_addr_solict_mult(addr, &maddr); - ipv6_dev_mc_inc(dev, &maddr); - return 0; + return ipv6_dev_mc_inc(dev, &maddr); } static void pndisc_destructor(struct pneigh_entry *n) { struct in6_addr *addr = (struct in6_addr *)&n->key; - struct in6_addr maddr; struct net_device *dev = n->dev; + struct in6_addr maddr; - if (!dev || !__in6_dev_get(dev)) + if (!dev) return; + addrconf_addr_solict_mult(addr, &maddr); ipv6_dev_mc_dec(dev, &maddr); } @@ -473,6 +470,7 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, { struct icmp6hdr *icmp6h = icmp6_hdr(skb); struct dst_entry *dst = skb_dst(skb); + struct net_device *dev; 
struct inet6_dev *idev; struct net *net; struct sock *sk; @@ -507,11 +505,12 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, ip6_nd_hdr(skb, saddr, daddr, READ_ONCE(inet6_sk(sk)->hop_limit), skb->len); - idev = __in6_dev_get(dst->dev); + dev = dst_dev(dst); + idev = __in6_dev_get(dev); IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, - net, sk, skb, NULL, dst->dev, + net, sk, skb, NULL, dev, dst_output); if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, type); @@ -751,9 +750,8 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES); if (probes < 0) { if (!(READ_ONCE(neigh->nud_state) & NUD_VALID)) { - ND_PRINTK(1, dbg, - "%s: trying to ucast probe in NUD_INVALID: %pI6\n", - __func__, target); + net_dbg_ratelimited("%s: trying to ucast probe in NUD_INVALID: %pI6\n", + __func__, target); } ndisc_send_ns(dev, target, target, saddr, 0); } else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) { @@ -811,7 +809,7 @@ static enum skb_drop_reason ndisc_recv_ns(struct sk_buff *skb) return SKB_DROP_REASON_PKT_TOO_SMALL; if (ipv6_addr_is_multicast(&msg->target)) { - ND_PRINTK(2, warn, "NS: multicast target address\n"); + net_dbg_ratelimited("NS: multicast target address\n"); return reason; } @@ -820,7 +818,7 @@ static enum skb_drop_reason ndisc_recv_ns(struct sk_buff *skb) * DAD has to be destined for solicited node multicast address. 
*/ if (dad && !ipv6_addr_is_solict_mult(daddr)) { - ND_PRINTK(2, warn, "NS: bad DAD packet (wrong destination)\n"); + net_dbg_ratelimited("NS: bad DAD packet (wrong destination)\n"); return reason; } @@ -830,8 +828,7 @@ static enum skb_drop_reason ndisc_recv_ns(struct sk_buff *skb) if (ndopts.nd_opts_src_lladdr) { lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, dev); if (!lladdr) { - ND_PRINTK(2, warn, - "NS: invalid link-layer address length\n"); + net_dbg_ratelimited("NS: invalid link-layer address length\n"); return reason; } @@ -841,8 +838,7 @@ static enum skb_drop_reason ndisc_recv_ns(struct sk_buff *skb) * in the message. */ if (dad) { - ND_PRINTK(2, warn, - "NS: bad DAD packet (link-layer address option)\n"); + net_dbg_ratelimited("NS: bad DAD packet (link-layer address option)\n"); return reason; } } @@ -859,10 +855,8 @@ have_ifp: if (nonce != 0 && ifp->dad_nonce == nonce) { u8 *np = (u8 *)&nonce; /* Matching nonce if looped back */ - ND_PRINTK(2, notice, - "%s: IPv6 DAD loopback for address %pI6c nonce %pM ignored\n", - ifp->idev->dev->name, - &ifp->addr, np); + net_dbg_ratelimited("%s: IPv6 DAD loopback for address %pI6c nonce %pM ignored\n", + ifp->idev->dev->name, &ifp->addr, np); goto out; } /* @@ -1013,13 +1007,13 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb) return SKB_DROP_REASON_PKT_TOO_SMALL; if (ipv6_addr_is_multicast(&msg->target)) { - ND_PRINTK(2, warn, "NA: target address is multicast\n"); + net_dbg_ratelimited("NA: target address is multicast\n"); return reason; } if (ipv6_addr_is_multicast(daddr) && msg->icmph.icmp6_solicited) { - ND_PRINTK(2, warn, "NA: solicited NA is multicasted\n"); + net_dbg_ratelimited("NA: solicited NA is multicasted\n"); return reason; } @@ -1038,8 +1032,7 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb) if (ndopts.nd_opts_tgt_lladdr) { lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, dev); if (!lladdr) { - ND_PRINTK(2, warn, - "NA: invalid link-layer address 
length\n"); + net_dbg_ratelimited("NA: invalid link-layer address length\n"); return reason; } } @@ -1060,9 +1053,9 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb) unsolicited advertisement. */ if (skb->pkt_type != PACKET_LOOPBACK) - ND_PRINTK(1, warn, - "NA: %pM advertised our address %pI6c on %s!\n", - eth_hdr(skb)->h_source, &ifp->addr, ifp->idev->dev->name); + net_warn_ratelimited("NA: %pM advertised our address %pI6c on %s!\n", + eth_hdr(skb)->h_source, &ifp->addr, + ifp->idev->dev->name); in6_ifa_put(ifp); return reason; } @@ -1149,7 +1142,7 @@ static enum skb_drop_reason ndisc_recv_rs(struct sk_buff *skb) idev = __in6_dev_get(skb->dev); if (!idev) { - ND_PRINTK(1, err, "RS: can't find in6 device\n"); + net_err_ratelimited("RS: can't find in6 device\n"); return reason; } @@ -1257,11 +1250,9 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) optlen = (skb_tail_pointer(skb) - skb_transport_header(skb)) - sizeof(struct ra_msg); - ND_PRINTK(2, info, - "RA: %s, dev: %s\n", - __func__, skb->dev->name); + net_dbg_ratelimited("RA: %s, dev: %s\n", __func__, skb->dev->name); if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) { - ND_PRINTK(2, warn, "RA: source address is not link-local\n"); + net_dbg_ratelimited("RA: source address is not link-local\n"); return reason; } if (optlen < 0) @@ -1269,15 +1260,14 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) #ifdef CONFIG_IPV6_NDISC_NODETYPE if (skb->ndisc_nodetype == NDISC_NODETYPE_HOST) { - ND_PRINTK(2, warn, "RA: from host or unauthorized router\n"); + net_dbg_ratelimited("RA: from host or unauthorized router\n"); return reason; } #endif in6_dev = __in6_dev_get(skb->dev); if (!in6_dev) { - ND_PRINTK(0, err, "RA: can't find inet6 device for %s\n", - skb->dev->name); + net_err_ratelimited("RA: can't find inet6 device for %s\n", skb->dev->name); return reason; } @@ -1285,18 +1275,16 @@ static enum skb_drop_reason 
ndisc_router_discovery(struct sk_buff *skb) return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS; if (!ipv6_accept_ra(in6_dev)) { - ND_PRINTK(2, info, - "RA: %s, did not accept ra for dev: %s\n", - __func__, skb->dev->name); + net_dbg_ratelimited("RA: %s, did not accept ra for dev: %s\n", __func__, + skb->dev->name); goto skip_linkparms; } #ifdef CONFIG_IPV6_NDISC_NODETYPE /* skip link-specific parameters from interior routers */ if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) { - ND_PRINTK(2, info, - "RA: %s, nodetype is NODEFAULT, dev: %s\n", - __func__, skb->dev->name); + net_dbg_ratelimited("RA: %s, nodetype is NODEFAULT, dev: %s\n", __func__, + skb->dev->name); goto skip_linkparms; } #endif @@ -1325,18 +1313,16 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) send_ifinfo_notify = true; if (!READ_ONCE(in6_dev->cnf.accept_ra_defrtr)) { - ND_PRINTK(2, info, - "RA: %s, defrtr is false for dev: %s\n", - __func__, skb->dev->name); + net_dbg_ratelimited("RA: %s, defrtr is false for dev: %s\n", __func__, + skb->dev->name); goto skip_defrtr; } lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime); if (lifetime != 0 && lifetime < READ_ONCE(in6_dev->cnf.accept_ra_min_lft)) { - ND_PRINTK(2, info, - "RA: router lifetime (%ds) is too short: %s\n", - lifetime, skb->dev->name); + net_dbg_ratelimited("RA: router lifetime (%ds) is too short: %s\n", lifetime, + skb->dev->name); goto skip_defrtr; } @@ -1346,9 +1332,8 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) net = dev_net(in6_dev->dev); if (!READ_ONCE(in6_dev->cnf.accept_ra_from_local) && ipv6_chk_addr(net, &ipv6_hdr(skb)->saddr, in6_dev->dev, 0)) { - ND_PRINTK(2, info, - "RA from local address detected on dev: %s: default router ignored\n", - skb->dev->name); + net_dbg_ratelimited("RA from local address detected on dev: %s: default router ignored\n", + skb->dev->name); goto skip_defrtr; } @@ -1366,9 +1351,8 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff 
*skb) rt->fib6_nh->fib_nh_dev, NULL, &ipv6_hdr(skb)->saddr); if (!neigh) { - ND_PRINTK(0, err, - "RA: %s got default router without neighbour\n", - __func__); + net_err_ratelimited("RA: %s got default router without neighbour\n", + __func__); fib6_info_release(rt); return reason; } @@ -1381,10 +1365,10 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) rt = NULL; } - ND_PRINTK(3, info, "RA: rt: %p lifetime: %d, metric: %d, for dev: %s\n", - rt, lifetime, defrtr_usr_metric, skb->dev->name); + net_dbg_ratelimited("RA: rt: %p lifetime: %d, metric: %d, for dev: %s\n", rt, lifetime, + defrtr_usr_metric, skb->dev->name); if (!rt && lifetime) { - ND_PRINTK(3, info, "RA: adding default router\n"); + net_dbg_ratelimited("RA: adding default router\n"); if (neigh) neigh_release(neigh); @@ -1393,9 +1377,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) skb->dev, pref, defrtr_usr_metric, lifetime); if (!rt) { - ND_PRINTK(0, err, - "RA: %s failed to add default route\n", - __func__); + net_err_ratelimited("RA: %s failed to add default route\n", __func__); return reason; } @@ -1403,9 +1385,8 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) rt->fib6_nh->fib_nh_dev, NULL, &ipv6_hdr(skb)->saddr); if (!neigh) { - ND_PRINTK(0, err, - "RA: %s got default router without neighbour\n", - __func__); + net_err_ratelimited("RA: %s got default router without neighbour\n", + __func__); fib6_info_release(rt); return reason; } @@ -1436,7 +1417,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) fib6_metric_set(rt, RTAX_HOPLIMIT, ra_msg->icmph.icmp6_hop_limit); } else { - ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than minimum\n"); + net_dbg_ratelimited("RA: Got route advertisement with lower hop_limit than minimum\n"); } } @@ -1492,8 +1473,7 @@ skip_linkparms: lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, skb->dev); if (!lladdr) { - ND_PRINTK(2, warn, - "RA: 
invalid link-layer address length\n"); + net_dbg_ratelimited("RA: invalid link-layer address length\n"); goto out; } } @@ -1507,9 +1487,8 @@ skip_linkparms: } if (!ipv6_accept_ra(in6_dev)) { - ND_PRINTK(2, info, - "RA: %s, accept_ra is false for dev: %s\n", - __func__, skb->dev->name); + net_dbg_ratelimited("RA: %s, accept_ra is false for dev: %s\n", __func__, + skb->dev->name); goto out; } @@ -1517,9 +1496,8 @@ skip_linkparms: if (!READ_ONCE(in6_dev->cnf.accept_ra_from_local) && ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, in6_dev->dev, 0)) { - ND_PRINTK(2, info, - "RA from local address detected on dev: %s: router info ignored.\n", - skb->dev->name); + net_dbg_ratelimited("RA from local address detected on dev: %s: router info ignored.\n", + skb->dev->name); goto skip_routeinfo; } @@ -1555,9 +1533,8 @@ skip_routeinfo: #ifdef CONFIG_IPV6_NDISC_NODETYPE /* skip link-specific ndopts from interior routers */ if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) { - ND_PRINTK(2, info, - "RA: %s, nodetype is NODEFAULT (interior routes), dev: %s\n", - __func__, skb->dev->name); + net_dbg_ratelimited("RA: %s, nodetype is NODEFAULT (interior routes), dev: %s\n", + __func__, skb->dev->name); goto out; } #endif @@ -1586,7 +1563,7 @@ skip_routeinfo: } if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) { - ND_PRINTK(2, warn, "RA: invalid mtu: %d\n", mtu); + net_dbg_ratelimited("RA: invalid mtu: %d\n", mtu); } else if (READ_ONCE(in6_dev->cnf.mtu6) != mtu) { WRITE_ONCE(in6_dev->cnf.mtu6, mtu); fib6_metric_set(rt, RTAX_MTU, mtu); @@ -1605,7 +1582,7 @@ skip_routeinfo: } if (ndopts.nd_opts_tgt_lladdr || ndopts.nd_opts_rh) { - ND_PRINTK(2, warn, "RA: invalid RA options\n"); + net_dbg_ratelimited("RA: invalid RA options\n"); } out: /* Send a notify if RA changed managed/otherconf flags or @@ -1633,15 +1610,13 @@ static enum skb_drop_reason ndisc_redirect_rcv(struct sk_buff *skb) switch (skb->ndisc_nodetype) { case NDISC_NODETYPE_HOST: case NDISC_NODETYPE_NODEFAULT: - 
ND_PRINTK(2, warn, - "Redirect: from host or unauthorized router\n"); + net_dbg_ratelimited("Redirect: from host or unauthorized router\n"); return reason; } #endif if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) { - ND_PRINTK(2, warn, - "Redirect: source address is not link-local\n"); + net_dbg_ratelimited("Redirect: source address is not link-local\n"); return reason; } @@ -1702,15 +1677,13 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) } if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) { - ND_PRINTK(2, warn, "Redirect: no link-local address on %s\n", - dev->name); + net_dbg_ratelimited("Redirect: no link-local address on %s\n", dev->name); return; } if (!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, target) && ipv6_addr_type(target) != (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) { - ND_PRINTK(2, warn, - "Redirect: target address is not link-local unicast\n"); + net_dbg_ratelimited("Redirect: target address is not link-local unicast\n"); return; } @@ -1729,8 +1702,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) rt = dst_rt6_info(dst); if (rt->rt6i_flags & RTF_GATEWAY) { - ND_PRINTK(2, warn, - "Redirect: destination is not a neighbour\n"); + net_dbg_ratelimited("Redirect: destination is not a neighbour\n"); goto release; } @@ -1743,8 +1715,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) if (dev->addr_len) { struct neighbour *neigh = dst_neigh_lookup(skb_dst(skb), target); if (!neigh) { - ND_PRINTK(2, warn, - "Redirect: no neigh for target address\n"); + net_dbg_ratelimited("Redirect: no neigh for target address\n"); goto release; } @@ -1845,14 +1816,12 @@ enum skb_drop_reason ndisc_rcv(struct sk_buff *skb) __skb_push(skb, skb->data - skb_transport_header(skb)); if (ipv6_hdr(skb)->hop_limit != 255) { - ND_PRINTK(2, warn, "NDISC: invalid hop-limit: %d\n", - ipv6_hdr(skb)->hop_limit); + net_dbg_ratelimited("NDISC: invalid hop-limit: %d\n", 
ipv6_hdr(skb)->hop_limit); return SKB_DROP_REASON_IPV6_NDISC_HOP_LIMIT; } if (msg->icmph.icmp6_code != 0) { - ND_PRINTK(2, warn, "NDISC: invalid ICMPv6 code: %d\n", - msg->icmph.icmp6_code); + net_dbg_ratelimited("NDISC: invalid ICMPv6 code: %d\n", msg->icmph.icmp6_code); return SKB_DROP_REASON_IPV6_NDISC_BAD_CODE; } @@ -2003,9 +1972,8 @@ static int __net_init ndisc_net_init(struct net *net) err = inet_ctl_sock_create(&sk, PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, net); if (err < 0) { - ND_PRINTK(0, err, - "NDISC: Failed to initialize the control socket (err %d)\n", - err); + net_err_ratelimited("NDISC: Failed to initialize the control socket (err %d)\n", + err); return err; } diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 4541836ee3da..45f9105f9ac1 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -24,7 +24,7 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff { const struct ipv6hdr *iph = ipv6_hdr(skb); struct sock *sk = sk_to_full_sk(sk_partial); - struct net_device *dev = skb_dst(skb)->dev; + struct net_device *dev = skb_dst_dev(skb); struct flow_keys flkeys; unsigned int hh_len; struct dst_entry *dst; @@ -72,7 +72,7 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff #endif /* Change in oif may mean change in hh_len. 
*/ - hh_len = skb_dst(skb)->dev->hard_header_len; + hh_len = skb_dst_dev(skb)->hard_header_len; if (skb_headroom(skb) < hh_len && pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)), 0, GFP_ATOMIC)) diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c index b903c62c00c9..6da3102b7c1b 100644 --- a/net/ipv6/netfilter/nf_dup_ipv6.c +++ b/net/ipv6/netfilter/nf_dup_ipv6.c @@ -38,7 +38,7 @@ static bool nf_dup_ipv6_route(struct net *net, struct sk_buff *skb, } skb_dst_drop(skb); skb_dst_set(skb, dst); - skb->dev = dst->dev; + skb->dev = dst_dev(dst); skb->protocol = htons(ETH_P_IPV6); return true; diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 9ae2b2725bf9..838295fa32e3 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -300,7 +300,7 @@ void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb, skb_dst_set(oldskb, dst); } - fl6.flowi6_oif = l3mdev_master_ifindex(skb_dst(oldskb)->dev); + fl6.flowi6_oif = l3mdev_master_ifindex(skb_dst_dev(oldskb)); fl6.flowi6_mark = IP6_REPLY_MARK(net, oldskb->mark); security_skb_classify_flow(oldskb, flowi6_to_flowi_common(&fl6)); dst = ip6_route_output(net, NULL, &fl6); diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 806d4b5dd1e6..d21fe27fe21e 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -105,7 +105,7 @@ int ip6_dst_hoplimit(struct dst_entry *dst) { int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); if (hoplimit == 0) { - struct net_device *dev = dst->dev; + struct net_device *dev = dst_dev(dst); struct inet6_dev *idev; rcu_read_lock(); @@ -141,7 +141,7 @@ int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) skb->protocol = htons(ETH_P_IPV6); return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, - net, sk, skb, NULL, skb_dst(skb)->dev, + net, sk, skb, NULL, skb_dst_dev(skb), dst_output); } EXPORT_SYMBOL_GPL(__ip6_local_out); diff --git 
a/net/ipv6/ping.c b/net/ipv6/ping.c index 84d90dd8b3f0..82b0492923d4 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -142,7 +142,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl6.saddr = np->saddr; fl6.daddr = *daddr; fl6.flowi6_mark = ipc6.sockc.mark; - fl6.flowi6_uid = sk->sk_uid; + fl6.flowi6_uid = sk_uid(sk); fl6.fl6_icmp_type = user_icmph.icmp6_type; fl6.fl6_icmp_code = user_icmph.icmp6_code; security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index fda640ebd53f..4c3f8245c40f 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -777,7 +777,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_mark = ipc6.sockc.mark; - fl6.flowi6_uid = sk->sk_uid; + fl6.flowi6_uid = sk_uid(sk); if (sin6) { if (addr_len < SIN6_LEN_RFC2133) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 7d4bcf3fda5b..25ec8001898d 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -104,11 +104,11 @@ fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif) return container_of(q, struct frag_queue, q); } -static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, +static int ip6_frag_queue(struct net *net, + struct frag_queue *fq, struct sk_buff *skb, struct frag_hdr *fhdr, int nhoff, u32 *prob_offset, int *refs) { - struct net *net = dev_net(skb_dst(skb)->dev); int offset, end, fragsize; struct sk_buff *prev_tail; struct net_device *dev; @@ -324,10 +324,10 @@ out_fail: static int ipv6_frag_rcv(struct sk_buff *skb) { + const struct ipv6hdr *hdr = ipv6_hdr(skb); + struct net *net = skb_dst_dev_net(skb); struct frag_hdr *fhdr; struct frag_queue *fq; - const struct ipv6hdr *hdr = ipv6_hdr(skb); - struct net *net = dev_net(skb_dst(skb)->dev); u8 nexthdr; int iif; @@ -384,7 +384,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) spin_lock(&fq->q.lock); fq->iif = iif; - ret = 
ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff, + ret = ip6_frag_queue(net, fq, skb, fhdr, IP6CB(skb)->nhoff, &prob_offset, &refs); spin_unlock(&fq->q.lock); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 79c8f1acf8a3..3fbe0885c21c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -228,13 +228,13 @@ static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst, const struct rt6_info *rt = dst_rt6_info(dst); return ip6_neigh_lookup(rt6_nexthop(rt, &in6addr_any), - dst->dev, skb, daddr); + dst_dev(dst), skb, daddr); } static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr) { const struct rt6_info *rt = dst_rt6_info(dst); - struct net_device *dev = dst->dev; + struct net_device *dev = dst_dev(dst); daddr = choose_neigh_daddr(rt6_nexthop(rt, &in6addr_any), NULL, daddr); if (!daddr) @@ -391,9 +391,8 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev) static bool __rt6_check_expired(const struct rt6_info *rt) { if (rt->rt6i_flags & RTF_EXPIRES) - return time_after(jiffies, rt->dst.expires); - else - return false; + return time_after(jiffies, READ_ONCE(rt->dst.expires)); + return false; } static bool rt6_check_expired(const struct rt6_info *rt) @@ -403,10 +402,10 @@ static bool rt6_check_expired(const struct rt6_info *rt) from = rcu_dereference(rt->from); if (rt->rt6i_flags & RTF_EXPIRES) { - if (time_after(jiffies, rt->dst.expires)) + if (time_after(jiffies, READ_ONCE(rt->dst.expires))) return true; } else if (from) { - return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK || + return READ_ONCE(rt->dst.obsolete) != DST_OBSOLETE_FORCE_CHK || fib6_check_expired(from); } return false; @@ -1145,6 +1144,7 @@ static void ip6_rt_init_dst(struct rt6_info *rt, const struct fib6_result *res) rt->dst.input = ip6_input; } else if (ipv6_addr_type(&f6i->fib6_dst.addr) & IPV6_ADDR_MULTICAST) { rt->dst.input = ip6_mc_input; + rt->dst.output = ip6_mr_output; } else { rt->dst.input = ip6_forward; } @@ -2133,12 +2133,13 
@@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket, * expired, independently from their aging, as per RFC 8201 section 4 */ if (!(rt->rt6i_flags & RTF_EXPIRES)) { - if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) { + if (time_after_eq(now, READ_ONCE(rt->dst.lastuse) + + gc_args->timeout)) { pr_debug("aging clone %p\n", rt); rt6_remove_exception(bucket, rt6_ex); return; } - } else if (time_after(jiffies, rt->dst.expires)) { + } else if (time_after(jiffies, READ_ONCE(rt->dst.expires))) { pr_debug("purging expired route %p\n", rt); rt6_remove_exception(bucket, rt6_ex); return; @@ -2776,11 +2777,10 @@ static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie) { if (!__rt6_check_expired(rt) && - rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK && + READ_ONCE(rt->dst.obsolete) == DST_OBSOLETE_FORCE_CHK && fib6_check(from, cookie)) return &rt->dst; - else - return NULL; + return NULL; } INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst, @@ -2870,7 +2870,7 @@ static void rt6_update_expires(struct rt6_info *rt0, int timeout) rcu_read_lock(); from = rcu_dereference(rt0->from); if (from) - rt0->dst.expires = from->expires; + WRITE_ONCE(rt0->dst.expires, from->expires); rcu_read_unlock(); } @@ -2943,7 +2943,7 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, if (res.f6i->nh) { struct fib6_nh_match_arg arg = { - .dev = dst->dev, + .dev = dst_dev(dst), .gw = &rt6->rt6i_gateway, }; @@ -3010,10 +3010,10 @@ void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) oif = l3mdev_master_ifindex(skb->dev); ip6_update_pmtu(skb, sock_net(sk), mtu, oif, READ_ONCE(sk->sk_mark), - sk->sk_uid); + sk_uid(sk)); dst = __sk_dst_get(sk); - if (!dst || !dst->obsolete || + if (!dst || !READ_ONCE(dst->obsolete) || dst->ops->check(dst, inet6_sk(sk)->dst_cookie)) return; @@ -3232,13 +3232,13 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif) void 
ip6_sk_redirect(struct sk_buff *skb, struct sock *sk) { ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, - READ_ONCE(sk->sk_mark), sk->sk_uid); + READ_ONCE(sk->sk_mark), sk_uid(sk)); } EXPORT_SYMBOL_GPL(ip6_sk_redirect); static unsigned int ip6_default_advmss(const struct dst_entry *dst) { - struct net_device *dev = dst->dev; + struct net_device *dev = dst_dev(dst); unsigned int mtu = dst_mtu(dst); struct net *net; @@ -4301,7 +4301,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu if (res.f6i->nh) { struct fib6_nh_match_arg arg = { - .dev = dst->dev, + .dev = dst_dev(dst), .gw = &rt->rt6i_gateway, }; @@ -4587,13 +4587,14 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg) static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) { struct dst_entry *dst = skb_dst(skb); - struct net *net = dev_net(dst->dev); + struct net_device *dev = dst_dev(dst); + struct net *net = dev_net(dev); struct inet6_dev *idev; SKB_DR(reason); int type; if (netif_is_l3_master(skb->dev) || - dst->dev == net->loopback_dev) + dev == net->loopback_dev) idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif)); else idev = ip6_dst_idev(dst); @@ -4630,7 +4631,7 @@ static int ip6_pkt_discard(struct sk_buff *skb) static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb) { - skb->dev = skb_dst(skb)->dev; + skb->dev = skb_dst_dev(skb); return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); } @@ -4641,7 +4642,7 @@ static int ip6_pkt_prohibit(struct sk_buff *skb) static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb) { - skb->dev = skb_dst(skb)->dev; + skb->dev = skb_dst_dev(skb); return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); } @@ -5844,15 +5845,17 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, * each as a nexthop within RTA_MULTIPATH. 
*/ if (rt6) { + struct net_device *dev; + if (rt6_flags & RTF_GATEWAY && nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway)) goto nla_put_failure; - if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex)) + dev = dst_dev(dst); + if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex)) goto nla_put_failure; - if (dst->lwtstate && - lwtunnel_fill_encap(skb, dst->lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0) + if (lwtunnel_fill_encap(skb, dst->lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0) goto nla_put_failure; } else if (rt->fib6_nsiblings) { struct fib6_info *sibling; @@ -5904,7 +5907,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, } if (rt6_flags & RTF_EXPIRES) { - expires = dst ? dst->expires : rt->expires; + expires = dst ? READ_ONCE(dst->expires) : rt->expires; expires -= jiffies; } diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c index 7c05ac846646..1f41f53fbaff 100644 --- a/net/ipv6/rpl_iptunnel.c +++ b/net/ipv6/rpl_iptunnel.c @@ -242,7 +242,7 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb) local_bh_enable(); } - err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); + err = skb_cow_head(skb, LL_RESERVED_SPACE(dst_dev(dst))); if (unlikely(err)) goto drop; } @@ -297,7 +297,7 @@ static int rpl_input(struct sk_buff *skb) local_bh_enable(); } - err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); + err = skb_cow_head(skb, LL_RESERVED_SPACE(dst_dev(dst))); if (unlikely(err)) goto drop; } else { diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 51583461ae29..3e1b9991131a 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -128,7 +128,8 @@ static int __seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto, struct dst_entry *cache_dst) { struct dst_entry *dst = skb_dst(skb); - struct net *net = dev_net(dst->dev); + struct net_device *dev = dst_dev(dst); + struct net *net = dev_net(dev); struct ipv6hdr *hdr, *inner_hdr; struct ipv6_sr_hdr *isrh; 
int hdrlen, tot_len, err; @@ -181,7 +182,7 @@ static int __seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, isrh->nexthdr = proto; hdr->daddr = isrh->segments[isrh->first_segment]; - set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr); + set_tun_src(net, dev, &hdr->daddr, &hdr->saddr); #ifdef CONFIG_IPV6_SEG6_HMAC if (sr_has_hmac(isrh)) { @@ -212,7 +213,8 @@ static int seg6_do_srh_encap_red(struct sk_buff *skb, { __u8 first_seg = osrh->first_segment; struct dst_entry *dst = skb_dst(skb); - struct net *net = dev_net(dst->dev); + struct net_device *dev = dst_dev(dst); + struct net *net = dev_net(dev); struct ipv6hdr *hdr, *inner_hdr; int hdrlen = ipv6_optlen(osrh); int red_tlv_offset, tlv_offset; @@ -270,7 +272,7 @@ static int seg6_do_srh_encap_red(struct sk_buff *skb, if (skip_srh) { hdr->nexthdr = proto; - set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr); + set_tun_src(net, dev, &hdr->daddr, &hdr->saddr); goto out; } @@ -306,7 +308,7 @@ static int seg6_do_srh_encap_red(struct sk_buff *skb, srcaddr: isrh->nexthdr = proto; - set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr); + set_tun_src(net, dev, &hdr->daddr, &hdr->saddr); #ifdef CONFIG_IPV6_SEG6_HMAC if (unlikely(!skip_srh && sr_has_hmac(isrh))) { @@ -362,7 +364,7 @@ static int __seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, #ifdef CONFIG_IPV6_SEG6_HMAC if (sr_has_hmac(isrh)) { - struct net *net = dev_net(skb_dst(skb)->dev); + struct net *net = skb_dst_dev_net(skb); err = seg6_push_hmac(net, &hdr->saddr, isrh); if (unlikely(err)) @@ -507,7 +509,7 @@ static int seg6_input_core(struct net *net, struct sock *sk, local_bh_enable(); } - err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); + err = skb_cow_head(skb, LL_RESERVED_SPACE(dst_dev(dst))); if (unlikely(err)) goto drop; } else { @@ -518,7 +520,7 @@ static int seg6_input_core(struct net *net, struct sock *sk, if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, 
dev_net(skb->dev), NULL, skb, NULL, - skb_dst(skb)->dev, seg6_input_finish); + skb_dst_dev(skb), seg6_input_finish); return seg6_input_finish(dev_net(skb->dev), NULL, skb); drop: @@ -528,7 +530,7 @@ drop: static int seg6_input_nf(struct sk_buff *skb) { - struct net_device *dev = skb_dst(skb)->dev; + struct net_device *dev = skb_dst_dev(skb); struct net *net = dev_net(skb->dev); switch (skb->protocol) { @@ -593,7 +595,7 @@ static int seg6_output_core(struct net *net, struct sock *sk, local_bh_enable(); } - err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); + err = skb_cow_head(skb, LL_RESERVED_SPACE(dst_dev(dst))); if (unlikely(err)) goto drop; } @@ -603,7 +605,7 @@ static int seg6_output_core(struct net *net, struct sock *sk, if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, - NULL, skb_dst(skb)->dev, dst_output); + NULL, dst_dev(dst), dst_output); return dst_output(net, sk, skb); drop: @@ -614,7 +616,7 @@ drop: static int seg6_output_nf(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct net_device *dev = skb_dst(skb)->dev; + struct net_device *dev = skb_dst_dev(skb); switch (skb->protocol) { case htons(ETH_P_IP): diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index a11a02b4ba95..2b41e4c0dddd 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -270,7 +270,7 @@ static void advance_nextseg(struct ipv6_sr_hdr *srh, struct in6_addr *daddr) static int seg6_lookup_any_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, - u32 tbl_id, bool local_delivery) + u32 tbl_id, bool local_delivery, int oif) { struct net *net = dev_net(skb->dev); struct ipv6hdr *hdr = ipv6_hdr(skb); @@ -282,6 +282,7 @@ seg6_lookup_any_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_iif = skb->dev->ifindex; + fl6.flowi6_oif = oif; fl6.daddr = nhaddr ? 
*nhaddr : hdr->daddr; fl6.saddr = hdr->saddr; fl6.flowlabel = ip6_flowinfo(hdr); @@ -291,17 +292,19 @@ seg6_lookup_any_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, if (nhaddr) fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH; - if (!tbl_id) { + if (!tbl_id && !oif) { dst = ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags); - } else { + } else if (tbl_id) { struct fib6_table *table; table = fib6_get_table(net, tbl_id); if (!table) goto out; - rt = ip6_pol_route(net, table, 0, &fl6, skb, flags); + rt = ip6_pol_route(net, table, oif, &fl6, skb, flags); dst = &rt->dst; + } else { + dst = ip6_route_output(net, NULL, &fl6); } /* we want to discard traffic destined for local packet processing, @@ -310,7 +313,7 @@ seg6_lookup_any_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, if (!local_delivery) dev_flags |= IFF_LOOPBACK; - if (dst && (dst->dev->flags & dev_flags) && !dst->error) { + if (dst && (dst_dev(dst)->flags & dev_flags) && !dst->error) { dst_release(dst); dst = NULL; } @@ -330,7 +333,7 @@ out: int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, u32 tbl_id) { - return seg6_lookup_any_nexthop(skb, nhaddr, tbl_id, false); + return seg6_lookup_any_nexthop(skb, nhaddr, tbl_id, false, 0); } static __u8 seg6_flv_lcblock_octects(const struct seg6_flavors_info *finfo) @@ -418,7 +421,7 @@ static int end_next_csid_core(struct sk_buff *skb, struct seg6_local_lwt *slwt) static int input_action_end_x_finish(struct sk_buff *skb, struct seg6_local_lwt *slwt) { - seg6_lookup_nexthop(skb, &slwt->nh6, 0); + seg6_lookup_any_nexthop(skb, &slwt->nh6, 0, false, slwt->oif); return dst_input(skb); } @@ -1277,7 +1280,7 @@ static int input_action_end_dt6(struct sk_buff *skb, /* note: this time we do not need to specify the table because the VRF * takes care of selecting the correct table. 
*/ - seg6_lookup_any_nexthop(skb, NULL, 0, true); + seg6_lookup_any_nexthop(skb, NULL, 0, true, 0); return dst_input(skb); @@ -1285,7 +1288,7 @@ legacy_mode: #endif skb_set_transport_header(skb, sizeof(struct ipv6hdr)); - seg6_lookup_any_nexthop(skb, NULL, slwt->table, true); + seg6_lookup_any_nexthop(skb, NULL, slwt->table, true, 0); return dst_input(skb); @@ -1477,7 +1480,8 @@ static struct seg6_action_desc seg6_action_table[] = { .action = SEG6_LOCAL_ACTION_END_X, .attrs = SEG6_F_ATTR(SEG6_LOCAL_NH6), .optattrs = SEG6_F_LOCAL_COUNTERS | - SEG6_F_LOCAL_FLAVORS, + SEG6_F_LOCAL_FLAVORS | + SEG6_F_ATTR(SEG6_LOCAL_OIF), .input = input_action_end_x, }, { @@ -2083,7 +2087,7 @@ struct nla_policy seg6_local_flavors_policy[SEG6_LOCAL_FLV_MAX + 1] = { static int seg6_chk_next_csid_cfg(__u8 block_len, __u8 func_len) { /* Locator-Block and Locator-Node Function cannot exceed 128 bits - * (i.e. C-SID container lenghts). + * (i.e. C-SID container length). */ if (next_csid_chk_cntr_bits(block_len, func_len)) return -EINVAL; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index a72dbca9e8fc..12496ba1b7d4 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1035,7 +1035,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, skb_set_inner_ipproto(skb, IPPROTO_IPV6); iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl, - df, !net_eq(tunnel->net, dev_net(dev))); + df, !net_eq(tunnel->net, dev_net(dev)), 0); return NETDEV_TX_OK; tx_error_icmp: diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 9d83eadd308b..f0ee1a909771 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -236,7 +236,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) fl6.flowi6_mark = ireq->ir_mark; fl6.fl6_dport = ireq->ir_rmt_port; fl6.fl6_sport = inet_sk(sk)->inet_sport; - fl6.flowi6_uid = sk->sk_uid; + fl6.flowi6_uid = sk_uid(sk); security_req_classify_flow(req, flowi6_to_flowi_common(&fl6)); dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p); 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e8e68a142649..8f2c3cba1f1f 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -41,6 +41,7 @@ #include <linux/random.h> #include <linux/indirect_call_wrapper.h> +#include <net/aligned_data.h> #include <net/tcp.h> #include <net/ndisc.h> #include <net/inet6_hashtables.h> @@ -269,7 +270,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.fl6_sport = inet->inet_sport; if (IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) && !fl6.fl6_sport) fl6.flowi6_flags = FLOWI_FLAG_ANY_SPORT; - fl6.flowi6_uid = sk->sk_uid; + fl6.flowi6_uid = sk_uid(sk); opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); final_p = fl6_update_dst(&fl6, opt, &final); @@ -835,7 +836,6 @@ static struct dst_entry *tcp_v6_route_req(const struct sock *sk, struct request_sock_ops tcp6_request_sock_ops __read_mostly = { .family = AF_INET6, .obj_size = sizeof(struct tcp6_request_sock), - .rtx_syn_ack = tcp_rtx_synack, .send_ack = tcp_v6_reqsk_send_ack, .destructor = tcp_v6_reqsk_destructor, .send_reset = tcp_v6_send_reset, @@ -868,7 +868,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 int oif, int rst, u8 tclass, __be32 label, u32 priority, u32 txhash, struct tcp_key *key) { - struct net *net = sk ? sock_net(sk) : dev_net_rcu(skb_dst(skb)->dev); + struct net *net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb); unsigned int tot_len = sizeof(struct tcphdr); struct sock *ctl_sk = net->ipv6.tcp_sk; const struct tcphdr *th = tcp_hdr(skb); @@ -1043,7 +1043,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb, if (!sk && !ipv6_unicast_destination(skb)) return; - net = sk ? sock_net(sk) : dev_net_rcu(skb_dst(skb)->dev); + net = sk ? 
sock_net(sk) : skb_dst_dev_net_rcu(skb); /* Invalid TCP option size or twice included auth */ if (tcp_parse_auth_options(th, &md5_hash_location, &aoh)) return; @@ -2168,7 +2168,7 @@ static void get_openreq6(struct seq_file *seq, jiffies_to_clock_t(ttd), req->num_timeout, from_kuid_munged(seq_user_ns(seq), - sock_i_uid(req->rsk_listener)), + sk_uid(req->rsk_listener)), 0, /* non standard timer */ 0, /* open_requests have no inode */ 0, req); @@ -2234,7 +2234,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) timer_active, jiffies_delta_to_clock_t(timer_expires - jiffies), icsk->icsk_retransmits, - from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), + from_kuid_munged(seq_user_ns(seq), sk_uid(sp)), icsk->icsk_probes_out, sock_i_ino(sp), refcount_read(&sp->sk_refcnt), sp, @@ -2357,7 +2357,7 @@ struct proto tcpv6_prot = { .stream_memory_free = tcp_stream_memory_free, .sockets_allocated = &tcp_sockets_allocated, - .memory_allocated = &tcp_memory_allocated, + .memory_allocated = &net_aligned_data.tcp_memory_allocated, .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc, .memory_pressure = &tcp_memory_pressure, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 7317f8e053f1..6bbdadbd5fec 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -750,7 +750,8 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (type == NDISC_REDIRECT) { if (tunnel) { ip6_redirect(skb, sock_net(sk), inet6_iif(skb), - READ_ONCE(sk->sk_mark), sk->sk_uid); + READ_ONCE(sk->sk_mark), + sk_uid(sk)); } else { ip6_sk_redirect(skb, sk); } @@ -1620,7 +1621,7 @@ do_udp_sendmsg: if (!fl6->flowi6_oif) fl6->flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; - fl6->flowi6_uid = sk->sk_uid; + fl6->flowi6_uid = sk_uid(sk); if (msg->msg_controllen) { opt = &opt_space; @@ -1924,7 +1925,7 @@ struct proto udpv6_prot = { .psock_update_sk_prot = udp_bpf_update_proto, #endif - .memory_allocated = &udp_memory_allocated, + .memory_allocated = &net_aligned_data.udp_memory_allocated, 
.per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc, .sysctl_mem = sysctl_udp_mem, diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index 0590f566379d..8a406be25a3a 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _UDP6_IMPL_H #define _UDP6_IMPL_H +#include <net/aligned_data.h> #include <net/udp.h> #include <net/udplite.h> #include <net/protocol.h> diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index a60bec9b14f1..2cec542437f7 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -59,7 +59,7 @@ struct proto udplitev6_prot = { .rehash = udp_v6_rehash, .get_port = udp_v6_get_port, - .memory_allocated = &udp_memory_allocated, + .memory_allocated = &net_aligned_data.udp_memory_allocated, .per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc, .sysctl_mem = sysctl_udp_mem, diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index b3d5d1f266ee..512bdaf13699 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -106,7 +106,7 @@ skip_frag: int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, - net, sk, skb, skb->dev, skb_dst(skb)->dev, + net, sk, skb, skb->dev, skb_dst_dev(skb), __xfrm6_output, !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 24aec295a51c..a0be3896a934 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -835,8 +835,7 @@ start: if (!sk_wmem_schedule(sk, copy)) goto wait_for_memory; - err = skb_splice_from_iter(skb, &msg->msg_iter, copy, - sk->sk_allocation); + err = skb_splice_from_iter(skb, &msg->msg_iter, copy); if (err < 0) { if (err == -EMSGSIZE) goto wait_for_memory; diff --git a/net/key/af_key.c b/net/key/af_key.c index efc2a91f4c48..1f82f69acfde 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3788,7 +3788,7 @@ static int pfkey_seq_show(struct seq_file *f, void *v) refcount_read(&s->sk_refcnt), 
sk_rmem_alloc_get(s), sk_wmem_alloc_get(s), - from_kuid_munged(seq_user_ns(f), sock_i_uid(s)), + from_kuid_munged(seq_user_ns(f), sk_uid(s)), sock_i_ino(s) ); return 0; diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index b98d13584c81..ea232f338dcb 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -545,7 +545,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_mark = READ_ONCE(sk->sk_mark); - fl6.flowi6_uid = sk->sk_uid; + fl6.flowi6_uid = sk_uid(sk); ipcm6_init_sk(&ipc6, sk); diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index cc77ec5769d8..5958a80fe14c 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -210,7 +210,7 @@ static int llc_ui_release(struct socket *sock) dprintk("%s: closing local(%02X) remote(%02X)\n", __func__, llc->laddr.lsap, llc->daddr.lsap); if (!llc_send_disc(sk)) - llc_ui_wait_for_disc(sk, sk->sk_rcvtimeo); + llc_ui_wait_for_disc(sk, READ_ONCE(sk->sk_rcvtimeo)); if (!sock_flag(sk, SOCK_ZAPPED)) { struct llc_sap *sap = llc->sap; @@ -455,7 +455,7 @@ static int llc_ui_shutdown(struct socket *sock, int how) goto out; rc = llc_send_disc(sk); if (!rc) - rc = llc_ui_wait_for_disc(sk, sk->sk_rcvtimeo); + rc = llc_ui_wait_for_disc(sk, READ_ONCE(sk->sk_rcvtimeo)); /* Wake up anyone sleeping in poll */ sk->sk_state_change(sk); out: @@ -712,7 +712,7 @@ static int llc_ui_accept(struct socket *sock, struct socket *newsock, goto out; /* wait for a connection to arrive. 
*/ if (skb_queue_empty(&sk->sk_receive_queue)) { - rc = llc_wait_data(sk, sk->sk_rcvtimeo); + rc = llc_wait_data(sk, READ_ONCE(sk->sk_rcvtimeo)); if (rc) goto out; } diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c index 07e9abb5978a..aa81c67b24a1 100644 --- a/net/llc/llc_proc.c +++ b/net/llc/llc_proc.c @@ -151,7 +151,7 @@ static int llc_seq_socket_show(struct seq_file *seq, void *v) sk_wmem_alloc_get(sk), sk_rmem_alloc_get(sk) - llc->copied_seq, sk->sk_state, - from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)), + from_kuid_munged(seq_user_ns(seq), sk_uid(sk)), llc->link); out: return 0; diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index ee534797c033..e38f46ffebfa 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -299,7 +299,8 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, if (!sta->sta.valid_links && !sta->sta.deflink.ht_cap.ht_supported && - !sta->sta.deflink.he_cap.has_he) { + !sta->sta.deflink.he_cap.has_he && + !sta->sta.deflink.s1g_cap.s1g) { ht_dbg(sta->sdata, "STA %pM erroneously requests BA session on tid %d w/o HT\n", sta->sta.addr, tid); @@ -327,7 +328,8 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, /* XXX: check own ht delayed BA capability?? 
*/ if (((ba_policy != 1) && (sta->sta.valid_links || - !(sta->sta.deflink.ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA))) || + !(sta->sta.deflink.ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA) || + !(sta->sta.deflink.s1g_cap.cap[3] & S1G_CAP3_HT_DELAYED_BA))) || (buf_size > max_buf_size)) { status = WLAN_STATUS_INVALID_QOS_PARAM; ht_dbg_ratelimited(sta->sdata, diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index dbd9ad5f3992..d981b0fc57bf 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -616,7 +616,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, !pubsta->deflink.ht_cap.ht_supported && !pubsta->deflink.vht_cap.vht_supported && !pubsta->deflink.he_cap.has_he && - !pubsta->deflink.eht_cap.has_eht) + !pubsta->deflink.eht_cap.has_eht && + !pubsta->deflink.s1g_cap.s1g) return -EINVAL; if (WARN_ON_ONCE(!local->ops->ampdu_action)) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 954795b0fe48..d76643d46150 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -178,6 +178,7 @@ static int ieee80211_set_ap_mbssid_options(struct ieee80211_sub_if_data *sdata, link_conf->nontransmitted = true; link_conf->bssid_index = params->index; + link_conf->bssid_indicator = tx_bss_conf->bssid_indicator; } if (params->ema) link_conf->ema_ap = true; @@ -885,6 +886,13 @@ static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev, ret = 0; memcpy(mac, sta->sta.addr, ETH_ALEN); sta_set_sinfo(sta, sinfo, true); + + /* Add accumulated removed link data to sinfo data for + * consistency for MLO + */ + if (sinfo->valid_links) + sta_set_accumulated_removed_links_sinfo(sta, sinfo); + } return ret; @@ -912,6 +920,12 @@ static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev, if (sta) { ret = 0; sta_set_sinfo(sta, sinfo, true); + + /* Add accumulated removed link data to sinfo data for + * consistency for MLO + */ + if (sinfo->valid_links) + sta_set_accumulated_removed_links_sinfo(sta, sinfo); } return 
ret; @@ -1218,8 +1232,11 @@ ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, ieee80211_copy_rnr_beacon(pos, new->rnr_ies, rnr); } /* update bssid_indicator */ - link_conf->bssid_indicator = - ilog2(__roundup_pow_of_two(mbssid->cnt + 1)); + if (new->mbssid_ies->cnt && new->mbssid_ies->elem[0].len > 2) + link_conf->bssid_indicator = + *(new->mbssid_ies->elem[0].data + 2); + else + link_conf->bssid_indicator = 0; } if (csa) { @@ -1878,6 +1895,7 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, params->vht_capa || params->he_capa || params->eht_capa || + params->s1g_capa || params->opmode_notif_used; switch (mode) { @@ -1956,6 +1974,10 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, params->eht_capa_len, link_sta); + if (params->s1g_capa) + ieee80211_s1g_cap_to_sta_s1g_cap(sdata, params->s1g_capa, + link_sta); + ieee80211_sta_init_nss(link_sta); if (params->opmode_notif_used) { @@ -3042,7 +3064,8 @@ static int ieee80211_set_mcast_rate(struct wiphy *wiphy, struct net_device *dev, return 0; } -static int ieee80211_set_wiphy_params(struct wiphy *wiphy, u32 changed) +static int ieee80211_set_wiphy_params(struct wiphy *wiphy, int radio_idx, + u32 changed) { struct ieee80211_local *local = wiphy_priv(wiphy); int err; @@ -3050,7 +3073,8 @@ static int ieee80211_set_wiphy_params(struct wiphy *wiphy, u32 changed) if (changed & WIPHY_PARAM_FRAG_THRESHOLD) { ieee80211_check_fast_xmit_all(local); - err = drv_set_frag_threshold(local, wiphy->frag_threshold); + err = drv_set_frag_threshold(local, radio_idx, + wiphy->frag_threshold); if (err) { ieee80211_check_fast_xmit_all(local); @@ -3064,14 +3088,23 @@ static int ieee80211_set_wiphy_params(struct wiphy *wiphy, u32 changed) coverage_class = changed & WIPHY_PARAM_COVERAGE_CLASS ? 
wiphy->coverage_class : -1; - err = drv_set_coverage_class(local, coverage_class); + err = drv_set_coverage_class(local, radio_idx, + coverage_class); if (err) return err; } if (changed & WIPHY_PARAM_RTS_THRESHOLD) { - err = drv_set_rts_threshold(local, wiphy->rts_threshold); + u32 rts_threshold; + + if ((radio_idx == -1) || (radio_idx >= wiphy->n_radio)) + rts_threshold = wiphy->rts_threshold; + else + rts_threshold = + wiphy->radio_cfg[radio_idx].rts_threshold; + + err = drv_set_rts_threshold(local, radio_idx, rts_threshold); if (err) return err; @@ -3089,18 +3122,19 @@ static int ieee80211_set_wiphy_params(struct wiphy *wiphy, u32 changed) } if (changed & (WIPHY_PARAM_RETRY_SHORT | WIPHY_PARAM_RETRY_LONG)) - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_RETRY_LIMITS); + ieee80211_hw_config(local, radio_idx, + IEEE80211_CONF_CHANGE_RETRY_LIMITS); if (changed & (WIPHY_PARAM_TXQ_LIMIT | WIPHY_PARAM_TXQ_MEMORY_LIMIT | WIPHY_PARAM_TXQ_QUANTUM)) - ieee80211_txq_set_params(local); + ieee80211_txq_set_params(local, radio_idx); return 0; } static int ieee80211_set_tx_power(struct wiphy *wiphy, - struct wireless_dev *wdev, + struct wireless_dev *wdev, int radio_idx, enum nl80211_tx_power_setting type, int mbm) { struct ieee80211_local *local = wiphy_priv(wiphy); @@ -3228,6 +3262,7 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, static int ieee80211_get_tx_power(struct wiphy *wiphy, struct wireless_dev *wdev, + int radio_idx, unsigned int link_id, int *dbm) { @@ -3406,7 +3441,7 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, } if (ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS)) - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); + ieee80211_hw_config(local, -1, IEEE80211_CONF_CHANGE_PS); ieee80211_recalc_ps(local); ieee80211_recalc_ps_vif(sdata); @@ -3563,6 +3598,56 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, return 0; } +static bool ieee80211_is_scan_ongoing(struct wiphy *wiphy, + struct 
ieee80211_local *local, + struct cfg80211_chan_def *chandef) +{ + struct cfg80211_scan_request *scan_req; + int chan_radio_idx, req_radio_idx; + struct ieee80211_roc_work *roc; + + if (list_empty(&local->roc_list) && !local->scanning) + return false; + + if (wiphy->n_radio < 2) + return true; + + req_radio_idx = cfg80211_get_radio_idx_by_chan(wiphy, chandef->chan); + if (req_radio_idx < 0) + return true; + + if (local->scanning) { + scan_req = wiphy_dereference(wiphy, local->scan_req); + /* + * Scan is going on but info is not there. Should not happen + * but if it does, let's not take risk and assume we can't use + * the hw hence return true + */ + if (WARN_ON_ONCE(!scan_req)) + return true; + + return ieee80211_is_radio_idx_in_scan_req(wiphy, scan_req, + req_radio_idx); + } + + list_for_each_entry(roc, &local->roc_list, list) { + chan_radio_idx = cfg80211_get_radio_idx_by_chan(wiphy, + roc->chan); + /* + * The roc work is added but chan_radio_idx is invalid. + * Should not happen but if it does, let's not take + * risk and return true. 
+ */ + if (chan_radio_idx < 0) + return true; + + if (chan_radio_idx == req_radio_idx) + return true; + } + + return false; +} + static int ieee80211_start_radar_detection(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_chan_def *chandef, @@ -3576,7 +3661,7 @@ static int ieee80211_start_radar_detection(struct wiphy *wiphy, lockdep_assert_wiphy(local->hw.wiphy); - if (!list_empty(&local->roc_list) || local->scanning) + if (ieee80211_is_scan_ongoing(wiphy, local, chandef)) return -EBUSY; link_data = sdata_dereference(sdata->link[link_id], sdata); @@ -4068,7 +4153,7 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, lockdep_assert_wiphy(local->hw.wiphy); - if (!list_empty(&local->roc_list) || local->scanning) + if (ieee80211_is_scan_ongoing(wiphy, local, &params->chandef)) return -EBUSY; if (sdata->wdev.links[link_id].cac_started) @@ -4252,7 +4337,8 @@ ieee80211_update_mgmt_frame_registrations(struct wiphy *wiphy, ieee80211_configure_filter(local); } -static int ieee80211_set_antenna(struct wiphy *wiphy, u32 tx_ant, u32 rx_ant) +static int ieee80211_set_antenna(struct wiphy *wiphy, int radio_idx, + u32 tx_ant, u32 rx_ant) { struct ieee80211_local *local = wiphy_priv(wiphy); int ret; @@ -4268,11 +4354,12 @@ static int ieee80211_set_antenna(struct wiphy *wiphy, u32 tx_ant, u32 rx_ant) return 0; } -static int ieee80211_get_antenna(struct wiphy *wiphy, u32 *tx_ant, u32 *rx_ant) +static int ieee80211_get_antenna(struct wiphy *wiphy, int radio_idx, + u32 *tx_ant, u32 *rx_ant) { struct ieee80211_local *local = wiphy_priv(wiphy); - return drv_get_antenna(local, tx_ant, rx_ant); + return drv_get_antenna(local, radio_idx, tx_ant, rx_ant); } static int ieee80211_set_rekey_data(struct wiphy *wiphy, diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 3aaf5abf1acc..4bcbcf9d98b5 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -644,15 +644,39 @@ ieee80211_find_chanctx(struct ieee80211_local *local, return NULL; } -bool
ieee80211_is_radar_required(struct ieee80211_local *local) +bool ieee80211_is_radar_required(struct ieee80211_local *local, + struct cfg80211_scan_request *req) { + struct wiphy *wiphy = local->hw.wiphy; struct ieee80211_link_data *link; + struct ieee80211_channel *chan; + int radio_idx; lockdep_assert_wiphy(local->hw.wiphy); + if (!req) + return false; + for_each_sdata_link(local, link) { - if (link->radar_required) - return true; + if (link->radar_required) { + if (wiphy->n_radio < 2) + return true; + + chan = link->conf->chanreq.oper.chan; + radio_idx = cfg80211_get_radio_idx_by_chan(wiphy, chan); + /* + * The radio index (radio_idx) is expected to be valid, + * as it's derived from a channel tied to a link. If + * it's invalid (i.e., negative), return true to avoid + * potential issues with radar-sensitive operations. + */ + if (radio_idx < 0) + return true; + + if (ieee80211_is_radio_idx_in_scan_req(wiphy, req, + radio_idx)) + return true; + } } return false; @@ -720,7 +744,7 @@ static int ieee80211_add_chanctx(struct ieee80211_local *local, /* turn idle off *before* setting channel -- some drivers need that */ changed = ieee80211_idle_off(local); if (changed) - ieee80211_hw_config(local, changed); + ieee80211_hw_config(local, -1, changed); err = drv_add_chanctx(local, ctx); if (err) { @@ -1381,6 +1405,7 @@ ieee80211_link_use_reserved_reassign(struct ieee80211_link_data *link) goto out; } + link->radar_required = link->reserved_radar_required; list_move(&link->assigned_chanctx_list, &new_ctx->assigned_links); rcu_assign_pointer(link_conf->chanctx_conf, &new_ctx->conf); diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 69e03630f64c..e8b78ec682da 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -4,7 +4,7 @@ * * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright (C) 2018 - 2019, 2021-2024 Intel Corporation + * Copyright (C) 2018 - 2019, 2021-2025 Intel 
Corporation */ #include <linux/debugfs.h> @@ -490,7 +490,6 @@ static const char *hw_flag_names[] = { FLAG(DETECTS_COLOR_COLLISION), FLAG(MLO_MCAST_MULTI_LINK_TX), FLAG(DISALLOW_PUNCTURING), - FLAG(DISALLOW_PUNCTURING_5GHZ), FLAG(HANDLES_QUIET_CSA), FLAG(STRICT), #undef FLAG diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 307587c8a003..8baebb5636ec 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -143,15 +143,16 @@ int drv_change_interface(struct ieee80211_local *local, void drv_remove_interface(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata); -static inline int drv_config(struct ieee80211_local *local, u32 changed) +static inline int drv_config(struct ieee80211_local *local, int radio_idx, + u32 changed) { int ret; might_sleep(); lockdep_assert_wiphy(local->hw.wiphy); - trace_drv_config(local, changed); - ret = local->ops->config(&local->hw, changed); + trace_drv_config(local, radio_idx, changed); + ret = local->ops->config(&local->hw, radio_idx, changed); trace_drv_return_int(local, ret); return ret; } @@ -387,45 +388,47 @@ static inline void drv_get_key_seq(struct ieee80211_local *local, } static inline int drv_set_frag_threshold(struct ieee80211_local *local, - u32 value) + int radio_idx, u32 value) { int ret = 0; might_sleep(); lockdep_assert_wiphy(local->hw.wiphy); - trace_drv_set_frag_threshold(local, value); + trace_drv_set_frag_threshold(local, radio_idx, value); if (local->ops->set_frag_threshold) - ret = local->ops->set_frag_threshold(&local->hw, value); + ret = local->ops->set_frag_threshold(&local->hw, radio_idx, + value); trace_drv_return_int(local, ret); return ret; } static inline int drv_set_rts_threshold(struct ieee80211_local *local, - u32 value) + int radio_idx, u32 value) { int ret = 0; might_sleep(); lockdep_assert_wiphy(local->hw.wiphy); - trace_drv_set_rts_threshold(local, value); + trace_drv_set_rts_threshold(local, radio_idx, value); if (local->ops->set_rts_threshold) - 
ret = local->ops->set_rts_threshold(&local->hw, value); + ret = local->ops->set_rts_threshold(&local->hw, radio_idx, + value); trace_drv_return_int(local, ret); return ret; } static inline int drv_set_coverage_class(struct ieee80211_local *local, - s16 value) + int radio_idx, s16 value) { int ret = 0; might_sleep(); lockdep_assert_wiphy(local->hw.wiphy); - trace_drv_set_coverage_class(local, value); + trace_drv_set_coverage_class(local, radio_idx, value); if (local->ops->set_coverage_class) - local->ops->set_coverage_class(&local->hw, value); + local->ops->set_coverage_class(&local->hw, radio_idx, value); else ret = -EOPNOTSUPP; @@ -631,6 +634,25 @@ static inline void drv_sta_statistics(struct ieee80211_local *local, trace_drv_return_void(local); } +static inline void drv_link_sta_statistics(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_link_sta *link_sta, + struct link_station_info *link_sinfo) +{ + might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); + + sdata = get_bss_sdata(sdata); + if (!check_sdata_in_driver(sdata)) + return; + + trace_drv_link_sta_statistics(local, sdata, link_sta); + if (local->ops->link_sta_statistics) + local->ops->link_sta_statistics(&local->hw, &sdata->vif, + link_sta, link_sinfo); + trace_drv_return_void(local); +} + int drv_conf_tx(struct ieee80211_local *local, struct ieee80211_link_data *link, u16 ac, const struct ieee80211_tx_queue_params *params); @@ -753,20 +775,21 @@ static inline int drv_set_antenna(struct ieee80211_local *local, might_sleep(); lockdep_assert_wiphy(local->hw.wiphy); if (local->ops->set_antenna) - ret = local->ops->set_antenna(&local->hw, tx_ant, rx_ant); + ret = local->ops->set_antenna(&local->hw, -1, tx_ant, rx_ant); trace_drv_set_antenna(local, tx_ant, rx_ant, ret); return ret; } -static inline int drv_get_antenna(struct ieee80211_local *local, +static inline int drv_get_antenna(struct ieee80211_local *local, int radio_idx, u32 *tx_ant, u32 *rx_ant) { int ret = 
-EOPNOTSUPP; might_sleep(); lockdep_assert_wiphy(local->hw.wiphy); if (local->ops->get_antenna) - ret = local->ops->get_antenna(&local->hw, tx_ant, rx_ant); - trace_drv_get_antenna(local, *tx_ant, *rx_ant, ret); + ret = local->ops->get_antenna(&local->hw, radio_idx, + tx_ant, rx_ant); + trace_drv_get_antenna(local, radio_idx, *tx_ant, *rx_ant, ret); return ret; } diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 9ed87d6f5019..6e36b09fe97f 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -635,7 +635,7 @@ static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata) rcu_read_lock(); list_for_each_entry_rcu(sta, &local->sta_list, list) { - unsigned long last_active = ieee80211_sta_last_active(sta); + unsigned long last_active = ieee80211_sta_last_active(sta, -1); if (sta->sdata == sdata && time_is_after_jiffies(last_active + @@ -1228,7 +1228,7 @@ static void ieee80211_ibss_sta_expire(struct ieee80211_sub_if_data *sdata) lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry_safe(sta, tmp, &local->sta_list, list) { - unsigned long last_active = ieee80211_sta_last_active(sta); + unsigned long last_active = ieee80211_sta_last_active(sta, -1); if (sdata != sta->sdata) continue; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 30809f0b35f7..ec68204fddc9 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1872,7 +1872,8 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, struct ieee80211_rx_status *status, unsigned int mpdu_len, unsigned int mpdu_offset); -int ieee80211_hw_config(struct ieee80211_local *local, u32 changed); +int ieee80211_hw_config(struct ieee80211_local *local, int radio_idx, + u32 changed); int ieee80211_hw_conf_chan(struct ieee80211_local *local); void ieee80211_hw_conf_init(struct ieee80211_local *local); void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx); @@ -2269,6 +2270,9 @@ void ieee80211_s1g_rx_twt_action(struct 
ieee80211_sub_if_data *sdata, struct sk_buff *skb); void ieee80211_s1g_status_twt_action(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); +void ieee80211_s1g_cap_to_sta_s1g_cap(struct ieee80211_sub_if_data *sdata, + const struct ieee80211_s1g_cap *s1g_cap_ie, + struct link_sta_info *link_sta); /* Spectrum management */ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, @@ -2542,7 +2546,7 @@ static inline bool ieee80211_can_run_worker(struct ieee80211_local *local) } int ieee80211_txq_setup_flows(struct ieee80211_local *local); -void ieee80211_txq_set_params(struct ieee80211_local *local); +void ieee80211_txq_set_params(struct ieee80211_local *local, int radio_idx); void ieee80211_txq_teardown_flows(struct ieee80211_local *local); void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, @@ -2638,6 +2642,8 @@ int ieee80211_put_eht_cap(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata, const struct ieee80211_supported_band *sband, const struct ieee80211_conn_settings *conn); +int ieee80211_put_reg_conn(struct sk_buff *skb, + enum ieee80211_channel_flags flags); /* channel management */ bool ieee80211_chandef_ht_oper(const struct ieee80211_ht_operation *ht_oper, @@ -2712,7 +2718,11 @@ void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local, struct ieee80211_chanctx *ctx, struct ieee80211_link_data *rsvd_for, bool check_reserved); -bool ieee80211_is_radar_required(struct ieee80211_local *local); +bool ieee80211_is_radar_required(struct ieee80211_local *local, + struct cfg80211_scan_request *req); +bool ieee80211_is_radio_idx_in_scan_req(struct wiphy *wiphy, + struct cfg80211_scan_request *scan_req, + int radio_idx); void ieee80211_dfs_cac_timer_work(struct wiphy *wiphy, struct wiphy_work *work); void ieee80211_dfs_cac_cancel(struct ieee80211_local *local, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index c01634fdba78..0ba590a68605 100644 --- a/net/mac80211/iface.c +++ 
b/net/mac80211/iface.c @@ -146,7 +146,7 @@ void ieee80211_recalc_idle(struct ieee80211_local *local) { u32 change = __ieee80211_recalc_idle(local, false); if (change) - ieee80211_hw_config(local, change); + ieee80211_hw_config(local, -1, change); } static int ieee80211_verify_mac(struct ieee80211_sub_if_data *sdata, u8 *addr, @@ -726,7 +726,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do /* do after stop to avoid reconfiguring when we stop anyway */ ieee80211_configure_filter(local); - ieee80211_hw_config(local, hw_reconf_flags); + ieee80211_hw_config(local, -1, hw_reconf_flags); if (local->virt_monitors == local->open_count) ieee80211_add_virtual_monitor(local); @@ -1493,7 +1493,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) if (local->open_count == 1) ieee80211_hw_conf_init(local); else if (hw_reconf_flags) - ieee80211_hw_config(local, hw_reconf_flags); + ieee80211_hw_config(local, -1, hw_reconf_flags); ieee80211_recalc_ps(local); diff --git a/net/mac80211/link.c b/net/mac80211/link.c index 4f7b7d0f64f2..d71eabe5abf8 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -2,7 +2,7 @@ /* * MLO link handling * - * Copyright (C) 2022-2024 Intel Corporation + * Copyright (C) 2022-2025 Intel Corporation */ #include <linux/slab.h> #include <linux/kernel.h> @@ -368,6 +368,13 @@ static int ieee80211_vif_update_links(struct ieee80211_sub_if_data *sdata, ieee80211_update_apvlan_links(sdata); } + /* + * Ignore errors if we are only removing links as removal should + * always succeed + */ + if (!new_links) + ret = 0; + if (ret) { /* restore config */ memcpy(sdata->link, old_data, sizeof(old_data)); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 6b6de43d9420..c1c758e76d2e 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -190,7 +190,8 @@ static u32 ieee80211_calc_hw_conf_chan(struct ieee80211_local *local, return changed; } -int ieee80211_hw_config(struct ieee80211_local *local, u32 
changed) +int ieee80211_hw_config(struct ieee80211_local *local, int radio_idx, + u32 changed) { int ret = 0; @@ -201,7 +202,7 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) IEEE80211_CONF_CHANGE_SMPS)); if (changed && local->open_count) { - ret = drv_config(local, changed); + ret = drv_config(local, radio_idx, changed); /* * Goal: * HW reconfiguration should never fail, the driver has told @@ -235,7 +236,7 @@ static int _ieee80211_hw_conf_chan(struct ieee80211_local *local, if (!changed) return 0; - return drv_config(local, changed); + return drv_config(local, -1, changed); } int ieee80211_hw_conf_chan(struct ieee80211_local *local) @@ -269,7 +270,7 @@ void ieee80211_hw_conf_init(struct ieee80211_local *local) ctx ? &ctx->conf : NULL); } - WARN_ON(drv_config(local, changed)); + WARN_ON(drv_config(local, -1, changed)); } int ieee80211_emulate_add_chanctx(struct ieee80211_hw *hw, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 0ed68182f79b..53f8b9bd2bd4 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -776,10 +776,6 @@ static bool ieee80211_chandef_usable(struct ieee80211_sub_if_data *sdata, ieee80211_hw_check(&sdata->local->hw, DISALLOW_PUNCTURING)) return false; - if (chandef->punctured && chandef->chan->band == NL80211_BAND_5GHZ && - ieee80211_hw_check(&sdata->local->hw, DISALLOW_PUNCTURING_5GHZ)) - return false; - return true; } @@ -1645,6 +1641,30 @@ static size_t ieee80211_add_before_he_elems(struct sk_buff *skb, return noffset; } +static size_t ieee80211_add_before_reg_conn(struct sk_buff *skb, + const u8 *elems, size_t elems_len, + size_t offset) +{ + static const u8 before_reg_conn[] = { + /* + * no need to list the ones split off before HE + * or generated here + */ + WLAN_EID_EXTENSION, WLAN_EID_EXT_DH_PARAMETER, + WLAN_EID_EXTENSION, WLAN_EID_EXT_KNOWN_STA_IDENTIFCATION, + }; + size_t noffset; + + if (!elems_len) + return offset; + + noffset = ieee80211_ie_split(elems, elems_len, before_reg_conn, + 
ARRAY_SIZE(before_reg_conn), offset); + skb_put_data(skb, elems + offset, noffset - offset); + + return noffset; +} + #define PRESENT_ELEMS_MAX 8 #define PRESENT_ELEM_EXT_OFFS 0x100 @@ -1806,6 +1826,22 @@ ieee80211_add_link_elems(struct ieee80211_sub_if_data *sdata, } /* + * if present, add any custom IEs that go before regulatory + * connectivity element + */ + offset = ieee80211_add_before_reg_conn(skb, extra_elems, + extra_elems_len, offset); + + if (sband->band == NL80211_BAND_6GHZ) { + /* + * as per Section E.2.7 of IEEE 802.11 REVme D7.0, non-AP STA + * capable of operating on the 6 GHz band shall transmit + * regulatory connectivity element. + */ + ieee80211_put_reg_conn(skb, chan->flags); + } + + /* * careful - need to know about all the present elems before * calling ieee80211_assoc_add_ml_elem(), so add this one if * we're going to put it after the ML element @@ -1943,14 +1979,7 @@ ieee80211_assoc_add_ml_elem(struct ieee80211_sub_if_data *sdata, } skb_put_data(skb, &mld_capa_ops, sizeof(mld_capa_ops)); - /* Many APs have broken parsing of the extended MLD capa/ops field, - * dropping (re-)association request frames or replying with association - * response with a failure status if it's present. Without a clear - * indication as to whether the AP supports parsing this field or not do - * not include it in the common information unless strict mode is set. - */ - if (ieee80211_hw_check(&local->hw, STRICT) && - assoc_data->ext_mld_capa_ops) { + if (assoc_data->ext_mld_capa_ops) { ml_elem->control |= cpu_to_le16(IEEE80211_MLC_BASIC_PRES_EXT_MLD_CAPA_OP); common->len += 2; @@ -2381,9 +2410,26 @@ static void ieee80211_csa_switch_work(struct wiphy *wiphy, * update cfg80211 directly. 
*/ if (!ieee80211_vif_link_active(&sdata->vif, link->link_id)) { + struct link_sta_info *link_sta; + struct sta_info *ap_sta; + link->conf->chanreq = link->csa.chanreq; cfg80211_ch_switch_notify(sdata->dev, &link->csa.chanreq.oper, link->link_id); + link->conf->csa_active = false; + + ap_sta = sta_info_get(sdata, sdata->vif.cfg.ap_addr); + if (WARN_ON(!ap_sta)) + return; + + link_sta = wiphy_dereference(wiphy, + ap_sta->link[link->link_id]); + if (WARN_ON(!link_sta)) + return; + + link_sta->pub->bandwidth = + _ieee80211_sta_cur_vht_bw(link_sta, + &link->csa.chanreq.oper); return; } @@ -3181,7 +3227,7 @@ static void ieee80211_enable_ps(struct ieee80211_local *local, return; conf->flags |= IEEE80211_CONF_PS; - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); + ieee80211_hw_config(local, -1, IEEE80211_CONF_CHANGE_PS); } } @@ -3193,7 +3239,7 @@ static void ieee80211_change_ps(struct ieee80211_local *local) ieee80211_enable_ps(local, local->ps_sdata); } else if (conf->flags & IEEE80211_CONF_PS) { conf->flags &= ~IEEE80211_CONF_PS; - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); + ieee80211_hw_config(local, -1, IEEE80211_CONF_CHANGE_PS); timer_delete_sync(&local->dynamic_ps_timer); wiphy_work_cancel(local->hw.wiphy, &local->dynamic_ps_enable_work); @@ -3302,7 +3348,7 @@ void ieee80211_dynamic_ps_disable_work(struct wiphy *wiphy, if (local->hw.conf.flags & IEEE80211_CONF_PS) { local->hw.conf.flags &= ~IEEE80211_CONF_PS; - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); + ieee80211_hw_config(local, -1, IEEE80211_CONF_CHANGE_PS); } ieee80211_wake_queues_by_reason(&local->hw, @@ -3377,7 +3423,7 @@ void ieee80211_dynamic_ps_enable_work(struct wiphy *wiphy, (ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED)) { ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED; local->hw.conf.flags |= IEEE80211_CONF_PS; - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); + ieee80211_hw_config(local, -1, IEEE80211_CONF_CHANGE_PS); } } @@ -3989,7 +4035,7 @@ static void 
ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, */ if (local->hw.conf.flags & IEEE80211_CONF_PS) { local->hw.conf.flags &= ~IEEE80211_CONF_PS; - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); + ieee80211_hw_config(local, -1, IEEE80211_CONF_CHANGE_PS); } local->ps_sdata = NULL; @@ -4737,6 +4783,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, struct ieee80211_prep_tx_info info = { .subtype = IEEE80211_STYPE_AUTH, }; + bool sae_need_confirm = false; lockdep_assert_wiphy(sdata->local->hw.wiphy); @@ -4782,6 +4829,8 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, jiffies + IEEE80211_AUTH_WAIT_SAE_RETRY; ifmgd->auth_data->timeout_started = true; run_again(sdata, ifmgd->auth_data->timeout); + if (auth_transaction == 1) + sae_need_confirm = true; goto notify_driver; } @@ -4825,6 +4874,9 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, if (!ieee80211_mark_sta_auth(sdata)) return; /* ignore frame -- wait for timeout */ } else if (ifmgd->auth_data->algorithm == WLAN_AUTH_SAE && + auth_transaction == 1) { + sae_need_confirm = true; + } else if (ifmgd->auth_data->algorithm == WLAN_AUTH_SAE && auth_transaction == 2) { sdata_info(sdata, "SAE peer confirmed\n"); ifmgd->auth_data->peer_confirmed = true; @@ -4832,7 +4884,8 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len); notify_driver: - drv_mgd_complete_tx(sdata->local, sdata, &info); + if (!sae_need_confirm) + drv_mgd_complete_tx(sdata->local, sdata, &info); } #define case_WLAN(type) \ @@ -5402,6 +5455,12 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, bss_conf->epcs_support = false; } + if (elems->s1g_oper && + link->u.mgd.conn.mode == IEEE80211_CONN_MODE_S1G && + elems->s1g_capab) + ieee80211_s1g_cap_to_sta_s1g_cap(sdata, elems->s1g_capab, + link_sta); + bss_conf->twt_broadcast = ieee80211_twt_bcast_support(sdata, bss_conf, 
sband, link_sta); @@ -5922,6 +5981,7 @@ ieee80211_ap_power_type(u8 control) return IEEE80211_REG_LPI_AP; case IEEE80211_6GHZ_CTRL_REG_SP_AP: case IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP: + case IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP_OLD: return IEEE80211_REG_SP_AP; case IEEE80211_6GHZ_CTRL_REG_VLP_AP: return IEEE80211_REG_VLP_AP; @@ -7344,7 +7404,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, if (local->hw.conf.dynamic_ps_timeout > 0) { if (local->hw.conf.flags & IEEE80211_CONF_PS) { local->hw.conf.flags &= ~IEEE80211_CONF_PS; - ieee80211_hw_config(local, + ieee80211_hw_config(local, -1, IEEE80211_CONF_CHANGE_PS); } ieee80211_send_nullfunc(local, sdata, false); @@ -8698,21 +8758,33 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, bool have_sta = false; bool mlo; int err; + u16 new_links; if (link_id >= 0) { mlo = true; if (WARN_ON(!ap_mld_addr)) return -EINVAL; - err = ieee80211_vif_set_links(sdata, BIT(link_id), 0); + new_links = BIT(link_id); } else { if (WARN_ON(ap_mld_addr)) return -EINVAL; ap_mld_addr = cbss->bssid; - err = ieee80211_vif_set_links(sdata, 0, 0); + new_links = 0; link_id = 0; mlo = false; } + if (assoc) { + rcu_read_lock(); + have_sta = sta_info_get(sdata, ap_mld_addr); + rcu_read_unlock(); + } + + if (mlo && !have_sta && + WARN_ON(sdata->vif.valid_links || sdata->vif.active_links)) + return -EINVAL; + + err = ieee80211_vif_set_links(sdata, new_links, 0); if (err) return err; @@ -8733,12 +8805,6 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, goto out_err; } - if (assoc) { - rcu_read_lock(); - have_sta = sta_info_get(sdata, ap_mld_addr); - rcu_read_unlock(); - } - if (!have_sta) { if (mlo) new_sta = sta_info_alloc_with_link(sdata, ap_mld_addr, @@ -9338,6 +9404,39 @@ out_rcu: return err; } +static bool +ieee80211_mgd_assoc_bss_has_mld_ext_capa_ops(struct cfg80211_assoc_request *req) +{ + const struct cfg80211_bss_ies *ies; + struct cfg80211_bss *bss; + const struct 
element *ml; + + /* not an MLO connection if link_id < 0, so irrelevant */ + if (req->link_id < 0) + return false; + + bss = req->links[req->link_id].bss; + + guard(rcu)(); + ies = rcu_dereference(bss->ies); + for_each_element_extid(ml, WLAN_EID_EXT_EHT_MULTI_LINK, + ies->data, ies->len) { + const struct ieee80211_multi_link_elem *mle; + + if (!ieee80211_mle_type_ok(ml->data + 1, + IEEE80211_ML_CONTROL_TYPE_BASIC, + ml->datalen - 1)) + continue; + + mle = (void *)(ml->data + 1); + if (mle->control & cpu_to_le16(IEEE80211_MLC_BASIC_PRES_EXT_MLD_CAPA_OP)) + return true; + } + + return false; + +} + int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, struct cfg80211_assoc_request *req) { @@ -9390,7 +9489,17 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, else memcpy(assoc_data->ap_addr, cbss->bssid, ETH_ALEN); - assoc_data->ext_mld_capa_ops = cpu_to_le16(req->ext_mld_capa_ops); + /* + * Many APs have broken parsing of the extended MLD capa/ops field, + * dropping (re-)association request frames or replying with association + * response with a failure status if it's present. + * Set our value from the userspace request only in strict mode or if + * the AP also had that field present. 
+ */ + if (ieee80211_hw_check(&local->hw, STRICT) || + ieee80211_mgd_assoc_bss_has_mld_ext_capa_ops(req)) + assoc_data->ext_mld_capa_ops = + cpu_to_le16(req->ext_mld_capa_ops); if (ifmgd->associated) { u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; @@ -10033,7 +10142,6 @@ void ieee80211_process_ml_reconf_resp(struct ieee80211_sub_if_data *sdata, for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { if (!add_links_data->link[link_id].bss || !(sdata->u.mgd.reconf.added_links & BIT(link_id))) - continue; valid_links |= BIT(link_id); diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 2b9abc27462e..13df6321634d 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -39,7 +39,7 @@ static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata) if (local->hw.conf.flags & IEEE80211_CONF_PS) { offchannel_ps_enabled = true; local->hw.conf.flags &= ~IEEE80211_CONF_PS; - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); + ieee80211_hw_config(local, -1, IEEE80211_CONF_CHANGE_PS); } if (!offchannel_ps_enabled || @@ -567,6 +567,7 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, { struct ieee80211_roc_work *roc, *tmp; bool queued = false, combine_started = true; + struct cfg80211_scan_request *req; int ret; lockdep_assert_wiphy(local->hw.wiphy); @@ -612,9 +613,11 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, roc->mgmt_tx_cookie = *cookie; } + req = wiphy_dereference(local->hw.wiphy, local->scan_req); + /* if there's no need to queue, handle it immediately */ if (list_empty(&local->roc_list) && - !local->scanning && !ieee80211_is_radar_required(local)) { + !local->scanning && !ieee80211_is_radar_required(local, req)) { /* if not HW assist, just queue & schedule work */ if (!local->ops->remain_on_channel) { list_add_tail(&roc->list, &local->roc_list); diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index a9cc832240a5..5a508d99e84f 100644 --- a/net/mac80211/pm.c +++ 
b/net/mac80211/pm.c @@ -108,7 +108,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) sdata->u.mgd.powersave && !(local->hw.conf.flags & IEEE80211_CONF_PS)) { local->hw.conf.flags |= IEEE80211_CONF_PS; - ieee80211_hw_config(local, + ieee80211_hw_config(local, -1, IEEE80211_CONF_CHANGE_PS); } } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index e73431549ce7..caa3e6b3f46e 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -231,8 +231,19 @@ static void __ieee80211_queue_skb_to_iface(struct ieee80211_sub_if_data *sdata, skb_queue_tail(&sdata->skb_queue, skb); wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work); - if (sta) - sta->deflink.rx_stats.packets++; + if (sta) { + struct link_sta_info *link_sta_info; + + if (link_id >= 0) { + link_sta_info = rcu_dereference(sta->link[link_id]); + if (!link_sta_info) + return; + } else { + link_sta_info = &sta->deflink; + } + + link_sta_info->rx_stats.packets++; + } } static void ieee80211_queue_skb_to_iface(struct ieee80211_sub_if_data *sdata, @@ -4234,10 +4245,16 @@ static bool ieee80211_rx_data_set_sta(struct ieee80211_rx_data *rx, rx->link_sta = NULL; } - if (link_id < 0) - rx->link = &rx->sdata->deflink; - else if (!ieee80211_rx_data_set_link(rx, link_id)) + if (link_id < 0) { + if (ieee80211_vif_is_mld(&rx->sdata->vif) && + sta && !sta->sta.valid_links) + rx->link = + rcu_dereference(rx->sdata->link[sta->deflink.link_id]); + else + rx->link = &rx->sdata->deflink; + } else if (!ieee80211_rx_data_set_link(rx, link_id)) { return false; + } return true; } diff --git a/net/mac80211/s1g.c b/net/mac80211/s1g.c index d4ed0c0a335c..1f68df6e8067 100644 --- a/net/mac80211/s1g.c +++ b/net/mac80211/s1g.c @@ -194,3 +194,29 @@ void ieee80211_s1g_status_twt_action(struct ieee80211_sub_if_data *sdata, break; } } + +void ieee80211_s1g_cap_to_sta_s1g_cap(struct ieee80211_sub_if_data *sdata, + const struct ieee80211_s1g_cap *s1g_cap_ie, + struct link_sta_info *link_sta) +{ + struct 
ieee80211_sta_s1g_cap *s1g_cap = &link_sta->pub->s1g_cap; + + memset(s1g_cap, 0, sizeof(*s1g_cap)); + + memcpy(s1g_cap->cap, s1g_cap_ie->capab_info, sizeof(s1g_cap->cap)); + memcpy(s1g_cap->nss_mcs, s1g_cap_ie->supp_mcs_nss, + sizeof(s1g_cap->nss_mcs)); + + s1g_cap->s1g = true; + + /* Maximum MPDU length is 1 bit for S1G */ + if (s1g_cap->cap[3] & S1G_CAP3_MAX_MPDU_LEN) { + link_sta->pub->agg.max_amsdu_len = + IEEE80211_MAX_MPDU_LEN_VHT_7991; + } else { + link_sta->pub->agg.max_amsdu_len = + IEEE80211_MAX_MPDU_LEN_VHT_3895; + } + + ieee80211_sta_recalc_aggregates(&link_sta->sta->sta); +} diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index cd8385ecafd9..dbf98aa4cd67 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2013-2015 Intel Mobile Communications GmbH * Copyright 2016-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation */ #include <linux/if_arp.h> @@ -586,7 +586,8 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local, return 0; } -static bool __ieee80211_can_leave_ch(struct ieee80211_sub_if_data *sdata) +static bool __ieee80211_can_leave_ch(struct ieee80211_sub_if_data *sdata, + struct cfg80211_scan_request *req) { struct ieee80211_local *local = sdata->local; struct ieee80211_sub_if_data *sdata_iter; @@ -594,7 +595,7 @@ static bool __ieee80211_can_leave_ch(struct ieee80211_sub_if_data *sdata) lockdep_assert_wiphy(local->hw.wiphy); - if (!ieee80211_is_radar_required(local)) + if (!ieee80211_is_radar_required(local, req)) return true; if (!regulatory_pre_cac_allowed(local->hw.wiphy)) @@ -610,9 +611,10 @@ static bool __ieee80211_can_leave_ch(struct ieee80211_sub_if_data *sdata) } static bool ieee80211_can_scan(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata) + struct ieee80211_sub_if_data *sdata, + struct cfg80211_scan_request *req) { - if 
(!__ieee80211_can_leave_ch(sdata)) + if (!__ieee80211_can_leave_ch(sdata, req)) return false; if (!list_empty(&local->roc_list)) @@ -627,15 +629,19 @@ static bool ieee80211_can_scan(struct ieee80211_local *local, void ieee80211_run_deferred_scan(struct ieee80211_local *local) { + struct cfg80211_scan_request *req; + lockdep_assert_wiphy(local->hw.wiphy); if (!local->scan_req || local->scanning) return; + req = wiphy_dereference(local->hw.wiphy, local->scan_req); if (!ieee80211_can_scan(local, rcu_dereference_protected( local->scan_sdata, - lockdep_is_held(&local->hw.wiphy->mtx)))) + lockdep_is_held(&local->hw.wiphy->mtx)), + req)) return; wiphy_delayed_work_queue(local->hw.wiphy, &local->scan_work, @@ -732,10 +738,10 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, !(sdata->vif.active_links & BIT(req->tsf_report_link_id))) return -EINVAL; - if (!__ieee80211_can_leave_ch(sdata)) + if (!__ieee80211_can_leave_ch(sdata, req)) return -EBUSY; - if (!ieee80211_can_scan(local, sdata)) { + if (!ieee80211_can_scan(local, sdata, req)) { /* wait for the work to finish/time out */ rcu_assign_pointer(local->scan_req, req); rcu_assign_pointer(local->scan_sdata, sdata); @@ -794,6 +800,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, local->hw_scan_req->req.scan_6ghz_params = req->scan_6ghz_params; local->hw_scan_req->req.scan_6ghz = req->scan_6ghz; + local->hw_scan_req->req.first_part = req->first_part; /* * After allocating local->hw_scan_req, we must diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 61583173629e..8c550aab9bdc 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -4,7 +4,7 @@ * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015 - 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation */ #include <linux/module.h> @@ -355,6 +355,50 @@ static void 
sta_info_free_link(struct link_sta_info *link_sta) free_percpu(link_sta->pcpu_rx_stats); } +static void sta_accumulate_removed_link_stats(struct sta_info *sta, int link_id) +{ + struct link_sta_info *link_sta = wiphy_dereference(sta->local->hw.wiphy, + sta->link[link_id]); + struct ieee80211_link_data *link; + int ac, tid; + u32 thr; + + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + sta->rem_link_stats.tx_packets += + link_sta->tx_stats.packets[ac]; + sta->rem_link_stats.tx_bytes += link_sta->tx_stats.bytes[ac]; + } + + sta->rem_link_stats.rx_packets += link_sta->rx_stats.packets; + sta->rem_link_stats.rx_bytes += link_sta->rx_stats.bytes; + sta->rem_link_stats.tx_retries += link_sta->status_stats.retry_count; + sta->rem_link_stats.tx_failed += link_sta->status_stats.retry_failed; + sta->rem_link_stats.rx_dropped_misc += link_sta->rx_stats.dropped; + + thr = sta_get_expected_throughput(sta); + if (thr != 0) + sta->rem_link_stats.expected_throughput += thr; + + for (tid = 0; tid < IEEE80211_NUM_TIDS; tid++) { + sta->rem_link_stats.pertid_stats.rx_msdu += + link_sta->rx_stats.msdu[tid]; + sta->rem_link_stats.pertid_stats.tx_msdu += + link_sta->tx_stats.msdu[tid]; + sta->rem_link_stats.pertid_stats.tx_msdu_retries += + link_sta->status_stats.msdu_retries[tid]; + sta->rem_link_stats.pertid_stats.tx_msdu_failed += + link_sta->status_stats.msdu_failed[tid]; + } + + if (sta->sdata->vif.type == NL80211_IFTYPE_STATION) { + link = wiphy_dereference(sta->sdata->local->hw.wiphy, + sta->sdata->link[link_id]); + if (link) + sta->rem_link_stats.beacon_loss_count += + link->u.mgd.beacon_loss_count; + } +} + static void sta_remove_link(struct sta_info *sta, unsigned int link_id, bool unhash) { @@ -377,6 +421,10 @@ static void sta_remove_link(struct sta_info *sta, unsigned int link_id, alloc = container_of(link_sta, typeof(*alloc), info); sta->sta.valid_links &= ~BIT(link_id); + + /* store removed link info for accumulated stats consistency */ + 
sta_accumulate_removed_link_stats(sta, link_id); + RCU_INIT_POINTER(sta->link[link_id], NULL); RCU_INIT_POINTER(sta->sta.link[link_id], NULL); if (alloc) { @@ -681,6 +729,7 @@ __sta_info_alloc(struct ieee80211_sub_if_data *sdata, IEEE80211_RATE_MANDATORY_G; break; case NL80211_BAND_5GHZ: + case NL80211_BAND_6GHZ: mandatory = IEEE80211_RATE_MANDATORY_A; break; case NL80211_BAND_60GHZ: @@ -1651,7 +1700,7 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry_safe(sta, tmp, &local->sta_list, list) { - unsigned long last_active = ieee80211_sta_last_active(sta); + unsigned long last_active = ieee80211_sta_last_active(sta, -1); if (sdata != sta->sdata) continue; @@ -2420,18 +2469,27 @@ void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local, } static struct ieee80211_sta_rx_stats * -sta_get_last_rx_stats(struct sta_info *sta) +sta_get_last_rx_stats(struct sta_info *sta, int link_id) { - struct ieee80211_sta_rx_stats *stats = &sta->deflink.rx_stats; + struct ieee80211_sta_rx_stats *stats; + struct link_sta_info *link_sta_info; int cpu; - if (!sta->deflink.pcpu_rx_stats) + if (link_id < 0) + link_sta_info = &sta->deflink; + else + link_sta_info = wiphy_dereference(sta->local->hw.wiphy, + sta->link[link_id]); + + stats = &link_sta_info->rx_stats; + + if (!link_sta_info->pcpu_rx_stats) return stats; for_each_possible_cpu(cpu) { struct ieee80211_sta_rx_stats *cpustats; - cpustats = per_cpu_ptr(sta->deflink.pcpu_rx_stats, cpu); + cpustats = per_cpu_ptr(link_sta_info->pcpu_rx_stats, cpu); if (time_after(cpustats->last_rx, stats->last_rx)) stats = cpustats; @@ -2499,9 +2557,10 @@ static void sta_stats_decode_rate(struct ieee80211_local *local, u32 rate, } } -static int sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo) +static int sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo, + int link_id) { - u32 rate = READ_ONCE(sta_get_last_rx_stats(sta)->last_rate); + 
u32 rate = READ_ONCE(sta_get_last_rx_stats(sta, link_id)->last_rate); if (rate == STA_STATS_RATE_INVALID) return -EINVAL; @@ -2526,20 +2585,28 @@ static inline u64 sta_get_tidstats_msdu(struct ieee80211_sta_rx_stats *rxstats, static void sta_set_tidstats(struct sta_info *sta, struct cfg80211_tid_stats *tidstats, - int tid) + int tid, int link_id) { struct ieee80211_local *local = sta->local; + struct link_sta_info *link_sta_info; int cpu; + if (link_id < 0) + link_sta_info = &sta->deflink; + else + link_sta_info = wiphy_dereference(sta->local->hw.wiphy, + sta->link[link_id]); + if (!(tidstats->filled & BIT(NL80211_TID_STATS_RX_MSDU))) { - tidstats->rx_msdu += sta_get_tidstats_msdu(&sta->deflink.rx_stats, - tid); + tidstats->rx_msdu += + sta_get_tidstats_msdu(&link_sta_info->rx_stats, + tid); - if (sta->deflink.pcpu_rx_stats) { + if (link_sta_info->pcpu_rx_stats) { for_each_possible_cpu(cpu) { struct ieee80211_sta_rx_stats *cpurxs; - cpurxs = per_cpu_ptr(sta->deflink.pcpu_rx_stats, + cpurxs = per_cpu_ptr(link_sta_info->pcpu_rx_stats, cpu); tidstats->rx_msdu += sta_get_tidstats_msdu(cpurxs, tid); @@ -2551,22 +2618,24 @@ static void sta_set_tidstats(struct sta_info *sta, if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU))) { tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU); - tidstats->tx_msdu = sta->deflink.tx_stats.msdu[tid]; + tidstats->tx_msdu = link_sta_info->tx_stats.msdu[tid]; } if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU_RETRIES)) && ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU_RETRIES); - tidstats->tx_msdu_retries = sta->deflink.status_stats.msdu_retries[tid]; + tidstats->tx_msdu_retries = + link_sta_info->status_stats.msdu_retries[tid]; } if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU_FAILED)) && ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU_FAILED); - tidstats->tx_msdu_failed = 
sta->deflink.status_stats.msdu_failed[tid]; + tidstats->tx_msdu_failed = + link_sta_info->status_stats.msdu_failed[tid]; } - if (tid < IEEE80211_NUM_TIDS) { + if (link_id < 0 && tid < IEEE80211_NUM_TIDS) { spin_lock_bh(&local->fq.lock); rcu_read_lock(); @@ -2625,16 +2694,278 @@ static void sta_set_mesh_sinfo(struct sta_info *sta, } #endif +void sta_set_accumulated_removed_links_sinfo(struct sta_info *sta, + struct station_info *sinfo) +{ + /* Accumulating the removed link statistics. */ + sinfo->tx_packets = sta->rem_link_stats.tx_packets; + sinfo->rx_packets = sta->rem_link_stats.rx_packets; + sinfo->tx_bytes = sta->rem_link_stats.tx_bytes; + sinfo->rx_bytes = sta->rem_link_stats.rx_bytes; + sinfo->tx_retries = sta->rem_link_stats.tx_retries; + sinfo->tx_failed = sta->rem_link_stats.tx_failed; + sinfo->rx_dropped_misc = sta->rem_link_stats.rx_dropped_misc; + sinfo->beacon_loss_count = sta->rem_link_stats.beacon_loss_count; + sinfo->expected_throughput = sta->rem_link_stats.expected_throughput; + + if (sinfo->pertid) { + sinfo->pertid->rx_msdu = + sta->rem_link_stats.pertid_stats.rx_msdu; + sinfo->pertid->tx_msdu = + sta->rem_link_stats.pertid_stats.tx_msdu; + sinfo->pertid->tx_msdu_retries = + sta->rem_link_stats.pertid_stats.tx_msdu_retries; + sinfo->pertid->tx_msdu_failed = + sta->rem_link_stats.pertid_stats.tx_msdu_failed; + } +} + +static void sta_set_link_sinfo(struct sta_info *sta, + struct link_station_info *link_sinfo, + struct ieee80211_link_data *link, + bool tidstats) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_sta_rx_stats *last_rxstats; + int i, ac, cpu, link_id = link->link_id; + struct link_sta_info *link_sta_info; + u32 thr = 0; + + last_rxstats = sta_get_last_rx_stats(sta, link_id); + + link_sta_info = wiphy_dereference(sta->local->hw.wiphy, + sta->link[link_id]); + + /* do before driver, so beacon filtering drivers have a + * chance to e.g. 
just add the number of filtered beacons + * (or just modify the value entirely, of course) + */ + if (sdata->vif.type == NL80211_IFTYPE_STATION) + link_sinfo->rx_beacon = link->u.mgd.count_beacon_signal; + + ether_addr_copy(link_sinfo->addr, link_sta_info->addr); + + drv_link_sta_statistics(sta->local, sdata, + link_sta_info->pub, + link_sinfo); + + link_sinfo->filled |= BIT_ULL(NL80211_STA_INFO_INACTIVE_TIME) | + BIT_ULL(NL80211_STA_INFO_BSS_PARAM) | + BIT_ULL(NL80211_STA_INFO_RX_DROP_MISC); + + if (sdata->vif.type == NL80211_IFTYPE_STATION) { + link_sinfo->beacon_loss_count = + link->u.mgd.beacon_loss_count; + link_sinfo->filled |= BIT_ULL(NL80211_STA_INFO_BEACON_LOSS); + } + + link_sinfo->inactive_time = + jiffies_to_msecs(jiffies - ieee80211_sta_last_active(sta, link_id)); + + if (!(link_sinfo->filled & (BIT_ULL(NL80211_STA_INFO_TX_BYTES64) | + BIT_ULL(NL80211_STA_INFO_TX_BYTES)))) { + link_sinfo->tx_bytes = 0; + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) + link_sinfo->tx_bytes += + link_sta_info->tx_stats.bytes[ac]; + link_sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BYTES64); + } + + if (!(link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_PACKETS))) { + link_sinfo->tx_packets = 0; + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) + link_sinfo->tx_packets += + link_sta_info->tx_stats.packets[ac]; + link_sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_PACKETS); + } + + if (!(link_sinfo->filled & (BIT_ULL(NL80211_STA_INFO_RX_BYTES64) | + BIT_ULL(NL80211_STA_INFO_RX_BYTES)))) { + link_sinfo->rx_bytes += + sta_get_stats_bytes(&link_sta_info->rx_stats); + + if (link_sta_info->pcpu_rx_stats) { + for_each_possible_cpu(cpu) { + struct ieee80211_sta_rx_stats *cpurxs; + + cpurxs = per_cpu_ptr(link_sta_info->pcpu_rx_stats, + cpu); + link_sinfo->rx_bytes += + sta_get_stats_bytes(cpurxs); + } + } + + link_sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_BYTES64); + } + + if (!(link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_PACKETS))) { + link_sinfo->rx_packets = 
link_sta_info->rx_stats.packets; + if (link_sta_info->pcpu_rx_stats) { + for_each_possible_cpu(cpu) { + struct ieee80211_sta_rx_stats *cpurxs; + + cpurxs = per_cpu_ptr(link_sta_info->pcpu_rx_stats, + cpu); + link_sinfo->rx_packets += cpurxs->packets; + } + } + link_sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_PACKETS); + } + + if (!(link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_RETRIES))) { + link_sinfo->tx_retries = + link_sta_info->status_stats.retry_count; + link_sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_RETRIES); + } + + if (!(link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_FAILED))) { + link_sinfo->tx_failed = + link_sta_info->status_stats.retry_failed; + link_sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_FAILED); + } + + if (!(link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_DURATION))) { + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) + link_sinfo->rx_duration += sta->airtime[ac].rx_airtime; + link_sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_DURATION); + } + + if (!(link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_DURATION))) { + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) + link_sinfo->tx_duration += sta->airtime[ac].tx_airtime; + link_sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_DURATION); + } + + if (!(link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_AIRTIME_WEIGHT))) { + link_sinfo->airtime_weight = sta->airtime_weight; + link_sinfo->filled |= BIT_ULL(NL80211_STA_INFO_AIRTIME_WEIGHT); + } + + link_sinfo->rx_dropped_misc = link_sta_info->rx_stats.dropped; + if (link_sta_info->pcpu_rx_stats) { + for_each_possible_cpu(cpu) { + struct ieee80211_sta_rx_stats *cpurxs; + + cpurxs = per_cpu_ptr(link_sta_info->pcpu_rx_stats, + cpu); + link_sinfo->rx_dropped_misc += cpurxs->dropped; + } + } + + if (sdata->vif.type == NL80211_IFTYPE_STATION && + !(sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER)) { + link_sinfo->filled |= BIT_ULL(NL80211_STA_INFO_BEACON_RX) | + BIT_ULL(NL80211_STA_INFO_BEACON_SIGNAL_AVG); + link_sinfo->rx_beacon_signal_avg = + 
ieee80211_ave_rssi(&sdata->vif, -1); + } + + if (ieee80211_hw_check(&sta->local->hw, SIGNAL_DBM) || + ieee80211_hw_check(&sta->local->hw, SIGNAL_UNSPEC)) { + if (!(link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_SIGNAL))) { + link_sinfo->signal = (s8)last_rxstats->last_signal; + link_sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL); + } + + if (!link_sta_info->pcpu_rx_stats && + !(link_sinfo->filled & + BIT_ULL(NL80211_STA_INFO_SIGNAL_AVG))) { + link_sinfo->signal_avg = + -ewma_signal_read(&link_sta_info->rx_stats_avg.signal); + link_sinfo->filled |= + BIT_ULL(NL80211_STA_INFO_SIGNAL_AVG); + } + } + + /* for the average - if pcpu_rx_stats isn't set - rxstats must point to + * the sta->rx_stats struct, so the check here is fine with and without + * pcpu statistics + */ + if (last_rxstats->chains && + !(link_sinfo->filled & (BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL) | + BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)))) { + link_sinfo->filled |= BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL); + if (!link_sta_info->pcpu_rx_stats) + link_sinfo->filled |= + BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL_AVG); + + link_sinfo->chains = last_rxstats->chains; + + for (i = 0; i < ARRAY_SIZE(link_sinfo->chain_signal); i++) { + link_sinfo->chain_signal[i] = + last_rxstats->chain_signal_last[i]; + link_sinfo->chain_signal_avg[i] = + -ewma_signal_read( + &link_sta_info->rx_stats_avg.chain_signal[i]); + } + } + + if (!(link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE)) && + ieee80211_rate_valid(&link_sta_info->tx_stats.last_rate)) { + sta_set_rate_info_tx(sta, &link_sta_info->tx_stats.last_rate, + &link_sinfo->txrate); + link_sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BITRATE); + } + + if (!(link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_BITRATE))) { + if (sta_set_rate_info_rx(sta, &link_sinfo->rxrate, + link_id) == 0) + link_sinfo->filled |= + BIT_ULL(NL80211_STA_INFO_RX_BITRATE); + } + + if (tidstats && !cfg80211_link_sinfo_alloc_tid_stats(link_sinfo, + GFP_KERNEL)) { + for (i = 0; i < 
IEEE80211_NUM_TIDS + 1; i++) + sta_set_tidstats(sta, &link_sinfo->pertid[i], i, + link_id); + } + + link_sinfo->bss_param.flags = 0; + if (sdata->vif.bss_conf.use_cts_prot) + link_sinfo->bss_param.flags |= BSS_PARAM_FLAGS_CTS_PROT; + if (sdata->vif.bss_conf.use_short_preamble) + link_sinfo->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_PREAMBLE; + if (sdata->vif.bss_conf.use_short_slot) + link_sinfo->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_SLOT_TIME; + link_sinfo->bss_param.dtim_period = link->conf->dtim_period; + link_sinfo->bss_param.beacon_interval = link->conf->beacon_int; + + thr = sta_get_expected_throughput(sta); + + if (thr != 0) { + link_sinfo->filled |= + BIT_ULL(NL80211_STA_INFO_EXPECTED_THROUGHPUT); + link_sinfo->expected_throughput = thr; + } + + if (!(link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL)) && + link_sta_info->status_stats.ack_signal_filled) { + link_sinfo->ack_signal = + link_sta_info->status_stats.last_ack_signal; + link_sinfo->filled |= BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL); + } + + if (!(link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL_AVG)) && + link_sta_info->status_stats.ack_signal_filled) { + link_sinfo->avg_ack_signal = + -(s8)ewma_avg_signal_read( + &link_sta_info->status_stats.avg_ack_signal); + link_sinfo->filled |= + BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL_AVG); + } +} + void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, bool tidstats) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; u32 thr = 0; - int i, ac, cpu; + int i, ac, cpu, link_id; struct ieee80211_sta_rx_stats *last_rxstats; - last_rxstats = sta_get_last_rx_stats(sta); + last_rxstats = sta_get_last_rx_stats(sta, -1); sinfo->generation = sdata->local->sta_generation; @@ -2662,7 +2993,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, sinfo->connected_time = ktime_get_seconds() - sta->last_connected; sinfo->assoc_at = sta->assoc_at; sinfo->inactive_time = - 
jiffies_to_msecs(jiffies - ieee80211_sta_last_active(sta)); + jiffies_to_msecs(jiffies - ieee80211_sta_last_active(sta, -1)); if (!(sinfo->filled & (BIT_ULL(NL80211_STA_INFO_TX_BYTES64) | BIT_ULL(NL80211_STA_INFO_TX_BYTES)))) { @@ -2751,7 +3082,8 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, !(sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER)) { sinfo->filled |= BIT_ULL(NL80211_STA_INFO_BEACON_RX) | BIT_ULL(NL80211_STA_INFO_BEACON_SIGNAL_AVG); - sinfo->rx_beacon_signal_avg = ieee80211_ave_rssi(&sdata->vif); + sinfo->rx_beacon_signal_avg = + ieee80211_ave_rssi(&sdata->vif, -1); } if (ieee80211_hw_check(&sta->local->hw, SIGNAL_DBM) || @@ -2800,13 +3132,13 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_BITRATE)) && !sta->sta.valid_links) { - if (sta_set_rate_info_rx(sta, &sinfo->rxrate) == 0) + if (sta_set_rate_info_rx(sta, &sinfo->rxrate, -1) == 0) sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_BITRATE); } if (tidstats && !cfg80211_sinfo_alloc_tid_stats(sinfo, GFP_KERNEL)) { for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++) - sta_set_tidstats(sta, &sinfo->pertid[i], i); + sta_set_tidstats(sta, &sinfo->pertid[i], i, -1); } #ifdef CONFIG_MAC80211_MESH @@ -2868,6 +3200,26 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, sinfo->filled |= BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL_AVG); } + + if (sta->sta.valid_links) { + struct ieee80211_link_data *link; + struct link_sta_info *link_sta; + + ether_addr_copy(sinfo->mld_addr, sta->addr); + for_each_valid_link(sinfo, link_id) { + link_sta = wiphy_dereference(sta->local->hw.wiphy, + sta->link[link_id]); + link = wiphy_dereference(sdata->local->hw.wiphy, + sdata->link[link_id]); + + if (!link_sta || !sinfo->links[link_id] || !link) + continue; + + sinfo->valid_links = sta->sta.valid_links; + sta_set_link_sinfo(sta, sinfo->links[link_id], + link, tidstats); + } + } } u32 sta_get_expected_throughput(struct sta_info 
*sta) @@ -2889,14 +3241,24 @@ u32 sta_get_expected_throughput(struct sta_info *sta) return thr; } -unsigned long ieee80211_sta_last_active(struct sta_info *sta) +unsigned long ieee80211_sta_last_active(struct sta_info *sta, int link_id) { - struct ieee80211_sta_rx_stats *stats = sta_get_last_rx_stats(sta); + struct ieee80211_sta_rx_stats *stats; + struct link_sta_info *link_sta_info; - if (!sta->deflink.status_stats.last_ack || - time_after(stats->last_rx, sta->deflink.status_stats.last_ack)) + stats = sta_get_last_rx_stats(sta, link_id); + + if (link_id < 0) + link_sta_info = &sta->deflink; + else + link_sta_info = wiphy_dereference(sta->local->hw.wiphy, + sta->link[link_id]); + + if (!link_sta_info->status_stats.last_ack || + time_after(stats->last_rx, link_sta_info->status_stats.last_ack)) return stats->last_rx; - return sta->deflink.status_stats.last_ack; + + return link_sta_info->status_stats.last_ack; } int ieee80211_sta_allocate_link(struct sta_info *sta, unsigned int link_id) diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 7a95d8d34fca..5288d5286651 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -569,6 +569,58 @@ struct link_sta_info { }; /** + * struct ieee80211_sta_removed_link_stats - Removed link sta data + * + * keep required accumulated removed link data for stats + * + * @rx_packets: accumulated packets (MSDUs & MMPDUs) received from + * this station for removed links + * @tx_packets: accumulated packets (MSDUs & MMPDUs) transmitted to + * this station for removed links + * @rx_bytes: accumulated bytes (size of MPDUs) received from this + * station for removed links + * @tx_bytes: accumulated bytes (size of MPDUs) transmitted to this + * station for removed links + * @tx_retries: cumulative retry counts (MPDUs) for removed links + * @tx_failed: accumulated number of failed transmissions (MPDUs) + * (retries exceeded, no ACK) for removed links + * @rx_dropped_misc: accumulated dropped packets for 
un-specified reason + * from this station for removed links + * @beacon_loss_count: Number of times beacon loss event has triggered + * from this station for removed links. + * @expected_throughput: expected throughput in kbps (including 802.11 + * headers) towards this station for removed links + * @pertid_stats: accumulated per-TID statistics for removed link of + * station + * @pertid_stats.rx_msdu : accumulated number of received MSDUs towards + * this station for removed links. + * @pertid_stats.tx_msdu: accumulated number of (attempted) transmitted + * MSDUs towards this station for removed links + * @pertid_stats.tx_msdu_retries: accumulated number of retries (not + * counting the first) for transmitted MSDUs towards this station + * for removed links + * @pertid_stats.tx_msdu_failed: accumulated number of failed transmitted + * MSDUs towards this station for removed links + */ +struct ieee80211_sta_removed_link_stats { + u32 rx_packets; + u32 tx_packets; + u64 rx_bytes; + u64 tx_bytes; + u32 tx_retries; + u32 tx_failed; + u32 rx_dropped_misc; + u32 beacon_loss_count; + u32 expected_throughput; + struct { + u64 rx_msdu; + u64 tx_msdu; + u64 tx_msdu_retries; + u64 tx_msdu_failed; + } pertid_stats; +}; + +/** * struct sta_info - STA information * * This structure collects information about a station that @@ -644,6 +696,7 @@ struct link_sta_info { * @deflink address and remaining would be allocated and the address * would be assigned to link[link_id] where link_id is the id assigned * by the AP. + * @rem_link_stats: accumulated removed link stats */ struct sta_info { /* General information, mostly static */ @@ -718,6 +771,7 @@ struct sta_info { struct ieee80211_sta_aggregates cur; struct link_sta_info deflink; struct link_sta_info __rcu *link[IEEE80211_MLD_MAX_NUM_LINKS]; + struct ieee80211_sta_removed_link_stats rem_link_stats; /* keep last! 
*/ struct ieee80211_sta sta; @@ -922,6 +976,9 @@ void sta_set_rate_info_tx(struct sta_info *sta, void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, bool tidstats); +void sta_set_accumulated_removed_links_sinfo(struct sta_info *sta, + struct station_info *sinfo); + u32 sta_get_expected_throughput(struct sta_info *sta); void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, @@ -936,7 +993,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta); void ieee80211_sta_ps_deliver_poll_response(struct sta_info *sta); void ieee80211_sta_ps_deliver_uapsd(struct sta_info *sta); -unsigned long ieee80211_sta_last_active(struct sta_info *sta); +unsigned long ieee80211_sta_last_active(struct sta_info *sta, int link_id); void ieee80211_sta_set_max_amsdu_subframes(struct sta_info *sta, const u8 *ext_capab, diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 72fad8ea8bb9..0bfbce157486 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -384,12 +384,14 @@ DEFINE_EVENT(local_sdata_addr_evt, drv_remove_interface, TRACE_EVENT(drv_config, TP_PROTO(struct ieee80211_local *local, + int radio_idx, u32 changed), - TP_ARGS(local, changed), + TP_ARGS(local, radio_idx, changed), TP_STRUCT__entry( LOCAL_ENTRY + __field(int, radio_idx) __field(u32, changed) __field(u32, flags) __field(int, power_level) @@ -403,6 +405,7 @@ TRACE_EVENT(drv_config, TP_fast_assign( LOCAL_ASSIGN; + __entry->radio_idx = radio_idx; __entry->changed = changed; __entry->flags = local->hw.conf.flags; __entry->power_level = local->hw.conf.power_level; @@ -417,8 +420,8 @@ TRACE_EVENT(drv_config, ), TP_printk( - LOCAL_PR_FMT " ch:%#x" CHANDEF_PR_FMT, - LOCAL_PR_ARG, __entry->changed, CHANDEF_PR_ARG + LOCAL_PR_FMT " radio_idx:%d ch:%#x" CHANDEF_PR_FMT, + LOCAL_PR_ARG, __entry->radio_idx, __entry->changed, CHANDEF_PR_ARG ) ); @@ -818,34 +821,71 @@ TRACE_EVENT(drv_get_key_seq, ) ); -DEFINE_EVENT(local_u32_evt, drv_set_frag_threshold, - TP_PROTO(struct ieee80211_local 
*local, u32 value), - TP_ARGS(local, value) +TRACE_EVENT(drv_set_frag_threshold, + TP_PROTO(struct ieee80211_local *local, int radio_idx, u32 value), + + TP_ARGS(local, radio_idx, value), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(int, radio_idx) + __field(u32, value) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->radio_idx = radio_idx; + __entry->value = value; + ), + + TP_printk( + LOCAL_PR_FMT " radio_id:%d value:%u", + LOCAL_PR_ARG, __entry->radio_idx, __entry->value + ) ); -DEFINE_EVENT(local_u32_evt, drv_set_rts_threshold, - TP_PROTO(struct ieee80211_local *local, u32 value), - TP_ARGS(local, value) +TRACE_EVENT(drv_set_rts_threshold, + TP_PROTO(struct ieee80211_local *local, int radio_idx, u32 value), + + TP_ARGS(local, radio_idx, value), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(int, radio_idx) + __field(u32, value) + ), + TP_fast_assign( + LOCAL_ASSIGN; + __entry->radio_idx = radio_idx; + __entry->value = value; + ), + + TP_printk( + LOCAL_PR_FMT " radio_id:%d value:%u", + LOCAL_PR_ARG, __entry->radio_idx, __entry->value + ) ); TRACE_EVENT(drv_set_coverage_class, - TP_PROTO(struct ieee80211_local *local, s16 value), + TP_PROTO(struct ieee80211_local *local, int radio_idx, s16 value), - TP_ARGS(local, value), + TP_ARGS(local, radio_idx, value), TP_STRUCT__entry( LOCAL_ENTRY + __field(int, radio_idx) __field(s16, value) ), TP_fast_assign( LOCAL_ASSIGN; + __entry->radio_idx = radio_idx; __entry->value = value; ), TP_printk( - LOCAL_PR_FMT " value:%d", - LOCAL_PR_ARG, __entry->value + LOCAL_PR_FMT " radio_id:%d value:%d", + LOCAL_PR_ARG, __entry->radio_idx, __entry->value ) ); @@ -1002,6 +1042,33 @@ DEFINE_EVENT(sta_event, drv_sta_statistics, TP_ARGS(local, sdata, sta) ); +TRACE_EVENT(drv_link_sta_statistics, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_link_sta *link_sta), + + TP_ARGS(local, sdata, link_sta), + + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + STA_ENTRY + __field(u32, link_id) + 
), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + STA_NAMED_ASSIGN(link_sta->sta); + __entry->link_id = link_sta->link_id; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " (link %d)", + LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->link_id + ) +); + DEFINE_EVENT(sta_event, drv_sta_add, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, @@ -1291,12 +1358,14 @@ TRACE_EVENT(drv_set_antenna, ); TRACE_EVENT(drv_get_antenna, - TP_PROTO(struct ieee80211_local *local, u32 tx_ant, u32 rx_ant, int ret), + TP_PROTO(struct ieee80211_local *local, int radio_idx, u32 tx_ant, + u32 rx_ant, int ret), - TP_ARGS(local, tx_ant, rx_ant, ret), + TP_ARGS(local, radio_idx, tx_ant, rx_ant, ret), TP_STRUCT__entry( LOCAL_ENTRY + __field(int, radio_idx) __field(u32, tx_ant) __field(u32, rx_ant) __field(int, ret) @@ -1304,14 +1373,16 @@ TRACE_EVENT(drv_get_antenna, TP_fast_assign( LOCAL_ASSIGN; + __entry->radio_idx = radio_idx; __entry->tx_ant = tx_ant; __entry->rx_ant = rx_ant; __entry->ret = ret; ), TP_printk( - LOCAL_PR_FMT " tx_ant:%d rx_ant:%d ret:%d", - LOCAL_PR_ARG, __entry->tx_ant, __entry->rx_ant, __entry->ret + LOCAL_PR_FMT " radio_idx:%d tx_ant:%d rx_ant:%d ret:%d", + LOCAL_PR_ARG, __entry->radio_idx, __entry->tx_ant, + __entry->rx_ant, __entry->ret ) ); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index d58b80813bdd..6fa883a9250d 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1173,7 +1173,8 @@ void ieee80211_aggr_check(struct ieee80211_sub_if_data *sdata, return; if (!sta || - (!sta->sta.valid_links && !sta->sta.deflink.ht_cap.ht_supported) || + (!sta->sta.valid_links && !sta->sta.deflink.ht_cap.ht_supported && + !sta->sta.deflink.s1g_cap.s1g) || !sta->sta.wme || skb_get_queue_mapping(skb) == IEEE80211_AC_VO || skb->protocol == sdata->control_port_protocol) return; @@ -1541,7 +1542,7 @@ void ieee80211_txq_purge(struct ieee80211_local *local, spin_unlock_bh(&local->active_txq_lock[txqi->txq.ac]); } -void 
ieee80211_txq_set_params(struct ieee80211_local *local) +void ieee80211_txq_set_params(struct ieee80211_local *local, int radio_idx) { if (local->hw.wiphy->txq_limit) local->fq.limit = local->hw.wiphy->txq_limit; @@ -1605,7 +1606,7 @@ int ieee80211_txq_setup_flows(struct ieee80211_local *local) for (i = 0; i < fq->flows_cnt; i++) codel_vars_init(&local->cvars[i]); - ieee80211_txq_set_params(local); + ieee80211_txq_set_params(local, -1); return 0; } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index e66da651678a..0d85a382746f 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1756,6 +1756,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) bool sched_scan_stopped = false; bool suspended = local->suspended; bool in_reconfig = false; + u32 rts_threshold; lockdep_assert_wiphy(local->hw.wiphy); @@ -1826,13 +1827,20 @@ int ieee80211_reconfig(struct ieee80211_local *local) } /* setup fragmentation threshold */ - drv_set_frag_threshold(local, hw->wiphy->frag_threshold); + drv_set_frag_threshold(local, -1, hw->wiphy->frag_threshold); /* setup RTS threshold */ - drv_set_rts_threshold(local, hw->wiphy->rts_threshold); + if (hw->wiphy->n_radio > 0) { + for (i = 0; i < hw->wiphy->n_radio; i++) { + rts_threshold = hw->wiphy->radio_cfg[i].rts_threshold; + drv_set_rts_threshold(local, i, rts_threshold); + } + } else { + drv_set_rts_threshold(local, -1, hw->wiphy->rts_threshold); + } /* reset coverage class */ - drv_set_coverage_class(local, hw->wiphy->coverage_class); + drv_set_coverage_class(local, -1, hw->wiphy->coverage_class); ieee80211_led_radio(local, true); ieee80211_mod_tpt_led_trig(local, @@ -1890,11 +1898,11 @@ int ieee80211_reconfig(struct ieee80211_local *local) ieee80211_assign_chanctx(local, sdata, &sdata->deflink); /* reconfigure hardware */ - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_LISTEN_INTERVAL | - IEEE80211_CONF_CHANGE_MONITOR | - IEEE80211_CONF_CHANGE_PS | - IEEE80211_CONF_CHANGE_RETRY_LIMITS | - 
IEEE80211_CONF_CHANGE_IDLE); + ieee80211_hw_config(local, -1, IEEE80211_CONF_CHANGE_LISTEN_INTERVAL | + IEEE80211_CONF_CHANGE_MONITOR | + IEEE80211_CONF_CHANGE_PS | + IEEE80211_CONF_CHANGE_RETRY_LIMITS | + IEEE80211_CONF_CHANGE_IDLE); ieee80211_configure_filter(local); @@ -2547,6 +2555,23 @@ end: return 0; } +int ieee80211_put_reg_conn(struct sk_buff *skb, + enum ieee80211_channel_flags flags) +{ + u8 reg_conn = IEEE80211_REG_CONN_LPI_VALID | + IEEE80211_REG_CONN_LPI_VALUE | + IEEE80211_REG_CONN_SP_VALID; + + if (!(flags & IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT)) + reg_conn |= IEEE80211_REG_CONN_SP_VALUE; + + skb_put_u8(skb, WLAN_EID_EXTENSION); + skb_put_u8(skb, 1 + sizeof(reg_conn)); + skb_put_u8(skb, WLAN_EID_EXT_NON_AP_STA_REG_CON); + skb_put_u8(skb, reg_conn); + return 0; +} + int ieee80211_put_he_6ghz_cap(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode smps_mode) @@ -3264,14 +3289,24 @@ int ieee80211_put_srates_elem(struct sk_buff *skb, return 0; } -int ieee80211_ave_rssi(struct ieee80211_vif *vif) +int ieee80211_ave_rssi(struct ieee80211_vif *vif, int link_id) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_link_data *link_data; if (WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION)) return 0; - return -ewma_beacon_signal_read(&sdata->deflink.u.mgd.ave_beacon_signal); + if (link_id < 0) + link_data = &sdata->deflink; + else + link_data = wiphy_dereference(sdata->local->hw.wiphy, + sdata->link[link_id]); + + if (WARN_ON_ONCE(!link_data)) + return -99; + + return -ewma_beacon_signal_read(&link_data->u.mgd.ave_beacon_signal); } EXPORT_SYMBOL_GPL(ieee80211_ave_rssi); @@ -3952,6 +3987,33 @@ static u8 ieee80211_chanctx_radar_detect(struct ieee80211_local *local, return radar_detect; } +bool ieee80211_is_radio_idx_in_scan_req(struct wiphy *wiphy, + struct cfg80211_scan_request *scan_req, + int radio_idx) +{ + struct ieee80211_channel *chan; + int i, chan_radio_idx; + + for (i = 0; i < 
scan_req->n_channels; i++) { + chan = scan_req->channels[i]; + chan_radio_idx = cfg80211_get_radio_idx_by_chan(wiphy, chan); + /* + * The chan_radio_idx should be valid since it's taken from a + * valid scan request. + * However, if chan_radio_idx is unexpectedly invalid (negative), + * we take a conservative approach and assume the scan request + * might use the specified radio_idx. Hence, return true. + */ + if (WARN_ON(chan_radio_idx < 0)) + return true; + + if (chan_radio_idx == radio_idx) + return true; + } + + return false; +} + static u32 __ieee80211_get_radio_mask(struct ieee80211_sub_if_data *sdata) { diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index 9b12ca97f412..aef74308c18e 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -97,8 +97,8 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) struct sock *sk = sock->sk; struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); struct mctp_skb_cb *cb; - struct mctp_route *rt; struct sk_buff *skb = NULL; + struct mctp_dst dst; int hlen; if (addr) { @@ -133,34 +133,30 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) if (msk->addr_ext && addrlen >= sizeof(struct sockaddr_mctp_ext)) { DECLARE_SOCKADDR(struct sockaddr_mctp_ext *, extaddr, msg->msg_name); - struct net_device *dev; - - rc = -EINVAL; - rcu_read_lock(); - dev = dev_get_by_index_rcu(sock_net(sk), extaddr->smctp_ifindex); - /* check for correct halen */ - if (dev && extaddr->smctp_halen == dev->addr_len) { - hlen = LL_RESERVED_SPACE(dev) + sizeof(struct mctp_hdr); - rc = 0; - } - rcu_read_unlock(); + + if (!mctp_sockaddr_ext_is_ok(extaddr)) + return -EINVAL; + + rc = mctp_dst_from_extaddr(&dst, sock_net(sk), + extaddr->smctp_ifindex, + extaddr->smctp_halen, + extaddr->smctp_haddr); if (rc) - goto err_free; - rt = NULL; + return rc; + } else { - rt = mctp_route_lookup(sock_net(sk), addr->smctp_network, - addr->smctp_addr.s_addr); - if (!rt) { - rc = -EHOSTUNREACH; - goto 
err_free; - } - hlen = LL_RESERVED_SPACE(rt->dev->dev) + sizeof(struct mctp_hdr); + rc = mctp_route_lookup(sock_net(sk), addr->smctp_network, + addr->smctp_addr.s_addr, &dst); + if (rc) + return rc; } + hlen = LL_RESERVED_SPACE(dst.dev->dev) + sizeof(struct mctp_hdr); + skb = sock_alloc_send_skb(sk, hlen + 1 + len, msg->msg_flags & MSG_DONTWAIT, &rc); if (!skb) - return rc; + goto err_release_dst; skb_reserve(skb, hlen); @@ -175,30 +171,16 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) cb = __mctp_cb(skb); cb->net = addr->smctp_network; - if (!rt) { - /* fill extended address in cb */ - DECLARE_SOCKADDR(struct sockaddr_mctp_ext *, - extaddr, msg->msg_name); - - if (!mctp_sockaddr_ext_is_ok(extaddr) || - extaddr->smctp_halen > sizeof(cb->haddr)) { - rc = -EINVAL; - goto err_free; - } - - cb->ifindex = extaddr->smctp_ifindex; - /* smctp_halen is checked above */ - cb->halen = extaddr->smctp_halen; - memcpy(cb->haddr, extaddr->smctp_haddr, cb->halen); - } - - rc = mctp_local_output(sk, rt, skb, addr->smctp_addr.s_addr, + rc = mctp_local_output(sk, &dst, skb, addr->smctp_addr.s_addr, addr->smctp_tag); + mctp_dst_release(&dst); return rc ? 
: len; err_free: kfree_skb(skb); +err_release_dst: + mctp_dst_release(&dst); return rc; } @@ -793,3 +775,7 @@ MODULE_DESCRIPTION("MCTP core"); MODULE_AUTHOR("Jeremy Kerr <jk@codeconstruct.com.au>"); MODULE_ALIAS_NETPROTO(PF_MCTP); + +#if IS_ENABLED(CONFIG_MCTP_TEST) +#include "test/sock-test.c" +#endif diff --git a/net/mctp/route.c b/net/mctp/route.c index d9c8e5a5f9ce..a20d6b11d418 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -17,6 +17,8 @@ #include <linux/rtnetlink.h> #include <linux/skbuff.h> +#include <kunit/static_stub.h> + #include <uapi/linux/if_arp.h> #include <net/mctp.h> @@ -32,7 +34,7 @@ static const unsigned long mctp_key_lifetime = 6 * CONFIG_HZ; static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev); /* route output callbacks */ -static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb) +static int mctp_dst_discard(struct mctp_dst *dst, struct sk_buff *skb) { kfree_skb(skb); return 0; @@ -368,7 +370,7 @@ static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb) return 0; } -static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) +static int mctp_dst_input(struct mctp_dst *dst, struct sk_buff *skb) { struct mctp_sk_key *key, *any_key = NULL; struct net *net = dev_net(skb->dev); @@ -392,6 +394,9 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) */ skb_orphan(skb); + if (skb->pkt_type == PACKET_OUTGOING) + skb->pkt_type = PACKET_LOOPBACK; + /* ensure we have enough data for a header and a type */ if (skb->len < sizeof(struct mctp_hdr) + 1) goto out; @@ -556,39 +561,31 @@ out: return rc; } -static unsigned int mctp_route_mtu(struct mctp_route *rt) +static int mctp_dst_output(struct mctp_dst *dst, struct sk_buff *skb) { - return rt->mtu ?: READ_ONCE(rt->dev->dev->mtu); -} - -static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb) -{ - struct mctp_skb_cb *cb = mctp_cb(skb); - struct mctp_hdr *hdr = mctp_hdr(skb); char 
daddr_buf[MAX_ADDR_LEN]; char *daddr = NULL; - unsigned int mtu; int rc; skb->protocol = htons(ETH_P_MCTP); + skb->pkt_type = PACKET_OUTGOING; - mtu = READ_ONCE(skb->dev->mtu); - if (skb->len > mtu) { + if (skb->len > dst->mtu) { kfree_skb(skb); return -EMSGSIZE; } - if (cb->ifindex) { - /* direct route; use the hwaddr we stashed in sendmsg */ - if (cb->halen != skb->dev->addr_len) { + /* direct route; use the hwaddr we stashed in sendmsg */ + if (dst->halen) { + if (dst->halen != skb->dev->addr_len) { /* sanity check, sendmsg should have already caught this */ kfree_skb(skb); return -EMSGSIZE; } - daddr = cb->haddr; + daddr = dst->haddr; } else { /* If lookup fails let the device handle daddr==NULL */ - if (mctp_neigh_lookup(route->dev, hdr->dest, daddr_buf) == 0) + if (mctp_neigh_lookup(dst->dev, dst->nexthop, daddr_buf) == 0) daddr = daddr_buf; } @@ -599,7 +596,7 @@ static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb) return -EHOSTUNREACH; } - mctp_flow_prepare_output(skb, route->dev); + mctp_flow_prepare_output(skb, dst->dev); rc = dev_queue_xmit(skb); if (rc) @@ -612,7 +609,8 @@ static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb) static void mctp_route_release(struct mctp_route *rt) { if (refcount_dec_and_test(&rt->refs)) { - mctp_dev_put(rt->dev); + if (rt->dst_type == MCTP_ROUTE_DIRECT) + mctp_dev_put(rt->dev); kfree_rcu(rt, rcu); } } @@ -628,7 +626,7 @@ static struct mctp_route *mctp_route_alloc(void) INIT_LIST_HEAD(&rt->list); refcount_set(&rt->refs, 1); - rt->output = mctp_route_discard; + rt->output = mctp_dst_discard; return rt; } @@ -801,10 +799,16 @@ static struct mctp_sk_key *mctp_lookup_prealloc_tag(struct mctp_sock *msk, } /* routing lookups */ +static unsigned int mctp_route_netid(struct mctp_route *rt) +{ + return rt->dst_type == MCTP_ROUTE_DIRECT ? 
+ READ_ONCE(rt->dev->net) : rt->gateway.net; +} + static bool mctp_rt_match_eid(struct mctp_route *rt, unsigned int net, mctp_eid_t eid) { - return READ_ONCE(rt->dev->net) == net && + return mctp_route_netid(rt) == net && rt->min <= eid && rt->max >= eid; } @@ -813,54 +817,150 @@ static bool mctp_rt_compare_exact(struct mctp_route *rt1, struct mctp_route *rt2) { ASSERT_RTNL(); - return rt1->dev->net == rt2->dev->net && + return mctp_route_netid(rt1) == mctp_route_netid(rt2) && rt1->min == rt2->min && rt1->max == rt2->max; } -struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet, - mctp_eid_t daddr) +/* must only be called on a direct route, as the final output hop */ +static void mctp_dst_from_route(struct mctp_dst *dst, mctp_eid_t eid, + unsigned int mtu, struct mctp_route *route) +{ + mctp_dev_hold(route->dev); + dst->nexthop = eid; + dst->dev = route->dev; + dst->mtu = READ_ONCE(dst->dev->dev->mtu); + if (mtu) + dst->mtu = min(dst->mtu, mtu); + dst->halen = 0; + dst->output = route->output; +} + +int mctp_dst_from_extaddr(struct mctp_dst *dst, struct net *net, int ifindex, + unsigned char halen, const unsigned char *haddr) { - struct mctp_route *tmp, *rt = NULL; + struct net_device *netdev; + struct mctp_dev *dev; + int rc = -ENOENT; + + if (halen > sizeof(dst->haddr)) + return -EINVAL; rcu_read_lock(); - list_for_each_entry_rcu(tmp, &net->mctp.routes, list) { - /* TODO: add metrics */ - if (mctp_rt_match_eid(tmp, dnet, daddr)) { - if (refcount_inc_not_zero(&tmp->refs)) { - rt = tmp; - break; - } - } + netdev = dev_get_by_index_rcu(net, ifindex); + if (!netdev) + goto out_unlock; + + if (netdev->addr_len != halen) { + rc = -EINVAL; + goto out_unlock; } + dev = __mctp_dev_get(netdev); + if (!dev) + goto out_unlock; + + dst->dev = dev; + dst->mtu = READ_ONCE(netdev->mtu); + dst->halen = halen; + dst->output = mctp_dst_output; + dst->nexthop = 0; + memcpy(dst->haddr, haddr, halen); + + rc = 0; + +out_unlock: rcu_read_unlock(); + return rc; +} - 
return rt; +void mctp_dst_release(struct mctp_dst *dst) +{ + mctp_dev_put(dst->dev); +} + +static struct mctp_route *mctp_route_lookup_single(struct net *net, + unsigned int dnet, + mctp_eid_t daddr) +{ + struct mctp_route *rt; + + list_for_each_entry_rcu(rt, &net->mctp.routes, list) { + if (mctp_rt_match_eid(rt, dnet, daddr)) + return rt; + } + + return NULL; } -static struct mctp_route *mctp_route_lookup_null(struct net *net, - struct net_device *dev) +/* populates *dst on successful lookup, if set */ +int mctp_route_lookup(struct net *net, unsigned int dnet, + mctp_eid_t daddr, struct mctp_dst *dst) { - struct mctp_route *tmp, *rt = NULL; + const unsigned int max_depth = 32; + unsigned int depth, mtu = 0; + int rc = -EHOSTUNREACH; rcu_read_lock(); - list_for_each_entry_rcu(tmp, &net->mctp.routes, list) { - if (tmp->dev->dev == dev && tmp->type == RTN_LOCAL && - refcount_inc_not_zero(&tmp->refs)) { - rt = tmp; + for (depth = 0; depth < max_depth; depth++) { + struct mctp_route *rt; + + rt = mctp_route_lookup_single(net, dnet, daddr); + if (!rt) break; + + /* clamp mtu to the smallest in the path, allowing 0 + * to specify no restrictions + */ + if (mtu && rt->mtu) + mtu = min(mtu, rt->mtu); + else + mtu = mtu ?: rt->mtu; + + if (rt->dst_type == MCTP_ROUTE_DIRECT) { + if (dst) + mctp_dst_from_route(dst, daddr, mtu, rt); + rc = 0; + break; + + } else if (rt->dst_type == MCTP_ROUTE_GATEWAY) { + daddr = rt->gateway.eid; } } rcu_read_unlock(); - return rt; + return rc; } -static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb, +static int mctp_route_lookup_null(struct net *net, struct net_device *dev, + struct mctp_dst *dst) +{ + int rc = -EHOSTUNREACH; + struct mctp_route *rt; + + rcu_read_lock(); + + list_for_each_entry_rcu(rt, &net->mctp.routes, list) { + if (rt->dst_type != MCTP_ROUTE_DIRECT || rt->type != RTN_LOCAL) + continue; + + if (rt->dev->dev != dev) + continue; + + mctp_dst_from_route(dst, 0, 0, rt); + rc = 0; + break; + } + + 
rcu_read_unlock(); + + return rc; +} + +static int mctp_do_fragment_route(struct mctp_dst *dst, struct sk_buff *skb, unsigned int mtu, u8 tag) { const unsigned int hlen = sizeof(struct mctp_hdr); @@ -933,7 +1033,7 @@ static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb, skb_ext_copy(skb2, skb); /* do route */ - rc = rt->output(rt, skb2); + rc = dst->output(dst, skb2); if (rc) break; @@ -945,68 +1045,34 @@ static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb, return rc; } -int mctp_local_output(struct sock *sk, struct mctp_route *rt, +int mctp_local_output(struct sock *sk, struct mctp_dst *dst, struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag) { struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); - struct mctp_skb_cb *cb = mctp_cb(skb); - struct mctp_route tmp_rt = {0}; struct mctp_sk_key *key; struct mctp_hdr *hdr; unsigned long flags; unsigned int netid; unsigned int mtu; mctp_eid_t saddr; - bool ext_rt; int rc; u8 tag; - rc = -ENODEV; - - if (rt) { - ext_rt = false; - if (WARN_ON(!rt->dev)) - goto out_release; - - } else if (cb->ifindex) { - struct net_device *dev; - - ext_rt = true; - rt = &tmp_rt; - - rcu_read_lock(); - dev = dev_get_by_index_rcu(sock_net(sk), cb->ifindex); - if (!dev) { - rcu_read_unlock(); - goto out_free; - } - rt->dev = __mctp_dev_get(dev); - rcu_read_unlock(); - - if (!rt->dev) - goto out_release; - - /* establish temporary route - we set up enough to keep - * mctp_route_output happy - */ - rt->output = mctp_route_output; - rt->mtu = 0; + KUNIT_STATIC_STUB_REDIRECT(mctp_local_output, sk, dst, skb, daddr, + req_tag); - } else { - rc = -EINVAL; - goto out_free; - } + rc = -ENODEV; - spin_lock_irqsave(&rt->dev->addrs_lock, flags); - if (rt->dev->num_addrs == 0) { + spin_lock_irqsave(&dst->dev->addrs_lock, flags); + if (dst->dev->num_addrs == 0) { rc = -EHOSTUNREACH; } else { /* use the outbound interface's first address as our source */ - saddr = rt->dev->addrs[0]; + saddr = 
dst->dev->addrs[0]; rc = 0; } - spin_unlock_irqrestore(&rt->dev->addrs_lock, flags); - netid = READ_ONCE(rt->dev->net); + spin_unlock_irqrestore(&dst->dev->addrs_lock, flags); + netid = READ_ONCE(dst->dev->net); if (rc) goto out_release; @@ -1032,15 +1098,13 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, tag = req_tag & MCTP_TAG_MASK; } + skb->pkt_type = PACKET_OUTGOING; skb->protocol = htons(ETH_P_MCTP); skb->priority = 0; skb_reset_transport_header(skb); skb_push(skb, sizeof(struct mctp_hdr)); skb_reset_network_header(skb); - skb->dev = rt->dev->dev; - - /* cb->net will have been set on initial ingress */ - cb->src = saddr; + skb->dev = dst->dev->dev; /* set up common header fields */ hdr = mctp_hdr(skb); @@ -1048,73 +1112,64 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, hdr->dest = daddr; hdr->src = saddr; - mtu = mctp_route_mtu(rt); + mtu = dst->mtu; if (skb->len + sizeof(struct mctp_hdr) <= mtu) { hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM | tag; - rc = rt->output(rt, skb); + rc = dst->output(dst, skb); } else { - rc = mctp_do_fragment_route(rt, skb, mtu, tag); + rc = mctp_do_fragment_route(dst, skb, mtu, tag); } /* route output functions consume the skb, even on error */ skb = NULL; out_release: - if (!ext_rt) - mctp_route_release(rt); - - mctp_dev_put(tmp_rt.dev); - -out_free: kfree_skb(skb); return rc; } /* route management */ -static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start, - unsigned int daddr_extent, unsigned int mtu, - unsigned char type) + +/* mctp_route_add(): Add the provided route, previously allocated via + * mctp_route_alloc(). On success, takes ownership of @rt, which includes a + * hold on rt->dev for usage in the route table. On failure a caller will want + * to mctp_route_release(). + * + * We expect that the caller has set rt->type, rt->dst_type, rt->min, rt->max, + * rt->mtu and either rt->dev (with a reference held appropriately) or + * rt->gateway. 
Other fields will be populated. + */ +static int mctp_route_add(struct net *net, struct mctp_route *rt) { - int (*rtfn)(struct mctp_route *rt, struct sk_buff *skb); - struct net *net = dev_net(mdev->dev); - struct mctp_route *rt, *ert; + struct mctp_route *ert; - if (!mctp_address_unicast(daddr_start)) + if (!mctp_address_unicast(rt->min) || !mctp_address_unicast(rt->max)) return -EINVAL; - if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255) + if (rt->dst_type == MCTP_ROUTE_DIRECT && !rt->dev) return -EINVAL; - switch (type) { + if (rt->dst_type == MCTP_ROUTE_GATEWAY && !rt->gateway.eid) + return -EINVAL; + + switch (rt->type) { case RTN_LOCAL: - rtfn = mctp_route_input; + rt->output = mctp_dst_input; break; case RTN_UNICAST: - rtfn = mctp_route_output; + rt->output = mctp_dst_output; break; default: return -EINVAL; } - rt = mctp_route_alloc(); - if (!rt) - return -ENOMEM; - - rt->min = daddr_start; - rt->max = daddr_start + daddr_extent; - rt->mtu = mtu; - rt->dev = mdev; - mctp_dev_hold(rt->dev); - rt->type = type; - rt->output = rtfn; - ASSERT_RTNL(); + /* Prevent duplicate identical routes. 
*/ list_for_each_entry(ert, &net->mctp.routes, list) { if (mctp_rt_compare_exact(rt, ert)) { - mctp_route_release(rt); return -EEXIST; } } @@ -1124,10 +1179,10 @@ static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start, return 0; } -static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start, - unsigned int daddr_extent, unsigned char type) +static int mctp_route_remove(struct net *net, unsigned int netid, + mctp_eid_t daddr_start, unsigned int daddr_extent, + unsigned char type) { - struct net *net = dev_net(mdev->dev); struct mctp_route *rt, *tmp; mctp_eid_t daddr_end; bool dropped; @@ -1141,7 +1196,7 @@ static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start, ASSERT_RTNL(); list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) { - if (rt->dev == mdev && + if (mctp_route_netid(rt) == netid && rt->min == daddr_start && rt->max == daddr_end && rt->type == type) { list_del_rcu(&rt->list); @@ -1156,12 +1211,32 @@ static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start, int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr) { - return mctp_route_add(mdev, addr, 0, 0, RTN_LOCAL); + struct mctp_route *rt; + int rc; + + rt = mctp_route_alloc(); + if (!rt) + return -ENOMEM; + + rt->min = addr; + rt->max = addr; + rt->dst_type = MCTP_ROUTE_DIRECT; + rt->dev = mdev; + rt->type = RTN_LOCAL; + + mctp_dev_hold(rt->dev); + + rc = mctp_route_add(dev_net(mdev->dev), rt); + if (rc) + mctp_route_release(rt); + + return rc; } int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr) { - return mctp_route_remove(mdev, addr, 0, RTN_LOCAL); + return mctp_route_remove(dev_net(mdev->dev), mdev->net, + addr, 0, RTN_LOCAL); } /* removes all entries for a given device */ @@ -1172,7 +1247,7 @@ void mctp_route_remove_dev(struct mctp_dev *mdev) ASSERT_RTNL(); list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) { - if (rt->dev == mdev) { + if (rt->dst_type == MCTP_ROUTE_DIRECT && rt->dev == 
mdev) { list_del_rcu(&rt->list); /* TODO: immediate RTM_DELROUTE */ mctp_route_release(rt); @@ -1189,8 +1264,9 @@ static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev, struct net *net = dev_net(dev); struct mctp_dev *mdev; struct mctp_skb_cb *cb; - struct mctp_route *rt; + struct mctp_dst dst; struct mctp_hdr *mh; + int rc; rcu_read_lock(); mdev = __mctp_dev_get(dev); @@ -1232,17 +1308,17 @@ static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev, cb->net = READ_ONCE(mdev->net); cb->ifindex = dev->ifindex; - rt = mctp_route_lookup(net, cb->net, mh->dest); + rc = mctp_route_lookup(net, cb->net, mh->dest, &dst); /* NULL EID, but addressed to our physical address */ - if (!rt && mh->dest == MCTP_ADDR_NULL && skb->pkt_type == PACKET_HOST) - rt = mctp_route_lookup_null(net, dev); + if (rc && mh->dest == MCTP_ADDR_NULL && skb->pkt_type == PACKET_HOST) + rc = mctp_route_lookup_null(net, dev, &dst); - if (!rt) + if (rc) goto err_drop; - rt->output(rt, skb); - mctp_route_release(rt); + dst.output(&dst, skb); + mctp_dst_release(&dst); mctp_dev_put(mdev); return NET_RX_SUCCESS; @@ -1264,19 +1340,28 @@ static const struct nla_policy rta_mctp_policy[RTA_MAX + 1] = { [RTA_DST] = { .type = NLA_U8 }, [RTA_METRICS] = { .type = NLA_NESTED }, [RTA_OIF] = { .type = NLA_U32 }, + [RTA_GATEWAY] = NLA_POLICY_EXACT_LEN(sizeof(struct mctp_fq_addr)), +}; + +static const struct nla_policy rta_metrics_policy[RTAX_MAX + 1] = { + [RTAX_MTU] = { .type = NLA_U32 }, }; -/* Common part for RTM_NEWROUTE and RTM_DELROUTE parsing. - * tb must hold RTA_MAX+1 elements. +/* base parsing; common to both _lookup and _populate variants. + * + * For gateway routes (which have a RTA_GATEWAY, and no RTA_OIF), we populate + * *gatewayp. For direct routes (RTA_OIF, no RTA_GATEWAY), we populate *mdev. 
*/ -static int mctp_route_nlparse(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack, - struct nlattr **tb, struct rtmsg **rtm, - struct mctp_dev **mdev, mctp_eid_t *daddr_start) +static int mctp_route_nlparse_common(struct net *net, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, + struct nlattr **tb, struct rtmsg **rtm, + struct mctp_dev **mdev, + struct mctp_fq_addr *gatewayp, + mctp_eid_t *daddr_start) { - struct net *net = sock_net(skb->sk); + struct mctp_fq_addr *gateway = NULL; + unsigned int ifindex = 0; struct net_device *dev; - unsigned int ifindex; int rc; rc = nlmsg_parse(nlh, sizeof(struct rtmsg), tb, RTA_MAX, @@ -1292,11 +1377,44 @@ static int mctp_route_nlparse(struct sk_buff *skb, struct nlmsghdr *nlh, } *daddr_start = nla_get_u8(tb[RTA_DST]); - if (!tb[RTA_OIF]) { - NL_SET_ERR_MSG(extack, "ifindex missing"); + if (tb[RTA_OIF]) + ifindex = nla_get_u32(tb[RTA_OIF]); + + if (tb[RTA_GATEWAY]) + gateway = nla_data(tb[RTA_GATEWAY]); + + if (ifindex && gateway) { + NL_SET_ERR_MSG(extack, + "cannot specify both ifindex and gateway"); + return -EINVAL; + + } else if (ifindex) { + dev = __dev_get_by_index(net, ifindex); + if (!dev) { + NL_SET_ERR_MSG(extack, "bad ifindex"); + return -ENODEV; + } + *mdev = mctp_dev_get_rtnl(dev); + if (!*mdev) + return -ENODEV; + gatewayp->eid = 0; + + } else if (gateway) { + if (!mctp_address_unicast(gateway->eid)) { + NL_SET_ERR_MSG(extack, "bad gateway"); + return -EINVAL; + } + + gatewayp->eid = gateway->eid; + gatewayp->net = gateway->net != MCTP_NET_ANY ? 
+ gateway->net : + READ_ONCE(net->mctp.default_net); + *mdev = NULL; + + } else { + NL_SET_ERR_MSG(extack, "no route output provided"); return -EINVAL; } - ifindex = nla_get_u32(tb[RTA_OIF]); *rtm = nlmsg_data(nlh); if ((*rtm)->rtm_family != AF_MCTP) { @@ -1304,82 +1422,157 @@ static int mctp_route_nlparse(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; } - dev = __dev_get_by_index(net, ifindex); - if (!dev) { - NL_SET_ERR_MSG(extack, "bad ifindex"); - return -ENODEV; - } - *mdev = mctp_dev_get_rtnl(dev); - if (!*mdev) - return -ENODEV; - - if (dev->flags & IFF_LOOPBACK) { - NL_SET_ERR_MSG(extack, "no routes to loopback"); + if ((*rtm)->rtm_type != RTN_UNICAST) { + NL_SET_ERR_MSG(extack, "rtm_type must be RTN_UNICAST"); return -EINVAL; } return 0; } -static const struct nla_policy rta_metrics_policy[RTAX_MAX + 1] = { - [RTAX_MTU] = { .type = NLA_U32 }, -}; - -static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack) +/* Route parsing for lookup operations; we only need the "route target" + * components (ie., network and dest-EID range). + */ +static int mctp_route_nlparse_lookup(struct net *net, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, + unsigned char *type, unsigned int *netid, + mctp_eid_t *daddr_start, + unsigned int *daddr_extent) { struct nlattr *tb[RTA_MAX + 1]; + struct mctp_fq_addr gw; + struct mctp_dev *mdev; + struct rtmsg *rtm; + int rc; + + rc = mctp_route_nlparse_common(net, nlh, extack, tb, &rtm, + &mdev, &gw, daddr_start); + if (rc) + return rc; + + if (mdev) { + *netid = mdev->net; + } else if (gw.eid) { + *netid = gw.net; + } else { + /* bug: _nlparse_common should not allow this */ + return -1; + } + + *type = rtm->rtm_type; + *daddr_extent = rtm->rtm_dst_len; + + return 0; +} + +/* Full route parse for RTM_NEWROUTE: populate @rt. On success, + * MCTP_ROUTE_DIRECT routes (ie, those with a direct dev) will hold a reference + * to that dev. 
+ */ +static int mctp_route_nlparse_populate(struct net *net, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, + struct mctp_route *rt) +{ struct nlattr *tbx[RTAX_MAX + 1]; + struct nlattr *tb[RTA_MAX + 1]; + unsigned int daddr_extent; + struct mctp_fq_addr gw; mctp_eid_t daddr_start; - struct mctp_dev *mdev; + struct mctp_dev *dev; struct rtmsg *rtm; - unsigned int mtu; + u32 mtu = 0; int rc; - rc = mctp_route_nlparse(skb, nlh, extack, tb, - &rtm, &mdev, &daddr_start); - if (rc < 0) + rc = mctp_route_nlparse_common(net, nlh, extack, tb, &rtm, + &dev, &gw, &daddr_start); + if (rc) return rc; - if (rtm->rtm_type != RTN_UNICAST) { - NL_SET_ERR_MSG(extack, "rtm_type must be RTN_UNICAST"); + daddr_extent = rtm->rtm_dst_len; + + if (daddr_extent > 0xff || daddr_extent + daddr_start >= 255) { + NL_SET_ERR_MSG(extack, "invalid eid range"); return -EINVAL; } - mtu = 0; if (tb[RTA_METRICS]) { rc = nla_parse_nested(tbx, RTAX_MAX, tb[RTA_METRICS], rta_metrics_policy, NULL); - if (rc < 0) + if (rc < 0) { + NL_SET_ERR_MSG(extack, "incorrect RTA_METRICS format"); return rc; + } if (tbx[RTAX_MTU]) mtu = nla_get_u32(tbx[RTAX_MTU]); } - rc = mctp_route_add(mdev, daddr_start, rtm->rtm_dst_len, mtu, - rtm->rtm_type); + rt->type = rtm->rtm_type; + rt->min = daddr_start; + rt->max = daddr_start + daddr_extent; + rt->mtu = mtu; + if (gw.eid) { + rt->dst_type = MCTP_ROUTE_GATEWAY; + rt->gateway.eid = gw.eid; + rt->gateway.net = gw.net; + } else { + rt->dst_type = MCTP_ROUTE_DIRECT; + rt->dev = dev; + mctp_dev_hold(rt->dev); + } + + return 0; +} + +static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct net *net = sock_net(skb->sk); + struct mctp_route *rt; + int rc; + + rt = mctp_route_alloc(); + if (!rt) + return -ENOMEM; + + rc = mctp_route_nlparse_populate(net, nlh, extack, rt); + if (rc < 0) + goto err_free; + + if (rt->dst_type == MCTP_ROUTE_DIRECT && + rt->dev->dev->flags & IFF_LOOPBACK) { + NL_SET_ERR_MSG(extack, "no 
routes to loopback"); + rc = -EINVAL; + goto err_free; + } + + rc = mctp_route_add(net, rt); + if (!rc) + return 0; + +err_free: + mctp_route_release(rt); return rc; } static int mctp_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { - struct nlattr *tb[RTA_MAX + 1]; + struct net *net = sock_net(skb->sk); + unsigned int netid, daddr_extent; + unsigned char type = RTN_UNSPEC; mctp_eid_t daddr_start; - struct mctp_dev *mdev; - struct rtmsg *rtm; int rc; - rc = mctp_route_nlparse(skb, nlh, extack, tb, - &rtm, &mdev, &daddr_start); + rc = mctp_route_nlparse_lookup(net, nlh, extack, &type, &netid, + &daddr_start, &daddr_extent); if (rc < 0) return rc; /* we only have unicast routes */ - if (rtm->rtm_type != RTN_UNICAST) + if (type != RTN_UNICAST) return -EINVAL; - rc = mctp_route_remove(mdev, daddr_start, rtm->rtm_dst_len, RTN_UNICAST); + rc = mctp_route_remove(net, netid, daddr_start, daddr_extent, type); return rc; } @@ -1405,7 +1598,6 @@ static int mctp_fill_rtinfo(struct sk_buff *skb, struct mctp_route *rt, hdr->rtm_tos = 0; hdr->rtm_table = RT_TABLE_DEFAULT; hdr->rtm_protocol = RTPROT_STATIC; /* everything is user-defined */ - hdr->rtm_scope = RT_SCOPE_LINK; /* TODO: scope in mctp_route? */ hdr->rtm_type = rt->type; if (nla_put_u8(skb, RTA_DST, rt->min)) @@ -1422,13 +1614,17 @@ static int mctp_fill_rtinfo(struct sk_buff *skb, struct mctp_route *rt, nla_nest_end(skb, metrics); - if (rt->dev) { + if (rt->dst_type == MCTP_ROUTE_DIRECT) { + hdr->rtm_scope = RT_SCOPE_LINK; if (nla_put_u32(skb, RTA_OIF, rt->dev->dev->ifindex)) goto cancel; + } else if (rt->dst_type == MCTP_ROUTE_GATEWAY) { + hdr->rtm_scope = RT_SCOPE_UNIVERSE; + if (nla_put(skb, RTA_GATEWAY, + sizeof(rt->gateway), &rt->gateway)) + goto cancel; } - /* TODO: conditional neighbour physaddr? 
*/ - nlmsg_end(skb, nlh); return 0; diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index 06c1897b685a..7a398f41b621 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c @@ -2,132 +2,11 @@ #include <kunit/test.h> -#include "utils.h" - -struct mctp_test_route { - struct mctp_route rt; - struct sk_buff_head pkts; -}; - -static int mctp_test_route_output(struct mctp_route *rt, struct sk_buff *skb) -{ - struct mctp_test_route *test_rt = container_of(rt, struct mctp_test_route, rt); - - skb_queue_tail(&test_rt->pkts, skb); - - return 0; -} - -/* local version of mctp_route_alloc() */ -static struct mctp_test_route *mctp_route_test_alloc(void) -{ - struct mctp_test_route *rt; - - rt = kzalloc(sizeof(*rt), GFP_KERNEL); - if (!rt) - return NULL; - - INIT_LIST_HEAD(&rt->rt.list); - refcount_set(&rt->rt.refs, 1); - rt->rt.output = mctp_test_route_output; - - skb_queue_head_init(&rt->pkts); - - return rt; -} - -static struct mctp_test_route *mctp_test_create_route(struct net *net, - struct mctp_dev *dev, - mctp_eid_t eid, - unsigned int mtu) -{ - struct mctp_test_route *rt; - - rt = mctp_route_test_alloc(); - if (!rt) - return NULL; - - rt->rt.min = eid; - rt->rt.max = eid; - rt->rt.mtu = mtu; - rt->rt.type = RTN_UNSPEC; - if (dev) - mctp_dev_hold(dev); - rt->rt.dev = dev; - - list_add_rcu(&rt->rt.list, &net->mctp.routes); - - return rt; -} - -static void mctp_test_route_destroy(struct kunit *test, - struct mctp_test_route *rt) -{ - unsigned int refs; - - rtnl_lock(); - list_del_rcu(&rt->rt.list); - rtnl_unlock(); - - skb_queue_purge(&rt->pkts); - if (rt->rt.dev) - mctp_dev_put(rt->rt.dev); - - refs = refcount_read(&rt->rt.refs); - KUNIT_ASSERT_EQ_MSG(test, refs, 1, "route ref imbalance"); - - kfree_rcu(&rt->rt, rcu); -} - -static void mctp_test_skb_set_dev(struct sk_buff *skb, - struct mctp_test_dev *dev) -{ - struct mctp_skb_cb *cb; - - cb = mctp_cb(skb); - cb->net = READ_ONCE(dev->mdev->net); - skb->dev = dev->ndev; -} - -static 
struct sk_buff *mctp_test_create_skb(const struct mctp_hdr *hdr, - unsigned int data_len) -{ - size_t hdr_len = sizeof(*hdr); - struct sk_buff *skb; - unsigned int i; - u8 *buf; - - skb = alloc_skb(hdr_len + data_len, GFP_KERNEL); - if (!skb) - return NULL; - - __mctp_cb(skb); - memcpy(skb_put(skb, hdr_len), hdr, hdr_len); - - buf = skb_put(skb, data_len); - for (i = 0; i < data_len; i++) - buf[i] = i & 0xff; - - return skb; -} - -static struct sk_buff *__mctp_test_create_skb_data(const struct mctp_hdr *hdr, - const void *data, - size_t data_len) -{ - size_t hdr_len = sizeof(*hdr); - struct sk_buff *skb; - - skb = alloc_skb(hdr_len + data_len, GFP_KERNEL); - if (!skb) - return NULL; - - __mctp_cb(skb); - memcpy(skb_put(skb, hdr_len), hdr, hdr_len); - memcpy(skb_put(skb, data_len), data, data_len); +/* keep clangd happy when compiled outside of the route.c include */ +#include <net/mctp.h> +#include <net/mctpdevice.h> - return skb; -} +#include "utils.h" #define mctp_test_create_skb_data(h, d) \ __mctp_test_create_skb_data(h, d, sizeof(*d)) @@ -141,8 +20,10 @@ struct mctp_frag_test { static void mctp_test_fragment(struct kunit *test) { const struct mctp_frag_test *params; + struct mctp_test_pktqueue tpq; int rc, i, n, mtu, msgsize; - struct mctp_test_route *rt; + struct mctp_test_dev *dev; + struct mctp_dst dst; struct sk_buff *skb; struct mctp_hdr hdr; u8 seq; @@ -159,13 +40,15 @@ static void mctp_test_fragment(struct kunit *test) skb = mctp_test_create_skb(&hdr, msgsize); KUNIT_ASSERT_TRUE(test, skb); - rt = mctp_test_create_route(&init_net, NULL, 10, mtu); - KUNIT_ASSERT_TRUE(test, rt); + dev = mctp_test_create_dev(); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + mctp_test_dst_setup(test, &dst, dev, &tpq, mtu); - rc = mctp_do_fragment_route(&rt->rt, skb, mtu, MCTP_TAG_OWNER); + rc = mctp_do_fragment_route(&dst, skb, mtu, MCTP_TAG_OWNER); KUNIT_EXPECT_FALSE(test, rc); - n = rt->pkts.qlen; + n = tpq.pkts.qlen; KUNIT_EXPECT_EQ(test, n, params->n_frags); @@ -178,7 
+61,7 @@ static void mctp_test_fragment(struct kunit *test) first = i == 0; last = i == (n - 1); - skb2 = skb_dequeue(&rt->pkts); + skb2 = skb_dequeue(&tpq.pkts); if (!skb2) break; @@ -216,7 +99,8 @@ static void mctp_test_fragment(struct kunit *test) kfree_skb(skb2); } - mctp_test_route_destroy(test, rt); + mctp_test_dst_release(&dst, &tpq); + mctp_test_destroy_dev(dev); } static const struct mctp_frag_test mctp_frag_tests[] = { @@ -246,25 +130,30 @@ struct mctp_rx_input_test { static void mctp_test_rx_input(struct kunit *test) { const struct mctp_rx_input_test *params; + struct mctp_test_pktqueue tpq; struct mctp_test_route *rt; struct mctp_test_dev *dev; struct sk_buff *skb; params = test->param_value; + test->priv = &tpq; dev = mctp_test_create_dev(); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); - rt = mctp_test_create_route(&init_net, dev->mdev, 8, 68); + rt = mctp_test_create_route_direct(&init_net, dev->mdev, 8, 68); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt); skb = mctp_test_create_skb(&params->hdr, 1); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb); + mctp_test_pktqueue_init(&tpq); + mctp_pkttype_receive(skb, dev->ndev, &mctp_packet_type, NULL); - KUNIT_EXPECT_EQ(test, !!rt->pkts.qlen, params->input); + KUNIT_EXPECT_EQ(test, !!tpq.pkts.qlen, params->input); + skb_queue_purge(&tpq.pkts); mctp_test_route_destroy(test, rt); mctp_test_destroy_dev(dev); } @@ -292,12 +181,12 @@ KUNIT_ARRAY_PARAM(mctp_rx_input, mctp_rx_input_tests, /* set up a local dev, route on EID 8, and a socket listening on type 0 */ static void __mctp_route_test_init(struct kunit *test, struct mctp_test_dev **devp, - struct mctp_test_route **rtp, + struct mctp_dst *dst, + struct mctp_test_pktqueue *tpq, struct socket **sockp, unsigned int netid) { struct sockaddr_mctp addr = {0}; - struct mctp_test_route *rt; struct mctp_test_dev *dev; struct socket *sock; int rc; @@ -307,8 +196,7 @@ static void __mctp_route_test_init(struct kunit *test, if (netid != MCTP_NET_ANY) WRITE_ONCE(dev->mdev->net, netid); - rt = 
mctp_test_create_route(&init_net, dev->mdev, 8, 68); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt); + mctp_test_dst_setup(test, dst, dev, tpq, 68); rc = sock_create_kern(&init_net, AF_MCTP, SOCK_DGRAM, 0, &sock); KUNIT_ASSERT_EQ(test, rc, 0); @@ -320,18 +208,18 @@ static void __mctp_route_test_init(struct kunit *test, rc = kernel_bind(sock, (struct sockaddr *)&addr, sizeof(addr)); KUNIT_ASSERT_EQ(test, rc, 0); - *rtp = rt; *devp = dev; *sockp = sock; } static void __mctp_route_test_fini(struct kunit *test, struct mctp_test_dev *dev, - struct mctp_test_route *rt, + struct mctp_dst *dst, + struct mctp_test_pktqueue *tpq, struct socket *sock) { sock_release(sock); - mctp_test_route_destroy(test, rt); + mctp_test_dst_release(dst, tpq); mctp_test_destroy_dev(dev); } @@ -344,22 +232,24 @@ struct mctp_route_input_sk_test { static void mctp_test_route_input_sk(struct kunit *test) { const struct mctp_route_input_sk_test *params; + struct mctp_test_pktqueue tpq; struct sk_buff *skb, *skb2; - struct mctp_test_route *rt; struct mctp_test_dev *dev; + struct mctp_dst dst; struct socket *sock; int rc; params = test->param_value; - __mctp_route_test_init(test, &dev, &rt, &sock, MCTP_NET_ANY); + __mctp_route_test_init(test, &dev, &dst, &tpq, &sock, MCTP_NET_ANY); skb = mctp_test_create_skb_data(&params->hdr, &params->type); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb); mctp_test_skb_set_dev(skb, dev); + mctp_test_pktqueue_init(&tpq); - rc = mctp_route_input(&rt->rt, skb); + rc = mctp_dst_input(&dst, skb); if (params->deliver) { KUNIT_EXPECT_EQ(test, rc, 0); @@ -376,7 +266,7 @@ static void mctp_test_route_input_sk(struct kunit *test) KUNIT_EXPECT_NULL(test, skb2); } - __mctp_route_test_fini(test, dev, rt, sock); + __mctp_route_test_fini(test, dev, &dst, &tpq, sock); } #define FL_S (MCTP_HDR_FLAG_SOM) @@ -413,16 +303,17 @@ struct mctp_route_input_sk_reasm_test { static void mctp_test_route_input_sk_reasm(struct kunit *test) { const struct mctp_route_input_sk_reasm_test *params; + struct 
mctp_test_pktqueue tpq; struct sk_buff *skb, *skb2; - struct mctp_test_route *rt; struct mctp_test_dev *dev; + struct mctp_dst dst; struct socket *sock; int i, rc; u8 c; params = test->param_value; - __mctp_route_test_init(test, &dev, &rt, &sock, MCTP_NET_ANY); + __mctp_route_test_init(test, &dev, &dst, &tpq, &sock, MCTP_NET_ANY); for (i = 0; i < params->n_hdrs; i++) { c = i; @@ -431,7 +322,7 @@ static void mctp_test_route_input_sk_reasm(struct kunit *test) mctp_test_skb_set_dev(skb, dev); - rc = mctp_route_input(&rt->rt, skb); + rc = mctp_dst_input(&dst, skb); } skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc); @@ -445,7 +336,7 @@ static void mctp_test_route_input_sk_reasm(struct kunit *test) KUNIT_EXPECT_NULL(test, skb2); } - __mctp_route_test_fini(test, dev, rt, sock); + __mctp_route_test_fini(test, dev, &dst, &tpq, sock); } #define RX_FRAG(f, s) RX_HDR(1, 10, 8, FL_TO | (f) | ((s) << MCTP_HDR_SEQ_SHIFT)) @@ -547,7 +438,7 @@ struct mctp_route_input_sk_keys_test { static void mctp_test_route_input_sk_keys(struct kunit *test) { const struct mctp_route_input_sk_keys_test *params; - struct mctp_test_route *rt; + struct mctp_test_pktqueue tpq; struct sk_buff *skb, *skb2; struct mctp_test_dev *dev; struct mctp_sk_key *key; @@ -555,6 +446,7 @@ static void mctp_test_route_input_sk_keys(struct kunit *test) struct mctp_sock *msk; struct socket *sock; unsigned long flags; + struct mctp_dst dst; unsigned int net; int rc; u8 c; @@ -565,8 +457,7 @@ static void mctp_test_route_input_sk_keys(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); net = READ_ONCE(dev->mdev->net); - rt = mctp_test_create_route(&init_net, dev->mdev, 8, 68); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt); + mctp_test_dst_setup(test, &dst, dev, &tpq, 68); rc = sock_create_kern(&init_net, AF_MCTP, SOCK_DGRAM, 0, &sock); KUNIT_ASSERT_EQ(test, rc, 0); @@ -592,7 +483,7 @@ static void mctp_test_route_input_sk_keys(struct kunit *test) mctp_test_skb_set_dev(skb, dev); - rc = mctp_route_input(&rt->rt, 
skb); + rc = mctp_dst_input(&dst, skb); /* (potentially) receive message */ skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc); @@ -606,7 +497,7 @@ static void mctp_test_route_input_sk_keys(struct kunit *test) skb_free_datagram(sock->sk, skb2); mctp_key_unref(key); - __mctp_route_test_fini(test, dev, rt, sock); + __mctp_route_test_fini(test, dev, &dst, &tpq, sock); } static const struct mctp_route_input_sk_keys_test mctp_route_input_sk_keys_tests[] = { @@ -681,7 +572,8 @@ KUNIT_ARRAY_PARAM(mctp_route_input_sk_keys, mctp_route_input_sk_keys_tests, struct test_net { unsigned int netid; struct mctp_test_dev *dev; - struct mctp_test_route *rt; + struct mctp_test_pktqueue tpq; + struct mctp_dst dst; struct socket *sock; struct sk_buff *skb; struct mctp_sk_key *key; @@ -699,18 +591,20 @@ mctp_test_route_input_multiple_nets_bind_init(struct kunit *test, t->msg.data = t->netid; - __mctp_route_test_init(test, &t->dev, &t->rt, &t->sock, t->netid); + __mctp_route_test_init(test, &t->dev, &t->dst, &t->tpq, &t->sock, + t->netid); t->skb = mctp_test_create_skb_data(&hdr, &t->msg); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, t->skb); mctp_test_skb_set_dev(t->skb, t->dev); + mctp_test_pktqueue_init(&t->tpq); } static void mctp_test_route_input_multiple_nets_bind_fini(struct kunit *test, struct test_net *t) { - __mctp_route_test_fini(test, t->dev, t->rt, t->sock); + __mctp_route_test_fini(test, t->dev, &t->dst, &t->tpq, t->sock); } /* Test that skbs from different nets (otherwise identical) get routed to their @@ -731,9 +625,9 @@ static void mctp_test_route_input_multiple_nets_bind(struct kunit *test) mctp_test_route_input_multiple_nets_bind_init(test, &t1); mctp_test_route_input_multiple_nets_bind_init(test, &t2); - rc = mctp_route_input(&t1.rt->rt, t1.skb); + rc = mctp_dst_input(&t1.dst, t1.skb); KUNIT_ASSERT_EQ(test, rc, 0); - rc = mctp_route_input(&t2.rt->rt, t2.skb); + rc = mctp_dst_input(&t2.dst, t2.skb); KUNIT_ASSERT_EQ(test, rc, 0); rx_skb1 = skb_recv_datagram(t1.sock->sk, 
MSG_DONTWAIT, &rc); @@ -767,7 +661,8 @@ mctp_test_route_input_multiple_nets_key_init(struct kunit *test, t->msg.data = t->netid; - __mctp_route_test_init(test, &t->dev, &t->rt, &t->sock, t->netid); + __mctp_route_test_init(test, &t->dev, &t->dst, &t->tpq, &t->sock, + t->netid); msk = container_of(t->sock->sk, struct mctp_sock, sk); @@ -790,7 +685,7 @@ mctp_test_route_input_multiple_nets_key_fini(struct kunit *test, struct test_net *t) { mctp_key_unref(t->key); - __mctp_route_test_fini(test, t->dev, t->rt, t->sock); + __mctp_route_test_fini(test, t->dev, &t->dst, &t->tpq, t->sock); } /* test that skbs from different nets (otherwise identical) get routed to their @@ -812,9 +707,9 @@ static void mctp_test_route_input_multiple_nets_key(struct kunit *test) mctp_test_route_input_multiple_nets_key_init(test, &t1); mctp_test_route_input_multiple_nets_key_init(test, &t2); - rc = mctp_route_input(&t1.rt->rt, t1.skb); + rc = mctp_dst_input(&t1.dst, t1.skb); KUNIT_ASSERT_EQ(test, rc, 0); - rc = mctp_route_input(&t2.rt->rt, t2.skb); + rc = mctp_dst_input(&t2.dst, t2.skb); KUNIT_ASSERT_EQ(test, rc, 0); rx_skb1 = skb_recv_datagram(t1.sock->sk, MSG_DONTWAIT, &rc); @@ -843,13 +738,14 @@ static void mctp_test_route_input_multiple_nets_key(struct kunit *test) static void mctp_test_route_input_sk_fail_single(struct kunit *test) { const struct mctp_hdr hdr = RX_HDR(1, 10, 8, FL_S | FL_E | FL_TO); - struct mctp_test_route *rt; + struct mctp_test_pktqueue tpq; struct mctp_test_dev *dev; + struct mctp_dst dst; struct socket *sock; struct sk_buff *skb; int rc; - __mctp_route_test_init(test, &dev, &rt, &sock, MCTP_NET_ANY); + __mctp_route_test_init(test, &dev, &dst, &tpq, &sock, MCTP_NET_ANY); /* No rcvbuf space, so delivery should fail. __sock_set_rcvbuf will * clamp the minimum to SOCK_MIN_RCVBUF, so we open-code this. 
@@ -865,14 +761,14 @@ static void mctp_test_route_input_sk_fail_single(struct kunit *test) mctp_test_skb_set_dev(skb, dev); /* do route input, which should fail */ - rc = mctp_route_input(&rt->rt, skb); + rc = mctp_dst_input(&dst, skb); KUNIT_EXPECT_NE(test, rc, 0); /* we should hold the only reference to skb */ KUNIT_EXPECT_EQ(test, refcount_read(&skb->users), 1); kfree_skb(skb); - __mctp_route_test_fini(test, dev, rt, sock); + __mctp_route_test_fini(test, dev, &dst, &tpq, sock); } /* Input route to socket, using a fragmented message, where sock delivery fails. @@ -880,14 +776,15 @@ static void mctp_test_route_input_sk_fail_single(struct kunit *test) static void mctp_test_route_input_sk_fail_frag(struct kunit *test) { const struct mctp_hdr hdrs[2] = { RX_FRAG(FL_S, 0), RX_FRAG(FL_E, 1) }; - struct mctp_test_route *rt; + struct mctp_test_pktqueue tpq; struct mctp_test_dev *dev; struct sk_buff *skbs[2]; + struct mctp_dst dst; struct socket *sock; unsigned int i; int rc; - __mctp_route_test_init(test, &dev, &rt, &sock, MCTP_NET_ANY); + __mctp_route_test_init(test, &dev, &dst, &tpq, &sock, MCTP_NET_ANY); lock_sock(sock->sk); WRITE_ONCE(sock->sk->sk_rcvbuf, 0); @@ -904,11 +801,11 @@ static void mctp_test_route_input_sk_fail_frag(struct kunit *test) /* first route input should succeed, we're only queueing to the * frag list */ - rc = mctp_route_input(&rt->rt, skbs[0]); + rc = mctp_dst_input(&dst, skbs[0]); KUNIT_EXPECT_EQ(test, rc, 0); /* final route input should fail to deliver to the socket */ - rc = mctp_route_input(&rt->rt, skbs[1]); + rc = mctp_dst_input(&dst, skbs[1]); KUNIT_EXPECT_NE(test, rc, 0); /* we should hold the only reference to both skbs */ @@ -918,7 +815,7 @@ static void mctp_test_route_input_sk_fail_frag(struct kunit *test) KUNIT_EXPECT_EQ(test, refcount_read(&skbs[1]->users), 1); kfree_skb(skbs[1]); - __mctp_route_test_fini(test, dev, rt, sock); + __mctp_route_test_fini(test, dev, &dst, &tpq, sock); } /* Input route to socket, using a fragmented 
message created from clones. @@ -933,23 +830,22 @@ static void mctp_test_route_input_cloned_frag(struct kunit *test) RX_FRAG(FL_S, 0), RX_FRAG(FL_E, 1), }; - struct mctp_test_route *rt; + const size_t data_len = 3; /* arbitrary */ + u8 compare[3 * ARRAY_SIZE(hdrs)]; + u8 flat[3 * ARRAY_SIZE(hdrs)]; + struct mctp_test_pktqueue tpq; struct mctp_test_dev *dev; struct sk_buff *skb[5]; struct sk_buff *rx_skb; + struct mctp_dst dst; struct socket *sock; - size_t data_len; - u8 compare[100]; - u8 flat[100]; size_t total; void *p; int rc; - /* Arbitrary length */ - data_len = 3; total = data_len + sizeof(struct mctp_hdr); - __mctp_route_test_init(test, &dev, &rt, &sock, MCTP_NET_ANY); + __mctp_route_test_init(test, &dev, &dst, &tpq, &sock, MCTP_NET_ANY); /* Create a single skb initially with concatenated packets */ skb[0] = mctp_test_create_skb(&hdrs[0], 5 * total); @@ -988,7 +884,7 @@ static void mctp_test_route_input_cloned_frag(struct kunit *test) /* Feed the fragments into MCTP core */ for (int i = 0; i < 5; i++) { - rc = mctp_route_input(&rt->rt, skb[i]); + rc = mctp_dst_input(&dst, skb[i]); KUNIT_EXPECT_EQ(test, rc, 0); } @@ -1026,29 +922,29 @@ static void mctp_test_route_input_cloned_frag(struct kunit *test) kfree_skb(skb[i]); } - __mctp_route_test_fini(test, dev, rt, sock); + __mctp_route_test_fini(test, dev, &dst, &tpq, sock); } #if IS_ENABLED(CONFIG_MCTP_FLOWS) static void mctp_test_flow_init(struct kunit *test, struct mctp_test_dev **devp, - struct mctp_test_route **rtp, + struct mctp_dst *dst, + struct mctp_test_pktqueue *tpq, struct socket **sock, struct sk_buff **skbp, unsigned int len) { - struct mctp_test_route *rt; struct mctp_test_dev *dev; struct sk_buff *skb; /* we have a slightly odd routing setup here; the test route * is for EID 8, which is our local EID. 
We don't do a routing * lookup, so that's fine - all we require is a path through - * mctp_local_output, which will call rt->output on whatever + * mctp_local_output, which will call dst->output on whatever * route we provide */ - __mctp_route_test_init(test, &dev, &rt, sock, MCTP_NET_ANY); + __mctp_route_test_init(test, &dev, dst, tpq, sock, MCTP_NET_ANY); /* Assign a single EID. ->addrs is freed on mctp netdev release */ dev->mdev->addrs = kmalloc(sizeof(u8), GFP_KERNEL); @@ -1061,42 +957,41 @@ static void mctp_test_flow_init(struct kunit *test, skb_reserve(skb, sizeof(struct mctp_hdr) + 1); memset(skb_put(skb, len), 0, len); - /* take a ref for the route, we'll decrement in local output */ - refcount_inc(&rt->rt.refs); *devp = dev; - *rtp = rt; *skbp = skb; } static void mctp_test_flow_fini(struct kunit *test, struct mctp_test_dev *dev, - struct mctp_test_route *rt, + struct mctp_dst *dst, + struct mctp_test_pktqueue *tpq, struct socket *sock) { - __mctp_route_test_fini(test, dev, rt, sock); + __mctp_route_test_fini(test, dev, dst, tpq, sock); } /* test that an outgoing skb has the correct MCTP extension data set */ static void mctp_test_packet_flow(struct kunit *test) { + struct mctp_test_pktqueue tpq; struct sk_buff *skb, *skb2; - struct mctp_test_route *rt; struct mctp_test_dev *dev; + struct mctp_dst dst; struct mctp_flow *flow; struct socket *sock; - u8 dst = 8; + u8 dst_eid = 8; int n, rc; - mctp_test_flow_init(test, &dev, &rt, &sock, &skb, 30); + mctp_test_flow_init(test, &dev, &dst, &tpq, &sock, &skb, 30); - rc = mctp_local_output(sock->sk, &rt->rt, skb, dst, MCTP_TAG_OWNER); + rc = mctp_local_output(sock->sk, &dst, skb, dst_eid, MCTP_TAG_OWNER); KUNIT_ASSERT_EQ(test, rc, 0); - n = rt->pkts.qlen; + n = tpq.pkts.qlen; KUNIT_ASSERT_EQ(test, n, 1); - skb2 = skb_dequeue(&rt->pkts); + skb2 = skb_dequeue(&tpq.pkts); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb2); flow = skb_ext_find(skb2, SKB_EXT_MCTP); @@ -1105,7 +1000,7 @@ static void mctp_test_packet_flow(struct 
kunit *test) KUNIT_ASSERT_PTR_EQ(test, flow->key->sk, sock->sk); kfree_skb(skb2); - mctp_test_flow_fini(test, dev, rt, sock); + mctp_test_flow_fini(test, dev, &dst, &tpq, sock); } /* test that outgoing skbs, after fragmentation, all have the correct MCTP @@ -1113,26 +1008,27 @@ static void mctp_test_packet_flow(struct kunit *test) */ static void mctp_test_fragment_flow(struct kunit *test) { + struct mctp_test_pktqueue tpq; struct mctp_flow *flows[2]; struct sk_buff *tx_skbs[2]; - struct mctp_test_route *rt; struct mctp_test_dev *dev; + struct mctp_dst dst; struct sk_buff *skb; struct socket *sock; - u8 dst = 8; + u8 dst_eid = 8; int n, rc; - mctp_test_flow_init(test, &dev, &rt, &sock, &skb, 100); + mctp_test_flow_init(test, &dev, &dst, &tpq, &sock, &skb, 100); - rc = mctp_local_output(sock->sk, &rt->rt, skb, dst, MCTP_TAG_OWNER); + rc = mctp_local_output(sock->sk, &dst, skb, dst_eid, MCTP_TAG_OWNER); KUNIT_ASSERT_EQ(test, rc, 0); - n = rt->pkts.qlen; + n = tpq.pkts.qlen; KUNIT_ASSERT_EQ(test, n, 2); /* both resulting packets should have the same flow data */ - tx_skbs[0] = skb_dequeue(&rt->pkts); - tx_skbs[1] = skb_dequeue(&rt->pkts); + tx_skbs[0] = skb_dequeue(&tpq.pkts); + tx_skbs[1] = skb_dequeue(&tpq.pkts); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, tx_skbs[0]); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, tx_skbs[1]); @@ -1148,7 +1044,7 @@ static void mctp_test_fragment_flow(struct kunit *test) kfree_skb(tx_skbs[0]); kfree_skb(tx_skbs[1]); - mctp_test_flow_fini(test, dev, rt, sock); + mctp_test_flow_fini(test, dev, &dst, &tpq, sock); } #else @@ -1166,15 +1062,16 @@ static void mctp_test_fragment_flow(struct kunit *test) /* Test that outgoing skbs cause a suitable tag to be created */ static void mctp_test_route_output_key_create(struct kunit *test) { + const u8 dst_eid = 26, src_eid = 15; + struct mctp_test_pktqueue tpq; const unsigned int netid = 50; - const u8 dst = 26, src = 15; - struct mctp_test_route *rt; struct mctp_test_dev *dev; struct mctp_sk_key *key; struct 
netns_mctp *mns; unsigned long flags; struct socket *sock; struct sk_buff *skb; + struct mctp_dst dst; bool empty, single; const int len = 2; int rc; @@ -1183,15 +1080,14 @@ static void mctp_test_route_output_key_create(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); WRITE_ONCE(dev->mdev->net, netid); - rt = mctp_test_create_route(&init_net, dev->mdev, dst, 68); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt); + mctp_test_dst_setup(test, &dst, dev, &tpq, 68); rc = sock_create_kern(&init_net, AF_MCTP, SOCK_DGRAM, 0, &sock); KUNIT_ASSERT_EQ(test, rc, 0); dev->mdev->addrs = kmalloc(sizeof(u8), GFP_KERNEL); dev->mdev->num_addrs = 1; - dev->mdev->addrs[0] = src; + dev->mdev->addrs[0] = src_eid; skb = alloc_skb(sizeof(struct mctp_hdr) + 1 + len, GFP_KERNEL); KUNIT_ASSERT_TRUE(test, skb); @@ -1199,8 +1095,6 @@ static void mctp_test_route_output_key_create(struct kunit *test) skb_reserve(skb, sizeof(struct mctp_hdr) + 1 + len); memset(skb_put(skb, len), 0, len); - refcount_inc(&rt->rt.refs); - mns = &sock_net(sock->sk)->mctp; /* We assume we're starting from an empty keys list, which requires @@ -1211,7 +1105,7 @@ static void mctp_test_route_output_key_create(struct kunit *test) spin_unlock_irqrestore(&mns->keys_lock, flags); KUNIT_ASSERT_TRUE(test, empty); - rc = mctp_local_output(sock->sk, &rt->rt, skb, dst, MCTP_TAG_OWNER); + rc = mctp_local_output(sock->sk, &dst, skb, dst_eid, MCTP_TAG_OWNER); KUNIT_ASSERT_EQ(test, rc, 0); key = NULL; @@ -1227,16 +1121,295 @@ static void mctp_test_route_output_key_create(struct kunit *test) KUNIT_ASSERT_TRUE(test, single); KUNIT_EXPECT_EQ(test, key->net, netid); - KUNIT_EXPECT_EQ(test, key->local_addr, src); - KUNIT_EXPECT_EQ(test, key->peer_addr, dst); + KUNIT_EXPECT_EQ(test, key->local_addr, src_eid); + KUNIT_EXPECT_EQ(test, key->peer_addr, dst_eid); /* key has incoming tag, so inverse of what we sent */ KUNIT_EXPECT_FALSE(test, key->tag & MCTP_TAG_OWNER); sock_release(sock); - mctp_test_route_destroy(test, rt); + 
mctp_test_dst_release(&dst, &tpq); mctp_test_destroy_dev(dev); } +static void mctp_test_route_extaddr_input(struct kunit *test) +{ + static const unsigned char haddr[] = { 0xaa, 0x55 }; + struct mctp_test_pktqueue tpq; + struct mctp_skb_cb *cb, *cb2; + const unsigned int len = 40; + struct mctp_test_dev *dev; + struct sk_buff *skb, *skb2; + struct mctp_dst dst; + struct mctp_hdr hdr; + struct socket *sock; + int rc; + + hdr.ver = 1; + hdr.src = 10; + hdr.dest = 8; + hdr.flags_seq_tag = FL_S | FL_E | FL_TO; + + __mctp_route_test_init(test, &dev, &dst, &tpq, &sock, MCTP_NET_ANY); + + skb = mctp_test_create_skb(&hdr, len); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb); + + /* set our hardware addressing data */ + cb = mctp_cb(skb); + memcpy(cb->haddr, haddr, sizeof(haddr)); + cb->halen = sizeof(haddr); + + mctp_test_skb_set_dev(skb, dev); + + rc = mctp_dst_input(&dst, skb); + KUNIT_ASSERT_EQ(test, rc, 0); + + mctp_test_dst_release(&dst, &tpq); + + skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb2); + KUNIT_ASSERT_EQ(test, skb2->len, len); + + cb2 = mctp_cb(skb2); + + /* Received SKB should have the hardware addressing as set above. 
+ * We're likely to have the same actual cb here (ie., cb == cb2), + * but it's the comparison that we care about + */ + KUNIT_EXPECT_EQ(test, cb2->halen, sizeof(haddr)); + KUNIT_EXPECT_MEMEQ(test, cb2->haddr, haddr, sizeof(haddr)); + + skb_free_datagram(sock->sk, skb2); + mctp_test_destroy_dev(dev); +} + +static void mctp_test_route_gw_lookup(struct kunit *test) +{ + struct mctp_test_route *rt1, *rt2; + struct mctp_dst dst = { 0 }; + struct mctp_test_dev *dev; + int rc; + + dev = mctp_test_create_dev(); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + /* 8 (local) -> 10 (gateway) via 9 (direct) */ + rt1 = mctp_test_create_route_direct(&init_net, dev->mdev, 9, 0); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt1); + rt2 = mctp_test_create_route_gw(&init_net, dev->mdev->net, 10, 9, 0); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt2); + + rc = mctp_route_lookup(&init_net, dev->mdev->net, 10, &dst); + KUNIT_EXPECT_EQ(test, rc, 0); + KUNIT_EXPECT_PTR_EQ(test, dst.dev, dev->mdev); + KUNIT_EXPECT_EQ(test, dst.mtu, dev->ndev->mtu); + KUNIT_EXPECT_EQ(test, dst.nexthop, 9); + KUNIT_EXPECT_EQ(test, dst.halen, 0); + + mctp_dst_release(&dst); + + mctp_test_route_destroy(test, rt2); + mctp_test_route_destroy(test, rt1); + mctp_test_destroy_dev(dev); +} + +static void mctp_test_route_gw_loop(struct kunit *test) +{ + struct mctp_test_route *rt1, *rt2; + struct mctp_dst dst = { 0 }; + struct mctp_test_dev *dev; + int rc; + + dev = mctp_test_create_dev(); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + /* two routes using each other as the gw */ + rt1 = mctp_test_create_route_gw(&init_net, dev->mdev->net, 9, 10, 0); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt1); + rt2 = mctp_test_create_route_gw(&init_net, dev->mdev->net, 10, 9, 0); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt2); + + /* this should fail, rather than infinite-loop */ + rc = mctp_route_lookup(&init_net, dev->mdev->net, 10, &dst); + KUNIT_EXPECT_NE(test, rc, 0); + + mctp_test_route_destroy(test, rt2); + mctp_test_route_destroy(test, rt1); + 
mctp_test_destroy_dev(dev); +} + +struct mctp_route_gw_mtu_test { + /* working away from the local stack */ + unsigned int dev, neigh, gw, dst; + unsigned int exp; +}; + +static void mctp_route_gw_mtu_to_desc(const struct mctp_route_gw_mtu_test *t, + char *desc) +{ + sprintf(desc, "dev %d, neigh %d, gw %d, dst %d -> %d", + t->dev, t->neigh, t->gw, t->dst, t->exp); +} + +static const struct mctp_route_gw_mtu_test mctp_route_gw_mtu_tests[] = { + /* no route-specific MTUs */ + { 68, 0, 0, 0, 68 }, + { 100, 0, 0, 0, 100 }, + /* one route MTU (smaller than dev mtu), others unrestricted */ + { 100, 68, 0, 0, 68 }, + { 100, 0, 68, 0, 68 }, + { 100, 0, 0, 68, 68 }, + /* smallest applied, regardless of order */ + { 100, 99, 98, 68, 68 }, + { 99, 100, 98, 68, 68 }, + { 98, 99, 100, 68, 68 }, + { 68, 98, 99, 100, 68 }, +}; + +KUNIT_ARRAY_PARAM(mctp_route_gw_mtu, mctp_route_gw_mtu_tests, + mctp_route_gw_mtu_to_desc); + +static void mctp_test_route_gw_mtu(struct kunit *test) +{ + const struct mctp_route_gw_mtu_test *mtus = test->param_value; + struct mctp_test_route *rt1, *rt2, *rt3; + struct mctp_dst dst = { 0 }; + struct mctp_test_dev *dev; + struct mctp_dev *mdev; + unsigned int netid; + int rc; + + dev = mctp_test_create_dev(); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + dev->ndev->mtu = mtus->dev; + mdev = dev->mdev; + netid = mdev->net; + + /* 8 (local) -> 11 (dst) via 10 (gw) via 9 (neigh) */ + rt1 = mctp_test_create_route_direct(&init_net, mdev, 9, mtus->neigh); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt1); + + rt2 = mctp_test_create_route_gw(&init_net, netid, 10, 9, mtus->gw); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt2); + + rt3 = mctp_test_create_route_gw(&init_net, netid, 11, 10, mtus->dst); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt3); + + rc = mctp_route_lookup(&init_net, dev->mdev->net, 11, &dst); + KUNIT_EXPECT_EQ(test, rc, 0); + KUNIT_EXPECT_EQ(test, dst.mtu, mtus->exp); + + mctp_dst_release(&dst); + + mctp_test_route_destroy(test, rt3); + mctp_test_route_destroy(test, 
rt2); + mctp_test_route_destroy(test, rt1); + mctp_test_destroy_dev(dev); +} + +#define MCTP_TEST_LLADDR_LEN 2 +struct mctp_test_llhdr { + unsigned int magic; + unsigned char src[MCTP_TEST_LLADDR_LEN]; + unsigned char dst[MCTP_TEST_LLADDR_LEN]; +}; + +static const unsigned int mctp_test_llhdr_magic = 0x5c78339c; + +static int test_dev_header_create(struct sk_buff *skb, struct net_device *dev, + unsigned short type, const void *daddr, + const void *saddr, unsigned int len) +{ + struct kunit *test = current->kunit_test; + struct mctp_test_llhdr *hdr; + + hdr = skb_push(skb, sizeof(*hdr)); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, hdr); + skb_reset_mac_header(skb); + + hdr->magic = mctp_test_llhdr_magic; + memcpy(&hdr->src, saddr, sizeof(hdr->src)); + memcpy(&hdr->dst, daddr, sizeof(hdr->dst)); + + return 0; +} + +/* Test the dst_output path for a gateway-routed skb: we should have it + * lookup the nexthop EID in the neighbour table, and call into + * header_ops->create to resolve that to a lladdr. Our mock header_ops->create + * will just set a synthetic link-layer header, which we check after transmit. + */ +static void mctp_test_route_gw_output(struct kunit *test) +{ + const unsigned char haddr_self[MCTP_TEST_LLADDR_LEN] = { 0xaa, 0x03 }; + const unsigned char haddr_peer[MCTP_TEST_LLADDR_LEN] = { 0xaa, 0x02 }; + const struct header_ops ops = { + .create = test_dev_header_create, + }; + struct mctp_neigh neigh = { 0 }; + struct mctp_test_llhdr *ll_hdr; + struct mctp_dst dst = { 0 }; + struct mctp_hdr hdr = { 0 }; + struct mctp_test_dev *dev; + struct sk_buff *skb; + unsigned char *buf; + int i, rc; + + dev = mctp_test_create_dev_lladdr(sizeof(haddr_self), haddr_self); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + dev->ndev->header_ops = &ops; + + dst.dev = dev->mdev; + __mctp_dev_get(dst.dev->dev); + dst.mtu = 68; + dst.nexthop = 9; + + /* simple mctp_neigh_add for the gateway (not dest!) 
endpoint */ + INIT_LIST_HEAD(&neigh.list); + neigh.dev = dev->mdev; + mctp_dev_hold(dev->mdev); + neigh.eid = 9; + neigh.source = MCTP_NEIGH_STATIC; + memcpy(neigh.ha, haddr_peer, sizeof(haddr_peer)); + list_add_rcu(&neigh.list, &init_net.mctp.neighbours); + + hdr.ver = 1; + hdr.src = 8; + hdr.dest = 10; + hdr.flags_seq_tag = FL_S | FL_E | FL_TO; + + /* construct enough for a future link-layer header, the provided + * mctp header, and 4 bytes of data + */ + skb = alloc_skb(sizeof(*ll_hdr) + sizeof(hdr) + 4, GFP_KERNEL); + skb->dev = dev->ndev; + __mctp_cb(skb); + + skb_reserve(skb, sizeof(*ll_hdr)); + + memcpy(skb_put(skb, sizeof(hdr)), &hdr, sizeof(hdr)); + buf = skb_put(skb, 4); + for (i = 0; i < 4; i++) + buf[i] = i; + + /* extra ref over the dev_xmit */ + skb_get(skb); + + rc = mctp_dst_output(&dst, skb); + KUNIT_EXPECT_EQ(test, rc, 0); + + mctp_dst_release(&dst); + list_del_rcu(&neigh.list); + mctp_dev_put(dev->mdev); + + /* check that we have our header created with the correct neighbour */ + ll_hdr = (void *)skb_mac_header(skb); + KUNIT_EXPECT_EQ(test, ll_hdr->magic, mctp_test_llhdr_magic); + KUNIT_EXPECT_MEMEQ(test, ll_hdr->src, haddr_self, sizeof(haddr_self)); + KUNIT_EXPECT_MEMEQ(test, ll_hdr->dst, haddr_peer, sizeof(haddr_peer)); + kfree_skb(skb); +} + static struct kunit_case mctp_test_cases[] = { KUNIT_CASE_PARAM(mctp_test_fragment, mctp_frag_gen_params), KUNIT_CASE_PARAM(mctp_test_rx_input, mctp_rx_input_gen_params), @@ -1253,11 +1426,16 @@ static struct kunit_case mctp_test_cases[] = { KUNIT_CASE(mctp_test_fragment_flow), KUNIT_CASE(mctp_test_route_output_key_create), KUNIT_CASE(mctp_test_route_input_cloned_frag), + KUNIT_CASE(mctp_test_route_extaddr_input), + KUNIT_CASE(mctp_test_route_gw_lookup), + KUNIT_CASE(mctp_test_route_gw_loop), + KUNIT_CASE_PARAM(mctp_test_route_gw_mtu, mctp_route_gw_mtu_gen_params), + KUNIT_CASE(mctp_test_route_gw_output), {} }; static struct kunit_suite mctp_test_suite = { - .name = "mctp", + .name = "mctp-route", 
.test_cases = mctp_test_cases, }; diff --git a/net/mctp/test/sock-test.c b/net/mctp/test/sock-test.c new file mode 100644 index 000000000000..4eb3a724dca3 --- /dev/null +++ b/net/mctp/test/sock-test.c @@ -0,0 +1,229 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <kunit/static_stub.h> +#include <kunit/test.h> + +#include <linux/socket.h> +#include <linux/spinlock.h> + +#include "utils.h" + +static const u8 dev_default_lladdr[] = { 0x01, 0x02 }; + +/* helper for simple sock setup: single device, with dev_default_lladdr as its + * hardware address, assigned with a local EID 8, and a route to EID 9 + */ +static void __mctp_sock_test_init(struct kunit *test, + struct mctp_test_dev **devp, + struct mctp_test_route **rtp, + struct socket **sockp) +{ + struct mctp_test_route *rt; + struct mctp_test_dev *dev; + struct socket *sock; + unsigned long flags; + u8 *addrs; + int rc; + + dev = mctp_test_create_dev_lladdr(sizeof(dev_default_lladdr), + dev_default_lladdr); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + addrs = kmalloc(1, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, addrs); + addrs[0] = 8; + + spin_lock_irqsave(&dev->mdev->addrs_lock, flags); + dev->mdev->num_addrs = 1; + swap(addrs, dev->mdev->addrs); + spin_unlock_irqrestore(&dev->mdev->addrs_lock, flags); + + kfree(addrs); + + rt = mctp_test_create_route_direct(dev_net(dev->ndev), dev->mdev, 9, 0); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt); + + rc = sock_create_kern(&init_net, AF_MCTP, SOCK_DGRAM, 0, &sock); + KUNIT_ASSERT_EQ(test, rc, 0); + + *devp = dev; + *rtp = rt; + *sockp = sock; +} + +static void __mctp_sock_test_fini(struct kunit *test, + struct mctp_test_dev *dev, + struct mctp_test_route *rt, + struct socket *sock) +{ + sock_release(sock); + mctp_test_route_destroy(test, rt); + mctp_test_destroy_dev(dev); +} + +struct mctp_test_sock_local_output_config { + struct mctp_test_dev *dev; + size_t halen; + u8 haddr[MAX_ADDR_LEN]; + bool invoked; + int rc; +}; + +static int 
mctp_test_sock_local_output(struct sock *sk, + struct mctp_dst *dst, + struct sk_buff *skb, + mctp_eid_t daddr, u8 req_tag) +{ + struct kunit *test = kunit_get_current_test(); + struct mctp_test_sock_local_output_config *cfg = test->priv; + + KUNIT_EXPECT_PTR_EQ(test, dst->dev, cfg->dev->mdev); + KUNIT_EXPECT_EQ(test, dst->halen, cfg->halen); + KUNIT_EXPECT_MEMEQ(test, dst->haddr, cfg->haddr, dst->halen); + + cfg->invoked = true; + + kfree_skb(skb); + + return cfg->rc; +} + +static void mctp_test_sock_sendmsg_extaddr(struct kunit *test) +{ + struct sockaddr_mctp_ext addr = { + .smctp_base = { + .smctp_family = AF_MCTP, + .smctp_tag = MCTP_TAG_OWNER, + .smctp_network = MCTP_NET_ANY, + }, + }; + struct mctp_test_sock_local_output_config cfg = { 0 }; + u8 haddr[] = { 0xaa, 0x01 }; + u8 buf[4] = { 0, 1, 2, 3 }; + struct mctp_test_route *rt; + struct msghdr msg = { 0 }; + struct mctp_test_dev *dev; + struct mctp_sock *msk; + struct socket *sock; + ssize_t send_len; + struct kvec vec = { + .iov_base = buf, + .iov_len = sizeof(buf), + }; + + __mctp_sock_test_init(test, &dev, &rt, &sock); + + /* Expect to see the dst configured up with the addressing data we + * provide in the struct sockaddr_mctp_ext + */ + cfg.dev = dev; + cfg.halen = sizeof(haddr); + memcpy(cfg.haddr, haddr, sizeof(haddr)); + + test->priv = &cfg; + + kunit_activate_static_stub(test, mctp_local_output, + mctp_test_sock_local_output); + + /* enable and configure direct addressing */ + msk = container_of(sock->sk, struct mctp_sock, sk); + msk->addr_ext = true; + + addr.smctp_ifindex = dev->ndev->ifindex; + addr.smctp_halen = sizeof(haddr); + memcpy(addr.smctp_haddr, haddr, sizeof(haddr)); + + msg.msg_name = &addr; + msg.msg_namelen = sizeof(addr); + + iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &vec, 1, sizeof(buf)); + send_len = mctp_sendmsg(sock, &msg, sizeof(buf)); + KUNIT_EXPECT_EQ(test, send_len, sizeof(buf)); + KUNIT_EXPECT_TRUE(test, cfg.invoked); + + __mctp_sock_test_fini(test, dev, rt, sock); +} + 
+static void mctp_test_sock_recvmsg_extaddr(struct kunit *test) +{ + struct sockaddr_mctp_ext recv_addr = { 0 }; + u8 rcv_buf[1], rcv_data[] = { 0, 1 }; + u8 haddr[] = { 0xaa, 0x02 }; + struct mctp_test_route *rt; + struct mctp_test_dev *dev; + struct mctp_skb_cb *cb; + struct mctp_sock *msk; + struct sk_buff *skb; + struct mctp_hdr hdr; + struct socket *sock; + struct msghdr msg; + ssize_t recv_len; + int rc; + struct kvec vec = { + .iov_base = rcv_buf, + .iov_len = sizeof(rcv_buf), + }; + + __mctp_sock_test_init(test, &dev, &rt, &sock); + + /* enable extended addressing on recv */ + msk = container_of(sock->sk, struct mctp_sock, sk); + msk->addr_ext = true; + + /* base incoming header, using a nul-EID dest */ + hdr.ver = 1; + hdr.dest = 0; + hdr.src = 9; + hdr.flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM | + MCTP_HDR_FLAG_TO; + + skb = mctp_test_create_skb_data(&hdr, &rcv_data); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb); + + mctp_test_skb_set_dev(skb, dev); + + /* set incoming extended address data */ + cb = mctp_cb(skb); + cb->halen = sizeof(haddr); + cb->ifindex = dev->ndev->ifindex; + memcpy(cb->haddr, haddr, sizeof(haddr)); + + /* Deliver to socket. The route input path pulls the network header, + * leaving skb data at type byte onwards. 
recvmsg will consume the + * type for addr.smctp_type + */ + skb_pull(skb, sizeof(hdr)); + rc = sock_queue_rcv_skb(sock->sk, skb); + KUNIT_ASSERT_EQ(test, rc, 0); + + msg.msg_name = &recv_addr; + msg.msg_namelen = sizeof(recv_addr); + iov_iter_kvec(&msg.msg_iter, ITER_DEST, &vec, 1, sizeof(rcv_buf)); + + recv_len = mctp_recvmsg(sock, &msg, sizeof(rcv_buf), + MSG_DONTWAIT | MSG_TRUNC); + + KUNIT_EXPECT_EQ(test, recv_len, sizeof(rcv_buf)); + + /* expect our extended address to be populated from hdr and cb */ + KUNIT_EXPECT_EQ(test, msg.msg_namelen, sizeof(recv_addr)); + KUNIT_EXPECT_EQ(test, recv_addr.smctp_base.smctp_family, AF_MCTP); + KUNIT_EXPECT_EQ(test, recv_addr.smctp_ifindex, dev->ndev->ifindex); + KUNIT_EXPECT_EQ(test, recv_addr.smctp_halen, sizeof(haddr)); + KUNIT_EXPECT_MEMEQ(test, recv_addr.smctp_haddr, haddr, sizeof(haddr)); + + __mctp_sock_test_fini(test, dev, rt, sock); +} + +static struct kunit_case mctp_test_cases[] = { + KUNIT_CASE(mctp_test_sock_sendmsg_extaddr), + KUNIT_CASE(mctp_test_sock_recvmsg_extaddr), + {} +}; + +static struct kunit_suite mctp_test_suite = { + .name = "mctp-sock", + .test_cases = mctp_test_cases, +}; + +kunit_test_suite(mctp_test_suite); diff --git a/net/mctp/test/utils.c b/net/mctp/test/utils.c index 565763eb0211..01f5af416b81 100644 --- a/net/mctp/test/utils.c +++ b/net/mctp/test/utils.c @@ -26,19 +26,22 @@ static void mctp_test_dev_setup(struct net_device *ndev) ndev->type = ARPHRD_MCTP; ndev->mtu = MCTP_DEV_TEST_MTU; ndev->hard_header_len = 0; - ndev->addr_len = 0; ndev->tx_queue_len = DEFAULT_TX_QUEUE_LEN; ndev->flags = IFF_NOARP; ndev->netdev_ops = &mctp_test_netdev_ops; ndev->needs_free_netdev = true; } -struct mctp_test_dev *mctp_test_create_dev(void) +static struct mctp_test_dev *__mctp_test_create_dev(unsigned short lladdr_len, + const unsigned char *lladdr) { struct mctp_test_dev *dev; struct net_device *ndev; int rc; + if (WARN_ON(lladdr_len > MAX_ADDR_LEN)) + return NULL; + ndev = alloc_netdev(sizeof(*dev), 
"mctptest%d", NET_NAME_ENUM, mctp_test_dev_setup); if (!ndev) @@ -46,6 +49,8 @@ struct mctp_test_dev *mctp_test_create_dev(void) dev = netdev_priv(ndev); dev->ndev = ndev; + ndev->addr_len = lladdr_len; + dev_addr_set(ndev, lladdr); rc = register_netdev(ndev); if (rc) { @@ -61,8 +66,195 @@ struct mctp_test_dev *mctp_test_create_dev(void) return dev; } +struct mctp_test_dev *mctp_test_create_dev(void) +{ + return __mctp_test_create_dev(0, NULL); +} + +struct mctp_test_dev *mctp_test_create_dev_lladdr(unsigned short lladdr_len, + const unsigned char *lladdr) +{ + return __mctp_test_create_dev(lladdr_len, lladdr); +} + void mctp_test_destroy_dev(struct mctp_test_dev *dev) { mctp_dev_put(dev->mdev); unregister_netdev(dev->ndev); } + +static const unsigned int test_pktqueue_magic = 0x5f713aef; + +void mctp_test_pktqueue_init(struct mctp_test_pktqueue *tpq) +{ + tpq->magic = test_pktqueue_magic; + skb_queue_head_init(&tpq->pkts); +} + +static int mctp_test_dst_output(struct mctp_dst *dst, struct sk_buff *skb) +{ + struct kunit *test = current->kunit_test; + struct mctp_test_pktqueue *tpq = test->priv; + + KUNIT_ASSERT_EQ(test, tpq->magic, test_pktqueue_magic); + + skb_queue_tail(&tpq->pkts, skb); + + return 0; +} + +/* local version of mctp_route_alloc() */ +static struct mctp_test_route *mctp_route_test_alloc(void) +{ + struct mctp_test_route *rt; + + rt = kzalloc(sizeof(*rt), GFP_KERNEL); + if (!rt) + return NULL; + + INIT_LIST_HEAD(&rt->rt.list); + refcount_set(&rt->rt.refs, 1); + rt->rt.output = mctp_test_dst_output; + + return rt; +} + +struct mctp_test_route *mctp_test_create_route_direct(struct net *net, + struct mctp_dev *dev, + mctp_eid_t eid, + unsigned int mtu) +{ + struct mctp_test_route *rt; + + rt = mctp_route_test_alloc(); + if (!rt) + return NULL; + + rt->rt.min = eid; + rt->rt.max = eid; + rt->rt.mtu = mtu; + rt->rt.type = RTN_UNSPEC; + rt->rt.dst_type = MCTP_ROUTE_DIRECT; + if (dev) + mctp_dev_hold(dev); + rt->rt.dev = dev; + + 
list_add_rcu(&rt->rt.list, &net->mctp.routes); + + return rt; +} + +struct mctp_test_route *mctp_test_create_route_gw(struct net *net, + unsigned int netid, + mctp_eid_t eid, + mctp_eid_t gw, + unsigned int mtu) +{ + struct mctp_test_route *rt; + + rt = mctp_route_test_alloc(); + if (!rt) + return NULL; + + rt->rt.min = eid; + rt->rt.max = eid; + rt->rt.mtu = mtu; + rt->rt.type = RTN_UNSPEC; + rt->rt.dst_type = MCTP_ROUTE_GATEWAY; + rt->rt.gateway.eid = gw; + rt->rt.gateway.net = netid; + + list_add_rcu(&rt->rt.list, &net->mctp.routes); + + return rt; +} + +/* Convenience function for our test dst; release with mctp_test_dst_release() + */ +void mctp_test_dst_setup(struct kunit *test, struct mctp_dst *dst, + struct mctp_test_dev *dev, + struct mctp_test_pktqueue *tpq, unsigned int mtu) +{ + KUNIT_EXPECT_NOT_ERR_OR_NULL(test, dev); + + memset(dst, 0, sizeof(*dst)); + + dst->dev = dev->mdev; + __mctp_dev_get(dst->dev->dev); + dst->mtu = mtu; + dst->output = mctp_test_dst_output; + mctp_test_pktqueue_init(tpq); + test->priv = tpq; +} + +void mctp_test_dst_release(struct mctp_dst *dst, + struct mctp_test_pktqueue *tpq) +{ + mctp_dst_release(dst); + skb_queue_purge(&tpq->pkts); +} + +void mctp_test_route_destroy(struct kunit *test, struct mctp_test_route *rt) +{ + unsigned int refs; + + rtnl_lock(); + list_del_rcu(&rt->rt.list); + rtnl_unlock(); + + if (rt->rt.dst_type == MCTP_ROUTE_DIRECT && rt->rt.dev) + mctp_dev_put(rt->rt.dev); + + refs = refcount_read(&rt->rt.refs); + KUNIT_ASSERT_EQ_MSG(test, refs, 1, "route ref imbalance"); + + kfree_rcu(&rt->rt, rcu); +} + +void mctp_test_skb_set_dev(struct sk_buff *skb, struct mctp_test_dev *dev) +{ + struct mctp_skb_cb *cb; + + cb = mctp_cb(skb); + cb->net = READ_ONCE(dev->mdev->net); + skb->dev = dev->ndev; +} + +struct sk_buff *mctp_test_create_skb(const struct mctp_hdr *hdr, + unsigned int data_len) +{ + size_t hdr_len = sizeof(*hdr); + struct sk_buff *skb; + unsigned int i; + u8 *buf; + + skb = alloc_skb(hdr_len + 
data_len, GFP_KERNEL); + if (!skb) + return NULL; + + __mctp_cb(skb); + memcpy(skb_put(skb, hdr_len), hdr, hdr_len); + + buf = skb_put(skb, data_len); + for (i = 0; i < data_len; i++) + buf[i] = i & 0xff; + + return skb; +} + +struct sk_buff *__mctp_test_create_skb_data(const struct mctp_hdr *hdr, + const void *data, size_t data_len) +{ + size_t hdr_len = sizeof(*hdr); + struct sk_buff *skb; + + skb = alloc_skb(hdr_len + data_len, GFP_KERNEL); + if (!skb) + return NULL; + + __mctp_cb(skb); + memcpy(skb_put(skb, hdr_len), hdr, hdr_len); + memcpy(skb_put(skb, data_len), data, data_len); + + return skb; +} diff --git a/net/mctp/test/utils.h b/net/mctp/test/utils.h index df6aa1c03440..f10d1d9066cc 100644 --- a/net/mctp/test/utils.h +++ b/net/mctp/test/utils.h @@ -3,6 +3,11 @@ #ifndef __NET_MCTP_TEST_UTILS_H #define __NET_MCTP_TEST_UTILS_H +#include <uapi/linux/netdevice.h> + +#include <net/mctp.h> +#include <net/mctpdevice.h> + #include <kunit/test.h> #define MCTP_DEV_TEST_MTU 68 @@ -10,11 +15,50 @@ struct mctp_test_dev { struct net_device *ndev; struct mctp_dev *mdev; + + unsigned short lladdr_len; + unsigned char lladdr[MAX_ADDR_LEN]; }; struct mctp_test_dev; +struct mctp_test_route { + struct mctp_route rt; +}; + +struct mctp_test_pktqueue { + unsigned int magic; + struct sk_buff_head pkts; +}; + struct mctp_test_dev *mctp_test_create_dev(void); +struct mctp_test_dev *mctp_test_create_dev_lladdr(unsigned short lladdr_len, + const unsigned char *lladdr); void mctp_test_destroy_dev(struct mctp_test_dev *dev); +struct mctp_test_route *mctp_test_create_route_direct(struct net *net, + struct mctp_dev *dev, + mctp_eid_t eid, + unsigned int mtu); +struct mctp_test_route *mctp_test_create_route_gw(struct net *net, + unsigned int netid, + mctp_eid_t eid, + mctp_eid_t gw, + unsigned int mtu); +void mctp_test_dst_setup(struct kunit *test, struct mctp_dst *dst, + struct mctp_test_dev *dev, + struct mctp_test_pktqueue *tpq, unsigned int mtu); +void mctp_test_dst_release(struct 
mctp_dst *dst, + struct mctp_test_pktqueue *tpq); +void mctp_test_pktqueue_init(struct mctp_test_pktqueue *tpq); +void mctp_test_route_destroy(struct kunit *test, struct mctp_test_route *rt); +void mctp_test_skb_set_dev(struct sk_buff *skb, struct mctp_test_dev *dev); +struct sk_buff *mctp_test_create_skb(const struct mctp_hdr *hdr, + unsigned int data_len); +struct sk_buff *__mctp_test_create_skb_data(const struct mctp_hdr *hdr, + const void *data, size_t data_len); + +#define mctp_test_create_skb_data(h, d) \ + __mctp_test_create_skb_data(h, d, sizeof(*d)) + #endif /* __NET_MCTP_TEST_UTILS_H */ diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index edf14c2c2062..5f904fc5ac4c 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -11,6 +11,7 @@ #include <linux/netdevice.h> #include <linux/sched/signal.h> #include <linux/atomic.h> +#include <net/aligned_data.h> #include <net/sock.h> #include <net/inet_common.h> #include <net/inet_hashtables.h> @@ -3503,7 +3504,7 @@ void mptcp_sock_graft(struct sock *sk, struct socket *parent) write_lock_bh(&sk->sk_callback_lock); rcu_assign_pointer(sk->sk_wq, &parent->wq); sk_set_socket(sk, parent); - sk->sk_uid = SOCK_INODE(parent)->i_uid; + WRITE_ONCE(sk->sk_uid, SOCK_INODE(parent)->i_uid); write_unlock_bh(&sk->sk_callback_lock); } @@ -3729,7 +3730,7 @@ static struct proto mptcp_prot = { .stream_memory_free = mptcp_stream_memory_free, .sockets_allocated = &mptcp_sockets_allocated, - .memory_allocated = &tcp_memory_allocated, + .memory_allocated = &net_aligned_data.tcp_memory_allocated, .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc, .memory_pressure = &tcp_memory_pressure, diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index e76c6de0c784..adee6dcabdc3 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -110,7 +110,7 @@ struct ncsi_channel_version { u8 update; /* NCSI version update */ char alpha1; /* NCSI version alpha1 */ char alpha2; /* NCSI version alpha2 */ - u8 fw_name[12]; /* Firmware 
name string */ + u8 fw_name[12 + 1]; /* Firmware name string */ u32 fw_version; /* Firmware version */ u16 pci_ids[4]; /* PCI identification */ u32 mf_id; /* Manufacture ID */ diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 472cc68ad86f..271ec6c3929e 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -775,6 +775,7 @@ static int ncsi_rsp_handler_gvi(struct ncsi_request *nr) ncv->alpha1 = rsp->alpha1; ncv->alpha2 = rsp->alpha2; memcpy(ncv->fw_name, rsp->fw_name, 12); + ncv->fw_name[12] = '\0'; ncv->fw_version = ntohl(rsp->fw_version); for (i = 0; i < ARRAY_SIZE(ncv->pci_ids); i++) ncv->pci_ids[i] = ntohs(rsp->pci_ids[i]); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 2560416218d0..ba60b48d7567 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -195,16 +195,6 @@ config NF_CONNTRACK_LABELS config NF_CONNTRACK_OVS bool -config NF_CT_PROTO_DCCP - bool 'DCCP protocol connection tracking support' - depends on NETFILTER_ADVANCED - default y - help - With this option enabled, the layer 3 independent connection - tracking code will be able to do state tracking on DCCP connections. - - If unsure, say Y. - config NF_CT_PROTO_GRE bool @@ -516,6 +506,12 @@ config NFT_CT This option adds the "ct" expression that you can use to match connection tracking information such as the flow state. +config NFT_EXTHDR_DCCP + bool "Netfilter nf_tables exthdr DCCP support (DEPRECATED)" + default n + help + This option adds support for matching on DCCP extension headers. + config NFT_FLOW_OFFLOAD depends on NF_CONNTRACK && NF_FLOW_TABLE tristate "Netfilter nf_tables hardware flow offload module" @@ -1278,9 +1274,9 @@ config NETFILTER_XT_MATCH_CPU To compile it as a module, choose M here. If unsure, say N. 
config NETFILTER_XT_MATCH_DCCP - tristate '"dccp" protocol match support' + tristate '"dccp" protocol match support (DEPRECATED)' depends on NETFILTER_ADVANCED - default IP_DCCP + default n help With this option enabled, you will be able to use the iptables `dccp' match in order to match on DCCP source/destination ports diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index f0aa4d7ef499..e43e20f529f8 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -12,7 +12,6 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o nf_conntrack-$(CONFIG_NF_CONNTRACK_OVS) += nf_conntrack_ovs.o -nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o nf_conntrack-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o ifeq ($(CONFIG_NF_CONNTRACK),m) diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 014f07740369..95af252b2939 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -97,7 +97,7 @@ __ip_vs_dst_check(struct ip_vs_dest *dest) if (!dest_dst) return NULL; dst = dest_dst->dst_cache; - if (dst->obsolete && + if (READ_ONCE(dst->obsolete) && dst->ops->check(dst, dest_dst->dst_cookie) == NULL) return NULL; return dest_dst; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 201d3c4ec623..1097f26a6788 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -329,9 +329,6 @@ nf_ct_get_tuple(const struct sk_buff *skb, #ifdef CONFIG_NF_CT_PROTO_SCTP case IPPROTO_SCTP: #endif -#ifdef CONFIG_NF_CT_PROTO_DCCP - case IPPROTO_DCCP: -#endif /* fallthrough */ return nf_ct_get_tuple_ports(skb, dataoff, tuple); default: @@ -1982,11 +1979,6 @@ static int nf_conntrack_handle_packet(struct nf_conn 
*ct, return nf_conntrack_sctp_packet(ct, skb, dataoff, ctinfo, state); #endif -#ifdef CONFIG_NF_CT_PROTO_DCCP - case IPPROTO_DCCP: - return nf_conntrack_dccp_packet(ct, skb, dataoff, - ctinfo, state); -#endif #ifdef CONFIG_NF_CT_PROTO_GRE case IPPROTO_GRE: return nf_conntrack_gre_packet(ct, skb, dataoff, diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 2cc0fde23344..486d52b45fe5 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2036,7 +2036,6 @@ static void ctnetlink_change_mark(struct nf_conn *ct, static const struct nla_policy protoinfo_policy[CTA_PROTOINFO_MAX+1] = { [CTA_PROTOINFO_TCP] = { .type = NLA_NESTED }, - [CTA_PROTOINFO_DCCP] = { .type = NLA_NESTED }, [CTA_PROTOINFO_SCTP] = { .type = NLA_NESTED }, }; diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index f36727ed91e1..bc1d96686b9c 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -100,9 +100,6 @@ const struct nf_conntrack_l4proto *nf_ct_l4proto_find(u8 l4proto) case IPPROTO_UDP: return &nf_conntrack_l4proto_udp; case IPPROTO_TCP: return &nf_conntrack_l4proto_tcp; case IPPROTO_ICMP: return &nf_conntrack_l4proto_icmp; -#ifdef CONFIG_NF_CT_PROTO_DCCP - case IPPROTO_DCCP: return &nf_conntrack_l4proto_dccp; -#endif #ifdef CONFIG_NF_CT_PROTO_SCTP case IPPROTO_SCTP: return &nf_conntrack_l4proto_sctp; #endif @@ -681,9 +678,6 @@ void nf_conntrack_proto_pernet_init(struct net *net) #if IS_ENABLED(CONFIG_IPV6) nf_conntrack_icmpv6_init_net(net); #endif -#ifdef CONFIG_NF_CT_PROTO_DCCP - nf_conntrack_dccp_init_net(net); -#endif #ifdef CONFIG_NF_CT_PROTO_SCTP nf_conntrack_sctp_init_net(net); #endif diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c deleted file mode 100644 index ebc4f733bb2e..000000000000 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ /dev/null @@ -1,826 +0,0 @@ -// 
SPDX-License-Identifier: GPL-2.0-only -/* - * DCCP connection tracking protocol helper - * - * Copyright (c) 2005, 2006, 2008 Patrick McHardy <kaber@trash.net> - */ -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/sysctl.h> -#include <linux/spinlock.h> -#include <linux/skbuff.h> -#include <linux/dccp.h> -#include <linux/slab.h> - -#include <net/net_namespace.h> -#include <net/netns/generic.h> - -#include <linux/netfilter/nfnetlink_conntrack.h> -#include <net/netfilter/nf_conntrack.h> -#include <net/netfilter/nf_conntrack_l4proto.h> -#include <net/netfilter/nf_conntrack_ecache.h> -#include <net/netfilter/nf_conntrack_timeout.h> -#include <net/netfilter/nf_log.h> - -/* Timeouts are based on values from RFC4340: - * - * - REQUEST: - * - * 8.1.2. Client Request - * - * A client MAY give up on its DCCP-Requests after some time - * (3 minutes, for example). - * - * - RESPOND: - * - * 8.1.3. Server Response - * - * It MAY also leave the RESPOND state for CLOSED after a timeout of - * not less than 4MSL (8 minutes); - * - * - PARTOPEN: - * - * 8.1.5. Handshake Completion - * - * If the client remains in PARTOPEN for more than 4MSL (8 minutes), - * it SHOULD reset the connection with Reset Code 2, "Aborted". - * - * - OPEN: - * - * The DCCP timestamp overflows after 11.9 hours. If the connection - * stays idle this long the sequence number won't be recognized - * as valid anymore. - * - * - CLOSEREQ/CLOSING: - * - * 8.3. Termination - * - * The retransmission timer should initially be set to go off in two - * round-trip times and should back off to not less than once every - * 64 seconds ... - * - * - TIMEWAIT: - * - * 4.3. States - * - * A server or client socket remains in this state for 2MSL (4 minutes) - * after the connection has been town down, ... 
- */ - -#define DCCP_MSL (2 * 60 * HZ) - -#ifdef CONFIG_NF_CONNTRACK_PROCFS -static const char * const dccp_state_names[] = { - [CT_DCCP_NONE] = "NONE", - [CT_DCCP_REQUEST] = "REQUEST", - [CT_DCCP_RESPOND] = "RESPOND", - [CT_DCCP_PARTOPEN] = "PARTOPEN", - [CT_DCCP_OPEN] = "OPEN", - [CT_DCCP_CLOSEREQ] = "CLOSEREQ", - [CT_DCCP_CLOSING] = "CLOSING", - [CT_DCCP_TIMEWAIT] = "TIMEWAIT", - [CT_DCCP_IGNORE] = "IGNORE", - [CT_DCCP_INVALID] = "INVALID", -}; -#endif - -#define sNO CT_DCCP_NONE -#define sRQ CT_DCCP_REQUEST -#define sRS CT_DCCP_RESPOND -#define sPO CT_DCCP_PARTOPEN -#define sOP CT_DCCP_OPEN -#define sCR CT_DCCP_CLOSEREQ -#define sCG CT_DCCP_CLOSING -#define sTW CT_DCCP_TIMEWAIT -#define sIG CT_DCCP_IGNORE -#define sIV CT_DCCP_INVALID - -/* - * DCCP state transition table - * - * The assumption is the same as for TCP tracking: - * - * We are the man in the middle. All the packets go through us but might - * get lost in transit to the destination. It is assumed that the destination - * can't receive segments we haven't seen. - * - * The following states exist: - * - * NONE: Initial state, expecting Request - * REQUEST: Request seen, waiting for Response from server - * RESPOND: Response from server seen, waiting for Ack from client - * PARTOPEN: Ack after Response seen, waiting for packet other than Response, - * Reset or Sync from server - * OPEN: Packet other than Response, Reset or Sync seen - * CLOSEREQ: CloseReq from server seen, expecting Close from client - * CLOSING: Close seen, expecting Reset - * TIMEWAIT: Reset seen - * IGNORE: Not determinable whether packet is valid - * - * Some states exist only on one side of the connection: REQUEST, RESPOND, - * PARTOPEN, CLOSEREQ. For the other side these states are equivalent to - * the one it was in before. 
- * - * Packets are marked as ignored (sIG) if we don't know if they're valid - * (for example a reincarnation of a connection we didn't notice is dead - * already) and the server may send back a connection closing Reset or a - * Response. They're also used for Sync/SyncAck packets, which we don't - * care about. - */ -static const u_int8_t -dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] = { - [CT_DCCP_ROLE_CLIENT] = { - [DCCP_PKT_REQUEST] = { - /* - * sNO -> sRQ Regular Request - * sRQ -> sRQ Retransmitted Request or reincarnation - * sRS -> sRS Retransmitted Request (apparently Response - * got lost after we saw it) or reincarnation - * sPO -> sIG Ignore, conntrack might be out of sync - * sOP -> sIG Ignore, conntrack might be out of sync - * sCR -> sIG Ignore, conntrack might be out of sync - * sCG -> sIG Ignore, conntrack might be out of sync - * sTW -> sRQ Reincarnation - * - * sNO, sRQ, sRS, sPO. sOP, sCR, sCG, sTW, */ - sRQ, sRQ, sRS, sIG, sIG, sIG, sIG, sRQ, - }, - [DCCP_PKT_RESPONSE] = { - /* - * sNO -> sIV Invalid - * sRQ -> sIG Ignore, might be response to ignored Request - * sRS -> sIG Ignore, might be response to ignored Request - * sPO -> sIG Ignore, might be response to ignored Request - * sOP -> sIG Ignore, might be response to ignored Request - * sCR -> sIG Ignore, might be response to ignored Request - * sCG -> sIG Ignore, might be response to ignored Request - * sTW -> sIV Invalid, reincarnation in reverse direction - * goes through sRQ - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIV, - }, - [DCCP_PKT_ACK] = { - /* - * sNO -> sIV No connection - * sRQ -> sIV No connection - * sRS -> sPO Ack for Response, move to PARTOPEN (8.1.5.) - * sPO -> sPO Retransmitted Ack for Response, remain in PARTOPEN - * sOP -> sOP Regular ACK, remain in OPEN - * sCR -> sCR Ack in CLOSEREQ MAY be processed (8.3.) - * sCG -> sCG Ack in CLOSING MAY be processed (8.3.) 
- * sTW -> sIV - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIV, sIV, sPO, sPO, sOP, sCR, sCG, sIV - }, - [DCCP_PKT_DATA] = { - /* - * sNO -> sIV No connection - * sRQ -> sIV No connection - * sRS -> sIV No connection - * sPO -> sIV MUST use DataAck in PARTOPEN state (8.1.5.) - * sOP -> sOP Regular Data packet - * sCR -> sCR Data in CLOSEREQ MAY be processed (8.3.) - * sCG -> sCG Data in CLOSING MAY be processed (8.3.) - * sTW -> sIV - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIV, sIV, sIV, sIV, sOP, sCR, sCG, sIV, - }, - [DCCP_PKT_DATAACK] = { - /* - * sNO -> sIV No connection - * sRQ -> sIV No connection - * sRS -> sPO Ack for Response, move to PARTOPEN (8.1.5.) - * sPO -> sPO Remain in PARTOPEN state - * sOP -> sOP Regular DataAck packet in OPEN state - * sCR -> sCR DataAck in CLOSEREQ MAY be processed (8.3.) - * sCG -> sCG DataAck in CLOSING MAY be processed (8.3.) - * sTW -> sIV - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIV, sIV, sPO, sPO, sOP, sCR, sCG, sIV - }, - [DCCP_PKT_CLOSEREQ] = { - /* - * CLOSEREQ may only be sent by the server. - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV - }, - [DCCP_PKT_CLOSE] = { - /* - * sNO -> sIV No connection - * sRQ -> sIV No connection - * sRS -> sIV No connection - * sPO -> sCG Client-initiated close - * sOP -> sCG Client-initiated close - * sCR -> sCG Close in response to CloseReq (8.3.) - * sCG -> sCG Retransmit - * sTW -> sIV Late retransmit, already in TIME_WAIT - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIV, sIV, sIV, sCG, sCG, sCG, sIV, sIV - }, - [DCCP_PKT_RESET] = { - /* - * sNO -> sIV No connection - * sRQ -> sTW Sync received or timeout, SHOULD send Reset (8.1.1.) - * sRS -> sTW Response received without Request - * sPO -> sTW Timeout, SHOULD send Reset (8.1.5.) 
- * sOP -> sTW Connection reset - * sCR -> sTW Connection reset - * sCG -> sTW Connection reset - * sTW -> sIG Ignore (don't refresh timer) - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIV, sTW, sTW, sTW, sTW, sTW, sTW, sIG - }, - [DCCP_PKT_SYNC] = { - /* - * We currently ignore Sync packets - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, - }, - [DCCP_PKT_SYNCACK] = { - /* - * We currently ignore SyncAck packets - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, - }, - }, - [CT_DCCP_ROLE_SERVER] = { - [DCCP_PKT_REQUEST] = { - /* - * sNO -> sIV Invalid - * sRQ -> sIG Ignore, conntrack might be out of sync - * sRS -> sIG Ignore, conntrack might be out of sync - * sPO -> sIG Ignore, conntrack might be out of sync - * sOP -> sIG Ignore, conntrack might be out of sync - * sCR -> sIG Ignore, conntrack might be out of sync - * sCG -> sIG Ignore, conntrack might be out of sync - * sTW -> sRQ Reincarnation, must reverse roles - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIV, sIG, sIG, sIG, sIG, sIG, sIG, sRQ - }, - [DCCP_PKT_RESPONSE] = { - /* - * sNO -> sIV Response without Request - * sRQ -> sRS Response to clients Request - * sRS -> sRS Retransmitted Response (8.1.3. SHOULD NOT) - * sPO -> sIG Response to an ignored Request or late retransmit - * sOP -> sIG Ignore, might be response to ignored Request - * sCR -> sIG Ignore, might be response to ignored Request - * sCG -> sIG Ignore, might be response to ignored Request - * sTW -> sIV Invalid, Request from client in sTW moves to sRQ - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIV, sRS, sRS, sIG, sIG, sIG, sIG, sIV - }, - [DCCP_PKT_ACK] = { - /* - * sNO -> sIV No connection - * sRQ -> sIV No connection - * sRS -> sIV No connection - * sPO -> sOP Enter OPEN state (8.1.5.) - * sOP -> sOP Regular Ack in OPEN state - * sCR -> sIV Waiting for Close from client - * sCG -> sCG Ack in CLOSING MAY be processed (8.3.) 
- * sTW -> sIV - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIV, sIV, sIV, sOP, sOP, sIV, sCG, sIV - }, - [DCCP_PKT_DATA] = { - /* - * sNO -> sIV No connection - * sRQ -> sIV No connection - * sRS -> sIV No connection - * sPO -> sOP Enter OPEN state (8.1.5.) - * sOP -> sOP Regular Data packet in OPEN state - * sCR -> sIV Waiting for Close from client - * sCG -> sCG Data in CLOSING MAY be processed (8.3.) - * sTW -> sIV - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIV, sIV, sIV, sOP, sOP, sIV, sCG, sIV - }, - [DCCP_PKT_DATAACK] = { - /* - * sNO -> sIV No connection - * sRQ -> sIV No connection - * sRS -> sIV No connection - * sPO -> sOP Enter OPEN state (8.1.5.) - * sOP -> sOP Regular DataAck in OPEN state - * sCR -> sIV Waiting for Close from client - * sCG -> sCG Data in CLOSING MAY be processed (8.3.) - * sTW -> sIV - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIV, sIV, sIV, sOP, sOP, sIV, sCG, sIV - }, - [DCCP_PKT_CLOSEREQ] = { - /* - * sNO -> sIV No connection - * sRQ -> sIV No connection - * sRS -> sIV No connection - * sPO -> sOP -> sCR Move directly to CLOSEREQ (8.1.5.) - * sOP -> sCR CloseReq in OPEN state - * sCR -> sCR Retransmit - * sCG -> sCR Simultaneous close, client sends another Close - * sTW -> sIV Already closed - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIV, sIV, sIV, sCR, sCR, sCR, sCR, sIV - }, - [DCCP_PKT_CLOSE] = { - /* - * sNO -> sIV No connection - * sRQ -> sIV No connection - * sRS -> sIV No connection - * sPO -> sOP -> sCG Move direcly to CLOSING - * sOP -> sCG Move to CLOSING - * sCR -> sIV Close after CloseReq is invalid - * sCG -> sCG Retransmit - * sTW -> sIV Already closed - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIV, sIV, sIV, sCG, sCG, sIV, sCG, sIV - }, - [DCCP_PKT_RESET] = { - /* - * sNO -> sIV No connection - * sRQ -> sTW Reset in response to Request - * sRS -> sTW Timeout, SHOULD send Reset (8.1.3.) - * sPO -> sTW Timeout, SHOULD send Reset (8.1.3.) 
- * sOP -> sTW - * sCR -> sTW - * sCG -> sTW - * sTW -> sIG Ignore (don't refresh timer) - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW, sTW */ - sIV, sTW, sTW, sTW, sTW, sTW, sTW, sTW, sIG - }, - [DCCP_PKT_SYNC] = { - /* - * We currently ignore Sync packets - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, - }, - [DCCP_PKT_SYNCACK] = { - /* - * We currently ignore SyncAck packets - * - * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ - sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, - }, - }, -}; - -static noinline bool -dccp_new(struct nf_conn *ct, const struct sk_buff *skb, - const struct dccp_hdr *dh, - const struct nf_hook_state *hook_state) -{ - struct net *net = nf_ct_net(ct); - struct nf_dccp_net *dn; - const char *msg; - u_int8_t state; - - state = dccp_state_table[CT_DCCP_ROLE_CLIENT][dh->dccph_type][CT_DCCP_NONE]; - switch (state) { - default: - dn = nf_dccp_pernet(net); - if (dn->dccp_loose == 0) { - msg = "not picking up existing connection "; - goto out_invalid; - } - break; - case CT_DCCP_REQUEST: - break; - case CT_DCCP_INVALID: - msg = "invalid state transition "; - goto out_invalid; - } - - ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT; - ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER; - ct->proto.dccp.state = CT_DCCP_NONE; - ct->proto.dccp.last_pkt = DCCP_PKT_REQUEST; - ct->proto.dccp.last_dir = IP_CT_DIR_ORIGINAL; - ct->proto.dccp.handshake_seq = 0; - return true; - -out_invalid: - nf_ct_l4proto_log_invalid(skb, ct, hook_state, "%s", msg); - return false; -} - -static u64 dccp_ack_seq(const struct dccp_hdr *dh) -{ - const struct dccp_hdr_ack_bits *dhack; - - dhack = (void *)dh + __dccp_basic_hdr_len(dh); - return ((u64)ntohs(dhack->dccph_ack_nr_high) << 32) + - ntohl(dhack->dccph_ack_nr_low); -} - -static bool dccp_error(const struct dccp_hdr *dh, - struct sk_buff *skb, unsigned int dataoff, - const struct nf_hook_state *state) -{ - static const unsigned long require_seq48 = 1 << 
DCCP_PKT_REQUEST | - 1 << DCCP_PKT_RESPONSE | - 1 << DCCP_PKT_CLOSEREQ | - 1 << DCCP_PKT_CLOSE | - 1 << DCCP_PKT_RESET | - 1 << DCCP_PKT_SYNC | - 1 << DCCP_PKT_SYNCACK; - unsigned int dccp_len = skb->len - dataoff; - unsigned int cscov; - const char *msg; - u8 type; - - BUILD_BUG_ON(DCCP_PKT_INVALID >= BITS_PER_LONG); - - if (dh->dccph_doff * 4 < sizeof(struct dccp_hdr) || - dh->dccph_doff * 4 > dccp_len) { - msg = "nf_ct_dccp: truncated/malformed packet "; - goto out_invalid; - } - - cscov = dccp_len; - if (dh->dccph_cscov) { - cscov = (dh->dccph_cscov - 1) * 4; - if (cscov > dccp_len) { - msg = "nf_ct_dccp: bad checksum coverage "; - goto out_invalid; - } - } - - if (state->hook == NF_INET_PRE_ROUTING && - state->net->ct.sysctl_checksum && - nf_checksum_partial(skb, state->hook, dataoff, cscov, - IPPROTO_DCCP, state->pf)) { - msg = "nf_ct_dccp: bad checksum "; - goto out_invalid; - } - - type = dh->dccph_type; - if (type >= DCCP_PKT_INVALID) { - msg = "nf_ct_dccp: reserved packet type "; - goto out_invalid; - } - - if (test_bit(type, &require_seq48) && !dh->dccph_x) { - msg = "nf_ct_dccp: type lacks 48bit sequence numbers"; - goto out_invalid; - } - - return false; -out_invalid: - nf_l4proto_log_invalid(skb, state, IPPROTO_DCCP, "%s", msg); - return true; -} - -struct nf_conntrack_dccp_buf { - struct dccp_hdr dh; /* generic header part */ - struct dccp_hdr_ext ext; /* optional depending dh->dccph_x */ - union { /* depends on header type */ - struct dccp_hdr_ack_bits ack; - struct dccp_hdr_request req; - struct dccp_hdr_response response; - struct dccp_hdr_reset rst; - } u; -}; - -static struct dccp_hdr * -dccp_header_pointer(const struct sk_buff *skb, int offset, const struct dccp_hdr *dh, - struct nf_conntrack_dccp_buf *buf) -{ - unsigned int hdrlen = __dccp_hdr_len(dh); - - if (hdrlen > sizeof(*buf)) - return NULL; - - return skb_header_pointer(skb, offset, hdrlen, buf); -} - -int nf_conntrack_dccp_packet(struct nf_conn *ct, struct sk_buff *skb, - unsigned int 
dataoff, - enum ip_conntrack_info ctinfo, - const struct nf_hook_state *state) -{ - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - struct nf_conntrack_dccp_buf _dh; - u_int8_t type, old_state, new_state; - enum ct_dccp_roles role; - unsigned int *timeouts; - struct dccp_hdr *dh; - - dh = skb_header_pointer(skb, dataoff, sizeof(*dh), &_dh.dh); - if (!dh) - return -NF_ACCEPT; - - if (dccp_error(dh, skb, dataoff, state)) - return -NF_ACCEPT; - - /* pull again, including possible 48 bit sequences and subtype header */ - dh = dccp_header_pointer(skb, dataoff, dh, &_dh); - if (!dh) - return -NF_ACCEPT; - - type = dh->dccph_type; - if (!nf_ct_is_confirmed(ct) && !dccp_new(ct, skb, dh, state)) - return -NF_ACCEPT; - - if (type == DCCP_PKT_RESET && - !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { - /* Tear down connection immediately if only reply is a RESET */ - nf_ct_kill_acct(ct, ctinfo, skb); - return NF_ACCEPT; - } - - spin_lock_bh(&ct->lock); - - role = ct->proto.dccp.role[dir]; - old_state = ct->proto.dccp.state; - new_state = dccp_state_table[role][type][old_state]; - - switch (new_state) { - case CT_DCCP_REQUEST: - if (old_state == CT_DCCP_TIMEWAIT && - role == CT_DCCP_ROLE_SERVER) { - /* Reincarnation in the reverse direction: reopen and - * reverse client/server roles. */ - ct->proto.dccp.role[dir] = CT_DCCP_ROLE_CLIENT; - ct->proto.dccp.role[!dir] = CT_DCCP_ROLE_SERVER; - } - break; - case CT_DCCP_RESPOND: - if (old_state == CT_DCCP_REQUEST) - ct->proto.dccp.handshake_seq = dccp_hdr_seq(dh); - break; - case CT_DCCP_PARTOPEN: - if (old_state == CT_DCCP_RESPOND && - type == DCCP_PKT_ACK && - dccp_ack_seq(dh) == ct->proto.dccp.handshake_seq) - set_bit(IPS_ASSURED_BIT, &ct->status); - break; - case CT_DCCP_IGNORE: - /* - * Connection tracking might be out of sync, so we ignore - * packets that might establish a new connection and resync - * if the server responds with a valid Response. 
- */ - if (ct->proto.dccp.last_dir == !dir && - ct->proto.dccp.last_pkt == DCCP_PKT_REQUEST && - type == DCCP_PKT_RESPONSE) { - ct->proto.dccp.role[!dir] = CT_DCCP_ROLE_CLIENT; - ct->proto.dccp.role[dir] = CT_DCCP_ROLE_SERVER; - ct->proto.dccp.handshake_seq = dccp_hdr_seq(dh); - new_state = CT_DCCP_RESPOND; - break; - } - ct->proto.dccp.last_dir = dir; - ct->proto.dccp.last_pkt = type; - - spin_unlock_bh(&ct->lock); - nf_ct_l4proto_log_invalid(skb, ct, state, "%s", "invalid packet"); - return NF_ACCEPT; - case CT_DCCP_INVALID: - spin_unlock_bh(&ct->lock); - nf_ct_l4proto_log_invalid(skb, ct, state, "%s", "invalid state transition"); - return -NF_ACCEPT; - } - - ct->proto.dccp.last_dir = dir; - ct->proto.dccp.last_pkt = type; - ct->proto.dccp.state = new_state; - spin_unlock_bh(&ct->lock); - - if (new_state != old_state) - nf_conntrack_event_cache(IPCT_PROTOINFO, ct); - - timeouts = nf_ct_timeout_lookup(ct); - if (!timeouts) - timeouts = nf_dccp_pernet(nf_ct_net(ct))->dccp_timeout; - nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]); - - return NF_ACCEPT; -} - -static bool dccp_can_early_drop(const struct nf_conn *ct) -{ - switch (ct->proto.dccp.state) { - case CT_DCCP_CLOSEREQ: - case CT_DCCP_CLOSING: - case CT_DCCP_TIMEWAIT: - return true; - default: - break; - } - - return false; -} - -#ifdef CONFIG_NF_CONNTRACK_PROCFS -static void dccp_print_conntrack(struct seq_file *s, struct nf_conn *ct) -{ - seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]); -} -#endif - -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) -static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, - struct nf_conn *ct, bool destroy) -{ - struct nlattr *nest_parms; - - spin_lock_bh(&ct->lock); - nest_parms = nla_nest_start(skb, CTA_PROTOINFO_DCCP); - if (!nest_parms) - goto nla_put_failure; - if (nla_put_u8(skb, CTA_PROTOINFO_DCCP_STATE, ct->proto.dccp.state)) - goto nla_put_failure; - - if (destroy) - goto skip_state; - - if (nla_put_u8(skb, CTA_PROTOINFO_DCCP_ROLE, - 
ct->proto.dccp.role[IP_CT_DIR_ORIGINAL]) || - nla_put_be64(skb, CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ, - cpu_to_be64(ct->proto.dccp.handshake_seq), - CTA_PROTOINFO_DCCP_PAD)) - goto nla_put_failure; -skip_state: - nla_nest_end(skb, nest_parms); - spin_unlock_bh(&ct->lock); - - return 0; - -nla_put_failure: - spin_unlock_bh(&ct->lock); - return -1; -} - -static const struct nla_policy dccp_nla_policy[CTA_PROTOINFO_DCCP_MAX + 1] = { - [CTA_PROTOINFO_DCCP_STATE] = { .type = NLA_U8 }, - [CTA_PROTOINFO_DCCP_ROLE] = { .type = NLA_U8 }, - [CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ] = { .type = NLA_U64 }, - [CTA_PROTOINFO_DCCP_PAD] = { .type = NLA_UNSPEC }, -}; - -#define DCCP_NLATTR_SIZE ( \ - NLA_ALIGN(NLA_HDRLEN + 1) + \ - NLA_ALIGN(NLA_HDRLEN + 1) + \ - NLA_ALIGN(NLA_HDRLEN + sizeof(u64)) + \ - NLA_ALIGN(NLA_HDRLEN + 0)) - -static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) -{ - struct nlattr *attr = cda[CTA_PROTOINFO_DCCP]; - struct nlattr *tb[CTA_PROTOINFO_DCCP_MAX + 1]; - int err; - - if (!attr) - return 0; - - err = nla_parse_nested_deprecated(tb, CTA_PROTOINFO_DCCP_MAX, attr, - dccp_nla_policy, NULL); - if (err < 0) - return err; - - if (!tb[CTA_PROTOINFO_DCCP_STATE] || - !tb[CTA_PROTOINFO_DCCP_ROLE] || - nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) > CT_DCCP_ROLE_MAX || - nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]) >= CT_DCCP_IGNORE) { - return -EINVAL; - } - - spin_lock_bh(&ct->lock); - ct->proto.dccp.state = nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]); - if (nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) == CT_DCCP_ROLE_CLIENT) { - ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT; - ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER; - } else { - ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_SERVER; - ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_CLIENT; - } - if (tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ]) { - ct->proto.dccp.handshake_seq = - be64_to_cpu(nla_get_be64(tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ])); - } - spin_unlock_bh(&ct->lock); - 
return 0; -} -#endif - -#ifdef CONFIG_NF_CONNTRACK_TIMEOUT - -#include <linux/netfilter/nfnetlink.h> -#include <linux/netfilter/nfnetlink_cttimeout.h> - -static int dccp_timeout_nlattr_to_obj(struct nlattr *tb[], - struct net *net, void *data) -{ - struct nf_dccp_net *dn = nf_dccp_pernet(net); - unsigned int *timeouts = data; - int i; - - if (!timeouts) - timeouts = dn->dccp_timeout; - - /* set default DCCP timeouts. */ - for (i=0; i<CT_DCCP_MAX; i++) - timeouts[i] = dn->dccp_timeout[i]; - - /* there's a 1:1 mapping between attributes and protocol states. */ - for (i=CTA_TIMEOUT_DCCP_UNSPEC+1; i<CTA_TIMEOUT_DCCP_MAX+1; i++) { - if (tb[i]) { - timeouts[i] = ntohl(nla_get_be32(tb[i])) * HZ; - } - } - - timeouts[CTA_TIMEOUT_DCCP_UNSPEC] = timeouts[CTA_TIMEOUT_DCCP_REQUEST]; - return 0; -} - -static int -dccp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) -{ - const unsigned int *timeouts = data; - int i; - - for (i=CTA_TIMEOUT_DCCP_UNSPEC+1; i<CTA_TIMEOUT_DCCP_MAX+1; i++) { - if (nla_put_be32(skb, i, htonl(timeouts[i] / HZ))) - goto nla_put_failure; - } - return 0; - -nla_put_failure: - return -ENOSPC; -} - -static const struct nla_policy -dccp_timeout_nla_policy[CTA_TIMEOUT_DCCP_MAX+1] = { - [CTA_TIMEOUT_DCCP_REQUEST] = { .type = NLA_U32 }, - [CTA_TIMEOUT_DCCP_RESPOND] = { .type = NLA_U32 }, - [CTA_TIMEOUT_DCCP_PARTOPEN] = { .type = NLA_U32 }, - [CTA_TIMEOUT_DCCP_OPEN] = { .type = NLA_U32 }, - [CTA_TIMEOUT_DCCP_CLOSEREQ] = { .type = NLA_U32 }, - [CTA_TIMEOUT_DCCP_CLOSING] = { .type = NLA_U32 }, - [CTA_TIMEOUT_DCCP_TIMEWAIT] = { .type = NLA_U32 }, -}; -#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ - -void nf_conntrack_dccp_init_net(struct net *net) -{ - struct nf_dccp_net *dn = nf_dccp_pernet(net); - - /* default values */ - dn->dccp_loose = 1; - dn->dccp_timeout[CT_DCCP_REQUEST] = 2 * DCCP_MSL; - dn->dccp_timeout[CT_DCCP_RESPOND] = 4 * DCCP_MSL; - dn->dccp_timeout[CT_DCCP_PARTOPEN] = 4 * DCCP_MSL; - dn->dccp_timeout[CT_DCCP_OPEN] = 12 * 3600 * HZ; - 
dn->dccp_timeout[CT_DCCP_CLOSEREQ] = 64 * HZ; - dn->dccp_timeout[CT_DCCP_CLOSING] = 64 * HZ; - dn->dccp_timeout[CT_DCCP_TIMEWAIT] = 2 * DCCP_MSL; - - /* timeouts[0] is unused, make it same as SYN_SENT so - * ->timeouts[0] contains 'new' timeout, like udp or icmp. - */ - dn->dccp_timeout[CT_DCCP_NONE] = dn->dccp_timeout[CT_DCCP_REQUEST]; -} - -const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp = { - .l4proto = IPPROTO_DCCP, - .can_early_drop = dccp_can_early_drop, -#ifdef CONFIG_NF_CONNTRACK_PROCFS - .print_conntrack = dccp_print_conntrack, -#endif -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) - .nlattr_size = DCCP_NLATTR_SIZE, - .to_nlattr = dccp_to_nlattr, - .from_nlattr = nlattr_to_dccp, - .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, - .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, - .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, - .nla_policy = nf_ct_port_nla_policy, -#endif -#ifdef CONFIG_NF_CONNTRACK_TIMEOUT - .ctnl_timeout = { - .nlattr_to_obj = dccp_timeout_nlattr_to_obj, - .obj_to_nlattr = dccp_timeout_obj_to_nlattr, - .nlattr_max = CTA_TIMEOUT_DCCP_MAX, - .obj_size = sizeof(unsigned int) * CT_DCCP_MAX, - .nla_policy = dccp_timeout_nla_policy, - }, -#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ -}; diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 6c4cff10357d..829f60496008 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -67,11 +67,6 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, ntohs(tuple->dst.u.udp.port)); break; - case IPPROTO_DCCP: - seq_printf(s, "sport=%hu dport=%hu ", - ntohs(tuple->src.u.dccp.port), - ntohs(tuple->dst.u.dccp.port)); - break; case IPPROTO_SCTP: seq_printf(s, "sport=%hu dport=%hu ", ntohs(tuple->src.u.sctp.port), @@ -279,7 +274,6 @@ static const char* l4proto_name(u16 proto) case IPPROTO_ICMP: return "icmp"; case IPPROTO_TCP: return "tcp"; case IPPROTO_UDP: return "udp"; - case IPPROTO_DCCP: return 
"dccp"; case IPPROTO_GRE: return "gre"; case IPPROTO_SCTP: return "sctp"; case IPPROTO_UDPLITE: return "udplite"; @@ -612,16 +606,6 @@ enum nf_ct_sysctl_index { NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT, NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_SENT, #endif -#ifdef CONFIG_NF_CT_PROTO_DCCP - NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST, - NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_RESPOND, - NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_PARTOPEN, - NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_OPEN, - NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_CLOSEREQ, - NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_CLOSING, - NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_TIMEWAIT, - NF_SYSCTL_CT_PROTO_DCCP_LOOSE, -#endif #ifdef CONFIG_NF_CT_PROTO_GRE NF_SYSCTL_CT_PROTO_TIMEOUT_GRE, NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM, @@ -895,58 +879,6 @@ static struct ctl_table nf_ct_sysctl_table[] = { .proc_handler = proc_dointvec_jiffies, }, #endif -#ifdef CONFIG_NF_CT_PROTO_DCCP - [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST] = { - .procname = "nf_conntrack_dccp_timeout_request", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_RESPOND] = { - .procname = "nf_conntrack_dccp_timeout_respond", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_PARTOPEN] = { - .procname = "nf_conntrack_dccp_timeout_partopen", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_OPEN] = { - .procname = "nf_conntrack_dccp_timeout_open", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_CLOSEREQ] = { - .procname = "nf_conntrack_dccp_timeout_closereq", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_CLOSING] = { - .procname = "nf_conntrack_dccp_timeout_closing", - .maxlen = sizeof(unsigned int), 
- .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_TIMEWAIT] = { - .procname = "nf_conntrack_dccp_timeout_timewait", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - [NF_SYSCTL_CT_PROTO_DCCP_LOOSE] = { - .procname = "nf_conntrack_dccp_loose", - .maxlen = sizeof(u8), - .mode = 0644, - .proc_handler = proc_dou8vec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, -#endif #ifdef CONFIG_NF_CT_PROTO_GRE [NF_SYSCTL_CT_PROTO_TIMEOUT_GRE] = { .procname = "nf_conntrack_gre_timeout", @@ -1032,29 +964,6 @@ static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net, #endif } -static void nf_conntrack_standalone_init_dccp_sysctl(struct net *net, - struct ctl_table *table) -{ -#ifdef CONFIG_NF_CT_PROTO_DCCP - struct nf_dccp_net *dn = nf_dccp_pernet(net); - -#define XASSIGN(XNAME, dn) \ - table[NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_ ## XNAME].data = \ - &(dn)->dccp_timeout[CT_DCCP_ ## XNAME] - - XASSIGN(REQUEST, dn); - XASSIGN(RESPOND, dn); - XASSIGN(PARTOPEN, dn); - XASSIGN(OPEN, dn); - XASSIGN(CLOSEREQ, dn); - XASSIGN(CLOSING, dn); - XASSIGN(TIMEWAIT, dn); -#undef XASSIGN - - table[NF_SYSCTL_CT_PROTO_DCCP_LOOSE].data = &dn->dccp_loose; -#endif -} - static void nf_conntrack_standalone_init_gre_sysctl(struct net *net, struct ctl_table *table) { @@ -1100,7 +1009,6 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) nf_conntrack_standalone_init_tcp_sysctl(net, table); nf_conntrack_standalone_init_sctp_sysctl(net, table); - nf_conntrack_standalone_init_dccp_sysctl(net, table); nf_conntrack_standalone_init_gre_sysctl(net, table); /* Don't allow non-init_net ns to alter global sysctls */ diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index f391cd267922..78a61dac4ade 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -69,7 +69,6 @@ static void nf_nat_ipv4_decode_session(struct sk_buff *skb, if (t->dst.protonum 
== IPPROTO_TCP || t->dst.protonum == IPPROTO_UDP || t->dst.protonum == IPPROTO_UDPLITE || - t->dst.protonum == IPPROTO_DCCP || t->dst.protonum == IPPROTO_SCTP) fl4->fl4_dport = t->dst.u.all; } @@ -81,7 +80,6 @@ static void nf_nat_ipv4_decode_session(struct sk_buff *skb, if (t->dst.protonum == IPPROTO_TCP || t->dst.protonum == IPPROTO_UDP || t->dst.protonum == IPPROTO_UDPLITE || - t->dst.protonum == IPPROTO_DCCP || t->dst.protonum == IPPROTO_SCTP) fl4->fl4_sport = t->src.u.all; } @@ -102,7 +100,6 @@ static void nf_nat_ipv6_decode_session(struct sk_buff *skb, if (t->dst.protonum == IPPROTO_TCP || t->dst.protonum == IPPROTO_UDP || t->dst.protonum == IPPROTO_UDPLITE || - t->dst.protonum == IPPROTO_DCCP || t->dst.protonum == IPPROTO_SCTP) fl6->fl6_dport = t->dst.u.all; } @@ -114,7 +111,6 @@ static void nf_nat_ipv6_decode_session(struct sk_buff *skb, if (t->dst.protonum == IPPROTO_TCP || t->dst.protonum == IPPROTO_UDP || t->dst.protonum == IPPROTO_UDPLITE || - t->dst.protonum == IPPROTO_DCCP || t->dst.protonum == IPPROTO_SCTP) fl6->fl6_sport = t->src.u.all; } @@ -432,7 +428,6 @@ static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple, case IPPROTO_TCP: case IPPROTO_UDP: case IPPROTO_UDPLITE: - case IPPROTO_DCCP: case IPPROTO_SCTP: if (maniptype == NF_NAT_MANIP_SRC) port = tuple->src.u.all; @@ -632,7 +627,6 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple, case IPPROTO_UDPLITE: case IPPROTO_TCP: case IPPROTO_SCTP: - case IPPROTO_DCCP: if (maniptype == NF_NAT_MANIP_SRC) keyptr = &tuple->src.u.all; else diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c index dc450cc81222..b14a434b9561 100644 --- a/net/netfilter/nf_nat_proto.c +++ b/net/netfilter/nf_nat_proto.c @@ -180,46 +180,6 @@ tcp_manip_pkt(struct sk_buff *skb, } static bool -dccp_manip_pkt(struct sk_buff *skb, - unsigned int iphdroff, unsigned int hdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype) -{ -#ifdef 
CONFIG_NF_CT_PROTO_DCCP - struct dccp_hdr *hdr; - __be16 *portptr, oldport, newport; - int hdrsize = 8; /* DCCP connection tracking guarantees this much */ - - if (skb->len >= hdroff + sizeof(struct dccp_hdr)) - hdrsize = sizeof(struct dccp_hdr); - - if (skb_ensure_writable(skb, hdroff + hdrsize)) - return false; - - hdr = (struct dccp_hdr *)(skb->data + hdroff); - - if (maniptype == NF_NAT_MANIP_SRC) { - newport = tuple->src.u.dccp.port; - portptr = &hdr->dccph_sport; - } else { - newport = tuple->dst.u.dccp.port; - portptr = &hdr->dccph_dport; - } - - oldport = *portptr; - *portptr = newport; - - if (hdrsize < sizeof(*hdr)) - return true; - - nf_csum_update(skb, iphdroff, &hdr->dccph_checksum, tuple, maniptype); - inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport, - false); -#endif - return true; -} - -static bool icmp_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, unsigned int hdroff, const struct nf_conntrack_tuple *tuple, @@ -338,9 +298,6 @@ static bool l4proto_manip_pkt(struct sk_buff *skb, case IPPROTO_ICMPV6: return icmpv6_manip_pkt(skb, iphdroff, hdroff, tuple, maniptype); - case IPPROTO_DCCP: - return dccp_manip_pkt(skb, iphdroff, hdroff, - tuple, maniptype); case IPPROTO_GRE: return gre_manip_pkt(skb, iphdroff, hdroff, tuple, maniptype); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 24c71ecb2179..620824a56a55 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1153,9 +1153,9 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, { struct nlmsghdr *nlh; - event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); - nlh = nfnl_msg_put(skb, portid, seq, event, flags, family, - NFNETLINK_V0, nft_base_seq(net)); + nlh = nfnl_msg_put(skb, portid, seq, + nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event), + flags, family, NFNETLINK_V0, nft_base_seq(net)); if (!nlh) goto nla_put_failure; @@ -1165,7 +1165,8 @@ static int nf_tables_fill_table_info(struct sk_buff 
*skb, struct net *net, NFTA_TABLE_PAD)) goto nla_put_failure; - if (event == NFT_MSG_DELTABLE) { + if (event == NFT_MSG_DELTABLE || + event == NFT_MSG_DESTROYTABLE) { nlmsg_end(skb, nlh); return 0; } @@ -2016,9 +2017,9 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, { struct nlmsghdr *nlh; - event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); - nlh = nfnl_msg_put(skb, portid, seq, event, flags, family, - NFNETLINK_V0, nft_base_seq(net)); + nlh = nfnl_msg_put(skb, portid, seq, + nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event), + flags, family, NFNETLINK_V0, nft_base_seq(net)); if (!nlh) goto nla_put_failure; @@ -2028,7 +2029,9 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, NFTA_CHAIN_PAD)) goto nla_put_failure; - if (event == NFT_MSG_DELCHAIN && !hook_list) { + if (!hook_list && + (event == NFT_MSG_DELCHAIN || + event == NFT_MSG_DESTROYCHAIN)) { nlmsg_end(skb, nlh); return 0; } @@ -4039,7 +4042,7 @@ void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule) /* can only be used if rule is no longer visible to dumps */ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule) { - lockdep_commit_lock_is_held(ctx->net); + WARN_ON_ONCE(!lockdep_commit_lock_is_held(ctx->net)); nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_RELEASE); nf_tables_rule_destroy(ctx, rule); @@ -4845,9 +4848,10 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, u32 seq = ctx->seq; int i; - event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); - nlh = nfnl_msg_put(skb, portid, seq, event, flags, ctx->family, - NFNETLINK_V0, nft_base_seq(ctx->net)); + nlh = nfnl_msg_put(skb, portid, seq, + nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event), + flags, ctx->family, NFNETLINK_V0, + nft_base_seq(ctx->net)); if (!nlh) goto nla_put_failure; @@ -4859,7 +4863,8 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, NFTA_SET_PAD)) goto nla_put_failure; - if (event 
== NFT_MSG_DELSET) { + if (event == NFT_MSG_DELSET || + event == NFT_MSG_DESTROYSET) { nlmsg_end(skb, nlh); return 0; } @@ -5859,7 +5864,7 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding, enum nft_trans_phase phase) { - lockdep_commit_lock_is_held(ctx->net); + WARN_ON_ONCE(!lockdep_commit_lock_is_held(ctx->net)); switch (phase) { case NFT_TRANS_PREPARE_ERROR: @@ -8338,25 +8343,26 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net, { struct nlmsghdr *nlh; - event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); - nlh = nfnl_msg_put(skb, portid, seq, event, flags, family, - NFNETLINK_V0, nft_base_seq(net)); + nlh = nfnl_msg_put(skb, portid, seq, + nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event), + flags, family, NFNETLINK_V0, nft_base_seq(net)); if (!nlh) goto nla_put_failure; if (nla_put_string(skb, NFTA_OBJ_TABLE, table->name) || nla_put_string(skb, NFTA_OBJ_NAME, obj->key.name) || + nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) || nla_put_be64(skb, NFTA_OBJ_HANDLE, cpu_to_be64(obj->handle), NFTA_OBJ_PAD)) goto nla_put_failure; - if (event == NFT_MSG_DELOBJ) { + if (event == NFT_MSG_DELOBJ || + event == NFT_MSG_DESTROYOBJ) { nlmsg_end(skb, nlh); return 0; } - if (nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) || - nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) || + if (nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) || nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset)) goto nla_put_failure; @@ -9382,9 +9388,9 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net, struct nft_hook *hook; struct nlmsghdr *nlh; - event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); - nlh = nfnl_msg_put(skb, portid, seq, event, flags, family, - NFNETLINK_V0, nft_base_seq(net)); + nlh = nfnl_msg_put(skb, portid, seq, + nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event), + flags, family, NFNETLINK_V0, nft_base_seq(net)); if (!nlh) goto nla_put_failure; @@ 
-9394,7 +9400,9 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net, NFTA_FLOWTABLE_PAD)) goto nla_put_failure; - if (event == NFT_MSG_DELFLOWTABLE && !hook_list) { + if (!hook_list && + (event == NFT_MSG_DELFLOWTABLE || + event == NFT_MSG_DESTROYFLOWTABLE)) { nlmsg_end(skb, nlh); return 0; } diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index eab4f476b47f..38d75484e531 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -461,11 +461,6 @@ static int cttimeout_default_get(struct sk_buff *skb, case IPPROTO_UDPLITE: timeouts = nf_udp_pernet(info->net)->timeouts; break; - case IPPROTO_DCCP: -#ifdef CONFIG_NF_CT_PROTO_DCCP - timeouts = nf_dccp_pernet(info->net)->dccp_timeout; -#endif - break; case IPPROTO_ICMPV6: timeouts = &nf_icmpv6_pernet(info->net)->timeout; break; diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index c74012c99125..7eedf4e3ae9c 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -407,6 +407,7 @@ err: regs->verdict.code = NFT_BREAK; } +#ifdef CONFIG_NFT_EXTHDR_DCCP static void nft_exthdr_dccp_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -482,6 +483,7 @@ static void nft_exthdr_dccp_eval(const struct nft_expr *expr, err: *dest = 0; } +#endif static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = { [NFTA_EXTHDR_DREG] = { .type = NLA_U32 }, @@ -634,6 +636,7 @@ static int nft_exthdr_ipv4_init(const struct nft_ctx *ctx, return 0; } +#ifdef CONFIG_NFT_EXTHDR_DCCP static int nft_exthdr_dccp_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) @@ -649,6 +652,7 @@ static int nft_exthdr_dccp_init(const struct nft_ctx *ctx, return 0; } +#endif static int nft_exthdr_dump_common(struct sk_buff *skb, const struct nft_exthdr *priv) { @@ -779,6 +783,7 @@ static const struct nft_expr_ops nft_exthdr_sctp_ops = { 
.reduce = nft_exthdr_reduce, }; +#ifdef CONFIG_NFT_EXTHDR_DCCP static const struct nft_expr_ops nft_exthdr_dccp_ops = { .type = &nft_exthdr_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), @@ -787,6 +792,7 @@ static const struct nft_expr_ops nft_exthdr_dccp_ops = { .dump = nft_exthdr_dump, .reduce = nft_exthdr_reduce, }; +#endif static const struct nft_expr_ops * nft_exthdr_select_ops(const struct nft_ctx *ctx, @@ -822,10 +828,12 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx, if (tb[NFTA_EXTHDR_DREG]) return &nft_exthdr_sctp_ops; break; +#ifdef CONFIG_NFT_EXTHDR_DCCP case NFT_EXTHDR_OP_DCCP: if (tb[NFTA_EXTHDR_DREG]) return &nft_exthdr_dccp_ops; break; +#endif } return ERR_PTR(-EOPNOTSUPP); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 6332a0e06596..1ce6c7b75735 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2473,7 +2473,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, unsigned int flags = 0; size_t tlvlen; - /* Error messages get the original request appened, unless the user + /* Error messages get the original request appended, unless the user * requests to cap the error message, and get extra error data if * requested. 
*/ diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 6a40b8d0350d..a18e2c503da6 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1192,7 +1192,7 @@ static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info) continue; uri = nla_data(sdp_attrs[NFC_SDP_ATTR_URI]); - if (uri == NULL || *uri == 0) + if (*uri == 0) continue; tid = local->sdreq_next_tid++; @@ -1540,10 +1540,6 @@ static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info) } apdu = nla_data(info->attrs[NFC_ATTR_SE_APDU]); - if (!apdu) { - rc = -EINVAL; - goto put_dev; - } ctx = kzalloc(sizeof(struct se_io_ctx), GFP_KERNEL); if (!ctx) { diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 3add108340bf..2832e0794197 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -941,8 +941,10 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, break; case OVS_USERSPACE_ATTR_PID: - if (dp->user_features & - OVS_DP_F_DISPATCH_UPCALL_PER_CPU) + if (OVS_CB(skb)->upcall_pid) + upcall.portid = OVS_CB(skb)->upcall_pid; + else if (dp->user_features & + OVS_DP_F_DISPATCH_UPCALL_PER_CPU) upcall.portid = ovs_dp_get_upcall_portid(dp, smp_processor_id()); diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index b990dc83504f..d5b6e2002bc1 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -267,7 +267,9 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) memset(&upcall, 0, sizeof(upcall)); upcall.cmd = OVS_PACKET_CMD_MISS; - if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU) + if (OVS_CB(skb)->upcall_pid) + upcall.portid = OVS_CB(skb)->upcall_pid; + else if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU) upcall.portid = ovs_dp_get_upcall_portid(dp, smp_processor_id()); else @@ -651,6 +653,9 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) !!(hash & OVS_PACKET_HASH_L4_BIT)); } + OVS_CB(packet)->upcall_pid = + 
nla_get_u32_default(a[OVS_PACKET_ATTR_UPCALL_PID], 0); + /* Build an sw_flow for sending this packet. */ flow = ovs_flow_alloc(); err = PTR_ERR(flow); @@ -719,6 +724,7 @@ static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = { [OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG }, [OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 }, [OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 }, + [OVS_PACKET_ATTR_UPCALL_PID] = { .type = NLA_U32 }, }; static const struct genl_small_ops dp_packet_genl_ops[] = { diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index cfeb817a1889..db0c3e69d66c 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -121,6 +121,8 @@ struct datapath { * @cutlen: The number of bytes from the packet end to be removed. * @probability: The sampling probability that was applied to this skb; 0 means * no sampling has occurred; U32_MAX means 100% probability. + * @upcall_pid: Netlink socket PID to use for sending this packet to userspace; + * 0 means "not set" and default per-CPU or per-vport dispatch should be used. 
*/ struct ovs_skb_cb { struct vport *input_vport; @@ -128,6 +130,7 @@ struct ovs_skb_cb { u16 acts_origlen; u32 cutlen; u32 probability; + u32 upcall_pid; }; #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb) diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 8732f6e51ae5..6bbbc16ab778 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -501,6 +501,7 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb, OVS_CB(skb)->mru = 0; OVS_CB(skb)->cutlen = 0; OVS_CB(skb)->probability = 0; + OVS_CB(skb)->upcall_pid = 0; if (unlikely(dev_net(skb->dev) != ovs_dp_get_net(vport->dp))) { u32 mark; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 3d43f3eae759..f6b1ff883c93 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4783,7 +4783,7 @@ static int packet_seq_show(struct seq_file *seq, void *v) READ_ONCE(po->ifindex), packet_sock_flag(po, PACKET_SOCK_RUNNING), atomic_read(&s->sk_rmem_alloc), - from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)), + from_kuid_munged(seq_user_ns(seq), sk_uid(s)), sock_i_ino(s)); } diff --git a/net/packet/diag.c b/net/packet/diag.c index 47f69f3dbf73..6ce1dcc284d9 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -153,7 +153,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, if ((req->pdiag_show & PACKET_SHOW_INFO) && nla_put_u32(skb, PACKET_DIAG_UID, - from_kuid_munged(user_ns, sock_i_uid(sk)))) + from_kuid_munged(user_ns, sk_uid(sk)))) goto out_nlmsg_trim; if ((req->pdiag_show & PACKET_SHOW_MCLIST) && diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 5ce0b3ee5def..ea4d5e6533db 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -584,7 +584,7 @@ static int pn_sock_seq_show(struct seq_file *seq, void *v) sk->sk_protocol, pn->sobject, pn->dobject, pn->resource, sk->sk_state, sk_wmem_alloc_get(sk), sk_rmem_alloc_get(sk), - from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)), + from_kuid_munged(seq_user_ns(seq), sk_uid(sk)), 
sock_i_ino(sk), refcount_read(&sk->sk_refcnt), sk, atomic_read(&sk->sk_drops)); @@ -755,7 +755,7 @@ static int pn_res_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%02X %5u %lu", (int) (psk - pnres.sk), - from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)), + from_kuid_munged(seq_user_ns(seq), sk_uid(sk)), sock_i_ino(sk)); } seq_pad(seq, '\n'); diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 8435a20968ef..086a13170e09 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -598,7 +598,7 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr, } if (addr_type & IPV6_ADDR_LINKLOCAL) { - /* If socket is arleady bound to a link local address, + /* If socket is already bound to a link local address, * the peer address must be on the same link. */ if (sin6->sin6_scope_id == 0 || diff --git a/net/rds/send.c b/net/rds/send.c index 09a280110654..42d991bc8543 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -232,7 +232,7 @@ restart: * If not already working on one, grab the next message. * * cp_xmit_rm holds a ref while we're sending this message down - * the connction. We can use this ref while holding the + * the connection. We can use this ref while holding the * send_sem.. rds_send_reset() is serialized with it. 
*/ if (!rm) { diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index d89bd8d0c354..b5c801c629a4 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -298,15 +298,15 @@ struct socket *rds_tcp_listen_init(struct net *net, bool isv6) sin6 = (struct sockaddr_in6 *)&ss; sin6->sin6_family = PF_INET6; sin6->sin6_addr = in6addr_any; - sin6->sin6_port = (__force u16)htons(RDS_TCP_PORT); + sin6->sin6_port = htons(RDS_TCP_PORT); sin6->sin6_scope_id = 0; sin6->sin6_flowinfo = 0; addr_len = sizeof(*sin6); } else { sin = (struct sockaddr_in *)&ss; sin->sin_family = PF_INET; - sin->sin_addr.s_addr = INADDR_ANY; - sin->sin_port = (__force u16)htons(RDS_TCP_PORT); + sin->sin_addr.s_addr = htonl(INADDR_ANY); + sin->sin_port = htons(RDS_TCP_PORT); addr_len = sizeof(*sin); } diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 057e20cef375..9e468e463467 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -933,18 +933,25 @@ void tcf_idrinfo_destroy(const struct tc_action_ops *ops, struct tcf_idrinfo *idrinfo) { struct idr *idr = &idrinfo->action_idr; + bool mutex_taken = false; struct tc_action *p; - int ret; unsigned long id = 1; unsigned long tmp; + int ret; idr_for_each_entry_ul(idr, p, tmp, id) { + if (tc_act_in_hw(p) && !mutex_taken) { + rtnl_lock(); + mutex_taken = true; + } ret = __tcf_idr_release(p, false, true); if (ret == ACT_P_DELETED) module_put(ops->owner); else if (ret < 0) return; } + if (mutex_taken) + rtnl_unlock(); idr_destroy(&idrinfo->action_idr); } EXPORT_SYMBOL(tcf_idrinfo_destroy); diff --git a/net/sched/em_text.c b/net/sched/em_text.c index 420c66203b17..6b3d0af72c39 100644 --- a/net/sched/em_text.c +++ b/net/sched/em_text.c @@ -108,7 +108,7 @@ static int em_text_dump(struct sk_buff *skb, struct tcf_ematch *m) struct text_match *tm = EM_TEXT_PRIV(m); struct tcf_em_text conf; - strncpy(conf.algo, tm->config->ops->name, sizeof(conf.algo) - 1); + strscpy(conf.algo, tm->config->ops->name); conf.from_offset = tm->from_offset; 
conf.to_offset = tm->to_offset; conf.from_layer = tm->from_layer; diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 48dd8c88903f..dbcfb948c867 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1407,7 +1407,10 @@ static u32 cake_overhead(struct cake_sched_data *q, const struct sk_buff *skb) return cake_calc_overhead(q, len, off); /* borrowed from qdisc_pkt_len_init() */ - hdr_len = skb_transport_offset(skb); + if (!skb->encapsulation) + hdr_len = skb_transport_offset(skb); + else + hdr_len = skb_inner_transport_offset(skb); /* + transport layer */ if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | diff --git a/net/sctp/input.c b/net/sctp/input.c index 0c0d2757f6f8..2dc2666988fb 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -756,7 +756,7 @@ static int __sctp_hash_endpoint(struct sctp_endpoint *ep) struct sock *sk2 = ep2->base.sk; if (!net_eq(sock_net(sk2), net) || sk2 == sk || - !uid_eq(sock_i_uid(sk2), sock_i_uid(sk)) || + !uid_eq(sk_uid(sk2), sk_uid(sk)) || !sk2->sk_reuseport) continue; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index a9ed2ccab1bd..3336dcfb4515 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -261,9 +261,10 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *t) skb_set_inner_ipproto(skb, IPPROTO_SCTP); label = ip6_make_flowlabel(sock_net(sk), skb, fl6->flowlabel, true, fl6); - return udp_tunnel6_xmit_skb(dst, sk, skb, NULL, &fl6->saddr, - &fl6->daddr, tclass, ip6_dst_hoplimit(dst), - label, sctp_sk(sk)->udp_port, t->encap_port, false); + udp_tunnel6_xmit_skb(dst, sk, skb, NULL, &fl6->saddr, &fl6->daddr, + tclass, ip6_dst_hoplimit(dst), label, + sctp_sk(sk)->udp_port, t->encap_port, false, 0); + return 0; } /* Returns the dst cache entry for the given source and destination ip diff --git a/net/sctp/proc.c b/net/sctp/proc.c index ec00ee75d59a..74bff317e205 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -177,7 +177,7 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v) 
seq_printf(seq, "%8pK %8pK %-3d %-3d %-4d %-5d %5u %5lu ", ep, sk, sctp_sk(sk)->type, sk->sk_state, hash, ep->base.bind_addr.port, - from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)), + from_kuid_munged(seq_user_ns(seq), sk_uid(sk)), sock_i_ino(sk)); sctp_seq_dump_local_addrs(seq, &ep->base); @@ -267,7 +267,7 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) assoc->assoc_id, assoc->sndbuf_used, atomic_read(&assoc->rmem_alloc), - from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)), + from_kuid_munged(seq_user_ns(seq), sk_uid(sk)), sock_i_ino(sk), epb->bind_addr.port, assoc->peer.port); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index f402f90eb6b6..a5ccada55f2b 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1103,7 +1103,8 @@ static inline int sctp_v4_xmit(struct sk_buff *skb, struct sctp_transport *t) skb_set_inner_ipproto(skb, IPPROTO_SCTP); udp_tunnel_xmit_skb(dst_rtable(dst), sk, skb, fl4->saddr, fl4->daddr, dscp, ip4_dst_hoplimit(dst), df, - sctp_sk(sk)->udp_port, t->encap_port, false, false); + sctp_sk(sk)->udp_port, t->encap_port, false, false, + 0); return 0; } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 1e5739858c20..4921416434f9 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -8345,8 +8345,8 @@ static int sctp_get_port_local(struct sock *sk, union sctp_addr *addr) bool reuse = (sk->sk_reuse || sp->reuse); struct sctp_bind_hashbucket *head; /* hash list */ struct net *net = sock_net(sk); - kuid_t uid = sock_i_uid(sk); struct sctp_bind_bucket *pp; + kuid_t uid = sk_uid(sk); unsigned short snum; int ret; @@ -8444,7 +8444,7 @@ pp_found: (reuse && (sk2->sk_reuse || sp2->reuse) && sk2->sk_state != SCTP_SS_LISTENING) || (sk->sk_reuseport && sk2->sk_reuseport && - uid_eq(uid, sock_i_uid(sk2)))) + uid_eq(uid, sk_uid(sk2)))) continue; if ((!sk->sk_bound_dev_if || !bound_dev_if2 || @@ -9492,8 +9492,8 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, newsk->sk_sndbuf = sk->sk_sndbuf; 
newsk->sk_rcvbuf = sk->sk_rcvbuf; newsk->sk_lingertime = sk->sk_lingertime; - newsk->sk_rcvtimeo = sk->sk_rcvtimeo; - newsk->sk_sndtimeo = sk->sk_sndtimeo; + newsk->sk_rcvtimeo = READ_ONCE(sk->sk_rcvtimeo); + newsk->sk_sndtimeo = READ_ONCE(sk->sk_sndtimeo); newsk->sk_rxhash = sk->sk_rxhash; newinet = inet_sk(newsk); diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 6946c1462793..4d258a6e8033 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -240,7 +240,7 @@ void sctp_transport_set_owner(struct sctp_transport *transport, void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) { /* If we don't have a fresh route, look one up */ - if (!transport->dst || transport->dst->obsolete) { + if (!transport->dst || READ_ONCE(transport->dst->obsolete)) { sctp_transport_dst_release(transport); transport->af_specific->get_dst(transport, &transport->saddr, &transport->fl, sk); diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 3760131f1484..bdbaad17f980 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -486,8 +486,8 @@ static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk, { /* options we don't get control via setsockopt for */ nsk->sk_type = osk->sk_type; - nsk->sk_sndtimeo = osk->sk_sndtimeo; - nsk->sk_rcvtimeo = osk->sk_rcvtimeo; + nsk->sk_sndtimeo = READ_ONCE(osk->sk_sndtimeo); + nsk->sk_rcvtimeo = READ_ONCE(osk->sk_rcvtimeo); nsk->sk_mark = READ_ONCE(osk->sk_mark); nsk->sk_priority = READ_ONCE(osk->sk_priority); nsk->sk_rcvlowat = osk->sk_rcvlowat; @@ -1585,7 +1585,7 @@ static void smc_connect_work(struct work_struct *work) { struct smc_sock *smc = container_of(work, struct smc_sock, connect_work); - long timeo = smc->sk.sk_sndtimeo; + long timeo = READ_ONCE(smc->sk.sk_sndtimeo); int rc = 0; if (!timeo) @@ -2735,8 +2735,7 @@ int smc_accept(struct socket *sock, struct socket *new_sock, if (lsmc->sockopt_defer_accept && !(arg->flags & O_NONBLOCK)) { /* wait till data arrives on the socket */ - timeo = 
msecs_to_jiffies(lsmc->sockopt_defer_accept * - MSEC_PER_SEC); + timeo = secs_to_jiffies(lsmc->sockopt_defer_accept); if (smc_sk(nsk)->use_fallback) { struct sock *clcsk = smc_sk(nsk)->clcsock->sk; diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 521f5df80e10..5a4db151fe95 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -688,7 +688,7 @@ out: int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, u8 expected_type, unsigned long timeout) { - long rcvtimeo = smc->clcsock->sk->sk_rcvtimeo; + long rcvtimeo = READ_ONCE(smc->clcsock->sk->sk_rcvtimeo); struct sock *clc_sk = smc->clcsock->sk; struct smc_clc_msg_hdr *clcm = buf; struct msghdr msg = {NULL, 0}; @@ -707,7 +707,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, * sizeof(struct smc_clc_msg_hdr) */ krflags = MSG_PEEK | MSG_WAITALL; - clc_sk->sk_rcvtimeo = timeout; + WRITE_ONCE(clc_sk->sk_rcvtimeo, timeout); iov_iter_kvec(&msg.msg_iter, ITER_DEST, &vec, 1, sizeof(struct smc_clc_msg_hdr)); len = sock_recvmsg(smc->clcsock, &msg, krflags); @@ -795,7 +795,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, } out: - clc_sk->sk_rcvtimeo = rcvtimeo; + WRITE_ONCE(clc_sk->sk_rcvtimeo, rcvtimeo); return reason_code; } diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index ac07b963aede..262746e304dd 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -2100,8 +2100,7 @@ int smc_uncompress_bufsize(u8 compressed) /* try to reuse a sndbuf or rmb description slot for a certain * buffer size; if not available, return NULL */ -static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize, - struct rw_semaphore *lock, +static struct smc_buf_desc *smc_buf_get_slot(struct rw_semaphore *lock, struct list_head *buf_list) { struct smc_buf_desc *buf_slot; @@ -2442,7 +2441,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) bufsize = smc_uncompress_bufsize(bufsize_comp); /* check for reusable slot in the link group */ - 
buf_desc = smc_buf_get_slot(bufsize_comp, lock, buf_list); + buf_desc = smc_buf_get_slot(lock, buf_list); if (buf_desc) { buf_desc->is_dma_need_sync = 0; SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, true, bufsize); diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index 6fdb2d96777a..8ed2f6689b01 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -64,7 +64,7 @@ static int smc_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, if (nla_put_u8(skb, SMC_DIAG_SHUTDOWN, sk->sk_shutdown)) return 1; - r->diag_uid = from_kuid_munged(user_ns, sock_i_uid(sk)); + r->diag_uid = from_kuid_munged(user_ns, sk_uid(sk)); r->diag_inode = sock_i_ino(sk); return 0; } diff --git a/net/smc/smc_loopback.c b/net/smc/smc_loopback.c index 3c5f64ca4115..0eb00bbefd17 100644 --- a/net/smc/smc_loopback.c +++ b/net/smc/smc_loopback.c @@ -251,11 +251,6 @@ static int smc_lo_move_data(struct smcd_dev *smcd, u64 dmb_tok, return 0; } -static int smc_lo_supports_v2(void) -{ - return SMC_LO_V2_CAPABLE; -} - static void smc_lo_get_local_gid(struct smcd_dev *smcd, struct smcd_gid *smcd_gid) { @@ -288,7 +283,6 @@ static const struct smcd_ops lo_ops = { .reset_vlan_required = NULL, .signal_event = NULL, .move_data = smc_lo_move_data, - .supports_v2 = smc_lo_supports_v2, .get_local_gid = smc_lo_get_local_gid, .get_chid = smc_lo_get_chid, .get_dev = smc_lo_get_dev, diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index b391c2ef463f..76ad29e31d60 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -370,7 +370,7 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net, goto out_put; new_pe->type = SMC_PNET_ETH; memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN); - strncpy(new_pe->eth_name, eth_name, IFNAMSIZ); + strscpy(new_pe->eth_name, eth_name); rc = -EEXIST; new_netdev = true; mutex_lock(&pnettable->lock); diff --git a/net/socket.c b/net/socket.c index 9a0e720f0859..682969deaed3 100644 --- a/net/socket.c +++ b/net/socket.c @@ -592,10 +592,12 @@ static 
int sockfs_setattr(struct mnt_idmap *idmap, if (!err && (iattr->ia_valid & ATTR_UID)) { struct socket *sock = SOCKET_I(d_inode(dentry)); - if (sock->sk) - sock->sk->sk_uid = iattr->ia_uid; - else + if (sock->sk) { + /* Paired with READ_ONCE() in sk_uid() */ + WRITE_ONCE(sock->sk->sk_uid, iattr->ia_uid); + } else { err = -ENOENT; + } } return err; @@ -843,6 +845,52 @@ static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb, sizeof(ts_pktinfo), &ts_pktinfo); } +bool skb_has_tx_timestamp(struct sk_buff *skb, const struct sock *sk) +{ + const struct sock_exterr_skb *serr = SKB_EXT_ERR(skb); + u32 tsflags = READ_ONCE(sk->sk_tsflags); + + if (serr->ee.ee_errno != ENOMSG || + serr->ee.ee_origin != SO_EE_ORIGIN_TIMESTAMPING) + return false; + + /* software time stamp available and wanted */ + if ((tsflags & SOF_TIMESTAMPING_SOFTWARE) && skb->tstamp) + return true; + /* hardware time stamps available and wanted */ + return (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) && + skb_hwtstamps(skb)->hwtstamp; +} + +int skb_get_tx_timestamp(struct sk_buff *skb, struct sock *sk, + struct timespec64 *ts) +{ + u32 tsflags = READ_ONCE(sk->sk_tsflags); + ktime_t hwtstamp; + int if_index = 0; + + if ((tsflags & SOF_TIMESTAMPING_SOFTWARE) && + ktime_to_timespec64_cond(skb->tstamp, ts)) + return SOF_TIMESTAMPING_TX_SOFTWARE; + + if (!(tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) || + skb_is_swtx_tstamp(skb, false)) + return -ENOENT; + + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NETDEV) + hwtstamp = get_timestamp(sk, skb, &if_index); + else + hwtstamp = skb_hwtstamps(skb)->hwtstamp; + + if (tsflags & SOF_TIMESTAMPING_BIND_PHC) + hwtstamp = ptp_convert_timestamp(&hwtstamp, + READ_ONCE(sk->sk_bind_phc)); + if (!ktime_to_timespec64_cond(hwtstamp, ts)) + return -ENOENT; + + return SOF_TIMESTAMPING_TX_HARDWARE; +} + /* * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP) */ diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index 
d946bfb424c7..43b1f558b33d 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -333,7 +333,7 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, struct strparser *strp = (struct strparser *)desc->arg.data; return __strp_recv(desc, orig_skb, orig_offset, orig_len, - strp->sk->sk_rcvbuf, strp->sk->sk_rcvtimeo); + strp->sk->sk_rcvbuf, READ_ONCE(strp->sk->sk_rcvtimeo)); } static int default_read_sock_done(struct strparser *strp, int err) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 7c61d47ea208..e028bf658499 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -3642,7 +3642,7 @@ int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct netlink_callback *cb, nla_put_u32(skb, TIPC_NLA_SOCK_INO, sock_i_ino(sk)) || nla_put_u32(skb, TIPC_NLA_SOCK_UID, from_kuid_munged(sk_user_ns(NETLINK_CB(cb->skb).sk), - sock_i_uid(sk))) || + sk_uid(sk))) || nla_put_u64_64bit(skb, TIPC_NLA_SOCK_COOKIE, tipc_diag_gen_cookie(sk), TIPC_NLA_SOCK_PAD)) diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 258d6aa4f21a..b85ab0fb3b8c 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -172,7 +172,7 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb, struct udp_media_addr *dst, struct dst_cache *cache) { struct dst_entry *ndst; - int ttl, err = 0; + int ttl, err; local_bh_disable(); ndst = dst_cache_get(cache); @@ -197,7 +197,7 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb, ttl = ip4_dst_hoplimit(&rt->dst); udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb, src->ipv4.s_addr, dst->ipv4.s_addr, 0, ttl, 0, src->port, - dst->port, false, true); + dst->port, false, true, 0); #if IS_ENABLED(CONFIG_IPV6) } else { if (!ndst) { @@ -217,13 +217,13 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb, dst_cache_set_ip6(cache, ndst, &fl6.saddr); } ttl = ip6_dst_hoplimit(ndst); - err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb, NULL, - &src->ipv6, &dst->ipv6, 0, ttl, 0, - src->port, 
dst->port, false); + udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb, NULL, + &src->ipv6, &dst->ipv6, 0, ttl, 0, + src->port, dst->port, false, 0); #endif } local_bh_enable(); - return err; + return 0; tx_error: local_bh_enable(); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index fc88e34b7f33..549d1ea01a72 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -872,6 +872,19 @@ more_data: delta = msg->sg.size; psock->eval = sk_psock_msg_verdict(sk, psock, msg); delta -= msg->sg.size; + + if ((s32)delta > 0) { + /* It indicates that we executed bpf_msg_pop_data(), + * causing the plaintext data size to decrease. + * Therefore the encrypted data size also needs to + * correspondingly decrease. We only need to subtract + * delta to calculate the new ciphertext length since + * ktls does not support block encryption. + */ + struct sk_msg *enc = &ctx->open_rec->msg_encrypted; + + sk_msg_trim(sk, enc, enc->sg.size - delta); + } } if (msg->cork_bytes && msg->cork_bytes > msg->sg.size && !enospc && !full_record) { diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 52b155123985..7a92733706fe 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -934,6 +934,52 @@ static void unix_show_fdinfo(struct seq_file *m, struct socket *sock) #define unix_show_fdinfo NULL #endif +static bool unix_custom_sockopt(int optname) +{ + switch (optname) { + case SO_INQ: + return true; + default: + return false; + } +} + +static int unix_setsockopt(struct socket *sock, int level, int optname, + sockptr_t optval, unsigned int optlen) +{ + struct unix_sock *u = unix_sk(sock->sk); + struct sock *sk = sock->sk; + int val; + + if (level != SOL_SOCKET) + return -EOPNOTSUPP; + + if (!unix_custom_sockopt(optname)) + return sock_setsockopt(sock, level, optname, optval, optlen); + + if (optlen != sizeof(int)) + return -EINVAL; + + if (copy_from_sockptr(&val, optval, sizeof(val))) + return -EFAULT; + + switch (optname) { + case SO_INQ: + if (sk->sk_type != SOCK_STREAM) + return -EINVAL; 
+ + if (val > 1 || val < 0) + return -EINVAL; + + WRITE_ONCE(u->recvmsg_inq, val); + break; + default: + return -ENOPROTOOPT; + } + + return 0; +} + static const struct proto_ops unix_stream_ops = { .family = PF_UNIX, .owner = THIS_MODULE, @@ -950,6 +996,7 @@ static const struct proto_ops unix_stream_ops = { #endif .listen = unix_listen, .shutdown = unix_shutdown, + .setsockopt = unix_setsockopt, .sendmsg = unix_stream_sendmsg, .recvmsg = unix_stream_recvmsg, .read_skb = unix_stream_read_skb, @@ -1116,6 +1163,7 @@ static int unix_create(struct net *net, struct socket *sock, int protocol, switch (sock->type) { case SOCK_STREAM: + set_bit(SOCK_CUSTOM_SOCKOPT, &sock->flags); sock->ops = &unix_stream_ops; break; /* @@ -1847,6 +1895,9 @@ static int unix_accept(struct socket *sock, struct socket *newsock, skb_free_datagram(sk, skb); wake_up_interruptible(&unix_sk(sk)->peer_wait); + if (tsk->sk_type == SOCK_STREAM) + set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags); + /* attach accepted sock to socket */ unix_state_lock(tsk); unix_update_edges(unix_sk(tsk)); @@ -2297,6 +2348,7 @@ static int queue_oob(struct sock *sk, struct msghdr *msg, struct sock *other, spin_lock(&other->sk_receive_queue.lock); WRITE_ONCE(ousk->oob_skb, skb); + WRITE_ONCE(ousk->inq_len, ousk->inq_len + 1); __skb_queue_tail(&other->sk_receive_queue, skb); spin_unlock(&other->sk_receive_queue.lock); @@ -2319,6 +2371,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, struct sock *sk = sock->sk; struct sk_buff *skb = NULL; struct sock *other = NULL; + struct unix_sock *otheru; struct scm_cookie scm; bool fds_sent = false; int err, sent = 0; @@ -2342,14 +2395,16 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, if (msg->msg_namelen) { err = READ_ONCE(sk->sk_state) == TCP_ESTABLISHED ? 
-EISCONN : -EOPNOTSUPP; goto out_err; - } else { - other = unix_peer(sk); - if (!other) { - err = -ENOTCONN; - goto out_err; - } } + other = unix_peer(sk); + if (!other) { + err = -ENOTCONN; + goto out_err; + } + + otheru = unix_sk(other); + if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) goto out_pipe; @@ -2388,8 +2443,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) { skb->ip_summed = CHECKSUM_UNNECESSARY; - err = skb_splice_from_iter(skb, &msg->msg_iter, size, - sk->sk_allocation); + err = skb_splice_from_iter(skb, &msg->msg_iter, size); if (err < 0) goto out_free; @@ -2418,7 +2472,12 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, unix_maybe_add_creds(skb, sk, other); scm_stat_add(other, skb); - skb_queue_tail(&other->sk_receive_queue, skb); + + spin_lock(&other->sk_receive_queue.lock); + WRITE_ONCE(otheru->inq_len, otheru->inq_len + skb->len); + __skb_queue_tail(&other->sk_receive_queue, skb); + spin_unlock(&other->sk_receive_queue.lock); + unix_state_unlock(other); other->sk_data_ready(other); sent += size; @@ -2528,12 +2587,10 @@ int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size, &err, &timeo, last)); if (!skb) { /* implies iolock unlocked */ - unix_state_lock(sk); /* Signal EOF on disconnected non-blocking SEQPACKET socket. 
*/ if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN && - (sk->sk_shutdown & RCV_SHUTDOWN)) + (READ_ONCE(sk->sk_shutdown) & RCV_SHUTDOWN)) err = 0; - unix_state_unlock(sk); goto out; } @@ -2707,6 +2764,7 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state) if (!(state->flags & MSG_PEEK)) { WRITE_ONCE(u->oob_skb, NULL); + WRITE_ONCE(u->inq_len, u->inq_len - 1); if (oob_skb->prev != (struct sk_buff *)&sk->sk_receive_queue && !unix_skb_len(oob_skb->prev)) { @@ -2789,6 +2847,7 @@ unlock: static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor) { + struct sk_buff_head *queue = &sk->sk_receive_queue; struct unix_sock *u = unix_sk(sk); struct sk_buff *skb; int err; @@ -2796,60 +2855,57 @@ static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor) if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)) return -ENOTCONN; - mutex_lock(&u->iolock); - skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err); - mutex_unlock(&u->iolock); - if (!skb) + err = sock_error(sk); + if (err) return err; -#if IS_ENABLED(CONFIG_AF_UNIX_OOB) - if (unlikely(skb == READ_ONCE(u->oob_skb))) { - bool drop = false; - - unix_state_lock(sk); + mutex_lock(&u->iolock); + spin_lock(&queue->lock); - if (sock_flag(sk, SOCK_DEAD)) { - unix_state_unlock(sk); - kfree_skb_reason(skb, SKB_DROP_REASON_SOCKET_CLOSE); - return -ECONNRESET; - } + skb = __skb_dequeue(queue); + if (!skb) { + spin_unlock(&queue->lock); + mutex_unlock(&u->iolock); + return -EAGAIN; + } - spin_lock(&sk->sk_receive_queue.lock); - if (likely(skb == u->oob_skb)) { - WRITE_ONCE(u->oob_skb, NULL); - drop = true; - } - spin_unlock(&sk->sk_receive_queue.lock); + WRITE_ONCE(u->inq_len, u->inq_len - skb->len); - unix_state_unlock(sk); +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + if (skb == u->oob_skb) { + WRITE_ONCE(u->oob_skb, NULL); + spin_unlock(&queue->lock); + mutex_unlock(&u->iolock); - if (drop) { - kfree_skb_reason(skb, SKB_DROP_REASON_UNIX_SKIP_OOB); - return -EAGAIN; - } + 
kfree_skb_reason(skb, SKB_DROP_REASON_UNIX_SKIP_OOB); + return -EAGAIN; } #endif + spin_unlock(&queue->lock); + mutex_unlock(&u->iolock); + return recv_actor(sk, skb); } static int unix_stream_read_generic(struct unix_stream_read_state *state, bool freezable) { - struct scm_cookie scm; + int noblock = state->flags & MSG_DONTWAIT; struct socket *sock = state->socket; + struct msghdr *msg = state->msg; struct sock *sk = sock->sk; - struct unix_sock *u = unix_sk(sk); - int copied = 0; + size_t size = state->size; int flags = state->flags; - int noblock = flags & MSG_DONTWAIT; bool check_creds = false; - int target; + struct scm_cookie scm; + unsigned int last_len; + struct unix_sock *u; + int copied = 0; int err = 0; long timeo; + int target; int skip; - size_t size = state->size; - unsigned int last_len; if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)) { err = -EINVAL; @@ -2869,6 +2925,8 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, memset(&scm, 0, sizeof(scm)); + u = unix_sk(sk); + /* Lock the socket to prevent queue disordering * while sleeps in memcpy_tomsg */ @@ -2960,14 +3018,12 @@ unlock: } /* Copy address just once */ - if (state->msg && state->msg->msg_name) { - DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, - state->msg->msg_name); - unix_copy_addr(state->msg, skb->sk); + if (msg && msg->msg_name) { + DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); - BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, - state->msg->msg_name, - &state->msg->msg_namelen); + unix_copy_addr(msg, skb->sk); + BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, msg->msg_name, + &msg->msg_namelen); sunaddr = NULL; } @@ -2996,7 +3052,11 @@ unlock: if (unix_skb_len(skb)) break; - skb_unlink(skb, &sk->sk_receive_queue); + spin_lock(&sk->sk_receive_queue.lock); + WRITE_ONCE(u->inq_len, u->inq_len - skb->len); + __skb_unlink(skb, &sk->sk_receive_queue); + spin_unlock(&sk->sk_receive_queue.lock); + consume_skb(skb); if (scm.fp) @@ -3025,10 +3085,17 @@ 
unlock: } while (size); mutex_unlock(&u->iolock); - if (state->msg) - scm_recv_unix(sock, state->msg, &scm, flags); - else + if (msg) { + scm_recv_unix(sock, msg, &scm, flags); + + if (READ_ONCE(u->recvmsg_inq) || msg->msg_get_inq) { + msg->msg_inq = READ_ONCE(u->inq_len); + put_cmsg(msg, SOL_SOCKET, SCM_INQ, + sizeof(msg->msg_inq), &msg->msg_inq); + } + } else { scm_destroy(&scm); + } out: return copied ? : err; } @@ -3167,9 +3234,11 @@ long unix_inq_len(struct sock *sk) if (READ_ONCE(sk->sk_state) == TCP_LISTEN) return -EINVAL; + if (sk->sk_type == SOCK_STREAM) + return READ_ONCE(unix_sk(sk)->inq_len); + spin_lock(&sk->sk_receive_queue.lock); - if (sk->sk_type == SOCK_STREAM || - sk->sk_type == SOCK_SEQPACKET) { + if (sk->sk_type == SOCK_SEQPACKET) { skb_queue_walk(&sk->sk_receive_queue, skb) amount += unix_skb_len(skb); } else { @@ -3697,7 +3766,7 @@ static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v) goto unlock; } - uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)); + uid = from_kuid_munged(seq_user_ns(seq), sk_uid(sk)); meta.seq = seq; prog = bpf_iter_get_info(&meta, false); ret = unix_prog_seq_show(prog, &meta, v, uid); diff --git a/net/unix/diag.c b/net/unix/diag.c index 79b182d0e62a..ca3473026151 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -106,7 +106,7 @@ static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb) static int sk_diag_dump_uid(struct sock *sk, struct sk_buff *nlskb, struct user_namespace *user_ns) { - uid_t uid = from_kuid_munged(user_ns, sock_i_uid(sk)); + uid_t uid = from_kuid_munged(user_ns, sk_uid(sk)); return nla_put(nlskb, UNIX_DIAG_UID, sizeof(uid_t), &uid); } diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 1053662725f8..218d91e6b32b 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1423,6 +1423,28 @@ static int vsock_do_ioctl(struct socket *sock, unsigned int cmd, vsk = vsock_sk(sk); switch (cmd) { + case SIOCINQ: { + ssize_t n_bytes; + + if 
(!vsk->transport) { + ret = -EOPNOTSUPP; + break; + } + + if (sock_type_connectible(sk->sk_type) && + sk->sk_state == TCP_LISTEN) { + ret = -EINVAL; + break; + } + + n_bytes = vsock_stream_has_data(vsk); + if (n_bytes < 0) { + ret = n_bytes; + break; + } + ret = put_user(n_bytes, arg); + break; + } case SIOCOUTQ: { ssize_t n_bytes; diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index 31342ab502b4..432fcbbd14d4 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -694,15 +694,26 @@ out: static s64 hvs_stream_has_data(struct vsock_sock *vsk) { struct hvsock *hvs = vsk->trans; + bool need_refill; s64 ret; if (hvs->recv_data_len > 0) - return 1; + return hvs->recv_data_len; switch (hvs_channel_readable_payload(hvs->chan)) { case 1: - ret = 1; - break; + need_refill = !hvs->recv_desc; + if (!need_refill) + return -EIO; + + hvs->recv_desc = hv_pkt_iter_first(hvs->chan); + if (!hvs->recv_desc) + return -ENOBUFS; + + ret = hvs_update_recv_data(hvs); + if (ret) + return ret; + return hvs->recv_data_len; case 0: vsk->peer_shutdown |= SEND_SHUTDOWN; ret = 0; diff --git a/net/wireless/core.c b/net/wireless/core.c index 5c3c72df0591..a7e2931ffb2e 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -239,7 +239,7 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, rdev->opencount--; - if (rdev->scan_req && rdev->scan_req->wdev == wdev) { + if (rdev->scan_req && rdev->scan_req->req.wdev == wdev) { if (WARN_ON(!rdev->scan_req->notified && (!rdev->int_scan_req || !rdev->int_scan_req->notified))) @@ -995,6 +995,24 @@ int wiphy_register(struct wiphy *wiphy) wiphy->max_num_akm_suites > CFG80211_MAX_NUM_AKM_SUITES) return -EINVAL; + /* Allocate radio configuration space for multi-radio wiphy */ + if (wiphy->n_radio > 0) { + int idx; + + wiphy->radio_cfg = kcalloc(wiphy->n_radio, + sizeof(*wiphy->radio_cfg), + GFP_KERNEL); + if (!wiphy->radio_cfg) + return -ENOMEM; + /* + * Initialize 
wiphy radio parameters to IEEE 802.11 + * MIB default values. RTS threshold is disabled by + * default with the special -1 value. + */ + for (idx = 0; idx < wiphy->n_radio; idx++) + wiphy->radio_cfg[idx].rts_threshold = (u32)-1; + } + /* check and set up bitrates */ ieee80211_set_bitrate_flags(wiphy); @@ -1222,6 +1240,7 @@ void cfg80211_dev_free(struct cfg80211_registered_device *rdev) void wiphy_free(struct wiphy *wiphy) { + kfree(wiphy->radio_cfg); put_device(&wiphy->dev); } EXPORT_SYMBOL(wiphy_free); @@ -1555,7 +1574,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, case NETDEV_DOWN: wiphy_lock(&rdev->wiphy); cfg80211_update_iface_num(rdev, wdev->iftype, -1); - if (rdev->scan_req && rdev->scan_req->wdev == wdev) { + if (rdev->scan_req && rdev->scan_req->req.wdev == wdev) { if (WARN_ON(!rdev->scan_req->notified && (!rdev->int_scan_req || !rdev->int_scan_req->notified))) diff --git a/net/wireless/core.h b/net/wireless/core.h index c56a35040caa..b6bd7f4d6385 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -21,6 +21,13 @@ #define WIPHY_IDX_INVALID -1 +struct cfg80211_scan_request_int { + struct cfg80211_scan_info info; + bool notified; + /* must be last - variable members */ + struct cfg80211_scan_request req; +}; + struct cfg80211_registered_device { const struct cfg80211_ops *ops; struct list_head list; @@ -70,8 +77,8 @@ struct cfg80211_registered_device { struct rb_root bss_tree; u32 bss_generation; u32 bss_entries; - struct cfg80211_scan_request *scan_req; /* protected by RTNL */ - struct cfg80211_scan_request *int_scan_req; + struct cfg80211_scan_request_int *scan_req; /* protected by RTNL */ + struct cfg80211_scan_request_int *int_scan_req; struct sk_buff *scan_msg; struct list_head sched_scan_req_list; time64_t suspend_at; diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 05d44a443518..bb5bc6ff09d4 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -352,8 +352,25 @@ 
cfg80211_mlme_check_mlo_compat(const struct ieee80211_multi_link_elem *mle_a, return -EINVAL; } - if (ieee80211_mle_get_ext_mld_capa_op((const u8 *)mle_a) != - ieee80211_mle_get_ext_mld_capa_op((const u8 *)mle_b)) { + /* + * Only verify the values in Extended MLD Capabilities that are + * not reserved when transmitted by an AP (and expected to remain the + * same over time). + * The Recommended Max Simultaneous Links subfield in particular is + * reserved when included in a unicast Probe Response frame and may + * also change when the AP adds/removes links. The BTM MLD + * Recommendation For Multiple APs Support subfield is reserved when + * transmitted by an AP. All other bits are currently reserved. + * See IEEE P802.11be/D7.0, Table 9-417o. + */ + if ((ieee80211_mle_get_ext_mld_capa_op((const u8 *)mle_a) & + (IEEE80211_EHT_ML_EXT_MLD_CAPA_OP_PARAM_UPDATE | + IEEE80211_EHT_ML_EXT_MLD_CAPA_NSTR_UPDATE | + IEEE80211_EHT_ML_EXT_MLD_CAPA_EMLSR_ENA_ON_ONE_LINK)) != + (ieee80211_mle_get_ext_mld_capa_op((const u8 *)mle_b) & + (IEEE80211_EHT_ML_EXT_MLD_CAPA_OP_PARAM_UPDATE | + IEEE80211_EHT_ML_EXT_MLD_CAPA_NSTR_UPDATE | + IEEE80211_EHT_ML_EXT_MLD_CAPA_EMLSR_ENA_ON_ONE_LINK))) { NL_SET_ERR_MSG(extack, "extended link MLD capabilities/ops mismatch"); return -EINVAL; @@ -1331,7 +1348,8 @@ void cfg80211_mlo_reconf_add_done(struct net_device *dev, lockdep_assert_wiphy(wiphy); trace_cfg80211_mlo_reconf_add_done(dev, data->added_links, - data->buf, data->len); + data->buf, data->len, + data->driver_initiated); if (WARN_ON(!wdev->valid_links)) return; @@ -1361,11 +1379,16 @@ void cfg80211_mlo_reconf_add_done(struct net_device *dev, wdev->links[link_id].client.current_bss = bss_from_pub(bss); + if (data->driver_initiated) + cfg80211_hold_bss(bss_from_pub(bss)); + memcpy(wdev->links[link_id].addr, data->links[link_id].addr, ETH_ALEN); } else { - cfg80211_unhold_bss(bss_from_pub(bss)); + if (!data->driver_initiated) + cfg80211_unhold_bss(bss_from_pub(bss)); + cfg80211_put_bss(wiphy, 
bss); } } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 50202d170f3a..4e6c0a4e2a82 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -857,6 +857,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_MLO_RECONF_REM_LINKS] = { .type = NLA_U16 }, [NL80211_ATTR_EPCS] = { .type = NLA_FLAG }, [NL80211_ATTR_ASSOC_MLD_EXT_CAPA_OPS] = { .type = NLA_U16 }, + [NL80211_ATTR_WIPHY_RADIO_INDEX] = { .type = NLA_U8 }, }; /* policy for the key attributes */ @@ -2449,6 +2450,7 @@ fail: static int nl80211_put_radio(struct wiphy *wiphy, struct sk_buff *msg, int idx) { const struct wiphy_radio *r = &wiphy->radio[idx]; + const struct wiphy_radio_cfg *rcfg = &wiphy->radio_cfg[idx]; struct nlattr *radio, *freq; int i; @@ -2459,6 +2461,11 @@ static int nl80211_put_radio(struct wiphy *wiphy, struct sk_buff *msg, int idx) if (nla_put_u32(msg, NL80211_WIPHY_RADIO_ATTR_INDEX, idx)) goto nla_put_failure; + if (rcfg->rts_threshold && + nla_put_u32(msg, NL80211_WIPHY_RADIO_ATTR_RTS_THRESHOLD, + rcfg->rts_threshold)) + goto nla_put_failure; + if (r->antenna_mask && nla_put_u32(msg, NL80211_WIPHY_RADIO_ATTR_ANTENNA_MASK, r->antenna_mask)) @@ -2642,7 +2649,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, u32 tx_ant = 0, rx_ant = 0; int res; - res = rdev_get_antenna(rdev, &tx_ant, &rx_ant); + res = rdev_get_antenna(rdev, -1, &tx_ant, &rx_ant); if (!res) { if (nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_TX, @@ -3611,6 +3618,33 @@ static int nl80211_set_channel(struct sk_buff *skb, struct genl_info *info) return __nl80211_set_channel(rdev, netdev, info, link_id); } +static int nl80211_set_wiphy_radio(struct genl_info *info, + struct cfg80211_registered_device *rdev, + int radio_idx) +{ + u32 rts_threshold = 0, old_rts, changed = 0; + int result; + + if (!rdev->ops->set_wiphy_params) + return -EOPNOTSUPP; + + if (info->attrs[NL80211_ATTR_WIPHY_RTS_THRESHOLD]) { + rts_threshold = nla_get_u32( + 
info->attrs[NL80211_ATTR_WIPHY_RTS_THRESHOLD]); + changed |= WIPHY_PARAM_RTS_THRESHOLD; + } + + old_rts = rdev->wiphy.radio_cfg[radio_idx].rts_threshold; + + rdev->wiphy.radio_cfg[radio_idx].rts_threshold = rts_threshold; + + result = rdev_set_wiphy_params(rdev, radio_idx, changed); + if (result) + rdev->wiphy.radio_cfg[radio_idx].rts_threshold = old_rts; + + return 0; +} + static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = NULL; @@ -3623,6 +3657,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) u32 frag_threshold = 0, rts_threshold = 0; u8 coverage_class = 0; u32 txq_limit = 0, txq_memory_limit = 0, txq_quantum = 0; + int radio_idx = -1; rtnl_lock(); /* @@ -3673,6 +3708,19 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) if (result) return result; + if (info->attrs[NL80211_ATTR_WIPHY_RADIO_INDEX]) { + /* Radio idx is not expected for non-multi radio wiphy */ + if (rdev->wiphy.n_radio <= 0) + return -EINVAL; + + radio_idx = nla_get_u8( + info->attrs[NL80211_ATTR_WIPHY_RADIO_INDEX]); + if (radio_idx >= rdev->wiphy.n_radio) + return -EINVAL; + + return nl80211_set_wiphy_radio(info, rdev, radio_idx); + } + if (info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS]) { struct ieee80211_txq_params txq_params; struct nlattr *tb[NL80211_TXQ_ATTR_MAX + 1]; @@ -3762,7 +3810,8 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) mbm = nla_get_u32(info->attrs[idx]); } - result = rdev_set_tx_power(rdev, txp_wdev, type, mbm); + result = rdev_set_tx_power(rdev, txp_wdev, radio_idx, type, + mbm); if (result) return result; } @@ -3788,7 +3837,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) tx_ant = tx_ant & rdev->wiphy.available_antennas_tx; rx_ant = rx_ant & rdev->wiphy.available_antennas_rx; - result = rdev_set_antenna(rdev, tx_ant, rx_ant); + result = rdev_set_antenna(rdev, radio_idx, tx_ant, rx_ant); if (result) 
return result; } @@ -3882,16 +3931,30 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) if (changed) { u8 old_retry_short, old_retry_long; u32 old_frag_threshold, old_rts_threshold; - u8 old_coverage_class; + u8 old_coverage_class, i; u32 old_txq_limit, old_txq_memory_limit, old_txq_quantum; + u32 *old_radio_rts_threshold = NULL; if (!rdev->ops->set_wiphy_params) return -EOPNOTSUPP; + if (rdev->wiphy.n_radio) { + old_radio_rts_threshold = kcalloc(rdev->wiphy.n_radio, + sizeof(u32), + GFP_KERNEL); + if (!old_radio_rts_threshold) + return -ENOMEM; + } + old_retry_short = rdev->wiphy.retry_short; old_retry_long = rdev->wiphy.retry_long; old_frag_threshold = rdev->wiphy.frag_threshold; old_rts_threshold = rdev->wiphy.rts_threshold; + if (old_radio_rts_threshold) { + for (i = 0 ; i < rdev->wiphy.n_radio; i++) + old_radio_rts_threshold[i] = + rdev->wiphy.radio_cfg[i].rts_threshold; + } old_coverage_class = rdev->wiphy.coverage_class; old_txq_limit = rdev->wiphy.txq_limit; old_txq_memory_limit = rdev->wiphy.txq_memory_limit; @@ -3903,8 +3966,13 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) rdev->wiphy.retry_long = retry_long; if (changed & WIPHY_PARAM_FRAG_THRESHOLD) rdev->wiphy.frag_threshold = frag_threshold; - if (changed & WIPHY_PARAM_RTS_THRESHOLD) + if ((changed & WIPHY_PARAM_RTS_THRESHOLD) && + old_radio_rts_threshold) { rdev->wiphy.rts_threshold = rts_threshold; + for (i = 0 ; i < rdev->wiphy.n_radio; i++) + rdev->wiphy.radio_cfg[i].rts_threshold = + rdev->wiphy.rts_threshold; + } if (changed & WIPHY_PARAM_COVERAGE_CLASS) rdev->wiphy.coverage_class = coverage_class; if (changed & WIPHY_PARAM_TXQ_LIMIT) @@ -3914,18 +3982,26 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) if (changed & WIPHY_PARAM_TXQ_QUANTUM) rdev->wiphy.txq_quantum = txq_quantum; - result = rdev_set_wiphy_params(rdev, changed); + result = rdev_set_wiphy_params(rdev, radio_idx, changed); if (result) { 
rdev->wiphy.retry_short = old_retry_short; rdev->wiphy.retry_long = old_retry_long; rdev->wiphy.frag_threshold = old_frag_threshold; rdev->wiphy.rts_threshold = old_rts_threshold; + if (old_radio_rts_threshold) { + for (i = 0 ; i < rdev->wiphy.n_radio; i++) + rdev->wiphy.radio_cfg[i].rts_threshold = + old_radio_rts_threshold[i]; + } rdev->wiphy.coverage_class = old_coverage_class; rdev->wiphy.txq_limit = old_txq_limit; rdev->wiphy.txq_memory_limit = old_txq_memory_limit; rdev->wiphy.txq_quantum = old_txq_quantum; - return result; } + + if (old_rts_threshold) + kfree(old_radio_rts_threshold); + return result; } return 0; @@ -4015,7 +4091,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag if (rdev->ops->get_tx_power && !wdev->valid_links) { int dbm, ret; - ret = rdev_get_tx_power(rdev, wdev, 0, &dbm); + ret = rdev_get_tx_power(rdev, wdev, -1, 0, &dbm); if (ret == 0 && nla_put_u32(msg, NL80211_ATTR_WIPHY_TX_POWER_LEVEL, DBM_TO_MBM(dbm))) @@ -4087,7 +4163,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag if (rdev->ops->get_tx_power) { int dbm, ret; - ret = rdev_get_tx_power(rdev, wdev, link_id, &dbm); + ret = rdev_get_tx_power(rdev, wdev, -1, link_id, &dbm); if (ret == 0 && nla_put_u32(msg, NL80211_ATTR_WIPHY_TX_POWER_LEVEL, DBM_TO_MBM(dbm))) @@ -6731,6 +6807,185 @@ static bool nl80211_put_signal(struct sk_buff *msg, u8 mask, s8 *signal, return true; } +static int nl80211_fill_link_station(struct sk_buff *msg, + struct cfg80211_registered_device *rdev, + struct link_station_info *link_sinfo) +{ + struct nlattr *bss_param, *link_sinfoattr; + +#define PUT_LINK_SINFO(attr, memb, type) do { \ + BUILD_BUG_ON(sizeof(type) == sizeof(u64)); \ + if (link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_ ## attr) && \ + nla_put_ ## type(msg, NL80211_STA_INFO_ ## attr, \ + link_sinfo->memb)) \ + goto nla_put_failure; \ + } while (0) +#define PUT_LINK_SINFO_U64(attr, memb) do { \ + if (link_sinfo->filled & 
BIT_ULL(NL80211_STA_INFO_ ## attr) && \ + nla_put_u64_64bit(msg, NL80211_STA_INFO_ ## attr, \ + link_sinfo->memb, NL80211_STA_INFO_PAD)) \ + goto nla_put_failure; \ + } while (0) + + link_sinfoattr = nla_nest_start_noflag(msg, NL80211_ATTR_STA_INFO); + if (!link_sinfoattr) + goto nla_put_failure; + + PUT_LINK_SINFO(INACTIVE_TIME, inactive_time, u32); + + if (link_sinfo->filled & (BIT_ULL(NL80211_STA_INFO_RX_BYTES) | + BIT_ULL(NL80211_STA_INFO_RX_BYTES64)) && + nla_put_u32(msg, NL80211_STA_INFO_RX_BYTES, + (u32)link_sinfo->rx_bytes)) + goto nla_put_failure; + + if (link_sinfo->filled & (BIT_ULL(NL80211_STA_INFO_TX_BYTES) | + BIT_ULL(NL80211_STA_INFO_TX_BYTES64)) && + nla_put_u32(msg, NL80211_STA_INFO_TX_BYTES, + (u32)link_sinfo->tx_bytes)) + goto nla_put_failure; + + PUT_LINK_SINFO_U64(RX_BYTES64, rx_bytes); + PUT_LINK_SINFO_U64(TX_BYTES64, tx_bytes); + PUT_LINK_SINFO_U64(RX_DURATION, rx_duration); + PUT_LINK_SINFO_U64(TX_DURATION, tx_duration); + + if (wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_AIRTIME_FAIRNESS)) + PUT_LINK_SINFO(AIRTIME_WEIGHT, airtime_weight, u16); + + switch (rdev->wiphy.signal_type) { + case CFG80211_SIGNAL_TYPE_MBM: + PUT_LINK_SINFO(SIGNAL, signal, u8); + PUT_LINK_SINFO(SIGNAL_AVG, signal_avg, u8); + break; + default: + break; + } + if (link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL)) { + if (!nl80211_put_signal(msg, link_sinfo->chains, + link_sinfo->chain_signal, + NL80211_STA_INFO_CHAIN_SIGNAL)) + goto nla_put_failure; + } + if (link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)) { + if (!nl80211_put_signal(msg, link_sinfo->chains, + link_sinfo->chain_signal_avg, + NL80211_STA_INFO_CHAIN_SIGNAL_AVG)) + goto nla_put_failure; + } + if (link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE)) { + if (!nl80211_put_sta_rate(msg, &link_sinfo->txrate, + NL80211_STA_INFO_TX_BITRATE)) + goto nla_put_failure; + } + if (link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_BITRATE)) { + if 
(!nl80211_put_sta_rate(msg, &link_sinfo->rxrate, + NL80211_STA_INFO_RX_BITRATE)) + goto nla_put_failure; + } + + PUT_LINK_SINFO(RX_PACKETS, rx_packets, u32); + PUT_LINK_SINFO(TX_PACKETS, tx_packets, u32); + PUT_LINK_SINFO(TX_RETRIES, tx_retries, u32); + PUT_LINK_SINFO(TX_FAILED, tx_failed, u32); + PUT_LINK_SINFO(EXPECTED_THROUGHPUT, expected_throughput, u32); + PUT_LINK_SINFO(BEACON_LOSS, beacon_loss_count, u32); + + if (link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_BSS_PARAM)) { + bss_param = nla_nest_start_noflag(msg, + NL80211_STA_INFO_BSS_PARAM); + if (!bss_param) + goto nla_put_failure; + + if (((link_sinfo->bss_param.flags & + BSS_PARAM_FLAGS_CTS_PROT) && + nla_put_flag(msg, NL80211_STA_BSS_PARAM_CTS_PROT)) || + ((link_sinfo->bss_param.flags & + BSS_PARAM_FLAGS_SHORT_PREAMBLE) && + nla_put_flag(msg, + NL80211_STA_BSS_PARAM_SHORT_PREAMBLE)) || + ((link_sinfo->bss_param.flags & + BSS_PARAM_FLAGS_SHORT_SLOT_TIME) && + nla_put_flag(msg, + NL80211_STA_BSS_PARAM_SHORT_SLOT_TIME)) || + nla_put_u8(msg, NL80211_STA_BSS_PARAM_DTIM_PERIOD, + link_sinfo->bss_param.dtim_period) || + nla_put_u16(msg, NL80211_STA_BSS_PARAM_BEACON_INTERVAL, + link_sinfo->bss_param.beacon_interval)) + goto nla_put_failure; + + nla_nest_end(msg, bss_param); + } + + PUT_LINK_SINFO_U64(RX_DROP_MISC, rx_dropped_misc); + PUT_LINK_SINFO_U64(BEACON_RX, rx_beacon); + PUT_LINK_SINFO(BEACON_SIGNAL_AVG, rx_beacon_signal_avg, u8); + PUT_LINK_SINFO(RX_MPDUS, rx_mpdu_count, u32); + PUT_LINK_SINFO(FCS_ERROR_COUNT, fcs_err_count, u32); + if (wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_ACK_SIGNAL_SUPPORT)) { + PUT_LINK_SINFO(ACK_SIGNAL, ack_signal, u8); + PUT_LINK_SINFO(ACK_SIGNAL_AVG, avg_ack_signal, s8); + } + +#undef PUT_LINK_SINFO +#undef PUT_LINK_SINFO_U64 + + if (link_sinfo->pertid) { + struct nlattr *tidsattr; + int tid; + + tidsattr = nla_nest_start_noflag(msg, + NL80211_STA_INFO_TID_STATS); + if (!tidsattr) + goto nla_put_failure; + + for (tid = 0; tid < IEEE80211_NUM_TIDS + 1; tid++) 
{ + struct cfg80211_tid_stats *tidstats; + struct nlattr *tidattr; + + tidstats = &link_sinfo->pertid[tid]; + + if (!tidstats->filled) + continue; + + tidattr = nla_nest_start_noflag(msg, tid + 1); + if (!tidattr) + goto nla_put_failure; + +#define PUT_TIDVAL_U64(attr, memb) do { \ + if (tidstats->filled & BIT(NL80211_TID_STATS_ ## attr) && \ + nla_put_u64_64bit(msg, NL80211_TID_STATS_ ## attr, \ + tidstats->memb, NL80211_TID_STATS_PAD)) \ + goto nla_put_failure; \ + } while (0) + + PUT_TIDVAL_U64(RX_MSDU, rx_msdu); + PUT_TIDVAL_U64(TX_MSDU, tx_msdu); + PUT_TIDVAL_U64(TX_MSDU_RETRIES, tx_msdu_retries); + PUT_TIDVAL_U64(TX_MSDU_FAILED, tx_msdu_failed); + +#undef PUT_TIDVAL_U64 + if ((tidstats->filled & + BIT(NL80211_TID_STATS_TXQ_STATS)) && + !nl80211_put_txq_stats(msg, &tidstats->txq_stats, + NL80211_TID_STATS_TXQ_STATS)) + goto nla_put_failure; + + nla_nest_end(msg, tidattr); + } + + nla_nest_end(msg, tidsattr); + } + + nla_nest_end(msg, link_sinfoattr); + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, u32 seq, int flags, struct cfg80211_registered_device *rdev, @@ -6739,6 +6994,9 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, { void *hdr; struct nlattr *sinfoattr, *bss_param; + struct link_station_info *link_sinfo; + struct nlattr *links, *link; + int link_id; hdr = nl80211hdr_put(msg, portid, seq, flags, cmd); if (!hdr) { @@ -6953,6 +7211,40 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, goto nla_put_failure; } + if (sinfo->valid_links) { + links = nla_nest_start(msg, NL80211_ATTR_MLO_LINKS); + if (!links) + goto nla_put_failure; + + for_each_valid_link(sinfo, link_id) { + link_sinfo = sinfo->links[link_id]; + + if (WARN_ON_ONCE(!link_sinfo)) + continue; + + if (!is_valid_ether_addr(link_sinfo->addr)) + continue; + + link = nla_nest_start(msg, link_id + 1); + if (!link) + goto nla_put_failure; + + if (nla_put_u8(msg, 
NL80211_ATTR_MLO_LINK_ID, + link_id)) + goto nla_put_failure; + + if (nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, + link_sinfo->addr)) + goto nla_put_failure; + + if (nl80211_fill_link_station(msg, rdev, link_sinfo)) + goto nla_put_failure; + + nla_nest_end(msg, link); + } + nla_nest_end(msg, links); + } + cfg80211_sinfo_release_content(sinfo); genlmsg_end(msg, hdr); return 0; @@ -6963,6 +7255,194 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, return -EMSGSIZE; } +static void cfg80211_sta_set_mld_sinfo(struct station_info *sinfo) +{ + struct link_station_info *link_sinfo; + int link_id, init = 0; + u32 link_inactive_time; + + sinfo->signal = -99; + + for_each_valid_link(sinfo, link_id) { + link_sinfo = sinfo->links[link_id]; + if (!link_sinfo) + continue; + + if ((link_sinfo->filled & + BIT_ULL(NL80211_STA_INFO_TX_PACKETS))) { + sinfo->tx_packets += link_sinfo->tx_packets; + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_PACKETS); + } + + if ((link_sinfo->filled & + BIT_ULL(NL80211_STA_INFO_RX_PACKETS))) { + sinfo->rx_packets += link_sinfo->rx_packets; + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_PACKETS); + } + + if (link_sinfo->filled & + (BIT_ULL(NL80211_STA_INFO_TX_BYTES) | + BIT_ULL(NL80211_STA_INFO_TX_BYTES64))) { + sinfo->tx_bytes += link_sinfo->tx_bytes; + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BYTES); + } + + if (link_sinfo->filled & + (BIT_ULL(NL80211_STA_INFO_RX_BYTES) | + BIT_ULL(NL80211_STA_INFO_TX_BYTES64))) { + sinfo->rx_bytes += link_sinfo->rx_bytes; + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_BYTES); + } + + if (link_sinfo->filled & + BIT_ULL(NL80211_STA_INFO_TX_RETRIES)) { + sinfo->tx_retries += link_sinfo->tx_retries; + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_RETRIES); + } + + if (link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_FAILED)) { + sinfo->tx_failed += link_sinfo->tx_failed; + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_FAILED); + } + + if (link_sinfo->filled & + 
BIT_ULL(NL80211_STA_INFO_RX_DROP_MISC)) { + sinfo->rx_dropped_misc += link_sinfo->rx_dropped_misc; + sinfo->filled |= + BIT_ULL(NL80211_STA_INFO_RX_DROP_MISC); + } + + if (link_sinfo->filled & + BIT_ULL(NL80211_STA_INFO_BEACON_LOSS)) { + sinfo->beacon_loss_count += + link_sinfo->beacon_loss_count; + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_BEACON_LOSS); + } + + if (link_sinfo->filled & + BIT_ULL(NL80211_STA_INFO_EXPECTED_THROUGHPUT)) { + sinfo->expected_throughput += + link_sinfo->expected_throughput; + sinfo->filled |= + BIT_ULL(NL80211_STA_INFO_EXPECTED_THROUGHPUT); + } + + if (link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_MPDUS)) { + sinfo->rx_mpdu_count += link_sinfo->rx_mpdu_count; + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_MPDUS); + } + + if (link_sinfo->filled & + BIT_ULL(NL80211_STA_INFO_FCS_ERROR_COUNT)) { + sinfo->fcs_err_count += link_sinfo->fcs_err_count; + sinfo->filled |= + BIT_ULL(NL80211_STA_INFO_FCS_ERROR_COUNT); + } + + if (link_sinfo->filled & + BIT_ULL(NL80211_STA_INFO_BEACON_RX)) { + sinfo->rx_beacon += link_sinfo->rx_beacon; + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_BEACON_RX); + } + + /* Update MLO signal, signal_avg as best among links */ + if ((link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_SIGNAL)) && + link_sinfo->signal > sinfo->signal) { + sinfo->signal = link_sinfo->signal; + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL); + } + + if ((link_sinfo->filled & + BIT_ULL(NL80211_STA_INFO_SIGNAL_AVG)) && + link_sinfo->signal_avg > sinfo->signal_avg) { + sinfo->signal_avg = link_sinfo->signal_avg; + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL_AVG); + } + + /* Update MLO inactive_time, bss_param based on least + * value for corresponding field of link. 
+ */ + if ((link_sinfo->filled & + BIT_ULL(NL80211_STA_INFO_INACTIVE_TIME)) && + (!init || + link_inactive_time > link_sinfo->inactive_time)) { + link_inactive_time = link_sinfo->inactive_time; + sinfo->inactive_time = link_sinfo->inactive_time; + sinfo->filled |= NL80211_STA_INFO_INACTIVE_TIME; + } + + if (link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_BSS_PARAM) && + (!init || + sinfo->bss_param.dtim_period > + link_sinfo->bss_param.dtim_period)) { + sinfo->bss_param.dtim_period = + link_sinfo->bss_param.dtim_period; + sinfo->filled |= NL80211_STA_BSS_PARAM_DTIM_PERIOD; + sinfo->bss_param.beacon_interval = + link_sinfo->bss_param.beacon_interval; + sinfo->filled |= NL80211_STA_BSS_PARAM_BEACON_INTERVAL; + } + + /* Update MLO rates as per last updated link rate */ + if ((link_sinfo->filled & + BIT_ULL(NL80211_STA_INFO_TX_BITRATE)) && + (!init || + link_inactive_time > link_sinfo->inactive_time)) { + sinfo->txrate = link_sinfo->txrate; + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BITRATE); + } + if ((link_sinfo->filled & + BIT_ULL(NL80211_STA_INFO_RX_BITRATE)) && + (!init || + link_inactive_time > link_sinfo->inactive_time)) { + sinfo->rxrate = link_sinfo->rxrate; + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_BITRATE); + } + + if (link_sinfo->filled & + BIT_ULL(NL80211_STA_INFO_TX_DURATION) && + (!init || + link_inactive_time > link_sinfo->inactive_time)) { + sinfo->tx_duration += link_sinfo->tx_duration; + sinfo->filled |= + BIT_ULL(NL80211_STA_INFO_TX_DURATION); + } + if (link_sinfo->filled & + BIT_ULL(NL80211_STA_INFO_RX_DURATION) && + (!init || + link_inactive_time > link_sinfo->inactive_time)) { + sinfo->rx_duration += link_sinfo->rx_duration; + sinfo->filled |= + BIT_ULL(NL80211_STA_INFO_RX_DURATION); + } + init++; + + /* pertid stats accumulate for rx/tx fields */ + if (sinfo->pertid) { + sinfo->pertid->rx_msdu += + link_sinfo->pertid->rx_msdu; + sinfo->pertid->tx_msdu += + link_sinfo->pertid->tx_msdu; + sinfo->pertid->tx_msdu_retries += + 
link_sinfo->pertid->tx_msdu_retries; + sinfo->pertid->tx_msdu_failed += + link_sinfo->pertid->tx_msdu_failed; + + sinfo->pertid->filled |= + BIT(NL80211_TID_STATS_RX_MSDU) | + BIT(NL80211_TID_STATS_TX_MSDU) | + BIT(NL80211_TID_STATS_TX_MSDU_RETRIES) | + BIT(NL80211_TID_STATS_TX_MSDU_FAILED); + } + } + + /* Reset sinfo->filled bits to exclude fields which don't make + * much sense at the MLO level. + */ + sinfo->filled &= ~BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL); + sinfo->filled &= ~BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL_AVG); +} + static int nl80211_dump_station(struct sk_buff *skb, struct netlink_callback *cb) { @@ -6971,7 +7451,7 @@ static int nl80211_dump_station(struct sk_buff *skb, struct wireless_dev *wdev; u8 mac_addr[ETH_ALEN]; int sta_idx = cb->args[2]; - int err; + int err, i; err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev, NULL); if (err) @@ -6991,6 +7471,16 @@ static int nl80211_dump_station(struct sk_buff *skb, while (1) { memset(&sinfo, 0, sizeof(sinfo)); + + for (i = 0; i < IEEE80211_MLD_MAX_NUM_LINKS; i++) { + sinfo.links[i] = + kzalloc(sizeof(*sinfo.links[0]), GFP_KERNEL); + if (!sinfo.links[i]) { + err = -ENOMEM; + goto out_err; + } + } + err = rdev_dump_station(rdev, wdev->netdev, sta_idx, mac_addr, &sinfo); if (err == -ENOENT) @@ -6998,6 +7488,9 @@ static int nl80211_dump_station(struct sk_buff *skb, if (err) goto out_err; + if (sinfo.valid_links) + cfg80211_sta_set_mld_sinfo(&sinfo); + if (nl80211_send_station(skb, NL80211_CMD_NEW_STATION, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, @@ -7012,6 +7505,7 @@ static int nl80211_dump_station(struct sk_buff *skb, cb->args[2] = sta_idx; err = skb->len; out_err: + cfg80211_sinfo_release_content(&sinfo); wiphy_unlock(&rdev->wiphy); return err; @@ -7024,7 +7518,7 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) struct station_info sinfo; struct sk_buff *msg; u8 *mac_addr = NULL; - int err; + int err, i; memset(&sinfo, 0, sizeof(sinfo)); @@ -7036,9 +7530,19 @@ 
static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->get_station) return -EOPNOTSUPP; + for (i = 0; i < IEEE80211_MLD_MAX_NUM_LINKS; i++) { + sinfo.links[i] = kzalloc(sizeof(*sinfo.links[0]), GFP_KERNEL); + if (!sinfo.links[i]) { + cfg80211_sinfo_release_content(&sinfo); + return -ENOMEM; + } + } + err = rdev_get_station(rdev, dev, mac_addr, &sinfo); - if (err) + if (err) { + cfg80211_sinfo_release_content(&sinfo); return err; + } msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { @@ -7046,6 +7550,9 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) return -ENOMEM; } + if (sinfo.valid_links) + cfg80211_sta_set_mld_sinfo(&sinfo); + if (nl80211_send_station(msg, NL80211_CMD_NEW_STATION, info->snd_portid, info->snd_seq, 0, rdev, dev, mac_addr, &sinfo) < 0) { @@ -7352,6 +7859,10 @@ static int nl80211_set_station_tdls(struct genl_info *info, } } + if (info->attrs[NL80211_ATTR_S1G_CAPABILITY]) + params->link_sta_params.s1g_capa = + nla_data(info->attrs[NL80211_ATTR_S1G_CAPABILITY]); + err = nl80211_parse_sta_channel_info(info, params); if (err) return err; @@ -7678,6 +8189,10 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) params.link_sta_params.he_6ghz_capa = nla_data(info->attrs[NL80211_ATTR_HE_6GHZ_CAPABILITY]); + if (info->attrs[NL80211_ATTR_S1G_CAPABILITY]) + params.link_sta_params.s1g_capa = + nla_data(info->attrs[NL80211_ATTR_S1G_CAPABILITY]); + if (info->attrs[NL80211_ATTR_OPMODE_NOTIF]) { params.link_sta_params.opmode_notif_used = true; params.link_sta_params.opmode_notif = @@ -9299,34 +9814,12 @@ static bool nl80211_check_scan_feat(struct wiphy *wiphy, u32 flags, u32 flag, static int nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev, - void *request, struct nlattr **attrs, - bool is_sched_scan) + struct nlattr **attrs, u8 *mac_addr, u8 *mac_addr_mask, + u32 *flags, enum nl80211_feature_flags randomness_flag) { - u8 *mac_addr, 
*mac_addr_mask; - u32 *flags; - enum nl80211_feature_flags randomness_flag; - if (!attrs[NL80211_ATTR_SCAN_FLAGS]) return 0; - if (is_sched_scan) { - struct cfg80211_sched_scan_request *req = request; - - randomness_flag = wdev ? - NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR : - NL80211_FEATURE_ND_RANDOM_MAC_ADDR; - flags = &req->flags; - mac_addr = req->mac_addr; - mac_addr_mask = req->mac_addr_mask; - } else { - struct cfg80211_scan_request *req = request; - - randomness_flag = NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR; - flags = &req->flags; - mac_addr = req->mac_addr; - mac_addr_mask = req->mac_addr_mask; - } - *flags = nla_get_u32(attrs[NL80211_ATTR_SCAN_FLAGS]); if (((*flags & NL80211_SCAN_FLAG_LOW_PRIORITY) && @@ -9375,11 +9868,35 @@ nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev, return 0; } +static int +nl80211_check_scan_flags_sched(struct wiphy *wiphy, struct wireless_dev *wdev, + struct nlattr **attrs, + struct cfg80211_sched_scan_request *req) +{ + return nl80211_check_scan_flags(wiphy, wdev, attrs, + req->mac_addr, req->mac_addr_mask, + &req->flags, + wdev ? 
NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR : + NL80211_FEATURE_ND_RANDOM_MAC_ADDR); +} + +static int +nl80211_check_scan_flags_reg(struct wiphy *wiphy, struct wireless_dev *wdev, + struct nlattr **attrs, + struct cfg80211_scan_request_int *req) +{ + return nl80211_check_scan_flags(wiphy, wdev, attrs, + req->req.mac_addr, + req->req.mac_addr_mask, + &req->req.flags, + NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR); +} + static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev = info->user_ptr[1]; - struct cfg80211_scan_request *request; + struct cfg80211_scan_request_int *request; struct nlattr *scan_freqs = NULL; bool scan_freqs_khz = false; struct nlattr *attr; @@ -9431,21 +9948,21 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) if (ie_len > wiphy->max_scan_ie_len) return -EINVAL; - size = struct_size(request, channels, n_channels); + size = struct_size(request, req.channels, n_channels); ssids_offset = size; - size = size_add(size, array_size(sizeof(*request->ssids), n_ssids)); + size = size_add(size, array_size(sizeof(*request->req.ssids), n_ssids)); ie_offset = size; size = size_add(size, ie_len); request = kzalloc(size, GFP_KERNEL); if (!request) return -ENOMEM; - request->n_channels = n_channels; + request->req.n_channels = n_channels; if (n_ssids) - request->ssids = (void *)request + ssids_offset; - request->n_ssids = n_ssids; + request->req.ssids = (void *)request + ssids_offset; + request->req.n_ssids = n_ssids; if (ie_len) - request->ie = (void *)request + ie_offset; + request->req.ie = (void *)request + ie_offset; i = 0; if (scan_freqs) { @@ -9468,7 +9985,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) !cfg80211_wdev_channel_allowed(wdev, chan)) continue; - request->channels[i] = chan; + request->req.channels[i] = chan; i++; } } else { @@ -9489,7 +10006,7 @@ static int 
nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) !cfg80211_wdev_channel_allowed(wdev, chan)) continue; - request->channels[i] = chan; + request->req.channels[i] = chan; i++; } } @@ -9500,10 +10017,10 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) goto out_free; } - request->n_channels = i; + request->req.n_channels = i; - for (i = 0; i < request->n_channels; i++) { - struct ieee80211_channel *chan = request->channels[i]; + for (i = 0; i < request->req.n_channels; i++) { + struct ieee80211_channel *chan = request->req.channels[i]; /* if we can go off-channel to the target channel we're good */ if (cfg80211_off_channel_oper_allowed(wdev, chan)) @@ -9522,22 +10039,23 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) err = -EINVAL; goto out_free; } - request->ssids[i].ssid_len = nla_len(attr); - memcpy(request->ssids[i].ssid, nla_data(attr), nla_len(attr)); + request->req.ssids[i].ssid_len = nla_len(attr); + memcpy(request->req.ssids[i].ssid, + nla_data(attr), nla_len(attr)); i++; } } if (info->attrs[NL80211_ATTR_IE]) { - request->ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); - memcpy((void *)request->ie, + request->req.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); + memcpy((void *)request->req.ie, nla_data(info->attrs[NL80211_ATTR_IE]), - request->ie_len); + request->req.ie_len); } for (i = 0; i < NUM_NL80211_BANDS; i++) if (wiphy->bands[i]) - request->rates[i] = + request->req.rates[i] = (1 << wiphy->bands[i]->n_bitrates) - 1; if (info->attrs[NL80211_ATTR_SCAN_SUPP_RATES]) { @@ -9557,25 +10075,24 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) err = ieee80211_get_ratemask(wiphy->bands[band], nla_data(attr), nla_len(attr), - &request->rates[band]); + &request->req.rates[band]); if (err) goto out_free; } } if (info->attrs[NL80211_ATTR_MEASUREMENT_DURATION]) { - request->duration = + request->req.duration = 
nla_get_u16(info->attrs[NL80211_ATTR_MEASUREMENT_DURATION]); - request->duration_mandatory = + request->req.duration_mandatory = nla_get_flag(info->attrs[NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY]); } - err = nl80211_check_scan_flags(wiphy, wdev, request, info->attrs, - false); + err = nl80211_check_scan_flags_reg(wiphy, wdev, info->attrs, request); if (err) goto out_free; - request->no_cck = + request->req.no_cck = nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]); /* Initial implementation used NL80211_ATTR_MAC to set the specific @@ -9588,19 +10105,21 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) * (NL80211_ATTR_SCAN_FLAGS is used to enable random MAC address use). */ if (info->attrs[NL80211_ATTR_BSSID]) - memcpy(request->bssid, + memcpy(request->req.bssid, nla_data(info->attrs[NL80211_ATTR_BSSID]), ETH_ALEN); - else if (!(request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) && + else if (!(request->req.flags & NL80211_SCAN_FLAG_RANDOM_ADDR) && info->attrs[NL80211_ATTR_MAC]) - memcpy(request->bssid, nla_data(info->attrs[NL80211_ATTR_MAC]), + memcpy(request->req.bssid, + nla_data(info->attrs[NL80211_ATTR_MAC]), ETH_ALEN); else - eth_broadcast_addr(request->bssid); + eth_broadcast_addr(request->req.bssid); - request->tsf_report_link_id = nl80211_link_id_or_invalid(info->attrs); - request->wdev = wdev; - request->wiphy = &rdev->wiphy; - request->scan_start = jiffies; + request->req.tsf_report_link_id = + nl80211_link_id_or_invalid(info->attrs); + request->req.wdev = wdev; + request->req.wiphy = &rdev->wiphy; + request->req.scan_start = jiffies; rdev->scan_req = request; err = cfg80211_scan(rdev); @@ -10022,7 +10541,7 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, request->ie_len); } - err = nl80211_check_scan_flags(wiphy, wdev, request, attrs, true); + err = nl80211_check_scan_flags_sched(wiphy, wdev, attrs, request); if (err) goto out_free; @@ -17902,7 +18421,7 @@ void nl80211_notify_iface(struct 
cfg80211_registered_device *rdev, static int nl80211_add_scan_req(struct sk_buff *msg, struct cfg80211_registered_device *rdev) { - struct cfg80211_scan_request *req = rdev->scan_req; + struct cfg80211_scan_request_int *req = rdev->scan_req; struct nlattr *nest; int i; struct cfg80211_scan_info *info; @@ -17913,19 +18432,20 @@ static int nl80211_add_scan_req(struct sk_buff *msg, nest = nla_nest_start_noflag(msg, NL80211_ATTR_SCAN_SSIDS); if (!nest) goto nla_put_failure; - for (i = 0; i < req->n_ssids; i++) { - if (nla_put(msg, i, req->ssids[i].ssid_len, req->ssids[i].ssid)) + for (i = 0; i < req->req.n_ssids; i++) { + if (nla_put(msg, i, req->req.ssids[i].ssid_len, + req->req.ssids[i].ssid)) goto nla_put_failure; } nla_nest_end(msg, nest); - if (req->flags & NL80211_SCAN_FLAG_FREQ_KHZ) { + if (req->req.flags & NL80211_SCAN_FLAG_FREQ_KHZ) { nest = nla_nest_start(msg, NL80211_ATTR_SCAN_FREQ_KHZ); if (!nest) goto nla_put_failure; - for (i = 0; i < req->n_channels; i++) { + for (i = 0; i < req->req.n_channels; i++) { if (nla_put_u32(msg, i, - ieee80211_channel_to_khz(req->channels[i]))) + ieee80211_channel_to_khz(req->req.channels[i]))) goto nla_put_failure; } nla_nest_end(msg, nest); @@ -17934,19 +18454,20 @@ static int nl80211_add_scan_req(struct sk_buff *msg, NL80211_ATTR_SCAN_FREQUENCIES); if (!nest) goto nla_put_failure; - for (i = 0; i < req->n_channels; i++) { - if (nla_put_u32(msg, i, req->channels[i]->center_freq)) + for (i = 0; i < req->req.n_channels; i++) { + if (nla_put_u32(msg, i, + req->req.channels[i]->center_freq)) goto nla_put_failure; } nla_nest_end(msg, nest); } - if (req->ie && - nla_put(msg, NL80211_ATTR_IE, req->ie_len, req->ie)) + if (req->req.ie && + nla_put(msg, NL80211_ATTR_IE, req->req.ie_len, req->req.ie)) goto nla_put_failure; - if (req->flags && - nla_put_u32(msg, NL80211_ATTR_SCAN_FLAGS, req->flags)) + if (req->req.flags && + nla_put_u32(msg, NL80211_ATTR_SCAN_FLAGS, req->req.flags)) goto nla_put_failure; info = rdev->int_scan_req ? 
&rdev->int_scan_req->info : diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 9f4783c2354c..ac6884bacf3f 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -456,15 +456,15 @@ rdev_set_monitor_channel(struct cfg80211_registered_device *rdev, } static inline int rdev_scan(struct cfg80211_registered_device *rdev, - struct cfg80211_scan_request *request) + struct cfg80211_scan_request_int *request) { int ret; - if (WARN_ON_ONCE(!request->n_ssids && request->ssids)) + if (WARN_ON_ONCE(!request->req.n_ssids && request->req.ssids)) return -EINVAL; trace_rdev_scan(&rdev->wiphy, request); - ret = rdev->ops->scan(&rdev->wiphy, request); + ret = rdev->ops->scan(&rdev->wiphy, &request->req); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } @@ -577,35 +577,40 @@ static inline int rdev_leave_ibss(struct cfg80211_registered_device *rdev, } static inline int -rdev_set_wiphy_params(struct cfg80211_registered_device *rdev, u32 changed) +rdev_set_wiphy_params(struct cfg80211_registered_device *rdev, int radio_idx, + u32 changed) { int ret = -EOPNOTSUPP; - trace_rdev_set_wiphy_params(&rdev->wiphy, changed); + trace_rdev_set_wiphy_params(&rdev->wiphy, radio_idx, changed); if (rdev->ops->set_wiphy_params) - ret = rdev->ops->set_wiphy_params(&rdev->wiphy, changed); + ret = rdev->ops->set_wiphy_params(&rdev->wiphy, radio_idx, + changed); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_set_tx_power(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, - enum nl80211_tx_power_setting type, int mbm) + struct wireless_dev *wdev, int radio_idx, + enum nl80211_tx_power_setting type, + int mbm) { int ret; - trace_rdev_set_tx_power(&rdev->wiphy, wdev, type, mbm); - ret = rdev->ops->set_tx_power(&rdev->wiphy, wdev, type, mbm); + trace_rdev_set_tx_power(&rdev->wiphy, wdev, radio_idx, type, mbm); + ret = rdev->ops->set_tx_power(&rdev->wiphy, wdev, radio_idx, type, + mbm); trace_rdev_return_int(&rdev->wiphy, 
ret); return ret; } static inline int rdev_get_tx_power(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, unsigned int link_id, - int *dbm) + struct wireless_dev *wdev, int radio_idx, + unsigned int link_id, int *dbm) { int ret; - trace_rdev_get_tx_power(&rdev->wiphy, wdev, link_id); - ret = rdev->ops->get_tx_power(&rdev->wiphy, wdev, link_id, dbm); + trace_rdev_get_tx_power(&rdev->wiphy, wdev, radio_idx, link_id); + ret = rdev->ops->get_tx_power(&rdev->wiphy, wdev, radio_idx, link_id, + dbm); trace_rdev_return_int_int(&rdev->wiphy, ret, *dbm); return ret; } @@ -857,21 +862,21 @@ rdev_update_mgmt_frame_registrations(struct cfg80211_registered_device *rdev, } static inline int rdev_set_antenna(struct cfg80211_registered_device *rdev, - u32 tx_ant, u32 rx_ant) + int radio_idx, u32 tx_ant, u32 rx_ant) { int ret; - trace_rdev_set_antenna(&rdev->wiphy, tx_ant, rx_ant); - ret = rdev->ops->set_antenna(&rdev->wiphy, tx_ant, rx_ant); + trace_rdev_set_antenna(&rdev->wiphy, radio_idx, tx_ant, rx_ant); + ret = rdev->ops->set_antenna(&rdev->wiphy, -1, tx_ant, rx_ant); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_get_antenna(struct cfg80211_registered_device *rdev, - u32 *tx_ant, u32 *rx_ant) + int radio_idx, u32 *tx_ant, u32 *rx_ant) { int ret; - trace_rdev_get_antenna(&rdev->wiphy); - ret = rdev->ops->get_antenna(&rdev->wiphy, tx_ant, rx_ant); + trace_rdev_get_antenna(&rdev->wiphy, radio_idx); + ret = rdev->ops->get_antenna(&rdev->wiphy, radio_idx, tx_ant, rx_ant); if (ret) trace_rdev_return_int(&rdev->wiphy, ret); else diff --git a/net/wireless/reg.c b/net/wireless/reg.c index c1752b31734f..2524bc187a19 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -53,7 +53,7 @@ #include <linux/list.h> #include <linux/ctype.h> #include <linux/nl80211.h> -#include <linux/platform_device.h> +#include <linux/device/faux.h> #include <linux/verification.h> #include <linux/moduleparam.h> #include <linux/firmware.h> @@ -105,7 
+105,7 @@ static struct regulatory_request __rcu *last_request = (void __force __rcu *)&core_request_world; /* To trigger userspace events and load firmware */ -static struct platform_device *reg_pdev; +static struct faux_device *reg_fdev; /* * Central wireless core regulatory domains, we only need two, @@ -583,7 +583,7 @@ static int call_crda(const char *alpha2) else pr_debug("Calling CRDA to update world regulatory domain\n"); - ret = kobject_uevent_env(&reg_pdev->dev.kobj, KOBJ_CHANGE, env); + ret = kobject_uevent_env(&reg_fdev->dev.kobj, KOBJ_CHANGE, env); if (ret) return ret; @@ -779,7 +779,7 @@ static bool regdb_has_valid_signature(const u8 *data, unsigned int size) const struct firmware *sig; bool result; - if (request_firmware(&sig, "regulatory.db.p7s", &reg_pdev->dev)) + if (request_firmware(&sig, "regulatory.db.p7s", &reg_fdev->dev)) return false; result = verify_pkcs7_signature(data, size, sig->data, sig->size, @@ -1061,7 +1061,7 @@ static int query_regdb_file(const char *alpha2) return -ENOMEM; err = request_firmware_nowait(THIS_MODULE, true, "regulatory.db", - &reg_pdev->dev, GFP_KERNEL, + &reg_fdev->dev, GFP_KERNEL, (void *)alpha2, regdb_fw_cb); if (err) kfree(alpha2); @@ -1077,7 +1077,7 @@ int reg_reload_regdb(void) const struct ieee80211_regdomain *current_regdomain; struct regulatory_request *request; - err = request_firmware(&fw, "regulatory.db", &reg_pdev->dev); + err = request_firmware(&fw, "regulatory.db", &reg_fdev->dev); if (err) return err; @@ -4300,12 +4300,12 @@ static int __init regulatory_init_db(void) * in that case, don't try to do any further work here as * it's doomed to lead to crashes. 
*/ - if (IS_ERR_OR_NULL(reg_pdev)) + if (!reg_fdev) return -EINVAL; err = load_builtin_regdb_keys(); if (err) { - platform_device_unregister(reg_pdev); + faux_device_destroy(reg_fdev); return err; } @@ -4313,7 +4313,7 @@ static int __init regulatory_init_db(void) err = regulatory_hint_core(cfg80211_world_regdom->alpha2); if (err) { if (err == -ENOMEM) { - platform_device_unregister(reg_pdev); + faux_device_destroy(reg_fdev); return err; } /* @@ -4342,9 +4342,9 @@ late_initcall(regulatory_init_db); int __init regulatory_init(void) { - reg_pdev = platform_device_register_simple("regulatory", 0, NULL, 0); - if (IS_ERR(reg_pdev)) - return PTR_ERR(reg_pdev); + reg_fdev = faux_device_create("regulatory", NULL, NULL); + if (!reg_fdev) + return -ENODEV; rcu_assign_pointer(cfg80211_regdomain, cfg80211_world_regdom); @@ -4372,9 +4372,9 @@ void regulatory_exit(void) reset_regdomains(true, NULL); rtnl_unlock(); - dev_set_uevent_suppress(&reg_pdev->dev, true); + dev_set_uevent_suppress(&reg_fdev->dev, true); - platform_device_unregister(reg_pdev); + faux_device_destroy(reg_fdev); list_for_each_entry_safe(reg_beacon, btmp, &reg_pending_beacons, list) { list_del(&reg_beacon->list); diff --git a/net/wireless/scan.c b/net/wireless/scan.c index e8a4fe44ec2d..a8339ed52404 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -782,9 +782,9 @@ cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies, } EXPORT_SYMBOL_IF_CFG80211_KUNIT(cfg80211_parse_colocated_ap); -static void cfg80211_scan_req_add_chan(struct cfg80211_scan_request *request, - struct ieee80211_channel *chan, - bool add_to_6ghz) +static void cfg80211_scan_req_add_chan(struct cfg80211_scan_request *request, + struct ieee80211_channel *chan, + bool add_to_6ghz) { int i; u32 n_channels = request->n_channels; @@ -838,30 +838,32 @@ static bool cfg80211_find_ssid_match(struct cfg80211_colocated_ap *ap, return false; } -static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev) +static int 
cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev, + bool first_part) { u8 i; struct cfg80211_colocated_ap *ap; int n_channels, count = 0, err; - struct cfg80211_scan_request *request, *rdev_req = rdev->scan_req; + struct cfg80211_scan_request_int *request, *rdev_req = rdev->scan_req; LIST_HEAD(coloc_ap_list); bool need_scan_psc = true; const struct ieee80211_sband_iftype_data *iftd; size_t size, offs_ssids, offs_6ghz_params, offs_ies; - rdev_req->scan_6ghz = true; + rdev_req->req.scan_6ghz = true; + rdev_req->req.first_part = first_part; if (!rdev->wiphy.bands[NL80211_BAND_6GHZ]) return -EOPNOTSUPP; iftd = ieee80211_get_sband_iftype_data(rdev->wiphy.bands[NL80211_BAND_6GHZ], - rdev_req->wdev->iftype); + rdev_req->req.wdev->iftype); if (!iftd || !iftd->he_cap.has_he) return -EOPNOTSUPP; n_channels = rdev->wiphy.bands[NL80211_BAND_6GHZ]->n_channels; - if (rdev_req->flags & NL80211_SCAN_FLAG_COLOCATED_6GHZ) { + if (rdev_req->req.flags & NL80211_SCAN_FLAG_COLOCATED_6GHZ) { struct cfg80211_internal_bss *intbss; spin_lock_bh(&rdev->bss_lock); @@ -883,8 +885,8 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev) * This is relevant for ML probe requests when the lower * band APs have not been discovered. 
*/ - if (is_broadcast_ether_addr(rdev_req->bssid) || - !ether_addr_equal(rdev_req->bssid, res->bssid) || + if (is_broadcast_ether_addr(rdev_req->req.bssid) || + !ether_addr_equal(rdev_req->req.bssid, res->bssid) || res->channel->band != NL80211_BAND_6GHZ) continue; @@ -911,13 +913,13 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev) spin_unlock_bh(&rdev->bss_lock); } - size = struct_size(request, channels, n_channels); + size = struct_size(request, req.channels, n_channels); offs_ssids = size; - size += sizeof(*request->ssids) * rdev_req->n_ssids; + size += sizeof(*request->req.ssids) * rdev_req->req.n_ssids; offs_6ghz_params = size; - size += sizeof(*request->scan_6ghz_params) * count; + size += sizeof(*request->req.scan_6ghz_params) * count; offs_ies = size; - size += rdev_req->ie_len; + size += rdev_req->req.ie_len; request = kzalloc(size, GFP_KERNEL); if (!request) { @@ -926,26 +928,26 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev) } *request = *rdev_req; - request->n_channels = 0; - request->n_6ghz_params = 0; - if (rdev_req->n_ssids) { + request->req.n_channels = 0; + request->req.n_6ghz_params = 0; + if (rdev_req->req.n_ssids) { /* * Add the ssids from the parent scan request to the new * scan request, so the driver would be able to use them * in its probe requests to discover hidden APs on PSC * channels. 
*/ - request->ssids = (void *)request + offs_ssids; - memcpy(request->ssids, rdev_req->ssids, - sizeof(*request->ssids) * request->n_ssids); + request->req.ssids = (void *)request + offs_ssids; + memcpy(request->req.ssids, rdev_req->req.ssids, + sizeof(*request->req.ssids) * request->req.n_ssids); } - request->scan_6ghz_params = (void *)request + offs_6ghz_params; + request->req.scan_6ghz_params = (void *)request + offs_6ghz_params; - if (rdev_req->ie_len) { + if (rdev_req->req.ie_len) { void *ie = (void *)request + offs_ies; - memcpy(ie, rdev_req->ie, rdev_req->ie_len); - request->ie = ie; + memcpy(ie, rdev_req->req.ie, rdev_req->req.ie_len); + request->req.ie = ie; } /* @@ -953,10 +955,12 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev) * and at least one of the reported co-located APs with same SSID * indicating that all APs in the same ESS are co-located */ - if (count && request->n_ssids == 1 && request->ssids[0].ssid_len) { + if (count && + request->req.n_ssids == 1 && + request->req.ssids[0].ssid_len) { list_for_each_entry(ap, &coloc_ap_list, list) { if (ap->colocated_ess && - cfg80211_find_ssid_match(ap, request)) { + cfg80211_find_ssid_match(ap, &request->req)) { need_scan_psc = false; break; } @@ -968,51 +972,52 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev) * regardless of the collocated APs (PSC channels or all channels * in case that NL80211_SCAN_FLAG_COLOCATED_6GHZ is not set) */ - for (i = 0; i < rdev_req->n_channels; i++) { - if (rdev_req->channels[i]->band == NL80211_BAND_6GHZ && + for (i = 0; i < rdev_req->req.n_channels; i++) { + if (rdev_req->req.channels[i]->band == NL80211_BAND_6GHZ && ((need_scan_psc && - cfg80211_channel_is_psc(rdev_req->channels[i])) || - !(rdev_req->flags & NL80211_SCAN_FLAG_COLOCATED_6GHZ))) { - cfg80211_scan_req_add_chan(request, - rdev_req->channels[i], + cfg80211_channel_is_psc(rdev_req->req.channels[i])) || + !(rdev_req->req.flags & 
NL80211_SCAN_FLAG_COLOCATED_6GHZ))) { + cfg80211_scan_req_add_chan(&request->req, + rdev_req->req.channels[i], false); } } - if (!(rdev_req->flags & NL80211_SCAN_FLAG_COLOCATED_6GHZ)) + if (!(rdev_req->req.flags & NL80211_SCAN_FLAG_COLOCATED_6GHZ)) goto skip; list_for_each_entry(ap, &coloc_ap_list, list) { bool found = false; struct cfg80211_scan_6ghz_params *scan_6ghz_params = - &request->scan_6ghz_params[request->n_6ghz_params]; + &request->req.scan_6ghz_params[request->req.n_6ghz_params]; struct ieee80211_channel *chan = ieee80211_get_channel(&rdev->wiphy, ap->center_freq); if (!chan || chan->flags & IEEE80211_CHAN_DISABLED || - !cfg80211_wdev_channel_allowed(rdev_req->wdev, chan)) + !cfg80211_wdev_channel_allowed(rdev_req->req.wdev, chan)) continue; - for (i = 0; i < rdev_req->n_channels; i++) { - if (rdev_req->channels[i] == chan) + for (i = 0; i < rdev_req->req.n_channels; i++) { + if (rdev_req->req.channels[i] == chan) found = true; } if (!found) continue; - if (request->n_ssids > 0 && - !cfg80211_find_ssid_match(ap, request)) + if (request->req.n_ssids > 0 && + !cfg80211_find_ssid_match(ap, &request->req)) continue; - if (!is_broadcast_ether_addr(request->bssid) && - !ether_addr_equal(request->bssid, ap->bssid)) + if (!is_broadcast_ether_addr(request->req.bssid) && + !ether_addr_equal(request->req.bssid, ap->bssid)) continue; - if (!request->n_ssids && ap->multi_bss && !ap->transmitted_bssid) + if (!request->req.n_ssids && ap->multi_bss && + !ap->transmitted_bssid) continue; - cfg80211_scan_req_add_chan(request, chan, true); + cfg80211_scan_req_add_chan(&request->req, chan, true); memcpy(scan_6ghz_params->bssid, ap->bssid, ETH_ALEN); scan_6ghz_params->short_ssid = ap->short_ssid; scan_6ghz_params->short_ssid_valid = ap->short_ssid_valid; @@ -1028,14 +1033,14 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev) if (cfg80211_channel_is_psc(chan) && !need_scan_psc) scan_6ghz_params->psc_no_listen = true; - request->n_6ghz_params++; + 
request->req.n_6ghz_params++; } skip: cfg80211_free_coloc_ap_list(&coloc_ap_list); - if (request->n_channels) { - struct cfg80211_scan_request *old = rdev->int_scan_req; + if (request->req.n_channels) { + struct cfg80211_scan_request_int *old = rdev->int_scan_req; rdev->int_scan_req = request; @@ -1043,7 +1048,7 @@ skip: * If this scan follows a previous scan, save the scan start * info from the first part of the scan */ - if (old) + if (!first_part && !WARN_ON(!old)) rdev->int_scan_req->info = old->info; err = rdev_scan(rdev, request); @@ -1063,35 +1068,39 @@ skip: int cfg80211_scan(struct cfg80211_registered_device *rdev) { - struct cfg80211_scan_request *request; - struct cfg80211_scan_request *rdev_req = rdev->scan_req; + struct cfg80211_scan_request_int *request; + struct cfg80211_scan_request_int *rdev_req = rdev->scan_req; u32 n_channels = 0, idx, i; - if (!(rdev->wiphy.flags & WIPHY_FLAG_SPLIT_SCAN_6GHZ)) + if (!(rdev->wiphy.flags & WIPHY_FLAG_SPLIT_SCAN_6GHZ)) { + rdev_req->req.first_part = true; return rdev_scan(rdev, rdev_req); + } - for (i = 0; i < rdev_req->n_channels; i++) { - if (rdev_req->channels[i]->band != NL80211_BAND_6GHZ) + for (i = 0; i < rdev_req->req.n_channels; i++) { + if (rdev_req->req.channels[i]->band != NL80211_BAND_6GHZ) n_channels++; } if (!n_channels) - return cfg80211_scan_6ghz(rdev); + return cfg80211_scan_6ghz(rdev, true); - request = kzalloc(struct_size(request, channels, n_channels), + request = kzalloc(struct_size(request, req.channels, n_channels), GFP_KERNEL); if (!request) return -ENOMEM; *request = *rdev_req; - request->n_channels = n_channels; + request->req.n_channels = n_channels; - for (i = idx = 0; i < rdev_req->n_channels; i++) { - if (rdev_req->channels[i]->band != NL80211_BAND_6GHZ) - request->channels[idx++] = rdev_req->channels[i]; + for (i = idx = 0; i < rdev_req->req.n_channels; i++) { + if (rdev_req->req.channels[i]->band != NL80211_BAND_6GHZ) + request->req.channels[idx++] = + rdev_req->req.channels[i]; } - 
rdev_req->scan_6ghz = false; + rdev_req->req.scan_6ghz = false; + rdev_req->req.first_part = true; rdev->int_scan_req = request; return rdev_scan(rdev, request); } @@ -1099,7 +1108,7 @@ int cfg80211_scan(struct cfg80211_registered_device *rdev) void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool send_message) { - struct cfg80211_scan_request *request, *rdev_req; + struct cfg80211_scan_request_int *request, *rdev_req; struct wireless_dev *wdev; struct sk_buff *msg; #ifdef CONFIG_CFG80211_WEXT @@ -1118,13 +1127,13 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, if (!rdev_req) return; - wdev = rdev_req->wdev; + wdev = rdev_req->req.wdev; request = rdev->int_scan_req ? rdev->int_scan_req : rdev_req; if (wdev_running(wdev) && (rdev->wiphy.flags & WIPHY_FLAG_SPLIT_SCAN_6GHZ) && - !rdev_req->scan_6ghz && !request->info.aborted && - !cfg80211_scan_6ghz(rdev)) + !rdev_req->req.scan_6ghz && !request->info.aborted && + !cfg80211_scan_6ghz(rdev, false)) return; /* @@ -1136,10 +1145,10 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, cfg80211_sme_scan_done(wdev->netdev); if (!request->info.aborted && - request->flags & NL80211_SCAN_FLAG_FLUSH) { + request->req.flags & NL80211_SCAN_FLAG_FLUSH) { /* flush entries from previous scans */ spin_lock_bh(&rdev->bss_lock); - __cfg80211_bss_expire(rdev, request->scan_start); + __cfg80211_bss_expire(rdev, request->req.scan_start); spin_unlock_bh(&rdev->bss_lock); } @@ -1175,13 +1184,16 @@ void __cfg80211_scan_done(struct wiphy *wiphy, struct wiphy_work *wk) void cfg80211_scan_done(struct cfg80211_scan_request *request, struct cfg80211_scan_info *info) { - struct cfg80211_scan_info old_info = request->info; + struct cfg80211_scan_request_int *intreq = + container_of(request, struct cfg80211_scan_request_int, req); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(request->wiphy); + struct cfg80211_scan_info old_info = intreq->info; - trace_cfg80211_scan_done(request, 
info); - WARN_ON(request != wiphy_to_rdev(request->wiphy)->scan_req && - request != wiphy_to_rdev(request->wiphy)->int_scan_req); + trace_cfg80211_scan_done(intreq, info); + WARN_ON(intreq != rdev->scan_req && + intreq != rdev->int_scan_req); - request->info = *info; + intreq->info = *info; /* * In case the scan is split, the scan_start_tsf and tsf_bssid should @@ -1189,14 +1201,13 @@ void cfg80211_scan_done(struct cfg80211_scan_request *request, * be non zero. */ if (request->scan_6ghz && old_info.scan_start_tsf) { - request->info.scan_start_tsf = old_info.scan_start_tsf; - memcpy(request->info.tsf_bssid, old_info.tsf_bssid, - sizeof(request->info.tsf_bssid)); + intreq->info.scan_start_tsf = old_info.scan_start_tsf; + memcpy(intreq->info.tsf_bssid, old_info.tsf_bssid, + sizeof(intreq->info.tsf_bssid)); } - request->notified = true; - wiphy_work_queue(request->wiphy, - &wiphy_to_rdev(request->wiphy)->scan_done_wk); + intreq->notified = true; + wiphy_work_queue(request->wiphy, &rdev->scan_done_wk); } EXPORT_SYMBOL(cfg80211_scan_done); @@ -2220,6 +2231,7 @@ cfg80211_get_6ghz_power_type(const u8 *elems, size_t elems_len) return IEEE80211_REG_LPI_AP; case IEEE80211_6GHZ_CTRL_REG_SP_AP: case IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP: + case IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP_OLD: return IEEE80211_REG_SP_AP; case IEEE80211_6GHZ_CTRL_REG_VLP_AP: return IEEE80211_REG_VLP_AP; @@ -3496,7 +3508,7 @@ int cfg80211_wext_siwscan(struct net_device *dev, struct cfg80211_registered_device *rdev; struct wiphy *wiphy; struct iw_scan_req *wreq = NULL; - struct cfg80211_scan_request *creq; + struct cfg80211_scan_request_int *creq; int i, err, n_channels = 0; enum nl80211_band band; @@ -3526,19 +3538,20 @@ int cfg80211_wext_siwscan(struct net_device *dev, n_channels = ieee80211_get_num_supported_channels(wiphy); } - creq = kzalloc(struct_size(creq, channels, n_channels) + + creq = kzalloc(struct_size(creq, req.channels, n_channels) + sizeof(struct cfg80211_ssid), GFP_ATOMIC); if (!creq) 
return -ENOMEM; - creq->wiphy = wiphy; - creq->wdev = dev->ieee80211_ptr; + creq->req.wiphy = wiphy; + creq->req.wdev = dev->ieee80211_ptr; /* SSIDs come after channels */ - creq->ssids = (void *)creq + struct_size(creq, channels, n_channels); - creq->n_channels = n_channels; - creq->n_ssids = 1; - creq->scan_start = jiffies; + creq->req.ssids = (void *)creq + + struct_size(creq, req.channels, n_channels); + creq->req.n_channels = n_channels; + creq->req.n_ssids = 1; + creq->req.scan_start = jiffies; /* translate "Scan on frequencies" request */ i = 0; @@ -3554,7 +3567,7 @@ int cfg80211_wext_siwscan(struct net_device *dev, /* ignore disabled channels */ chan = &wiphy->bands[band]->channels[j]; if (chan->flags & IEEE80211_CHAN_DISABLED || - !cfg80211_wdev_channel_allowed(creq->wdev, chan)) + !cfg80211_wdev_channel_allowed(creq->req.wdev, chan)) continue; /* If we have a wireless request structure and the @@ -3577,7 +3590,8 @@ int cfg80211_wext_siwscan(struct net_device *dev, } wext_freq_found: - creq->channels[i] = &wiphy->bands[band]->channels[j]; + creq->req.channels[i] = + &wiphy->bands[band]->channels[j]; i++; wext_freq_not_found: ; } @@ -3588,28 +3602,30 @@ int cfg80211_wext_siwscan(struct net_device *dev, goto out; } - /* Set real number of channels specified in creq->channels[] */ - creq->n_channels = i; + /* Set real number of channels specified in creq->req.channels[] */ + creq->req.n_channels = i; /* translate "Scan for SSID" request */ if (wreq) { if (wrqu->data.flags & IW_SCAN_THIS_ESSID) { if (wreq->essid_len > IEEE80211_MAX_SSID_LEN) return -EINVAL; - memcpy(creq->ssids[0].ssid, wreq->essid, wreq->essid_len); - creq->ssids[0].ssid_len = wreq->essid_len; + memcpy(creq->req.ssids[0].ssid, wreq->essid, + wreq->essid_len); + creq->req.ssids[0].ssid_len = wreq->essid_len; } if (wreq->scan_type == IW_SCAN_TYPE_PASSIVE) { - creq->ssids = NULL; - creq->n_ssids = 0; + creq->req.ssids = NULL; + creq->req.n_ssids = 0; } } for (i = 0; i < NUM_NL80211_BANDS; i++) 
if (wiphy->bands[i]) - creq->rates[i] = (1 << wiphy->bands[i]->n_bitrates) - 1; + creq->req.rates[i] = + (1 << wiphy->bands[i]->n_bitrates) - 1; - eth_broadcast_addr(creq->bssid); + eth_broadcast_addr(creq->req.bssid); scoped_guard(wiphy, &rdev->wiphy) { rdev->scan_req = creq; diff --git a/net/wireless/sme.c b/net/wireless/sme.c index cf998500a965..6d7a7e7f0fc2 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -5,7 +5,7 @@ * (for nl80211's connect() and wext) * * Copyright 2009 Johannes Berg <johannes@sipsolutions.net> - * Copyright (C) 2009, 2020, 2022-2024 Intel Corporation. All rights reserved. + * Copyright (C) 2009, 2020, 2022-2025 Intel Corporation. All rights reserved. * Copyright 2017 Intel Deutschland GmbH */ @@ -64,7 +64,7 @@ static void cfg80211_sme_free(struct wireless_dev *wdev) static int cfg80211_conn_scan(struct wireless_dev *wdev) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); - struct cfg80211_scan_request *request; + struct cfg80211_scan_request_int *request; int n_channels, err; lockdep_assert_wiphy(wdev->wiphy); @@ -77,13 +77,13 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) else n_channels = ieee80211_get_num_supported_channels(wdev->wiphy); - request = kzalloc(sizeof(*request) + sizeof(request->ssids[0]) + - sizeof(request->channels[0]) * n_channels, + request = kzalloc(sizeof(*request) + sizeof(request->req.ssids[0]) + + sizeof(request->req.channels[0]) * n_channels, GFP_KERNEL); if (!request) return -ENOMEM; - request->n_channels = n_channels; + request->req.n_channels = n_channels; if (wdev->conn->params.channel) { enum nl80211_band band = wdev->conn->params.channel->band; struct ieee80211_supported_band *sband = @@ -93,8 +93,8 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) kfree(request); return -EINVAL; } - request->channels[0] = wdev->conn->params.channel; - request->rates[band] = (1 << sband->n_bitrates) - 1; + request->req.channels[0] = wdev->conn->params.channel; + 
request->req.rates[band] = (1 << sband->n_bitrates) - 1; } else { int i = 0, j; enum nl80211_band band; @@ -109,26 +109,26 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) channel = &bands->channels[j]; if (channel->flags & IEEE80211_CHAN_DISABLED) continue; - request->channels[i++] = channel; + request->req.channels[i++] = channel; } - request->rates[band] = (1 << bands->n_bitrates) - 1; + request->req.rates[band] = (1 << bands->n_bitrates) - 1; } n_channels = i; } - request->n_channels = n_channels; - request->ssids = (void *)request + - struct_size(request, channels, n_channels); - request->n_ssids = 1; + request->req.n_channels = n_channels; + request->req.ssids = (void *)request + + struct_size(request, req.channels, n_channels); + request->req.n_ssids = 1; - memcpy(request->ssids[0].ssid, wdev->conn->params.ssid, - wdev->conn->params.ssid_len); - request->ssids[0].ssid_len = wdev->conn->params.ssid_len; + memcpy(request->req.ssids[0].ssid, wdev->conn->params.ssid, + wdev->conn->params.ssid_len); + request->req.ssids[0].ssid_len = wdev->conn->params.ssid_len; - eth_broadcast_addr(request->bssid); + eth_broadcast_addr(request->req.bssid); - request->wdev = wdev; - request->wiphy = &rdev->wiphy; - request->scan_start = jiffies; + request->req.wdev = wdev; + request->req.wiphy = &rdev->wiphy; + request->req.scan_start = jiffies; rdev->scan_req = request; diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 4ed9fada4ec0..a07d88d61bec 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -373,7 +373,8 @@ TRACE_EVENT(rdev_return_int, ); TRACE_EVENT(rdev_scan, - TP_PROTO(struct wiphy *wiphy, struct cfg80211_scan_request *request), + TP_PROTO(struct wiphy *wiphy, + struct cfg80211_scan_request_int *request), TP_ARGS(wiphy, request), TP_STRUCT__entry( WIPHY_ENTRY @@ -406,9 +407,19 @@ DEFINE_EVENT(wiphy_only_evt, rdev_return_void, TP_ARGS(wiphy) ); -DEFINE_EVENT(wiphy_only_evt, rdev_get_antenna, - TP_PROTO(struct wiphy *wiphy), - 
TP_ARGS(wiphy) +TRACE_EVENT(rdev_get_antenna, + TP_PROTO(struct wiphy *wiphy, int radio_idx), + TP_ARGS(wiphy, radio_idx), + TP_STRUCT__entry( + WIPHY_ENTRY + __field(int, radio_idx) + ), + TP_fast_assign( + WIPHY_ASSIGN; + __entry->radio_idx = radio_idx; + ), + TP_printk(WIPHY_PR_FMT ", radio_idx: %d", + WIPHY_PR_ARG, __entry->radio_idx) ); DEFINE_EVENT(wiphy_only_evt, rdev_rfkill_poll, @@ -1678,18 +1689,20 @@ TRACE_EVENT(rdev_join_ocb, ); TRACE_EVENT(rdev_set_wiphy_params, - TP_PROTO(struct wiphy *wiphy, u32 changed), - TP_ARGS(wiphy, changed), + TP_PROTO(struct wiphy *wiphy, int radio_idx, u32 changed), + TP_ARGS(wiphy, radio_idx, changed), TP_STRUCT__entry( WIPHY_ENTRY + __field(int, radio_idx) __field(u32, changed) ), TP_fast_assign( WIPHY_ASSIGN; + __entry->radio_idx = radio_idx; __entry->changed = changed; ), - TP_printk(WIPHY_PR_FMT ", changed: %u", - WIPHY_PR_ARG, __entry->changed) + TP_printk(WIPHY_PR_FMT ", radio_idx: %d, changed: %u", + WIPHY_PR_ARG, __entry->radio_idx, __entry->changed) ); DECLARE_EVENT_CLASS(wiphy_wdev_link_evt, @@ -1710,30 +1723,51 @@ DECLARE_EVENT_CLASS(wiphy_wdev_link_evt, WIPHY_PR_ARG, WDEV_PR_ARG, __entry->link_id) ); -DEFINE_EVENT(wiphy_wdev_link_evt, rdev_get_tx_power, +TRACE_EVENT(rdev_get_tx_power, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, - unsigned int link_id), - TP_ARGS(wiphy, wdev, link_id) + int radio_idx, unsigned int link_id), + TP_ARGS(wiphy, wdev, radio_idx, link_id), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __field(int, radio_idx) + __field(unsigned int, link_id) + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + __entry->radio_idx = radio_idx; + __entry->link_id = link_id; + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT + ", radio_idx: %d, link_id: %u", + WIPHY_PR_ARG, WDEV_PR_ARG, + __entry->radio_idx, __entry->link_id) ); TRACE_EVENT(rdev_set_tx_power, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, - enum nl80211_tx_power_setting type, int mbm), - TP_ARGS(wiphy, wdev, type, 
mbm), + int radio_idx, enum nl80211_tx_power_setting type, + int mbm), + TP_ARGS(wiphy, wdev, radio_idx, type, mbm), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY + __field(int, radio_idx) __field(enum nl80211_tx_power_setting, type) __field(int, mbm) ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; + __entry->radio_idx = radio_idx; __entry->type = type; __entry->mbm = mbm; ), - TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", type: %u, mbm: %d", - WIPHY_PR_ARG, WDEV_PR_ARG,__entry->type, __entry->mbm) + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT + ", radio_idx: %d, type: %u, mbm: %d", + WIPHY_PR_ARG, WDEV_PR_ARG, + __entry->radio_idx, __entry->type, __entry->mbm) ); TRACE_EVENT(rdev_return_int_int, @@ -1866,26 +1900,24 @@ TRACE_EVENT(rdev_return_void_tx_rx, __entry->rx_max) ); -DECLARE_EVENT_CLASS(tx_rx_evt, - TP_PROTO(struct wiphy *wiphy, u32 tx, u32 rx), - TP_ARGS(wiphy, tx, rx), +TRACE_EVENT(rdev_set_antenna, + TP_PROTO(struct wiphy *wiphy, int radio_idx, u32 tx, u32 rx), + TP_ARGS(wiphy, radio_idx, tx, rx), TP_STRUCT__entry( WIPHY_ENTRY + __field(int, radio_idx) __field(u32, tx) __field(u32, rx) ), TP_fast_assign( WIPHY_ASSIGN; + __entry->radio_idx = radio_idx; __entry->tx = tx; __entry->rx = rx; ), - TP_printk(WIPHY_PR_FMT ", tx: %u, rx: %u ", - WIPHY_PR_ARG, __entry->tx, __entry->rx) -); - -DEFINE_EVENT(tx_rx_evt, rdev_set_antenna, - TP_PROTO(struct wiphy *wiphy, u32 tx, u32 rx), - TP_ARGS(wiphy, tx, rx) + TP_printk(WIPHY_PR_FMT ", radio_idx: %d, tx: %u, rx: %u ", + WIPHY_PR_ARG, __entry->radio_idx, + __entry->tx, __entry->rx) ); DECLARE_EVENT_CLASS(wiphy_netdev_id_evt, @@ -3685,12 +3717,12 @@ TRACE_EVENT(cfg80211_tdls_oper_request, ); TRACE_EVENT(cfg80211_scan_done, - TP_PROTO(struct cfg80211_scan_request *request, + TP_PROTO(struct cfg80211_scan_request_int *request, struct cfg80211_scan_info *info), TP_ARGS(request, info), TP_STRUCT__entry( __field(u32, n_channels) - __dynamic_array(u8, ie, request ? request->ie_len : 0) + __dynamic_array(u8, ie, request ? 
request->req.ie_len : 0) __array(u32, rates, NUM_NL80211_BANDS) __field(u32, wdev_id) MAC_ENTRY(wiphy_mac) @@ -3701,16 +3733,16 @@ TRACE_EVENT(cfg80211_scan_done, ), TP_fast_assign( if (request) { - memcpy(__get_dynamic_array(ie), request->ie, - request->ie_len); - memcpy(__entry->rates, request->rates, + memcpy(__get_dynamic_array(ie), request->req.ie, + request->req.ie_len); + memcpy(__entry->rates, request->req.rates, NUM_NL80211_BANDS); - __entry->wdev_id = request->wdev ? - request->wdev->identifier : 0; - if (request->wiphy) + __entry->wdev_id = request->req.wdev ? + request->req.wdev->identifier : 0; + if (request->req.wiphy) MAC_ASSIGN(wiphy_mac, - request->wiphy->perm_addr); - __entry->no_cck = request->no_cck; + request->req.wiphy->perm_addr); + __entry->no_cck = request->req.no_cck; } if (info) { __entry->aborted = info->aborted; @@ -4126,20 +4158,22 @@ TRACE_EVENT(cfg80211_links_removed, TRACE_EVENT(cfg80211_mlo_reconf_add_done, TP_PROTO(struct net_device *netdev, u16 link_mask, - const u8 *buf, size_t len), - TP_ARGS(netdev, link_mask, buf, len), + const u8 *buf, size_t len, bool driver_initiated), + TP_ARGS(netdev, link_mask, buf, len, driver_initiated), TP_STRUCT__entry( NETDEV_ENTRY __field(u16, link_mask) __dynamic_array(u8, buf, len) + __field(bool, driver_initiated) ), TP_fast_assign( NETDEV_ASSIGN; __entry->link_mask = link_mask; memcpy(__get_dynamic_array(buf), buf, len); + __entry->driver_initiated = driver_initiated; ), - TP_printk(NETDEV_PR_FMT ", link_mask:0x%x", - NETDEV_PR_ARG, __entry->link_mask) + TP_printk(NETDEV_PR_FMT ", link_mask:0x%x, driver_initiated:%d", + NETDEV_PR_ARG, __entry->link_mask, __entry->driver_initiated) ); TRACE_EVENT(rdev_assoc_ml_reconf, diff --git a/net/wireless/util.c b/net/wireless/util.c index 1ad5a6bdfd75..240c68baa3d1 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -2564,6 +2564,30 @@ int cfg80211_check_combinations(struct wiphy *wiphy, } EXPORT_SYMBOL(cfg80211_check_combinations); +int 
cfg80211_get_radio_idx_by_chan(struct wiphy *wiphy, + const struct ieee80211_channel *chan) +{ + const struct wiphy_radio *radio; + int i, j; + u32 freq; + + if (!chan) + return -EINVAL; + + freq = ieee80211_channel_to_khz(chan); + for (i = 0; i < wiphy->n_radio; i++) { + radio = &wiphy->radio[i]; + for (j = 0; j < radio->n_freq_range; j++) { + if (freq >= radio->freq_range[j].start_freq && + freq < radio->freq_range[j].end_freq) + return i; + } + } + + return -ENOENT; +} +EXPORT_SYMBOL(cfg80211_get_radio_idx_by_chan); + int ieee80211_get_ratemask(struct ieee80211_supported_band *sband, const u8 *rates, unsigned int n_rates, u32 *mask) @@ -2674,6 +2698,18 @@ bool cfg80211_does_bw_fit_range(const struct ieee80211_freq_range *freq_range, return false; } +int cfg80211_link_sinfo_alloc_tid_stats(struct link_station_info *link_sinfo, + gfp_t gfp) +{ + link_sinfo->pertid = kcalloc(IEEE80211_NUM_TIDS + 1, + sizeof(*link_sinfo->pertid), gfp); + if (!link_sinfo->pertid) + return -ENOMEM; + + return 0; +} +EXPORT_SYMBOL(cfg80211_link_sinfo_alloc_tid_stats); + int cfg80211_sinfo_alloc_tid_stats(struct station_info *sinfo, gfp_t gfp) { sinfo->pertid = kcalloc(IEEE80211_NUM_TIDS + 1, diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index a74b1afc594e..1241fda78a68 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -263,7 +263,7 @@ int cfg80211_wext_siwrts(struct net_device *dev, else wdev->wiphy->rts_threshold = rts->value; - err = rdev_set_wiphy_params(rdev, WIPHY_PARAM_RTS_THRESHOLD); + err = rdev_set_wiphy_params(rdev, -1, WIPHY_PARAM_RTS_THRESHOLD); if (err) wdev->wiphy->rts_threshold = orts; return err; @@ -304,7 +304,7 @@ int cfg80211_wext_siwfrag(struct net_device *dev, wdev->wiphy->frag_threshold = frag->value & ~0x1; } - err = rdev_set_wiphy_params(rdev, WIPHY_PARAM_FRAG_THRESHOLD); + err = rdev_set_wiphy_params(rdev, -1, WIPHY_PARAM_FRAG_THRESHOLD); if (err) wdev->wiphy->frag_threshold = ofrag; return err; @@ -355,7 +355,7 
@@ static int cfg80211_wext_siwretry(struct net_device *dev, changed |= WIPHY_PARAM_RETRY_SHORT; } - err = rdev_set_wiphy_params(rdev, changed); + err = rdev_set_wiphy_params(rdev, -1, changed); if (err) { wdev->wiphy->retry_short = oshort; wdev->wiphy->retry_long = olong; @@ -890,7 +890,7 @@ static int cfg80211_wext_siwtxpower(struct net_device *dev, guard(wiphy)(&rdev->wiphy); - return rdev_set_tx_power(rdev, wdev, type, DBM_TO_MBM(dbm)); + return rdev_set_tx_power(rdev, wdev, -1, type, DBM_TO_MBM(dbm)); } static int cfg80211_wext_giwtxpower(struct net_device *dev, @@ -910,7 +910,7 @@ static int cfg80211_wext_giwtxpower(struct net_device *dev, return -EOPNOTSUPP; scoped_guard(wiphy, &rdev->wiphy) { - err = rdev_get_tx_power(rdev, wdev, 0, &val); + err = rdev_get_tx_power(rdev, wdev, -1, 0, &val); } if (err) return err; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 1f8ae9f4a3f1..655d1e0ae25f 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -891,7 +891,7 @@ static int x25_accept(struct socket *sock, struct socket *newsock, if (sk->sk_state != TCP_LISTEN) goto out2; - rc = x25_wait_for_data(sk, sk->sk_rcvtimeo); + rc = x25_wait_for_data(sk, READ_ONCE(sk->sk_rcvtimeo)); if (rc) goto out2; skb = skb_dequeue(&sk->sk_receive_queue); diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 72c000c0ae5f..9c3acecc14b1 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -34,7 +34,7 @@ #include "xsk.h" #define TX_BATCH_SIZE 32 -#define MAX_PER_SOCKET_BUDGET (TX_BATCH_SIZE) +#define MAX_PER_SOCKET_BUDGET 32 void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool) { @@ -300,6 +300,13 @@ static bool xsk_tx_writeable(struct xdp_sock *xs) return true; } +static void __xsk_tx_release(struct xdp_sock *xs) +{ + __xskq_cons_release(xs->tx); + if (xsk_tx_writeable(xs)) + xs->sk.sk_write_space(&xs->sk); +} + static bool xsk_is_bound(struct xdp_sock *xs) { if (READ_ONCE(xs->state) == XSK_BOUND) { @@ -407,11 +414,8 @@ void xsk_tx_release(struct xsk_buff_pool *pool) struct xdp_sock 
*xs; rcu_read_lock(); - list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) { - __xskq_cons_release(xs->tx); - if (xsk_tx_writeable(xs)) - xs->sk.sk_write_space(&xs->sk); - } + list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) + __xsk_tx_release(xs); rcu_read_unlock(); } EXPORT_SYMBOL(xsk_tx_release); @@ -779,10 +783,10 @@ free_err: static int __xsk_generic_xmit(struct sock *sk) { struct xdp_sock *xs = xdp_sk(sk); - u32 max_batch = TX_BATCH_SIZE; bool sent_frame = false; struct xdp_desc desc; struct sk_buff *skb; + u32 max_batch; int err = 0; mutex_lock(&xs->mutex); @@ -796,6 +800,7 @@ static int __xsk_generic_xmit(struct sock *sk) if (xs->queue_id >= xs->dev->real_num_tx_queues) goto out; + max_batch = READ_ONCE(xs->max_tx_budget); while (xskq_cons_peek_desc(xs->tx, &desc, xs->pool)) { if (max_batch-- == 0) { err = -EAGAIN; @@ -858,8 +863,7 @@ static int __xsk_generic_xmit(struct sock *sk) out: if (sent_frame) - if (xsk_tx_writeable(xs)) - sk->sk_write_space(sk); + __xsk_tx_release(xs); mutex_unlock(&xs->mutex); return err; @@ -1437,6 +1441,21 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, mutex_unlock(&xs->mutex); return err; } + case XDP_MAX_TX_SKB_BUDGET: + { + unsigned int budget; + + if (optlen != sizeof(budget)) + return -EINVAL; + if (copy_from_sockptr(&budget, optval, sizeof(budget))) + return -EFAULT; + if (!xs->tx || + budget < TX_BATCH_SIZE || budget > xs->tx->nentries) + return -EACCES; + + WRITE_ONCE(xs->max_tx_budget, budget); + return 0; + } default: break; } @@ -1734,6 +1753,7 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol, xs = xdp_sk(sk); xs->state = XSK_READY; + xs->max_tx_budget = TX_BATCH_SIZE; mutex_init(&xs->mutex); INIT_LIST_HEAD(&xs->map_list); diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c index 09dcea0cbbed..0e0bca031c03 100644 --- a/net/xdp/xsk_diag.c +++ b/net/xdp/xsk_diag.c @@ -119,7 +119,7 @@ static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb, if 
((req->xdiag_show & XDP_SHOW_INFO) && nla_put_u32(nlskb, XDP_DIAG_UID, - from_kuid_munged(user_ns, sock_i_uid(sk)))) + from_kuid_munged(user_ns, sk_uid(sk)))) goto out_nlmsg_trim; if ((req->xdiag_show & XDP_SHOW_RING_CFG) && diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 094d2454602e..c5035a9bc3bb 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3925,7 +3925,7 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) * This will force stale_bundle() to fail on any xdst bundle with * this dst linked in it. */ - if (dst->obsolete < 0 && !stale_bundle(dst)) + if (READ_ONCE(dst->obsolete) < 0 && !stale_bundle(dst)) return dst; return NULL; @@ -3953,7 +3953,7 @@ static void xfrm_link_failure(struct sk_buff *skb) static void xfrm_negative_advice(struct sock *sk, struct dst_entry *dst) { - if (dst->obsolete) + if (READ_ONCE(dst->obsolete)) sk_dst_reset(sk); } |