diff options
Diffstat (limited to 'net')
77 files changed, 954 insertions, 578 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index fda3a80e9340..2b74ed56eb16 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -193,6 +193,8 @@ int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack) vlan_group_set_device(grp, vlan->vlan_proto, vlan_id, dev); grp->nr_vlan_devs++; + netdev_update_features(dev); + return 0; out_unregister_netdev: diff --git a/net/atm/common.c b/net/atm/common.c index 881c7f259dbd..c4edc1111bf0 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -881,7 +881,7 @@ out_atmproc_exit: out_atmsvc_exit: atmsvc_exit(); out_atmpvc_exit: - atmsvc_exit(); + atmpvc_exit(); out_unregister_vcc_proto: proto_unregister(&vcc_proto); goto out; diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index f0c862091bff..2c21ae8abadc 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -53,6 +53,11 @@ static bool enable_6lowpan; static struct l2cap_chan *listen_chan; static DEFINE_MUTEX(set_lock); +enum { + LOWPAN_PEER_CLOSING, + LOWPAN_PEER_MAXBITS +}; + struct lowpan_peer { struct list_head list; struct rcu_head rcu; @@ -61,6 +66,8 @@ struct lowpan_peer { /* peer addresses in various formats */ unsigned char lladdr[ETH_ALEN]; struct in6_addr peer_addr; + + DECLARE_BITMAP(flags, LOWPAN_PEER_MAXBITS); }; struct lowpan_btle_dev { @@ -289,6 +296,7 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev, local_skb->pkt_type = PACKET_HOST; local_skb->dev = dev; + skb_reset_mac_header(local_skb); skb_set_transport_header(local_skb, sizeof(struct ipv6hdr)); if (give_skb_to_upper(local_skb, dev) != NET_RX_SUCCESS) { @@ -919,7 +927,9 @@ static int bt_6lowpan_disconnect(struct l2cap_conn *conn, u8 dst_type) BT_DBG("peer %p chan %p", peer, peer->chan); + l2cap_chan_lock(peer->chan); l2cap_chan_close(peer->chan, ENOENT); + l2cap_chan_unlock(peer->chan); return 0; } @@ -956,10 +966,11 @@ static struct l2cap_chan *bt_6lowpan_listen(void) } static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type, - struct l2cap_conn **conn) + struct l2cap_conn **conn, bool disconnect) { struct hci_conn *hcon; struct hci_dev *hdev; + int le_addr_type; int n; n = sscanf(buf, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx %hhu", @@ -970,13 +981,32 @@ static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type, if (n < 7) return -EINVAL; + if (disconnect) { + /* The "disconnect" debugfs command has used different address + * type constants than "connect" since 2015. Let's retain that + * for now even though it's obviously buggy... + */ + *addr_type += 1; + } + + switch (*addr_type) { + case BDADDR_LE_PUBLIC: + le_addr_type = ADDR_LE_DEV_PUBLIC; + break; + case BDADDR_LE_RANDOM: + le_addr_type = ADDR_LE_DEV_RANDOM; + break; + default: + return -EINVAL; + } + /* The LE_PUBLIC address type is ignored because of BDADDR_ANY */ hdev = hci_get_route(addr, BDADDR_ANY, BDADDR_LE_PUBLIC); if (!hdev) return -ENOENT; hci_dev_lock(hdev); - hcon = hci_conn_hash_lookup_le(hdev, addr, *addr_type); + hcon = hci_conn_hash_lookup_le(hdev, addr, le_addr_type); hci_dev_unlock(hdev); hci_dev_put(hdev); @@ -993,41 +1023,52 @@ static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type, static void disconnect_all_peers(void) { struct lowpan_btle_dev *entry; - struct lowpan_peer *peer, *tmp_peer, *new_peer; - struct list_head peers; - - INIT_LIST_HEAD(&peers); + struct lowpan_peer *peer; + int nchans; - /* We make a separate list of peers as the close_cb() will - * modify the device peers list so it is better not to mess - * with the same list at the same time. + /* l2cap_chan_close() cannot be called from RCU, and lock ordering + * chan->lock > devices_lock prevents taking write side lock, so copy + * then close. */ rcu_read_lock(); + list_for_each_entry_rcu(entry, &bt_6lowpan_devices, list) + list_for_each_entry_rcu(peer, &entry->peers, list) + clear_bit(LOWPAN_PEER_CLOSING, peer->flags); + rcu_read_unlock(); - list_for_each_entry_rcu(entry, &bt_6lowpan_devices, list) { - list_for_each_entry_rcu(peer, &entry->peers, list) { - new_peer = kmalloc(sizeof(*new_peer), GFP_ATOMIC); - if (!new_peer) - break; + do { + struct l2cap_chan *chans[32]; + int i; - new_peer->chan = peer->chan; - INIT_LIST_HEAD(&new_peer->list); + nchans = 0; - list_add(&new_peer->list, &peers); - } - } + spin_lock(&devices_lock); - rcu_read_unlock(); + list_for_each_entry_rcu(entry, &bt_6lowpan_devices, list) { + list_for_each_entry_rcu(peer, &entry->peers, list) { + if (test_and_set_bit(LOWPAN_PEER_CLOSING, + peer->flags)) + continue; - spin_lock(&devices_lock); - list_for_each_entry_safe(peer, tmp_peer, &peers, list) { - l2cap_chan_close(peer->chan, ENOENT); + l2cap_chan_hold(peer->chan); + chans[nchans++] = peer->chan; - list_del_rcu(&peer->list); - kfree_rcu(peer, rcu); - } - spin_unlock(&devices_lock); + if (nchans >= ARRAY_SIZE(chans)) + goto done; + } + } + +done: + spin_unlock(&devices_lock); + + for (i = 0; i < nchans; ++i) { + l2cap_chan_lock(chans[i]); + l2cap_chan_close(chans[i], ENOENT); + l2cap_chan_unlock(chans[i]); + l2cap_chan_put(chans[i]); + } + } while (nchans); } struct set_enable { @@ -1050,7 +1091,9 @@ static void do_enable_set(struct work_struct *work) mutex_lock(&set_lock); if (listen_chan) { + l2cap_chan_lock(listen_chan); l2cap_chan_close(listen_chan, 0); + l2cap_chan_unlock(listen_chan); l2cap_chan_put(listen_chan); } @@ -1103,13 +1146,15 @@ static ssize_t lowpan_control_write(struct file *fp, buf[buf_size] = '\0'; if (memcmp(buf, "connect ", 8) == 0) { - ret = get_l2cap_conn(&buf[8], &addr, &addr_type, &conn); + ret = get_l2cap_conn(&buf[8], &addr, &addr_type, &conn, false); if (ret == -EINVAL) return ret; mutex_lock(&set_lock); if (listen_chan) { + l2cap_chan_lock(listen_chan); l2cap_chan_close(listen_chan, 0); + l2cap_chan_unlock(listen_chan); l2cap_chan_put(listen_chan); listen_chan = NULL; } @@ -1140,7 +1185,7 @@ static ssize_t lowpan_control_write(struct file *fp, } if (memcmp(buf, "disconnect ", 11) == 0) { - ret = get_l2cap_conn(&buf[11], &addr, &addr_type, &conn); + ret = get_l2cap_conn(&buf[11], &addr, &addr_type, &conn, true); if (ret < 0) return ret; @@ -1271,7 +1316,9 @@ static void __exit bt_6lowpan_exit(void) debugfs_remove(lowpan_control_debugfs); if (listen_chan) { + l2cap_chan_lock(listen_chan); l2cap_chan_close(listen_chan, 0); + l2cap_chan_unlock(listen_chan); l2cap_chan_put(listen_chan); } diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index c5dedf39a129..6fc0692abf05 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -769,21 +769,23 @@ static void find_bis(struct hci_conn *conn, void *data) d->count++; } -static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, struct hci_conn *conn) +static int hci_le_big_terminate(struct hci_dev *hdev, struct hci_conn *conn) { struct iso_list_data *d; int ret; - bt_dev_dbg(hdev, "big 0x%2.2x sync_handle 0x%4.4x", big, conn->sync_handle); + bt_dev_dbg(hdev, "hcon %p big 0x%2.2x sync_handle 0x%4.4x", conn, + conn->iso_qos.bcast.big, conn->sync_handle); d = kzalloc(sizeof(*d), GFP_KERNEL); if (!d) return -ENOMEM; - d->big = big; + d->big = conn->iso_qos.bcast.big; d->sync_handle = conn->sync_handle; - if (test_and_clear_bit(HCI_CONN_PA_SYNC, &conn->flags)) { + if (conn->type == PA_LINK && + test_and_clear_bit(HCI_CONN_PA_SYNC, &conn->flags)) { hci_conn_hash_list_flag(hdev, find_bis, PA_LINK, HCI_CONN_PA_SYNC, d); @@ -801,6 +803,9 @@ static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, struct hci_conn *c d->big_sync_term = true; } + if (!d->pa_sync_term && !d->big_sync_term) + return 0; + ret = hci_cmd_sync_queue(hdev, big_terminate_sync, d, terminate_big_destroy); if (ret) @@ -852,8 +857,7 @@ static void bis_cleanup(struct hci_conn *conn) hci_le_terminate_big(hdev, conn); } else { - hci_le_big_terminate(hdev, conn->iso_qos.bcast.big, - conn); + hci_le_big_terminate(hdev, conn); } } @@ -994,19 +998,20 @@ static struct hci_conn *__hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t conn->mtu = hdev->le_mtu ? hdev->le_mtu : hdev->acl_mtu; break; case CIS_LINK: - case BIS_LINK: - case PA_LINK: /* conn->src should reflect the local identity address */ hci_copy_identity_address(hdev, &conn->src, &conn->src_type); - /* set proper cleanup function */ - if (!bacmp(dst, BDADDR_ANY)) - conn->cleanup = bis_cleanup; - else if (conn->role == HCI_ROLE_MASTER) + if (conn->role == HCI_ROLE_MASTER) conn->cleanup = cis_cleanup; - conn->mtu = hdev->iso_mtu ? hdev->iso_mtu : - hdev->le_mtu ? hdev->le_mtu : hdev->acl_mtu; + conn->mtu = hdev->iso_mtu; + break; + case PA_LINK: + case BIS_LINK: + /* conn->src should reflect the local identity address */ + hci_copy_identity_address(hdev, &conn->src, &conn->src_type); + conn->cleanup = bis_cleanup; + conn->mtu = hdev->iso_mtu; break; case SCO_LINK: if (lmp_esco_capable(hdev)) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 3418d7b964a1..8ccec73dce45 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3832,13 +3832,14 @@ static void hci_tx_work(struct work_struct *work) static void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_acl_hdr *hdr; - struct hci_conn *conn; __u16 handle, flags; + int err; hdr = skb_pull_data(skb, sizeof(*hdr)); if (!hdr) { bt_dev_err(hdev, "ACL packet too small"); - goto drop; + kfree_skb(skb); + return; } handle = __le16_to_cpu(hdr->handle); @@ -3850,36 +3851,27 @@ static void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb) hdev->stat.acl_rx++; - hci_dev_lock(hdev); - conn = hci_conn_hash_lookup_handle(hdev, handle); - hci_dev_unlock(hdev); - - if (conn) { - hci_conn_enter_active_mode(conn, BT_POWER_FORCE_ACTIVE_OFF); - - /* Send to upper protocol */ - l2cap_recv_acldata(conn, skb, flags); - return; - } else { + err = l2cap_recv_acldata(hdev, handle, skb, flags); + if (err == -ENOENT) bt_dev_err(hdev, "ACL packet for unknown connection handle %d", handle); - } - -drop: - kfree_skb(skb); + else if (err) + bt_dev_dbg(hdev, "ACL packet recv for handle %d failed: %d", + handle, err); } /* SCO data packet */ static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_sco_hdr *hdr; - struct hci_conn *conn; __u16 handle, flags; + int err; hdr = skb_pull_data(skb, sizeof(*hdr)); if (!hdr) { bt_dev_err(hdev, "SCO packet too small"); - goto drop; + kfree_skb(skb); + return; } handle = __le16_to_cpu(hdr->handle); @@ -3891,34 +3883,28 @@ static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb) hdev->stat.sco_rx++; - hci_dev_lock(hdev); - conn = hci_conn_hash_lookup_handle(hdev, handle); - hci_dev_unlock(hdev); + hci_skb_pkt_status(skb) = flags & 0x03; - if (conn) { - /* Send to upper protocol */ - hci_skb_pkt_status(skb) = flags & 0x03; - sco_recv_scodata(conn, skb); - return; - } else { + err = sco_recv_scodata(hdev, handle, skb); + if (err == -ENOENT) bt_dev_err_ratelimited(hdev, "SCO packet for unknown connection handle %d", handle); - } - -drop: - kfree_skb(skb); + else if (err) + bt_dev_dbg(hdev, "SCO packet recv for handle %d failed: %d", + handle, err); } static void hci_isodata_packet(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_iso_hdr *hdr; - struct hci_conn *conn; __u16 handle, flags; + int err; hdr = skb_pull_data(skb, sizeof(*hdr)); if (!hdr) { bt_dev_err(hdev, "ISO packet too small"); - goto drop; + kfree_skb(skb); + return; } handle = __le16_to_cpu(hdr->handle); @@ -3928,22 +3914,13 @@ static void hci_isodata_packet(struct hci_dev *hdev, struct sk_buff *skb) bt_dev_dbg(hdev, "len %d handle 0x%4.4x flags 0x%4.4x", skb->len, handle, flags); - hci_dev_lock(hdev); - conn = hci_conn_hash_lookup_handle(hdev, handle); - hci_dev_unlock(hdev); - - if (!conn) { + err = iso_recv(hdev, handle, skb, flags); + if (err == -ENOENT) bt_dev_err(hdev, "ISO packet for unknown connection handle %d", handle); - goto drop; - } - - /* Send to upper protocol */ - iso_recv(conn, skb, flags); - return; - -drop: - kfree_skb(skb); + else if (err) + bt_dev_dbg(hdev, "ISO packet recv for handle %d failed: %d", + handle, err); } static bool hci_req_is_complete(struct hci_dev *hdev) @@ -4121,7 +4098,7 @@ static void hci_rx_work(struct work_struct *work) } } -static void hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb) +static int hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb) { int err; @@ -4133,16 +4110,19 @@ static void hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb) if (!hdev->sent_cmd) { skb_queue_head(&hdev->cmd_q, skb); queue_work(hdev->workqueue, &hdev->cmd_work); - return; + return -EINVAL; } if (hci_skb_opcode(skb) != HCI_OP_NOP) { err = hci_send_frame(hdev, skb); if (err < 0) { hci_cmd_sync_cancel_sync(hdev, -err); - return; + return err; } atomic_dec(&hdev->cmd_cnt); + } else { + err = -ENODATA; + kfree_skb(skb); } if (hdev->req_status == HCI_REQ_PEND && @@ -4150,12 +4130,15 @@ static void hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb) kfree_skb(hdev->req_skb); hdev->req_skb = skb_clone(hdev->sent_cmd, GFP_KERNEL); } + + return err; } static void hci_cmd_work(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, cmd_work); struct sk_buff *skb; + int err; BT_DBG("%s cmd_cnt %d cmd queued %d", hdev->name, atomic_read(&hdev->cmd_cnt), skb_queue_len(&hdev->cmd_q)); @@ -4166,7 +4149,9 @@ static void hci_cmd_work(struct work_struct *work) if (!skb) return; - hci_send_cmd_sync(hdev, skb); + err = hci_send_cmd_sync(hdev, skb); + if (err) + return; rcu_read_lock(); if (test_bit(HCI_RESET, &hdev->flags) || diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index d37db364acf7..3838b90343d9 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -4218,6 +4218,13 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, void *data, } if (i == ARRAY_SIZE(hci_cc_table)) { + if (!skb->len) { + bt_dev_err(hdev, "Unexpected cc 0x%4.4x with no status", + *opcode); + *status = HCI_ERROR_UNSPECIFIED; + return; + } + /* Unknown opcode, assume byte 0 contains the status, so * that e.g. __hci_cmd_sync() properly returns errors * for vendor specific commands send by HCI drivers. @@ -5836,6 +5843,29 @@ static void hci_le_enh_conn_complete_evt(struct hci_dev *hdev, void *data, le16_to_cpu(ev->supervision_timeout)); } +static void hci_le_pa_sync_lost_evt(struct hci_dev *hdev, void *data, + struct sk_buff *skb) +{ + struct hci_ev_le_pa_sync_lost *ev = data; + u16 handle = le16_to_cpu(ev->handle); + struct hci_conn *conn; + + bt_dev_dbg(hdev, "sync handle 0x%4.4x", handle); + + hci_dev_lock(hdev); + + /* Delete the pa sync connection */ + conn = hci_conn_hash_lookup_pa_sync_handle(hdev, handle); + if (conn) { + clear_bit(HCI_CONN_BIG_SYNC, &conn->flags); + clear_bit(HCI_CONN_PA_SYNC, &conn->flags); + hci_disconn_cfm(conn, HCI_ERROR_REMOTE_USER_TERM); + hci_conn_del(conn); + } + + hci_dev_unlock(hdev); +} + static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb) { @@ -6994,14 +7024,9 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, continue; } - if (ev->status != 0x42) { + if (ev->status != 0x42) /* Mark PA sync as established */ set_bit(HCI_CONN_PA_SYNC, &bis->flags); - /* Reset cleanup callback of PA Sync so it doesn't - * terminate the sync when deleting the connection. - */ - conn->cleanup = NULL; - } bis->sync_handle = conn->sync_handle; bis->iso_qos.bcast.big = ev->handle; @@ -7044,29 +7069,24 @@ static void hci_le_big_sync_lost_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb) { struct hci_evt_le_big_sync_lost *ev = data; - struct hci_conn *bis, *conn; - bool mgmt_conn; + struct hci_conn *bis; + bool mgmt_conn = false; bt_dev_dbg(hdev, "big handle 0x%2.2x", ev->handle); hci_dev_lock(hdev); - /* Delete the pa sync connection */ - bis = hci_conn_hash_lookup_pa_sync_big_handle(hdev, ev->handle); - if (bis) { - conn = hci_conn_hash_lookup_pa_sync_handle(hdev, - bis->sync_handle); - if (conn) - hci_conn_del(conn); - } - /* Delete each bis connection */ while ((bis = hci_conn_hash_lookup_big_state(hdev, ev->handle, BT_CONNECTED, HCI_ROLE_SLAVE))) { - mgmt_conn = test_and_clear_bit(HCI_CONN_MGMT_CONNECTED, &bis->flags); - mgmt_device_disconnected(hdev, &bis->dst, bis->type, bis->dst_type, - ev->reason, mgmt_conn); + if (!mgmt_conn) { + mgmt_conn = test_and_clear_bit(HCI_CONN_MGMT_CONNECTED, + &bis->flags); + mgmt_device_disconnected(hdev, &bis->dst, bis->type, + bis->dst_type, ev->reason, + mgmt_conn); + } clear_bit(HCI_CONN_BIG_SYNC, &bis->flags); hci_disconn_cfm(bis, ev->reason); @@ -7180,6 +7200,9 @@ static const struct hci_le_ev { hci_le_per_adv_report_evt, sizeof(struct hci_ev_le_per_adv_report), HCI_MAX_EVENT_SIZE), + /* [0x10 = HCI_EV_LE_PA_SYNC_LOST] */ + HCI_LE_EV(HCI_EV_LE_PA_SYNC_LOST, hci_le_pa_sync_lost_evt, + sizeof(struct hci_ev_le_pa_sync_lost)), /* [0x12 = HCI_EV_LE_EXT_ADV_SET_TERM] */ HCI_LE_EV(HCI_EV_LE_EXT_ADV_SET_TERM, hci_le_ext_adv_term_evt, sizeof(struct hci_evt_le_ext_adv_set_term)), diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index fc866759910d..ad19022ae127 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1311,7 +1311,9 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, goto done; } + hci_dev_lock(hdev); mgmt_index_removed(hdev); + hci_dev_unlock(hdev); err = hci_dev_open(hdev->id); if (err) { diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 73fc41b68b68..6e76798ec786 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -6999,7 +6999,7 @@ static void create_pa_complete(struct hci_dev *hdev, void *data, int err) hci_dev_lock(hdev); - if (!hci_conn_valid(hdev, conn)) + if (hci_conn_valid(hdev, conn)) clear_bit(HCI_CONN_CREATE_PA_SYNC, &conn->flags); if (!err) diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 3d98cb6291da..616c2fef91d2 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -2314,14 +2314,31 @@ static void iso_disconn_cfm(struct hci_conn *hcon, __u8 reason) iso_conn_del(hcon, bt_to_errno(reason)); } -void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) +int iso_recv(struct hci_dev *hdev, u16 handle, struct sk_buff *skb, u16 flags) { - struct iso_conn *conn = hcon->iso_data; + struct hci_conn *hcon; + struct iso_conn *conn; struct skb_shared_hwtstamps *hwts; __u16 pb, ts, len, sn; - if (!conn) - goto drop; + hci_dev_lock(hdev); + + hcon = hci_conn_hash_lookup_handle(hdev, handle); + if (!hcon) { + hci_dev_unlock(hdev); + kfree_skb(skb); + return -ENOENT; + } + + conn = iso_conn_hold_unless_zero(hcon->iso_data); + hcon = NULL; + + hci_dev_unlock(hdev); + + if (!conn) { + kfree_skb(skb); + return -EINVAL; + } pb = hci_iso_flags_pb(flags); ts = hci_iso_flags_ts(flags); @@ -2377,7 +2394,7 @@ void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) hci_skb_pkt_status(skb) = flags & 0x03; hci_skb_pkt_seqnum(skb) = sn; iso_recv_frame(conn, skb); - return; + goto done; } if (pb == ISO_SINGLE) { @@ -2455,6 +2472,9 @@ void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) drop: kfree_skb(skb); +done: + iso_conn_put(conn); + return 0; } static struct hci_cb iso_cb = { diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index d08320380ad6..07b493331fd7 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -497,6 +497,7 @@ void l2cap_chan_hold(struct l2cap_chan *c) kref_get(&c->kref); } +EXPORT_SYMBOL_GPL(l2cap_chan_hold); struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c) { @@ -7509,13 +7510,24 @@ struct l2cap_conn *l2cap_conn_hold_unless_zero(struct l2cap_conn *c) return c; } -void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) +int l2cap_recv_acldata(struct hci_dev *hdev, u16 handle, + struct sk_buff *skb, u16 flags) { + struct hci_conn *hcon; struct l2cap_conn *conn; int len; - /* Lock hdev to access l2cap_data to avoid race with l2cap_conn_del */ - hci_dev_lock(hcon->hdev); + /* Lock hdev for hci_conn, and race on l2cap_data vs. l2cap_conn_del */ + hci_dev_lock(hdev); + + hcon = hci_conn_hash_lookup_handle(hdev, handle); + if (!hcon) { + hci_dev_unlock(hdev); + kfree_skb(skb); + return -ENOENT; + } + + hci_conn_enter_active_mode(hcon, BT_POWER_FORCE_ACTIVE_OFF); conn = hcon->l2cap_data; @@ -7523,12 +7535,13 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) conn = l2cap_conn_add(hcon); conn = l2cap_conn_hold_unless_zero(conn); + hcon = NULL; - hci_dev_unlock(hcon->hdev); + hci_dev_unlock(hdev); if (!conn) { kfree_skb(skb); - return; + return -EINVAL; } BT_DBG("conn %p len %u flags 0x%x", conn, skb->len, flags); @@ -7642,6 +7655,7 @@ drop: unlock: mutex_unlock(&conn->lock); l2cap_conn_put(conn); + return 0; } static struct hci_cb l2cap_cb = { diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 24e335e3a727..262bf984d2aa 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -5395,9 +5395,9 @@ static u8 parse_adv_monitor_pattern(struct adv_monitor *m, u8 pattern_count, for (i = 0; i < pattern_count; i++) { offset = patterns[i].offset; length = patterns[i].length; - if (offset >= HCI_MAX_EXT_AD_LENGTH || - length > HCI_MAX_EXT_AD_LENGTH || - (offset + length) > HCI_MAX_EXT_AD_LENGTH) + if (offset >= HCI_MAX_AD_LENGTH || + length > HCI_MAX_AD_LENGTH || + (offset + length) > HCI_MAX_AD_LENGTH) return MGMT_STATUS_INVALID_PARAMS; p = kmalloc(sizeof(*p), GFP_KERNEL); @@ -9497,6 +9497,7 @@ void mgmt_index_removed(struct hci_dev *hdev) cancel_delayed_work_sync(&hdev->discov_off); cancel_delayed_work_sync(&hdev->service_cache); cancel_delayed_work_sync(&hdev->rpa_expired); + cancel_delayed_work_sync(&hdev->mesh_send_done); } void mgmt_power_on(struct hci_dev *hdev, int err) diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index ab0cf442d57b..298c2a9ab4df 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -1458,22 +1458,39 @@ static void sco_disconn_cfm(struct hci_conn *hcon, __u8 reason) sco_conn_del(hcon, bt_to_errno(reason)); } -void sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb) +int sco_recv_scodata(struct hci_dev *hdev, u16 handle, struct sk_buff *skb) { - struct sco_conn *conn = hcon->sco_data; + struct hci_conn *hcon; + struct sco_conn *conn; - if (!conn) - goto drop; + hci_dev_lock(hdev); + + hcon = hci_conn_hash_lookup_handle(hdev, handle); + if (!hcon) { + hci_dev_unlock(hdev); + kfree_skb(skb); + return -ENOENT; + } + + conn = sco_conn_hold_unless_zero(hcon->sco_data); + hcon = NULL; + + hci_dev_unlock(hdev); + + if (!conn) { + kfree_skb(skb); + return -EINVAL; + } BT_DBG("conn %p len %u", conn, skb->len); - if (skb->len) { + if (skb->len) sco_recv_frame(conn, skb); - return; - } + else + kfree_skb(skb); -drop: - kfree_skb(skb); + sco_conn_put(conn); + return 0; } static struct hci_cb sco_cb = { diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 45512b2ba951..3a1ce04a7a53 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -2136,7 +2136,7 @@ static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb) struct smp_chan *smp = chan->data; struct hci_conn *hcon = conn->hcon; u8 *pkax, *pkbx, *na, *nb, confirm_hint; - u32 passkey; + u32 passkey = 0; int err; bt_dev_dbg(hcon->hdev, "conn %p", conn); @@ -2188,24 +2188,6 @@ static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb) smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), smp->prnd); SMP_ALLOW_CMD(smp, SMP_CMD_DHKEY_CHECK); - - /* Only Just-Works pairing requires extra checks */ - if (smp->method != JUST_WORKS) - goto mackey_and_ltk; - - /* If there already exists long term key in local host, leave - * the decision to user space since the remote device could - * be legitimate or malicious. - */ - if (hci_find_ltk(hcon->hdev, &hcon->dst, hcon->dst_type, - hcon->role)) { - /* Set passkey to 0. The value can be any number since - * it'll be ignored anyway. - */ - passkey = 0; - confirm_hint = 1; - goto confirm; - } } mackey_and_ltk: @@ -2226,11 +2208,12 @@ mackey_and_ltk: if (err) return SMP_UNSPECIFIED; - confirm_hint = 0; - -confirm: - if (smp->method == JUST_WORKS) - confirm_hint = 1; + /* Always require user confirmation for Just-Works pairing to prevent + * impersonation attacks, or in case of a legitimate device that is + * repairing use the confirmation as acknowledgment to proceed with the + * creation of new keys. + */ + confirm_hint = smp->method == JUST_WORKS ? 1 : 0; err = mgmt_user_confirm_request(hcon->hdev, &hcon->dst, hcon->type, hcon->dst_type, passkey, confirm_hint); diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 870bdf2e082c..dea09096ad0f 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -25,7 +25,7 @@ static inline int should_deliver(const struct net_bridge_port *p, vg = nbp_vlan_group_rcu(p); return ((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) && - (br_mst_is_enabled(p->br) || p->state == BR_STATE_FORWARDING) && + (br_mst_is_enabled(p) || p->state == BR_STATE_FORWARDING) && br_allowed_egress(vg, skb) && nbp_switchdev_allowed_egress(p, skb) && !br_skb_isolated(p, skb); } diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 98c5b9c3145f..ca3a637d7cca 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -386,6 +386,7 @@ void br_dev_delete(struct net_device *dev, struct list_head *head) del_nbp(p); } + br_mst_uninit(br); br_recalculate_neigh_suppress_enabled(br); br_fdb_delete_by_port(br, NULL, 0, 1); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 67b4c905e49a..777fa869c1a1 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -94,7 +94,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb br = p->br; - if (br_mst_is_enabled(br)) { + if (br_mst_is_enabled(p)) { state = BR_STATE_FORWARDING; } else { if (p->state == BR_STATE_DISABLED) { @@ -429,7 +429,7 @@ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb) return RX_HANDLER_PASS; forward: - if (br_mst_is_enabled(p->br)) + if (br_mst_is_enabled(p)) goto defer_stp_filtering; switch (p->state) { diff --git a/net/bridge/br_mst.c b/net/bridge/br_mst.c index 3f24b4ee49c2..43a300ae6bfa 100644 --- a/net/bridge/br_mst.c +++ b/net/bridge/br_mst.c @@ -22,6 +22,12 @@ bool br_mst_enabled(const struct net_device *dev) } EXPORT_SYMBOL_GPL(br_mst_enabled); +void br_mst_uninit(struct net_bridge *br) +{ + if (br_opt_get(br, BROPT_MST_ENABLED)) + static_branch_dec(&br_mst_used); +} + int br_mst_get_info(const struct net_device *dev, u16 msti, unsigned long *vids) { const struct net_bridge_vlan_group *vg; @@ -225,9 +231,9 @@ int br_mst_set_enabled(struct net_bridge *br, bool on, return err; if (on) - static_branch_enable(&br_mst_used); + static_branch_inc(&br_mst_used); else - static_branch_disable(&br_mst_used); + static_branch_dec(&br_mst_used); br_opt_toggle(br, BROPT_MST_ENABLED, on); return 0; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 16be5d250402..7280c4e9305f 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -1935,10 +1935,12 @@ static inline bool br_vlan_state_allowed(u8 state, bool learn_allow) /* br_mst.c */ #ifdef CONFIG_BRIDGE_VLAN_FILTERING DECLARE_STATIC_KEY_FALSE(br_mst_used); -static inline bool br_mst_is_enabled(struct net_bridge *br) +static inline bool br_mst_is_enabled(const struct net_bridge_port *p) { + /* check the port's vlan group to avoid racing with port deletion */ return static_branch_unlikely(&br_mst_used) && - br_opt_get(br, BROPT_MST_ENABLED); + br_opt_get(p->br, BROPT_MST_ENABLED) && + rcu_access_pointer(p->vlgrp); } int br_mst_set_state(struct net_bridge_port *p, u16 msti, u8 state, @@ -1952,8 +1954,9 @@ int br_mst_fill_info(struct sk_buff *skb, const struct net_bridge_vlan_group *vg); int br_mst_process(struct net_bridge_port *p, const struct nlattr *mst_attr, struct netlink_ext_ack *extack); +void br_mst_uninit(struct net_bridge *br); #else -static inline bool br_mst_is_enabled(struct net_bridge *br) +static inline bool br_mst_is_enabled(const struct net_bridge_port *p) { return false; } @@ -1987,6 +1990,10 @@ static inline int br_mst_process(struct net_bridge_port *p, { return -EOPNOTSUPP; } + +static inline void br_mst_uninit(struct net_bridge *br) +{ +} #endif struct nf_br_ops { diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index b71b1635916e..a21c157daf7d 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -631,6 +631,7 @@ static int handle_auth_session_key(struct ceph_auth_client *ac, u64 global_id, /* connection secret */ ceph_decode_32_safe(p, end, len, e_inval); + ceph_decode_need(p, end, len, e_inval); dout("%s connection secret blob len %d\n", __func__, len); if (len > 0) { dp = *p + ceph_x_encrypt_offset(); @@ -648,6 +649,7 @@ static int handle_auth_session_key(struct ceph_auth_client *ac, u64 global_id, /* service tickets */ ceph_decode_32_safe(p, end, len, e_inval); + ceph_decode_need(p, end, len, e_inval); dout("%s service tickets blob len %d\n", __func__, len); if (len > 0) { ret = ceph_x_proc_ticket_reply(ac, &th->session_key, diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 4c6441536d55..e734e57be083 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -786,41 +786,52 @@ void ceph_reset_client_addr(struct ceph_client *client) EXPORT_SYMBOL(ceph_reset_client_addr); /* - * true if we have the mon map (and have thus joined the cluster) - */ -static bool have_mon_and_osd_map(struct ceph_client *client) -{ - return client->monc.monmap && client->monc.monmap->epoch && - client->osdc.osdmap && client->osdc.osdmap->epoch; -} - -/* * mount: join the ceph cluster, and open root directory. */ -int __ceph_open_session(struct ceph_client *client, unsigned long started) +int __ceph_open_session(struct ceph_client *client) { - unsigned long timeout = client->options->mount_timeout; - long err; + DEFINE_WAIT_FUNC(wait, woken_wake_function); + long timeout = ceph_timeout_jiffies(client->options->mount_timeout); + bool have_monmap, have_osdmap; + int err; /* open session, and wait for mon and osd maps */ err = ceph_monc_open_session(&client->monc); if (err < 0) return err; - while (!have_mon_and_osd_map(client)) { - if (timeout && time_after_eq(jiffies, started + timeout)) - return -ETIMEDOUT; + add_wait_queue(&client->auth_wq, &wait); + for (;;) { + mutex_lock(&client->monc.mutex); + err = client->auth_err; + have_monmap = client->monc.monmap && client->monc.monmap->epoch; + mutex_unlock(&client->monc.mutex); + + down_read(&client->osdc.lock); + have_osdmap = client->osdc.osdmap && client->osdc.osdmap->epoch; + up_read(&client->osdc.lock); + + if (err || (have_monmap && have_osdmap)) + break; + + if (signal_pending(current)) { + err = -ERESTARTSYS; + break; + } + + if (!timeout) { + err = -ETIMEDOUT; + break; + } /* wait */ dout("mount waiting for mon_map\n"); - err = wait_event_interruptible_timeout(client->auth_wq, - have_mon_and_osd_map(client) || (client->auth_err < 0), - ceph_timeout_jiffies(timeout)); - if (err < 0) - return err; - if (client->auth_err < 0) - return client->auth_err; + timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout); } + remove_wait_queue(&client->auth_wq, &wait); + + if (err) + return err; pr_info("client%llu fsid %pU\n", ceph_client_gid(client), &client->fsid); @@ -833,12 +844,11 @@ EXPORT_SYMBOL(__ceph_open_session); int ceph_open_session(struct ceph_client *client) { int ret; - unsigned long started = jiffies; /* note the start time */ dout("open_session start\n"); mutex_lock(&client->mount_mutex); - ret = __ceph_open_session(client, started); + ret = __ceph_open_session(client); mutex_unlock(&client->mount_mutex); return ret; diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index 2110439f8a24..83c270bce63c 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c @@ -36,8 +36,9 @@ static int monmap_show(struct seq_file *s, void *p) int i; struct ceph_client *client = s->private; + mutex_lock(&client->monc.mutex); if (client->monc.monmap == NULL) - return 0; + goto out_unlock; seq_printf(s, "epoch %d\n", client->monc.monmap->epoch); for (i = 0; i < client->monc.monmap->num_mon; i++) { @@ -48,6 +49,9 @@ static int monmap_show(struct seq_file *s, void *p) ENTITY_NAME(inst->name), ceph_pr_addr(&inst->addr)); } + +out_unlock: + mutex_unlock(&client->monc.mutex); return 0; } @@ -56,13 +60,14 @@ static int osdmap_show(struct seq_file *s, void *p) int i; struct ceph_client *client = s->private; struct ceph_osd_client *osdc = &client->osdc; - struct ceph_osdmap *map = osdc->osdmap; + struct ceph_osdmap *map; struct rb_node *n; + down_read(&osdc->lock); + map = osdc->osdmap; if (map == NULL) - return 0; + goto out_unlock; - down_read(&osdc->lock); seq_printf(s, "epoch %u barrier %u flags 0x%x\n", map->epoch, osdc->epoch_barrier, map->flags); @@ -131,6 +136,7 @@ static int osdmap_show(struct seq_file *s, void *p) seq_printf(s, "]\n"); } +out_unlock: up_read(&osdc->lock); return 0; } diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c index 9e39378eda00..9e48623018a3 100644 --- a/net/ceph/messenger_v2.c +++ b/net/ceph/messenger_v2.c @@ -1061,13 +1061,16 @@ static int decrypt_control_remainder(struct ceph_connection *con) static int process_v2_sparse_read(struct ceph_connection *con, struct page **pages, int spos) { - struct ceph_msg_data_cursor *cursor = &con->v2.in_cursor; + struct ceph_msg_data_cursor cursor; int ret; + ceph_msg_data_cursor_init(&cursor, con->in_msg, + con->in_msg->sparse_read_total); + for (;;) { char *buf = NULL; - ret = con->ops->sparse_read(con, cursor, &buf); + ret = con->ops->sparse_read(con, &cursor, &buf); if (ret <= 0) return ret; @@ -1085,11 +1088,11 @@ static int process_v2_sparse_read(struct ceph_connection *con, } else { struct bio_vec bv; - get_bvec_at(cursor, &bv); + get_bvec_at(&cursor, &bv); len = min_t(int, len, bv.bv_len); memcpy_page(bv.bv_page, bv.bv_offset, spage, soff, len); - ceph_msg_data_advance(cursor, len); + ceph_msg_data_advance(&cursor, len); } spos += len; ret -= len; diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 295098873861..d245fa508e1c 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -1504,8 +1504,6 @@ static int decode_new_primary_temp(void **p, void *end, u32 ceph_get_primary_affinity(struct ceph_osdmap *map, int osd) { - BUG_ON(osd >= map->max_osd); - if (!map->osd_primary_affinity) return CEPH_OSD_DEFAULT_PRIMARY_AFFINITY; @@ -1514,8 +1512,6 @@ u32 ceph_get_primary_affinity(struct ceph_osdmap *map, int osd) static int set_primary_affinity(struct ceph_osdmap *map, int osd, u32 aff) { - BUG_ON(osd >= map->max_osd); - if (!map->osd_primary_affinity) { int i; @@ -1577,6 +1573,8 @@ static int decode_new_primary_affinity(void **p, void *end, ceph_decode_32_safe(p, end, osd, e_inval); ceph_decode_32_safe(p, end, aff, e_inval); + if (osd >= map->max_osd) + goto e_inval; ret = set_primary_affinity(map, osd, aff); if (ret) @@ -1879,7 +1877,9 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v, ceph_decode_need(p, end, 2*sizeof(u32), e_inval); osd = ceph_decode_32(p); w = ceph_decode_32(p); - BUG_ON(osd >= map->max_osd); + if (osd >= map->max_osd) + goto e_inval; + osdmap_info(map, "osd%d weight 0x%x %s\n", osd, w, w == CEPH_OSD_IN ? "(in)" : (w == CEPH_OSD_OUT ? "(out)" : "")); @@ -1905,13 +1905,15 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v, u32 xorstate; osd = ceph_decode_32(p); + if (osd >= map->max_osd) + goto e_inval; + if (struct_v >= 5) xorstate = ceph_decode_32(p); else xorstate = ceph_decode_8(p); if (xorstate == 0) xorstate = CEPH_OSD_UP; - BUG_ON(osd >= map->max_osd); if ((map->osd_state[osd] & CEPH_OSD_UP) && (xorstate & CEPH_OSD_UP)) osdmap_info(map, "osd%d down\n", osd); @@ -1937,7 +1939,9 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v, struct ceph_entity_addr addr; osd = ceph_decode_32(p); - BUG_ON(osd >= map->max_osd); + if (osd >= map->max_osd) + goto e_inval; + if (struct_v >= 7) ret = ceph_decode_entity_addrvec(p, end, msgr2, &addr); else diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index ad54b12d4b4c..8bb71a10dba0 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -443,6 +443,9 @@ static int generic_hwtstamp_ioctl_lower(struct net_device *dev, int cmd, struct ifreq ifrr; int err; + if (!kernel_cfg->ifr) + return -EINVAL; + strscpy_pad(ifrr.ifr_name, dev->name, IFNAMSIZ); ifrr.ifr_ifru = kernel_cfg->ifr->ifr_ifru; diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c index fd57b845de33..a725d21159a6 100644 --- a/net/core/gro_cells.c +++ b/net/core/gro_cells.c @@ -60,9 +60,10 @@ static int gro_cell_poll(struct napi_struct *napi, int budget) struct sk_buff *skb; int work_done = 0; - __local_lock_nested_bh(&cell->bh_lock); while (work_done < budget) { + __local_lock_nested_bh(&cell->bh_lock); skb = __skb_dequeue(&cell->napi_skbs); + __local_unlock_nested_bh(&cell->bh_lock); if (!skb) break; napi_gro_receive(napi, skb); @@ -71,7 +72,6 @@ static int gro_cell_poll(struct napi_struct *napi, int budget) if (work_done < budget) napi_complete_done(napi, work_done); - __local_unlock_nested_bh(&cell->bh_lock); return work_done; } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 60a05d3b7c24..331764845e8f 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -228,19 +228,16 @@ static void refill_skbs(struct netpoll *np) { struct sk_buff_head *skb_pool; struct sk_buff *skb; - unsigned long flags; skb_pool = &np->skb_pool; - spin_lock_irqsave(&skb_pool->lock, flags); - while (skb_pool->qlen < MAX_SKBS) { + while (READ_ONCE(skb_pool->qlen) < MAX_SKBS) { skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC); if (!skb) break; - __skb_queue_tail(skb_pool, skb); + skb_queue_tail(skb_pool, skb); } - spin_unlock_irqrestore(&skb_pool->lock, flags); } static void zap_completion_queue(void) @@ -814,6 +811,10 @@ static void __netpoll_cleanup(struct netpoll *np) if (!npinfo) return; + /* At this point, there is a single npinfo instance per netdevice, and + * its refcnt tracks how many netpoll structures are linked to it. We + * only perform npinfo cleanup when the refcnt decrements to zero. + */ if (refcount_dec_and_test(&npinfo->refcnt)) { const struct net_device_ops *ops; @@ -823,8 +824,7 @@ static void __netpoll_cleanup(struct netpoll *np) RCU_INIT_POINTER(np->dev->npinfo, NULL); call_rcu(&npinfo->rcu, rcu_cleanup_netpoll_info); - } else - RCU_INIT_POINTER(np->dev->npinfo, NULL); + } skb_pool_flush(np); } diff --git a/net/devlink/rate.c b/net/devlink/rate.c index 264fb82cba19..d157a8419bca 100644 --- a/net/devlink/rate.c +++ b/net/devlink/rate.c @@ -828,13 +828,15 @@ void devl_rate_nodes_destroy(struct devlink *devlink) if (!devlink_rate->parent) continue; - refcount_dec(&devlink_rate->parent->refcnt); if (devlink_rate_is_leaf(devlink_rate)) ops->rate_leaf_parent_set(devlink_rate, NULL, devlink_rate->priv, NULL, NULL); else if (devlink_rate_is_node(devlink_rate)) ops->rate_node_parent_set(devlink_rate, NULL, devlink_rate->priv, NULL, NULL); + + refcount_dec(&devlink_rate->parent->refcnt); + devlink_rate->parent = NULL; } list_for_each_entry_safe(devlink_rate, tmp, &devlink->rate_list, list) { if (devlink_rate_is_node(devlink_rate)) { diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index 26bb657ceac3..eadb358179ce 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -176,7 +176,8 @@ static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb, /* Remove Broadcom tag and update checksum */ skb_pull_rcsum(skb, BRCM_TAG_LEN); - dsa_default_offload_fwd_mark(skb); + if (likely(!is_link_local_ether_addr(eth_hdr(skb)->h_dest))) + dsa_default_offload_fwd_mark(skb); return skb; } @@ -224,12 +225,14 @@ static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb, { int len = BRCM_LEG_TAG_LEN; int source_port; + __be16 *proto; u8 *brcm_tag; if (unlikely(!pskb_may_pull(skb, BRCM_LEG_TAG_LEN + VLAN_HLEN))) return NULL; brcm_tag = dsa_etype_header_pos_rx(skb); + proto = (__be16 *)(brcm_tag + BRCM_LEG_TAG_LEN); source_port = brcm_tag[5] & BRCM_LEG_PORT_ID; @@ -237,14 +240,19 @@ static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb, if (!skb->dev) return NULL; - /* VLAN tag is added by BCM63xx internal switch */ - if (netdev_uses_dsa(skb->dev)) + /* The internal switch in BCM63XX SoCs always tags on egress on the CPU + * port. We use VID 0 internally for untagged traffic, so strip the tag + * if the TCI field is all 0, and keep it otherwise to also retain + * e.g. 802.1p tagged packets. + */ + if (proto[0] == htons(ETH_P_8021Q) && proto[1] == 0) len += VLAN_HLEN; /* Remove Broadcom tag and update checksum */ skb_pull_rcsum(skb, len); - dsa_default_offload_fwd_mark(skb); + if (likely(!is_link_local_ether_addr(eth_hdr(skb)->h_dest))) + dsa_default_offload_fwd_mark(skb); dsa_strip_etype_header(skb, len); diff --git a/net/handshake/tlshd.c b/net/handshake/tlshd.c index 081093dfd553..8f9532a15f43 100644 --- a/net/handshake/tlshd.c +++ b/net/handshake/tlshd.c @@ -259,6 +259,7 @@ static int tls_handshake_accept(struct handshake_req *req, out_cancel: genlmsg_cancel(msg, hdr); + nlmsg_free(msg); out: return ret; } diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index fbbc3ccf9df6..492cbc78ab75 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -320,6 +320,9 @@ static void send_hsr_supervision_frame(struct hsr_port *port, } hsr_stag = skb_put(skb, sizeof(struct hsr_sup_tag)); + skb_set_network_header(skb, ETH_HLEN + HSR_HLEN); + skb_reset_mac_len(skb); + set_hsr_stag_path(hsr_stag, (hsr->prot_version ? 0x0 : 0xf)); set_hsr_stag_HSR_ver(hsr_stag, hsr->prot_version); @@ -334,7 +337,7 @@ static void send_hsr_supervision_frame(struct hsr_port *port, } hsr_stag->tlv.HSR_TLV_type = type; - /* TODO: Why 12 in HSRv0? */ + /* HSRv0 has 6 unused bytes after the MAC */ hsr_stag->tlv.HSR_TLV_length = hsr->prot_version ? sizeof(struct hsr_sup_payload) : 12; diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index c67c0d35921d..339f0d220212 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -262,15 +262,23 @@ static struct sk_buff *prp_fill_rct(struct sk_buff *skb, return skb; } -static void hsr_set_path_id(struct hsr_ethhdr *hsr_ethhdr, +static void hsr_set_path_id(struct hsr_frame_info *frame, + struct hsr_ethhdr *hsr_ethhdr, struct hsr_port *port) { int path_id; - if (port->type == HSR_PT_SLAVE_A) - path_id = 0; - else - path_id = 1; + if (port->hsr->prot_version) { + if (port->type == HSR_PT_SLAVE_A) + path_id = 0; + else + path_id = 1; + } else { + if (frame->is_supervision) + path_id = 0xf; + else + path_id = 1; + } set_hsr_tag_path(&hsr_ethhdr->hsr_tag, path_id); } @@ -304,7 +312,7 @@ static struct sk_buff *hsr_fill_tag(struct sk_buff *skb, else hsr_ethhdr = (struct hsr_ethhdr *)pc; - hsr_set_path_id(hsr_ethhdr, port); + hsr_set_path_id(frame, hsr_ethhdr, port); set_hsr_tag_LSDU_size(&hsr_ethhdr->hsr_tag, lsdu_size); hsr_ethhdr->hsr_tag.sequence_nr = htons(frame->sequence_nr); hsr_ethhdr->hsr_tag.encap_proto = hsr_ethhdr->ethhdr.h_proto; @@ -330,7 +338,7 @@ struct sk_buff *hsr_create_tagged_frame(struct hsr_frame_info *frame, (struct hsr_ethhdr *)skb_mac_header(frame->skb_hsr); /* set the lane id properly */ - hsr_set_path_id(hsr_ethhdr, port); + hsr_set_path_id(frame, hsr_ethhdr, port); return skb_clone(frame->skb_hsr, GFP_ATOMIC); } else if (port->dev->features & NETIF_F_HW_HSR_TAG_INS) { return skb_clone(frame->skb_std, GFP_ATOMIC); diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index e0d94270da28..05828d4cb6cd 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -122,8 +122,10 @@ static struct sk_buff *xfrm4_tunnel_gso_segment(struct xfrm_state *x, struct sk_buff *skb, netdev_features_t features) { - __be16 type = x->inner_mode.family == AF_INET6 ? htons(ETH_P_IPV6) - : htons(ETH_P_IP); + const struct xfrm_mode *inner_mode = xfrm_ip2inner_mode(x, + XFRM_MODE_SKB_CB(skb)->protocol); + __be16 type = inner_mode->family == AF_INET6 ? htons(ETH_P_IPV6) + : htons(ETH_P_IP); return skb_eth_gso_segment(skb, features, type); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6d27d3610c1c..b549d6a57307 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -607,6 +607,11 @@ static void fnhe_remove_oldest(struct fnhe_hash_bucket *hash) oldest_p = fnhe_p; } } + + /* Clear oldest->fnhe_daddr to prevent this fnhe from being + * rebound with new dsts in rt_bind_exception(). + */ + oldest->fnhe_daddr = 0; fnhe_flush_routes(oldest); *oldest_p = oldest->fnhe_next; kfree_rcu(oldest, rcu); diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 7b41fb4f00b5..22410243ebe8 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -158,8 +158,10 @@ static struct sk_buff *xfrm6_tunnel_gso_segment(struct xfrm_state *x, struct sk_buff *skb, netdev_features_t features) { - __be16 type = x->inner_mode.family == AF_INET ? htons(ETH_P_IP) - : htons(ETH_P_IPV6); + const struct xfrm_mode *inner_mode = xfrm_ip2inner_mode(x, + XFRM_MODE_SKB_CB(skb)->protocol); + __be16 type = inner_mode->family == AF_INET ? htons(ETH_P_IP) + : htons(ETH_P_IPV6); return skb_eth_gso_segment(skb, features, type); } diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 369a2f2e459c..0710281dd95a 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1246,9 +1246,9 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, uns else l2tp_build_l2tpv3_header(session, __skb_push(skb, session->hdr_len)); - /* Reset skb netfilter state */ - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED); + /* Reset control buffer */ + memset(skb->cb, 0, sizeof(skb->cb)); + nf_reset_ct(skb); /* L2TP uses its own lockdep subclass to avoid lockdep splats caused by diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 57065714cf8c..7f8799fd673e 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -1290,7 +1290,7 @@ ieee80211_link_chanctx_reservation_complete(struct ieee80211_link_data *link) &link->csa.finalize_work); break; case NL80211_IFTYPE_STATION: - wiphy_delayed_work_queue(sdata->local->hw.wiphy, + wiphy_hrtimer_work_queue(sdata->local->hw.wiphy, &link->u.mgd.csa.switch_work, 0); break; case NL80211_IFTYPE_UNSPECIFIED: diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 73fd86ec1bce..878c3b14aeb8 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -612,11 +612,11 @@ struct ieee80211_if_managed { u8 *assoc_req_ies; size_t assoc_req_ies_len; - struct wiphy_delayed_work ml_reconf_work; + struct wiphy_hrtimer_work ml_reconf_work; u16 removed_links; /* TID-to-link mapping support */ - struct wiphy_delayed_work ttlm_work; + struct wiphy_hrtimer_work ttlm_work; struct ieee80211_adv_ttlm_info ttlm_info; struct wiphy_work teardown_ttlm_work; @@ -1017,10 +1017,10 @@ struct ieee80211_link_data_managed { bool operating_11g_mode; struct { - struct wiphy_delayed_work switch_work; + struct wiphy_hrtimer_work switch_work; struct cfg80211_chan_def ap_chandef; struct ieee80211_parsed_tpe tpe; - unsigned long time; + ktime_t time; bool waiting_bcn; bool ignored_same_chan; bool blocked_tx; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index a7873832d4fa..0ca55b9655a7 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -223,6 +223,10 @@ static int ieee80211_can_powered_addr_change(struct ieee80211_sub_if_data *sdata if (netif_carrier_ok(sdata->dev)) return -EBUSY; + /* if any stations are set known (so they know this vif too), reject */ + if (sta_info_get_by_idx(sdata, 0)) + return -EBUSY; + /* First check no ROC work is happening on this iface */ list_for_each_entry(roc, &local->roc_list, list) { if (roc->sdata != sdata) @@ -242,12 +246,16 @@ static int ieee80211_can_powered_addr_change(struct ieee80211_sub_if_data *sdata ret = -EBUSY; } + /* + * More interface types could be added here but changing the + * address while powered makes the most sense in client modes. + */ switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: - /* More interface types could be added here but changing the - * address while powered makes the most sense in client modes. - */ + /* refuse while connecting */ + if (sdata->u.mgd.auth_data || sdata->u.mgd.assoc_data) + return -EBUSY; break; default: ret = -EOPNOTSUPP; diff --git a/net/mac80211/link.c b/net/mac80211/link.c index d71eabe5abf8..4a19b765ccb6 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -472,10 +472,10 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata, * from there. */ if (link->conf->csa_active) - wiphy_delayed_work_queue(local->hw.wiphy, + wiphy_hrtimer_work_queue(local->hw.wiphy, &link->u.mgd.csa.switch_work, link->u.mgd.csa.time - - jiffies); + ktime_get_boottime()); } for_each_set_bit(link_id, &add, IEEE80211_MLD_MAX_NUM_LINKS) { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 3b5827ea438e..f3138d158535 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -45,7 +45,7 @@ #define IEEE80211_ASSOC_TIMEOUT_SHORT (HZ / 10) #define IEEE80211_ASSOC_MAX_TRIES 3 -#define IEEE80211_ADV_TTLM_SAFETY_BUFFER_MS msecs_to_jiffies(100) +#define IEEE80211_ADV_TTLM_SAFETY_BUFFER_MS (100 * USEC_PER_MSEC) #define IEEE80211_ADV_TTLM_ST_UNDERFLOW 0xff00 #define IEEE80211_NEG_TTLM_REQ_TIMEOUT (HZ / 5) @@ -2594,7 +2594,7 @@ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success, return; } - wiphy_delayed_work_queue(sdata->local->hw.wiphy, + wiphy_hrtimer_work_queue(sdata->local->hw.wiphy, &link->u.mgd.csa.switch_work, 0); } @@ -2753,7 +2753,8 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, .timestamp = timestamp, .device_timestamp = device_timestamp, }; - unsigned long now; + u32 csa_time_tu; + ktime_t now; int res; lockdep_assert_wiphy(local->hw.wiphy); @@ -2983,10 +2984,9 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, csa_ie.mode); /* we may have to handle timeout for deactivated link in software */ - now = jiffies; - link->u.mgd.csa.time = now + - TU_TO_JIFFIES((max_t(int, csa_ie.count, 1) - 1) * - link->conf->beacon_int); + now = ktime_get_boottime(); + csa_time_tu = (max_t(int, csa_ie.count, 1) - 1) * link->conf->beacon_int; + link->u.mgd.csa.time = now + us_to_ktime(ieee80211_tu_to_usec(csa_time_tu)); if (ieee80211_vif_link_active(&sdata->vif, link->link_id) && local->ops->channel_switch) { @@ -3001,7 +3001,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, } /* channel switch handled in software */ - wiphy_delayed_work_queue(local->hw.wiphy, + wiphy_hrtimer_work_queue(local->hw.wiphy, &link->u.mgd.csa.switch_work, link->u.mgd.csa.time - now); return; @@ -4242,14 +4242,14 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, memset(&sdata->u.mgd.ttlm_info, 0, sizeof(sdata->u.mgd.ttlm_info)); - wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->ttlm_work); + wiphy_hrtimer_work_cancel(sdata->local->hw.wiphy, &ifmgd->ttlm_work); memset(&sdata->vif.neg_ttlm, 0, sizeof(sdata->vif.neg_ttlm)); wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->neg_ttlm_timeout_work); sdata->u.mgd.removed_links = 0; - wiphy_delayed_work_cancel(sdata->local->hw.wiphy, + wiphy_hrtimer_work_cancel(sdata->local->hw.wiphy, &sdata->u.mgd.ml_reconf_work); wiphy_work_cancel(sdata->local->hw.wiphy, @@ -6876,7 +6876,7 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata, /* In case the removal was cancelled, abort it */ if (sdata->u.mgd.removed_links) { sdata->u.mgd.removed_links = 0; - wiphy_delayed_work_cancel(sdata->local->hw.wiphy, + wiphy_hrtimer_work_cancel(sdata->local->hw.wiphy, &sdata->u.mgd.ml_reconf_work); } return; @@ -6906,9 +6906,9 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata, } sdata->u.mgd.removed_links = removed_links; - wiphy_delayed_work_queue(sdata->local->hw.wiphy, + wiphy_hrtimer_work_queue(sdata->local->hw.wiphy, &sdata->u.mgd.ml_reconf_work, - TU_TO_JIFFIES(delay)); + us_to_ktime(ieee80211_tu_to_usec(delay))); } static int ieee80211_ttlm_set_links(struct ieee80211_sub_if_data *sdata, @@ -7095,7 +7095,7 @@ static void ieee80211_process_adv_ttlm(struct ieee80211_sub_if_data *sdata, /* if a planned TID-to-link mapping was cancelled - * abort it */ - wiphy_delayed_work_cancel(sdata->local->hw.wiphy, + wiphy_hrtimer_work_cancel(sdata->local->hw.wiphy, &sdata->u.mgd.ttlm_work); } else if (sdata->u.mgd.ttlm_info.active) { /* if no TID-to-link element, set to default mapping in @@ -7130,7 +7130,7 @@ static void ieee80211_process_adv_ttlm(struct ieee80211_sub_if_data *sdata, if (ttlm_info.switch_time) { u16 beacon_ts_tu, st_tu, delay; - u32 delay_jiffies; + u64 delay_usec; u64 mask; /* The t2l map switch time is indicated with a partial @@ -7152,23 +7152,23 @@ static void ieee80211_process_adv_ttlm(struct ieee80211_sub_if_data *sdata, if (delay > IEEE80211_ADV_TTLM_ST_UNDERFLOW) return; - delay_jiffies = TU_TO_JIFFIES(delay); + delay_usec = ieee80211_tu_to_usec(delay); /* Link switching can take time, so schedule it * 100ms before to be ready on time */ - if (delay_jiffies > IEEE80211_ADV_TTLM_SAFETY_BUFFER_MS) - delay_jiffies -= + if (delay_usec > IEEE80211_ADV_TTLM_SAFETY_BUFFER_MS) + delay_usec -= IEEE80211_ADV_TTLM_SAFETY_BUFFER_MS; else - delay_jiffies = 0; + delay_usec = 0; sdata->u.mgd.ttlm_info = ttlm_info; - wiphy_delayed_work_cancel(sdata->local->hw.wiphy, + wiphy_hrtimer_work_cancel(sdata->local->hw.wiphy, &sdata->u.mgd.ttlm_work); - wiphy_delayed_work_queue(sdata->local->hw.wiphy, + wiphy_hrtimer_work_queue(sdata->local->hw.wiphy, &sdata->u.mgd.ttlm_work, - delay_jiffies); + us_to_ktime(delay_usec)); return; } } @@ -8793,7 +8793,7 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) ieee80211_csa_connection_drop_work); wiphy_delayed_work_init(&ifmgd->tdls_peer_del_work, ieee80211_tdls_peer_del_work); - wiphy_delayed_work_init(&ifmgd->ml_reconf_work, + wiphy_hrtimer_work_init(&ifmgd->ml_reconf_work, ieee80211_ml_reconf_work); wiphy_delayed_work_init(&ifmgd->reconf.wk, ieee80211_ml_sta_reconf_timeout); @@ -8802,7 +8802,7 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) timer_setup(&ifmgd->conn_mon_timer, ieee80211_sta_conn_mon_timer, 0); wiphy_delayed_work_init(&ifmgd->tx_tspec_wk, ieee80211_sta_handle_tspec_ac_params_wk); - wiphy_delayed_work_init(&ifmgd->ttlm_work, + wiphy_hrtimer_work_init(&ifmgd->ttlm_work, ieee80211_tid_to_link_map_work); wiphy_delayed_work_init(&ifmgd->neg_ttlm_timeout_work, ieee80211_neg_ttlm_timeout_work); @@ -8849,7 +8849,7 @@ void ieee80211_mgd_setup_link(struct ieee80211_link_data *link) else link->u.mgd.req_smps = IEEE80211_SMPS_OFF; - wiphy_delayed_work_init(&link->u.mgd.csa.switch_work, + wiphy_hrtimer_work_init(&link->u.mgd.csa.switch_work, ieee80211_csa_switch_work); ieee80211_clear_tpe(&link->conf->tpe); @@ -10064,7 +10064,7 @@ void ieee80211_mgd_stop_link(struct ieee80211_link_data *link) &link->u.mgd.request_smps_work); wiphy_work_cancel(link->sdata->local->hw.wiphy, &link->u.mgd.recalc_smps); - wiphy_delayed_work_cancel(link->sdata->local->hw.wiphy, + wiphy_hrtimer_work_cancel(link->sdata->local->hw.wiphy, &link->u.mgd.csa.switch_work); } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 6af43dfefdd6..5b4c3fe9970a 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -5360,10 +5360,14 @@ void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, if (WARN_ON(!local->started)) goto drop; - if (likely(!(status->flag & RX_FLAG_FAILED_PLCP_CRC))) { + if (likely(!(status->flag & RX_FLAG_FAILED_PLCP_CRC) && + !(status->flag & RX_FLAG_NO_PSDU && + status->zero_length_psdu_type == + IEEE80211_RADIOTAP_ZERO_LEN_PSDU_NOT_CAPTURED))) { /* - * Validate the rate, unless a PLCP error means that - * we probably can't have a valid rate here anyway. + * Validate the rate, unless there was a PLCP error which may + * have an invalid rate or the PSDU was not capture and may be + * missing rate information. */ switch (status->encoding) { diff --git a/net/mctp/route.c b/net/mctp/route.c index 4d314e062ba9..2ac4011a953f 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -623,6 +623,7 @@ static int mctp_dst_output(struct mctp_dst *dst, struct sk_buff *skb) skb->protocol = htons(ETH_P_MCTP); skb->pkt_type = PACKET_OUTGOING; + skb->dev = dst->dev->dev; if (skb->len > dst->mtu) { kfree_skb(skb); diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 1103b3341a70..f24ae7d40e88 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -838,8 +838,11 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, opts->suboptions = 0; + /* Force later mptcp_write_options(), but do not use any actual + * option space. + */ if (unlikely(__mptcp_check_fallback(msk) && !mptcp_check_infinite_map(skb))) - return false; + return true; if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) { if (mptcp_established_options_fastclose(sk, &opt_size, remaining, opts) || @@ -1041,6 +1044,31 @@ static void __mptcp_snd_una_update(struct mptcp_sock *msk, u64 new_snd_una) WRITE_ONCE(msk->snd_una, new_snd_una); } +static void rwin_update(struct mptcp_sock *msk, struct sock *ssk, + struct sk_buff *skb) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + struct tcp_sock *tp = tcp_sk(ssk); + u64 mptcp_rcv_wnd; + + /* Avoid touching extra cachelines if TCP is going to accept this + * skb without filling the TCP-level window even with a possibly + * outdated mptcp-level rwin. + */ + if (!skb->len || skb->len < tcp_receive_window(tp)) + return; + + mptcp_rcv_wnd = atomic64_read(&msk->rcv_wnd_sent); + if (!after64(mptcp_rcv_wnd, subflow->rcv_wnd_sent)) + return; + + /* Some other subflow grew the mptcp-level rwin since rcv_wup, + * resync. + */ + tp->rcv_wnd += mptcp_rcv_wnd - subflow->rcv_wnd_sent; + subflow->rcv_wnd_sent = mptcp_rcv_wnd; +} + static void ack_update_msk(struct mptcp_sock *msk, struct sock *ssk, struct mptcp_options_received *mp_opt) @@ -1208,6 +1236,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) */ if (mp_opt.use_ack) ack_update_msk(msk, sk, &mp_opt); + rwin_update(msk, sk, skb); /* Zero-data-length packets are dropped by the caller and not * propagated to the MPTCP layer, so the skb extension does not @@ -1294,6 +1323,10 @@ static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th) if (rcv_wnd_new != rcv_wnd_old) { raise_win: + /* The msk-level rcv wnd is after the tcp level one, + * sync the latter. + */ + rcv_wnd_new = rcv_wnd_old; win = rcv_wnd_old - ack_seq; tp->rcv_wnd = min_t(u64, win, U32_MAX); new_win = tp->rcv_wnd; @@ -1317,6 +1350,21 @@ raise_win: update_wspace: WRITE_ONCE(msk->old_wspace, tp->rcv_wnd); + subflow->rcv_wnd_sent = rcv_wnd_new; +} + +static void mptcp_track_rwin(struct tcp_sock *tp) +{ + const struct sock *ssk = (const struct sock *)tp; + struct mptcp_subflow_context *subflow; + struct mptcp_sock *msk; + + if (!ssk) + return; + + subflow = mptcp_subflow_ctx(ssk); + msk = mptcp_sk(subflow->conn); + WRITE_ONCE(msk->old_wspace, tp->rcv_wnd); } __sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum) @@ -1611,6 +1659,10 @@ mp_rst: opts->reset_transient, opts->reset_reason); return; + } else if (unlikely(!opts->suboptions)) { + /* Fallback to TCP */ + mptcp_track_rwin(tp); + return; } if (OPTION_MPTCP_PRIO & opts->suboptions) { diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 2ff1b9499568..9604b91902b8 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -18,6 +18,7 @@ struct mptcp_pm_add_entry { u8 retrans_times; struct timer_list add_timer; struct mptcp_sock *sock; + struct rcu_head rcu; }; static DEFINE_SPINLOCK(mptcp_pm_list_lock); @@ -155,7 +156,7 @@ bool mptcp_remove_anno_list_by_saddr(struct mptcp_sock *msk, entry = mptcp_pm_del_add_timer(msk, addr, false); ret = entry; - kfree(entry); + kfree_rcu(entry, rcu); return ret; } @@ -345,22 +346,27 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk, { struct mptcp_pm_add_entry *entry; struct sock *sk = (struct sock *)msk; - struct timer_list *add_timer = NULL; + bool stop_timer = false; + + rcu_read_lock(); spin_lock_bh(&msk->pm.lock); entry = mptcp_lookup_anno_list_by_saddr(msk, addr); if (entry && (!check_id || entry->addr.id == addr->id)) { entry->retrans_times = ADD_ADDR_RETRANS_MAX; - add_timer = &entry->add_timer; + stop_timer = true; } if (!check_id && entry) list_del(&entry->list); spin_unlock_bh(&msk->pm.lock); - /* no lock, because sk_stop_timer_sync() is calling timer_delete_sync() */ - if (add_timer) - sk_stop_timer_sync(sk, add_timer); + /* Note: entry might have been removed by another thread. + * We hold rcu_read_lock() to ensure it is not freed under us. + */ + if (stop_timer) + sk_stop_timer_sync(sk, &entry->add_timer); + rcu_read_unlock(); return entry; } @@ -415,7 +421,7 @@ static void mptcp_pm_free_anno_list(struct mptcp_sock *msk) list_for_each_entry_safe(entry, tmp, &free_list, list) { sk_stop_timer_sync(sk, &entry->add_timer); - kfree(entry); + kfree_rcu(entry, rcu); } } diff --git a/net/mptcp/pm_kernel.c b/net/mptcp/pm_kernel.c index 2ae95476dba3..0a50fd5edc06 100644 --- a/net/mptcp/pm_kernel.c +++ b/net/mptcp/pm_kernel.c @@ -672,7 +672,7 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) void mptcp_pm_nl_rm_addr(struct mptcp_sock *msk, u8 rm_id) { - if (rm_id && WARN_ON_ONCE(msk->pm.add_addr_accepted == 0)) { + if (rm_id && !WARN_ON_ONCE(msk->pm.add_addr_accepted == 0)) { u8 limit_add_addr_accepted = mptcp_pm_get_limit_add_addr_accepted(msk); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 2d6b8de35c44..1e413426deee 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -61,11 +61,13 @@ static u64 mptcp_wnd_end(const struct mptcp_sock *msk) static const struct proto_ops *mptcp_fallback_tcp_ops(const struct sock *sk) { + unsigned short family = READ_ONCE(sk->sk_family); + #if IS_ENABLED(CONFIG_MPTCP_IPV6) - if (sk->sk_prot == &tcpv6_prot) + if (family == AF_INET6) return &inet6_stream_ops; #endif - WARN_ON_ONCE(sk->sk_prot != &tcp_prot); + WARN_ON_ONCE(family != AF_INET); return &inet_stream_ops; } @@ -76,6 +78,13 @@ bool __mptcp_try_fallback(struct mptcp_sock *msk, int fb_mib) if (__mptcp_check_fallback(msk)) return true; + /* The caller possibly is not holding the msk socket lock, but + * in the fallback case only the current subflow is touching + * the OoO queue. + */ + if (!RB_EMPTY_ROOT(&msk->out_of_order_queue)) + return false; + spin_lock_bh(&msk->fallback_lock); if (!msk->allow_infinite_fallback) { spin_unlock_bh(&msk->fallback_lock); @@ -935,14 +944,19 @@ static void mptcp_reset_rtx_timer(struct sock *sk) bool mptcp_schedule_work(struct sock *sk) { - if (inet_sk_state_load(sk) != TCP_CLOSE && - schedule_work(&mptcp_sk(sk)->work)) { - /* each subflow already holds a reference to the sk, and the - * workqueue is invoked by a subflow, so sk can't go away here. - */ - sock_hold(sk); + if (inet_sk_state_load(sk) == TCP_CLOSE) + return false; + + /* Get a reference on this socket, mptcp_worker() will release it. + * As mptcp_worker() might complete before us, we can not avoid + * a sock_hold()/sock_put() if schedule_work() returns false. + */ + sock_hold(sk); + + if (schedule_work(&mptcp_sk(sk)->work)) return true; - } + + sock_put(sk); return false; } @@ -2397,7 +2411,6 @@ bool __mptcp_retransmit_pending_data(struct sock *sk) /* flags for __mptcp_close_ssk() */ #define MPTCP_CF_PUSH BIT(1) -#define MPTCP_CF_FASTCLOSE BIT(2) /* be sure to send a reset only if the caller asked for it, also * clean completely the subflow status when the subflow reaches @@ -2408,7 +2421,7 @@ static void __mptcp_subflow_disconnect(struct sock *ssk, unsigned int flags) { if (((1 << ssk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) || - (flags & MPTCP_CF_FASTCLOSE)) { + subflow->send_fastclose) { /* The MPTCP code never wait on the subflow sockets, TCP-level * disconnect should never fail */ @@ -2455,14 +2468,8 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); - if ((flags & MPTCP_CF_FASTCLOSE) && !__mptcp_check_fallback(msk)) { - /* be sure to force the tcp_close path - * to generate the egress reset - */ - ssk->sk_lingertime = 0; - sock_set_flag(ssk, SOCK_LINGER); - subflow->send_fastclose = 1; - } + if (subflow->send_fastclose && ssk->sk_state != TCP_CLOSE) + tcp_set_state(ssk, TCP_CLOSE); need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk); if (!dispose_it) { @@ -2558,7 +2565,8 @@ static void __mptcp_close_subflow(struct sock *sk) if (ssk_state != TCP_CLOSE && (ssk_state != TCP_CLOSE_WAIT || - inet_sk_state_load(sk) != TCP_ESTABLISHED)) + inet_sk_state_load(sk) != TCP_ESTABLISHED || + __mptcp_check_fallback(msk))) continue; /* 'subflow_data_ready' will re-sched once rx queue is empty */ @@ -2657,7 +2665,7 @@ static void __mptcp_retrans(struct sock *sk) } if (!mptcp_send_head(sk)) - return; + goto clear_scheduled; goto reset_timer; } @@ -2688,7 +2696,7 @@ static void __mptcp_retrans(struct sock *sk) if (__mptcp_check_fallback(msk)) { spin_unlock_bh(&msk->fallback_lock); release_sock(ssk); - return; + goto clear_scheduled; } while (info.sent < info.limit) { @@ -2720,6 +2728,15 @@ reset_timer: if (!mptcp_rtx_timer_pending(sk)) mptcp_reset_rtx_timer(sk); + +clear_scheduled: + /* If no rtx data was available or in case of fallback, there + * could be left-over scheduled subflows; clear them all + * or later xmit could use bad ones + */ + mptcp_for_each_subflow(msk, subflow) + if (READ_ONCE(subflow->scheduled)) + mptcp_subflow_set_scheduled(subflow, false); } /* schedule the timeout timer for the relevant event: either close timeout @@ -2766,9 +2783,32 @@ static void mptcp_do_fastclose(struct sock *sk) struct mptcp_sock *msk = mptcp_sk(sk); mptcp_set_state(sk, TCP_CLOSE); - mptcp_for_each_subflow_safe(msk, subflow, tmp) - __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow), - subflow, MPTCP_CF_FASTCLOSE); + + /* Explicitly send the fastclose reset as need */ + if (__mptcp_check_fallback(msk)) + return; + + mptcp_for_each_subflow_safe(msk, subflow, tmp) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + + lock_sock(ssk); + + /* Some subflow socket states don't allow/need a reset.*/ + if ((1 << ssk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) + goto unlock; + + subflow->send_fastclose = 1; + + /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0 + * issue in __tcp_select_window(), see tcp_disconnect(). + */ + inet_csk(ssk)->icsk_ack.rcv_mss = TCP_MIN_MSS; + + tcp_send_active_reset(ssk, ssk->sk_allocation, + SK_RST_REASON_TCP_ABORT_ON_CLOSE); +unlock: + release_sock(ssk); + } } static void mptcp_worker(struct work_struct *work) @@ -2795,7 +2835,11 @@ static void mptcp_worker(struct work_struct *work) __mptcp_close_subflow(sk); if (mptcp_close_tout_expired(sk)) { + struct mptcp_subflow_context *subflow, *tmp; + mptcp_do_fastclose(sk); + mptcp_for_each_subflow_safe(msk, subflow, tmp) + __mptcp_close_ssk(sk, subflow->tcp_sock, subflow, 0); mptcp_close_wake_up(sk); } @@ -3220,7 +3264,8 @@ static int mptcp_disconnect(struct sock *sk, int flags) /* msk->subflow is still intact, the following will not free the first * subflow */ - mptcp_destroy_common(msk, MPTCP_CF_FASTCLOSE); + mptcp_do_fastclose(sk); + mptcp_destroy_common(msk); /* The first subflow is already in TCP_CLOSE status, the following * can't overlap with a fallback anymore @@ -3399,7 +3444,7 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk) msk->rcvq_space.space = TCP_INIT_CWND * TCP_MSS_DEFAULT; } -void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags) +void mptcp_destroy_common(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow, *tmp; struct sock *sk = (struct sock *)msk; @@ -3408,7 +3453,7 @@ void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags) /* join list will be eventually flushed (with rst) at sock lock release time */ mptcp_for_each_subflow_safe(msk, subflow, tmp) - __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow), subflow, flags); + __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow), subflow, 0); __skb_queue_purge(&sk->sk_receive_queue); skb_rbtree_purge(&msk->out_of_order_queue); @@ -3426,7 +3471,7 @@ static void mptcp_destroy(struct sock *sk) /* allow the following to close even the initial subflow */ msk->free_first = 1; - mptcp_destroy_common(msk, 0); + mptcp_destroy_common(msk); sk_sockets_allocated_dec(sk); } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 379a88e14e8d..6ca97096607c 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -509,6 +509,7 @@ struct mptcp_subflow_context { u64 remote_key; u64 idsn; u64 map_seq; + u64 rcv_wnd_sent; u32 snd_isn; u32 token; u32 rel_write_seq; @@ -976,7 +977,7 @@ static inline void mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk) local_bh_enable(); } -void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags); +void mptcp_destroy_common(struct mptcp_sock *msk); #define MPTCP_TOKEN_MAX_RETRIES 4 diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index e8325890a322..af707ce0f624 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -2144,6 +2144,10 @@ void __init mptcp_subflow_init(void) tcp_prot_override = tcp_prot; tcp_prot_override.release_cb = tcp_release_cb_override; tcp_prot_override.diag_destroy = tcp_abort_override; +#ifdef CONFIG_BPF_SYSCALL + /* Disable sockmap processing for subflows */ + tcp_prot_override.psock_update_sk_prot = NULL; +#endif #if IS_ENABLED(CONFIG_MPTCP_IPV6) /* In struct mptcp_subflow_request_sock, we assume the TCP request sock @@ -2180,6 +2184,10 @@ void __init mptcp_subflow_init(void) tcpv6_prot_override = tcpv6_prot; tcpv6_prot_override.release_cb = tcp_release_cb_override; tcpv6_prot_override.diag_destroy = tcp_abort_override; +#ifdef CONFIG_BPF_SYSCALL + /* Disable sockmap processing for subflows */ + tcpv6_prot_override.psock_update_sk_prot = NULL; +#endif #endif mptcp_diag_subflow_init(&subflow_ulp_ops); diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 2832e0794197..792ca44a461d 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -572,69 +572,6 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key, return 0; } -static int set_nsh(struct sk_buff *skb, struct sw_flow_key *flow_key, - const struct nlattr *a) -{ - struct nshhdr *nh; - size_t length; - int err; - u8 flags; - u8 ttl; - int i; - - struct ovs_key_nsh key; - struct ovs_key_nsh mask; - - err = nsh_key_from_nlattr(a, &key, &mask); - if (err) - return err; - - /* Make sure the NSH base header is there */ - if (!pskb_may_pull(skb, skb_network_offset(skb) + NSH_BASE_HDR_LEN)) - return -ENOMEM; - - nh = nsh_hdr(skb); - length = nsh_hdr_len(nh); - - /* Make sure the whole NSH header is there */ - err = skb_ensure_writable(skb, skb_network_offset(skb) + - length); - if (unlikely(err)) - return err; - - nh = nsh_hdr(skb); - skb_postpull_rcsum(skb, nh, length); - flags = nsh_get_flags(nh); - flags = OVS_MASKED(flags, key.base.flags, mask.base.flags); - flow_key->nsh.base.flags = flags; - ttl = nsh_get_ttl(nh); - ttl = OVS_MASKED(ttl, key.base.ttl, mask.base.ttl); - flow_key->nsh.base.ttl = ttl; - nsh_set_flags_and_ttl(nh, flags, ttl); - nh->path_hdr = OVS_MASKED(nh->path_hdr, key.base.path_hdr, - mask.base.path_hdr); - flow_key->nsh.base.path_hdr = nh->path_hdr; - switch (nh->mdtype) { - case NSH_M_TYPE1: - for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++) { - nh->md1.context[i] = - OVS_MASKED(nh->md1.context[i], key.context[i], - mask.context[i]); - } - memcpy(flow_key->nsh.context, nh->md1.context, - sizeof(nh->md1.context)); - break; - case NSH_M_TYPE2: - memset(flow_key->nsh.context, 0, - sizeof(flow_key->nsh.context)); - break; - default: - return -EINVAL; - } - skb_postpush_rcsum(skb, nh, length); - return 0; -} - /* Must follow skb_ensure_writable() since that can move the skb data. */ static void set_tp_port(struct sk_buff *skb, __be16 *port, __be16 new_port, __sum16 *check) @@ -1130,10 +1067,6 @@ static int execute_masked_set_action(struct sk_buff *skb, get_mask(a, struct ovs_key_ethernet *)); break; - case OVS_KEY_ATTR_NSH: - err = set_nsh(skb, flow_key, a); - break; - case OVS_KEY_ATTR_IPV4: err = set_ipv4(skb, flow_key, nla_data(a), get_mask(a, struct ovs_key_ipv4 *)); @@ -1170,6 +1103,7 @@ static int execute_masked_set_action(struct sk_buff *skb, case OVS_KEY_ATTR_CT_LABELS: case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4: case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6: + case OVS_KEY_ATTR_NSH: err = -EINVAL; break; } diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index ad64bb9ab5e2..1cb4f97335d8 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -1305,6 +1305,11 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, return 0; } +/* + * Constructs NSH header 'nh' from attributes of OVS_ACTION_ATTR_PUSH_NSH, + * where 'nh' points to a memory block of 'size' bytes. It's assumed that + * attributes were previously validated with validate_push_nsh(). + */ int nsh_hdr_from_nlattr(const struct nlattr *attr, struct nshhdr *nh, size_t size) { @@ -1314,8 +1319,6 @@ int nsh_hdr_from_nlattr(const struct nlattr *attr, u8 ttl = 0; int mdlen = 0; - /* validate_nsh has check this, so we needn't do duplicate check here - */ if (size < NSH_BASE_HDR_LEN) return -ENOBUFS; @@ -1359,46 +1362,6 @@ int nsh_hdr_from_nlattr(const struct nlattr *attr, return 0; } -int nsh_key_from_nlattr(const struct nlattr *attr, - struct ovs_key_nsh *nsh, struct ovs_key_nsh *nsh_mask) -{ - struct nlattr *a; - int rem; - - /* validate_nsh has check this, so we needn't do duplicate check here - */ - nla_for_each_nested(a, attr, rem) { - int type = nla_type(a); - - switch (type) { - case OVS_NSH_KEY_ATTR_BASE: { - const struct ovs_nsh_key_base *base = nla_data(a); - const struct ovs_nsh_key_base *base_mask = base + 1; - - nsh->base = *base; - nsh_mask->base = *base_mask; - break; - } - case OVS_NSH_KEY_ATTR_MD1: { - const struct ovs_nsh_key_md1 *md1 = nla_data(a); - const struct ovs_nsh_key_md1 *md1_mask = md1 + 1; - - memcpy(nsh->context, md1->context, sizeof(*md1)); - memcpy(nsh_mask->context, md1_mask->context, - sizeof(*md1_mask)); - break; - } - case OVS_NSH_KEY_ATTR_MD2: - /* Not supported yet */ - return -ENOTSUPP; - default: - return -EINVAL; - } - } - - return 0; -} - static int nsh_key_put_from_nlattr(const struct nlattr *attr, struct sw_flow_match *match, bool is_mask, bool is_push_nsh, bool log) @@ -2839,17 +2802,13 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, return err; } -static bool validate_nsh(const struct nlattr *attr, bool is_mask, - bool is_push_nsh, bool log) +static bool validate_push_nsh(const struct nlattr *attr, bool log) { struct sw_flow_match match; struct sw_flow_key key; - int ret = 0; ovs_match_init(&match, &key, true, NULL); - ret = nsh_key_put_from_nlattr(attr, &match, is_mask, - is_push_nsh, log); - return !ret; + return !nsh_key_put_from_nlattr(attr, &match, false, true, log); } /* Return false if there are any non-masked bits set. @@ -2997,13 +2956,6 @@ static int validate_set(const struct nlattr *a, break; - case OVS_KEY_ATTR_NSH: - if (eth_type != htons(ETH_P_NSH)) - return -EINVAL; - if (!validate_nsh(nla_data(a), masked, false, log)) - return -EINVAL; - break; - default: return -EINVAL; } @@ -3437,7 +3389,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, return -EINVAL; } mac_proto = MAC_PROTO_NONE; - if (!validate_nsh(nla_data(a), false, true, true)) + if (!validate_push_nsh(nla_data(a), log)) return -EINVAL; break; diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index fe7f77fc5f18..ff8cdecbe346 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -65,8 +65,6 @@ int ovs_nla_put_actions(const struct nlattr *attr, void ovs_nla_free_flow_actions(struct sw_flow_actions *); void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *); -int nsh_key_from_nlattr(const struct nlattr *attr, struct ovs_key_nsh *nsh, - struct ovs_key_nsh *nsh_mask); int nsh_hdr_from_nlattr(const struct nlattr *attr, struct nshhdr *nh, size_t size); diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 396b576390d0..c2b5bc19e091 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -47,12 +47,10 @@ TC_INDIRECT_SCOPE int tcf_bpf_act(struct sk_buff *skb, filter = rcu_dereference(prog->filter); if (at_ingress) { __skb_push(skb, skb->mac_len); - bpf_compute_data_pointers(skb); - filter_res = bpf_prog_run(filter, skb); + filter_res = bpf_prog_run_data_pointers(filter, skb); __skb_pull(skb, skb->mac_len); } else { - bpf_compute_data_pointers(skb); - filter_res = bpf_prog_run(filter, skb); + filter_res = bpf_prog_run_data_pointers(filter, skb); } if (unlikely(!skb->tstamp && skb->tstamp_type)) skb->tstamp_type = SKB_CLOCK_REALTIME; diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 3e89927d7116..26ba8c2d20ab 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -195,13 +195,15 @@ static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a, const struct tcf_connmark_info *ci = to_connmark(a); unsigned char *b = skb_tail_pointer(skb); const struct tcf_connmark_parms *parms; - struct tc_connmark opt = { - .index = ci->tcf_index, - .refcnt = refcount_read(&ci->tcf_refcnt) - ref, - .bindcnt = atomic_read(&ci->tcf_bindcnt) - bind, - }; + struct tc_connmark opt; struct tcf_t t; + memset(&opt, 0, sizeof(opt)); + + opt.index = ci->tcf_index; + opt.refcnt = refcount_read(&ci->tcf_refcnt) - ref; + opt.bindcnt = atomic_read(&ci->tcf_bindcnt) - bind; + rcu_read_lock(); parms = rcu_dereference(ci->parms); diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 107c6d83dc5c..7c6975632fc2 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -644,13 +644,15 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind, unsigned char *b = skb_tail_pointer(skb); struct tcf_ife_info *ife = to_ife(a); struct tcf_ife_params *p; - struct tc_ife opt = { - .index = ife->tcf_index, - .refcnt = refcount_read(&ife->tcf_refcnt) - ref, - .bindcnt = atomic_read(&ife->tcf_bindcnt) - bind, - }; + struct tc_ife opt; struct tcf_t t; + memset(&opt, 0, sizeof(opt)); + + opt.index = ife->tcf_index, + opt.refcnt = refcount_read(&ife->tcf_refcnt) - ref, + opt.bindcnt = atomic_read(&ife->tcf_bindcnt) - bind, + spin_lock_bh(&ife->tcf_lock); opt.action = ife->tcf_action; p = rcu_dereference_protected(ife->params, diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 7fbe42f0e5c2..a32754a2658b 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -97,12 +97,10 @@ TC_INDIRECT_SCOPE int cls_bpf_classify(struct sk_buff *skb, } else if (at_ingress) { /* It is safe to push/pull even if skb_shared() */ __skb_push(skb, skb->mac_len); - bpf_compute_data_pointers(skb); - filter_res = bpf_prog_run(prog->filter, skb); + filter_res = bpf_prog_run_data_pointers(prog->filter, skb); __skb_pull(skb, skb->mac_len); } else { - bpf_compute_data_pointers(skb); - filter_res = bpf_prog_run(prog->filter, skb); + filter_res = bpf_prog_run_data_pointers(prog->filter, skb); } if (unlikely(!skb->tstamp && skb->tstamp_type)) skb->tstamp_type = SKB_CLOCK_REALTIME; diff --git a/net/sched/em_canid.c b/net/sched/em_canid.c index 5337bc462755..2d27f91d8441 100644 --- a/net/sched/em_canid.c +++ b/net/sched/em_canid.c @@ -99,6 +99,9 @@ static int em_canid_match(struct sk_buff *skb, struct tcf_ematch *m, int i; const struct can_filter *lp; + if (!pskb_may_pull(skb, CAN_MTU)) + return 0; + can_id = em_canid_get_id(skb); if (can_id & CAN_EFF_FLAG) { diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c index 64b637f18bc7..48c1bce74f49 100644 --- a/net/sched/em_cmp.c +++ b/net/sched/em_cmp.c @@ -22,9 +22,12 @@ static int em_cmp_match(struct sk_buff *skb, struct tcf_ematch *em, struct tcf_pkt_info *info) { struct tcf_em_cmp *cmp = (struct tcf_em_cmp *) em->data; - unsigned char *ptr = tcf_get_base_ptr(skb, cmp->layer) + cmp->off; + unsigned char *ptr = tcf_get_base_ptr(skb, cmp->layer); u32 val = 0; + if (!ptr) + return 0; + ptr += cmp->off; if (!tcf_valid_offset(skb, ptr, cmp->align)) return 0; diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c index 4f9f21a05d5e..c65ffa5fff94 100644 --- a/net/sched/em_nbyte.c +++ b/net/sched/em_nbyte.c @@ -42,6 +42,8 @@ static int em_nbyte_match(struct sk_buff *skb, struct tcf_ematch *em, struct nbyte_data *nbyte = (struct nbyte_data *) em->data; unsigned char *ptr = tcf_get_base_ptr(skb, nbyte->hdr.layer); + if (!ptr) + return 0; ptr += nbyte->hdr.off; if (!tcf_valid_offset(skb, ptr, nbyte->hdr.len)) diff --git a/net/sched/em_text.c b/net/sched/em_text.c index 6b3d0af72c39..692e2be1793e 100644 --- a/net/sched/em_text.c +++ b/net/sched/em_text.c @@ -29,12 +29,19 @@ static int em_text_match(struct sk_buff *skb, struct tcf_ematch *m, struct tcf_pkt_info *info) { struct text_match *tm = EM_TEXT_PRIV(m); + unsigned char *ptr; int from, to; - from = tcf_get_base_ptr(skb, tm->from_layer) - skb->data; + ptr = tcf_get_base_ptr(skb, tm->from_layer); + if (!ptr) + return 0; + from = ptr - skb->data; from += tm->from_offset; - to = tcf_get_base_ptr(skb, tm->to_layer) - skb->data; + ptr = tcf_get_base_ptr(skb, tm->to_layer); + if (!ptr) + return 0; + to = ptr - skb->data; to += tm->to_offset; return skb_find_text(skb, from, to, tm->config) != UINT_MAX; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 1e058b46d3e1..f56b18c8aebf 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1599,6 +1599,11 @@ static int __tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, NL_SET_ERR_MSG(extack, "Failed to find specified qdisc"); return -ENOENT; } + if (p->flags & TCQ_F_INGRESS) { + NL_SET_ERR_MSG(extack, + "Cannot add children to ingress/clsact qdisc"); + return -EOPNOTSUPP; + } q = qdisc_leaf(p, clid, extack); if (IS_ERR(q)) return PTR_ERR(q); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 1e008a228ebd..7dee9748a56b 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -180,9 +180,10 @@ static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) static void try_bulk_dequeue_skb(struct Qdisc *q, struct sk_buff *skb, const struct netdev_queue *txq, - int *packets) + int *packets, int budget) { int bytelimit = qdisc_avail_bulklimit(txq) - skb->len; + int cnt = 0; while (bytelimit > 0) { struct sk_buff *nskb = q->dequeue(q); @@ -193,8 +194,10 @@ static void try_bulk_dequeue_skb(struct Qdisc *q, bytelimit -= nskb->len; /* covers GSO len */ skb->next = nskb; skb = nskb; - (*packets)++; /* GSO counts as one pkt */ + if (++cnt >= budget) + break; } + (*packets) += cnt; skb_mark_not_on_list(skb); } @@ -228,7 +231,7 @@ static void try_bulk_dequeue_skb_slow(struct Qdisc *q, * A requeued skb (via q->gso_skb) can also be a SKB list. */ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate, - int *packets) + int *packets, int budget) { const struct netdev_queue *txq = q->dev_queue; struct sk_buff *skb = NULL; @@ -295,7 +298,7 @@ validate: if (skb) { bulk: if (qdisc_may_bulk(q)) - try_bulk_dequeue_skb(q, skb, txq, packets); + try_bulk_dequeue_skb(q, skb, txq, packets, budget); else try_bulk_dequeue_skb_slow(q, skb, packets); } @@ -387,7 +390,7 @@ bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, * >0 - queue is not empty. * */ -static inline bool qdisc_restart(struct Qdisc *q, int *packets) +static inline bool qdisc_restart(struct Qdisc *q, int *packets, int budget) { spinlock_t *root_lock = NULL; struct netdev_queue *txq; @@ -396,7 +399,7 @@ static inline bool qdisc_restart(struct Qdisc *q, int *packets) bool validate; /* Dequeue packet */ - skb = dequeue_skb(q, &validate, packets); + skb = dequeue_skb(q, &validate, packets, budget); if (unlikely(!skb)) return false; @@ -414,7 +417,7 @@ void __qdisc_run(struct Qdisc *q) int quota = READ_ONCE(net_hotdata.dev_tx_weight); int packets; - while (qdisc_restart(q, &packets)) { + while (qdisc_restart(q, &packets, quota)) { quota -= packets; if (quota <= 0) { if (q->flags & TCQ_F_NOLOCK) diff --git a/net/sctp/diag.c b/net/sctp/diag.c index 996c2018f0e6..2afb376299fe 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -73,19 +73,26 @@ static int inet_diag_msg_sctpladdrs_fill(struct sk_buff *skb, struct nlattr *attr; void *info = NULL; + rcu_read_lock(); list_for_each_entry_rcu(laddr, address_list, list) addrcnt++; + rcu_read_unlock(); attr = nla_reserve(skb, INET_DIAG_LOCALS, addrlen * addrcnt); if (!attr) return -EMSGSIZE; info = nla_data(attr); + rcu_read_lock(); list_for_each_entry_rcu(laddr, address_list, list) { memcpy(info, &laddr->a, sizeof(laddr->a)); memset(info + sizeof(laddr->a), 0, addrlen - sizeof(laddr->a)); info += addrlen; + + if (!--addrcnt) + break; } + rcu_read_unlock(); return 0; } @@ -223,14 +230,15 @@ struct sctp_comm_param { bool net_admin; }; -static size_t inet_assoc_attr_size(struct sctp_association *asoc) +static size_t inet_assoc_attr_size(struct sock *sk, + struct sctp_association *asoc) { int addrlen = sizeof(struct sockaddr_storage); int addrcnt = 0; struct sctp_sockaddr_entry *laddr; list_for_each_entry_rcu(laddr, &asoc->base.bind_addr.address_list, - list) + list, lockdep_sock_is_held(sk)) addrcnt++; return nla_total_size(sizeof(struct sctp_info)) @@ -256,11 +264,14 @@ static int sctp_sock_dump_one(struct sctp_endpoint *ep, struct sctp_transport *t if (err) return err; - rep = nlmsg_new(inet_assoc_attr_size(assoc), GFP_KERNEL); - if (!rep) + lock_sock(sk); + + rep = nlmsg_new(inet_assoc_attr_size(sk, assoc), GFP_KERNEL); + if (!rep) { + release_sock(sk); return -ENOMEM; + } - lock_sock(sk); if (ep != assoc->ep) { err = -EAGAIN; goto out; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 4d258a6e8033..0c56d9673cc1 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -37,10 +37,10 @@ /* 1st Level Abstractions. */ /* Initialize a new transport from provided memory. */ -static struct sctp_transport *sctp_transport_init(struct net *net, - struct sctp_transport *peer, - const union sctp_addr *addr, - gfp_t gfp) +static void sctp_transport_init(struct net *net, + struct sctp_transport *peer, + const union sctp_addr *addr, + gfp_t gfp) { /* Copy in the address. */ peer->af_specific = sctp_get_af_specific(addr->sa.sa_family); @@ -83,8 +83,6 @@ static struct sctp_transport *sctp_transport_init(struct net *net, get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce)); refcount_set(&peer->refcnt, 1); - - return peer; } /* Allocate and initialize a new transport. */ @@ -96,20 +94,13 @@ struct sctp_transport *sctp_transport_new(struct net *net, transport = kzalloc(sizeof(*transport), gfp); if (!transport) - goto fail; + return NULL; - if (!sctp_transport_init(net, transport, addr, gfp)) - goto fail_init; + sctp_transport_init(net, transport, addr, gfp); SCTP_DBG_OBJCNT_INC(transport); return transport; - -fail_init: - kfree(transport); - -fail: - return NULL; } /* This transport is no longer needed. Free up if possible, or @@ -495,6 +486,7 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) if (tp->rttvar || tp->srtt) { struct net *net = tp->asoc->base.net; + unsigned int rto_beta, rto_alpha; /* 6.3.1 C3) When a new RTT measurement R' is made, set * RTTVAR <- (1 - RTO.Beta) * RTTVAR + RTO.Beta * |SRTT - R'| * SRTT <- (1 - RTO.Alpha) * SRTT + RTO.Alpha * R' @@ -506,10 +498,14 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) * For example, assuming the default value of RTO.Alpha of * 1/8, rto_alpha would be expressed as 3. */ - tp->rttvar = tp->rttvar - (tp->rttvar >> net->sctp.rto_beta) - + (((__u32)abs((__s64)tp->srtt - (__s64)rtt)) >> net->sctp.rto_beta); - tp->srtt = tp->srtt - (tp->srtt >> net->sctp.rto_alpha) - + (rtt >> net->sctp.rto_alpha); + rto_beta = READ_ONCE(net->sctp.rto_beta); + if (rto_beta < 32) + tp->rttvar = tp->rttvar - (tp->rttvar >> rto_beta) + + (((__u32)abs((__s64)tp->srtt - (__s64)rtt)) >> rto_beta); + rto_alpha = READ_ONCE(net->sctp.rto_alpha); + if (rto_alpha < 32) + tp->srtt = tp->srtt - (tp->srtt >> rto_alpha) + + (rtt >> rto_alpha); } else { /* 6.3.1 C2) When the first RTT measurement R is made, set * SRTT <- R, RTTVAR <- R/2. diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 157aace169d4..87c87edadde7 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -890,6 +890,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) return SMC_CLC_DECL_CNFERR; } pclc_base->hdr.typev1 = SMC_TYPE_N; + ini->smc_type_v1 = SMC_TYPE_N; } else { pclc_base->iparea_offset = htons(sizeof(*pclc_smcd)); plen += sizeof(*pclc_prfx) + diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index 43b1f558b33d..e659fea2da70 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -238,7 +238,7 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, strp_parser_err(strp, -EMSGSIZE, desc); break; } else if (len <= (ssize_t)head->len - - skb->len - stm->strp.offset) { + (ssize_t)skb->len - stm->strp.offset) { /* Length must be into new skb (and also * greater than zero) */ diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index 984e0cf9bf8a..a570e7adf270 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -18,10 +18,9 @@ config SUNRPC_SWAP config RPCSEC_GSS_KRB5 tristate "Secure RPC: Kerberos V mechanism" - depends on SUNRPC + depends on SUNRPC && CRYPTO default y select SUNRPC_GSS - select CRYPTO select CRYPTO_SKCIPHER select CRYPTO_HASH help diff --git a/net/tipc/net.c b/net/tipc/net.c index 0e95572e56b4..7e65d0b0c4a8 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -145,7 +145,9 @@ void tipc_net_finalize_work(struct work_struct *work) { struct tipc_net *tn = container_of(work, struct tipc_net, work); + rtnl_lock(); tipc_net_finalize(tipc_link_net(tn->bcl), tn->trial_addr); + rtnl_unlock(); } void tipc_net_stop(struct net *net) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 768098dec231..833c3616d2a2 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2954,6 +2954,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, u = unix_sk(sk); +redo: /* Lock the socket to prevent queue disordering * while sleeps in memcpy_tomsg */ @@ -2965,7 +2966,6 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, struct sk_buff *skb, *last; int chunk; -redo: unix_state_lock(sk); if (sock_flag(sk, SOCK_DEAD)) { err = -ECONNRESET; @@ -3015,7 +3015,6 @@ again: goto out; } - mutex_lock(&u->iolock); goto redo; unlock: unix_state_unlock(sk); diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 684ab03137b6..65396a4e1b07 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -145,6 +145,7 @@ enum unix_vertex_index { }; static unsigned long unix_vertex_unvisited_index = UNIX_VERTEX_INDEX_MARK1; +static unsigned long unix_vertex_max_scc_index = UNIX_VERTEX_INDEX_START; static void unix_add_edge(struct scm_fp_list *fpl, struct unix_edge *edge) { @@ -153,6 +154,7 @@ static void unix_add_edge(struct scm_fp_list *fpl, struct unix_edge *edge) if (!vertex) { vertex = list_first_entry(&fpl->vertices, typeof(*vertex), entry); vertex->index = unix_vertex_unvisited_index; + vertex->scc_index = ++unix_vertex_max_scc_index; vertex->out_degree = 0; INIT_LIST_HEAD(&vertex->edges); INIT_LIST_HEAD(&vertex->scc_entry); @@ -489,10 +491,15 @@ prev_vertex: scc_dead = unix_vertex_dead(v); } - if (scc_dead) + if (scc_dead) { unix_collect_skb(&scc, hitlist); - else if (!unix_graph_maybe_cyclic) - unix_graph_maybe_cyclic = unix_scc_cyclic(&scc); + } else { + if (unix_vertex_max_scc_index < vertex->scc_index) + unix_vertex_max_scc_index = vertex->scc_index; + + if (!unix_graph_maybe_cyclic) + unix_graph_maybe_cyclic = unix_scc_cyclic(&scc); + } list_del(&scc); } @@ -507,6 +514,7 @@ static void unix_walk_scc(struct sk_buff_head *hitlist) unsigned long last_index = UNIX_VERTEX_INDEX_START; unix_graph_maybe_cyclic = false; + unix_vertex_max_scc_index = UNIX_VERTEX_INDEX_START; /* Visit every vertex exactly once. * __unix_walk_scc() moves visited vertices to unix_visited_vertices. diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 76763247a377..a9ca9c3b87b3 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1661,18 +1661,40 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr, timeout = schedule_timeout(timeout); lock_sock(sk); - if (signal_pending(current)) { - err = sock_intr_errno(timeout); - sk->sk_state = sk->sk_state == TCP_ESTABLISHED ? TCP_CLOSING : TCP_CLOSE; - sock->state = SS_UNCONNECTED; - vsock_transport_cancel_pkt(vsk); - vsock_remove_connected(vsk); - goto out_wait; - } else if ((sk->sk_state != TCP_ESTABLISHED) && (timeout == 0)) { - err = -ETIMEDOUT; + /* Connection established. Whatever happens to socket once we + * release it, that's not connect()'s concern. No need to go + * into signal and timeout handling. Call it a day. + * + * Note that allowing to "reset" an already established socket + * here is racy and insecure. + */ + if (sk->sk_state == TCP_ESTABLISHED) + break; + + /* If connection was _not_ established and a signal/timeout came + * to be, we want the socket's state reset. User space may want + * to retry. + * + * sk_state != TCP_ESTABLISHED implies that socket is not on + * vsock_connected_table. We keep the binding and the transport + * assigned. + */ + if (signal_pending(current) || timeout == 0) { + err = timeout == 0 ? -ETIMEDOUT : sock_intr_errno(timeout); + + /* Listener might have already responded with + * VIRTIO_VSOCK_OP_RESPONSE. Its handling expects our + * sk_state == TCP_SYN_SENT, which hereby we break. + * In such case VIRTIO_VSOCK_OP_RST will follow. + */ sk->sk_state = TCP_CLOSE; sock->state = SS_UNCONNECTED; + + /* Try to cancel VIRTIO_VSOCK_OP_REQUEST skb sent out by + * transport->connect(). + */ vsock_transport_cancel_pkt(vsk); + goto out_wait; } diff --git a/net/wireless/core.c b/net/wireless/core.c index 797f9f2004a6..54a34d8d356e 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1787,6 +1787,62 @@ bool wiphy_delayed_work_pending(struct wiphy *wiphy, } EXPORT_SYMBOL_GPL(wiphy_delayed_work_pending); +enum hrtimer_restart wiphy_hrtimer_work_timer(struct hrtimer *t) +{ + struct wiphy_hrtimer_work *hrwork = + container_of(t, struct wiphy_hrtimer_work, timer); + + wiphy_work_queue(hrwork->wiphy, &hrwork->work); + + return HRTIMER_NORESTART; +} +EXPORT_SYMBOL_GPL(wiphy_hrtimer_work_timer); + +void wiphy_hrtimer_work_queue(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrwork, + ktime_t delay) +{ + trace_wiphy_hrtimer_work_queue(wiphy, &hrwork->work, delay); + + if (!delay) { + hrtimer_cancel(&hrwork->timer); + wiphy_work_queue(wiphy, &hrwork->work); + return; + } + + hrwork->wiphy = wiphy; + hrtimer_start_range_ns(&hrwork->timer, delay, + 1000 * NSEC_PER_USEC, HRTIMER_MODE_REL); +} +EXPORT_SYMBOL_GPL(wiphy_hrtimer_work_queue); + +void wiphy_hrtimer_work_cancel(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrwork) +{ + lockdep_assert_held(&wiphy->mtx); + + hrtimer_cancel(&hrwork->timer); + wiphy_work_cancel(wiphy, &hrwork->work); +} +EXPORT_SYMBOL_GPL(wiphy_hrtimer_work_cancel); + +void wiphy_hrtimer_work_flush(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrwork) +{ + lockdep_assert_held(&wiphy->mtx); + + hrtimer_cancel(&hrwork->timer); + wiphy_work_flush(wiphy, &hrwork->work); +} +EXPORT_SYMBOL_GPL(wiphy_hrtimer_work_flush); + +bool wiphy_hrtimer_work_pending(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrwork) +{ + return hrtimer_is_queued(&hrwork->timer); +} +EXPORT_SYMBOL_GPL(wiphy_hrtimer_work_pending); + static int __init cfg80211_init(void) { int err; diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 8a4c34112eb5..2b71f1d867a0 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -304,6 +304,27 @@ TRACE_EVENT(wiphy_delayed_work_queue, __entry->delay) ); +TRACE_EVENT(wiphy_hrtimer_work_queue, + TP_PROTO(struct wiphy *wiphy, struct wiphy_work *work, + ktime_t delay), + TP_ARGS(wiphy, work, delay), + TP_STRUCT__entry( + WIPHY_ENTRY + __field(void *, instance) + __field(void *, func) + __field(ktime_t, delay) + ), + TP_fast_assign( + WIPHY_ASSIGN; + __entry->instance = work; + __entry->func = work->func; + __entry->delay = delay; + ), + TP_printk(WIPHY_PR_FMT " instance=%p func=%pS delay=%llu", + WIPHY_PR_ARG, __entry->instance, __entry->func, + __entry->delay) +); + TRACE_EVENT(wiphy_work_worker_start, TP_PROTO(struct wiphy *wiphy), TP_ARGS(wiphy), diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 7b0c68a70888..69bbcca8ac75 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -36,20 +36,13 @@ #define TX_BATCH_SIZE 32 #define MAX_PER_SOCKET_BUDGET 32 -struct xsk_addr_node { - u64 addr; - struct list_head addr_node; -}; - -struct xsk_addr_head { +struct xsk_addrs { u32 num_descs; - struct list_head addrs_list; + u64 addrs[MAX_SKB_FRAGS + 1]; }; static struct kmem_cache *xsk_tx_generic_cache; -#define XSKCB(skb) ((struct xsk_addr_head *)((skb)->cb)) - void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool) { if (pool->cached_need_wakeup & XDP_WAKEUP_RX) @@ -558,29 +551,68 @@ static int xsk_cq_reserve_locked(struct xsk_buff_pool *pool) return ret; } +static bool xsk_skb_destructor_is_addr(struct sk_buff *skb) +{ + return (uintptr_t)skb_shinfo(skb)->destructor_arg & 0x1UL; +} + +static u64 xsk_skb_destructor_get_addr(struct sk_buff *skb) +{ + return (u64)((uintptr_t)skb_shinfo(skb)->destructor_arg & ~0x1UL); +} + +static void xsk_skb_destructor_set_addr(struct sk_buff *skb, u64 addr) +{ + skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t)addr | 0x1UL); +} + +static void xsk_inc_num_desc(struct sk_buff *skb) +{ + struct xsk_addrs *xsk_addr; + + if (!xsk_skb_destructor_is_addr(skb)) { + xsk_addr = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg; + xsk_addr->num_descs++; + } +} + +static u32 xsk_get_num_desc(struct sk_buff *skb) +{ + struct xsk_addrs *xsk_addr; + + if (xsk_skb_destructor_is_addr(skb)) + return 1; + + xsk_addr = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg; + + return xsk_addr->num_descs; +} + static void xsk_cq_submit_addr_locked(struct xsk_buff_pool *pool, struct sk_buff *skb) { - struct xsk_addr_node *pos, *tmp; + u32 num_descs = xsk_get_num_desc(skb); + struct xsk_addrs *xsk_addr; u32 descs_processed = 0; unsigned long flags; - u32 idx; + u32 idx, i; spin_lock_irqsave(&pool->cq_lock, flags); idx = xskq_get_prod(pool->cq); - xskq_prod_write_addr(pool->cq, idx, - (u64)(uintptr_t)skb_shinfo(skb)->destructor_arg); - descs_processed++; + if (unlikely(num_descs > 1)) { + xsk_addr = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg; - if (unlikely(XSKCB(skb)->num_descs > 1)) { - list_for_each_entry_safe(pos, tmp, &XSKCB(skb)->addrs_list, addr_node) { + for (i = 0; i < num_descs; i++) { xskq_prod_write_addr(pool->cq, idx + descs_processed, - pos->addr); + xsk_addr->addrs[i]); descs_processed++; - list_del(&pos->addr_node); - kmem_cache_free(xsk_tx_generic_cache, pos); } + kmem_cache_free(xsk_tx_generic_cache, xsk_addr); + } else { + xskq_prod_write_addr(pool->cq, idx, + xsk_skb_destructor_get_addr(skb)); + descs_processed++; } xskq_prod_submit_n(pool->cq, descs_processed); spin_unlock_irqrestore(&pool->cq_lock, flags); @@ -595,16 +627,6 @@ static void xsk_cq_cancel_locked(struct xsk_buff_pool *pool, u32 n) spin_unlock_irqrestore(&pool->cq_lock, flags); } -static void xsk_inc_num_desc(struct sk_buff *skb) -{ - XSKCB(skb)->num_descs++; -} - -static u32 xsk_get_num_desc(struct sk_buff *skb) -{ - return XSKCB(skb)->num_descs; -} - static void xsk_destruct_skb(struct sk_buff *skb) { struct xsk_tx_metadata_compl *compl = &skb_shinfo(skb)->xsk_meta; @@ -621,27 +643,22 @@ static void xsk_destruct_skb(struct sk_buff *skb) static void xsk_skb_init_misc(struct sk_buff *skb, struct xdp_sock *xs, u64 addr) { - BUILD_BUG_ON(sizeof(struct xsk_addr_head) > sizeof(skb->cb)); - INIT_LIST_HEAD(&XSKCB(skb)->addrs_list); skb->dev = xs->dev; skb->priority = READ_ONCE(xs->sk.sk_priority); skb->mark = READ_ONCE(xs->sk.sk_mark); - XSKCB(skb)->num_descs = 0; skb->destructor = xsk_destruct_skb; - skb_shinfo(skb)->destructor_arg = (void *)(uintptr_t)addr; + xsk_skb_destructor_set_addr(skb, addr); } static void xsk_consume_skb(struct sk_buff *skb) { struct xdp_sock *xs = xdp_sk(skb->sk); u32 num_descs = xsk_get_num_desc(skb); - struct xsk_addr_node *pos, *tmp; + struct xsk_addrs *xsk_addr; if (unlikely(num_descs > 1)) { - list_for_each_entry_safe(pos, tmp, &XSKCB(skb)->addrs_list, addr_node) { - list_del(&pos->addr_node); - kmem_cache_free(xsk_tx_generic_cache, pos); - } + xsk_addr = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg; + kmem_cache_free(xsk_tx_generic_cache, xsk_addr); } skb->destructor = sock_wfree; @@ -701,7 +718,6 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs, { struct xsk_buff_pool *pool = xs->pool; u32 hr, len, ts, offset, copy, copied; - struct xsk_addr_node *xsk_addr; struct sk_buff *skb = xs->skb; struct page *page; void *buffer; @@ -727,16 +743,26 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs, return ERR_PTR(err); } } else { - xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, GFP_KERNEL); - if (!xsk_addr) - return ERR_PTR(-ENOMEM); + struct xsk_addrs *xsk_addr; + + if (xsk_skb_destructor_is_addr(skb)) { + xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, + GFP_KERNEL); + if (!xsk_addr) + return ERR_PTR(-ENOMEM); + + xsk_addr->num_descs = 1; + xsk_addr->addrs[0] = xsk_skb_destructor_get_addr(skb); + skb_shinfo(skb)->destructor_arg = (void *)xsk_addr; + } else { + xsk_addr = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg; + } /* in case of -EOVERFLOW that could happen below, * xsk_consume_skb() will release this node as whole skb * would be dropped, which implies freeing all list elements */ - xsk_addr->addr = desc->addr; - list_add_tail(&xsk_addr->addr_node, &XSKCB(skb)->addrs_list); + xsk_addr->addrs[xsk_addr->num_descs] = desc->addr; } len = desc->len; @@ -813,10 +839,25 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, } } else { int nr_frags = skb_shinfo(skb)->nr_frags; - struct xsk_addr_node *xsk_addr; + struct xsk_addrs *xsk_addr; struct page *page; u8 *vaddr; + if (xsk_skb_destructor_is_addr(skb)) { + xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, + GFP_KERNEL); + if (!xsk_addr) { + err = -ENOMEM; + goto free_err; + } + + xsk_addr->num_descs = 1; + xsk_addr->addrs[0] = xsk_skb_destructor_get_addr(skb); + skb_shinfo(skb)->destructor_arg = (void *)xsk_addr; + } else { + xsk_addr = (struct xsk_addrs *)skb_shinfo(skb)->destructor_arg; + } + if (unlikely(nr_frags == (MAX_SKB_FRAGS - 1) && xp_mb_desc(desc))) { err = -EOVERFLOW; goto free_err; @@ -828,13 +869,6 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, goto free_err; } - xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, GFP_KERNEL); - if (!xsk_addr) { - __free_page(page); - err = -ENOMEM; - goto free_err; - } - vaddr = kmap_local_page(page); memcpy(vaddr, buffer, len); kunmap_local(vaddr); @@ -842,8 +876,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, skb_add_rx_frag(skb, nr_frags, page, 0, len, PAGE_SIZE); refcount_add(PAGE_SIZE, &xs->sk.sk_wmem_alloc); - xsk_addr->addr = desc->addr; - list_add_tail(&xsk_addr->addr_node, &XSKCB(skb)->addrs_list); + xsk_addr->addrs[xsk_addr->num_descs] = desc->addr; } } @@ -1904,7 +1937,7 @@ static int __init xsk_init(void) goto out_pernet; xsk_tx_generic_cache = kmem_cache_create("xsk_generic_xmit_cache", - sizeof(struct xsk_addr_node), + sizeof(struct xsk_addrs), 0, SLAB_HWCACHE_ALIGN, NULL); if (!xsk_tx_generic_cache) { err = -ENOMEM; diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 44b9de6e4e77..52ae0e034d29 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -438,7 +438,7 @@ ok: check_tunnel_size = x->xso.type == XFRM_DEV_OFFLOAD_PACKET && x->props.mode == XFRM_MODE_TUNNEL; - switch (x->inner_mode.family) { + switch (skb_dst(skb)->ops->family) { case AF_INET: /* Check for IPv4 options */ if (ip_hdr(skb)->ihl != 5) diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 9077730ff7d0..54222fcbd7fd 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -698,7 +698,7 @@ static void xfrm_get_inner_ipproto(struct sk_buff *skb, struct xfrm_state *x) return; if (x->outer_mode.encap == XFRM_MODE_TUNNEL) { - switch (x->outer_mode.family) { + switch (skb_dst(skb)->ops->family) { case AF_INET: xo->inner_ipproto = ip_hdr(skb)->protocol; break; @@ -772,8 +772,12 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb) /* Exclusive direct xmit for tunnel mode, as * some filtering or matching rules may apply * in transport mode. + * Locally generated packets also require + * the normal XFRM path for L2 header setup, + * as the hardware needs the L2 header to match + * for encryption, so skip direct output as well. */ - if (x->props.mode == XFRM_MODE_TUNNEL) + if (x->props.mode == XFRM_MODE_TUNNEL && !skb->sk) return xfrm_dev_direct_output(sk, x, skb); return xfrm_output_resume(sk, skb, 0); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index d213ca3653a8..9e14e453b55c 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -592,6 +592,7 @@ void xfrm_state_free(struct xfrm_state *x) } EXPORT_SYMBOL(xfrm_state_free); +static void xfrm_state_delete_tunnel(struct xfrm_state *x); static void xfrm_state_gc_destroy(struct xfrm_state *x) { if (x->mode_cbs && x->mode_cbs->destroy_state) @@ -607,6 +608,7 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) kfree(x->replay_esn); kfree(x->preplay_esn); xfrm_unset_type_offload(x); + xfrm_state_delete_tunnel(x); if (x->type) { x->type->destructor(x); xfrm_put_type(x->type); @@ -806,7 +808,6 @@ void __xfrm_state_destroy(struct xfrm_state *x) } EXPORT_SYMBOL(__xfrm_state_destroy); -static void xfrm_state_delete_tunnel(struct xfrm_state *x); int __xfrm_state_delete(struct xfrm_state *x) { struct net *net = xs_net(x); @@ -2073,6 +2074,7 @@ static struct xfrm_state *xfrm_state_clone_and_setup(struct xfrm_state *orig, return x; error: + x->km.state = XFRM_STATE_DEAD; xfrm_state_put(x); out: return NULL; @@ -2157,11 +2159,15 @@ struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x, xfrm_state_insert(xc); } else { if (xfrm_state_add(xc) < 0) - goto error; + goto error_add; } return xc; +error_add: + if (xuo) + xfrm_dev_state_delete(xc); error: + xc->km.state = XFRM_STATE_DEAD; xfrm_state_put(xc); return NULL; } @@ -2191,14 +2197,18 @@ int xfrm_state_update(struct xfrm_state *x) } if (x1->km.state == XFRM_STATE_ACQ) { - if (x->dir && x1->dir != x->dir) + if (x->dir && x1->dir != x->dir) { + to_put = x1; goto out; + } __xfrm_state_insert(x); x = NULL; } else { - if (x1->dir != x->dir) + if (x1->dir != x->dir) { + to_put = x1; goto out; + } } err = 0; @@ -3298,6 +3308,7 @@ out_bydst: void xfrm_state_fini(struct net *net) { unsigned int sz; + int i; flush_work(&net->xfrm.state_hash_work); xfrm_state_flush(net, 0, false); @@ -3305,14 +3316,17 @@ void xfrm_state_fini(struct net *net) WARN_ON(!list_empty(&net->xfrm.state_all)); + for (i = 0; i <= net->xfrm.state_hmask; i++) { + WARN_ON(!hlist_empty(net->xfrm.state_byseq + i)); + WARN_ON(!hlist_empty(net->xfrm.state_byspi + i)); + WARN_ON(!hlist_empty(net->xfrm.state_bysrc + i)); + WARN_ON(!hlist_empty(net->xfrm.state_bydst + i)); + } + sz = (net->xfrm.state_hmask + 1) * sizeof(struct hlist_head); - WARN_ON(!hlist_empty(net->xfrm.state_byseq)); xfrm_hash_free(net->xfrm.state_byseq, sz); - WARN_ON(!hlist_empty(net->xfrm.state_byspi)); xfrm_hash_free(net->xfrm.state_byspi, sz); - WARN_ON(!hlist_empty(net->xfrm.state_bysrc)); xfrm_hash_free(net->xfrm.state_bysrc, sz); - WARN_ON(!hlist_empty(net->xfrm.state_bydst)); xfrm_hash_free(net->xfrm.state_bydst, sz); free_percpu(net->xfrm.state_cache_input); } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 010c9e6638c0..403b5ecac2c5 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -947,8 +947,11 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, if (attrs[XFRMA_SA_PCPU]) { x->pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]); - if (x->pcpu_num >= num_possible_cpus()) + if (x->pcpu_num >= num_possible_cpus()) { + err = -ERANGE; + NL_SET_ERR_MSG(extack, "pCPU number too big"); goto error; + } } err = __xfrm_init_state(x, extack); @@ -3035,6 +3038,9 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, } xfrm_state_free(x); + xfrm_dev_policy_delete(xp); + xfrm_dev_policy_free(xp); + security_xfrm_policy_free(xp->security); kfree(xp); return 0; |
