diff options
Diffstat (limited to 'net')
131 files changed, 3489 insertions, 1521 deletions
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 0beb44f2fe1f..f00158234505 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -407,6 +407,8 @@ int vlan_vids_add_by_dev(struct net_device *dev, return 0; list_for_each_entry(vid_info, &vlan_info->vid_list, list) { + if (!vlan_hw_filter_capable(by_dev, vid_info->proto)) + continue; err = vlan_vid_add(dev, vid_info->proto, vid_info->vid); if (err) goto unwind; @@ -417,6 +419,8 @@ unwind: list_for_each_entry_continue_reverse(vid_info, &vlan_info->vid_list, list) { + if (!vlan_hw_filter_capable(by_dev, vid_info->proto)) + continue; vlan_vid_del(dev, vid_info->proto, vid_info->vid); } @@ -436,8 +440,11 @@ void vlan_vids_del_by_dev(struct net_device *dev, if (!vlan_info) return; - list_for_each_entry(vid_info, &vlan_info->vid_list, list) + list_for_each_entry(vid_info, &vlan_info->vid_list, list) { + if (!vlan_hw_filter_capable(by_dev, vid_info->proto)) + continue; vlan_vid_del(dev, vid_info->proto, vid_info->vid); + } } EXPORT_SYMBOL(vlan_vids_del_by_dev); diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index a852ec093fa8..198f5ba2feae 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1581,7 +1581,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) } /* Build a packet */ - SOCK_DEBUG(sk, "SK %p: Got address.\n", sk); + net_dbg_ratelimited("SK %p: Got address.\n", sk); /* For headers */ size = sizeof(struct ddpehdr) + len + ddp_dl->header_length; @@ -1602,7 +1602,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) dev = rt->dev; - SOCK_DEBUG(sk, "SK %p: Size needed %d, device %s\n", + net_dbg_ratelimited("SK %p: Size needed %d, device %s\n", sk, size, dev->name); hard_header_len = dev->hard_header_len; @@ -1631,7 +1631,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) skb_reserve(skb, hard_header_len); skb->dev = dev; - SOCK_DEBUG(sk, "SK %p: Begin build.\n", sk); + net_dbg_ratelimited("SK %p: Begin build.\n", sk); ddp = skb_put(skb, sizeof(struct ddpehdr)); ddp->deh_len_hops = htons(len + sizeof(*ddp)); @@ -1642,7 +1642,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) ddp->deh_dport = usat->sat_port; ddp->deh_sport = at->src_port; - SOCK_DEBUG(sk, "SK %p: Copy user data (%zd bytes).\n", sk, len); + net_dbg_ratelimited("SK %p: Copy user data (%zd bytes).\n", sk, len); err = memcpy_from_msg(skb_put(skb, len), msg, len); if (err) { @@ -1666,7 +1666,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) if (skb2) { loopback = 1; - SOCK_DEBUG(sk, "SK %p: send out(copy).\n", sk); + net_dbg_ratelimited("SK %p: send out(copy).\n", sk); /* * If it fails it is queued/sent above in the aarp queue */ @@ -1675,7 +1675,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) } if (dev->flags & IFF_LOOPBACK || loopback) { - SOCK_DEBUG(sk, "SK %p: Loop back.\n", sk); + net_dbg_ratelimited("SK %p: Loop back.\n", sk); /* loop back */ skb_orphan(skb); if (ddp->deh_dnode == ATADDR_BCAST) { @@ -1689,7 +1689,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) } ddp_dl->request(ddp_dl, skb, dev->dev_addr); } else { - SOCK_DEBUG(sk, "SK %p: send out.\n", sk); + net_dbg_ratelimited("SK %p: send out.\n", sk); if (rt->flags & RTF_GATEWAY) { gsat.sat_addr = rt->gateway; usat = &gsat; @@ -1700,7 +1700,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) */ aarp_send_ddp(dev, skb, &usat->sat_addr, NULL); } - SOCK_DEBUG(sk, "SK %p: Done write (%zd).\n", sk, len); + net_dbg_ratelimited("SK %p: Done write (%zd).\n", sk, len); out: release_sock(sk); diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 336a76165454..b93464ac3517 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -309,11 +309,14 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (flags & MSG_OOB) return -EOPNOTSUPP; + lock_sock(sk); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) { if (sk->sk_shutdown & RCV_SHUTDOWN) - return 0; + err = 0; + release_sock(sk); return err; } @@ -343,6 +346,8 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, skb_free_datagram(sk, skb); + release_sock(sk); + if (flags & MSG_TRUNC) copied = skblen; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 2cee330188ce..a41d2693f4d8 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -300,6 +300,13 @@ static int configure_datapath_sync(struct hci_dev *hdev, struct bt_codec *codec) __u8 vnd_len, *vnd_data = NULL; struct hci_op_configure_data_path *cmd = NULL; + if (!codec->data_path || !hdev->get_codec_config_data) + return 0; + + /* Do not take me as error */ + if (!hdev->get_codec_config_data) + return 0; + err = hdev->get_codec_config_data(hdev, ESCO_LINK, codec, &vnd_len, &vnd_data); if (err < 0) @@ -345,9 +352,7 @@ static int hci_enhanced_setup_sync(struct hci_dev *hdev, void *data) bt_dev_dbg(hdev, "hcon %p", conn); - /* for offload use case, codec needs to configured before opening SCO */ - if (conn->codec.data_path) - configure_datapath_sync(hdev, &conn->codec); + configure_datapath_sync(hdev, &conn->codec); conn->state = BT_CONNECT; conn->out = true; @@ -1086,8 +1091,9 @@ static void hci_conn_cleanup_child(struct hci_conn *conn, u8 reason) hci_conn_failed(conn, reason); break; case ISO_LINK: - if (conn->state != BT_CONNECTED && - !test_bit(HCI_CONN_CREATE_CIS, &conn->flags)) + if ((conn->state != BT_CONNECTED && + !test_bit(HCI_CONN_CREATE_CIS, &conn->flags)) || + test_bit(HCI_CONN_BIG_CREATED, &conn->flags)) hci_conn_failed(conn, reason); break; } @@ -2228,7 +2234,17 @@ struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, __u8 base_len, __u8 *base) { struct hci_conn *conn; + struct hci_conn *parent; __u8 eir[HCI_MAX_PER_AD_LENGTH]; + struct hci_link *link; + + /* Look for any BIS that is open for rebinding */ + conn = hci_conn_hash_lookup_big_state(hdev, qos->bcast.big, BT_OPEN); + if (conn) { + memcpy(qos, &conn->iso_qos, sizeof(*qos)); + conn->state = BT_CONNECTED; + return conn; + } if (base_len && base) base_len = eir_append_service_data(eir, 0, 0x1851, @@ -2256,6 +2272,20 @@ struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, conn->iso_qos = *qos; conn->state = BT_BOUND; + /* Link BISes together */ + parent = hci_conn_hash_lookup_big(hdev, + conn->iso_qos.bcast.big); + if (parent && parent != conn) { + link = hci_conn_link(parent, conn); + if (!link) { + hci_conn_drop(conn); + return ERR_PTR(-ENOLINK); + } + + /* Link takes the refcount */ + hci_conn_drop(conn); + } + return conn; } @@ -2287,6 +2317,9 @@ struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst, if (IS_ERR(conn)) return conn; + if (conn->state == BT_CONNECTED) + return conn; + data.big = qos->bcast.big; data.bis = qos->bcast.bis; @@ -2421,12 +2454,10 @@ static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) hci_send_cmd(conn->hdev, HCI_OP_AUTH_REQUESTED, sizeof(cp), &cp); - /* If we're already encrypted set the REAUTH_PEND flag, - * otherwise set the ENCRYPT_PEND. + /* Set the ENCRYPT_PEND to trigger encryption after + * authentication. */ - if (test_bit(HCI_CONN_ENCRYPT, &conn->flags)) - set_bit(HCI_CONN_REAUTH_PEND, &conn->flags); - else + if (!test_bit(HCI_CONN_ENCRYPT, &conn->flags)) set_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags); } diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index 6b7741f6e95b..233453807b50 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -1046,10 +1046,12 @@ static int min_key_size_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val > hdev->le_max_key_size || val < SMP_MIN_ENC_KEY_SIZE) + hci_dev_lock(hdev); + if (val > hdev->le_max_key_size || val < SMP_MIN_ENC_KEY_SIZE) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->le_min_key_size = val; hci_dev_unlock(hdev); @@ -1074,10 +1076,12 @@ static int max_key_size_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val > SMP_MAX_ENC_KEY_SIZE || val < hdev->le_min_key_size) + hci_dev_lock(hdev); + if (val > SMP_MAX_ENC_KEY_SIZE || val < hdev->le_min_key_size) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->le_max_key_size = val; hci_dev_unlock(hdev); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 0849e0dafa95..ef8c3bed7361 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -516,6 +516,9 @@ static u8 hci_cc_read_class_of_dev(struct hci_dev *hdev, void *data, { struct hci_rp_read_class_of_dev *rp = data; + if (WARN_ON(!hdev)) + return HCI_ERROR_UNSPECIFIED; + bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); if (rp->status) @@ -747,9 +750,23 @@ static u8 hci_cc_read_enc_key_size(struct hci_dev *hdev, void *data, } else { conn->enc_key_size = rp->key_size; status = 0; + + if (conn->enc_key_size < hdev->min_enc_key_size) { + /* As slave role, the conn->state has been set to + * BT_CONNECTED and l2cap conn req might not be received + * yet, at this moment the l2cap layer almost does + * nothing with the non-zero status. + * So we also clear encrypt related bits, and then the + * handler of l2cap conn req will get the right secure + * state at a later time. + */ + status = HCI_ERROR_AUTH_FAILURE; + clear_bit(HCI_CONN_ENCRYPT, &conn->flags); + clear_bit(HCI_CONN_AES_CCM, &conn->flags); + } } - hci_encrypt_cfm(conn, 0); + hci_encrypt_cfm(conn, status); done: hci_dev_unlock(hdev); @@ -820,8 +837,6 @@ static u8 hci_cc_write_auth_payload_timeout(struct hci_dev *hdev, void *data, if (!rp->status) conn->auth_payload_timeout = get_unaligned_le16(sent + 2); - hci_encrypt_cfm(conn, 0); - unlock: hci_dev_unlock(hdev); @@ -2304,7 +2319,8 @@ static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) return; } - set_bit(HCI_INQUIRY, &hdev->flags); + if (hci_sent_cmd_data(hdev, HCI_OP_INQUIRY)) + set_bit(HCI_INQUIRY, &hdev->flags); } static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status) @@ -3484,14 +3500,8 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, void *data, if (!ev->status) { clear_bit(HCI_CONN_AUTH_FAILURE, &conn->flags); - - if (!hci_conn_ssp_enabled(conn) && - test_bit(HCI_CONN_REAUTH_PEND, &conn->flags)) { - bt_dev_info(hdev, "re-auth of legacy device is not possible."); - } else { - set_bit(HCI_CONN_AUTH, &conn->flags); - conn->sec_level = conn->pending_sec_level; - } + set_bit(HCI_CONN_AUTH, &conn->flags); + conn->sec_level = conn->pending_sec_level; } else { if (ev->status == HCI_ERROR_PIN_OR_KEY_MISSING) set_bit(HCI_CONN_AUTH_FAILURE, &conn->flags); @@ -3500,7 +3510,6 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, void *data, } clear_bit(HCI_CONN_AUTH_PEND, &conn->flags); - clear_bit(HCI_CONN_REAUTH_PEND, &conn->flags); if (conn->state == BT_CONFIG) { if (!ev->status && hci_conn_ssp_enabled(conn)) { @@ -3683,12 +3692,8 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, void *data, cp.handle = cpu_to_le16(conn->handle); cp.timeout = cpu_to_le16(hdev->auth_payload_timeout); if (hci_send_cmd(conn->hdev, HCI_OP_WRITE_AUTH_PAYLOAD_TO, - sizeof(cp), &cp)) { + sizeof(cp), &cp)) bt_dev_err(hdev, "write auth payload timeout failed"); - goto notify; - } - - goto unlock; } notify: diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index d85a7091a116..a6fc8a2a5c67 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -348,8 +348,6 @@ static void le_scan_disable(struct work_struct *work) if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) goto _return; - cancel_delayed_work(&hdev->le_scan_restart); - status = hci_cmd_sync_queue(hdev, scan_disable_sync, NULL, NULL); if (status) { bt_dev_err(hdev, "failed to disable LE scan: %d", status); @@ -397,71 +395,6 @@ _return: static int hci_le_set_scan_enable_sync(struct hci_dev *hdev, u8 val, u8 filter_dup); -static int hci_le_scan_restart_sync(struct hci_dev *hdev) -{ - /* If controller is not scanning we are done. */ - if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) - return 0; - - if (hdev->scanning_paused) { - bt_dev_dbg(hdev, "Scanning is paused for suspend"); - return 0; - } - - hci_le_set_scan_enable_sync(hdev, LE_SCAN_DISABLE, 0x00); - return hci_le_set_scan_enable_sync(hdev, LE_SCAN_ENABLE, - LE_SCAN_FILTER_DUP_ENABLE); -} - -static void le_scan_restart(struct work_struct *work) -{ - struct hci_dev *hdev = container_of(work, struct hci_dev, - le_scan_restart.work); - unsigned long timeout, duration, scan_start, now; - int status; - - bt_dev_dbg(hdev, ""); - - status = hci_le_scan_restart_sync(hdev); - if (status) { - bt_dev_err(hdev, "failed to restart LE scan: status %d", - status); - return; - } - - hci_dev_lock(hdev); - - if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) || - !hdev->discovery.scan_start) - goto unlock; - - /* When the scan was started, hdev->le_scan_disable has been queued - * after duration from scan_start. During scan restart this job - * has been canceled, and we need to queue it again after proper - * timeout, to make sure that scan does not run indefinitely. - */ - duration = hdev->discovery.scan_duration; - scan_start = hdev->discovery.scan_start; - now = jiffies; - if (now - scan_start <= duration) { - int elapsed; - - if (now >= scan_start) - elapsed = now - scan_start; - else - elapsed = ULONG_MAX - scan_start + now; - - timeout = duration - elapsed; - } else { - timeout = 0; - } - - queue_delayed_work(hdev->req_workqueue, - &hdev->le_scan_disable, timeout); - -unlock: - hci_dev_unlock(hdev); -} static int reenable_adv_sync(struct hci_dev *hdev, void *data) { @@ -630,7 +563,6 @@ void hci_cmd_sync_init(struct hci_dev *hdev) INIT_WORK(&hdev->cmd_sync_cancel_work, hci_cmd_sync_cancel_work); INIT_WORK(&hdev->reenable_adv_work, reenable_adv); INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable); - INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart); INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire); } @@ -3800,12 +3732,14 @@ static int hci_set_event_mask_sync(struct hci_dev *hdev) if (lmp_bredr_capable(hdev)) { events[4] |= 0x01; /* Flow Specification Complete */ - /* Don't set Disconnect Complete when suspended as that - * would wakeup the host when disconnecting due to - * suspend. + /* Don't set Disconnect Complete and mode change when + * suspended as that would wakeup the host when disconnecting + * due to suspend. */ - if (hdev->suspended) + if (hdev->suspended) { events[0] &= 0xef; + events[2] &= 0xf7; + } } else { /* Use a different default for LE-only devices */ memset(events, 0, sizeof(events)); @@ -4960,7 +4894,6 @@ int hci_dev_close_sync(struct hci_dev *hdev) cancel_delayed_work(&hdev->power_off); cancel_delayed_work(&hdev->ncmd_timer); cancel_delayed_work(&hdev->le_scan_disable); - cancel_delayed_work(&hdev->le_scan_restart); hci_request_cancel_all(hdev); @@ -5178,7 +5111,6 @@ int hci_stop_discovery_sync(struct hci_dev *hdev) if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { cancel_delayed_work(&hdev->le_scan_disable); - cancel_delayed_work(&hdev->le_scan_restart); err = hci_scan_disable_sync(hdev); if (err) @@ -5686,19 +5618,18 @@ static int hci_active_scan_sync(struct hci_dev *hdev, uint16_t interval) if (err < 0) own_addr_type = ADDR_LE_DEV_PUBLIC; - if (hci_is_adv_monitoring(hdev)) { + if (hci_is_adv_monitoring(hdev) || + (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) && + hdev->discovery.result_filtering)) { /* Duplicate filter should be disabled when some advertisement * monitor is activated, otherwise AdvMon can only receive one * advertisement for one peer(*) during active scanning, and * might report loss to these peers. * - * Note that different controllers have different meanings of - * |duplicate|. Some of them consider packets with the same - * address as duplicate, and others consider packets with the - * same address and the same RSSI as duplicate. Although in the - * latter case we don't need to disable duplicate filter, but - * it is common to have active scanning for a short period of - * time, the power impact should be neglectable. + * If controller does strict duplicate filtering and the + * discovery requires result filtering disables controller based + * filtering since that can cause reports that would match the + * host filter to not be reported. */ filter_dup = LE_SCAN_FILTER_DUP_DISABLE; } @@ -5778,17 +5709,6 @@ int hci_start_discovery_sync(struct hci_dev *hdev) bt_dev_dbg(hdev, "timeout %u ms", jiffies_to_msecs(timeout)); - /* When service discovery is used and the controller has a - * strict duplicate filter, it is important to remember the - * start and duration of the scan. This is required for - * restarting scanning during the discovery phase. - */ - if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) && - hdev->discovery.result_filtering) { - hdev->discovery.scan_start = jiffies; - hdev->discovery.scan_duration = timeout; - } - queue_delayed_work(hdev->req_workqueue, &hdev->le_scan_disable, timeout); return 0; diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 07b80e97aead..04f6572d35f1 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -54,6 +54,7 @@ static void iso_sock_kill(struct sock *sk); enum { BT_SK_BIG_SYNC, BT_SK_PA_SYNC, + BT_SK_PA_SYNC_TERM, }; struct iso_pinfo { @@ -82,6 +83,11 @@ static bool iso_match_sid(struct sock *sk, void *data); static bool iso_match_sync_handle(struct sock *sk, void *data); static void iso_sock_disconn(struct sock *sk); +typedef bool (*iso_sock_match_t)(struct sock *sk, void *data); + +static struct sock *iso_get_sock_listen(bdaddr_t *src, bdaddr_t *dst, + iso_sock_match_t match, void *data); + /* ---- ISO timers ---- */ #define ISO_CONN_TIMEOUT (HZ * 40) #define ISO_DISCONN_TIMEOUT (HZ * 2) @@ -190,10 +196,21 @@ static void iso_chan_del(struct sock *sk, int err) sock_set_flag(sk, SOCK_ZAPPED); } +static bool iso_match_conn_sync_handle(struct sock *sk, void *data) +{ + struct hci_conn *hcon = data; + + if (test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags)) + return false; + + return hcon->sync_handle == iso_pi(sk)->sync_handle; +} + static void iso_conn_del(struct hci_conn *hcon, int err) { struct iso_conn *conn = hcon->iso_data; struct sock *sk; + struct sock *parent; if (!conn) return; @@ -209,6 +226,25 @@ static void iso_conn_del(struct hci_conn *hcon, int err) if (sk) { lock_sock(sk); + + /* While a PA sync hcon is in the process of closing, + * mark parent socket with a flag, so that any residual + * BIGInfo adv reports that arrive before PA sync is + * terminated are not processed anymore. + */ + if (test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags)) { + parent = iso_get_sock_listen(&hcon->src, + &hcon->dst, + iso_match_conn_sync_handle, + hcon); + + if (parent) { + set_bit(BT_SK_PA_SYNC_TERM, + &iso_pi(parent)->flags); + sock_put(parent); + } + } + iso_sock_clear_timer(sk); iso_chan_del(sk, err); release_sock(sk); @@ -545,8 +581,6 @@ static struct sock *__iso_get_sock_listen_by_sid(bdaddr_t *ba, bdaddr_t *bc, return NULL; } -typedef bool (*iso_sock_match_t)(struct sock *sk, void *data); - /* Find socket listening: * source bdaddr (Unicast) * destination bdaddr (Broadcast only) @@ -574,19 +608,68 @@ static struct sock *iso_get_sock_listen(bdaddr_t *src, bdaddr_t *dst, continue; /* Exact match. */ - if (!bacmp(&iso_pi(sk)->src, src)) + if (!bacmp(&iso_pi(sk)->src, src)) { + sock_hold(sk); break; + } /* Closest match */ - if (!bacmp(&iso_pi(sk)->src, BDADDR_ANY)) + if (!bacmp(&iso_pi(sk)->src, BDADDR_ANY)) { + if (sk1) + sock_put(sk1); + sk1 = sk; + sock_hold(sk1); + } } + if (sk && sk1) + sock_put(sk1); + read_unlock(&iso_sk_list.lock); return sk ? sk : sk1; } +static struct sock *iso_get_sock_big(struct sock *match_sk, bdaddr_t *src, + bdaddr_t *dst, uint8_t big) +{ + struct sock *sk = NULL; + + read_lock(&iso_sk_list.lock); + + sk_for_each(sk, &iso_sk_list.head) { + if (match_sk == sk) + continue; + + /* Look for sockets that have already been + * connected to the BIG + */ + if (sk->sk_state != BT_CONNECTED && + sk->sk_state != BT_CONNECT) + continue; + + /* Match Broadcast destination */ + if (bacmp(&iso_pi(sk)->dst, dst)) + continue; + + /* Match BIG handle */ + if (iso_pi(sk)->qos.bcast.big != big) + continue; + + /* Match source address */ + if (bacmp(&iso_pi(sk)->src, src)) + continue; + + sock_hold(sk); + break; + } + + read_unlock(&iso_sk_list.lock); + + return sk; +} + static void iso_sock_destruct(struct sock *sk) { BT_DBG("sk %p", sk); @@ -639,6 +722,28 @@ static void iso_sock_kill(struct sock *sk) static void iso_sock_disconn(struct sock *sk) { + struct sock *bis_sk; + struct hci_conn *hcon = iso_pi(sk)->conn->hcon; + + if (test_bit(HCI_CONN_BIG_CREATED, &hcon->flags)) { + bis_sk = iso_get_sock_big(sk, &iso_pi(sk)->src, + &iso_pi(sk)->dst, + iso_pi(sk)->qos.bcast.big); + + /* If there are any other connected sockets for the + * same BIG, just delete the sk and leave the bis + * hcon active, in case later rebinding is needed. + */ + if (bis_sk) { + hcon->state = BT_OPEN; + iso_pi(sk)->conn->hcon = NULL; + iso_sock_clear_timer(sk); + iso_chan_del(sk, bt_to_errno(hcon->abort_reason)); + sock_put(bis_sk); + return; + } + } + sk->sk_state = BT_DISCONN; iso_sock_set_timer(sk, ISO_DISCONN_TIMEOUT); iso_conn_lock(iso_pi(sk)->conn); @@ -792,27 +897,75 @@ static int iso_sock_bind_bc(struct socket *sock, struct sockaddr *addr, BT_DBG("sk %p bc_sid %u bc_num_bis %u", sk, sa->iso_bc->bc_sid, sa->iso_bc->bc_num_bis); - if (addr_len > sizeof(*sa) + sizeof(*sa->iso_bc)) + if (addr_len != sizeof(*sa) + sizeof(*sa->iso_bc)) return -EINVAL; bacpy(&iso_pi(sk)->dst, &sa->iso_bc->bc_bdaddr); + + /* Check if the address type is of LE type */ + if (!bdaddr_type_is_le(sa->iso_bc->bc_bdaddr_type)) + return -EINVAL; + iso_pi(sk)->dst_type = sa->iso_bc->bc_bdaddr_type; iso_pi(sk)->sync_handle = -1; + + if (sa->iso_bc->bc_sid > 0x0f) + return -EINVAL; + iso_pi(sk)->bc_sid = sa->iso_bc->bc_sid; + + if (sa->iso_bc->bc_num_bis > ISO_MAX_NUM_BIS) + return -EINVAL; + iso_pi(sk)->bc_num_bis = sa->iso_bc->bc_num_bis; - for (i = 0; i < iso_pi(sk)->bc_num_bis; i++) { + for (i = 0; i < iso_pi(sk)->bc_num_bis; i++) if (sa->iso_bc->bc_bis[i] < 0x01 || sa->iso_bc->bc_bis[i] > 0x1f) return -EINVAL; - memcpy(iso_pi(sk)->bc_bis, sa->iso_bc->bc_bis, - iso_pi(sk)->bc_num_bis); - } + memcpy(iso_pi(sk)->bc_bis, sa->iso_bc->bc_bis, + iso_pi(sk)->bc_num_bis); return 0; } +static int iso_sock_bind_pa_sk(struct sock *sk, struct sockaddr_iso *sa, + int addr_len) +{ + int err = 0; + + if (sk->sk_type != SOCK_SEQPACKET) { + err = -EINVAL; + goto done; + } + + if (addr_len != sizeof(*sa) + sizeof(*sa->iso_bc)) { + err = -EINVAL; + goto done; + } + + if (sa->iso_bc->bc_num_bis > ISO_MAX_NUM_BIS) { + err = -EINVAL; + goto done; + } + + iso_pi(sk)->bc_num_bis = sa->iso_bc->bc_num_bis; + + for (int i = 0; i < iso_pi(sk)->bc_num_bis; i++) + if (sa->iso_bc->bc_bis[i] < 0x01 || + sa->iso_bc->bc_bis[i] > 0x1f) { + err = -EINVAL; + goto done; + } + + memcpy(iso_pi(sk)->bc_bis, sa->iso_bc->bc_bis, + iso_pi(sk)->bc_num_bis); + +done: + return err; +} + static int iso_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { @@ -828,6 +981,15 @@ static int iso_sock_bind(struct socket *sock, struct sockaddr *addr, lock_sock(sk); + /* Allow the user to bind a PA sync socket to a number + * of BISes to sync to. + */ + if (sk->sk_state == BT_CONNECT2 && + test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags)) { + err = iso_sock_bind_pa_sk(sk, sa, addr_len); + goto done; + } + if (sk->sk_state != BT_OPEN) { err = -EBADFD; goto done; @@ -1694,6 +1856,7 @@ static void iso_conn_ready(struct iso_conn *conn) parent->sk_data_ready(parent); release_sock(parent); + sock_put(parent); } } @@ -1759,9 +1922,20 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) /* Try to get PA sync listening socket, if it exists */ sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr, iso_match_pa_sync_flag, NULL); - if (!sk) + + if (!sk) { sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr, iso_match_sync_handle, ev2); + + /* If PA Sync is in process of terminating, + * do not handle any more BIGInfo adv reports. + */ + + if (sk && test_bit(BT_SK_PA_SYNC_TERM, + &iso_pi(sk)->flags)) + return lm; + } + if (sk) { int err; @@ -1778,6 +1952,7 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) if (err) { bt_dev_err(hdev, "hci_le_big_create_sync: %d", err); + sock_put(sk); sk = NULL; } } @@ -1810,6 +1985,8 @@ done: if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) *flags |= HCI_PROTO_DEFER; + sock_put(sk); + return lm; } diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 17ca13e8c044..60298975d5c4 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -6492,6 +6492,14 @@ drop: kfree_skb(skb); } +static inline void l2cap_sig_send_rej(struct l2cap_conn *conn, u16 ident) +{ + struct l2cap_cmd_rej_unk rej; + + rej.reason = cpu_to_le16(L2CAP_REJ_NOT_UNDERSTOOD); + l2cap_send_cmd(conn, ident, L2CAP_COMMAND_REJ, sizeof(rej), &rej); +} + static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb) { @@ -6517,23 +6525,25 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, if (len > skb->len || !cmd->ident) { BT_DBG("corrupted command"); - break; + l2cap_sig_send_rej(conn, cmd->ident); + skb_pull(skb, len > skb->len ? skb->len : len); + continue; } err = l2cap_bredr_sig_cmd(conn, cmd, len, skb->data); if (err) { - struct l2cap_cmd_rej_unk rej; - BT_ERR("Wrong link type (%d)", err); - - rej.reason = cpu_to_le16(L2CAP_REJ_NOT_UNDERSTOOD); - l2cap_send_cmd(conn, cmd->ident, L2CAP_COMMAND_REJ, - sizeof(rej), &rej); + l2cap_sig_send_rej(conn, cmd->ident); } skb_pull(skb, len); } + if (skb->len > 0) { + BT_DBG("corrupted command"); + l2cap_sig_send_rej(conn, 0); + } + drop: kfree_skb(skb); } diff --git a/net/bluetooth/lib.c b/net/bluetooth/lib.c index 53a796ac078c..43aa01fd07b9 100644 --- a/net/bluetooth/lib.c +++ b/net/bluetooth/lib.c @@ -30,6 +30,15 @@ #include <net/bluetooth/bluetooth.h> +/** + * baswap() - Swaps the order of a bd address + * @dst: Pointer to a bdaddr_t struct that will store the swapped + * bd address. + * @src: Pointer to the bdaddr_t struct to be swapped. + * + * This function reverses the byte order of a Bluetooth device + * address. + */ void baswap(bdaddr_t *dst, const bdaddr_t *src) { const unsigned char *s = (const unsigned char *)src; @@ -41,7 +50,19 @@ void baswap(bdaddr_t *dst, const bdaddr_t *src) } EXPORT_SYMBOL(baswap); -/* Bluetooth error codes to Unix errno mapping */ +/** + * bt_to_errno() - Bluetooth error codes to standard errno + * @code: Bluetooth error code to be converted + * + * This function takes a Bluetooth error code as input and convets + * it to an equivalent Unix/standard errno value. + * + * Return: + * + * If the bt error code is known, an equivalent Unix errno value + * is returned. + * If the given bt error code is not known, ENOSYS is returned. + */ int bt_to_errno(__u16 code) { switch (code) { @@ -135,10 +156,22 @@ int bt_to_errno(__u16 code) } EXPORT_SYMBOL(bt_to_errno); -/* Unix errno to Bluetooth error codes mapping */ +/** + * bt_status() - Standard errno value to Bluetooth error code + * @err: Unix/standard errno value to be converted + * + * This function converts a standard/Unix errno value to an + * equivalent Bluetooth error code. + * + * Return: Bluetooth error code. + * + * If the given errno is not found, 0x1f is returned by default + * which indicates an unspecified error. + * For err >= 0, no conversion is performed, and the same value + * is immediately returned. + */ __u8 bt_status(int err) { - /* Don't convert if already positive value */ if (err >= 0) return err; @@ -206,6 +239,10 @@ __u8 bt_status(int err) } EXPORT_SYMBOL(bt_status); +/** + * bt_info() - Log Bluetooth information message + * @format: Message's format string + */ void bt_info(const char *format, ...) { struct va_format vaf; @@ -222,6 +259,10 @@ void bt_info(const char *format, ...) } EXPORT_SYMBOL(bt_info); +/** + * bt_warn() - Log Bluetooth warning message + * @format: Message's format string + */ void bt_warn(const char *format, ...) { struct va_format vaf; @@ -238,6 +279,10 @@ void bt_warn(const char *format, ...) } EXPORT_SYMBOL(bt_warn); +/** + * bt_err() - Log Bluetooth error message + * @format: Message's format string + */ void bt_err(const char *format, ...) { struct va_format vaf; @@ -267,6 +312,10 @@ bool bt_dbg_get(void) return debug_enable; } +/** + * bt_dbg() - Log Bluetooth debugging message + * @format: Message's format string + */ void bt_dbg(const char *format, ...) { struct va_format vaf; @@ -287,6 +336,13 @@ void bt_dbg(const char *format, ...) EXPORT_SYMBOL(bt_dbg); #endif +/** + * bt_warn_ratelimited() - Log rate-limited Bluetooth warning message + * @format: Message's format string + * + * This functions works like bt_warn, but it uses rate limiting + * to prevent the message from being logged too often. + */ void bt_warn_ratelimited(const char *format, ...) { struct va_format vaf; @@ -303,6 +359,13 @@ void bt_warn_ratelimited(const char *format, ...) } EXPORT_SYMBOL(bt_warn_ratelimited); +/** + * bt_err_ratelimited() - Log rate-limited Bluetooth error message + * @format: Message's format string + * + * This functions works like bt_err, but it uses rate limiting + * to prevent the message from being logged too often. + */ void bt_err_ratelimited(const char *format, ...) { struct va_format vaf; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index ba2e00646e8e..bb72ff6eb22f 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2897,7 +2897,8 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, for (i = 0; i < key_count; i++) { struct mgmt_link_key_info *key = &cp->keys[i]; - if (key->addr.type != BDADDR_BREDR || key->type > 0x08) + /* Considering SMP over BREDR/LE, there is no need to check addr_type */ + if (key->type > 0x08) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, MGMT_STATUS_INVALID_PARAMS); @@ -7130,6 +7131,7 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data, for (i = 0; i < irk_count; i++) { struct mgmt_irk_info *irk = &cp->irks[i]; + u8 addr_type = le_addr_type(irk->addr.type); if (hci_is_blocked_key(hdev, HCI_BLOCKED_KEY_TYPE_IRK, @@ -7139,8 +7141,12 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data, continue; } + /* When using SMP over BR/EDR, the addr type should be set to BREDR */ + if (irk->addr.type == BDADDR_BREDR) + addr_type = BDADDR_BREDR; + hci_add_irk(hdev, &irk->addr.bdaddr, - le_addr_type(irk->addr.type), irk->val, + addr_type, irk->val, BDADDR_ANY); } @@ -7221,6 +7227,7 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, for (i = 0; i < key_count; i++) { struct mgmt_ltk_info *key = &cp->keys[i]; u8 type, authenticated; + u8 addr_type = le_addr_type(key->addr.type); if (hci_is_blocked_key(hdev, HCI_BLOCKED_KEY_TYPE_LTK, @@ -7255,8 +7262,12 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, continue; } + /* When using SMP over BR/EDR, the addr type should be set to BREDR */ + if (key->addr.type == BDADDR_BREDR) + addr_type = BDADDR_BREDR; + hci_add_ltk(hdev, &key->addr.bdaddr, - le_addr_type(key->addr.type), type, authenticated, + addr_type, type, authenticated, key->val, key->enc_size, key->ediv, key->rand); } @@ -9523,7 +9534,7 @@ void mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key, ev.store_hint = persistent; bacpy(&ev.key.addr.bdaddr, &key->bdaddr); - ev.key.addr.type = BDADDR_BREDR; + ev.key.addr.type = link_to_bdaddr(key->link_type, key->bdaddr_type); ev.key.type = key->type; memcpy(ev.key.val, key->val, HCI_LINK_KEY_SIZE); ev.key.pin_len = key->pin_len; @@ -9574,7 +9585,7 @@ void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent) ev.store_hint = persistent; bacpy(&ev.key.addr.bdaddr, &key->bdaddr); - ev.key.addr.type = link_to_bdaddr(LE_LINK, key->bdaddr_type); + ev.key.addr.type = link_to_bdaddr(key->link_type, key->bdaddr_type); ev.key.type = mgmt_ltk_type(key); ev.key.enc_size = key->enc_size; ev.key.ediv = key->ediv; @@ -9603,7 +9614,7 @@ void mgmt_new_irk(struct hci_dev *hdev, struct smp_irk *irk, bool persistent) bacpy(&ev.rpa, &irk->rpa); bacpy(&ev.irk.addr.bdaddr, &irk->bdaddr); - ev.irk.addr.type = link_to_bdaddr(LE_LINK, irk->addr_type); + ev.irk.addr.type = link_to_bdaddr(irk->link_type, irk->addr_type); memcpy(ev.irk.val, irk->val, sizeof(irk->val)); mgmt_event(MGMT_EV_NEW_IRK, hdev, &ev, sizeof(ev), NULL); @@ -9632,7 +9643,7 @@ void mgmt_new_csrk(struct hci_dev *hdev, struct smp_csrk *csrk, ev.store_hint = persistent; bacpy(&ev.key.addr.bdaddr, &csrk->bdaddr); - ev.key.addr.type = link_to_bdaddr(LE_LINK, csrk->bdaddr_type); + ev.key.addr.type = link_to_bdaddr(csrk->link_type, csrk->bdaddr_type); ev.key.type = csrk->type; memcpy(ev.key.val, csrk->val, sizeof(csrk->val)); @@ -10134,21 +10145,6 @@ static bool eir_has_uuids(u8 *eir, u16 eir_len, u16 uuid_count, u8 (*uuids)[16]) return false; } -static void restart_le_scan(struct hci_dev *hdev) -{ - /* If controller is not scanning we are done. */ - if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) - return; - - if (time_after(jiffies + DISCOV_LE_RESTART_DELAY, - hdev->discovery.scan_start + - hdev->discovery.scan_duration)) - return; - - queue_delayed_work(hdev->req_workqueue, &hdev->le_scan_restart, - DISCOV_LE_RESTART_DELAY); -} - static bool is_filter_match(struct hci_dev *hdev, s8 rssi, u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len) { @@ -10183,8 +10179,6 @@ static bool is_filter_match(struct hci_dev *hdev, s8 rssi, u8 *eir, * scanning to ensure updated result with updated RSSI values. */ if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks)) { - restart_le_scan(hdev); - /* Validate RSSI value against the RSSI threshold once more. */ if (hdev->discovery.rssi != HCI_RSSI_INVALID && rssi < hdev->discovery.rssi) diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 5f2f97de295e..1e7ea3a4b7ef 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -1059,6 +1059,7 @@ static void smp_notify_keys(struct l2cap_conn *conn) } if (smp->remote_irk) { + smp->remote_irk->link_type = hcon->type; mgmt_new_irk(hdev, smp->remote_irk, persistent); /* Now that user space can be considered to know the @@ -1078,24 +1079,28 @@ static void smp_notify_keys(struct l2cap_conn *conn) } if (smp->csrk) { + smp->csrk->link_type = hcon->type; smp->csrk->bdaddr_type = hcon->dst_type; bacpy(&smp->csrk->bdaddr, &hcon->dst); mgmt_new_csrk(hdev, smp->csrk, persistent); } if (smp->responder_csrk) { + smp->responder_csrk->link_type = hcon->type; smp->responder_csrk->bdaddr_type = hcon->dst_type; bacpy(&smp->responder_csrk->bdaddr, &hcon->dst); mgmt_new_csrk(hdev, smp->responder_csrk, persistent); } if (smp->ltk) { + smp->ltk->link_type = hcon->type; smp->ltk->bdaddr_type = hcon->dst_type; bacpy(&smp->ltk->bdaddr, &hcon->dst); mgmt_new_ltk(hdev, smp->ltk, persistent); } if (smp->responder_ltk) { + smp->responder_ltk->link_type = hcon->type; smp->responder_ltk->bdaddr_type = hcon->dst_type; bacpy(&smp->responder_ltk->bdaddr, &hcon->dst); mgmt_new_ltk(hdev, smp->responder_ltk, persistent); @@ -1115,6 +1120,8 @@ static void smp_notify_keys(struct l2cap_conn *conn) key = hci_add_link_key(hdev, smp->conn->hcon, &hcon->dst, smp->link_key, type, 0, &persistent); if (key) { + key->link_type = hcon->type; + key->bdaddr_type = hcon->dst_type; mgmt_new_link_key(hdev, key, persistent); /* Don't keep debug keys around if the relevant diff --git a/net/bridge/br_cfm_netlink.c b/net/bridge/br_cfm_netlink.c index 5c4c369f8536..2faab44652e7 100644 --- a/net/bridge/br_cfm_netlink.c +++ b/net/bridge/br_cfm_netlink.c @@ -362,7 +362,7 @@ static int br_cc_ccm_tx_parse(struct net_bridge *br, struct nlattr *attr, memset(&tx_info, 0, sizeof(tx_info)); - instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_RDI_INSTANCE]); + instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CCM_TX_INSTANCE]); nla_memcpy(&tx_info.dmac.addr, tb[IFLA_BRIDGE_CFM_CC_CCM_TX_DMAC], sizeof(tx_info.dmac.addr)); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 8f40de3af154..65cee0ad3c1b 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -471,6 +471,7 @@ static const struct net_device_ops br_netdev_ops = { .ndo_fdb_get = br_fdb_get, .ndo_mdb_add = br_mdb_add, .ndo_mdb_del = br_mdb_del, + .ndo_mdb_del_bulk = br_mdb_del_bulk, .ndo_mdb_dump = br_mdb_dump, .ndo_mdb_get = br_mdb_get, .ndo_bridge_getlink = br_getlink, diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 8cc526067bc2..bc37e47ad829 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -1412,6 +1412,139 @@ int br_mdb_del(struct net_device *dev, struct nlattr *tb[], return err; } +struct br_mdb_flush_desc { + u32 port_ifindex; + u16 vid; + u8 rt_protocol; + u8 state; + u8 state_mask; +}; + +static const struct nla_policy br_mdbe_attrs_del_bulk_pol[MDBE_ATTR_MAX + 1] = { + [MDBE_ATTR_RTPROT] = NLA_POLICY_MIN(NLA_U8, RTPROT_STATIC), + [MDBE_ATTR_STATE_MASK] = NLA_POLICY_MASK(NLA_U8, MDB_PERMANENT), +}; + +static int br_mdb_flush_desc_init(struct br_mdb_flush_desc *desc, + struct nlattr *tb[], + struct netlink_ext_ack *extack) +{ + struct br_mdb_entry *entry = nla_data(tb[MDBA_SET_ENTRY]); + struct nlattr *mdbe_attrs[MDBE_ATTR_MAX + 1]; + int err; + + desc->port_ifindex = entry->ifindex; + desc->vid = entry->vid; + desc->state = entry->state; + + if (!tb[MDBA_SET_ENTRY_ATTRS]) + return 0; + + err = nla_parse_nested(mdbe_attrs, MDBE_ATTR_MAX, + tb[MDBA_SET_ENTRY_ATTRS], + br_mdbe_attrs_del_bulk_pol, extack); + if (err) + return err; + + if (mdbe_attrs[MDBE_ATTR_STATE_MASK]) + desc->state_mask = nla_get_u8(mdbe_attrs[MDBE_ATTR_STATE_MASK]); + + if (mdbe_attrs[MDBE_ATTR_RTPROT]) + desc->rt_protocol = nla_get_u8(mdbe_attrs[MDBE_ATTR_RTPROT]); + + return 0; +} + +static void br_mdb_flush_host(struct net_bridge *br, + struct net_bridge_mdb_entry *mp, + const struct br_mdb_flush_desc *desc) +{ + u8 state; + + if (desc->port_ifindex && desc->port_ifindex != br->dev->ifindex) + return; + + if (desc->rt_protocol) + return; + + state = br_group_is_l2(&mp->addr) ? MDB_PERMANENT : 0; + if (desc->state_mask && (state & desc->state_mask) != desc->state) + return; + + br_multicast_host_leave(mp, true); + if (!mp->ports && netif_running(br->dev)) + mod_timer(&mp->timer, jiffies); +} + +static void br_mdb_flush_pgs(struct net_bridge *br, + struct net_bridge_mdb_entry *mp, + const struct br_mdb_flush_desc *desc) +{ + struct net_bridge_port_group __rcu **pp; + struct net_bridge_port_group *p; + + for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL;) { + u8 state; + + if (desc->port_ifindex && + desc->port_ifindex != p->key.port->dev->ifindex) { + pp = &p->next; + continue; + } + + if (desc->rt_protocol && desc->rt_protocol != p->rt_protocol) { + pp = &p->next; + continue; + } + + state = p->flags & MDB_PG_FLAGS_PERMANENT ? MDB_PERMANENT : 0; + if (desc->state_mask && + (state & desc->state_mask) != desc->state) { + pp = &p->next; + continue; + } + + br_multicast_del_pg(mp, p, pp); + } +} + +static void br_mdb_flush(struct net_bridge *br, + const struct br_mdb_flush_desc *desc) +{ + struct net_bridge_mdb_entry *mp; + + spin_lock_bh(&br->multicast_lock); + + /* Safe variant is not needed because entries are removed from the list + * upon group timer expiration or bridge deletion. + */ + hlist_for_each_entry(mp, &br->mdb_list, mdb_node) { + if (desc->vid && desc->vid != mp->addr.vid) + continue; + + br_mdb_flush_host(br, mp, desc); + br_mdb_flush_pgs(br, mp, desc); + } + + spin_unlock_bh(&br->multicast_lock); +} + +int br_mdb_del_bulk(struct net_device *dev, struct nlattr *tb[], + struct netlink_ext_ack *extack) +{ + struct net_bridge *br = netdev_priv(dev); + struct br_mdb_flush_desc desc = {}; + int err; + + err = br_mdb_flush_desc_init(&desc, tb, extack); + if (err) + return err; + + br_mdb_flush(br, &desc); + + return 0; +} + static const struct nla_policy br_mdbe_attrs_get_pol[MDBE_ATTR_MAX + 1] = { [MDBE_ATTR_SOURCE] = NLA_POLICY_RANGE(NLA_BINARY, sizeof(struct in_addr), diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 051ea81864ac..b0a92c344722 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -1022,6 +1022,8 @@ int br_mdb_add(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags, struct netlink_ext_ack *extack); int br_mdb_del(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack); +int br_mdb_del_bulk(struct net_device *dev, struct nlattr *tb[], + struct netlink_ext_ack *extack); int br_mdb_dump(struct net_device *dev, struct sk_buff *skb, struct netlink_callback *cb); int br_mdb_get(struct net_device *dev, struct nlattr *tb[], u32 portid, u32 seq, @@ -1430,6 +1432,12 @@ static inline int br_mdb_del(struct net_device *dev, struct nlattr *tb[], return -EOPNOTSUPP; } +static inline int br_mdb_del_bulk(struct net_device *dev, struct nlattr *tb[], + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} + static inline int br_mdb_dump(struct net_device *dev, struct sk_buff *skb, struct netlink_callback *cb) { diff --git a/net/core/dev.c b/net/core/dev.c index 0432b04cf9b0..f01a9b858347 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3471,6 +3471,9 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb, if (gso_segs > READ_ONCE(dev->gso_max_segs)) return features & ~NETIF_F_GSO_MASK; + if (unlikely(skb->len >= READ_ONCE(dev->gso_max_size))) + return features & ~NETIF_F_GSO_MASK; + if (!skb_shinfo(skb)->gso_type) { skb_warn_bad_offload(skb); return features & ~NETIF_F_GSO_MASK; @@ -3753,6 +3756,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, qdisc_calculate_pkt_len(skb, q); + tcf_set_drop_reason(skb, SKB_DROP_REASON_QDISC_DROP); + if (q->flags & TCQ_F_NOLOCK) { if (q->flags & TCQ_F_CAN_BYPASS && nolock_qdisc_is_empty(q) && qdisc_run_begin(q)) { @@ -3782,7 +3787,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, no_lock_out: if (unlikely(to_free)) kfree_skb_list_reason(to_free, - SKB_DROP_REASON_QDISC_DROP); + tcf_get_drop_reason(to_free)); return rc; } @@ -3837,7 +3842,8 @@ no_lock_out: } spin_unlock(root_lock); if (unlikely(to_free)) - kfree_skb_list_reason(to_free, SKB_DROP_REASON_QDISC_DROP); + kfree_skb_list_reason(to_free, + tcf_get_drop_reason(to_free)); if (unlikely(contended)) spin_unlock(&q->busylock); return rc; @@ -3923,14 +3929,14 @@ static int tc_run(struct tcx_entry *entry, struct sk_buff *skb, tc_skb_cb(skb)->mru = 0; tc_skb_cb(skb)->post_ct = false; - res.drop_reason = *drop_reason; + tcf_set_drop_reason(skb, *drop_reason); mini_qdisc_bstats_cpu_update(miniq, skb); ret = tcf_classify(skb, miniq->block, miniq->filter_list, &res, false); /* Only tcf related quirks below. */ switch (ret) { case TC_ACT_SHOT: - *drop_reason = res.drop_reason; + *drop_reason = tcf_get_drop_reason(skb); mini_qdisc_qstats_cpu_drop(miniq); break; case TC_ACT_OK: @@ -11620,6 +11626,7 @@ static void __init net_dev_struct_check(void) CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_max_size); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_ipv4_max_size); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_max_segs); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_partial_features); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, num_tc); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, mtu); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, needed_headroom); @@ -11633,7 +11640,7 @@ static void __init net_dev_struct_check(void) #ifdef CONFIG_NET_XGRESS CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, tcx_egress); #endif - CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_tx, 152); + CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_tx, 160); /* TXRX read-mostly hotpath */ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, flags); @@ -11660,7 +11667,7 @@ static void __init net_dev_struct_check(void) #ifdef CONFIG_NET_XGRESS CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, tcx_ingress); #endif - CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 96); + CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 104); } /* diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index b240d9aae4a6..b0f221d658be 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -183,7 +183,7 @@ out: } static const struct genl_multicast_group dropmon_mcgrps[] = { - { .name = "events", .cap_sys_admin = 1 }, + { .name = "events", .flags = GENL_MCAST_CAP_SYS_ADMIN, }, }; static void send_dm_alert(struct work_struct *work) diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 75282222e0b4..96622bfb838a 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -53,7 +53,7 @@ bool fib_rule_matchall(const struct fib_rule *rule) EXPORT_SYMBOL_GPL(fib_rule_matchall); int fib_default_rule_add(struct fib_rules_ops *ops, - u32 pref, u32 table, u32 flags) + u32 pref, u32 table) { struct fib_rule *r; @@ -65,7 +65,6 @@ int fib_default_rule_add(struct fib_rules_ops *ops, r->action = FR_ACT_TO_TBL; r->pref = pref; r->table = table; - r->flags = flags; r->proto = RTPROT_KERNEL; r->fr_net = ops->fro_net; r->uid_range = fib_kuid_range_unset; diff --git a/net/core/filter.c b/net/core/filter.c index 4ff6100c6a27..24061f29c9dd 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -87,7 +87,7 @@ #include "dev.h" static const struct bpf_func_proto * -bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog); +bpf_sk_base_func_proto(enum bpf_func_id func_id); int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len) { @@ -203,7 +203,7 @@ BPF_CALL_3(bpf_skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x) return 0; nla = (struct nlattr *) &skb->data[a]; - if (nla->nla_len > skb->len - a) + if (!nla_ok(nla, skb->len - a)) return 0; nla = nla_find_nested(nla, x); @@ -7862,7 +7862,7 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_ktime_get_coarse_ns: return &bpf_ktime_get_coarse_ns_proto; default: - return bpf_base_func_proto(func_id, prog); + return bpf_base_func_proto(func_id); } } @@ -7955,7 +7955,7 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return NULL; } default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } } @@ -7974,7 +7974,7 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_perf_event_output: return &bpf_skb_event_output_proto; default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } } @@ -8161,7 +8161,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) #endif #endif default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } } @@ -8220,7 +8220,7 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) #endif #endif default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } #if IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES) @@ -8281,7 +8281,7 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_tcp_sock_proto; #endif /* CONFIG_INET */ default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } } @@ -8323,7 +8323,7 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_cgroup_classid_curr_proto; #endif default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } } @@ -8367,7 +8367,7 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_skc_lookup_tcp_proto; #endif default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } } @@ -8378,7 +8378,7 @@ flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_skb_load_bytes: return &bpf_flow_dissector_load_bytes_proto; default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } } @@ -8405,7 +8405,7 @@ lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_skb_under_cgroup: return &bpf_skb_under_cgroup_proto; default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } } @@ -8580,7 +8580,7 @@ static bool cg_skb_is_valid_access(int off, int size, return false; case bpf_ctx_range(struct __sk_buff, data): case bpf_ctx_range(struct __sk_buff, data_end): - if (!bpf_token_capable(prog->aux->token, CAP_BPF)) + if (!bpf_capable()) return false; break; } @@ -8592,7 +8592,7 @@ static bool cg_skb_is_valid_access(int off, int size, case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): break; case bpf_ctx_range(struct __sk_buff, tstamp): - if (!bpf_token_capable(prog->aux->token, CAP_BPF)) + if (!bpf_capable()) return false; break; default: @@ -11236,7 +11236,7 @@ sk_reuseport_func_proto(enum bpf_func_id func_id, case BPF_FUNC_ktime_get_coarse_ns: return &bpf_ktime_get_coarse_ns_proto; default: - return bpf_base_func_proto(func_id, prog); + return bpf_base_func_proto(func_id); } } @@ -11418,7 +11418,7 @@ sk_lookup_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_sk_release: return &bpf_sk_release_proto; default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } } @@ -11752,7 +11752,7 @@ const struct bpf_func_proto bpf_sock_from_file_proto = { }; static const struct bpf_func_proto * -bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +bpf_sk_base_func_proto(enum bpf_func_id func_id) { const struct bpf_func_proto *func; @@ -11781,10 +11781,10 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_ktime_get_coarse_ns: return &bpf_ktime_get_coarse_ns_proto; default: - return bpf_base_func_proto(func_id, prog); + return bpf_base_func_proto(func_id); } - if (!bpf_token_capable(prog->aux->token, CAP_PERFMON)) + if (!perfmon_capable()) return NULL; return func; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 57cea67b7562..ea55a758a475 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3669,10 +3669,8 @@ static int pktgen_thread_worker(void *arg) if (unlikely(!pkt_dev && t->control == 0)) { if (t->net->pktgen_exiting) break; - wait_event_interruptible_timeout(t->queue, - t->control != 0, - HZ/10); - try_to_freeze(); + wait_event_freezable_timeout(t->queue, + t->control != 0, HZ / 10); continue; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 94c4572512b8..5f6ed6da3cfc 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -6410,17 +6410,64 @@ static int rtnl_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, return dev->netdev_ops->ndo_mdb_add(dev, tb, nlh->nlmsg_flags, extack); } +static int rtnl_validate_mdb_entry_del_bulk(const struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct br_mdb_entry *entry = nla_data(attr); + struct br_mdb_entry zero_entry = {}; + + if (nla_len(attr) != sizeof(struct br_mdb_entry)) { + NL_SET_ERR_MSG_ATTR(extack, attr, "Invalid attribute length"); + return -EINVAL; + } + + if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY) { + NL_SET_ERR_MSG(extack, "Unknown entry state"); + return -EINVAL; + } + + if (entry->flags) { + NL_SET_ERR_MSG(extack, "Entry flags cannot be set"); + return -EINVAL; + } + + if (entry->vid >= VLAN_N_VID - 1) { + NL_SET_ERR_MSG(extack, "Invalid entry VLAN id"); + return -EINVAL; + } + + if (memcmp(&entry->addr, &zero_entry.addr, sizeof(entry->addr))) { + NL_SET_ERR_MSG(extack, "Entry address cannot be set"); + return -EINVAL; + } + + return 0; +} + +static const struct nla_policy mdba_del_bulk_policy[MDBA_SET_ENTRY_MAX + 1] = { + [MDBA_SET_ENTRY] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, + rtnl_validate_mdb_entry_del_bulk, + sizeof(struct br_mdb_entry)), + [MDBA_SET_ENTRY_ATTRS] = { .type = NLA_NESTED }, +}; + static int rtnl_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { + bool del_bulk = !!(nlh->nlmsg_flags & NLM_F_BULK); struct nlattr *tb[MDBA_SET_ENTRY_MAX + 1]; struct net *net = sock_net(skb->sk); struct br_port_msg *bpm; struct net_device *dev; int err; - err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb, - MDBA_SET_ENTRY_MAX, mdba_policy, extack); + if (!del_bulk) + err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb, + MDBA_SET_ENTRY_MAX, mdba_policy, + extack); + else + err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY_MAX, + mdba_del_bulk_policy, extack); if (err) return err; @@ -6441,6 +6488,14 @@ static int rtnl_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; } + if (del_bulk) { + if (!dev->netdev_ops->ndo_mdb_del_bulk) { + NL_SET_ERR_MSG(extack, "Device does not support MDB bulk deletion"); + return -EOPNOTSUPP; + } + return dev->netdev_ops->ndo_mdb_del_bulk(dev, tb, extack); + } + if (!dev->netdev_ops->ndo_mdb_del) { NL_SET_ERR_MSG(extack, "Device does not support MDB operations"); return -EOPNOTSUPP; @@ -6686,5 +6741,6 @@ void __init rtnetlink_init(void) rtnl_register(PF_BRIDGE, RTM_GETMDB, rtnl_mdb_get, rtnl_mdb_dump, 0); rtnl_register(PF_BRIDGE, RTM_NEWMDB, rtnl_mdb_add, NULL, 0); - rtnl_register(PF_BRIDGE, RTM_DELMDB, rtnl_mdb_del, NULL, 0); + rtnl_register(PF_BRIDGE, RTM_DELMDB, rtnl_mdb_del, NULL, + RTNL_FLAG_BULK_DEL_SUPPORTED); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4d4b11b0a83d..12d22c0b8551 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4861,7 +4861,9 @@ static __always_inline unsigned int skb_ext_total_length(void) static void skb_extensions_init(void) { BUILD_BUG_ON(SKB_EXT_NUM >= 8); +#if !IS_ENABLED(CONFIG_KCOV_INSTRUMENT_ALL) BUILD_BUG_ON(skb_ext_total_length() > 255); +#endif skbuff_ext_cache = kmem_cache_create("skbuff_ext_cache", SKB_EXT_ALIGN_VALUE * skb_ext_total_length(), @@ -5993,6 +5995,31 @@ int skb_ensure_writable(struct sk_buff *skb, unsigned int write_len) } EXPORT_SYMBOL(skb_ensure_writable); +int skb_ensure_writable_head_tail(struct sk_buff *skb, struct net_device *dev) +{ + int needed_headroom = dev->needed_headroom; + int needed_tailroom = dev->needed_tailroom; + + /* For tail taggers, we need to pad short frames ourselves, to ensure + * that the tail tag does not fail at its role of being at the end of + * the packet, once the conduit interface pads the frame. Account for + * that pad length here, and pad later. + */ + if (unlikely(needed_tailroom && skb->len < ETH_ZLEN)) + needed_tailroom += ETH_ZLEN - skb->len; + /* skb_headroom() returns unsigned int... */ + needed_headroom = max_t(int, needed_headroom - skb_headroom(skb), 0); + needed_tailroom = max_t(int, needed_tailroom - skb_tailroom(skb), 0); + + if (likely(!needed_headroom && !needed_tailroom && !skb_cloned(skb))) + /* No reallocation needed, yay! */ + return 0; + + return pskb_expand_head(skb, needed_headroom, needed_tailroom, + GFP_ATOMIC); +} +EXPORT_SYMBOL(skb_ensure_writable_head_tail); + /* remove VLAN header from packet and update csum accordingly. * expects a non skb_vlan_tag_present skb with a vlan tag payload */ diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 4292c2ed1828..27d733c0f65e 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -536,6 +536,8 @@ static bool sock_map_sk_state_allowed(const struct sock *sk) { if (sk_is_tcp(sk)) return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN); + if (sk_is_stream_unix(sk)) + return (1 << sk->sk_state) & TCPF_ESTABLISHED; return true; } diff --git a/net/core/stream.c b/net/core/stream.c index 96fbcb9bbb30..b16dfa568a2d 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -79,7 +79,7 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p) remove_wait_queue(sk_sleep(sk), &wait); sk->sk_write_pending--; } while (!done); - return 0; + return done < 0 ? done : 0; } EXPORT_SYMBOL(sk_stream_wait_connect); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 06d7324276ec..ded07e09f813 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -889,7 +889,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, u32 exthdrlen = icsk->icsk_ext_hdr_len; struct sockaddr_in sin; - SOCK_DEBUG(sk, "connect: ipv4 mapped\n"); + net_dbg_ratelimited("connect: ipv4 mapped\n"); if (ipv6_only_sock(sk)) return -ENETUNREACH; diff --git a/net/devlink/dev.c b/net/devlink/dev.c index 918a0395b03e..19dbf540748a 100644 --- a/net/devlink/dev.c +++ b/net/devlink/dev.c @@ -202,7 +202,10 @@ static void devlink_notify(struct devlink *devlink, enum devlink_command cmd) int err; WARN_ON(cmd != DEVLINK_CMD_NEW && cmd != DEVLINK_CMD_DEL); - WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)); + WARN_ON(!devl_is_registered(devlink)); + + if (!devlink_nl_notify_need(devlink)) + return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) @@ -214,8 +217,7 @@ static void devlink_notify(struct devlink *devlink, enum devlink_command cmd) return; } - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), - msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + devlink_nl_notify_send(devlink, msg); } int devlink_nl_get_doit(struct sk_buff *skb, struct genl_info *info) @@ -999,7 +1001,7 @@ static void __devlink_flash_update_notify(struct devlink *devlink, cmd != DEVLINK_CMD_FLASH_UPDATE_END && cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS); - if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) + if (!devl_is_registered(devlink) || !devlink_nl_notify_need(devlink)) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); @@ -1010,8 +1012,7 @@ static void __devlink_flash_update_notify(struct devlink *devlink, if (err) goto out_free_msg; - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), - msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + devlink_nl_notify_send(devlink, msg); return; out_free_msg: diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h index 5ea2e2012e93..c7a8e13f917c 100644 --- a/net/devlink/devl_internal.h +++ b/net/devlink/devl_internal.h @@ -91,10 +91,15 @@ extern struct genl_family devlink_nl_family; struct devlink *devlinks_xa_find_get(struct net *net, unsigned long *indexp); +static inline bool __devl_is_registered(struct devlink *devlink) +{ + return xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); +} + static inline bool devl_is_registered(struct devlink *devlink) { devl_assert_locked(devlink); - return xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); + return __devl_is_registered(devlink); } static inline void devl_dev_lock(struct devlink *devlink, bool dev_lock) @@ -180,6 +185,58 @@ int devlink_nl_put_nested_handle(struct sk_buff *msg, struct net *net, struct devlink *devlink, int attrtype); int devlink_nl_msg_reply_and_new(struct sk_buff **msg, struct genl_info *info); +static inline bool devlink_nl_notify_need(struct devlink *devlink) +{ + return genl_has_listeners(&devlink_nl_family, devlink_net(devlink), + DEVLINK_MCGRP_CONFIG); +} + +struct devlink_obj_desc { + struct rcu_head rcu; + const char *bus_name; + const char *dev_name; + unsigned int port_index; + bool port_index_valid; + long data[]; +}; + +static inline void devlink_nl_obj_desc_init(struct devlink_obj_desc *desc, + struct devlink *devlink) +{ + memset(desc, 0, sizeof(*desc)); + desc->bus_name = devlink->dev->bus->name; + desc->dev_name = dev_name(devlink->dev); +} + +static inline void devlink_nl_obj_desc_port_set(struct devlink_obj_desc *desc, + struct devlink_port *devlink_port) +{ + desc->port_index = devlink_port->index; + desc->port_index_valid = true; +} + +int devlink_nl_notify_filter(struct sock *dsk, struct sk_buff *skb, void *data); + +static inline void devlink_nl_notify_send_desc(struct devlink *devlink, + struct sk_buff *msg, + struct devlink_obj_desc *desc) +{ + genlmsg_multicast_netns_filtered(&devlink_nl_family, + devlink_net(devlink), + msg, 0, DEVLINK_MCGRP_CONFIG, + GFP_KERNEL, + devlink_nl_notify_filter, desc); +} + +static inline void devlink_nl_notify_send(struct devlink *devlink, + struct sk_buff *msg) +{ + struct devlink_obj_desc desc; + + devlink_nl_obj_desc_init(&desc, devlink); + devlink_nl_notify_send_desc(devlink, msg, &desc); +} + /* Notify */ void devlink_notify_register(struct devlink *devlink); void devlink_notify_unregister(struct devlink *devlink); diff --git a/net/devlink/health.c b/net/devlink/health.c index 71ae121dc739..acb8c0e174bb 100644 --- a/net/devlink/health.c +++ b/net/devlink/health.c @@ -490,12 +490,16 @@ static void devlink_recover_notify(struct devlink_health_reporter *reporter, enum devlink_command cmd) { struct devlink *devlink = reporter->devlink; + struct devlink_obj_desc desc; struct sk_buff *msg; int err; WARN_ON(cmd != DEVLINK_CMD_HEALTH_REPORTER_RECOVER); ASSERT_DEVLINK_REGISTERED(devlink); + if (!devlink_nl_notify_need(devlink)) + return; + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; @@ -506,8 +510,10 @@ static void devlink_recover_notify(struct devlink_health_reporter *reporter, return; } - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg, - 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + devlink_nl_obj_desc_init(&desc, devlink); + if (reporter->devlink_port) + devlink_nl_obj_desc_port_set(&desc, reporter->devlink_port); + devlink_nl_notify_send_desc(devlink, msg, &desc); } void diff --git a/net/devlink/linecard.c b/net/devlink/linecard.c index 2f1c317b64cd..67f70a621d27 100644 --- a/net/devlink/linecard.c +++ b/net/devlink/linecard.c @@ -136,7 +136,7 @@ static void devlink_linecard_notify(struct devlink_linecard *linecard, WARN_ON(cmd != DEVLINK_CMD_LINECARD_NEW && cmd != DEVLINK_CMD_LINECARD_DEL); - if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) + if (!__devl_is_registered(devlink) || !devlink_nl_notify_need(devlink)) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); @@ -150,8 +150,7 @@ static void devlink_linecard_notify(struct devlink_linecard *linecard, return; } - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), - msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + devlink_nl_notify_send(devlink, msg); } void devlink_linecards_notify_register(struct devlink *devlink) diff --git a/net/devlink/netlink.c b/net/devlink/netlink.c index fa9afe3e6d9b..499885c8b9ca 100644 --- a/net/devlink/netlink.c +++ b/net/devlink/netlink.c @@ -17,6 +17,119 @@ static const struct genl_multicast_group devlink_nl_mcgrps[] = { [DEVLINK_MCGRP_CONFIG] = { .name = DEVLINK_GENL_MCGRP_CONFIG_NAME }, }; +struct devlink_nl_sock_priv { + struct devlink_obj_desc __rcu *flt; + spinlock_t flt_lock; /* Protects flt. */ +}; + +static void devlink_nl_sock_priv_init(void *priv) +{ + struct devlink_nl_sock_priv *sk_priv = priv; + + spin_lock_init(&sk_priv->flt_lock); +} + +static void devlink_nl_sock_priv_destroy(void *priv) +{ + struct devlink_nl_sock_priv *sk_priv = priv; + struct devlink_obj_desc *flt; + + flt = rcu_dereference_protected(sk_priv->flt, true); + kfree_rcu(flt, rcu); +} + +int devlink_nl_notify_filter_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink_nl_sock_priv *sk_priv; + struct nlattr **attrs = info->attrs; + struct devlink_obj_desc *flt; + size_t data_offset = 0; + size_t data_size = 0; + char *pos; + + if (attrs[DEVLINK_ATTR_BUS_NAME]) + data_size = size_add(data_size, + nla_len(attrs[DEVLINK_ATTR_BUS_NAME]) + 1); + if (attrs[DEVLINK_ATTR_DEV_NAME]) + data_size = size_add(data_size, + nla_len(attrs[DEVLINK_ATTR_DEV_NAME]) + 1); + + flt = kzalloc(size_add(sizeof(*flt), data_size), GFP_KERNEL); + if (!flt) + return -ENOMEM; + + pos = (char *) flt->data; + if (attrs[DEVLINK_ATTR_BUS_NAME]) { + data_offset += nla_strscpy(pos, + attrs[DEVLINK_ATTR_BUS_NAME], + data_size) + 1; + flt->bus_name = pos; + pos += data_offset; + } + if (attrs[DEVLINK_ATTR_DEV_NAME]) { + nla_strscpy(pos, attrs[DEVLINK_ATTR_DEV_NAME], + data_size - data_offset); + flt->dev_name = pos; + } + + if (attrs[DEVLINK_ATTR_PORT_INDEX]) { + flt->port_index = nla_get_u32(attrs[DEVLINK_ATTR_PORT_INDEX]); + flt->port_index_valid = true; + } + + /* Don't attach empty filter. */ + if (!flt->bus_name && !flt->dev_name && !flt->port_index_valid) { + kfree(flt); + flt = NULL; + } + + sk_priv = genl_sk_priv_get(&devlink_nl_family, NETLINK_CB(skb).sk); + if (IS_ERR(sk_priv)) { + kfree(flt); + return PTR_ERR(sk_priv); + } + spin_lock(&sk_priv->flt_lock); + flt = rcu_replace_pointer(sk_priv->flt, flt, + lockdep_is_held(&sk_priv->flt_lock)); + spin_unlock(&sk_priv->flt_lock); + kfree_rcu(flt, rcu); + return 0; +} + +static bool devlink_obj_desc_match(const struct devlink_obj_desc *desc, + const struct devlink_obj_desc *flt) +{ + if (desc->bus_name && flt->bus_name && + strcmp(desc->bus_name, flt->bus_name)) + return false; + if (desc->dev_name && flt->dev_name && + strcmp(desc->dev_name, flt->dev_name)) + return false; + if (desc->port_index_valid && flt->port_index_valid && + desc->port_index != flt->port_index) + return false; + return true; +} + +int devlink_nl_notify_filter(struct sock *dsk, struct sk_buff *skb, void *data) +{ + struct devlink_obj_desc *desc = data; + struct devlink_nl_sock_priv *sk_priv; + struct devlink_obj_desc *flt; + int ret = 0; + + rcu_read_lock(); + sk_priv = __genl_sk_priv_get(&devlink_nl_family, dsk); + if (!IS_ERR_OR_NULL(sk_priv)) { + flt = rcu_dereference(sk_priv->flt); + if (flt) + ret = !devlink_obj_desc_match(desc, flt); + } + rcu_read_unlock(); + return ret; +} + int devlink_nl_put_nested_handle(struct sk_buff *msg, struct net *net, struct devlink *devlink, int attrtype) { @@ -256,4 +369,7 @@ struct genl_family devlink_nl_family __ro_after_init = { .resv_start_op = DEVLINK_CMD_SELFTESTS_RUN + 1, .mcgrps = devlink_nl_mcgrps, .n_mcgrps = ARRAY_SIZE(devlink_nl_mcgrps), + .sock_priv_size = sizeof(struct devlink_nl_sock_priv), + .sock_priv_init = devlink_nl_sock_priv_init, + .sock_priv_destroy = devlink_nl_sock_priv_destroy, }; diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c index 95f9b4350ab7..c81cf2dd154f 100644 --- a/net/devlink/netlink_gen.c +++ b/net/devlink/netlink_gen.c @@ -560,8 +560,15 @@ static const struct nla_policy devlink_selftests_run_nl_policy[DEVLINK_ATTR_SELF [DEVLINK_ATTR_SELFTESTS] = NLA_POLICY_NESTED(devlink_dl_selftest_id_nl_policy), }; +/* DEVLINK_CMD_NOTIFY_FILTER_SET - do */ +static const struct nla_policy devlink_notify_filter_set_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, +}; + /* Ops table for devlink */ -const struct genl_split_ops devlink_nl_ops[73] = { +const struct genl_split_ops devlink_nl_ops[74] = { { .cmd = DEVLINK_CMD_GET, .validate = GENL_DONT_VALIDATE_STRICT, @@ -1233,4 +1240,11 @@ const struct genl_split_ops devlink_nl_ops[73] = { .maxattr = DEVLINK_ATTR_SELFTESTS, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, + { + .cmd = DEVLINK_CMD_NOTIFY_FILTER_SET, + .doit = devlink_nl_notify_filter_set_doit, + .policy = devlink_notify_filter_set_nl_policy, + .maxattr = DEVLINK_ATTR_PORT_INDEX, + .flags = GENL_CMD_CAP_DO, + }, }; diff --git a/net/devlink/netlink_gen.h b/net/devlink/netlink_gen.h index 02f3c0bfae0e..8f2bd50ddf5e 100644 --- a/net/devlink/netlink_gen.h +++ b/net/devlink/netlink_gen.h @@ -16,7 +16,7 @@ extern const struct nla_policy devlink_dl_port_function_nl_policy[DEVLINK_PORT_F extern const struct nla_policy devlink_dl_selftest_id_nl_policy[DEVLINK_ATTR_SELFTEST_ID_FLASH + 1]; /* Ops table for devlink */ -extern const struct genl_split_ops devlink_nl_ops[73]; +extern const struct genl_split_ops devlink_nl_ops[74]; int devlink_nl_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); @@ -142,5 +142,7 @@ int devlink_nl_selftests_get_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_selftests_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); int devlink_nl_selftests_run_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_notify_filter_set_doit(struct sk_buff *skb, + struct genl_info *info); #endif /* _LINUX_DEVLINK_GEN_H */ diff --git a/net/devlink/param.c b/net/devlink/param.c index d74df09311a9..22bc3b500518 100644 --- a/net/devlink/param.c +++ b/net/devlink/param.c @@ -343,7 +343,7 @@ static void devlink_param_notify(struct devlink *devlink, * will replay the notifications if the params are added/removed * outside of the lifetime of the instance. */ - if (!devl_is_registered(devlink)) + if (!devl_is_registered(devlink) || !devlink_nl_notify_need(devlink)) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); @@ -356,8 +356,7 @@ static void devlink_param_notify(struct devlink *devlink, return; } - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), - msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + devlink_nl_notify_send(devlink, msg); } static void devlink_params_notify(struct devlink *devlink, diff --git a/net/devlink/port.c b/net/devlink/port.c index 7634f187fa50..62e54e152ecf 100644 --- a/net/devlink/port.c +++ b/net/devlink/port.c @@ -507,12 +507,13 @@ static void devlink_port_notify(struct devlink_port *devlink_port, enum devlink_command cmd) { struct devlink *devlink = devlink_port->devlink; + struct devlink_obj_desc desc; struct sk_buff *msg; int err; WARN_ON(cmd != DEVLINK_CMD_PORT_NEW && cmd != DEVLINK_CMD_PORT_DEL); - if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) + if (!__devl_is_registered(devlink) || !devlink_nl_notify_need(devlink)) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); @@ -525,8 +526,9 @@ static void devlink_port_notify(struct devlink_port *devlink_port, return; } - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg, - 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + devlink_nl_obj_desc_init(&desc, devlink); + devlink_nl_obj_desc_port_set(&desc, devlink_port); + devlink_nl_notify_send_desc(devlink, msg, &desc); } static void devlink_ports_notify(struct devlink *devlink, diff --git a/net/devlink/rate.c b/net/devlink/rate.c index 94b289b93ff2..7139e67e93ae 100644 --- a/net/devlink/rate.c +++ b/net/devlink/rate.c @@ -146,7 +146,7 @@ static void devlink_rate_notify(struct devlink_rate *devlink_rate, WARN_ON(cmd != DEVLINK_CMD_RATE_NEW && cmd != DEVLINK_CMD_RATE_DEL); - if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) + if (!devl_is_registered(devlink) || !devlink_nl_notify_need(devlink)) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); @@ -159,8 +159,7 @@ static void devlink_rate_notify(struct devlink_rate *devlink_rate, return; } - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg, - 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + devlink_nl_notify_send(devlink, msg); } void devlink_rates_notify_register(struct devlink *devlink) diff --git a/net/devlink/region.c b/net/devlink/region.c index e3bab458db94..7319127c5913 100644 --- a/net/devlink/region.c +++ b/net/devlink/region.c @@ -234,15 +234,15 @@ static void devlink_nl_region_notify(struct devlink_region *region, struct sk_buff *msg; WARN_ON(cmd != DEVLINK_CMD_REGION_NEW && cmd != DEVLINK_CMD_REGION_DEL); - if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) + + if (!__devl_is_registered(devlink) || !devlink_nl_notify_need(devlink)) return; msg = devlink_nl_region_notify_build(region, snapshot, cmd, 0, 0); if (IS_ERR(msg)) return; - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg, - 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + devlink_nl_notify_send(devlink, msg); } void devlink_regions_notify_register(struct devlink *devlink) diff --git a/net/devlink/trap.c b/net/devlink/trap.c index c26313e7ca08..5d18c7424df1 100644 --- a/net/devlink/trap.c +++ b/net/devlink/trap.c @@ -1173,7 +1173,8 @@ devlink_trap_group_notify(struct devlink *devlink, WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_GROUP_NEW && cmd != DEVLINK_CMD_TRAP_GROUP_DEL); - if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) + + if (!devl_is_registered(devlink) || !devlink_nl_notify_need(devlink)) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); @@ -1187,8 +1188,7 @@ devlink_trap_group_notify(struct devlink *devlink, return; } - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), - msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + devlink_nl_notify_send(devlink, msg); } void devlink_trap_groups_notify_register(struct devlink *devlink) @@ -1234,7 +1234,8 @@ static void devlink_trap_notify(struct devlink *devlink, WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_NEW && cmd != DEVLINK_CMD_TRAP_DEL); - if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) + + if (!devl_is_registered(devlink) || !devlink_nl_notify_need(devlink)) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); @@ -1247,8 +1248,7 @@ static void devlink_trap_notify(struct devlink *devlink, return; } - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), - msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + devlink_nl_notify_send(devlink, msg); } void devlink_traps_notify_register(struct devlink *devlink) @@ -1710,7 +1710,8 @@ devlink_trap_policer_notify(struct devlink *devlink, WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_POLICER_NEW && cmd != DEVLINK_CMD_TRAP_POLICER_DEL); - if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) + + if (!devl_is_registered(devlink) || !devlink_nl_notify_need(devlink)) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); @@ -1724,8 +1725,7 @@ devlink_trap_policer_notify(struct devlink *devlink, return; } - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), - msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + devlink_nl_notify_send(devlink, msg); } void devlink_trap_policers_notify_register(struct devlink *devlink) diff --git a/net/dsa/user.c b/net/dsa/user.c index d438884a4eb0..b738a466e2dc 100644 --- a/net/dsa/user.c +++ b/net/dsa/user.c @@ -920,30 +920,6 @@ netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev) } EXPORT_SYMBOL_GPL(dsa_enqueue_skb); -static int dsa_realloc_skb(struct sk_buff *skb, struct net_device *dev) -{ - int needed_headroom = dev->needed_headroom; - int needed_tailroom = dev->needed_tailroom; - - /* For tail taggers, we need to pad short frames ourselves, to ensure - * that the tail tag does not fail at its role of being at the end of - * the packet, once the conduit interface pads the frame. Account for - * that pad length here, and pad later. - */ - if (unlikely(needed_tailroom && skb->len < ETH_ZLEN)) - needed_tailroom += ETH_ZLEN - skb->len; - /* skb_headroom() returns unsigned int... */ - needed_headroom = max_t(int, needed_headroom - skb_headroom(skb), 0); - needed_tailroom = max_t(int, needed_tailroom - skb_tailroom(skb), 0); - - if (likely(!needed_headroom && !needed_tailroom && !skb_cloned(skb))) - /* No reallocation needed, yay! */ - return 0; - - return pskb_expand_head(skb, needed_headroom, needed_tailroom, - GFP_ATOMIC); -} - static netdev_tx_t dsa_user_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_user_priv *p = netdev_priv(dev); @@ -956,13 +932,14 @@ static netdev_tx_t dsa_user_xmit(struct sk_buff *skb, struct net_device *dev) /* Handle tx timestamp if any */ dsa_skb_tx_timestamp(p, skb); - if (dsa_realloc_skb(skb, dev)) { + if (skb_ensure_writable_head_tail(skb, dev)) { dev_kfree_skb_any(skb); return NETDEV_TX_OK; } /* needed_tailroom should still be 'warm' in the cache line from - * dsa_realloc_skb(), which has also ensured that padding is safe. + * skb_ensure_writable_head_tail(), which has also ensured that + * padding is safe. */ if (dev->needed_tailroom) eth_skb_pad(skb); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 86d47425038b..4bc9a2a07bbb 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -973,32 +973,35 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, u32 cmd, void __user *useraddr) { const struct ethtool_ops *ops = dev->ethtool_ops; - struct ethtool_rxfh_param rxfh = {}; struct ethtool_rxnfc info; size_t info_size = sizeof(info); int rc; - if (!ops->set_rxnfc || !ops->get_rxfh) + if (!ops->set_rxnfc) return -EOPNOTSUPP; rc = ethtool_rxnfc_copy_struct(cmd, &info, &info_size, useraddr); if (rc) return rc; - rc = ops->get_rxfh(dev, &rxfh); - if (rc) - return rc; + if (ops->get_rxfh) { + struct ethtool_rxfh_param rxfh = {}; - /* Sanity check: if symmetric-xor is set, then: - * 1 - no other fields besides IP src/dst and/or L4 src/dst - * 2 - If src is set, dst must also be set - */ - if ((rxfh.input_xfrm & RXH_XFRM_SYM_XOR) && - ((info.data & ~(RXH_IP_SRC | RXH_IP_DST | - RXH_L4_B_0_1 | RXH_L4_B_2_3)) || - (!!(info.data & RXH_IP_SRC) ^ !!(info.data & RXH_IP_DST)) || - (!!(info.data & RXH_L4_B_0_1) ^ !!(info.data & RXH_L4_B_2_3)))) - return -EINVAL; + rc = ops->get_rxfh(dev, &rxfh); + if (rc) + return rc; + + /* Sanity check: if symmetric-xor is set, then: + * 1 - no other fields besides IP src/dst and/or L4 src/dst + * 2 - If src is set, dst must also be set + */ + if ((rxfh.input_xfrm & RXH_XFRM_SYM_XOR) && + ((info.data & ~(RXH_IP_SRC | RXH_IP_DST | + RXH_L4_B_0_1 | RXH_L4_B_2_3)) || + (!!(info.data & RXH_IP_SRC) ^ !!(info.data & RXH_IP_DST)) || + (!!(info.data & RXH_L4_B_0_1) ^ !!(info.data & RXH_L4_B_2_3)))) + return -EINVAL; + } rc = ops->set_rxnfc(dev, &info); if (rc) @@ -1252,6 +1255,11 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev, &rxfh_dev.hfunc, sizeof(rxfh.hfunc))) { ret = -EFAULT; } else if (copy_to_user(useraddr + + offsetof(struct ethtool_rxfh, input_xfrm), + &rxfh_dev.input_xfrm, + sizeof(rxfh.input_xfrm))) { + ret = -EFAULT; + } else if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, rss_config[0]), rss_config, total_size)) { ret = -EFAULT; @@ -1299,14 +1307,16 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, return -EOPNOTSUPP; /* If either indir, hash key or function is valid, proceed further. - * Must request at least one change: indir size, hash key or function. + * Must request at least one change: indir size, hash key, function + * or input transformation. */ if ((rxfh.indir_size && rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE && rxfh.indir_size != dev_indir_size) || (rxfh.key_size && (rxfh.key_size != dev_key_size)) || (rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE && - rxfh.key_size == 0 && rxfh.hfunc == ETH_RSS_HASH_NO_CHANGE)) + rxfh.key_size == 0 && rxfh.hfunc == ETH_RSS_HASH_NO_CHANGE && + rxfh.input_xfrm == RXH_XFRM_NO_CHANGE)) return -EINVAL; if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile index f05b7bdae2aa..7bce67673e83 100644 --- a/net/ieee802154/Makefile +++ b/net/ieee802154/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_IEEE802154_SOCKET) += ieee802154_socket.o obj-y += 6lowpan/ ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o core.o \ - header_ops.o sysfs.o nl802154.o trace.o + header_ops.o sysfs.o nl802154.o trace.o pan.o ieee802154_socket-y := socket.o CFLAGS_trace.o := -I$(src) diff --git a/net/ieee802154/core.c b/net/ieee802154/core.c index 57546e07e06a..60e8fff1347e 100644 --- a/net/ieee802154/core.c +++ b/net/ieee802154/core.c @@ -198,6 +198,25 @@ void wpan_phy_free(struct wpan_phy *phy) } EXPORT_SYMBOL(wpan_phy_free); +static void cfg802154_free_peer_structures(struct wpan_dev *wpan_dev) +{ + struct ieee802154_pan_device *child, *tmp; + + mutex_lock(&wpan_dev->association_lock); + + kfree(wpan_dev->parent); + wpan_dev->parent = NULL; + + list_for_each_entry_safe(child, tmp, &wpan_dev->children, node) { + list_del(&child->node); + kfree(child); + } + + wpan_dev->nchildren = 0; + + mutex_unlock(&wpan_dev->association_lock); +} + int cfg802154_switch_netns(struct cfg802154_registered_device *rdev, struct net *net) { @@ -276,6 +295,9 @@ static int cfg802154_netdev_notifier_call(struct notifier_block *nb, wpan_dev->identifier = ++rdev->wpan_dev_id; list_add_rcu(&wpan_dev->list, &rdev->wpan_dev_list); rdev->devlist_generation++; + mutex_init(&wpan_dev->association_lock); + INIT_LIST_HEAD(&wpan_dev->children); + wpan_dev->max_associations = SZ_16K; wpan_dev->netdev = dev; break; @@ -291,6 +313,8 @@ static int cfg802154_netdev_notifier_call(struct notifier_block *nb, rdev->opencount++; break; case NETDEV_UNREGISTER: + cfg802154_free_peer_structures(wpan_dev); + /* It is possible to get NETDEV_UNREGISTER * multiple times. To detect that, check * that the interface is still on the list diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 1a265a421308..7eb37de3add2 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -234,6 +234,8 @@ static const struct nla_policy nl802154_policy[NL802154_ATTR_MAX+1] = { NL802154_SCAN_DONE_REASON_ABORTED), [NL802154_ATTR_BEACON_INTERVAL] = NLA_POLICY_MAX(NLA_U8, IEEE802154_ACTIVE_SCAN_DURATION), + [NL802154_ATTR_MAX_ASSOCIATIONS] = { .type = NLA_U32 }, + [NL802154_ATTR_PEER] = { .type = NLA_NESTED }, #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL [NL802154_ATTR_SEC_ENABLED] = { .type = NLA_U8, }, @@ -248,7 +250,6 @@ static const struct nla_policy nl802154_policy[NL802154_ATTR_MAX+1] = { #endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */ }; -#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL static int nl802154_prepare_wpan_dev_dump(struct sk_buff *skb, struct netlink_callback *cb, @@ -307,7 +308,6 @@ nl802154_finish_wpan_dev_dump(struct cfg802154_registered_device *rdev) { rtnl_unlock(); } -#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */ /* message building helper */ static inline void *nl802154hdr_put(struct sk_buff *skb, u32 portid, u32 seq, @@ -1087,15 +1087,14 @@ static int nl802154_set_pan_id(struct sk_buff *skb, struct genl_info *info) pan_id = nla_get_le16(info->attrs[NL802154_ATTR_PAN_ID]); - /* TODO - * I am not sure about to check here on broadcast pan_id. - * Broadcast is a valid setting, comment from 802.15.4: - * If this value is 0xffff, the device is not associated. - * - * This could useful to simple deassociate an device. + /* Only allow changing the PAN ID when the device has no more + * associations ongoing to avoid confusing peers. */ - if (pan_id == cpu_to_le16(IEEE802154_PAN_ID_BROADCAST)) + if (cfg802154_device_is_associated(wpan_dev)) { + NL_SET_ERR_MSG(info->extack, + "Existing associations, changing PAN ID forbidden"); return -EINVAL; + } return rdev_set_pan_id(rdev, wpan_dev, pan_id); } @@ -1123,20 +1122,17 @@ static int nl802154_set_short_addr(struct sk_buff *skb, struct genl_info *info) short_addr = nla_get_le16(info->attrs[NL802154_ATTR_SHORT_ADDR]); - /* TODO - * I am not sure about to check here on broadcast short_addr. - * Broadcast is a valid setting, comment from 802.15.4: - * A value of 0xfffe indicates that the device has - * associated but has not been allocated an address. A - * value of 0xffff indicates that the device does not - * have a short address. - * - * I think we should allow to set these settings but - * don't allow to allow socket communication with it. + /* The short address only has a meaning when part of a PAN, after a + * proper association procedure. However, we want to still offer the + * possibility to create static networks so changing the short address + * is only allowed when not already associated to other devices with + * the official handshake. */ - if (short_addr == cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC) || - short_addr == cpu_to_le16(IEEE802154_ADDR_SHORT_BROADCAST)) + if (cfg802154_device_is_associated(wpan_dev)) { + NL_SET_ERR_MSG(info->extack, + "Existing associations, changing short address forbidden"); return -EINVAL; + } return rdev_set_short_addr(rdev, wpan_dev, short_addr); } @@ -1638,6 +1634,189 @@ nl802154_stop_beacons(struct sk_buff *skb, struct genl_info *info) return rdev_stop_beacons(rdev, wpan_dev); } +static int nl802154_associate(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg802154_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wpan_dev *wpan_dev; + struct wpan_phy *wpan_phy; + struct ieee802154_addr coord; + int err; + + wpan_dev = dev->ieee802154_ptr; + wpan_phy = &rdev->wpan_phy; + + if (wpan_phy->flags & WPAN_PHY_FLAG_DATAGRAMS_ONLY) { + NL_SET_ERR_MSG(info->extack, "PHY only supports datagrams"); + return -EOPNOTSUPP; + } + + if (!info->attrs[NL802154_ATTR_PAN_ID] || + !info->attrs[NL802154_ATTR_EXTENDED_ADDR]) + return -EINVAL; + + coord.pan_id = nla_get_le16(info->attrs[NL802154_ATTR_PAN_ID]); + coord.mode = IEEE802154_ADDR_LONG; + coord.extended_addr = nla_get_le64(info->attrs[NL802154_ATTR_EXTENDED_ADDR]); + + mutex_lock(&wpan_dev->association_lock); + err = rdev_associate(rdev, wpan_dev, &coord); + mutex_unlock(&wpan_dev->association_lock); + if (err) + pr_err("Association with PAN ID 0x%x failed (%d)\n", + le16_to_cpu(coord.pan_id), err); + + return err; +} + +static int nl802154_disassociate(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg802154_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wpan_dev *wpan_dev = dev->ieee802154_ptr; + struct wpan_phy *wpan_phy = &rdev->wpan_phy; + struct ieee802154_addr target; + + if (wpan_phy->flags & WPAN_PHY_FLAG_DATAGRAMS_ONLY) { + NL_SET_ERR_MSG(info->extack, "PHY only supports datagrams"); + return -EOPNOTSUPP; + } + + target.pan_id = wpan_dev->pan_id; + + if (info->attrs[NL802154_ATTR_EXTENDED_ADDR]) { + target.mode = IEEE802154_ADDR_LONG; + target.extended_addr = nla_get_le64(info->attrs[NL802154_ATTR_EXTENDED_ADDR]); + } else if (info->attrs[NL802154_ATTR_SHORT_ADDR]) { + target.mode = IEEE802154_ADDR_SHORT; + target.short_addr = nla_get_le16(info->attrs[NL802154_ATTR_SHORT_ADDR]); + } else { + NL_SET_ERR_MSG(info->extack, "Device address is missing"); + return -EINVAL; + } + + mutex_lock(&wpan_dev->association_lock); + rdev_disassociate(rdev, wpan_dev, &target); + mutex_unlock(&wpan_dev->association_lock); + + return 0; +} + +static int nl802154_set_max_associations(struct sk_buff *skb, struct genl_info *info) +{ + struct net_device *dev = info->user_ptr[1]; + struct wpan_dev *wpan_dev = dev->ieee802154_ptr; + unsigned int max_assoc; + + if (!info->attrs[NL802154_ATTR_MAX_ASSOCIATIONS]) { + NL_SET_ERR_MSG(info->extack, "No maximum number of association given"); + return -EINVAL; + } + + max_assoc = nla_get_u32(info->attrs[NL802154_ATTR_MAX_ASSOCIATIONS]); + + mutex_lock(&wpan_dev->association_lock); + cfg802154_set_max_associations(wpan_dev, max_assoc); + mutex_unlock(&wpan_dev->association_lock); + + return 0; +} + +static int nl802154_send_peer_info(struct sk_buff *msg, + struct netlink_callback *cb, + u32 seq, int flags, + struct cfg802154_registered_device *rdev, + struct wpan_dev *wpan_dev, + struct ieee802154_pan_device *peer, + enum nl802154_peer_type type) +{ + struct nlattr *nla; + void *hdr; + + ASSERT_RTNL(); + + hdr = nl802154hdr_put(msg, NETLINK_CB(cb->skb).portid, seq, flags, + NL802154_CMD_LIST_ASSOCIATIONS); + if (!hdr) + return -ENOBUFS; + + genl_dump_check_consistent(cb, hdr); + + nla = nla_nest_start_noflag(msg, NL802154_ATTR_PEER); + if (!nla) + goto nla_put_failure; + + if (nla_put_u8(msg, NL802154_DEV_ADDR_ATTR_PEER_TYPE, type)) + goto nla_put_failure; + + if (nla_put_u8(msg, NL802154_DEV_ADDR_ATTR_MODE, peer->mode)) + goto nla_put_failure; + + if (nla_put(msg, NL802154_DEV_ADDR_ATTR_SHORT, + IEEE802154_SHORT_ADDR_LEN, &peer->short_addr)) + goto nla_put_failure; + + if (nla_put(msg, NL802154_DEV_ADDR_ATTR_EXTENDED, + IEEE802154_EXTENDED_ADDR_LEN, &peer->extended_addr)) + goto nla_put_failure; + + nla_nest_end(msg, nla); + + genlmsg_end(msg, hdr); + + return 0; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int nl802154_list_associations(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct cfg802154_registered_device *rdev; + struct ieee802154_pan_device *child; + struct wpan_dev *wpan_dev; + int err; + + err = nl802154_prepare_wpan_dev_dump(skb, cb, &rdev, &wpan_dev); + if (err) + return err; + + mutex_lock(&wpan_dev->association_lock); + + if (cb->args[2]) + goto out; + + if (wpan_dev->parent) { + err = nl802154_send_peer_info(skb, cb, cb->nlh->nlmsg_seq, + NLM_F_MULTI, rdev, wpan_dev, + wpan_dev->parent, + NL802154_PEER_TYPE_PARENT); + if (err < 0) + goto out_err; + } + + list_for_each_entry(child, &wpan_dev->children, node) { + err = nl802154_send_peer_info(skb, cb, cb->nlh->nlmsg_seq, + NLM_F_MULTI, rdev, wpan_dev, + child, + NL802154_PEER_TYPE_CHILD); + if (err < 0) + goto out_err; + } + + cb->args[2] = 1; +out: + err = skb->len; +out_err: + mutex_unlock(&wpan_dev->association_lock); + + nl802154_finish_wpan_dev_dump(rdev); + + return err; +} + #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL static const struct nla_policy nl802154_dev_addr_policy[NL802154_DEV_ADDR_ATTR_MAX + 1] = { [NL802154_DEV_ADDR_ATTR_PAN_ID] = { .type = NLA_U16 }, @@ -2759,6 +2938,34 @@ static const struct genl_ops nl802154_ops[] = { NL802154_FLAG_CHECK_NETDEV_UP | NL802154_FLAG_NEED_RTNL, }, + { + .cmd = NL802154_CMD_ASSOCIATE, + .doit = nl802154_associate, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL802154_FLAG_NEED_NETDEV | + NL802154_FLAG_CHECK_NETDEV_UP | + NL802154_FLAG_NEED_RTNL, + }, + { + .cmd = NL802154_CMD_DISASSOCIATE, + .doit = nl802154_disassociate, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL802154_FLAG_NEED_NETDEV | + NL802154_FLAG_CHECK_NETDEV_UP | + NL802154_FLAG_NEED_RTNL, + }, + { + .cmd = NL802154_CMD_SET_MAX_ASSOCIATIONS, + .doit = nl802154_set_max_associations, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL802154_FLAG_NEED_NETDEV | + NL802154_FLAG_NEED_RTNL, + }, + { + .cmd = NL802154_CMD_LIST_ASSOCIATIONS, + .dumpit = nl802154_list_associations, + /* can be retrieved by unprivileged users */ + }, #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL { .cmd = NL802154_CMD_SET_SEC_PARAMS, diff --git a/net/ieee802154/pan.c b/net/ieee802154/pan.c new file mode 100644 index 000000000000..249df7364b3e --- /dev/null +++ b/net/ieee802154/pan.c @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * IEEE 802.15.4 PAN management + * + * Copyright (C) 2023 Qorvo US, Inc + * Authors: + * - David Girault <david.girault@qorvo.com> + * - Miquel Raynal <miquel.raynal@bootlin.com> + */ + +#include <linux/kernel.h> +#include <net/cfg802154.h> +#include <net/af_ieee802154.h> + +/* Checks whether a device address matches one from the PAN list. + * This helper is meant to be used only during PAN management, when we expect + * extended addresses to be used. + */ +static bool cfg802154_pan_device_is_matching(struct ieee802154_pan_device *pan_dev, + struct ieee802154_addr *ext_dev) +{ + if (!pan_dev || !ext_dev) + return false; + + if (ext_dev->mode == IEEE802154_ADDR_SHORT) + return false; + + return pan_dev->extended_addr == ext_dev->extended_addr; +} + +bool cfg802154_device_is_associated(struct wpan_dev *wpan_dev) +{ + bool is_assoc; + + mutex_lock(&wpan_dev->association_lock); + is_assoc = !list_empty(&wpan_dev->children) || wpan_dev->parent; + mutex_unlock(&wpan_dev->association_lock); + + return is_assoc; +} + +bool cfg802154_device_is_parent(struct wpan_dev *wpan_dev, + struct ieee802154_addr *target) +{ + lockdep_assert_held(&wpan_dev->association_lock); + + return cfg802154_pan_device_is_matching(wpan_dev->parent, target); +} +EXPORT_SYMBOL_GPL(cfg802154_device_is_parent); + +struct ieee802154_pan_device * +cfg802154_device_is_child(struct wpan_dev *wpan_dev, + struct ieee802154_addr *target) +{ + struct ieee802154_pan_device *child; + + lockdep_assert_held(&wpan_dev->association_lock); + + list_for_each_entry(child, &wpan_dev->children, node) + if (cfg802154_pan_device_is_matching(child, target)) + return child; + + return NULL; +} +EXPORT_SYMBOL_GPL(cfg802154_device_is_child); + +__le16 cfg802154_get_free_short_addr(struct wpan_dev *wpan_dev) +{ + struct ieee802154_pan_device *child; + __le16 addr; + + lockdep_assert_held(&wpan_dev->association_lock); + + do { + get_random_bytes(&addr, 2); + if (addr == cpu_to_le16(IEEE802154_ADDR_SHORT_BROADCAST) || + addr == cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC)) + continue; + + if (wpan_dev->short_addr == addr) + continue; + + if (wpan_dev->parent && wpan_dev->parent->short_addr == addr) + continue; + + list_for_each_entry(child, &wpan_dev->children, node) + if (child->short_addr == addr) + continue; + + break; + } while (1); + + return addr; +} +EXPORT_SYMBOL_GPL(cfg802154_get_free_short_addr); + +unsigned int cfg802154_set_max_associations(struct wpan_dev *wpan_dev, + unsigned int max) +{ + unsigned int old_max; + + lockdep_assert_held(&wpan_dev->association_lock); + + old_max = wpan_dev->max_associations; + wpan_dev->max_associations = max; + + return old_max; +} +EXPORT_SYMBOL_GPL(cfg802154_set_max_associations); diff --git a/net/ieee802154/rdev-ops.h b/net/ieee802154/rdev-ops.h index 5eaae15c610e..64071ef6f57b 100644 --- a/net/ieee802154/rdev-ops.h +++ b/net/ieee802154/rdev-ops.h @@ -265,6 +265,36 @@ static inline int rdev_stop_beacons(struct cfg802154_registered_device *rdev, return ret; } +static inline int rdev_associate(struct cfg802154_registered_device *rdev, + struct wpan_dev *wpan_dev, + struct ieee802154_addr *coord) +{ + int ret; + + if (!rdev->ops->associate) + return -EOPNOTSUPP; + + trace_802154_rdev_associate(&rdev->wpan_phy, wpan_dev, coord); + ret = rdev->ops->associate(&rdev->wpan_phy, wpan_dev, coord); + trace_802154_rdev_return_int(&rdev->wpan_phy, ret); + return ret; +} + +static inline int rdev_disassociate(struct cfg802154_registered_device *rdev, + struct wpan_dev *wpan_dev, + struct ieee802154_addr *target) +{ + int ret; + + if (!rdev->ops->disassociate) + return -EOPNOTSUPP; + + trace_802154_rdev_disassociate(&rdev->wpan_phy, wpan_dev, target); + ret = rdev->ops->disassociate(&rdev->wpan_phy, wpan_dev, target); + trace_802154_rdev_return_int(&rdev->wpan_phy, ret); + return ret; +} + #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL /* TODO this is already a nl802154, so move into ieee802154 */ static inline void diff --git a/net/ieee802154/trace.h b/net/ieee802154/trace.h index c16db0b326fa..62aa6465253a 100644 --- a/net/ieee802154/trace.h +++ b/net/ieee802154/trace.h @@ -356,6 +356,44 @@ DEFINE_EVENT(802154_wdev_template, 802154_rdev_stop_beacons, TP_ARGS(wpan_phy, wpan_dev) ); +TRACE_EVENT(802154_rdev_associate, + TP_PROTO(struct wpan_phy *wpan_phy, + struct wpan_dev *wpan_dev, + struct ieee802154_addr *coord), + TP_ARGS(wpan_phy, wpan_dev, coord), + TP_STRUCT__entry( + WPAN_PHY_ENTRY + WPAN_DEV_ENTRY + __field(__le64, addr) + ), + TP_fast_assign( + WPAN_PHY_ASSIGN; + WPAN_DEV_ASSIGN; + __entry->addr = coord->extended_addr; + ), + TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", associating with: 0x%llx", + WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG, __entry->addr) +); + +TRACE_EVENT(802154_rdev_disassociate, + TP_PROTO(struct wpan_phy *wpan_phy, + struct wpan_dev *wpan_dev, + struct ieee802154_addr *target), + TP_ARGS(wpan_phy, wpan_dev, target), + TP_STRUCT__entry( + WPAN_PHY_ENTRY + WPAN_DEV_ENTRY + __field(__le64, addr) + ), + TP_fast_assign( + WPAN_PHY_ASSIGN; + WPAN_DEV_ASSIGN; + __entry->addr = target->extended_addr; + ), + TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", disassociating with: 0x%llx", + WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG, __entry->addr) +); + TRACE_EVENT(802154_rdev_return_int, TP_PROTO(struct wpan_phy *wpan_phy, int ret), TP_ARGS(wpan_phy, ret), diff --git a/net/ife/ife.c b/net/ife/ife.c index 13bbf8cb6a39..be05b690b9ef 100644 --- a/net/ife/ife.c +++ b/net/ife/ife.c @@ -82,6 +82,7 @@ void *ife_decode(struct sk_buff *skb, u16 *metalen) if (unlikely(!pskb_may_pull(skb, total_pull))) return NULL; + ifehdr = (struct ifeheadr *)(skb->data + skb->dev->hard_header_len); skb_set_mac_header(skb, total_pull); __skb_pull(skb, total_pull); *metalen = ifehdrln - IFE_METAHDRLEN; diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 634cfafa583d..ae8b15e6896f 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -191,7 +191,7 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id, case BPF_FUNC_ktime_get_coarse_ns: return &bpf_ktime_get_coarse_ns_proto; default: - return bpf_base_func_proto(func_id, prog); + return bpf_base_func_proto(func_id); } } diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 513f475c6a53..5bdd1c016009 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -395,13 +395,13 @@ static int fib_default_rules_init(struct fib_rules_ops *ops) { int err; - err = fib_default_rule_add(ops, 0, RT_TABLE_LOCAL, 0); + err = fib_default_rule_add(ops, 0, RT_TABLE_LOCAL); if (err < 0) return err; - err = fib_default_rule_add(ops, 0x7FFE, RT_TABLE_MAIN, 0); + err = fib_default_rule_add(ops, 0x7FFE, RT_TABLE_MAIN); if (err < 0) return err; - err = fib_default_rule_add(ops, 0x7FFF, RT_TABLE_DEFAULT, 0); + err = fib_default_rule_add(ops, 0x7FFF, RT_TABLE_DEFAULT); if (err < 0) return err; return 0; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index bd325b029dd1..8e2eb1793685 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -159,8 +159,11 @@ static bool inet_use_bhash2_on_bind(const struct sock *sk) if (sk->sk_family == AF_INET6) { int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr); - return addr_type != IPV6_ADDR_ANY && - addr_type != IPV6_ADDR_MAPPED; + if (addr_type == IPV6_ADDR_ANY) + return false; + + if (addr_type != IPV6_ADDR_MAPPED) + return true; } #endif return sk->sk_rcv_saddr != htonl(INADDR_ANY); @@ -213,18 +216,9 @@ static bool inet_bhash2_conflict(const struct sock *sk, bool relax, bool reuseport_cb_ok, bool reuseport_ok) { - struct inet_timewait_sock *tw2; struct sock *sk2; - sk_for_each_bound_bhash2(sk2, &tb2->owners) { - if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax, - reuseport_cb_ok, reuseport_ok)) - return true; - } - - twsk_for_each_bound_bhash2(tw2, &tb2->deathrow) { - sk2 = (struct sock *)tw2; - + sk_for_each_bound(sk2, &tb2->owners) { if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax, reuseport_cb_ok, reuseport_ok)) return true; @@ -233,15 +227,20 @@ static bool inet_bhash2_conflict(const struct sock *sk, return false; } +#define sk_for_each_bound_bhash(__sk, __tb2, __tb) \ + hlist_for_each_entry(__tb2, &(__tb)->bhash2, bhash_node) \ + sk_for_each_bound(sk2, &(__tb2)->owners) + /* This should be called only when the tb and tb2 hashbuckets' locks are held */ static int inet_csk_bind_conflict(const struct sock *sk, const struct inet_bind_bucket *tb, const struct inet_bind2_bucket *tb2, /* may be null */ bool relax, bool reuseport_ok) { - bool reuseport_cb_ok; - struct sock_reuseport *reuseport_cb; kuid_t uid = sock_i_uid((struct sock *)sk); + struct sock_reuseport *reuseport_cb; + bool reuseport_cb_ok; + struct sock *sk2; rcu_read_lock(); reuseport_cb = rcu_dereference(sk->sk_reuseport_cb); @@ -249,32 +248,29 @@ static int inet_csk_bind_conflict(const struct sock *sk, reuseport_cb_ok = !reuseport_cb || READ_ONCE(reuseport_cb->num_closed_socks); rcu_read_unlock(); - /* - * Unlike other sk lookup places we do not check + /* Conflicts with an existing IPV6_ADDR_ANY (if ipv6) or INADDR_ANY (if + * ipv4) should have been checked already. We need to do these two + * checks separately because their spinlocks have to be acquired/released + * independently of each other, to prevent possible deadlocks + */ + if (inet_use_bhash2_on_bind(sk)) + return tb2 && inet_bhash2_conflict(sk, tb2, uid, relax, + reuseport_cb_ok, reuseport_ok); + + /* Unlike other sk lookup places we do not check * for sk_net here, since _all_ the socks listed * in tb->owners and tb2->owners list belong * to the same net - the one this bucket belongs to. */ + sk_for_each_bound_bhash(sk2, tb2, tb) { + if (!inet_bind_conflict(sk, sk2, uid, relax, reuseport_cb_ok, reuseport_ok)) + continue; - if (!inet_use_bhash2_on_bind(sk)) { - struct sock *sk2; - - sk_for_each_bound(sk2, &tb->owners) - if (inet_bind_conflict(sk, sk2, uid, relax, - reuseport_cb_ok, reuseport_ok) && - inet_rcv_saddr_equal(sk, sk2, true)) - return true; - - return false; + if (inet_rcv_saddr_equal(sk, sk2, true)) + return true; } - /* Conflicts with an existing IPV6_ADDR_ANY (if ipv6) or INADDR_ANY (if - * ipv4) should have been checked already. We need to do these two - * checks separately because their spinlocks have to be acquired/released - * independently of each other, to prevent possible deadlocks - */ - return tb2 && inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, - reuseport_ok); + return false; } /* Determine if there is a bind conflict with an existing IPV6_ADDR_ANY (if ipv6) or @@ -457,7 +453,7 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb, kuid_t uid = sock_i_uid(sk); bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN; - if (hlist_empty(&tb->owners)) { + if (hlist_empty(&tb->bhash2)) { tb->fastreuse = reuse; if (sk->sk_reuseport) { tb->fastreuseport = FASTREUSEPORT_ANY; @@ -549,7 +545,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) } if (!found_port) { - if (!hlist_empty(&tb->owners)) { + if (!hlist_empty(&tb->bhash2)) { if (sk->sk_reuse == SK_FORCE_REUSE || (tb->fastreuse > 0 && reuse) || sk_reuseport_match(tb, sk)) @@ -569,7 +565,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) if (!tb2) { tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep, - net, head2, port, l3mdev, sk); + net, head2, tb, sk); if (!tb2) goto fail_unlock; bhash2_created = true; @@ -591,11 +587,10 @@ success: fail_unlock: if (ret) { + if (bhash2_created) + inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, tb2); if (bhash_created) inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb); - if (bhash2_created) - inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, - tb2); } if (head2_lock_acquired) spin_unlock(&head2->lock); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 46b13962ad02..8e6b6aa0579e 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -1104,7 +1104,7 @@ resume_bind_walk: if (!net_eq(ib2_net(tb2), net)) continue; - sk_for_each_bound_bhash2(sk, &tb2->owners) { + sk_for_each_bound(sk, &tb2->owners) { struct inet_sock *inet = inet_sk(sk); if (num < s_num) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 9ff201bc4e6d..93e9193df544 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -76,7 +76,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, tb->port = snum; tb->fastreuse = 0; tb->fastreuseport = 0; - INIT_HLIST_HEAD(&tb->owners); + INIT_HLIST_HEAD(&tb->bhash2); hlist_add_head(&tb->node, &head->chain); } return tb; @@ -87,7 +87,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, */ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb) { - if (hlist_empty(&tb->owners)) { + if (hlist_empty(&tb->bhash2)) { __hlist_del(&tb->node); kmem_cache_free(cachep, tb); } @@ -100,47 +100,52 @@ bool inet_bind_bucket_match(const struct inet_bind_bucket *tb, const struct net tb->l3mdev == l3mdev; } -static void inet_bind2_bucket_init(struct inet_bind2_bucket *tb, +static void inet_bind2_bucket_init(struct inet_bind2_bucket *tb2, struct net *net, struct inet_bind_hashbucket *head, - unsigned short port, int l3mdev, + struct inet_bind_bucket *tb, const struct sock *sk) { - write_pnet(&tb->ib_net, net); - tb->l3mdev = l3mdev; - tb->port = port; + write_pnet(&tb2->ib_net, net); + tb2->l3mdev = tb->l3mdev; + tb2->port = tb->port; #if IS_ENABLED(CONFIG_IPV6) - tb->family = sk->sk_family; - if (sk->sk_family == AF_INET6) - tb->v6_rcv_saddr = sk->sk_v6_rcv_saddr; - else + BUILD_BUG_ON(USHRT_MAX < (IPV6_ADDR_ANY | IPV6_ADDR_MAPPED)); + if (sk->sk_family == AF_INET6) { + tb2->addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr); + tb2->v6_rcv_saddr = sk->sk_v6_rcv_saddr; + } else { + tb2->addr_type = IPV6_ADDR_MAPPED; + ipv6_addr_set_v4mapped(sk->sk_rcv_saddr, &tb2->v6_rcv_saddr); + } +#else + tb2->rcv_saddr = sk->sk_rcv_saddr; #endif - tb->rcv_saddr = sk->sk_rcv_saddr; - INIT_HLIST_HEAD(&tb->owners); - INIT_HLIST_HEAD(&tb->deathrow); - hlist_add_head(&tb->node, &head->chain); + INIT_HLIST_HEAD(&tb2->owners); + hlist_add_head(&tb2->node, &head->chain); + hlist_add_head(&tb2->bhash_node, &tb->bhash2); } struct inet_bind2_bucket *inet_bind2_bucket_create(struct kmem_cache *cachep, struct net *net, struct inet_bind_hashbucket *head, - unsigned short port, - int l3mdev, + struct inet_bind_bucket *tb, const struct sock *sk) { - struct inet_bind2_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC); + struct inet_bind2_bucket *tb2 = kmem_cache_alloc(cachep, GFP_ATOMIC); - if (tb) - inet_bind2_bucket_init(tb, net, head, port, l3mdev, sk); + if (tb2) + inet_bind2_bucket_init(tb2, net, head, tb, sk); - return tb; + return tb2; } /* Caller must hold hashbucket lock for this tb with local BH disabled */ void inet_bind2_bucket_destroy(struct kmem_cache *cachep, struct inet_bind2_bucket *tb) { - if (hlist_empty(&tb->owners) && hlist_empty(&tb->deathrow)) { + if (hlist_empty(&tb->owners)) { __hlist_del(&tb->node); + __hlist_del(&tb->bhash_node); kmem_cache_free(cachep, tb); } } @@ -149,18 +154,11 @@ static bool inet_bind2_bucket_addr_match(const struct inet_bind2_bucket *tb2, const struct sock *sk) { #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family != tb2->family) { - if (sk->sk_family == AF_INET) - return ipv6_addr_v4mapped(&tb2->v6_rcv_saddr) && - tb2->v6_rcv_saddr.s6_addr32[3] == sk->sk_rcv_saddr; - - return ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr) && - sk->sk_v6_rcv_saddr.s6_addr32[3] == tb2->rcv_saddr; - } - if (sk->sk_family == AF_INET6) - return ipv6_addr_equal(&tb2->v6_rcv_saddr, - &sk->sk_v6_rcv_saddr); + return ipv6_addr_equal(&tb2->v6_rcv_saddr, &sk->sk_v6_rcv_saddr); + + if (tb2->addr_type != IPV6_ADDR_MAPPED) + return false; #endif return tb2->rcv_saddr == sk->sk_rcv_saddr; } @@ -169,10 +167,9 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, struct inet_bind2_bucket *tb2, unsigned short port) { inet_sk(sk)->inet_num = port; - sk_add_bind_node(sk, &tb->owners); inet_csk(sk)->icsk_bind_hash = tb; - sk_add_bind2_node(sk, &tb2->owners); inet_csk(sk)->icsk_bind2_hash = tb2; + sk_add_bind_node(sk, &tb2->owners); } /* @@ -192,21 +189,20 @@ static void __inet_put_port(struct sock *sk) spin_lock(&head->lock); tb = inet_csk(sk)->icsk_bind_hash; - __sk_del_bind_node(sk); inet_csk(sk)->icsk_bind_hash = NULL; inet_sk(sk)->inet_num = 0; - inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); spin_lock(&head2->lock); if (inet_csk(sk)->icsk_bind2_hash) { struct inet_bind2_bucket *tb2 = inet_csk(sk)->icsk_bind2_hash; - __sk_del_bind2_node(sk); + __sk_del_bind_node(sk); inet_csk(sk)->icsk_bind2_hash = NULL; inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2); } spin_unlock(&head2->lock); + inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); spin_unlock(&head->lock); } @@ -275,8 +271,7 @@ bhash2_find: tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, child); if (!tb2) { tb2 = inet_bind2_bucket_create(table->bind2_bucket_cachep, - net, head2, port, - l3mdev, child); + net, head2, tb, child); if (!tb2) goto error; } @@ -836,16 +831,15 @@ bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb, const return false; #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family != tb->family) { - if (sk->sk_family == AF_INET) - return ipv6_addr_any(&tb->v6_rcv_saddr) || - ipv6_addr_v4mapped_any(&tb->v6_rcv_saddr); + if (tb->addr_type == IPV6_ADDR_ANY) + return true; + if (tb->addr_type != IPV6_ADDR_MAPPED) return false; - } - if (sk->sk_family == AF_INET6) - return ipv6_addr_any(&tb->v6_rcv_saddr); + if (sk->sk_family == AF_INET6 && + !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) + return false; #endif return tb->rcv_saddr == 0; } @@ -942,7 +936,7 @@ static int __inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family, spin_lock_bh(&head->lock); spin_lock(&head2->lock); - __sk_del_bind2_node(sk); + __sk_del_bind_node(sk); inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, inet_csk(sk)->icsk_bind2_hash); spin_unlock(&head2->lock); @@ -957,10 +951,10 @@ static int __inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family, tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk); if (!tb2) { tb2 = new_tb2; - inet_bind2_bucket_init(tb2, net, head2, port, l3mdev, sk); + inet_bind2_bucket_init(tb2, net, head2, inet_csk(sk)->icsk_bind_hash, sk); } - sk_add_bind2_node(sk, &tb2->owners); inet_csk(sk)->icsk_bind2_hash = tb2; + sk_add_bind_node(sk, &tb2->owners); spin_unlock(&head2->lock); spin_unlock_bh(&head->lock); @@ -1064,7 +1058,7 @@ other_parity_scan: if (tb->fastreuse >= 0 || tb->fastreuseport >= 0) goto next_port; - WARN_ON(hlist_empty(&tb->owners)); + WARN_ON(hlist_empty(&tb->bhash2)); if (!check_established(death_row, sk, port, &tw)) goto ok; @@ -1104,7 +1098,7 @@ ok: tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk); if (!tb2) { tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep, net, - head2, port, l3mdev, sk); + head2, tb, sk); if (!tb2) goto error; } diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index dd37a5bf6881..5befa4de5b24 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -35,13 +35,11 @@ void inet_twsk_bind_unhash(struct inet_timewait_sock *tw, if (!tb) return; - __hlist_del(&tw->tw_bind_node); + __sk_del_bind_node((struct sock *)tw); tw->tw_tb = NULL; - inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); - - __hlist_del(&tw->tw_bind2_node); tw->tw_tb2 = NULL; inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2); + inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); __sock_put((struct sock *)tw); } @@ -94,18 +92,6 @@ static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw, hlist_nulls_add_head_rcu(&tw->tw_node, list); } -static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, - struct hlist_head *list) -{ - hlist_add_head(&tw->tw_bind_node, list); -} - -static void inet_twsk_add_bind2_node(struct inet_timewait_sock *tw, - struct hlist_head *list) -{ - hlist_add_head(&tw->tw_bind2_node, list); -} - /* * Enter the time wait state. This is called with locally disabled BH. * Essentially we whip up a timewait bucket, copy the relevant info into it @@ -133,11 +119,10 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, tw->tw_tb = icsk->icsk_bind_hash; WARN_ON(!icsk->icsk_bind_hash); - inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); tw->tw_tb2 = icsk->icsk_bind2_hash; WARN_ON(!icsk->icsk_bind2_hash); - inet_twsk_add_bind2_node(tw, &tw->tw_tb2->deathrow); + sk_add_bind_node((struct sock *)tw, &tw->tw_tb2->owners); spin_unlock(&bhead2->lock); spin_unlock(&bhead->lock); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 0063a237253b..9d6f59531b3a 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -253,7 +253,7 @@ static int __net_init ipmr_rules_init(struct net *net) goto err1; } - err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0); + err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT); if (err < 0) goto err2; diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 7c2003833010..7523c4baef35 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -475,11 +475,11 @@ static int __net_init fib6_rules_net_init(struct net *net) if (IS_ERR(ops)) return PTR_ERR(ops); - err = fib_default_rule_add(ops, 0, RT6_TABLE_LOCAL, 0); + err = fib_default_rule_add(ops, 0, RT6_TABLE_LOCAL); if (err) goto out_fib6_rules_ops; - err = fib_default_rule_add(ops, 0x7FFE, RT6_TABLE_MAIN, 0); + err = fib_default_rule_add(ops, 0x7FFE, RT6_TABLE_MAIN); if (err) goto out_fib6_rules_ops; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 7772f42ff2b9..4fc2cae0d116 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -160,8 +160,6 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh) INIT_LIST_HEAD(&f6i->fib6_siblings); refcount_set(&f6i->fib6_ref, 1); - INIT_HLIST_NODE(&f6i->gc_link); - return f6i; } @@ -248,7 +246,6 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id) net->ipv6.fib6_null_entry); table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; inet_peer_base_init(&table->tb6_peers); - INIT_HLIST_HEAD(&table->tb6_gc_hlist); } return table; @@ -1060,8 +1057,6 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn, lockdep_is_held(&table->tb6_lock)); } } - - fib6_clean_expires_locked(rt); } /* @@ -1123,10 +1118,9 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, if (!(iter->fib6_flags & RTF_EXPIRES)) return -EEXIST; if (!(rt->fib6_flags & RTF_EXPIRES)) - fib6_clean_expires_locked(iter); + fib6_clean_expires(iter); else - fib6_set_expires_locked(iter, - rt->expires); + fib6_set_expires(iter, rt->expires); if (rt->fib6_pmtu) fib6_metric_set(iter, RTAX_MTU, @@ -1485,10 +1479,6 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, if (rt->nh) list_add(&rt->nh_list, &rt->nh->f6i_list); __fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net)); - - if (fib6_has_expires(rt)) - hlist_add_head(&rt->gc_link, &table->tb6_gc_hlist); - fib6_start_gc(info->nl_net, rt); } @@ -2291,8 +2281,9 @@ static void fib6_flush_trees(struct net *net) * Garbage collection */ -static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args) +static int fib6_age(struct fib6_info *rt, void *arg) { + struct fib6_gc_args *gc_args = arg; unsigned long now = jiffies; /* @@ -2300,7 +2291,7 @@ static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args) * Routes are expired even if they are in use. */ - if (fib6_has_expires(rt) && rt->expires) { + if (rt->fib6_flags & RTF_EXPIRES && rt->expires) { if (time_after(now, rt->expires)) { RT6_TRACE("expiring %p\n", rt); return -1; @@ -2317,40 +2308,6 @@ static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args) return 0; } -static void fib6_gc_table(struct net *net, - struct fib6_table *tb6, - struct fib6_gc_args *gc_args) -{ - struct fib6_info *rt; - struct hlist_node *n; - struct nl_info info = { - .nl_net = net, - .skip_notify = false, - }; - - hlist_for_each_entry_safe(rt, n, &tb6->tb6_gc_hlist, gc_link) - if (fib6_age(rt, gc_args) == -1) - fib6_del(rt, &info); -} - -static void fib6_gc_all(struct net *net, struct fib6_gc_args *gc_args) -{ - struct fib6_table *table; - struct hlist_head *head; - unsigned int h; - - rcu_read_lock(); - for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { - head = &net->ipv6.fib_table_hash[h]; - hlist_for_each_entry_rcu(table, head, tb6_hlist) { - spin_lock_bh(&table->tb6_lock); - fib6_gc_table(net, table, gc_args); - spin_unlock_bh(&table->tb6_lock); - } - } - rcu_read_unlock(); -} - void fib6_run_gc(unsigned long expires, struct net *net, bool force) { struct fib6_gc_args gc_args; @@ -2366,7 +2323,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force) net->ipv6.sysctl.ip6_rt_gc_interval; gc_args.more = 0; - fib6_gc_all(net, &gc_args); + fib6_clean_all(net, fib6_age, &gc_args); now = jiffies; net->ipv6.ip6_rt_last_gc = now; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 30ca064b76ef..9782c180fee6 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -242,7 +242,7 @@ static int __net_init ip6mr_rules_init(struct net *net) goto err1; } - err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0); + err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT); if (err < 0) goto err2; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b132feae3393..ea1dec8448fc 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3763,10 +3763,10 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, rt->dst_nocount = true; if (cfg->fc_flags & RTF_EXPIRES) - fib6_set_expires_locked(rt, jiffies + - clock_t_to_jiffies(cfg->fc_expires)); + fib6_set_expires(rt, jiffies + + clock_t_to_jiffies(cfg->fc_expires)); else - fib6_clean_expires_locked(rt); + fib6_clean_expires(rt); if (cfg->fc_protocol == RTPROT_UNSPEC) cfg->fc_protocol = RTPROT_BOOT; diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 0ed6e34d6edd..6334f64f04d5 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -67,7 +67,7 @@ static int iucv_bus_match(struct device *dev, struct device_driver *drv) return 0; } -struct bus_type iucv_bus = { +const struct bus_type iucv_bus = { .name = "iucv", .match = iucv_bus_match, }; diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 65d1f6755f98..1184d40167b8 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -634,7 +634,7 @@ retry: msize = 0; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - msize += skb_shinfo(skb)->frags[i].bv_len; + msize += skb_frag_size(&skb_shinfo(skb)->frags[i]); iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, skb_shinfo(skb)->frags, skb_shinfo(skb)->nr_frags, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index e0a4f9eecb2c..489dd97f5172 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1788,10 +1788,10 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, lockdep_is_held(&local->hw.wiphy->mtx)); /* - * If there are no changes, then accept a link that doesn't exist, + * If there are no changes, then accept a link that exist, * unless it's a new link. */ - if (params->link_id < 0 && !new_link && + if (params->link_id >= 0 && !new_link && !params->link_mac && !params->txpwr_set && !params->supported_rates_len && !params->ht_capa && !params->vht_capa && diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index 7938ec87ef25..d3820333cd59 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2015 Intel Deutschland GmbH - * Copyright (C) 2022 Intel Corporation + * Copyright (C) 2022-2023 Intel Corporation */ #include <net/mac80211.h> #include "ieee80211_i.h" @@ -589,6 +589,10 @@ int drv_change_sta_links(struct ieee80211_local *local, if (ret) return ret; + /* during reconfig don't add it to debugfs again */ + if (local->in_reconfig) + return 0; + for_each_set_bit(link_id, &links_to_add, IEEE80211_MLD_MAX_NUM_LINKS) { link_sta = rcu_dereference_protected(info->link[link_id], lockdep_is_held(&local->hw.wiphy->mtx)); diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index dbabeefe4515..28bf794f67f8 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -1068,8 +1068,8 @@ mesh_plink_get_event(struct ieee80211_sub_if_data *sdata, case WLAN_SP_MESH_PEERING_OPEN: if (!matches_local) event = OPN_RJCT; - if (!mesh_plink_free_count(sdata) || - (sta->mesh->plid && sta->mesh->plid != plid)) + else if (!mesh_plink_free_count(sdata) || + (sta->mesh->plid && sta->mesh->plid != plid)) event = OPN_IGNR; else event = OPN_ACPT; @@ -1077,9 +1077,9 @@ mesh_plink_get_event(struct ieee80211_sub_if_data *sdata, case WLAN_SP_MESH_PEERING_CONFIRM: if (!matches_local) event = CNF_RJCT; - if (!mesh_plink_free_count(sdata) || - sta->mesh->llid != llid || - (sta->mesh->plid && sta->mesh->plid != plid)) + else if (!mesh_plink_free_count(sdata) || + sta->mesh->llid != llid || + (sta->mesh->plid && sta->mesh->plid != plid)) event = CNF_IGNR; else event = CNF_ACPT; @@ -1247,6 +1247,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, return; } elems = ieee802_11_parse_elems(baseaddr, len - baselen, true, NULL); - mesh_process_plink_frame(sdata, mgmt, elems, rx_status); - kfree(elems); + if (elems) { + mesh_process_plink_frame(sdata, mgmt, elems, rx_status); + kfree(elems); + } } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5aef4f3dcbf1..073105deb424 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -5796,7 +5796,7 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata, { const struct ieee80211_multi_link_elem *ml; const struct element *sub; - size_t ml_len; + ssize_t ml_len; unsigned long removed_links = 0; u16 link_removal_timeout[IEEE80211_MLD_MAX_NUM_LINKS] = {}; u8 link_id; @@ -5812,6 +5812,8 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata, elems->scratch + elems->scratch_len - elems->scratch_pos, WLAN_EID_FRAGMENT); + if (ml_len < 0) + return; elems->ml_reconf = (const void *)elems->scratch_pos; elems->ml_reconf_len = ml_len; diff --git a/net/mac802154/cfg.c b/net/mac802154/cfg.c index 5c3cb019f751..ef7f23af043f 100644 --- a/net/mac802154/cfg.c +++ b/net/mac802154/cfg.c @@ -315,6 +315,179 @@ static int mac802154_stop_beacons(struct wpan_phy *wpan_phy, return mac802154_stop_beacons_locked(local, sdata); } +static int mac802154_associate(struct wpan_phy *wpan_phy, + struct wpan_dev *wpan_dev, + struct ieee802154_addr *coord) +{ + struct ieee802154_local *local = wpan_phy_priv(wpan_phy); + u64 ceaddr = swab64((__force u64)coord->extended_addr); + struct ieee802154_sub_if_data *sdata; + struct ieee802154_pan_device *parent; + __le16 short_addr; + int ret; + + ASSERT_RTNL(); + + sdata = IEEE802154_WPAN_DEV_TO_SUB_IF(wpan_dev); + + if (wpan_dev->parent) { + dev_err(&sdata->dev->dev, + "Device %8phC is already associated\n", &ceaddr); + return -EPERM; + } + + if (coord->mode == IEEE802154_SHORT_ADDRESSING) + return -EINVAL; + + parent = kzalloc(sizeof(*parent), GFP_KERNEL); + if (!parent) + return -ENOMEM; + + parent->pan_id = coord->pan_id; + parent->mode = coord->mode; + parent->extended_addr = coord->extended_addr; + parent->short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_BROADCAST); + + /* Set the PAN ID hardware address filter beforehand to avoid dropping + * the association response with a destination PAN ID field set to the + * "new" PAN ID. + */ + if (local->hw.flags & IEEE802154_HW_AFILT) { + ret = drv_set_pan_id(local, coord->pan_id); + if (ret < 0) + goto free_parent; + } + + ret = mac802154_perform_association(sdata, parent, &short_addr); + if (ret) + goto reset_panid; + + if (local->hw.flags & IEEE802154_HW_AFILT) { + ret = drv_set_short_addr(local, short_addr); + if (ret < 0) + goto reset_panid; + } + + wpan_dev->pan_id = coord->pan_id; + wpan_dev->short_addr = short_addr; + wpan_dev->parent = parent; + + return 0; + +reset_panid: + if (local->hw.flags & IEEE802154_HW_AFILT) + drv_set_pan_id(local, cpu_to_le16(IEEE802154_PAN_ID_BROADCAST)); + +free_parent: + kfree(parent); + return ret; +} + +static int mac802154_disassociate_from_parent(struct wpan_phy *wpan_phy, + struct wpan_dev *wpan_dev) +{ + struct ieee802154_local *local = wpan_phy_priv(wpan_phy); + struct ieee802154_pan_device *child, *tmp; + struct ieee802154_sub_if_data *sdata; + unsigned int max_assoc; + u64 eaddr; + int ret; + + sdata = IEEE802154_WPAN_DEV_TO_SUB_IF(wpan_dev); + + /* Start by disassociating all the children and preventing new ones to + * attempt associations. + */ + max_assoc = cfg802154_set_max_associations(wpan_dev, 0); + list_for_each_entry_safe(child, tmp, &wpan_dev->children, node) { + ret = mac802154_send_disassociation_notif(sdata, child, + IEEE802154_COORD_WISHES_DEVICE_TO_LEAVE); + if (ret) { + eaddr = swab64((__force u64)child->extended_addr); + dev_err(&sdata->dev->dev, + "Disassociation with %8phC may have failed (%d)\n", + &eaddr, ret); + } + + list_del(&child->node); + } + + ret = mac802154_send_disassociation_notif(sdata, wpan_dev->parent, + IEEE802154_DEVICE_WISHES_TO_LEAVE); + if (ret) { + eaddr = swab64((__force u64)wpan_dev->parent->extended_addr); + dev_err(&sdata->dev->dev, + "Disassociation from %8phC may have failed (%d)\n", + &eaddr, ret); + } + + ret = 0; + + kfree(wpan_dev->parent); + wpan_dev->parent = NULL; + wpan_dev->pan_id = cpu_to_le16(IEEE802154_PAN_ID_BROADCAST); + wpan_dev->short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_BROADCAST); + + if (local->hw.flags & IEEE802154_HW_AFILT) { + ret = drv_set_pan_id(local, wpan_dev->pan_id); + if (ret < 0) + goto reset_mac_assoc; + + ret = drv_set_short_addr(local, wpan_dev->short_addr); + if (ret < 0) + goto reset_mac_assoc; + } + +reset_mac_assoc: + cfg802154_set_max_associations(wpan_dev, max_assoc); + + return ret; +} + +static int mac802154_disassociate_child(struct wpan_phy *wpan_phy, + struct wpan_dev *wpan_dev, + struct ieee802154_pan_device *child) +{ + struct ieee802154_sub_if_data *sdata; + int ret; + + sdata = IEEE802154_WPAN_DEV_TO_SUB_IF(wpan_dev); + + ret = mac802154_send_disassociation_notif(sdata, child, + IEEE802154_COORD_WISHES_DEVICE_TO_LEAVE); + if (ret) + return ret; + + list_del(&child->node); + wpan_dev->nchildren--; + kfree(child); + + return 0; +} + +static int mac802154_disassociate(struct wpan_phy *wpan_phy, + struct wpan_dev *wpan_dev, + struct ieee802154_addr *target) +{ + u64 teaddr = swab64((__force u64)target->extended_addr); + struct ieee802154_pan_device *pan_device; + + ASSERT_RTNL(); + + if (cfg802154_device_is_parent(wpan_dev, target)) + return mac802154_disassociate_from_parent(wpan_phy, wpan_dev); + + pan_device = cfg802154_device_is_child(wpan_dev, target); + if (pan_device) + return mac802154_disassociate_child(wpan_phy, wpan_dev, + pan_device); + + dev_err(&wpan_dev->netdev->dev, + "Device %8phC is not associated with us\n", &teaddr); + + return -EINVAL; +} + #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL static void ieee802154_get_llsec_table(struct wpan_phy *wpan_phy, @@ -526,6 +699,8 @@ const struct cfg802154_ops mac802154_config_ops = { .abort_scan = mac802154_abort_scan, .send_beacons = mac802154_send_beacons, .stop_beacons = mac802154_stop_beacons, + .associate = mac802154_associate, + .disassociate = mac802154_disassociate, #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL .get_llsec_table = ieee802154_get_llsec_table, .lock_llsec_table = ieee802154_lock_llsec_table, diff --git a/net/mac802154/ieee802154_i.h b/net/mac802154/ieee802154_i.h index c347ec9ff8c9..08dd521a51a5 100644 --- a/net/mac802154/ieee802154_i.h +++ b/net/mac802154/ieee802154_i.h @@ -24,6 +24,7 @@ enum ieee802154_ongoing { IEEE802154_IS_SCANNING = BIT(0), IEEE802154_IS_BEACONING = BIT(1), + IEEE802154_IS_ASSOCIATING = BIT(2), }; /* mac802154 device private data */ @@ -74,6 +75,13 @@ struct ieee802154_local { struct list_head rx_mac_cmd_list; struct work_struct rx_mac_cmd_work; + /* Association */ + struct ieee802154_pan_device *assoc_dev; + struct completion assoc_done; + __le16 assoc_addr; + u8 assoc_status; + struct work_struct assoc_work; + bool started; bool suspended; unsigned long ongoing; @@ -296,6 +304,25 @@ static inline bool mac802154_is_beaconing(struct ieee802154_local *local) void mac802154_rx_mac_cmd_worker(struct work_struct *work); +int mac802154_perform_association(struct ieee802154_sub_if_data *sdata, + struct ieee802154_pan_device *coord, + __le16 *short_addr); +int mac802154_process_association_resp(struct ieee802154_sub_if_data *sdata, + struct sk_buff *skb); + +static inline bool mac802154_is_associating(struct ieee802154_local *local) +{ + return test_bit(IEEE802154_IS_ASSOCIATING, &local->ongoing); +} + +int mac802154_send_disassociation_notif(struct ieee802154_sub_if_data *sdata, + struct ieee802154_pan_device *target, + u8 reason); +int mac802154_process_disassociation_notif(struct ieee802154_sub_if_data *sdata, + struct sk_buff *skb); +int mac802154_process_association_req(struct ieee802154_sub_if_data *sdata, + struct sk_buff *skb); + /* interface handling */ int ieee802154_iface_init(void); void ieee802154_iface_exit(void); diff --git a/net/mac802154/main.c b/net/mac802154/main.c index 357ece67432b..9ab7396668d2 100644 --- a/net/mac802154/main.c +++ b/net/mac802154/main.c @@ -103,6 +103,8 @@ ieee802154_alloc_hw(size_t priv_data_len, const struct ieee802154_ops *ops) INIT_DELAYED_WORK(&local->beacon_work, mac802154_beacon_worker); INIT_WORK(&local->rx_mac_cmd_work, mac802154_rx_mac_cmd_worker); + init_completion(&local->assoc_done); + /* init supported flags with 802.15.4 default ranges */ phy->supported.max_minbe = 8; phy->supported.min_maxbe = 3; diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c index e2434b4fe514..e40a988d6c80 100644 --- a/net/mac802154/rx.c +++ b/net/mac802154/rx.c @@ -93,6 +93,31 @@ void mac802154_rx_mac_cmd_worker(struct work_struct *work) queue_delayed_work(local->mac_wq, &local->beacon_work, 0); break; + + case IEEE802154_CMD_ASSOCIATION_RESP: + dev_dbg(&mac_pkt->sdata->dev->dev, "processing ASSOC RESP\n"); + if (!mac802154_is_associating(local)) + break; + + mac802154_process_association_resp(mac_pkt->sdata, mac_pkt->skb); + break; + + case IEEE802154_CMD_ASSOCIATION_REQ: + dev_dbg(&mac_pkt->sdata->dev->dev, "processing ASSOC REQ\n"); + if (mac_pkt->sdata->wpan_dev.iftype != NL802154_IFTYPE_COORD) + break; + + mac802154_process_association_req(mac_pkt->sdata, mac_pkt->skb); + break; + + case IEEE802154_CMD_DISASSOCIATION_NOTIFY: + dev_dbg(&mac_pkt->sdata->dev->dev, "processing DISASSOC NOTIF\n"); + if (mac_pkt->sdata->wpan_dev.iftype != NL802154_IFTYPE_COORD) + break; + + mac802154_process_disassociation_notif(mac_pkt->sdata, mac_pkt->skb); + break; + default: break; } @@ -131,12 +156,15 @@ ieee802154_subif_frame(struct ieee802154_sub_if_data *sdata, switch (mac_cb(skb)->dest.mode) { case IEEE802154_ADDR_NONE: - if (hdr->source.mode != IEEE802154_ADDR_NONE) - /* FIXME: check if we are PAN coordinator */ - skb->pkt_type = PACKET_OTHERHOST; - else + if (hdr->source.mode == IEEE802154_ADDR_NONE) /* ACK comes with both addresses empty */ skb->pkt_type = PACKET_HOST; + else if (!wpan_dev->parent) + /* No dest means PAN coordinator is the recipient */ + skb->pkt_type = PACKET_HOST; + else + /* We are not the PAN coordinator, just relaying */ + skb->pkt_type = PACKET_OTHERHOST; break; case IEEE802154_ADDR_LONG: if (mac_cb(skb)->dest.pan_id != span && diff --git a/net/mac802154/scan.c b/net/mac802154/scan.c index d9658f2c4ae6..1c0eeaa76560 100644 --- a/net/mac802154/scan.c +++ b/net/mac802154/scan.c @@ -466,6 +466,7 @@ int mac802154_send_beacons_locked(struct ieee802154_sub_if_data *sdata, struct cfg802154_beacon_request *request) { struct ieee802154_local *local = sdata->local; + struct wpan_dev *wpan_dev = &sdata->wpan_dev; ASSERT_RTNL(); @@ -495,8 +496,7 @@ int mac802154_send_beacons_locked(struct ieee802154_sub_if_data *sdata, local->beacon.mac_pl.superframe_order = request->interval; local->beacon.mac_pl.final_cap_slot = 0xf; local->beacon.mac_pl.battery_life_ext = 0; - /* TODO: Fill this field with the coordinator situation in the network */ - local->beacon.mac_pl.pan_coordinator = 1; + local->beacon.mac_pl.pan_coordinator = !wpan_dev->parent; local->beacon.mac_pl.assoc_permit = 1; if (request->interval == IEEE802154_ACTIVE_SCAN_DURATION) @@ -510,3 +510,406 @@ int mac802154_send_beacons_locked(struct ieee802154_sub_if_data *sdata, return 0; } + +int mac802154_perform_association(struct ieee802154_sub_if_data *sdata, + struct ieee802154_pan_device *coord, + __le16 *short_addr) +{ + u64 ceaddr = swab64((__force u64)coord->extended_addr); + struct ieee802154_association_req_frame frame = {}; + struct ieee802154_local *local = sdata->local; + struct wpan_dev *wpan_dev = &sdata->wpan_dev; + struct sk_buff *skb; + int ret; + + frame.mhr.fc.type = IEEE802154_FC_TYPE_MAC_CMD; + frame.mhr.fc.security_enabled = 0; + frame.mhr.fc.frame_pending = 0; + frame.mhr.fc.ack_request = 1; /* We always expect an ack here */ + frame.mhr.fc.intra_pan = 0; + frame.mhr.fc.dest_addr_mode = (coord->mode == IEEE802154_ADDR_LONG) ? + IEEE802154_EXTENDED_ADDRESSING : IEEE802154_SHORT_ADDRESSING; + frame.mhr.fc.version = IEEE802154_2003_STD; + frame.mhr.fc.source_addr_mode = IEEE802154_EXTENDED_ADDRESSING; + frame.mhr.source.mode = IEEE802154_ADDR_LONG; + frame.mhr.source.pan_id = cpu_to_le16(IEEE802154_PANID_BROADCAST); + frame.mhr.source.extended_addr = wpan_dev->extended_addr; + frame.mhr.dest.mode = coord->mode; + frame.mhr.dest.pan_id = coord->pan_id; + if (coord->mode == IEEE802154_ADDR_LONG) + frame.mhr.dest.extended_addr = coord->extended_addr; + else + frame.mhr.dest.short_addr = coord->short_addr; + frame.mhr.seq = atomic_inc_return(&wpan_dev->dsn) & 0xFF; + frame.mac_pl.cmd_id = IEEE802154_CMD_ASSOCIATION_REQ; + frame.assoc_req_pl.device_type = 1; + frame.assoc_req_pl.power_source = 1; + frame.assoc_req_pl.rx_on_when_idle = 1; + frame.assoc_req_pl.alloc_addr = 1; + + skb = alloc_skb(IEEE802154_MAC_CMD_SKB_SZ + sizeof(frame.assoc_req_pl), + GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + skb->dev = sdata->dev; + + ret = ieee802154_mac_cmd_push(skb, &frame, &frame.assoc_req_pl, + sizeof(frame.assoc_req_pl)); + if (ret) { + kfree_skb(skb); + return ret; + } + + local->assoc_dev = coord; + reinit_completion(&local->assoc_done); + set_bit(IEEE802154_IS_ASSOCIATING, &local->ongoing); + + ret = ieee802154_mlme_tx_one_locked(local, sdata, skb); + if (ret) { + if (ret > 0) + ret = (ret == IEEE802154_NO_ACK) ? -EREMOTEIO : -EIO; + dev_warn(&sdata->dev->dev, + "No ASSOC REQ ACK received from %8phC\n", &ceaddr); + goto clear_assoc; + } + + ret = wait_for_completion_killable_timeout(&local->assoc_done, 10 * HZ); + if (ret <= 0) { + dev_warn(&sdata->dev->dev, + "No ASSOC RESP received from %8phC\n", &ceaddr); + ret = -ETIMEDOUT; + goto clear_assoc; + } + + if (local->assoc_status != IEEE802154_ASSOCIATION_SUCCESSFUL) { + if (local->assoc_status == IEEE802154_PAN_AT_CAPACITY) + ret = -ERANGE; + else + ret = -EPERM; + + dev_warn(&sdata->dev->dev, + "Negative ASSOC RESP received from %8phC: %s\n", &ceaddr, + local->assoc_status == IEEE802154_PAN_AT_CAPACITY ? + "PAN at capacity" : "access denied"); + } + + ret = 0; + *short_addr = local->assoc_addr; + +clear_assoc: + clear_bit(IEEE802154_IS_ASSOCIATING, &local->ongoing); + local->assoc_dev = NULL; + + return ret; +} + +int mac802154_process_association_resp(struct ieee802154_sub_if_data *sdata, + struct sk_buff *skb) +{ + struct ieee802154_addr *src = &mac_cb(skb)->source; + struct ieee802154_addr *dest = &mac_cb(skb)->dest; + u64 deaddr = swab64((__force u64)dest->extended_addr); + struct ieee802154_local *local = sdata->local; + struct wpan_dev *wpan_dev = &sdata->wpan_dev; + struct ieee802154_assoc_resp_pl resp_pl = {}; + + if (skb->len != sizeof(resp_pl)) + return -EINVAL; + + if (unlikely(src->mode != IEEE802154_EXTENDED_ADDRESSING || + dest->mode != IEEE802154_EXTENDED_ADDRESSING)) + return -EINVAL; + + if (unlikely(dest->extended_addr != wpan_dev->extended_addr || + src->extended_addr != local->assoc_dev->extended_addr)) + return -ENODEV; + + memcpy(&resp_pl, skb->data, sizeof(resp_pl)); + local->assoc_addr = resp_pl.short_addr; + local->assoc_status = resp_pl.status; + + dev_dbg(&skb->dev->dev, + "ASSOC RESP 0x%x received from %8phC, getting short address %04x\n", + local->assoc_status, &deaddr, local->assoc_addr); + + complete(&local->assoc_done); + + return 0; +} + +int mac802154_send_disassociation_notif(struct ieee802154_sub_if_data *sdata, + struct ieee802154_pan_device *target, + u8 reason) +{ + struct ieee802154_disassociation_notif_frame frame = {}; + u64 teaddr = swab64((__force u64)target->extended_addr); + struct ieee802154_local *local = sdata->local; + struct wpan_dev *wpan_dev = &sdata->wpan_dev; + struct sk_buff *skb; + int ret; + + frame.mhr.fc.type = IEEE802154_FC_TYPE_MAC_CMD; + frame.mhr.fc.security_enabled = 0; + frame.mhr.fc.frame_pending = 0; + frame.mhr.fc.ack_request = 1; + frame.mhr.fc.intra_pan = 1; + frame.mhr.fc.dest_addr_mode = (target->mode == IEEE802154_ADDR_LONG) ? + IEEE802154_EXTENDED_ADDRESSING : IEEE802154_SHORT_ADDRESSING; + frame.mhr.fc.version = IEEE802154_2003_STD; + frame.mhr.fc.source_addr_mode = IEEE802154_EXTENDED_ADDRESSING; + frame.mhr.source.mode = IEEE802154_ADDR_LONG; + frame.mhr.source.pan_id = wpan_dev->pan_id; + frame.mhr.source.extended_addr = wpan_dev->extended_addr; + frame.mhr.dest.mode = target->mode; + frame.mhr.dest.pan_id = wpan_dev->pan_id; + if (target->mode == IEEE802154_ADDR_LONG) + frame.mhr.dest.extended_addr = target->extended_addr; + else + frame.mhr.dest.short_addr = target->short_addr; + frame.mhr.seq = atomic_inc_return(&wpan_dev->dsn) & 0xFF; + frame.mac_pl.cmd_id = IEEE802154_CMD_DISASSOCIATION_NOTIFY; + frame.disassoc_pl = reason; + + skb = alloc_skb(IEEE802154_MAC_CMD_SKB_SZ + sizeof(frame.disassoc_pl), + GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + skb->dev = sdata->dev; + + ret = ieee802154_mac_cmd_push(skb, &frame, &frame.disassoc_pl, + sizeof(frame.disassoc_pl)); + if (ret) { + kfree_skb(skb); + return ret; + } + + ret = ieee802154_mlme_tx_one_locked(local, sdata, skb); + if (ret) { + dev_warn(&sdata->dev->dev, + "No DISASSOC ACK received from %8phC\n", &teaddr); + if (ret > 0) + ret = (ret == IEEE802154_NO_ACK) ? -EREMOTEIO : -EIO; + return ret; + } + + dev_dbg(&sdata->dev->dev, "DISASSOC ACK received from %8phC\n", &teaddr); + return 0; +} + +static int +mac802154_send_association_resp_locked(struct ieee802154_sub_if_data *sdata, + struct ieee802154_pan_device *target, + struct ieee802154_assoc_resp_pl *assoc_resp_pl) +{ + u64 teaddr = swab64((__force u64)target->extended_addr); + struct ieee802154_association_resp_frame frame = {}; + struct ieee802154_local *local = sdata->local; + struct wpan_dev *wpan_dev = &sdata->wpan_dev; + struct sk_buff *skb; + int ret; + + frame.mhr.fc.type = IEEE802154_FC_TYPE_MAC_CMD; + frame.mhr.fc.security_enabled = 0; + frame.mhr.fc.frame_pending = 0; + frame.mhr.fc.ack_request = 1; /* We always expect an ack here */ + frame.mhr.fc.intra_pan = 1; + frame.mhr.fc.dest_addr_mode = IEEE802154_EXTENDED_ADDRESSING; + frame.mhr.fc.version = IEEE802154_2003_STD; + frame.mhr.fc.source_addr_mode = IEEE802154_EXTENDED_ADDRESSING; + frame.mhr.source.mode = IEEE802154_ADDR_LONG; + frame.mhr.source.extended_addr = wpan_dev->extended_addr; + frame.mhr.dest.mode = IEEE802154_ADDR_LONG; + frame.mhr.dest.pan_id = wpan_dev->pan_id; + frame.mhr.dest.extended_addr = target->extended_addr; + frame.mhr.seq = atomic_inc_return(&wpan_dev->dsn) & 0xFF; + frame.mac_pl.cmd_id = IEEE802154_CMD_ASSOCIATION_RESP; + + skb = alloc_skb(IEEE802154_MAC_CMD_SKB_SZ + sizeof(*assoc_resp_pl), + GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + skb->dev = sdata->dev; + + ret = ieee802154_mac_cmd_push(skb, &frame, assoc_resp_pl, + sizeof(*assoc_resp_pl)); + if (ret) { + kfree_skb(skb); + return ret; + } + + ret = ieee802154_mlme_tx_locked(local, sdata, skb); + if (ret) { + dev_warn(&sdata->dev->dev, + "No ASSOC RESP ACK received from %8phC\n", &teaddr); + if (ret > 0) + ret = (ret == IEEE802154_NO_ACK) ? -EREMOTEIO : -EIO; + return ret; + } + + return 0; +} + +int mac802154_process_association_req(struct ieee802154_sub_if_data *sdata, + struct sk_buff *skb) +{ + struct wpan_dev *wpan_dev = &sdata->wpan_dev; + struct ieee802154_addr *src = &mac_cb(skb)->source; + struct ieee802154_addr *dest = &mac_cb(skb)->dest; + struct ieee802154_assoc_resp_pl assoc_resp_pl = {}; + struct ieee802154_assoc_req_pl assoc_req_pl; + struct ieee802154_pan_device *child, *exchild; + struct ieee802154_addr tmp = {}; + u64 ceaddr; + int ret; + + if (skb->len != sizeof(assoc_req_pl)) + return -EINVAL; + + if (unlikely(src->mode != IEEE802154_EXTENDED_ADDRESSING)) + return -EINVAL; + + if (unlikely(dest->pan_id != wpan_dev->pan_id)) + return -ENODEV; + + if (dest->mode == IEEE802154_EXTENDED_ADDRESSING && + unlikely(dest->extended_addr != wpan_dev->extended_addr)) + return -ENODEV; + else if (dest->mode == IEEE802154_SHORT_ADDRESSING && + unlikely(dest->short_addr != wpan_dev->short_addr)) + return -ENODEV; + + if (wpan_dev->parent) { + dev_dbg(&sdata->dev->dev, + "Ignoring ASSOC REQ, not the PAN coordinator\n"); + return -ENODEV; + } + + mutex_lock(&wpan_dev->association_lock); + + memcpy(&assoc_req_pl, skb->data, sizeof(assoc_req_pl)); + if (assoc_req_pl.assoc_type) { + dev_err(&skb->dev->dev, "Fast associations not supported yet\n"); + ret = -EOPNOTSUPP; + goto unlock; + } + + child = kzalloc(sizeof(*child), GFP_KERNEL); + if (!child) { + ret = -ENOMEM; + goto unlock; + } + + child->extended_addr = src->extended_addr; + child->mode = IEEE802154_EXTENDED_ADDRESSING; + ceaddr = swab64((__force u64)child->extended_addr); + + if (wpan_dev->nchildren >= wpan_dev->max_associations) { + if (!wpan_dev->max_associations) + assoc_resp_pl.status = IEEE802154_PAN_ACCESS_DENIED; + else + assoc_resp_pl.status = IEEE802154_PAN_AT_CAPACITY; + assoc_resp_pl.short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_BROADCAST); + dev_dbg(&sdata->dev->dev, + "Refusing ASSOC REQ from child %8phC, %s\n", &ceaddr, + assoc_resp_pl.status == IEEE802154_PAN_ACCESS_DENIED ? + "access denied" : "too many children"); + } else { + assoc_resp_pl.status = IEEE802154_ASSOCIATION_SUCCESSFUL; + if (assoc_req_pl.alloc_addr) { + assoc_resp_pl.short_addr = cfg802154_get_free_short_addr(wpan_dev); + child->mode = IEEE802154_SHORT_ADDRESSING; + } else { + assoc_resp_pl.short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC); + } + child->short_addr = assoc_resp_pl.short_addr; + dev_dbg(&sdata->dev->dev, + "Accepting ASSOC REQ from child %8phC, providing short address 0x%04x\n", + &ceaddr, le16_to_cpu(child->short_addr)); + } + + ret = mac802154_send_association_resp_locked(sdata, child, &assoc_resp_pl); + if (ret || assoc_resp_pl.status != IEEE802154_ASSOCIATION_SUCCESSFUL) { + kfree(child); + goto unlock; + } + + dev_dbg(&sdata->dev->dev, + "Successful association with new child %8phC\n", &ceaddr); + + /* Ensure this child is not already associated (might happen due to + * retransmissions), in this case drop the ex structure. + */ + tmp.mode = child->mode; + tmp.extended_addr = child->extended_addr; + exchild = cfg802154_device_is_child(wpan_dev, &tmp); + if (exchild) { + dev_dbg(&sdata->dev->dev, + "Child %8phC was already known\n", &ceaddr); + list_del(&exchild->node); + } + + list_add(&child->node, &wpan_dev->children); + wpan_dev->nchildren++; + +unlock: + mutex_unlock(&wpan_dev->association_lock); + return ret; +} + +int mac802154_process_disassociation_notif(struct ieee802154_sub_if_data *sdata, + struct sk_buff *skb) +{ + struct ieee802154_addr *src = &mac_cb(skb)->source; + struct ieee802154_addr *dest = &mac_cb(skb)->dest; + struct wpan_dev *wpan_dev = &sdata->wpan_dev; + struct ieee802154_pan_device *child; + struct ieee802154_addr target; + bool parent; + u64 teaddr; + + if (skb->len != sizeof(u8)) + return -EINVAL; + + if (unlikely(src->mode != IEEE802154_EXTENDED_ADDRESSING)) + return -EINVAL; + + if (dest->mode == IEEE802154_EXTENDED_ADDRESSING && + unlikely(dest->extended_addr != wpan_dev->extended_addr)) + return -ENODEV; + else if (dest->mode == IEEE802154_SHORT_ADDRESSING && + unlikely(dest->short_addr != wpan_dev->short_addr)) + return -ENODEV; + + if (dest->pan_id != wpan_dev->pan_id) + return -ENODEV; + + target.mode = IEEE802154_EXTENDED_ADDRESSING; + target.extended_addr = src->extended_addr; + teaddr = swab64((__force u64)target.extended_addr); + dev_dbg(&skb->dev->dev, "Processing DISASSOC NOTIF from %8phC\n", &teaddr); + + mutex_lock(&wpan_dev->association_lock); + parent = cfg802154_device_is_parent(wpan_dev, &target); + if (!parent) + child = cfg802154_device_is_child(wpan_dev, &target); + if (!parent && !child) { + mutex_unlock(&wpan_dev->association_lock); + return -EINVAL; + } + + if (parent) { + kfree(wpan_dev->parent); + wpan_dev->parent = NULL; + } else { + list_del(&child->node); + kfree(child); + wpan_dev->nchildren--; + } + + mutex_unlock(&wpan_dev->association_lock); + + return 0; +} diff --git a/net/mptcp/crypto_test.c b/net/mptcp/crypto_test.c index 017248dea038..220414e5c850 100644 --- a/net/mptcp/crypto_test.c +++ b/net/mptcp/crypto_test.c @@ -70,3 +70,4 @@ static struct kunit_suite mptcp_crypto_suite = { kunit_test_suite(mptcp_crypto_suite); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("KUnit tests for MPTCP Crypto"); diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index a0990c365a2e..c30405e76833 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -66,6 +66,7 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("RcvWndShared", MPTCP_MIB_RCVWNDSHARED), SNMP_MIB_ITEM("RcvWndConflictUpdate", MPTCP_MIB_RCVWNDCONFLICTUPDATE), SNMP_MIB_ITEM("RcvWndConflict", MPTCP_MIB_RCVWNDCONFLICT), + SNMP_MIB_ITEM("MPCurrEstab", MPTCP_MIB_CURRESTAB), SNMP_MIB_SENTINEL }; diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index cae71d947252..dd7fd1f246b5 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -65,6 +65,7 @@ enum linux_mptcp_mib_field { * conflict with another subflow while updating msk rcv wnd */ MPTCP_MIB_RCVWNDCONFLICT, /* Conflict with while updating msk rcv wnd */ + MPTCP_MIB_CURRESTAB, /* Current established MPTCP connections */ __MPTCP_MIB_MAX }; @@ -95,4 +96,11 @@ static inline void __MPTCP_INC_STATS(struct net *net, __SNMP_INC_STATS(net->mib.mptcp_statistics, field); } +static inline void MPTCP_DEC_STATS(struct net *net, + enum linux_mptcp_mib_field field) +{ + if (likely(net->mib.mptcp_statistics)) + SNMP_DEC_STATS(net->mib.mptcp_statistics, field); +} + bool mptcp_mib_alloc(struct net *net); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index bf4d96f6f99a..287a60381eae 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1048,6 +1048,11 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk, if (err) return err; + /* We don't use mptcp_set_state() here because it needs to be called + * under the msk socket lock. For the moment, that will not bring + * anything more than only calling inet_sk_state_store(), because the + * old status is known (TCP_CLOSE). + */ inet_sk_state_store(newsk, TCP_LISTEN); lock_sock(ssk); err = __inet_listen_sk(ssk, backlog); @@ -1100,7 +1105,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc static const struct genl_multicast_group mptcp_pm_mcgrps[] = { [MPTCP_PM_CMD_GRP_OFFSET] = { .name = MPTCP_PM_CMD_GRP_NAME, }, [MPTCP_PM_EV_GRP_OFFSET] = { .name = MPTCP_PM_EV_GRP_NAME, - .flags = GENL_UNS_ADMIN_PERM, + .flags = GENL_MCAST_CAP_NET_ADMIN, }, }; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index bc81ea53a049..3ed4709a7509 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -55,28 +55,14 @@ static u64 mptcp_wnd_end(const struct mptcp_sock *msk) return READ_ONCE(msk->wnd_end); } -static bool mptcp_is_tcpsk(struct sock *sk) +static const struct proto_ops *mptcp_fallback_tcp_ops(const struct sock *sk) { - struct socket *sock = sk->sk_socket; - - if (unlikely(sk->sk_prot == &tcp_prot)) { - /* we are being invoked after mptcp_accept() has - * accepted a non-mp-capable flow: sk is a tcp_sk, - * not an mptcp one. - * - * Hand the socket over to tcp so all further socket ops - * bypass mptcp. - */ - WRITE_ONCE(sock->ops, &inet_stream_ops); - return true; #if IS_ENABLED(CONFIG_MPTCP_IPV6) - } else if (unlikely(sk->sk_prot == &tcpv6_prot)) { - WRITE_ONCE(sock->ops, &inet6_stream_ops); - return true; + if (sk->sk_prot == &tcpv6_prot) + return &inet6_stream_ops; #endif - } - - return false; + WARN_ON_ONCE(sk->sk_prot != &tcp_prot); + return &inet_stream_ops; } static int __mptcp_socket_create(struct mptcp_sock *msk) @@ -443,11 +429,11 @@ static void mptcp_check_data_fin_ack(struct sock *sk) switch (sk->sk_state) { case TCP_FIN_WAIT1: - inet_sk_state_store(sk, TCP_FIN_WAIT2); + mptcp_set_state(sk, TCP_FIN_WAIT2); break; case TCP_CLOSING: case TCP_LAST_ACK: - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); break; } @@ -608,13 +594,13 @@ static bool mptcp_check_data_fin(struct sock *sk) switch (sk->sk_state) { case TCP_ESTABLISHED: - inet_sk_state_store(sk, TCP_CLOSE_WAIT); + mptcp_set_state(sk, TCP_CLOSE_WAIT); break; case TCP_FIN_WAIT1: - inet_sk_state_store(sk, TCP_CLOSING); + mptcp_set_state(sk, TCP_CLOSING); break; case TCP_FIN_WAIT2: - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); break; default: /* Other states not expected */ @@ -789,7 +775,7 @@ static bool __mptcp_subflow_error_report(struct sock *sk, struct sock *ssk) */ ssk_state = inet_sk_state_load(ssk); if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) - inet_sk_state_store(sk, ssk_state); + mptcp_set_state(sk, ssk_state); WRITE_ONCE(sk->sk_err, -err); /* This barrier is coupled with smp_rmb() in mptcp_poll() */ @@ -2477,7 +2463,7 @@ out: inet_sk_state_load(msk->first) == TCP_CLOSE) { if (sk->sk_state != TCP_ESTABLISHED || msk->in_accept_queue || sock_flag(sk, SOCK_DEAD)) { - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); mptcp_close_wake_up(sk); } else { mptcp_start_tout_timer(sk); @@ -2572,7 +2558,7 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk) WRITE_ONCE(sk->sk_err, ECONNRESET); } - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); smp_mb__before_atomic(); /* SHUTDOWN must be visible first */ set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags); @@ -2707,7 +2693,7 @@ static void mptcp_do_fastclose(struct sock *sk) struct mptcp_subflow_context *subflow, *tmp; struct mptcp_sock *msk = mptcp_sk(sk); - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); mptcp_for_each_subflow_safe(msk, subflow, tmp) __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow), subflow, MPTCP_CF_FASTCLOSE); @@ -2885,6 +2871,24 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how) release_sock(ssk); } +void mptcp_set_state(struct sock *sk, int state) +{ + int oldstate = sk->sk_state; + + switch (state) { + case TCP_ESTABLISHED: + if (oldstate != TCP_ESTABLISHED) + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB); + break; + + default: + if (oldstate == TCP_ESTABLISHED) + MPTCP_DEC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB); + } + + inet_sk_state_store(sk, state); +} + static const unsigned char new_state[16] = { /* current state: new state: action: */ [0 /* (Invalid) */] = TCP_CLOSE, @@ -2907,7 +2911,7 @@ static int mptcp_close_state(struct sock *sk) int next = (int)new_state[sk->sk_state]; int ns = next & TCP_STATE_MASK; - inet_sk_state_store(sk, ns); + mptcp_set_state(sk, ns); return next & TCP_ACTION_FIN; } @@ -3018,7 +3022,7 @@ bool __mptcp_close(struct sock *sk, long timeout) if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) { mptcp_check_listen_stop(sk); - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); goto cleanup; } @@ -3061,7 +3065,7 @@ cleanup: * state, let's not keep resources busy for no reasons */ if (subflows_alive == 0) - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); sock_hold(sk); pr_debug("msk=%p state=%d", sk, sk->sk_state); @@ -3127,7 +3131,7 @@ static int mptcp_disconnect(struct sock *sk, int flags) return -EBUSY; mptcp_check_listen_stop(sk); - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); mptcp_stop_rtx_timer(sk); mptcp_stop_tout_timer(sk); @@ -3215,7 +3219,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, /* this can't race with mptcp_close(), as the msk is * not yet exposted to user-space */ - inet_sk_state_store(nsk, TCP_ESTABLISHED); + mptcp_set_state(nsk, TCP_ESTABLISHED); /* The msk maintain a ref to each subflow in the connections list */ WRITE_ONCE(msk->first, ssk); @@ -3258,44 +3262,6 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk) WRITE_ONCE(msk->wnd_end, msk->snd_nxt + tcp_sk(ssk)->snd_wnd); } -static struct sock *mptcp_accept(struct sock *ssk, int flags, int *err, - bool kern) -{ - struct sock *newsk; - - pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk)); - newsk = inet_csk_accept(ssk, flags, err, kern); - if (!newsk) - return NULL; - - pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk)); - if (sk_is_mptcp(newsk)) { - struct mptcp_subflow_context *subflow; - struct sock *new_mptcp_sock; - - subflow = mptcp_subflow_ctx(newsk); - new_mptcp_sock = subflow->conn; - - /* is_mptcp should be false if subflow->conn is missing, see - * subflow_syn_recv_sock() - */ - if (WARN_ON_ONCE(!new_mptcp_sock)) { - tcp_sk(newsk)->is_mptcp = 0; - goto out; - } - - newsk = new_mptcp_sock; - MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK); - } else { - MPTCP_INC_STATS(sock_net(ssk), - MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK); - } - -out: - newsk->sk_kern_sock = kern; - return newsk; -} - void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags) { struct mptcp_subflow_context *subflow, *tmp; @@ -3402,12 +3368,12 @@ static void mptcp_release_cb(struct sock *sk) if (__test_and_clear_bit(MPTCP_CLEAN_UNA, &msk->cb_flags)) __mptcp_clean_una_wakeup(sk); if (unlikely(msk->cb_flags)) { - /* be sure to set the current sk state before taking actions + /* be sure to sync the msk state before taking actions * depending on sk_state (MPTCP_ERROR_REPORT) * On sk release avoid actions depending on the first subflow */ - if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags) && msk->first) - __mptcp_set_connected(sk); + if (__test_and_clear_bit(MPTCP_SYNC_STATE, &msk->cb_flags) && msk->first) + __mptcp_sync_state(sk, msk->pending_state); if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags)) __mptcp_error_report(sk); if (__test_and_clear_bit(MPTCP_SYNC_SNDBUF, &msk->cb_flags)) @@ -3674,7 +3640,7 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (IS_ERR(ssk)) return PTR_ERR(ssk); - inet_sk_state_store(sk, TCP_SYN_SENT); + mptcp_set_state(sk, TCP_SYN_SENT); subflow = mptcp_subflow_ctx(ssk); #ifdef CONFIG_TCP_MD5SIG /* no MPTCP if MD5SIG is enabled on this socket or we may run out of @@ -3724,7 +3690,7 @@ out: if (unlikely(err)) { /* avoid leaving a dangling token in an unconnected socket */ mptcp_token_destroy(msk); - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); return err; } @@ -3739,7 +3705,6 @@ static struct proto mptcp_prot = { .connect = mptcp_connect, .disconnect = mptcp_disconnect, .close = mptcp_close, - .accept = mptcp_accept, .setsockopt = mptcp_setsockopt, .getsockopt = mptcp_getsockopt, .shutdown = mptcp_shutdown, @@ -3814,13 +3779,13 @@ static int mptcp_listen(struct socket *sock, int backlog) goto unlock; } - inet_sk_state_store(sk, TCP_LISTEN); + mptcp_set_state(sk, TCP_LISTEN); sock_set_flag(sk, SOCK_RCU_FREE); lock_sock(ssk); err = __inet_listen_sk(ssk, backlog); release_sock(ssk); - inet_sk_state_store(sk, inet_sk_state_load(ssk)); + mptcp_set_state(sk, inet_sk_state_load(ssk)); if (!err) { sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); @@ -3849,18 +3814,36 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, if (!ssk) return -EINVAL; - newsk = mptcp_accept(ssk, flags, &err, kern); + pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk)); + newsk = inet_csk_accept(ssk, flags, &err, kern); if (!newsk) return err; - lock_sock(newsk); - - __inet_accept(sock, newsock, newsk); - if (!mptcp_is_tcpsk(newsock->sk)) { - struct mptcp_sock *msk = mptcp_sk(newsk); + pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk)); + if (sk_is_mptcp(newsk)) { struct mptcp_subflow_context *subflow; + struct sock *new_mptcp_sock; + + subflow = mptcp_subflow_ctx(newsk); + new_mptcp_sock = subflow->conn; + + /* is_mptcp should be false if subflow->conn is missing, see + * subflow_syn_recv_sock() + */ + if (WARN_ON_ONCE(!new_mptcp_sock)) { + tcp_sk(newsk)->is_mptcp = 0; + goto tcpfallback; + } + + newsk = new_mptcp_sock; + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK); + + newsk->sk_kern_sock = kern; + lock_sock(newsk); + __inet_accept(sock, newsock, newsk); set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags); + msk = mptcp_sk(newsk); msk->in_accept_queue = 0; /* set ssk->sk_socket of accept()ed flows to mptcp socket. @@ -3880,8 +3863,23 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, __mptcp_close_ssk(newsk, msk->first, mptcp_subflow_ctx(msk->first), 0); if (unlikely(list_is_singular(&msk->conn_list))) - inet_sk_state_store(newsk, TCP_CLOSE); + mptcp_set_state(newsk, TCP_CLOSE); } + } else { + MPTCP_INC_STATS(sock_net(ssk), + MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK); +tcpfallback: + newsk->sk_kern_sock = kern; + lock_sock(newsk); + __inet_accept(sock, newsock, newsk); + /* we are being invoked after accepting a non-mp-capable + * flow: sk is a tcp_sk, not an mptcp one. + * + * Hand the socket over to tcp so all further socket ops + * bypass mptcp. + */ + WRITE_ONCE(newsock->sk->sk_socket->ops, + mptcp_fallback_tcp_ops(newsock->sk)); } release_sock(newsk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 458a2d7bb0dd..3517f2d24a22 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -124,7 +124,7 @@ #define MPTCP_ERROR_REPORT 3 #define MPTCP_RETRANSMIT 4 #define MPTCP_FLUSH_JOIN_LIST 5 -#define MPTCP_CONNECTED 6 +#define MPTCP_SYNC_STATE 6 #define MPTCP_SYNC_SNDBUF 7 struct mptcp_skb_cb { @@ -296,6 +296,9 @@ struct mptcp_sock { bool use_64bit_ack; /* Set when we received a 64-bit DSN */ bool csum_enabled; bool allow_infinite_fallback; + u8 pending_state; /* A subflow asked to set this sk_state, + * protected by the msk data lock + */ u8 mpc_endpoint_id; u8 recvmsg_inq:1, cork:1, @@ -638,6 +641,7 @@ bool __mptcp_close(struct sock *sk, long timeout); void mptcp_cancel_work(struct sock *sk); void __mptcp_unaccepted_force_close(struct sock *sk); void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk); +void mptcp_set_state(struct sock *sk, int state); bool mptcp_addresses_equal(const struct mptcp_addr_info *a, const struct mptcp_addr_info *b, bool use_port); @@ -728,7 +732,7 @@ void mptcp_get_options(const struct sk_buff *skb, struct mptcp_options_received *mp_opt); void mptcp_finish_connect(struct sock *sk); -void __mptcp_set_connected(struct sock *sk); +void __mptcp_sync_state(struct sock *sk, int state); void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout); static inline void mptcp_stop_tout_timer(struct sock *sk) @@ -1124,7 +1128,7 @@ static inline bool subflow_simultaneous_connect(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - return sk->sk_state == TCP_ESTABLISHED && + return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_FIN_WAIT1) && is_active_ssk(subflow) && !subflow->conn_finished; } diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index cabe856b2a45..c40f1428e602 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -440,6 +440,8 @@ static bool mptcp_supported_sockopt(int level, int optname) /* should work fine */ case IP_FREEBIND: case IP_TRANSPARENT: + case IP_BIND_ADDRESS_NO_PORT: + case IP_LOCAL_PORT_RANGE: /* the following are control cmsg related */ case IP_PKTINFO: @@ -455,7 +457,6 @@ static bool mptcp_supported_sockopt(int level, int optname) /* common stuff that need some love */ case IP_TOS: case IP_TTL: - case IP_BIND_ADDRESS_NO_PORT: case IP_MTU_DISCOVER: case IP_RECVERR: @@ -683,8 +684,8 @@ static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t op return 0; } -static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int optname, - sockptr_t optval, unsigned int optlen) +static int mptcp_setsockopt_sol_ip_set(struct mptcp_sock *msk, int optname, + sockptr_t optval, unsigned int optlen) { struct sock *sk = (struct sock *)msk; struct sock *ssk; @@ -710,6 +711,14 @@ static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int o inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk)); break; + case IP_BIND_ADDRESS_NO_PORT: + inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk, + inet_test_bit(BIND_ADDRESS_NO_PORT, sk)); + break; + case IP_LOCAL_PORT_RANGE: + WRITE_ONCE(inet_sk(ssk)->local_port_range, + READ_ONCE(inet_sk(sk)->local_port_range)); + break; default: release_sock(sk); WARN_ON_ONCE(1); @@ -755,7 +764,9 @@ static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname, switch (optname) { case IP_FREEBIND: case IP_TRANSPARENT: - return mptcp_setsockopt_sol_ip_set_transparent(msk, optname, optval, optlen); + case IP_BIND_ADDRESS_NO_PORT: + case IP_LOCAL_PORT_RANGE: + return mptcp_setsockopt_sol_ip_set(msk, optname, optval, optlen); case IP_TOS: return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen); } @@ -1350,6 +1361,12 @@ static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname, switch (optname) { case IP_TOS: return mptcp_put_int_option(msk, optval, optlen, READ_ONCE(inet_sk(sk)->tos)); + case IP_BIND_ADDRESS_NO_PORT: + return mptcp_put_int_option(msk, optval, optlen, + inet_test_bit(BIND_ADDRESS_NO_PORT, sk)); + case IP_LOCAL_PORT_RANGE: + return mptcp_put_int_option(msk, optval, optlen, + READ_ONCE(inet_sk(sk)->local_port_range)); } return -EOPNOTSUPP; @@ -1450,6 +1467,8 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk)); inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk)); + inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk, inet_test_bit(BIND_ADDRESS_NO_PORT, sk)); + WRITE_ONCE(inet_sk(ssk)->local_port_range, READ_ONCE(inet_sk(sk)->local_port_range)); } void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index a4f3c27f0309..1ef28642afc4 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -419,22 +419,28 @@ static bool subflow_use_different_dport(struct mptcp_sock *msk, const struct soc return inet_sk(sk)->inet_dport != inet_sk((struct sock *)msk)->inet_dport; } -void __mptcp_set_connected(struct sock *sk) +void __mptcp_sync_state(struct sock *sk, int state) { - __mptcp_propagate_sndbuf(sk, mptcp_sk(sk)->first); + struct mptcp_sock *msk = mptcp_sk(sk); + + __mptcp_propagate_sndbuf(sk, msk->first); if (sk->sk_state == TCP_SYN_SENT) { - inet_sk_state_store(sk, TCP_ESTABLISHED); + mptcp_set_state(sk, state); sk->sk_state_change(sk); } } -static void mptcp_set_connected(struct sock *sk) +static void mptcp_propagate_state(struct sock *sk, struct sock *ssk) { + struct mptcp_sock *msk = mptcp_sk(sk); + mptcp_data_lock(sk); - if (!sock_owned_by_user(sk)) - __mptcp_set_connected(sk); - else - __set_bit(MPTCP_CONNECTED, &mptcp_sk(sk)->cb_flags); + if (!sock_owned_by_user(sk)) { + __mptcp_sync_state(sk, ssk->sk_state); + } else { + msk->pending_state = ssk->sk_state; + __set_bit(MPTCP_SYNC_STATE, &msk->cb_flags); + } mptcp_data_unlock(sk); } @@ -496,7 +502,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow_set_remote_key(msk, subflow, &mp_opt); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK); mptcp_finish_connect(sk); - mptcp_set_connected(parent); + mptcp_propagate_state(parent, sk); } else if (subflow->request_join) { u8 hmac[SHA256_DIGEST_SIZE]; @@ -540,7 +546,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) } else if (mptcp_check_fallback(sk)) { fallback: mptcp_rcv_space_init(msk, sk); - mptcp_set_connected(parent); + mptcp_propagate_state(parent, sk); } return; @@ -1740,7 +1746,7 @@ static void subflow_state_change(struct sock *sk) mptcp_rcv_space_init(msk, sk); pr_fallback(msk); subflow->conn_finished = 1; - mptcp_set_connected(parent); + mptcp_propagate_state(parent, sk); } /* as recvmsg() does not acquire the subflow socket for ssk selection diff --git a/net/mptcp/token_test.c b/net/mptcp/token_test.c index 0758865ab658..bfff53e668da 100644 --- a/net/mptcp/token_test.c +++ b/net/mptcp/token_test.c @@ -143,3 +143,4 @@ static struct kunit_suite mptcp_token_suite = { kunit_test_suite(mptcp_token_suite); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("KUnit tests for MPTCP Token"); diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c index 5257d5e7eb09..0e4beae421f8 100644 --- a/net/netfilter/nf_bpf_link.c +++ b/net/netfilter/nf_bpf_link.c @@ -314,7 +314,7 @@ static bool nf_is_valid_access(int off, int size, enum bpf_access_type type, static const struct bpf_func_proto * bpf_nf_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { - return bpf_base_func_proto(func_id, prog); + return bpf_base_func_proto(func_id); } const struct bpf_verifier_ops netfilter_verifier_ops = { diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index fb0ae15e96df..0c22a02c2035 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -992,13 +992,13 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family) if (err) goto err_filter; - if (!cda[CTA_FILTER]) - return filter; - err = ctnetlink_parse_zone(cda[CTA_ZONE], &filter->zone); if (err < 0) goto err_filter; + if (!cda[CTA_FILTER]) + return filter; + err = ctnetlink_parse_filter(cda[CTA_FILTER], filter); if (err < 0) goto err_filter; @@ -1043,7 +1043,7 @@ err_filter: static bool ctnetlink_needs_filter(u8 family, const struct nlattr * const *cda) { - return family || cda[CTA_MARK] || cda[CTA_FILTER] || cda[CTA_STATUS]; + return family || cda[CTA_MARK] || cda[CTA_FILTER] || cda[CTA_STATUS] || cda[CTA_ZONE]; } static int ctnetlink_start(struct netlink_callback *cb) @@ -1148,6 +1148,10 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data) if (filter->family && nf_ct_l3num(ct) != filter->family) goto ignore_entry; + if (filter->zone.id != NF_CT_DEFAULT_ZONE_ID && + !nf_ct_zone_equal_any(ct, &filter->zone)) + goto ignore_entry; + if (filter->orig_flags) { tuple = nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL); if (!ctnetlink_filter_match_tuple(&filter->orig, tuple, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c5c17c6e80ed..5531b13d92b6 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2261,7 +2261,16 @@ static int nft_chain_parse_hook(struct net *net, return -EOPNOTSUPP; } - type = basechain->type; + if (nla[NFTA_CHAIN_TYPE]) { + type = __nf_tables_chain_type_lookup(nla[NFTA_CHAIN_TYPE], + family); + if (!type) { + NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TYPE]); + return -ENOENT; + } + } else { + type = basechain->type; + } } if (!try_module_get(type->owner)) { @@ -5817,10 +5826,6 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) nla_nest_end(skb, nest); nlmsg_end(skb, nlh); - if (dump_ctx->reset && args.iter.count > args.iter.skip) - audit_log_nft_set_reset(table, cb->seq, - args.iter.count - args.iter.skip); - rcu_read_unlock(); if (args.iter.err && args.iter.err != -EMSGSIZE) @@ -5836,6 +5841,26 @@ nla_put_failure: return -ENOSPC; } +static int nf_tables_dumpreset_set(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct nftables_pernet *nft_net = nft_pernet(sock_net(skb->sk)); + struct nft_set_dump_ctx *dump_ctx = cb->data; + int ret, skip = cb->args[0]; + + mutex_lock(&nft_net->commit_mutex); + + ret = nf_tables_dump_set(skb, cb); + + if (cb->args[0] > skip) + audit_log_nft_set_reset(dump_ctx->ctx.table, cb->seq, + cb->args[0] - skip); + + mutex_unlock(&nft_net->commit_mutex); + + return ret; +} + static int nf_tables_dump_set_start(struct netlink_callback *cb) { struct nft_set_dump_ctx *dump_ctx = cb->data; @@ -5910,7 +5935,7 @@ static int nft_setelem_parse_flags(const struct nft_set *set, return 0; } -static int nft_setelem_parse_key(struct nft_ctx *ctx, struct nft_set *set, +static int nft_setelem_parse_key(struct nft_ctx *ctx, const struct nft_set *set, struct nft_data *key, struct nlattr *attr) { struct nft_data_desc desc = { @@ -5963,7 +5988,7 @@ static void *nft_setelem_catchall_get(const struct net *net, return priv; } -static int nft_setelem_get(struct nft_ctx *ctx, struct nft_set *set, +static int nft_setelem_get(struct nft_ctx *ctx, const struct nft_set *set, struct nft_set_elem *elem, u32 flags) { void *priv; @@ -5982,7 +6007,7 @@ static int nft_setelem_get(struct nft_ctx *ctx, struct nft_set *set, return 0; } -static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set, +static int nft_get_set_elem(struct nft_ctx *ctx, const struct nft_set *set, const struct nlattr *attr, bool reset) { struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; @@ -6039,21 +6064,18 @@ err_fill_setelem: return err; } -/* called with rcu_read_lock held */ -static int nf_tables_getsetelem(struct sk_buff *skb, - const struct nfnl_info *info, - const struct nlattr * const nla[]) +static int nft_set_dump_ctx_init(struct nft_set_dump_ctx *dump_ctx, + const struct sk_buff *skb, + const struct nfnl_info *info, + const struct nlattr * const nla[], + bool reset) { struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_cur(info->net); u8 family = info->nfmsg->nfgen_family; - int rem, err = 0, nelems = 0; struct net *net = info->net; struct nft_table *table; struct nft_set *set; - struct nlattr *attr; - struct nft_ctx ctx; - bool reset = false; table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family, genmask, 0); @@ -6068,10 +6090,22 @@ static int nf_tables_getsetelem(struct sk_buff *skb, return PTR_ERR(set); } - nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); + nft_ctx_init(&dump_ctx->ctx, net, skb, + info->nlh, family, table, NULL, nla); + dump_ctx->set = set; + dump_ctx->reset = reset; + return 0; +} - if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETSETELEM_RESET) - reset = true; +/* called with rcu_read_lock held */ +static int nf_tables_getsetelem(struct sk_buff *skb, + const struct nfnl_info *info, + const struct nlattr * const nla[]) +{ + struct netlink_ext_ack *extack = info->extack; + struct nft_set_dump_ctx dump_ctx; + struct nlattr *attr; + int rem, err = 0; if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { @@ -6080,12 +6114,55 @@ static int nf_tables_getsetelem(struct sk_buff *skb, .done = nf_tables_dump_set_done, .module = THIS_MODULE, }; - struct nft_set_dump_ctx dump_ctx = { - .set = set, - .ctx = ctx, - .reset = reset, + + err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, false); + if (err) + return err; + + c.data = &dump_ctx; + return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); + } + + if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS]) + return -EINVAL; + + err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, false); + if (err) + return err; + + nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { + err = nft_get_set_elem(&dump_ctx.ctx, dump_ctx.set, attr, false); + if (err < 0) { + NL_SET_BAD_ATTR(extack, attr); + break; + } + } + + return err; +} + +static int nf_tables_getsetelem_reset(struct sk_buff *skb, + const struct nfnl_info *info, + const struct nlattr * const nla[]) +{ + struct nftables_pernet *nft_net = nft_pernet(info->net); + struct netlink_ext_ack *extack = info->extack; + struct nft_set_dump_ctx dump_ctx; + int rem, err = 0, nelems = 0; + struct nlattr *attr; + + if (info->nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .start = nf_tables_dump_set_start, + .dump = nf_tables_dumpreset_set, + .done = nf_tables_dump_set_done, + .module = THIS_MODULE, }; + err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, true); + if (err) + return err; + c.data = &dump_ctx; return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); } @@ -6093,18 +6170,31 @@ static int nf_tables_getsetelem(struct sk_buff *skb, if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS]) return -EINVAL; + if (!try_module_get(THIS_MODULE)) + return -EINVAL; + rcu_read_unlock(); + mutex_lock(&nft_net->commit_mutex); + rcu_read_lock(); + + err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, true); + if (err) + goto out_unlock; + nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { - err = nft_get_set_elem(&ctx, set, attr, reset); + err = nft_get_set_elem(&dump_ctx.ctx, dump_ctx.set, attr, true); if (err < 0) { NL_SET_BAD_ATTR(extack, attr); break; } nelems++; } + audit_log_nft_set_reset(dump_ctx.ctx.table, nft_net->base_seq, nelems); - if (reset) - audit_log_nft_set_reset(table, nft_pernet(net)->base_seq, - nelems); +out_unlock: + rcu_read_unlock(); + mutex_unlock(&nft_net->commit_mutex); + rcu_read_lock(); + module_put(THIS_MODULE); return err; } @@ -9078,7 +9168,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_set_elem_list_policy, }, [NFT_MSG_GETSETELEM_RESET] = { - .call = nf_tables_getsetelem, + .call = nf_tables_getsetelem_reset, .type = NFNL_CB_RCU, .attr_count = NFTA_SET_ELEM_LIST_MAX, .policy = nft_set_elem_list_policy, @@ -10383,6 +10473,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_trans_destroy(trans); break; } + nft_trans_set(trans)->dead = 1; list_del_rcu(&nft_trans_set(trans)->list); break; case NFT_MSG_DELSET: diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 7252fcdae349..efd523496be4 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1597,7 +1597,7 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m) if (nft_set_elem_expired(&e->ext)) { priv->dirty = true; - gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); + gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL); if (!gc) return; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 177126fb0484..4ed8ffd58ff3 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1519,8 +1519,7 @@ out: int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid, u32 group, gfp_t allocation, - int (*filter)(struct sock *dsk, - struct sk_buff *skb, void *data), + netlink_filter_fn filter, void *filter_data) { struct net *net = sock_net(ssk); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 9c7ffd10df2a..8c7af02f8454 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -631,6 +631,138 @@ static int genl_validate_ops(const struct genl_family *family) return 0; } +static void *genl_sk_priv_alloc(struct genl_family *family) +{ + void *priv; + + priv = kzalloc(family->sock_priv_size, GFP_KERNEL); + if (!priv) + return ERR_PTR(-ENOMEM); + + if (family->sock_priv_init) + family->sock_priv_init(priv); + + return priv; +} + +static void genl_sk_priv_free(const struct genl_family *family, void *priv) +{ + if (family->sock_priv_destroy) + family->sock_priv_destroy(priv); + kfree(priv); +} + +static int genl_sk_privs_alloc(struct genl_family *family) +{ + if (!family->sock_priv_size) + return 0; + + family->sock_privs = kzalloc(sizeof(*family->sock_privs), GFP_KERNEL); + if (!family->sock_privs) + return -ENOMEM; + xa_init(family->sock_privs); + return 0; +} + +static void genl_sk_privs_free(const struct genl_family *family) +{ + unsigned long id; + void *priv; + + if (!family->sock_priv_size) + return; + + xa_for_each(family->sock_privs, id, priv) + genl_sk_priv_free(family, priv); + + xa_destroy(family->sock_privs); + kfree(family->sock_privs); +} + +static void genl_sk_priv_free_by_sock(struct genl_family *family, + struct sock *sk) +{ + void *priv; + + if (!family->sock_priv_size) + return; + priv = xa_erase(family->sock_privs, (unsigned long) sk); + if (!priv) + return; + genl_sk_priv_free(family, priv); +} + +static void genl_release(struct sock *sk, unsigned long *groups) +{ + struct genl_family *family; + unsigned int id; + + down_read(&cb_lock); + + idr_for_each_entry(&genl_fam_idr, family, id) + genl_sk_priv_free_by_sock(family, sk); + + up_read(&cb_lock); +} + +/** + * __genl_sk_priv_get - Get family private pointer for socket, if exists + * + * @family: family + * @sk: socket + * + * Lookup a private memory for a Generic netlink family and specified socket. + * + * Caller should make sure this is called in RCU read locked section. + * + * Return: valid pointer on success, otherwise negative error value + * encoded by ERR_PTR(), NULL in case priv does not exist. + */ +void *__genl_sk_priv_get(struct genl_family *family, struct sock *sk) +{ + if (WARN_ON_ONCE(!family->sock_privs)) + return ERR_PTR(-EINVAL); + return xa_load(family->sock_privs, (unsigned long) sk); +} + +/** + * genl_sk_priv_get - Get family private pointer for socket + * + * @family: family + * @sk: socket + * + * Lookup a private memory for a Generic netlink family and specified socket. + * Allocate the private memory in case it was not already done. + * + * Return: valid pointer on success, otherwise negative error value + * encoded by ERR_PTR(). + */ +void *genl_sk_priv_get(struct genl_family *family, struct sock *sk) +{ + void *priv, *old_priv; + + priv = __genl_sk_priv_get(family, sk); + if (priv) + return priv; + + /* priv for the family does not exist so far, create it. */ + + priv = genl_sk_priv_alloc(family); + if (IS_ERR(priv)) + return ERR_CAST(priv); + + old_priv = xa_cmpxchg(family->sock_privs, (unsigned long) sk, NULL, + priv, GFP_KERNEL); + if (old_priv) { + genl_sk_priv_free(family, priv); + if (xa_is_err(old_priv)) + return ERR_PTR(xa_err(old_priv)); + /* Race happened, priv for the socket was already inserted. */ + return old_priv; + } + return priv; +} + /** * genl_register_family - register a generic netlink family * @family: generic netlink family @@ -659,6 +791,10 @@ int genl_register_family(struct genl_family *family) goto errout_locked; } + err = genl_sk_privs_alloc(family); + if (err) + goto errout_locked; + /* * Sadly, a few cases need to be special-cased * due to them having previously abused the API @@ -679,7 +815,7 @@ int genl_register_family(struct genl_family *family) start, end + 1, GFP_KERNEL); if (family->id < 0) { err = family->id; - goto errout_locked; + goto errout_sk_privs_free; } err = genl_validate_assign_mc_groups(family); @@ -698,6 +834,8 @@ int genl_register_family(struct genl_family *family) errout_remove: idr_remove(&genl_fam_idr, family->id); +errout_sk_privs_free: + genl_sk_privs_free(family); errout_locked: genl_unlock_all(); return err; @@ -728,6 +866,9 @@ int genl_unregister_family(const struct genl_family *family) up_write(&cb_lock); wait_event(genl_sk_destructing_waitq, atomic_read(&genl_sk_destructing_cnt) == 0); + + genl_sk_privs_free(family); + genl_unlock(); genl_ctrl_event(CTRL_CMD_DELFAMILY, family, NULL, 0); @@ -1688,10 +1829,10 @@ static int genl_bind(struct net *net, int group) continue; grp = &family->mcgrps[i]; - if ((grp->flags & GENL_UNS_ADMIN_PERM) && + if ((grp->flags & GENL_MCAST_CAP_NET_ADMIN) && !ns_capable(net->user_ns, CAP_NET_ADMIN)) ret = -EPERM; - if (grp->cap_sys_admin && + if ((grp->flags & GENL_MCAST_CAP_SYS_ADMIN) && !ns_capable(net->user_ns, CAP_SYS_ADMIN)) ret = -EPERM; @@ -1708,6 +1849,7 @@ static int __net_init genl_pernet_init(struct net *net) .input = genl_rcv, .flags = NL_CFG_F_NONROOT_RECV, .bind = genl_bind, + .release = genl_release, }; /* we'll bump the group number right afterwards */ diff --git a/net/psample/psample.c b/net/psample/psample.c index c34e902855db..ddd211a151d0 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -32,7 +32,7 @@ enum psample_nl_multicast_groups { static const struct genl_multicast_group psample_nl_mcgrps[] = { [PSAMPLE_NL_MCGRP_CONFIG] = { .name = PSAMPLE_NL_MCGRP_CONFIG_NAME }, [PSAMPLE_NL_MCGRP_SAMPLE] = { .name = PSAMPLE_NL_MCGRP_SAMPLE_NAME, - .flags = GENL_UNS_ADMIN_PERM }, + .flags = GENL_MCAST_CAP_NET_ADMIN, }, }; static struct genl_family psample_nl_family __ro_after_init; diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c index 5a81505fba9a..4e32d659524e 100644 --- a/net/rfkill/rfkill-gpio.c +++ b/net/rfkill/rfkill-gpio.c @@ -126,6 +126,14 @@ static int rfkill_gpio_probe(struct platform_device *pdev) return -EINVAL; } + ret = gpiod_direction_output(rfkill->reset_gpio, true); + if (ret) + return ret; + + ret = gpiod_direction_output(rfkill->shutdown_gpio, true); + if (ret) + return ret; + rfkill->rfkill_dev = rfkill_alloc(rfkill->name, &pdev->dev, rfkill->type, &rfkill_gpio_ops, rfkill); diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index ecb91ad4ce63..ef81d019b20f 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -182,21 +182,47 @@ void rose_kill_by_neigh(struct rose_neigh *neigh) */ static void rose_kill_by_device(struct net_device *dev) { - struct sock *s; + struct sock *sk, *array[16]; + struct rose_sock *rose; + bool rescan; + int i, cnt; +start: + rescan = false; + cnt = 0; spin_lock_bh(&rose_list_lock); - sk_for_each(s, &rose_list) { - struct rose_sock *rose = rose_sk(s); + sk_for_each(sk, &rose_list) { + rose = rose_sk(sk); + if (rose->device == dev) { + if (cnt == ARRAY_SIZE(array)) { + rescan = true; + break; + } + sock_hold(sk); + array[cnt++] = sk; + } + } + spin_unlock_bh(&rose_list_lock); + for (i = 0; i < cnt; i++) { + sk = array[cnt]; + rose = rose_sk(sk); + lock_sock(sk); + spin_lock_bh(&rose_list_lock); if (rose->device == dev) { - rose_disconnect(s, ENETUNREACH, ROSE_OUT_OF_ORDER, 0); + rose_disconnect(sk, ENETUNREACH, ROSE_OUT_OF_ORDER, 0); if (rose->neighbour) rose->neighbour->use--; netdev_put(rose->device, &rose->dev_tracker); rose->device = NULL; } + spin_unlock_bh(&rose_list_lock); + release_sock(sk); + sock_put(sk); + cond_resched(); } - spin_unlock_bh(&rose_list_lock); + if (rescan) + goto start; } /* @@ -656,7 +682,10 @@ static int rose_release(struct socket *sock) break; } + spin_lock_bh(&rose_list_lock); netdev_put(rose->device, &rose->dev_tracker); + rose->device = NULL; + spin_unlock_bh(&rose_list_lock); sock->sk = NULL; release_sock(sk); sock_put(sk); diff --git a/net/sched/Makefile b/net/sched/Makefile index b5fd49641d91..82c3f78ca486 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -13,7 +13,6 @@ obj-$(CONFIG_NET_ACT_POLICE) += act_police.o obj-$(CONFIG_NET_ACT_GACT) += act_gact.o obj-$(CONFIG_NET_ACT_MIRRED) += act_mirred.o obj-$(CONFIG_NET_ACT_SAMPLE) += act_sample.o -obj-$(CONFIG_NET_ACT_IPT) += act_ipt.o obj-$(CONFIG_NET_ACT_NAT) += act_nat.o obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 3a7770eff52d..ef70d4771811 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1119,7 +1119,8 @@ repeat: } } else if (TC_ACT_EXT_CMP(ret, TC_ACT_GOTO_CHAIN)) { if (unlikely(!rcu_access_pointer(a->goto_chain))) { - tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); + tcf_set_drop_reason(skb, + SKB_DROP_REASON_TC_CHAIN_NOTFOUND); return TC_ACT_SHOT; } tcf_action_goto_chain_exec(a, res); @@ -1312,7 +1313,7 @@ void tcf_idr_insert_many(struct tc_action *actions[], int init_res[]) tcf_act_for_each_action(i, a, actions) { struct tcf_idrinfo *idrinfo; - if (init_res[i] == 0) /* Bound */ + if (init_res[i] == ACT_P_BOUND) continue; idrinfo = a->idrinfo; diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index b0455fda7d0b..6cfee6658103 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -318,7 +318,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, } else if (ret > 0) { /* Don't override defaults. */ if (bind) - return 0; + return ACT_P_BOUND; if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*act, bind); diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 0d7aee8933c5..f8762756657d 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -146,7 +146,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, } else if (ret > 0) { ci = to_connmark(*a); if (bind) { - err = 0; + err = ACT_P_BOUND; goto out_free; } if (!(flags & TCA_ACT_FLAGS_REPLACE)) { diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 8ed285023a40..7f8b1f2f2ed9 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -77,8 +77,8 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, } ret = ACT_P_CREATED; } else if (err > 0) { - if (bind)/* dont override defaults */ - return 0; + if (bind) /* dont override defaults */ + return ACT_P_BOUND; if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index f69c47945175..c3e004b5b820 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -1349,7 +1349,7 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla, res = ACT_P_CREATED; } else { if (bind) - return 0; + return ACT_P_BOUND; if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c index 4d15b6a6169c..e620f9a84afe 100644 --- a/net/sched/act_ctinfo.c +++ b/net/sched/act_ctinfo.c @@ -221,7 +221,7 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla, ret = ACT_P_CREATED; } else if (err > 0) { if (bind) /* don't override defaults */ - return 0; + return ACT_P_BOUND; if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 904ab3d457ef..4af3b7ec249f 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -108,7 +108,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, ret = ACT_P_CREATED; } else if (err > 0) { if (bind)/* dont override defaults */ - return 0; + return ACT_P_BOUND; if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c index 393b78729216..c681cd011afd 100644 --- a/net/sched/act_gate.c +++ b/net/sched/act_gate.c @@ -356,7 +356,7 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, return err; if (err && bind) - return 0; + return ACT_P_BOUND; if (!err) { ret = tcf_idr_create_from_flags(tn, index, est, a, diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index bc7611b0744c..0e867d13beb5 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -548,7 +548,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, exists = err; if (exists && bind) { kfree(p); - return 0; + return ACT_P_BOUND; } if (!exists) { diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c deleted file mode 100644 index 598d6e299152..000000000000 --- a/net/sched/act_ipt.c +++ /dev/null @@ -1,464 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * net/sched/act_ipt.c iptables target interface - * - *TODO: Add other tables. For now we only support the ipv4 table targets - * - * Copyright: Jamal Hadi Salim (2002-13) - */ - -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/skbuff.h> -#include <linux/rtnetlink.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <net/netlink.h> -#include <net/pkt_sched.h> -#include <linux/tc_act/tc_ipt.h> -#include <net/tc_act/tc_ipt.h> -#include <net/tc_wrapper.h> -#include <net/ip.h> - -#include <linux/netfilter_ipv4/ip_tables.h> - - -static struct tc_action_ops act_ipt_ops; -static struct tc_action_ops act_xt_ops; - -static int ipt_init_target(struct net *net, struct xt_entry_target *t, - char *table, unsigned int hook) -{ - struct xt_tgchk_param par; - struct xt_target *target; - struct ipt_entry e = {}; - int ret = 0; - - target = xt_request_find_target(AF_INET, t->u.user.name, - t->u.user.revision); - if (IS_ERR(target)) - return PTR_ERR(target); - - t->u.kernel.target = target; - memset(&par, 0, sizeof(par)); - par.net = net; - par.table = table; - par.entryinfo = &e; - par.target = target; - par.targinfo = t->data; - par.hook_mask = 1 << hook; - par.family = NFPROTO_IPV4; - - ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false); - if (ret < 0) { - module_put(t->u.kernel.target->me); - return ret; - } - return 0; -} - -static void ipt_destroy_target(struct xt_entry_target *t, struct net *net) -{ - struct xt_tgdtor_param par = { - .target = t->u.kernel.target, - .targinfo = t->data, - .family = NFPROTO_IPV4, - .net = net, - }; - if (par.target->destroy != NULL) - par.target->destroy(&par); - module_put(par.target->me); -} - -static void tcf_ipt_release(struct tc_action *a) -{ - struct tcf_ipt *ipt = to_ipt(a); - - if (ipt->tcfi_t) { - ipt_destroy_target(ipt->tcfi_t, a->idrinfo->net); - kfree(ipt->tcfi_t); - } - kfree(ipt->tcfi_tname); -} - -static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = { - [TCA_IPT_TABLE] = { .type = NLA_STRING, .len = IFNAMSIZ }, - [TCA_IPT_HOOK] = NLA_POLICY_RANGE(NLA_U32, NF_INET_PRE_ROUTING, - NF_INET_NUMHOOKS), - [TCA_IPT_INDEX] = { .type = NLA_U32 }, - [TCA_IPT_TARG] = { .len = sizeof(struct xt_entry_target) }, -}; - -static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, - struct nlattr *est, struct tc_action **a, - const struct tc_action_ops *ops, - struct tcf_proto *tp, u32 flags) -{ - struct tc_action_net *tn = net_generic(net, id); - bool bind = flags & TCA_ACT_FLAGS_BIND; - struct nlattr *tb[TCA_IPT_MAX + 1]; - struct tcf_ipt *ipt; - struct xt_entry_target *td, *t; - char *tname; - bool exists = false; - int ret = 0, err; - u32 hook = 0; - u32 index = 0; - - if (nla == NULL) - return -EINVAL; - - err = nla_parse_nested_deprecated(tb, TCA_IPT_MAX, nla, ipt_policy, - NULL); - if (err < 0) - return err; - - if (tb[TCA_IPT_INDEX] != NULL) - index = nla_get_u32(tb[TCA_IPT_INDEX]); - - err = tcf_idr_check_alloc(tn, &index, a, bind); - if (err < 0) - return err; - exists = err; - if (exists && bind) - return 0; - - if (tb[TCA_IPT_HOOK] == NULL || tb[TCA_IPT_TARG] == NULL) { - if (exists) - tcf_idr_release(*a, bind); - else - tcf_idr_cleanup(tn, index); - return -EINVAL; - } - - td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]); - if (nla_len(tb[TCA_IPT_TARG]) != td->u.target_size) { - if (exists) - tcf_idr_release(*a, bind); - else - tcf_idr_cleanup(tn, index); - return -EINVAL; - } - - if (!exists) { - ret = tcf_idr_create(tn, index, est, a, ops, bind, - false, flags); - if (ret) { - tcf_idr_cleanup(tn, index); - return ret; - } - ret = ACT_P_CREATED; - } else { - if (bind)/* dont override defaults */ - return 0; - - if (!(flags & TCA_ACT_FLAGS_REPLACE)) { - tcf_idr_release(*a, bind); - return -EEXIST; - } - } - - err = -EINVAL; - hook = nla_get_u32(tb[TCA_IPT_HOOK]); - switch (hook) { - case NF_INET_PRE_ROUTING: - break; - case NF_INET_POST_ROUTING: - break; - default: - goto err1; - } - - if (tb[TCA_IPT_TABLE]) { - /* mangle only for now */ - if (nla_strcmp(tb[TCA_IPT_TABLE], "mangle")) - goto err1; - } - - tname = kstrdup("mangle", GFP_KERNEL); - if (unlikely(!tname)) - goto err1; - - t = kmemdup(td, td->u.target_size, GFP_KERNEL); - if (unlikely(!t)) - goto err2; - - err = ipt_init_target(net, t, tname, hook); - if (err < 0) - goto err3; - - ipt = to_ipt(*a); - - spin_lock_bh(&ipt->tcf_lock); - if (ret != ACT_P_CREATED) { - ipt_destroy_target(ipt->tcfi_t, net); - kfree(ipt->tcfi_tname); - kfree(ipt->tcfi_t); - } - ipt->tcfi_tname = tname; - ipt->tcfi_t = t; - ipt->tcfi_hook = hook; - spin_unlock_bh(&ipt->tcf_lock); - return ret; - -err3: - kfree(t); -err2: - kfree(tname); -err1: - tcf_idr_release(*a, bind); - return err; -} - -static int tcf_ipt_init(struct net *net, struct nlattr *nla, - struct nlattr *est, struct tc_action **a, - struct tcf_proto *tp, - u32 flags, struct netlink_ext_ack *extack) -{ - return __tcf_ipt_init(net, act_ipt_ops.net_id, nla, est, - a, &act_ipt_ops, tp, flags); -} - -static int tcf_xt_init(struct net *net, struct nlattr *nla, - struct nlattr *est, struct tc_action **a, - struct tcf_proto *tp, - u32 flags, struct netlink_ext_ack *extack) -{ - return __tcf_ipt_init(net, act_xt_ops.net_id, nla, est, - a, &act_xt_ops, tp, flags); -} - -static bool tcf_ipt_act_check(struct sk_buff *skb) -{ - const struct iphdr *iph; - unsigned int nhoff, len; - - if (!pskb_may_pull(skb, sizeof(struct iphdr))) - return false; - - nhoff = skb_network_offset(skb); - iph = ip_hdr(skb); - if (iph->ihl < 5 || iph->version != 4) - return false; - - len = skb_ip_totlen(skb); - if (skb->len < nhoff + len || len < (iph->ihl * 4u)) - return false; - - return pskb_may_pull(skb, iph->ihl * 4u); -} - -TC_INDIRECT_SCOPE int tcf_ipt_act(struct sk_buff *skb, - const struct tc_action *a, - struct tcf_result *res) -{ - char saved_cb[sizeof_field(struct sk_buff, cb)]; - int ret = 0, result = 0; - struct tcf_ipt *ipt = to_ipt(a); - struct xt_action_param par; - struct nf_hook_state state = { - .net = dev_net(skb->dev), - .in = skb->dev, - .hook = ipt->tcfi_hook, - .pf = NFPROTO_IPV4, - }; - - if (skb_protocol(skb, false) != htons(ETH_P_IP)) - return TC_ACT_UNSPEC; - - if (skb_unclone(skb, GFP_ATOMIC)) - return TC_ACT_UNSPEC; - - if (!tcf_ipt_act_check(skb)) - return TC_ACT_UNSPEC; - - if (state.hook == NF_INET_POST_ROUTING) { - if (!skb_dst(skb)) - return TC_ACT_UNSPEC; - - state.out = skb->dev; - } - - memcpy(saved_cb, skb->cb, sizeof(saved_cb)); - - spin_lock(&ipt->tcf_lock); - - tcf_lastuse_update(&ipt->tcf_tm); - bstats_update(&ipt->tcf_bstats, skb); - - /* yes, we have to worry about both in and out dev - * worry later - danger - this API seems to have changed - * from earlier kernels - */ - par.state = &state; - par.target = ipt->tcfi_t->u.kernel.target; - par.targinfo = ipt->tcfi_t->data; - - memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); - - ret = par.target->target(skb, &par); - - switch (ret) { - case NF_ACCEPT: - result = TC_ACT_OK; - break; - case NF_DROP: - result = TC_ACT_SHOT; - ipt->tcf_qstats.drops++; - break; - case XT_CONTINUE: - result = TC_ACT_PIPE; - break; - default: - net_notice_ratelimited("tc filter: Bogus netfilter code %d assume ACCEPT\n", - ret); - result = TC_ACT_OK; - break; - } - spin_unlock(&ipt->tcf_lock); - - memcpy(skb->cb, saved_cb, sizeof(skb->cb)); - - return result; - -} - -static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, - int ref) -{ - unsigned char *b = skb_tail_pointer(skb); - struct tcf_ipt *ipt = to_ipt(a); - struct xt_entry_target *t; - struct tcf_t tm; - struct tc_cnt c; - - /* for simple targets kernel size == user size - * user name = target name - * for foolproof you need to not assume this - */ - - spin_lock_bh(&ipt->tcf_lock); - t = kmemdup(ipt->tcfi_t, ipt->tcfi_t->u.user.target_size, GFP_ATOMIC); - if (unlikely(!t)) - goto nla_put_failure; - - c.bindcnt = atomic_read(&ipt->tcf_bindcnt) - bind; - c.refcnt = refcount_read(&ipt->tcf_refcnt) - ref; - strcpy(t->u.user.name, ipt->tcfi_t->u.kernel.target->name); - - if (nla_put(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t) || - nla_put_u32(skb, TCA_IPT_INDEX, ipt->tcf_index) || - nla_put_u32(skb, TCA_IPT_HOOK, ipt->tcfi_hook) || - nla_put(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c) || - nla_put_string(skb, TCA_IPT_TABLE, ipt->tcfi_tname)) - goto nla_put_failure; - - tcf_tm_dump(&tm, &ipt->tcf_tm); - if (nla_put_64bit(skb, TCA_IPT_TM, sizeof(tm), &tm, TCA_IPT_PAD)) - goto nla_put_failure; - - spin_unlock_bh(&ipt->tcf_lock); - kfree(t); - return skb->len; - -nla_put_failure: - spin_unlock_bh(&ipt->tcf_lock); - nlmsg_trim(skb, b); - kfree(t); - return -1; -} - -static struct tc_action_ops act_ipt_ops = { - .kind = "ipt", - .id = TCA_ID_IPT, - .owner = THIS_MODULE, - .act = tcf_ipt_act, - .dump = tcf_ipt_dump, - .cleanup = tcf_ipt_release, - .init = tcf_ipt_init, - .size = sizeof(struct tcf_ipt), -}; - -static __net_init int ipt_init_net(struct net *net) -{ - struct tc_action_net *tn = net_generic(net, act_ipt_ops.net_id); - - return tc_action_net_init(net, tn, &act_ipt_ops); -} - -static void __net_exit ipt_exit_net(struct list_head *net_list) -{ - tc_action_net_exit(net_list, act_ipt_ops.net_id); -} - -static struct pernet_operations ipt_net_ops = { - .init = ipt_init_net, - .exit_batch = ipt_exit_net, - .id = &act_ipt_ops.net_id, - .size = sizeof(struct tc_action_net), -}; - -static struct tc_action_ops act_xt_ops = { - .kind = "xt", - .id = TCA_ID_XT, - .owner = THIS_MODULE, - .act = tcf_ipt_act, - .dump = tcf_ipt_dump, - .cleanup = tcf_ipt_release, - .init = tcf_xt_init, - .size = sizeof(struct tcf_ipt), -}; - -static __net_init int xt_init_net(struct net *net) -{ - struct tc_action_net *tn = net_generic(net, act_xt_ops.net_id); - - return tc_action_net_init(net, tn, &act_xt_ops); -} - -static void __net_exit xt_exit_net(struct list_head *net_list) -{ - tc_action_net_exit(net_list, act_xt_ops.net_id); -} - -static struct pernet_operations xt_net_ops = { - .init = xt_init_net, - .exit_batch = xt_exit_net, - .id = &act_xt_ops.net_id, - .size = sizeof(struct tc_action_net), -}; - -MODULE_AUTHOR("Jamal Hadi Salim(2002-13)"); -MODULE_DESCRIPTION("Iptables target actions"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("act_xt"); - -static int __init ipt_init_module(void) -{ - int ret1, ret2; - - ret1 = tcf_register_action(&act_xt_ops, &xt_net_ops); - if (ret1 < 0) - pr_err("Failed to load xt action\n"); - - ret2 = tcf_register_action(&act_ipt_ops, &ipt_net_ops); - if (ret2 < 0) - pr_err("Failed to load ipt action\n"); - - if (ret1 < 0 && ret2 < 0) { - return ret1; - } else - return 0; -} - -static void __exit ipt_cleanup_module(void) -{ - tcf_unregister_action(&act_ipt_ops, &ipt_net_ops); - tcf_unregister_action(&act_xt_ops, &xt_net_ops); -} - -module_init(ipt_init_module); -module_exit(ipt_cleanup_module); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 0a711c184c29..12386f590b0f 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -85,10 +85,21 @@ static void tcf_mirred_release(struct tc_action *a) static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = { [TCA_MIRRED_PARMS] = { .len = sizeof(struct tc_mirred) }, + [TCA_MIRRED_BLOCKID] = NLA_POLICY_MIN(NLA_U32, 1), }; static struct tc_action_ops act_mirred_ops; +static void tcf_mirred_replace_dev(struct tcf_mirred *m, + struct net_device *ndev) +{ + struct net_device *odev; + + odev = rcu_replace_pointer(m->tcfm_dev, ndev, + lockdep_is_held(&m->tcf_lock)); + netdev_put(odev, &m->tcfm_dev_tracker); +} + static int tcf_mirred_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, struct tcf_proto *tp, @@ -124,7 +135,18 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, return err; exists = err; if (exists && bind) - return 0; + return ACT_P_BOUND; + + if (tb[TCA_MIRRED_BLOCKID] && parm->ifindex) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot specify Block ID and dev simultaneously"); + if (exists) + tcf_idr_release(*a, bind); + else + tcf_idr_cleanup(tn, index); + + return -EINVAL; + } switch (parm->eaction) { case TCA_EGRESS_MIRROR: @@ -142,9 +164,10 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, } if (!exists) { - if (!parm->ifindex) { + if (!parm->ifindex && !tb[TCA_MIRRED_BLOCKID]) { tcf_idr_cleanup(tn, index); - NL_SET_ERR_MSG_MOD(extack, "Specified device does not exist"); + NL_SET_ERR_MSG_MOD(extack, + "Must specify device or block"); return -EINVAL; } ret = tcf_idr_create_from_flags(tn, index, est, a, @@ -170,7 +193,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, spin_lock_bh(&m->tcf_lock); if (parm->ifindex) { - struct net_device *odev, *ndev; + struct net_device *ndev; ndev = dev_get_by_index(net, parm->ifindex); if (!ndev) { @@ -179,11 +202,14 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, goto put_chain; } mac_header_xmit = dev_is_mac_header_xmit(ndev); - odev = rcu_replace_pointer(m->tcfm_dev, ndev, - lockdep_is_held(&m->tcf_lock)); - netdev_put(odev, &m->tcfm_dev_tracker); + tcf_mirred_replace_dev(m, ndev); netdev_tracker_alloc(ndev, &m->tcfm_dev_tracker, GFP_ATOMIC); m->tcfm_mac_header_xmit = mac_header_xmit; + m->tcfm_blockid = 0; + } else if (tb[TCA_MIRRED_BLOCKID]) { + tcf_mirred_replace_dev(m, NULL); + m->tcfm_mac_header_xmit = false; + m->tcfm_blockid = nla_get_u32(tb[TCA_MIRRED_BLOCKID]); } goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); m->tcfm_eaction = parm->eaction; @@ -225,48 +251,26 @@ static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb) return err; } -TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, - const struct tc_action *a, - struct tcf_result *res) +static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m, + struct net_device *dev, + const bool m_mac_header_xmit, int m_eaction, + int retval) { - struct tcf_mirred *m = to_mirred(a); - struct sk_buff *skb2 = skb; - bool m_mac_header_xmit; - struct net_device *dev; - unsigned int nest_level; - int retval, err = 0; - bool use_reinsert; + struct sk_buff *skb_to_send = skb; bool want_ingress; bool is_redirect; bool expects_nh; bool at_ingress; - int m_eaction; + bool dont_clone; int mac_len; bool at_nh; + int err; - nest_level = __this_cpu_inc_return(mirred_nest_level); - if (unlikely(nest_level > MIRRED_NEST_LIMIT)) { - net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n", - netdev_name(skb->dev)); - __this_cpu_dec(mirred_nest_level); - return TC_ACT_SHOT; - } - - tcf_lastuse_update(&m->tcf_tm); - tcf_action_update_bstats(&m->common, skb); - - m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit); - m_eaction = READ_ONCE(m->tcfm_eaction); - retval = READ_ONCE(m->tcf_action); - dev = rcu_dereference_bh(m->tcfm_dev); - if (unlikely(!dev)) { - pr_notice_once("tc mirred: target device is gone\n"); - goto out; - } - + is_redirect = tcf_mirred_is_act_redirect(m_eaction); if (unlikely(!(dev->flags & IFF_UP)) || !netif_carrier_ok(dev)) { net_notice_ratelimited("tc mirred to Houston: device %s is down\n", dev->name); + err = -ENODEV; goto out; } @@ -274,61 +278,188 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, * since we can't easily detect the clsact caller, skip clone only for * ingress - that covers the TC S/W datapath. */ - is_redirect = tcf_mirred_is_act_redirect(m_eaction); at_ingress = skb_at_tc_ingress(skb); - use_reinsert = at_ingress && is_redirect && - tcf_mirred_can_reinsert(retval); - if (!use_reinsert) { - skb2 = skb_clone(skb, GFP_ATOMIC); - if (!skb2) + dont_clone = skb_at_tc_ingress(skb) && is_redirect && + tcf_mirred_can_reinsert(retval); + if (!dont_clone) { + skb_to_send = skb_clone(skb, GFP_ATOMIC); + if (!skb_to_send) { + err = -ENOMEM; goto out; + } } want_ingress = tcf_mirred_act_wants_ingress(m_eaction); /* All mirred/redirected skbs should clear previous ct info */ - nf_reset_ct(skb2); + nf_reset_ct(skb_to_send); if (want_ingress && !at_ingress) /* drop dst for egress -> ingress */ - skb_dst_drop(skb2); + skb_dst_drop(skb_to_send); expects_nh = want_ingress || !m_mac_header_xmit; at_nh = skb->data == skb_network_header(skb); if (at_nh != expects_nh) { - mac_len = skb_at_tc_ingress(skb) ? skb->mac_len : + mac_len = at_ingress ? skb->mac_len : skb_network_offset(skb); if (expects_nh) { /* target device/action expect data at nh */ - skb_pull_rcsum(skb2, mac_len); + skb_pull_rcsum(skb_to_send, mac_len); } else { /* target device/action expect data at mac */ - skb_push_rcsum(skb2, mac_len); + skb_push_rcsum(skb_to_send, mac_len); } } - skb2->skb_iif = skb->dev->ifindex; - skb2->dev = dev; + skb_to_send->skb_iif = skb->dev->ifindex; + skb_to_send->dev = dev; - /* mirror is always swallowed */ if (is_redirect) { - skb_set_redirected(skb2, skb2->tc_at_ingress); - - /* let's the caller reinsert the packet, if possible */ - if (use_reinsert) { - err = tcf_mirred_forward(want_ingress, skb); - if (err) - tcf_action_inc_overlimit_qstats(&m->common); - __this_cpu_dec(mirred_nest_level); - return TC_ACT_CONSUMED; - } + if (skb == skb_to_send) + retval = TC_ACT_CONSUMED; + + skb_set_redirected(skb_to_send, skb_to_send->tc_at_ingress); + + err = tcf_mirred_forward(want_ingress, skb_to_send); + } else { + err = tcf_mirred_forward(want_ingress, skb_to_send); } - err = tcf_mirred_forward(want_ingress, skb2); if (err) { out: tcf_action_inc_overlimit_qstats(&m->common); - if (tcf_mirred_is_act_redirect(m_eaction)) + if (is_redirect) retval = TC_ACT_SHOT; } + + return retval; +} + +static int tcf_blockcast_redir(struct sk_buff *skb, struct tcf_mirred *m, + struct tcf_block *block, int m_eaction, + const u32 exception_ifindex, int retval) +{ + struct net_device *dev_prev = NULL; + struct net_device *dev = NULL; + unsigned long index; + int mirred_eaction; + + mirred_eaction = tcf_mirred_act_wants_ingress(m_eaction) ? + TCA_INGRESS_MIRROR : TCA_EGRESS_MIRROR; + + xa_for_each(&block->ports, index, dev) { + if (index == exception_ifindex) + continue; + + if (!dev_prev) + goto assign_prev; + + tcf_mirred_to_dev(skb, m, dev_prev, + dev_is_mac_header_xmit(dev), + mirred_eaction, retval); +assign_prev: + dev_prev = dev; + } + + if (dev_prev) + return tcf_mirred_to_dev(skb, m, dev_prev, + dev_is_mac_header_xmit(dev_prev), + m_eaction, retval); + + return retval; +} + +static int tcf_blockcast_mirror(struct sk_buff *skb, struct tcf_mirred *m, + struct tcf_block *block, int m_eaction, + const u32 exception_ifindex, int retval) +{ + struct net_device *dev = NULL; + unsigned long index; + + xa_for_each(&block->ports, index, dev) { + if (index == exception_ifindex) + continue; + + tcf_mirred_to_dev(skb, m, dev, + dev_is_mac_header_xmit(dev), + m_eaction, retval); + } + + return retval; +} + +static int tcf_blockcast(struct sk_buff *skb, struct tcf_mirred *m, + const u32 blockid, struct tcf_result *res, + int retval) +{ + const u32 exception_ifindex = skb->dev->ifindex; + struct tcf_block *block; + bool is_redirect; + int m_eaction; + + m_eaction = READ_ONCE(m->tcfm_eaction); + is_redirect = tcf_mirred_is_act_redirect(m_eaction); + + /* we are already under rcu protection, so can call block lookup + * directly. + */ + block = tcf_block_lookup(dev_net(skb->dev), blockid); + if (!block || xa_empty(&block->ports)) { + tcf_action_inc_overlimit_qstats(&m->common); + return retval; + } + + if (is_redirect) + return tcf_blockcast_redir(skb, m, block, m_eaction, + exception_ifindex, retval); + + /* If it's not redirect, it is mirror */ + return tcf_blockcast_mirror(skb, m, block, m_eaction, exception_ifindex, + retval); +} + +TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, + const struct tc_action *a, + struct tcf_result *res) +{ + struct tcf_mirred *m = to_mirred(a); + int retval = READ_ONCE(m->tcf_action); + unsigned int nest_level; + bool m_mac_header_xmit; + struct net_device *dev; + int m_eaction; + u32 blockid; + + nest_level = __this_cpu_inc_return(mirred_nest_level); + if (unlikely(nest_level > MIRRED_NEST_LIMIT)) { + net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n", + netdev_name(skb->dev)); + retval = TC_ACT_SHOT; + goto dec_nest_level; + } + + tcf_lastuse_update(&m->tcf_tm); + tcf_action_update_bstats(&m->common, skb); + + blockid = READ_ONCE(m->tcfm_blockid); + if (blockid) { + retval = tcf_blockcast(skb, m, blockid, res, retval); + goto dec_nest_level; + } + + dev = rcu_dereference_bh(m->tcfm_dev); + if (unlikely(!dev)) { + pr_notice_once("tc mirred: target device is gone\n"); + tcf_action_inc_overlimit_qstats(&m->common); + goto dec_nest_level; + } + + m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit); + m_eaction = READ_ONCE(m->tcfm_eaction); + + retval = tcf_mirred_to_dev(skb, m, dev, m_mac_header_xmit, m_eaction, + retval); + +dec_nest_level: __this_cpu_dec(mirred_nest_level); return retval; @@ -356,6 +487,7 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, }; struct net_device *dev; struct tcf_t t; + u32 blockid; spin_lock_bh(&m->tcf_lock); opt.action = m->tcf_action; @@ -367,6 +499,10 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, if (nla_put(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt)) goto nla_put_failure; + blockid = m->tcfm_blockid; + if (blockid && nla_put_u32(skb, TCA_MIRRED_BLOCKID, blockid)) + goto nla_put_failure; + tcf_tm_dump(&t, &m->tcf_tm); if (nla_put_64bit(skb, TCA_MIRRED_TM, sizeof(t), &t, TCA_MIRRED_PAD)) goto nla_put_failure; @@ -397,6 +533,8 @@ static int mirred_device_event(struct notifier_block *unused, * net_device are already rcu protected. */ RCU_INIT_POINTER(m->tcfm_dev, NULL); + } else if (m->tcfm_blockid) { + m->tcfm_blockid = 0; } spin_unlock_bh(&m->tcf_lock); } diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index 1010dc632874..34b8edb6cc77 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -195,7 +195,7 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla, return err; exists = err; if (exists && bind) - return 0; + return ACT_P_BOUND; if (!exists) { ret = tcf_idr_create(tn, index, est, a, &act_mpls_ops, bind, diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 4184af5abbf3..a180e724634e 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -69,7 +69,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, ret = ACT_P_CREATED; } else if (err > 0) { if (bind) - return 0; + return ACT_P_BOUND; if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 1ef8fcfa9997..2ef22969f274 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -202,7 +202,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, ret = ACT_P_CREATED; } else if (err > 0) { if (bind) - return 0; + return ACT_P_BOUND; if (!(flags & TCA_ACT_FLAGS_REPLACE)) { ret = -EEXIST; goto out_release; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index f3121c5a85e9..e119b4a3db9f 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -77,7 +77,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, return err; exists = err; if (exists && bind) - return 0; + return ACT_P_BOUND; if (!exists) { ret = tcf_idr_create(tn, index, NULL, a, diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 4c670e7568dc..c5c61efe6db4 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -66,7 +66,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, return err; exists = err; if (exists && bind) - return 0; + return ACT_P_BOUND; if (!exists) { ret = tcf_idr_create(tn, index, est, a, diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 4b84514534f3..0a3e92888295 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -118,7 +118,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, return err; exists = err; if (exists && bind) - return 0; + return ACT_P_BOUND; if (tb[TCA_DEF_DATA] == NULL) { if (exists) diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index ce7008cf291c..754f78b35bb8 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -209,7 +209,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, return err; exists = err; if (exists && bind) - return 0; + return ACT_P_BOUND; if (!flags) { if (exists) diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index dffa990a9629..bcb673ab0008 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -157,7 +157,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, return err; exists = err; if (exists && bind) - return 0; + return ACT_P_BOUND; if (!lflags) { if (exists) diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 0c8aa7e686ea..300b08aa8283 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -401,7 +401,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, return err; exists = err; if (exists && bind) - return 0; + return ACT_P_BOUND; switch (parm->t_action) { case TCA_TUNNEL_KEY_ACT_RELEASE: diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 0251442f5f29..836183011a7c 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -151,7 +151,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, return err; exists = err; if (exists && bind) - return 0; + return ACT_P_BOUND; switch (parm->v_action) { case TCA_VLAN_ACT_POP: diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index dc1c19a25882..adf5de1ff773 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -531,6 +531,7 @@ static void tcf_block_destroy(struct tcf_block *block) { mutex_destroy(&block->lock); mutex_destroy(&block->proto_destroy_lock); + xa_destroy(&block->ports); kfree_rcu(block, rcu); } @@ -1002,6 +1003,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q, refcount_set(&block->refcnt, 1); block->net = net; block->index = block_index; + xa_init(&block->ports); /* Don't store q pointer for blocks which are shared */ if (!tcf_block_shared(block)) @@ -1009,12 +1011,13 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q, return block; } -static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index) +struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index) { struct tcf_net *tn = net_generic(net, tcf_net_id); return idr_find(&tn->idr, block_index); } +EXPORT_SYMBOL(tcf_block_lookup); static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index) { @@ -1657,7 +1660,6 @@ static inline int __tcf_classify(struct sk_buff *skb, int act_index, u32 *last_executed_chain) { - u32 orig_reason = res->drop_reason; #ifdef CONFIG_NET_CLS_ACT const int max_reclassify_loop = 16; const struct tcf_proto *first_tp; @@ -1682,13 +1684,15 @@ reclassify: */ if (unlikely(n->tp != tp || n->tp->chain != n->chain || !tp->ops->get_exts)) { - tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); + tcf_set_drop_reason(skb, + SKB_DROP_REASON_TC_COOKIE_ERROR); return TC_ACT_SHOT; } exts = tp->ops->get_exts(tp, n->handle); if (unlikely(!exts || n->exts != exts)) { - tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); + tcf_set_drop_reason(skb, + SKB_DROP_REASON_TC_COOKIE_ERROR); return TC_ACT_SHOT; } @@ -1712,18 +1716,13 @@ reclassify: goto reset; } #endif - if (err >= 0) { - /* Policy drop or drop reason is over-written by - * classifiers with a bogus value(0) */ - if (err == TC_ACT_SHOT && - res->drop_reason == SKB_NOT_DROPPED_YET) - tcf_set_drop_reason(res, orig_reason); + if (err >= 0) return err; - } } if (unlikely(n)) { - tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); + tcf_set_drop_reason(skb, + SKB_DROP_REASON_TC_COOKIE_ERROR); return TC_ACT_SHOT; } @@ -1735,7 +1734,8 @@ reset: tp->chain->block->index, tp->prio & 0xffff, ntohs(tp->protocol)); - tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); + tcf_set_drop_reason(skb, + SKB_DROP_REASON_TC_RECLASSIFY_LOOP); return TC_ACT_SHOT; } @@ -1773,7 +1773,8 @@ int tcf_classify(struct sk_buff *skb, n = tcf_exts_miss_cookie_lookup(ext->act_miss_cookie, &act_index); if (!n) { - tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); + tcf_set_drop_reason(skb, + SKB_DROP_REASON_TC_COOKIE_ERROR); return TC_ACT_SHOT; } @@ -1784,7 +1785,9 @@ int tcf_classify(struct sk_buff *skb, fchain = tcf_chain_lookup_rcu(block, chain); if (!fchain) { - tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); + tcf_set_drop_reason(skb, + SKB_DROP_REASON_TC_CHAIN_NOTFOUND); + return TC_ACT_SHOT; } @@ -1806,10 +1809,9 @@ int tcf_classify(struct sk_buff *skb, ext = tc_skb_ext_alloc(skb); if (WARN_ON_ONCE(!ext)) { - tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); + tcf_set_drop_reason(skb, SKB_DROP_REASON_NOMEM); return TC_ACT_SHOT; } - ext->chain = last_executed_chain; ext->mru = cb->mru; ext->post_ct = cb->post_ct; @@ -2733,6 +2735,7 @@ errout: } static const struct nla_policy tcf_tfilter_dump_policy[TCA_MAX + 1] = { + [TCA_CHAIN] = { .type = NLA_U32 }, [TCA_DUMP_FLAGS] = NLA_POLICY_BITFIELD32(TCA_DUMP_FLAGS_TERSE), }; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index e9eaf637220e..2a2a48838eb9 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1003,6 +1003,32 @@ static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible) return false; } +static int qdisc_get_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, u32 clid, struct Qdisc *q, + struct netlink_ext_ack *extack) +{ + struct sk_buff *skb; + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + if (!tc_qdisc_dump_ignore(q, false)) { + if (tc_fill_qdisc(skb, q, clid, portid, n->nlmsg_seq, 0, + RTM_NEWQDISC, extack) < 0) + goto err_out; + } + + if (skb->len) + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); + +err_out: + kfree_skb(skb); + return -EINVAL; +} + static int qdisc_notify(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, u32 clid, struct Qdisc *old, struct Qdisc *new, @@ -1011,6 +1037,9 @@ static int qdisc_notify(struct net *net, struct sk_buff *oskb, struct sk_buff *skb; u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) + return 0; + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; @@ -1180,6 +1209,43 @@ skip: return 0; } +static int qdisc_block_add_dev(struct Qdisc *sch, struct net_device *dev, + struct netlink_ext_ack *extack) +{ + const struct Qdisc_class_ops *cl_ops = sch->ops->cl_ops; + struct tcf_block *block; + int err; + + block = cl_ops->tcf_block(sch, TC_H_MIN_INGRESS, NULL); + if (block) { + err = xa_insert(&block->ports, dev->ifindex, dev, GFP_KERNEL); + if (err) { + NL_SET_ERR_MSG(extack, + "ingress block dev insert failed"); + return err; + } + } + + block = cl_ops->tcf_block(sch, TC_H_MIN_EGRESS, NULL); + if (block) { + err = xa_insert(&block->ports, dev->ifindex, dev, GFP_KERNEL); + if (err) { + NL_SET_ERR_MSG(extack, + "Egress block dev insert failed"); + goto err_out; + } + } + + return 0; + +err_out: + block = cl_ops->tcf_block(sch, TC_H_MIN_INGRESS, NULL); + if (block) + xa_erase(&block->ports, dev->ifindex); + + return err; +} + static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca, struct netlink_ext_ack *extack) { @@ -1350,6 +1416,10 @@ static struct Qdisc *qdisc_create(struct net_device *dev, qdisc_hash_add(sch, false); trace_qdisc_create(ops, dev, parent); + err = qdisc_block_add_dev(sch, dev, extack); + if (err) + goto err_out4; + return sch; err_out4: @@ -1542,7 +1612,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, if (err != 0) return err; } else { - qdisc_notify(net, skb, n, clid, NULL, q, NULL); + qdisc_get_notify(net, skb, n, clid, q, NULL); } return 0; } @@ -1936,6 +2006,9 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb, struct sk_buff *skb; u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) + return 0; + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; @@ -1949,6 +2022,27 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb, n->nlmsg_flags & NLM_F_ECHO); } +static int tclass_get_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, struct Qdisc *q, + unsigned long cl, struct netlink_ext_ack *extack) +{ + struct sk_buff *skb; + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, RTM_NEWTCLASS, + extack) < 0) { + kfree_skb(skb); + return -EINVAL; + } + + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); +} + static int tclass_del_notify(struct net *net, const struct Qdisc_class_ops *cops, struct sk_buff *oskb, struct nlmsghdr *n, @@ -1962,14 +2056,18 @@ static int tclass_del_notify(struct net *net, if (!cops->delete) return -EOPNOTSUPP; - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (!skb) - return -ENOBUFS; + if (rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) { + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; - if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, - RTM_DELTCLASS, extack) < 0) { - kfree_skb(skb); - return -EINVAL; + if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, + RTM_DELTCLASS, extack) < 0) { + kfree_skb(skb); + return -EINVAL; + } + } else { + skb = NULL; } err = cops->delete(q, cl, extack); @@ -1978,8 +2076,8 @@ static int tclass_del_notify(struct net *net, return err; } - err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, - n->nlmsg_flags & NLM_F_ECHO); + err = rtnetlink_maybe_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); return err; } @@ -2174,7 +2272,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, tc_bind_tclass(q, portid, clid, 0); goto out; case RTM_GETTCLASS: - err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS, extack); + err = tclass_get_notify(net, skb, n, q, cl, extack); goto out; default: err = -EINVAL; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 8dd0e5925342..e33568df97a5 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1051,6 +1051,9 @@ static void qdisc_free_cb(struct rcu_head *head) static void __qdisc_destroy(struct Qdisc *qdisc) { const struct Qdisc_ops *ops = qdisc->ops; + struct net_device *dev = qdisc_dev(qdisc); + const struct Qdisc_class_ops *cops; + struct tcf_block *block; #ifdef CONFIG_NET_SCHED qdisc_hash_del(qdisc); @@ -1061,11 +1064,24 @@ static void __qdisc_destroy(struct Qdisc *qdisc) qdisc_reset(qdisc); + cops = ops->cl_ops; + if (ops->ingress_block_get) { + block = cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL); + if (block) + xa_erase(&block->ports, dev->ifindex); + } + + if (ops->egress_block_get) { + block = cops->tcf_block(qdisc, TC_H_MIN_EGRESS, NULL); + if (block) + xa_erase(&block->ports, dev->ifindex); + } + if (ops->destroy) ops->destroy(qdisc); module_put(ops->owner); - netdev_put(qdisc_dev(qdisc), &qdisc->dev_tracker); + netdev_put(dev, &qdisc->dev_tracker); trace_qdisc_destroy(qdisc); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 5fb02bbb4b34..6b9fcdb0952a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2102,6 +2102,10 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (unlikely(flags & MSG_ERRQUEUE)) return inet_recv_error(sk, msg, len, addr_len); + if (sk_can_busy_loop(sk) && + skb_queue_empty_lockless(&sk->sk_receive_queue)) + sk_busy_loop(sk, flags & MSG_DONTWAIT); + lock_sock(sk); if (sctp_style(sk, TCP) && !sctp_sstate(sk, ESTABLISHED) && @@ -9046,12 +9050,6 @@ struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, int *err) if (sk->sk_shutdown & RCV_SHUTDOWN) break; - if (sk_can_busy_loop(sk)) { - sk_busy_loop(sk, flags & MSG_DONTWAIT); - - if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) - continue; - } /* User doesn't want to wait. */ error = -EAGAIN; diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 7fc2f3c6d248..a2cb30af46cb 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -677,8 +677,6 @@ static bool smc_isascii(char *hostname) static void smc_conn_save_peer_info_fce(struct smc_sock *smc, struct smc_clc_msg_accept_confirm *clc) { - struct smc_clc_msg_accept_confirm_v2 *clc_v2 = - (struct smc_clc_msg_accept_confirm_v2 *)clc; struct smc_clc_first_contact_ext *fce; int clc_v2_len; @@ -687,17 +685,15 @@ static void smc_conn_save_peer_info_fce(struct smc_sock *smc, return; if (smc->conn.lgr->is_smcd) { - memcpy(smc->conn.lgr->negotiated_eid, clc_v2->d1.eid, + memcpy(smc->conn.lgr->negotiated_eid, clc->d1.eid, SMC_MAX_EID_LEN); - clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm_v2, - d1); + clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm, d1); } else { - memcpy(smc->conn.lgr->negotiated_eid, clc_v2->r1.eid, + memcpy(smc->conn.lgr->negotiated_eid, clc->r1.eid, SMC_MAX_EID_LEN); - clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm_v2, - r1); + clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm, r1); } - fce = (struct smc_clc_first_contact_ext *)(((u8 *)clc_v2) + clc_v2_len); + fce = (struct smc_clc_first_contact_ext *)(((u8 *)clc) + clc_v2_len); smc->conn.lgr->peer_os = fce->os_type; smc->conn.lgr->peer_smc_release = fce->release; if (smc_isascii(fce->hostname)) @@ -1048,7 +1044,8 @@ static int smc_find_ism_v2_device_clnt(struct smc_sock *smc, { int rc = SMC_CLC_DECL_NOSMCDDEV; struct smcd_dev *smcd; - int i = 1; + int i = 1, entry = 1; + bool is_virtual; u16 chid; if (smcd_indicated(ini->smc_type_v1)) @@ -1060,14 +1057,23 @@ static int smc_find_ism_v2_device_clnt(struct smc_sock *smc, chid = smc_ism_get_chid(smcd); if (!smc_find_ism_v2_is_unique_chid(chid, ini, i)) continue; + is_virtual = __smc_ism_is_virtual(chid); if (!smc_pnet_is_pnetid_set(smcd->pnetid) || smc_pnet_is_ndev_pnetid(sock_net(&smc->sk), smcd->pnetid)) { + if (is_virtual && entry == SMCD_CLC_MAX_V2_GID_ENTRIES) + /* It's the last GID-CHID entry left in CLC + * Proposal SMC-Dv2 extension, but a virtual + * ISM device will take two entries. So give + * up it and try the next potential ISM device. + */ + continue; ini->ism_dev[i] = smcd; ini->ism_chid[i] = chid; ini->is_smcd = true; rc = 0; i++; - if (i > SMC_MAX_ISM_DEVS) + entry = is_virtual ? entry + 2 : entry + 1; + if (entry > SMCD_CLC_MAX_V2_GID_ENTRIES) break; } } @@ -1149,13 +1155,13 @@ static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, } #define SMC_CLC_MAX_ACCEPT_LEN \ - (sizeof(struct smc_clc_msg_accept_confirm_v2) + \ + (sizeof(struct smc_clc_msg_accept_confirm) + \ sizeof(struct smc_clc_first_contact_ext_v2x) + \ sizeof(struct smc_clc_msg_trail)) /* CLC handshake during connect */ static int smc_connect_clc(struct smc_sock *smc, - struct smc_clc_msg_accept_confirm_v2 *aclc2, + struct smc_clc_msg_accept_confirm *aclc, struct smc_init_info *ini) { int rc = 0; @@ -1165,7 +1171,7 @@ static int smc_connect_clc(struct smc_sock *smc, if (rc) return rc; /* receive SMC Accept CLC message */ - return smc_clc_wait_msg(smc, aclc2, SMC_CLC_MAX_ACCEPT_LEN, + return smc_clc_wait_msg(smc, aclc, SMC_CLC_MAX_ACCEPT_LEN, SMC_CLC_ACCEPT, CLC_WAIT_TIME); } @@ -1201,10 +1207,8 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc, struct smc_clc_msg_accept_confirm *aclc, struct smc_init_info *ini) { - struct smc_clc_msg_accept_confirm_v2 *clc_v2 = - (struct smc_clc_msg_accept_confirm_v2 *)aclc; struct smc_clc_first_contact_ext *fce = - smc_get_clc_first_contact_ext(clc_v2, false); + smc_get_clc_first_contact_ext(aclc, false); struct net *net = sock_net(&smc->sk); int rc; @@ -1327,10 +1331,7 @@ static int smc_connect_rdma(struct smc_sock *smc, } if (aclc->hdr.version > SMC_V1) { - struct smc_clc_msg_accept_confirm_v2 *clc_v2 = - (struct smc_clc_msg_accept_confirm_v2 *)aclc; - - eid = clc_v2->r1.eid; + eid = aclc->r1.eid; if (ini->first_contact_local) smc_fill_gid_list(link->lgr, &ini->smcrv2.gidlist, link->smcibdev, link->gid); @@ -1371,7 +1372,7 @@ connect_abort: * Determine from the CHID of the received CLC ACCEPT the ISM device chosen. */ static int -smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm_v2 *aclc, +smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm *aclc, struct smc_init_info *ini) { int i; @@ -1398,12 +1399,9 @@ static int smc_connect_ism(struct smc_sock *smc, ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK; if (aclc->hdr.version == SMC_V2) { - struct smc_clc_msg_accept_confirm_v2 *aclc_v2 = - (struct smc_clc_msg_accept_confirm_v2 *)aclc; - if (ini->first_contact_peer) { struct smc_clc_first_contact_ext *fce = - smc_get_clc_first_contact_ext(aclc_v2, true); + smc_get_clc_first_contact_ext(aclc, true); ini->release_nr = fce->release; rc = smc_clc_clnt_v2x_features_validate(fce, ini); @@ -1411,11 +1409,16 @@ static int smc_connect_ism(struct smc_sock *smc, return rc; } - rc = smc_v2_determine_accepted_chid(aclc_v2, ini); + rc = smc_v2_determine_accepted_chid(aclc, ini); if (rc) return rc; + + if (__smc_ism_is_virtual(ini->ism_chid[ini->ism_selected])) + ini->ism_peer_gid[ini->ism_selected].gid_ext = + ntohll(aclc->d1.gid_ext); + /* for non-virtual ISM devices, peer gid_ext remains 0. */ } - ini->ism_peer_gid[ini->ism_selected] = ntohll(aclc->d0.gid); + ini->ism_peer_gid[ini->ism_selected].gid = ntohll(aclc->d0.gid); /* there is only one lgr role for SMC-D; use server lock */ mutex_lock(&smc_server_lgr_pending); @@ -1437,12 +1440,8 @@ static int smc_connect_ism(struct smc_sock *smc, smc_rx_init(smc); smc_tx_init(smc); - if (aclc->hdr.version > SMC_V1) { - struct smc_clc_msg_accept_confirm_v2 *clc_v2 = - (struct smc_clc_msg_accept_confirm_v2 *)aclc; - - eid = clc_v2->d1.eid; - } + if (aclc->hdr.version > SMC_V1) + eid = aclc->d1.eid; rc = smc_clc_send_confirm(smc, ini->first_contact_local, aclc->hdr.version, eid, ini); @@ -1493,7 +1492,6 @@ static int smc_connect_check_aclc(struct smc_init_info *ini, static int __smc_connect(struct smc_sock *smc) { u8 version = smc_ism_is_v2_capable() ? SMC_V2 : SMC_V1; - struct smc_clc_msg_accept_confirm_v2 *aclc2; struct smc_clc_msg_accept_confirm *aclc; struct smc_init_info *ini = NULL; u8 *buf = NULL; @@ -1541,11 +1539,10 @@ static int __smc_connect(struct smc_sock *smc) rc = SMC_CLC_DECL_MEM; goto fallback; } - aclc2 = (struct smc_clc_msg_accept_confirm_v2 *)buf; - aclc = (struct smc_clc_msg_accept_confirm *)aclc2; + aclc = (struct smc_clc_msg_accept_confirm *)buf; /* perform CLC handshake */ - rc = smc_connect_clc(smc, aclc2, ini); + rc = smc_connect_clc(smc, aclc, ini); if (rc) { /* -EAGAIN on timeout, see tcp_recvmsg() */ if (rc == -EAGAIN) { @@ -2106,7 +2103,8 @@ static bool smc_is_already_selected(struct smcd_dev *smcd, /* check for ISM devices matching proposed ISM devices */ static void smc_check_ism_v2_match(struct smc_init_info *ini, - u16 proposed_chid, u64 proposed_gid, + u16 proposed_chid, + struct smcd_gid *proposed_gid, unsigned int *matches) { struct smcd_dev *smcd; @@ -2118,7 +2116,11 @@ static void smc_check_ism_v2_match(struct smc_init_info *ini, continue; if (smc_ism_get_chid(smcd) == proposed_chid && !smc_ism_cantalk(proposed_gid, ISM_RESERVED_VLANID, smcd)) { - ini->ism_peer_gid[*matches] = proposed_gid; + ini->ism_peer_gid[*matches].gid = proposed_gid->gid; + if (__smc_ism_is_virtual(proposed_chid)) + ini->ism_peer_gid[*matches].gid_ext = + proposed_gid->gid_ext; + /* non-virtual ISM's peer gid_ext remains 0. */ ini->ism_dev[*matches] = smcd; (*matches)++; break; @@ -2140,9 +2142,11 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc, struct smc_clc_v2_extension *smc_v2_ext; struct smc_clc_msg_smcd *pclc_smcd; unsigned int matches = 0; + struct smcd_gid smcd_gid; u8 smcd_version; u8 *eid = NULL; int i, rc; + u16 chid; if (!(ini->smcd_version & SMC_V2) || !smcd_indicated(ini->smc_type_v2)) goto not_found; @@ -2152,18 +2156,35 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc, smcd_v2_ext = smc_get_clc_smcd_v2_ext(smc_v2_ext); mutex_lock(&smcd_dev_list.mutex); - if (pclc_smcd->ism.chid) + if (pclc_smcd->ism.chid) { /* check for ISM device matching proposed native ISM device */ + smcd_gid.gid = ntohll(pclc_smcd->ism.gid); + smcd_gid.gid_ext = 0; smc_check_ism_v2_match(ini, ntohs(pclc_smcd->ism.chid), - ntohll(pclc_smcd->ism.gid), &matches); - for (i = 1; i <= smc_v2_ext->hdr.ism_gid_cnt; i++) { + &smcd_gid, &matches); + } + for (i = 0; i < smc_v2_ext->hdr.ism_gid_cnt; i++) { /* check for ISM devices matching proposed non-native ISM * devices */ - smc_check_ism_v2_match(ini, - ntohs(smcd_v2_ext->gidchid[i - 1].chid), - ntohll(smcd_v2_ext->gidchid[i - 1].gid), - &matches); + smcd_gid.gid = ntohll(smcd_v2_ext->gidchid[i].gid); + smcd_gid.gid_ext = 0; + chid = ntohs(smcd_v2_ext->gidchid[i].chid); + if (__smc_ism_is_virtual(chid)) { + if ((i + 1) == smc_v2_ext->hdr.ism_gid_cnt || + chid != ntohs(smcd_v2_ext->gidchid[i + 1].chid)) + /* each virtual ISM device takes two GID-CHID + * entries and CHID of the second entry repeats + * that of the first entry. + * + * So check if the next GID-CHID entry exists + * and both two entries' CHIDs are the same. + */ + continue; + smcd_gid.gid_ext = + ntohll(smcd_v2_ext->gidchid[++i].gid); + } + smc_check_ism_v2_match(ini, chid, &smcd_gid, &matches); } mutex_unlock(&smcd_dev_list.mutex); @@ -2212,7 +2233,8 @@ static void smc_find_ism_v1_device_serv(struct smc_sock *new_smc, if (!(ini->smcd_version & SMC_V1) || !smcd_indicated(ini->smc_type_v1)) goto not_found; ini->is_smcd = true; /* prepare ISM check */ - ini->ism_peer_gid[0] = ntohll(pclc_smcd->ism.gid); + ini->ism_peer_gid[0].gid = ntohll(pclc_smcd->ism.gid); + ini->ism_peer_gid[0].gid_ext = 0; rc = smc_find_ism_device(new_smc, ini); if (rc) goto not_found; diff --git a/net/smc/smc.h b/net/smc/smc.h index cd51261b7d9e..df64efd2dee8 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -29,9 +29,6 @@ #define SMCPROTO_SMC 0 /* SMC protocol, IPv4 */ #define SMCPROTO_SMC6 1 /* SMC protocol, IPv6 */ -#define SMC_MAX_ISM_DEVS 8 /* max # of proposed non-native ISM - * devices - */ #define SMC_AUTOCORKING_DEFAULT_SIZE 0x10000 /* 64K by default */ extern struct proto smc_proto; @@ -58,6 +55,13 @@ enum smc_state { /* possible states of an SMC socket */ SMC_PROCESSABORT = 27, }; +enum smc_supplemental_features { + SMC_SPF_VIRT_ISM_DEV = 0, +}; + +#define SMC_FEATURE_MASK \ + (BIT(SMC_SPF_VIRT_ISM_DEV)) + struct smc_link_group; struct smc_wr_rx_hdr { /* common prefix part of LLC and CDC to demultiplex */ diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 95e19aa3e769..9a13709bea1c 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -155,10 +155,12 @@ static int smc_clc_ueid_remove(char *ueid) rc = 0; } } +#if IS_ENABLED(CONFIG_S390) if (!rc && !smc_clc_eid_table.ueid_cnt) { smc_clc_eid_table.seid_enabled = 1; rc = -EAGAIN; /* indicate success and enabling of seid */ } +#endif write_unlock(&smc_clc_eid_table.lock); return rc; } @@ -273,22 +275,30 @@ err: int smc_nl_enable_seid(struct sk_buff *skb, struct genl_info *info) { +#if IS_ENABLED(CONFIG_S390) write_lock(&smc_clc_eid_table.lock); smc_clc_eid_table.seid_enabled = 1; write_unlock(&smc_clc_eid_table.lock); return 0; +#else + return -EOPNOTSUPP; +#endif } int smc_nl_disable_seid(struct sk_buff *skb, struct genl_info *info) { int rc = 0; +#if IS_ENABLED(CONFIG_S390) write_lock(&smc_clc_eid_table.lock); if (!smc_clc_eid_table.ueid_cnt) rc = -ENOENT; else smc_clc_eid_table.seid_enabled = 0; write_unlock(&smc_clc_eid_table.lock); +#else + rc = -EOPNOTSUPP; +#endif return rc; } @@ -377,9 +387,9 @@ static bool smc_clc_msg_prop_valid(struct smc_clc_msg_proposal *pclc) /* check arriving CLC accept or confirm */ static bool -smc_clc_msg_acc_conf_valid(struct smc_clc_msg_accept_confirm_v2 *clc_v2) +smc_clc_msg_acc_conf_valid(struct smc_clc_msg_accept_confirm *clc) { - struct smc_clc_msg_hdr *hdr = &clc_v2->hdr; + struct smc_clc_msg_hdr *hdr = &clc->hdr; if (hdr->typev1 != SMC_TYPE_R && hdr->typev1 != SMC_TYPE_D) return false; @@ -418,15 +428,16 @@ smc_clc_msg_decl_valid(struct smc_clc_msg_decline *dclc) return true; } -static int smc_clc_fill_fce(struct smc_clc_first_contact_ext_v2x *fce, - struct smc_init_info *ini) +static int smc_clc_fill_fce_v2x(struct smc_clc_first_contact_ext_v2x *fce_v2x, + struct smc_init_info *ini) { - int ret = sizeof(*fce); + int ret = sizeof(*fce_v2x); - memset(fce, 0, sizeof(*fce)); - fce->fce_v2_base.os_type = SMC_CLC_OS_LINUX; - fce->fce_v2_base.release = ini->release_nr; - memcpy(fce->fce_v2_base.hostname, smc_hostname, sizeof(smc_hostname)); + memset(fce_v2x, 0, sizeof(*fce_v2x)); + fce_v2x->fce_v2_base.os_type = SMC_CLC_OS_LINUX; + fce_v2x->fce_v2_base.release = ini->release_nr; + memcpy(fce_v2x->fce_v2_base.hostname, + smc_hostname, sizeof(smc_hostname)); if (ini->is_smcd && ini->release_nr < SMC_RELEASE_1) { ret = sizeof(struct smc_clc_first_contact_ext); goto out; @@ -434,9 +445,10 @@ static int smc_clc_fill_fce(struct smc_clc_first_contact_ext_v2x *fce, if (ini->release_nr >= SMC_RELEASE_1) { if (!ini->is_smcd) { - fce->max_conns = ini->max_conns; - fce->max_links = ini->max_links; + fce_v2x->max_conns = ini->max_conns; + fce_v2x->max_links = ini->max_links; } + fce_v2x->feature_mask = htons(ini->feature_mask); } out: @@ -448,7 +460,7 @@ out: */ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm, bool check_trl) { - struct smc_clc_msg_accept_confirm_v2 *clc_v2; + struct smc_clc_msg_accept_confirm *clc; struct smc_clc_msg_proposal *pclc; struct smc_clc_msg_decline *dclc; struct smc_clc_msg_trail *trl; @@ -466,12 +478,11 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm, bool check_trl) break; case SMC_CLC_ACCEPT: case SMC_CLC_CONFIRM: - clc_v2 = (struct smc_clc_msg_accept_confirm_v2 *)clcm; - if (!smc_clc_msg_acc_conf_valid(clc_v2)) + clc = (struct smc_clc_msg_accept_confirm *)clcm; + if (!smc_clc_msg_acc_conf_valid(clc)) return false; trl = (struct smc_clc_msg_trail *) - ((u8 *)clc_v2 + ntohs(clc_v2->hdr.length) - - sizeof(*trl)); + ((u8 *)clc + ntohs(clc->hdr.length) - sizeof(*trl)); break; case SMC_CLC_DECLINE: dclc = (struct smc_clc_msg_decline *)clcm; @@ -882,11 +893,13 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) ETH_ALEN); } if (smcd_indicated(ini->smc_type_v1)) { + struct smcd_gid smcd_gid; + /* add SMC-D specifics */ if (ini->ism_dev[0]) { smcd = ini->ism_dev[0]; - pclc_smcd->ism.gid = - htonll(smcd->ops->get_local_gid(smcd)); + smcd->ops->get_local_gid(smcd, &smcd_gid); + pclc_smcd->ism.gid = htonll(smcd_gid.gid); pclc_smcd->ism.chid = htons(smc_ism_get_chid(ini->ism_dev[0])); } @@ -907,6 +920,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) pclc_smcd->v2_ext_offset = htons(v2_ext_offset); plen += sizeof(*v2_ext); + v2_ext->feature_mask = htons(SMC_FEATURE_MASK); read_lock(&smc_clc_eid_table.lock); v2_ext->hdr.eid_cnt = smc_clc_eid_table.ueid_cnt; plen += smc_clc_eid_table.ueid_cnt * SMC_MAX_EID_LEN; @@ -918,10 +932,11 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) read_unlock(&smc_clc_eid_table.lock); } if (smcd_indicated(ini->smc_type_v2)) { + struct smcd_gid smcd_gid; u8 *eid = NULL; + int entry = 0; v2_ext->hdr.flag.seid = smc_clc_eid_table.seid_enabled; - v2_ext->hdr.ism_gid_cnt = ini->ism_offered_cnt; v2_ext->hdr.smcd_v2_ext_offset = htons(sizeof(*v2_ext) - offsetofend(struct smc_clnt_opts_area_hdr, smcd_v2_ext_offset) + @@ -933,14 +948,26 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) if (ini->ism_offered_cnt) { for (i = 1; i <= ini->ism_offered_cnt; i++) { smcd = ini->ism_dev[i]; - gidchids[i - 1].gid = - htonll(smcd->ops->get_local_gid(smcd)); - gidchids[i - 1].chid = + smcd->ops->get_local_gid(smcd, &smcd_gid); + gidchids[entry].chid = htons(smc_ism_get_chid(ini->ism_dev[i])); + gidchids[entry].gid = htonll(smcd_gid.gid); + if (smc_ism_is_virtual(smcd)) { + /* a virtual ISM device takes two + * entries. CHID of the second entry + * repeats that of the first entry. + */ + gidchids[entry + 1].chid = + gidchids[entry].chid; + gidchids[entry + 1].gid = + htonll(smcd_gid.gid_ext); + entry++; + } + entry++; } - plen += ini->ism_offered_cnt * - sizeof(struct smc_clc_smcd_gid_chid); + plen += entry * sizeof(struct smc_clc_smcd_gid_chid); } + v2_ext->hdr.ism_gid_cnt = entry; } if (smcr_indicated(ini->smc_type_v2)) { memcpy(v2_ext->roce, ini->smcrv2.ib_gid_v2, SMC_GID_SIZE); @@ -976,7 +1003,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) vec[i++].iov_len = sizeof(*smcd_v2_ext); if (ini->ism_offered_cnt) { vec[i].iov_base = gidchids; - vec[i++].iov_len = ini->ism_offered_cnt * + vec[i++].iov_len = v2_ext->hdr.ism_gid_cnt * sizeof(struct smc_clc_smcd_gid_chid); } } @@ -997,109 +1024,143 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) return reason_code; } +static void +smcd_clc_prep_confirm_accept(struct smc_connection *conn, + struct smc_clc_msg_accept_confirm *clc, + int first_contact, u8 version, + u8 *eid, struct smc_init_info *ini, + int *fce_len, + struct smc_clc_first_contact_ext_v2x *fce_v2x, + struct smc_clc_msg_trail *trl) +{ + struct smcd_dev *smcd = conn->lgr->smcd; + struct smcd_gid smcd_gid; + u16 chid; + int len; + + /* SMC-D specific settings */ + memcpy(clc->hdr.eyecatcher, SMCD_EYECATCHER, + sizeof(SMCD_EYECATCHER)); + smcd->ops->get_local_gid(smcd, &smcd_gid); + clc->hdr.typev1 = SMC_TYPE_D; + clc->d0.gid = htonll(smcd_gid.gid); + clc->d0.token = htonll(conn->rmb_desc->token); + clc->d0.dmbe_size = conn->rmbe_size_comp; + clc->d0.dmbe_idx = 0; + memcpy(&clc->d0.linkid, conn->lgr->id, SMC_LGR_ID_SIZE); + if (version == SMC_V1) { + clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN); + } else { + chid = smc_ism_get_chid(smcd); + clc->d1.chid = htons(chid); + if (eid && eid[0]) + memcpy(clc->d1.eid, eid, SMC_MAX_EID_LEN); + if (__smc_ism_is_virtual(chid)) + clc->d1.gid_ext = htonll(smcd_gid.gid_ext); + len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2; + if (first_contact) { + *fce_len = smc_clc_fill_fce_v2x(fce_v2x, ini); + len += *fce_len; + } + clc->hdr.length = htons(len); + } + memcpy(trl->eyecatcher, SMCD_EYECATCHER, + sizeof(SMCD_EYECATCHER)); +} + +static void +smcr_clc_prep_confirm_accept(struct smc_connection *conn, + struct smc_clc_msg_accept_confirm *clc, + int first_contact, u8 version, + u8 *eid, struct smc_init_info *ini, + int *fce_len, + struct smc_clc_first_contact_ext_v2x *fce_v2x, + struct smc_clc_fce_gid_ext *gle, + struct smc_clc_msg_trail *trl) +{ + struct smc_link *link = conn->lnk; + int len; + + /* SMC-R specific settings */ + memcpy(clc->hdr.eyecatcher, SMC_EYECATCHER, + sizeof(SMC_EYECATCHER)); + clc->hdr.typev1 = SMC_TYPE_R; + memcpy(clc->r0.lcl.id_for_peer, local_systemid, + sizeof(local_systemid)); + memcpy(&clc->r0.lcl.gid, link->gid, SMC_GID_SIZE); + memcpy(&clc->r0.lcl.mac, &link->smcibdev->mac[link->ibport - 1], + ETH_ALEN); + hton24(clc->r0.qpn, link->roce_qp->qp_num); + clc->r0.rmb_rkey = + htonl(conn->rmb_desc->mr[link->link_idx]->rkey); + clc->r0.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */ + clc->r0.rmbe_alert_token = htonl(conn->alert_token_local); + switch (clc->hdr.type) { + case SMC_CLC_ACCEPT: + clc->r0.qp_mtu = link->path_mtu; + break; + case SMC_CLC_CONFIRM: + clc->r0.qp_mtu = min(link->path_mtu, link->peer_mtu); + break; + } + clc->r0.rmbe_size = conn->rmbe_size_comp; + clc->r0.rmb_dma_addr = conn->rmb_desc->is_vm ? + cpu_to_be64((uintptr_t)conn->rmb_desc->cpu_addr) : + cpu_to_be64((u64)sg_dma_address + (conn->rmb_desc->sgt[link->link_idx].sgl)); + hton24(clc->r0.psn, link->psn_initial); + if (version == SMC_V1) { + clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN); + } else { + if (eid && eid[0]) + memcpy(clc->r1.eid, eid, SMC_MAX_EID_LEN); + len = SMCR_CLC_ACCEPT_CONFIRM_LEN_V2; + if (first_contact) { + *fce_len = smc_clc_fill_fce_v2x(fce_v2x, ini); + len += *fce_len; + fce_v2x->fce_v2_base.v2_direct = + !link->lgr->uses_gateway; + if (clc->hdr.type == SMC_CLC_CONFIRM) { + memset(gle, 0, sizeof(*gle)); + gle->gid_cnt = ini->smcrv2.gidlist.len; + len += sizeof(*gle); + len += gle->gid_cnt * sizeof(gle->gid[0]); + } + } + clc->hdr.length = htons(len); + } + memcpy(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); +} + /* build and send CLC CONFIRM / ACCEPT message */ static int smc_clc_send_confirm_accept(struct smc_sock *smc, - struct smc_clc_msg_accept_confirm_v2 *clc_v2, + struct smc_clc_msg_accept_confirm *clc, int first_contact, u8 version, u8 *eid, struct smc_init_info *ini) { + struct smc_clc_first_contact_ext_v2x fce_v2x; struct smc_connection *conn = &smc->conn; - struct smc_clc_first_contact_ext_v2x fce; - struct smcd_dev *smcd = conn->lgr->smcd; - struct smc_clc_msg_accept_confirm *clc; struct smc_clc_fce_gid_ext gle; struct smc_clc_msg_trail trl; - int i, len, fce_len; + int i, fce_len; struct kvec vec[5]; struct msghdr msg; /* send SMC Confirm CLC msg */ - clc = (struct smc_clc_msg_accept_confirm *)clc_v2; clc->hdr.version = version; /* SMC version */ if (first_contact) clc->hdr.typev2 |= SMC_FIRST_CONTACT_MASK; - if (conn->lgr->is_smcd) { - /* SMC-D specific settings */ - memcpy(clc->hdr.eyecatcher, SMCD_EYECATCHER, - sizeof(SMCD_EYECATCHER)); - clc->hdr.typev1 = SMC_TYPE_D; - clc->d0.gid = htonll(smcd->ops->get_local_gid(smcd)); - clc->d0.token = htonll(conn->rmb_desc->token); - clc->d0.dmbe_size = conn->rmbe_size_comp; - clc->d0.dmbe_idx = 0; - memcpy(&clc->d0.linkid, conn->lgr->id, SMC_LGR_ID_SIZE); - if (version == SMC_V1) { - clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN); - } else { - clc_v2->d1.chid = htons(smc_ism_get_chid(smcd)); - if (eid && eid[0]) - memcpy(clc_v2->d1.eid, eid, SMC_MAX_EID_LEN); - len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2; - if (first_contact) { - fce_len = smc_clc_fill_fce(&fce, ini); - len += fce_len; - } - clc_v2->hdr.length = htons(len); - } - memcpy(trl.eyecatcher, SMCD_EYECATCHER, - sizeof(SMCD_EYECATCHER)); - } else { - struct smc_link *link = conn->lnk; - - /* SMC-R specific settings */ - memcpy(clc->hdr.eyecatcher, SMC_EYECATCHER, - sizeof(SMC_EYECATCHER)); - clc->hdr.typev1 = SMC_TYPE_R; - clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN); - memcpy(clc->r0.lcl.id_for_peer, local_systemid, - sizeof(local_systemid)); - memcpy(&clc->r0.lcl.gid, link->gid, SMC_GID_SIZE); - memcpy(&clc->r0.lcl.mac, &link->smcibdev->mac[link->ibport - 1], - ETH_ALEN); - hton24(clc->r0.qpn, link->roce_qp->qp_num); - clc->r0.rmb_rkey = - htonl(conn->rmb_desc->mr[link->link_idx]->rkey); - clc->r0.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */ - clc->r0.rmbe_alert_token = htonl(conn->alert_token_local); - switch (clc->hdr.type) { - case SMC_CLC_ACCEPT: - clc->r0.qp_mtu = link->path_mtu; - break; - case SMC_CLC_CONFIRM: - clc->r0.qp_mtu = min(link->path_mtu, link->peer_mtu); - break; - } - clc->r0.rmbe_size = conn->rmbe_size_comp; - clc->r0.rmb_dma_addr = conn->rmb_desc->is_vm ? - cpu_to_be64((uintptr_t)conn->rmb_desc->cpu_addr) : - cpu_to_be64((u64)sg_dma_address - (conn->rmb_desc->sgt[link->link_idx].sgl)); - hton24(clc->r0.psn, link->psn_initial); - if (version == SMC_V1) { - clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN); - } else { - if (eid && eid[0]) - memcpy(clc_v2->r1.eid, eid, SMC_MAX_EID_LEN); - len = SMCR_CLC_ACCEPT_CONFIRM_LEN_V2; - if (first_contact) { - fce_len = smc_clc_fill_fce(&fce, ini); - len += fce_len; - fce.fce_v2_base.v2_direct = !link->lgr->uses_gateway; - if (clc->hdr.type == SMC_CLC_CONFIRM) { - memset(&gle, 0, sizeof(gle)); - gle.gid_cnt = ini->smcrv2.gidlist.len; - len += sizeof(gle); - len += gle.gid_cnt * sizeof(gle.gid[0]); - } - } - clc_v2->hdr.length = htons(len); - } - memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); - } - + if (conn->lgr->is_smcd) + smcd_clc_prep_confirm_accept(conn, clc, first_contact, + version, eid, ini, &fce_len, + &fce_v2x, &trl); + else + smcr_clc_prep_confirm_accept(conn, clc, first_contact, + version, eid, ini, &fce_len, + &fce_v2x, &gle, &trl); memset(&msg, 0, sizeof(msg)); i = 0; - vec[i].iov_base = clc_v2; + vec[i].iov_base = clc; if (version > SMC_V1) vec[i++].iov_len = (clc->hdr.typev1 == SMC_TYPE_D ? SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 : @@ -1111,7 +1172,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, SMCR_CLC_ACCEPT_CONFIRM_LEN) - sizeof(trl); if (version > SMC_V1 && first_contact) { - vec[i].iov_base = &fce; + vec[i].iov_base = &fce_v2x; vec[i++].iov_len = fce_len; if (!conn->lgr->is_smcd) { if (clc->hdr.type == SMC_CLC_CONFIRM) { @@ -1133,16 +1194,16 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact, u8 version, u8 *eid, struct smc_init_info *ini) { - struct smc_clc_msg_accept_confirm_v2 cclc_v2; + struct smc_clc_msg_accept_confirm cclc; int reason_code = 0; int len; /* send SMC Confirm CLC msg */ - memset(&cclc_v2, 0, sizeof(cclc_v2)); - cclc_v2.hdr.type = SMC_CLC_CONFIRM; - len = smc_clc_send_confirm_accept(smc, &cclc_v2, clnt_first_contact, + memset(&cclc, 0, sizeof(cclc)); + cclc.hdr.type = SMC_CLC_CONFIRM; + len = smc_clc_send_confirm_accept(smc, &cclc, clnt_first_contact, version, eid, ini); - if (len < ntohs(cclc_v2.hdr.length)) { + if (len < ntohs(cclc.hdr.length)) { if (len >= 0) { reason_code = -ENETUNREACH; smc->sk.sk_err = -reason_code; @@ -1158,14 +1219,14 @@ int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact, int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact, u8 version, u8 *negotiated_eid, struct smc_init_info *ini) { - struct smc_clc_msg_accept_confirm_v2 aclc_v2; + struct smc_clc_msg_accept_confirm aclc; int len; - memset(&aclc_v2, 0, sizeof(aclc_v2)); - aclc_v2.hdr.type = SMC_CLC_ACCEPT; - len = smc_clc_send_confirm_accept(new_smc, &aclc_v2, srv_first_contact, + memset(&aclc, 0, sizeof(aclc)); + aclc.hdr.type = SMC_CLC_ACCEPT; + len = smc_clc_send_confirm_accept(new_smc, &aclc, srv_first_contact, version, negotiated_eid, ini); - if (len < ntohs(aclc_v2.hdr.length)) + if (len < ntohs(aclc.hdr.length)) len = len >= 0 ? -EPROTO : -new_smc->clcsock->sk->sk_err; return len > 0 ? 0 : len; @@ -1180,6 +1241,7 @@ int smc_clc_srv_v2x_features_validate(struct smc_sock *smc, ini->max_conns = SMC_CONN_PER_LGR_MAX; ini->max_links = SMC_LINKS_ADD_LNK_MAX; + ini->feature_mask = SMC_FEATURE_MASK; if ((!(ini->smcd_version & SMC_V2) && !(ini->smcr_version & SMC_V2)) || ini->release_nr < SMC_RELEASE_1) @@ -1223,6 +1285,8 @@ int smc_clc_clnt_v2x_features_validate(struct smc_clc_first_contact_ext *fce, return SMC_CLC_DECL_MAXLINKERR; ini->max_links = fce_v2x->max_links; } + /* common supplemental features of server and client */ + ini->feature_mask = ntohs(fce_v2x->feature_mask) & SMC_FEATURE_MASK; return 0; } @@ -1230,10 +1294,8 @@ int smc_clc_clnt_v2x_features_validate(struct smc_clc_first_contact_ext *fce, int smc_clc_v2x_features_confirm_check(struct smc_clc_msg_accept_confirm *cclc, struct smc_init_info *ini) { - struct smc_clc_msg_accept_confirm_v2 *clc_v2 = - (struct smc_clc_msg_accept_confirm_v2 *)cclc; struct smc_clc_first_contact_ext *fce = - smc_get_clc_first_contact_ext(clc_v2, ini->is_smcd); + smc_get_clc_first_contact_ext(cclc, ini->is_smcd); struct smc_clc_first_contact_ext_v2x *fce_v2x = (struct smc_clc_first_contact_ext_v2x *)fce; @@ -1253,6 +1315,8 @@ int smc_clc_v2x_features_confirm_check(struct smc_clc_msg_accept_confirm *cclc, if (fce_v2x->max_links != ini->max_links) return SMC_CLC_DECL_MAXLINKERR; } + /* common supplemental features returned by client */ + ini->feature_mask = ntohs(fce_v2x->feature_mask); return 0; } @@ -1274,7 +1338,11 @@ void __init smc_clc_init(void) INIT_LIST_HEAD(&smc_clc_eid_table.list); rwlock_init(&smc_clc_eid_table.lock); smc_clc_eid_table.ueid_cnt = 0; +#if IS_ENABLED(CONFIG_S390) smc_clc_eid_table.seid_enabled = 1; +#else + smc_clc_eid_table.seid_enabled = 0; +#endif } void smc_clc_exit(void) diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 1697b84d85be..a9f9bdd26dcd 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -138,7 +138,8 @@ struct smc_clc_v2_extension { u8 roce[16]; /* RoCEv2 GID */ u8 max_conns; u8 max_links; - u8 reserved[14]; + __be16 feature_mask; + u8 reserved[12]; u8 user_eids[][SMC_MAX_EID_LEN]; }; @@ -171,6 +172,11 @@ struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */ #define SMC_CLC_MAX_V6_PREFIX 8 #define SMC_CLC_MAX_UEID 8 +#define SMCD_CLC_MAX_V2_GID_ENTRIES 8 /* max # of CHID-GID entries in CLC + * proposal SMC-Dv2 extension. + * each ISM device takes one entry and + * each virtual ISM takes two entries. + */ struct smc_clc_msg_proposal_area { struct smc_clc_msg_proposal pclc_base; @@ -180,7 +186,8 @@ struct smc_clc_msg_proposal_area { struct smc_clc_v2_extension pclc_v2_ext; u8 user_eids[SMC_CLC_MAX_UEID][SMC_MAX_EID_LEN]; struct smc_clc_smcd_v2_extension pclc_smcd_v2_ext; - struct smc_clc_smcd_gid_chid pclc_gidchids[SMC_MAX_ISM_DEVS]; + struct smc_clc_smcd_gid_chid + pclc_gidchids[SMCD_CLC_MAX_V2_GID_ENTRIES]; struct smc_clc_msg_trail pclc_trl; }; @@ -240,9 +247,14 @@ struct smc_clc_first_contact_ext { struct smc_clc_first_contact_ext_v2x { struct smc_clc_first_contact_ext fce_v2_base; - u8 max_conns; /* for SMC-R only */ - u8 max_links; /* for SMC-R only */ - u8 reserved3[2]; + union { + struct { + u8 max_conns; /* for SMC-R only */ + u8 max_links; /* for SMC-R only */ + }; + u8 reserved3[2]; /* for SMC-D only */ + }; + __be16 feature_mask; __be32 vendor_exp_options; u8 reserved4[8]; } __packed; /* format defined in @@ -259,28 +271,21 @@ struct smc_clc_fce_gid_ext { struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */ struct smc_clc_msg_hdr hdr; union { - struct smcr_clc_msg_accept_confirm r0; /* SMC-R */ - struct { /* SMC-D */ - struct smcd_clc_msg_accept_confirm_common d0; - u32 reserved5[3]; - }; - }; -} __packed; /* format defined in RFC7609 */ - -struct smc_clc_msg_accept_confirm_v2 { /* clc accept / confirm message */ - struct smc_clc_msg_hdr hdr; - union { struct { /* SMC-R */ struct smcr_clc_msg_accept_confirm r0; - u8 eid[SMC_MAX_EID_LEN]; - u8 reserved6[8]; - } r1; + struct { /* v2 only */ + u8 eid[SMC_MAX_EID_LEN]; + u8 reserved6[8]; + } __packed r1; + }; struct { /* SMC-D */ struct smcd_clc_msg_accept_confirm_common d0; - __be16 chid; - u8 eid[SMC_MAX_EID_LEN]; - u8 reserved5[8]; - } d1; + struct { /* v2 only, but 12 bytes reserved in v1 */ + __be16 chid; + u8 eid[SMC_MAX_EID_LEN]; + __be64 gid_ext; + } __packed d1; + }; }; }; @@ -389,24 +394,23 @@ smc_get_clc_smcd_v2_ext(struct smc_clc_v2_extension *prop_v2ext) } static inline struct smc_clc_first_contact_ext * -smc_get_clc_first_contact_ext(struct smc_clc_msg_accept_confirm_v2 *clc_v2, +smc_get_clc_first_contact_ext(struct smc_clc_msg_accept_confirm *clc, bool is_smcd) { int clc_v2_len; - if (clc_v2->hdr.version == SMC_V1 || - !(clc_v2->hdr.typev2 & SMC_FIRST_CONTACT_MASK)) + if (clc->hdr.version == SMC_V1 || + !(clc->hdr.typev2 & SMC_FIRST_CONTACT_MASK)) return NULL; if (is_smcd) clc_v2_len = - offsetofend(struct smc_clc_msg_accept_confirm_v2, d1); + offsetofend(struct smc_clc_msg_accept_confirm, d1); else clc_v2_len = - offsetofend(struct smc_clc_msg_accept_confirm_v2, r1); + offsetofend(struct smc_clc_msg_accept_confirm, r1); - return (struct smc_clc_first_contact_ext *)(((u8 *)clc_v2) + - clc_v2_len); + return (struct smc_clc_first_contact_ext *)(((u8 *)clc) + clc_v2_len); } struct smcd_dev; diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index d520ee62c8ec..95cc95458e2d 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -506,6 +506,7 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr, { char smc_pnet[SMC_MAX_PNETID_LEN + 1]; struct smcd_dev *smcd = lgr->smcd; + struct smcd_gid smcd_gid; struct nlattr *attrs; void *nlh; @@ -521,13 +522,19 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr, if (nla_put_u32(skb, SMC_NLA_LGR_D_ID, *((u32 *)&lgr->id))) goto errattr; + smcd->ops->get_local_gid(smcd, &smcd_gid); if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_GID, - smcd->ops->get_local_gid(smcd), - SMC_NLA_LGR_D_PAD)) + smcd_gid.gid, SMC_NLA_LGR_D_PAD)) goto errattr; - if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_PEER_GID, lgr->peer_gid, + if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_EXT_GID, + smcd_gid.gid_ext, SMC_NLA_LGR_D_PAD)) + goto errattr; + if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_PEER_GID, lgr->peer_gid.gid, SMC_NLA_LGR_D_PAD)) goto errattr; + if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_PEER_EXT_GID, + lgr->peer_gid.gid_ext, SMC_NLA_LGR_D_PAD)) + goto errattr; if (nla_put_u8(skb, SMC_NLA_LGR_D_VLAN_ID, lgr->vlan_id)) goto errattr; if (nla_put_u32(skb, SMC_NLA_LGR_D_CONNS_NUM, lgr->conns_num)) @@ -876,7 +883,10 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) /* SMC-D specific settings */ smcd = ini->ism_dev[ini->ism_selected]; get_device(smcd->ops->get_dev(smcd)); - lgr->peer_gid = ini->ism_peer_gid[ini->ism_selected]; + lgr->peer_gid.gid = + ini->ism_peer_gid[ini->ism_selected].gid; + lgr->peer_gid.gid_ext = + ini->ism_peer_gid[ini->ism_selected].gid_ext; lgr->smcd = ini->ism_dev[ini->ism_selected]; lgr_list = &ini->ism_dev[ini->ism_selected]->lgr_list; lgr_lock = &lgr->smcd->lgr_lock; @@ -1514,7 +1524,8 @@ void smc_lgr_terminate_sched(struct smc_link_group *lgr) } /* Called when peer lgr shutdown (regularly or abnormally) is received */ -void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan) +void smc_smcd_terminate(struct smcd_dev *dev, struct smcd_gid *peer_gid, + unsigned short vlan) { struct smc_link_group *lgr, *l; LIST_HEAD(lgr_free_list); @@ -1522,9 +1533,12 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan) /* run common cleanup function and build free list */ spin_lock_bh(&dev->lgr_lock); list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) { - if ((!peer_gid || lgr->peer_gid == peer_gid) && + if ((!peer_gid->gid || + (lgr->peer_gid.gid == peer_gid->gid && + !smc_ism_is_virtual(dev) ? 1 : + lgr->peer_gid.gid_ext == peer_gid->gid_ext)) && (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) { - if (peer_gid) /* peer triggered termination */ + if (peer_gid->gid) /* peer triggered termination */ lgr->peer_shutdown = 1; list_move(&lgr->list, &lgr_free_list); lgr->freeing = 1; @@ -1860,9 +1874,12 @@ static bool smcr_lgr_match(struct smc_link_group *lgr, u8 smcr_version, } static bool smcd_lgr_match(struct smc_link_group *lgr, - struct smcd_dev *smcismdev, u64 peer_gid) + struct smcd_dev *smcismdev, + struct smcd_gid *peer_gid) { - return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev; + return lgr->peer_gid.gid == peer_gid->gid && lgr->smcd == smcismdev && + smc_ism_is_virtual(smcismdev) ? + (lgr->peer_gid.gid_ext == peer_gid->gid_ext) : 1; } /* create a new SMC connection (and a new link group if necessary) */ @@ -1892,7 +1909,7 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) write_lock_bh(&lgr->conns_lock); if ((ini->is_smcd ? smcd_lgr_match(lgr, ini->ism_dev[ini->ism_selected], - ini->ism_peer_gid[ini->ism_selected]) : + &ini->ism_peer_gid[ini->ism_selected]) : smcr_lgr_match(lgr, ini->smcr_version, ini->peer_systemid, ini->peer_gid, ini->peer_mac, role, diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 120027d40469..1f175376037b 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -17,9 +17,11 @@ #include <linux/pci.h> #include <rdma/ib_verbs.h> #include <net/genetlink.h> +#include <net/smc.h> #include "smc.h" #include "smc_ib.h" +#include "smc_clc.h" #define SMC_RMBS_PER_LGR_MAX 255 /* max. # of RMBs per link group */ #define SMC_CONN_PER_LGR_MIN 16 /* min. # of connections per link group */ @@ -355,7 +357,7 @@ struct smc_link_group { /* max links can be added in lgr */ }; struct { /* SMC-D */ - u64 peer_gid; + struct smcd_gid peer_gid; /* Peer GID (remote) */ struct smcd_dev *smcd; /* ISM device for VLAN reg. */ @@ -392,6 +394,11 @@ struct smc_init_info_smcrv2 { struct smc_gidlist gidlist; }; +#define SMC_MAX_V2_ISM_DEVS SMCD_CLC_MAX_V2_GID_ENTRIES + /* max # of proposed non-native ISM devices, + * which can't exceed the max # of CHID-GID + * entries in CLC proposal SMC-Dv2 extension. + */ struct smc_init_info { u8 is_smcd; u8 smc_type_v1; @@ -401,6 +408,7 @@ struct smc_init_info { u8 max_links; u8 first_contact_peer; u8 first_contact_local; + u16 feature_mask; unsigned short vlan_id; u32 rc; u8 negotiated_eid[SMC_MAX_EID_LEN]; @@ -416,9 +424,9 @@ struct smc_init_info { u32 ib_clcqpn; struct smc_init_info_smcrv2 smcrv2; /* SMC-D */ - u64 ism_peer_gid[SMC_MAX_ISM_DEVS + 1]; - struct smcd_dev *ism_dev[SMC_MAX_ISM_DEVS + 1]; - u16 ism_chid[SMC_MAX_ISM_DEVS + 1]; + struct smcd_gid ism_peer_gid[SMC_MAX_V2_ISM_DEVS + 1]; + struct smcd_dev *ism_dev[SMC_MAX_V2_ISM_DEVS + 1]; + u16 ism_chid[SMC_MAX_V2_ISM_DEVS + 1]; u8 ism_offered_cnt; /* # of ISM devices offered */ u8 ism_selected; /* index of selected ISM dev*/ u8 smcd_version; @@ -544,7 +552,7 @@ void smc_lgr_hold(struct smc_link_group *lgr); void smc_lgr_put(struct smc_link_group *lgr); void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport); void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport); -void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, +void smc_smcd_terminate(struct smcd_dev *dev, struct smcd_gid *peer_gid, unsigned short vlan); void smc_smcd_terminate_all(struct smcd_dev *dev); void smc_smcr_terminate_all(struct smc_ib_device *smcibdev); diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index a584613aca12..3fbe14e09ad8 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -21,6 +21,7 @@ #include "smc.h" #include "smc_core.h" +#include "smc_ism.h" struct smc_diag_dump_ctx { int pos[2]; @@ -168,12 +169,16 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, struct smc_connection *conn = &smc->conn; struct smcd_diag_dmbinfo dinfo; struct smcd_dev *smcd = conn->lgr->smcd; + struct smcd_gid smcd_gid; memset(&dinfo, 0, sizeof(dinfo)); dinfo.linkid = *((u32 *)conn->lgr->id); - dinfo.peer_gid = conn->lgr->peer_gid; - dinfo.my_gid = smcd->ops->get_local_gid(smcd); + dinfo.peer_gid = conn->lgr->peer_gid.gid; + dinfo.peer_gid_ext = conn->lgr->peer_gid.gid_ext; + smcd->ops->get_local_gid(smcd, &smcd_gid); + dinfo.my_gid = smcd_gid.gid; + dinfo.my_gid_ext = smcd_gid.gid_ext; dinfo.token = conn->rmb_desc->token; dinfo.peer_token = conn->peer_token; diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index fbee2493091f..ac88de2a06a0 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -43,8 +43,30 @@ static struct ism_client smc_ism_client = { }; #endif +static void smc_ism_create_system_eid(void) +{ + struct smc_ism_seid *seid = + (struct smc_ism_seid *)smc_ism_v2_system_eid; +#if IS_ENABLED(CONFIG_S390) + struct cpuid id; + u16 ident_tail; + char tmp[5]; + + memcpy(seid->seid_string, "IBM-SYSZ-ISMSEID00000000", 24); + get_cpu_id(&id); + ident_tail = (u16)(id.ident & SMC_ISM_IDENT_MASK); + snprintf(tmp, 5, "%04X", ident_tail); + memcpy(seid->serial_number, tmp, 4); + snprintf(tmp, 5, "%04X", id.machine); + memcpy(seid->type, tmp, 4); +#else + memset(seid, 0, SMC_MAX_EID_LEN); +#endif +} + /* Test if an ISM communication is possible - same CPC */ -int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *smcd) +int smc_ism_cantalk(struct smcd_gid *peer_gid, unsigned short vlan_id, + struct smcd_dev *smcd) { return smcd->ops->query_remote_gid(smcd, peer_gid, vlan_id ? 1 : 0, vlan_id); @@ -208,7 +230,7 @@ int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len, dmb.dmb_len = dmb_len; dmb.sba_idx = dmb_desc->sba_idx; dmb.vlan_id = lgr->vlan_id; - dmb.rgid = lgr->peer_gid; + dmb.rgid = lgr->peer_gid.gid; rc = lgr->smcd->ops->register_dmb(lgr->smcd, &dmb, &smc_ism_client); if (!rc) { dmb_desc->sba_idx = dmb.sba_idx; @@ -340,18 +362,20 @@ union smcd_sw_event_info { static void smcd_handle_sw_event(struct smc_ism_event_work *wrk) { + struct smcd_gid peer_gid = { .gid = wrk->event.tok, + .gid_ext = 0 }; union smcd_sw_event_info ev_info; ev_info.info = wrk->event.info; switch (wrk->event.code) { case ISM_EVENT_CODE_SHUTDOWN: /* Peer shut down DMBs */ - smc_smcd_terminate(wrk->smcd, wrk->event.tok, ev_info.vlan_id); + smc_smcd_terminate(wrk->smcd, &peer_gid, ev_info.vlan_id); break; case ISM_EVENT_CODE_TESTLINK: /* Activity timer */ if (ev_info.code == ISM_EVENT_REQUEST) { ev_info.code = ISM_EVENT_RESPONSE; wrk->smcd->ops->signal_event(wrk->smcd, - wrk->event.tok, + &peer_gid, ISM_EVENT_REQUEST_IR, ISM_EVENT_CODE_TESTLINK, ev_info.info); @@ -365,10 +389,12 @@ static void smc_ism_event_work(struct work_struct *work) { struct smc_ism_event_work *wrk = container_of(work, struct smc_ism_event_work, work); + struct smcd_gid smcd_gid = { .gid = wrk->event.tok, + .gid_ext = 0 }; switch (wrk->event.type) { case ISM_EVENT_GID: /* GID event, token is peer GID */ - smc_smcd_terminate(wrk->smcd, wrk->event.tok, VLAN_VID_MASK); + smc_smcd_terminate(wrk->smcd, &smcd_gid, VLAN_VID_MASK); break; case ISM_EVENT_DMB: break; @@ -426,14 +452,8 @@ static void smcd_register_dev(struct ism_dev *ism) mutex_lock(&smcd_dev_list.mutex); if (list_empty(&smcd_dev_list.list)) { - u8 *system_eid = NULL; - - system_eid = smcd->ops->get_system_eid(); - if (smcd->ops->supports_v2()) { + if (smcd->ops->supports_v2()) smc_ism_v2_capable = true; - memcpy(smc_ism_v2_system_eid, system_eid, - SMC_MAX_EID_LEN); - } } /* sort list: devices without pnetid before devices with pnetid */ if (smcd->pnetid[0]) @@ -525,7 +545,7 @@ int smc_ism_signal_shutdown(struct smc_link_group *lgr) memcpy(ev_info.uid, lgr->id, SMC_LGR_ID_SIZE); ev_info.vlan_id = lgr->vlan_id; ev_info.code = ISM_EVENT_REQUEST; - rc = lgr->smcd->ops->signal_event(lgr->smcd, lgr->peer_gid, + rc = lgr->smcd->ops->signal_event(lgr->smcd, &lgr->peer_gid, ISM_EVENT_REQUEST_IR, ISM_EVENT_CODE_SHUTDOWN, ev_info.info); @@ -537,10 +557,10 @@ int smc_ism_init(void) { int rc = 0; -#if IS_ENABLED(CONFIG_ISM) smc_ism_v2_capable = false; - memset(smc_ism_v2_system_eid, 0, SMC_MAX_EID_LEN); + smc_ism_create_system_eid(); +#if IS_ENABLED(CONFIG_ISM) rc = ism_register_client(&smc_ism_client); #endif return rc; diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h index 832b2f42d79f..ffff40c30a06 100644 --- a/net/smc/smc_ism.h +++ b/net/smc/smc_ism.h @@ -15,6 +15,9 @@ #include "smc.h" +#define SMC_VIRTUAL_ISM_CHID_MASK 0xFF00 +#define SMC_ISM_IDENT_MASK 0x00FFFF + struct smcd_dev_list { /* List of SMCD devices */ struct list_head list; struct mutex mutex; /* Protects list of devices */ @@ -28,9 +31,16 @@ struct smc_ism_vlanid { /* VLAN id set on ISM device */ refcount_t refcnt; /* Reference count */ }; +struct smc_ism_seid { + u8 seid_string[24]; + u8 serial_number[4]; + u8 type[4]; +}; + struct smcd_dev; -int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *dev); +int smc_ism_cantalk(struct smcd_gid *peer_gid, unsigned short vlan_id, + struct smcd_dev *dev); void smc_ism_set_conn(struct smc_connection *conn); void smc_ism_unset_conn(struct smc_connection *conn); int smc_ism_get_vlan(struct smcd_dev *dev, unsigned short vlan_id); @@ -56,4 +66,22 @@ static inline int smc_ism_write(struct smcd_dev *smcd, u64 dmb_tok, return rc < 0 ? rc : 0; } +static inline bool __smc_ism_is_virtual(u16 chid) +{ + /* CHIDs in range of 0xFF00 to 0xFFFF are reserved + * for virtual ISM device. + * + * loopback-ism: 0xFFFF + * virtio-ism: 0xFF00 ~ 0xFFFE + */ + return ((chid & 0xFF00) == 0xFF00); +} + +static inline bool smc_ism_is_virtual(struct smcd_dev *smcd) +{ + u16 chid = smcd->ops->get_chid(smcd); + + return __smc_ism_is_virtual(chid); +} + #endif diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 11775401df68..9f2c58c5a86b 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -1103,8 +1103,8 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, list_for_each_entry(ismdev, &smcd_dev_list.list, list) { if (smc_pnet_match(ismdev->pnetid, ndev_pnetid) && !ismdev->going_away && - (!ini->ism_peer_gid[0] || - !smc_ism_cantalk(ini->ism_peer_gid[0], ini->vlan_id, + (!ini->ism_peer_gid[0].gid || + !smc_ism_cantalk(&ini->ism_peer_gid[0], ini->vlan_id, ismdev))) { ini->ism_dev[0] = ismdev; break; diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 7bfe7d9a32aa..04534ea537c8 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -40,9 +40,6 @@ static unsigned long number_cred_unused; static struct cred machine_cred = { .usage = ATOMIC_INIT(1), -#ifdef CONFIG_DEBUG_CREDENTIALS - .magic = CRED_MAGIC, -#endif }; /* diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index fee83d1024bc..1b71055fc391 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -654,9 +654,8 @@ static bool svc_alloc_arg(struct svc_rqst *rqstp) } for (filled = 0; filled < pages; filled = ret) { - ret = alloc_pages_bulk_array_node(GFP_KERNEL, - rqstp->rq_pool->sp_id, - pages, rqstp->rq_pages); + ret = alloc_pages_bulk_array(GFP_KERNEL, pages, + rqstp->rq_pages); if (ret > filled) /* Made progress, don't sleep yet */ continue; diff --git a/net/tipc/link.c b/net/tipc/link.c index d0143823658d..0716eb5c8a31 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -82,10 +82,7 @@ struct tipc_stats { * struct tipc_link - TIPC link data structure * @addr: network address of link's peer node * @name: link name character string - * @media_addr: media address to use when sending messages over link - * @timer: link timer * @net: pointer to namespace struct - * @refcnt: reference counter for permanent references (owner node & timer) * @peer_session: link session # being used by peer end of link * @peer_bearer_id: bearer id used by link's peer endpoint * @bearer_id: local bearer id used by link @@ -94,31 +91,19 @@ struct tipc_stats { * @state: current state of link FSM * @peer_caps: bitmap describing capabilities of peer node * @silent_intv_cnt: # of timer intervals without any reception from peer - * @proto_msg: template for control messages generated by link - * @pmsg: convenience pointer to "proto_msg" field * @priority: current link priority * @net_plane: current link network plane ('A' through 'H') * @mon_state: cookie with information needed by link monitor - * @backlog_limit: backlog queue congestion thresholds (indexed by importance) - * @exp_msg_count: # of tunnelled messages expected during link changeover - * @reset_rcv_checkpt: seq # of last acknowledged message at time of link reset * @mtu: current maximum packet size for this link * @advertised_mtu: advertised own mtu when link is being established - * @transmitq: queue for sent, non-acked messages * @backlogq: queue for messages waiting to be sent - * @snt_nxt: next sequence number to use for outbound messages * @ackers: # of peers that needs to ack each packet before it can be released * @acked: # last packet acked by a certain peer. Used for broadcast. * @rcv_nxt: next sequence number to expect for inbound messages - * @deferred_queue: deferred queue saved OOS b'cast message received from node - * @unacked_window: # of inbound messages rx'd without ack'ing back to peer * @inputq: buffer queue for messages to be delivered upwards * @namedq: buffer queue for name table messages to be delivered upwards - * @next_out: ptr to first unsent outbound message in queue * @wakeupq: linked list of wakeup msgs waiting for link congestion to abate - * @long_msg_seq_no: next identifier to use for outbound fragmented messages * @reasm_buf: head of partially reassembled inbound message fragments - * @bc_rcvr: marks that this is a broadcast receiver link * @stats: collects statistics regarding link activity * @session: session to be used by link * @snd_nxt_state: next send seq number diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index f495b9e5186b..1748268e0694 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -153,10 +153,10 @@ virtio_transport_send_pkt_work(struct work_struct *work) * 'virt_to_phys()' later to fill the buffer descriptor. * We don't touch memory at "virtual" address of this page. */ - va = page_to_virt(skb_frag->bv_page); + va = page_to_virt(skb_frag_page(skb_frag)); sg_init_one(sgs[out_sg], - va + skb_frag->bv_offset, - skb_frag->bv_len); + va + skb_frag_off(skb_frag), + skb_frag_size(skb_frag)); out_sg++; } } diff --git a/net/wireless/certs/wens.hex b/net/wireless/certs/wens.hex new file mode 100644 index 000000000000..0d50369bede9 --- /dev/null +++ b/net/wireless/certs/wens.hex @@ -0,0 +1,87 @@ +/* Chen-Yu Tsai's regdb certificate */ +0x30, 0x82, 0x02, 0xa7, 0x30, 0x82, 0x01, 0x8f, +0x02, 0x14, 0x61, 0xc0, 0x38, 0x65, 0x1a, 0xab, +0xdc, 0xf9, 0x4b, 0xd0, 0xac, 0x7f, 0xf0, 0x6c, +0x72, 0x48, 0xdb, 0x18, 0xc6, 0x00, 0x30, 0x0d, +0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, +0x01, 0x01, 0x0b, 0x05, 0x00, 0x30, 0x0f, 0x31, +0x0d, 0x30, 0x0b, 0x06, 0x03, 0x55, 0x04, 0x03, +0x0c, 0x04, 0x77, 0x65, 0x6e, 0x73, 0x30, 0x20, +0x17, 0x0d, 0x32, 0x33, 0x31, 0x32, 0x30, 0x31, +0x30, 0x37, 0x34, 0x31, 0x31, 0x34, 0x5a, 0x18, +0x0f, 0x32, 0x31, 0x32, 0x33, 0x31, 0x31, 0x30, +0x37, 0x30, 0x37, 0x34, 0x31, 0x31, 0x34, 0x5a, +0x30, 0x0f, 0x31, 0x0d, 0x30, 0x0b, 0x06, 0x03, +0x55, 0x04, 0x03, 0x0c, 0x04, 0x77, 0x65, 0x6e, +0x73, 0x30, 0x82, 0x01, 0x22, 0x30, 0x0d, 0x06, +0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, +0x01, 0x01, 0x05, 0x00, 0x03, 0x82, 0x01, 0x0f, +0x00, 0x30, 0x82, 0x01, 0x0a, 0x02, 0x82, 0x01, +0x01, 0x00, 0xa9, 0x7a, 0x2c, 0x78, 0x4d, 0xa7, +0x19, 0x2d, 0x32, 0x52, 0xa0, 0x2e, 0x6c, 0xef, +0x88, 0x7f, 0x15, 0xc5, 0xb6, 0x69, 0x54, 0x16, +0x43, 0x14, 0x79, 0x53, 0xb7, 0xae, 0x88, 0xfe, +0xc0, 0xb7, 0x5d, 0x47, 0x8e, 0x1a, 0xe1, 0xef, +0xb3, 0x90, 0x86, 0xda, 0xd3, 0x64, 0x81, 0x1f, +0xce, 0x5d, 0x9e, 0x4b, 0x6e, 0x58, 0x02, 0x3e, +0xb2, 0x6f, 0x5e, 0x42, 0x47, 0x41, 0xf4, 0x2c, +0xb8, 0xa8, 0xd4, 0xaa, 0xc0, 0x0e, 0xe6, 0x48, +0xf0, 0xa8, 0xce, 0xcb, 0x08, 0xae, 0x37, 0xaf, +0xf6, 0x40, 0x39, 0xcb, 0x55, 0x6f, 0x5b, 0x4f, +0x85, 0x34, 0xe6, 0x69, 0x10, 0x50, 0x72, 0x5e, +0x4e, 0x9d, 0x4c, 0xba, 0x38, 0x36, 0x0d, 0xce, +0x73, 0x38, 0xd7, 0x27, 0x02, 0x2a, 0x79, 0x03, +0xe1, 0xac, 0xcf, 0xb0, 0x27, 0x85, 0x86, 0x93, +0x17, 0xab, 0xec, 0x42, 0x77, 0x37, 0x65, 0x8a, +0x44, 0xcb, 0xd6, 0x42, 0x93, 0x92, 0x13, 0xe3, +0x39, 0x45, 0xc5, 0x6e, 0x00, 0x4a, 0x7f, 0xcb, +0x42, 0x17, 0x2b, 0x25, 0x8c, 0xb8, 0x17, 0x3b, +0x15, 0x36, 0x59, 0xde, 0x42, 0xce, 0x21, 0xe6, +0xb6, 0xc7, 0x6e, 0x5e, 0x26, 0x1f, 0xf7, 0x8a, +0x57, 0x9e, 0xa5, 0x96, 0x72, 0xb7, 0x02, 0x32, +0xeb, 0x07, 0x2b, 0x73, 0xe2, 0x4f, 0x66, 0x58, +0x9a, 0xeb, 0x0f, 0x07, 0xb6, 0xab, 0x50, 0x8b, +0xc3, 0x8f, 0x17, 0xfa, 0x0a, 0x99, 0xc2, 0x16, +0x25, 0xbf, 0x2d, 0x6b, 0x1a, 0xaa, 0xe6, 0x3e, +0x5f, 0xeb, 0x6d, 0x9b, 0x5d, 0x4d, 0x42, 0x83, +0x2d, 0x39, 0xb8, 0xc9, 0xac, 0xdb, 0x3a, 0x91, +0x50, 0xdf, 0xbb, 0xb1, 0x76, 0x6d, 0x15, 0x73, +0xfd, 0xc6, 0xe6, 0x6b, 0x71, 0x9e, 0x67, 0x36, +0x22, 0x83, 0x79, 0xb1, 0xd6, 0xb8, 0x84, 0x52, +0xaf, 0x96, 0x5b, 0xc3, 0x63, 0x02, 0x4e, 0x78, +0x70, 0x57, 0x02, 0x03, 0x01, 0x00, 0x01, 0x30, +0x0d, 0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, +0x0d, 0x01, 0x01, 0x0b, 0x05, 0x00, 0x03, 0x82, +0x01, 0x01, 0x00, 0x24, 0x28, 0xee, 0x22, 0x74, +0x7f, 0x7c, 0xfa, 0x6c, 0x1f, 0xb3, 0x18, 0xd1, +0xc2, 0x3d, 0x7d, 0x29, 0x42, 0x88, 0xad, 0x82, +0xa5, 0xb1, 0x8a, 0x05, 0xd0, 0xec, 0x5c, 0x91, +0x20, 0xf6, 0x82, 0xfd, 0xd5, 0x67, 0x60, 0x5f, +0x31, 0xf5, 0xbd, 0x88, 0x91, 0x70, 0xbd, 0xb8, +0xb9, 0x8c, 0x88, 0xfe, 0x53, 0xc9, 0x54, 0x9b, +0x43, 0xc4, 0x7a, 0x43, 0x74, 0x6b, 0xdd, 0xb0, +0xb1, 0x3b, 0x33, 0x45, 0x46, 0x78, 0xa3, 0x1c, +0xef, 0x54, 0x68, 0xf7, 0x85, 0x9c, 0xe4, 0x51, +0x6f, 0x06, 0xaf, 0x81, 0xdb, 0x2a, 0x7b, 0x7b, +0x6f, 0xa8, 0x9c, 0x67, 0xd8, 0xcb, 0xc9, 0x91, +0x40, 0x00, 0xae, 0xd9, 0xa1, 0x9f, 0xdd, 0xa6, +0x43, 0x0e, 0x28, 0x7b, 0xaa, 0x1b, 0xe9, 0x84, +0xdb, 0x76, 0x64, 0x42, 0x70, 0xc9, 0xc0, 0xeb, +0xae, 0x84, 0x11, 0x16, 0x68, 0x4e, 0x84, 0x9e, +0x7e, 0x92, 0x36, 0xee, 0x1c, 0x3b, 0x08, 0x63, +0xeb, 0x79, 0x84, 0x15, 0x08, 0x9d, 0xaf, 0xc8, +0x9a, 0xc7, 0x34, 0xd3, 0x94, 0x4b, 0xd1, 0x28, +0x97, 0xbe, 0xd1, 0x45, 0x75, 0xdc, 0x35, 0x62, +0xac, 0x1d, 0x1f, 0xb7, 0xb7, 0x15, 0x87, 0xc8, +0x98, 0xc0, 0x24, 0x31, 0x56, 0x8d, 0xed, 0xdb, +0x06, 0xc6, 0x46, 0xbf, 0x4b, 0x6d, 0xa6, 0xd5, +0xab, 0xcc, 0x60, 0xfc, 0xe5, 0x37, 0xb6, 0x53, +0x7d, 0x58, 0x95, 0xa9, 0x56, 0xc7, 0xf7, 0xee, +0xc3, 0xa0, 0x76, 0xf7, 0x65, 0x4d, 0x53, 0xfa, +0xff, 0x5f, 0x76, 0x33, 0x5a, 0x08, 0xfa, 0x86, +0x92, 0x5a, 0x13, 0xfa, 0x1a, 0xfc, 0xf2, 0x1b, +0x8c, 0x7f, 0x42, 0x6d, 0xb7, 0x7e, 0xb7, 0xb4, +0xf0, 0xc7, 0x83, 0xbb, 0xa2, 0x81, 0x03, 0x2d, +0xd4, 0x2a, 0x63, 0x3f, 0xf7, 0x31, 0x2e, 0x40, +0x33, 0x5c, 0x46, 0xbc, 0x9b, 0xc1, 0x05, 0xa5, +0x45, 0x4e, 0xc3, diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index aad8ffeaee04..f7a7c7798c3b 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -704,7 +704,7 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) rc = -EINVAL; } release_sock(sk); - SOCK_DEBUG(sk, "x25_bind: socket is bound\n"); + net_dbg_ratelimited("x25_bind: socket is bound\n"); out: return rc; } @@ -1165,10 +1165,10 @@ static int x25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) goto out; } - SOCK_DEBUG(sk, "x25_sendmsg: sendto: Addresses built.\n"); + net_dbg_ratelimited("x25_sendmsg: sendto: Addresses built.\n"); /* Build a packet */ - SOCK_DEBUG(sk, "x25_sendmsg: sendto: building packet.\n"); + net_dbg_ratelimited("x25_sendmsg: sendto: building packet.\n"); if ((msg->msg_flags & MSG_OOB) && len > 32) len = 32; @@ -1187,7 +1187,7 @@ static int x25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) /* * Put the data on the end */ - SOCK_DEBUG(sk, "x25_sendmsg: Copying user data\n"); + net_dbg_ratelimited("x25_sendmsg: Copying user data\n"); skb_reset_transport_header(skb); skb_put(skb, len); @@ -1211,7 +1211,7 @@ static int x25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) /* * Push down the X.25 header */ - SOCK_DEBUG(sk, "x25_sendmsg: Building X.25 Header.\n"); + net_dbg_ratelimited("x25_sendmsg: Building X.25 Header.\n"); if (msg->msg_flags & MSG_OOB) { if (x25->neighbour->extended) { @@ -1245,8 +1245,8 @@ static int x25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) skb->data[0] |= X25_Q_BIT; } - SOCK_DEBUG(sk, "x25_sendmsg: Built header.\n"); - SOCK_DEBUG(sk, "x25_sendmsg: Transmitting buffer\n"); + net_dbg_ratelimited("x25_sendmsg: Built header.\n"); + net_dbg_ratelimited("x25_sendmsg: Transmitting buffer\n"); rc = -ENOTCONN; if (sk->sk_state != TCP_ESTABLISHED) diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c index 8e1a49b0c0dc..6dadb217e101 100644 --- a/net/x25/x25_facilities.c +++ b/net/x25/x25_facilities.c @@ -282,7 +282,7 @@ int x25_negotiate_facilities(struct sk_buff *skb, struct sock *sk, * They want reverse charging, we won't accept it. */ if ((theirs.reverse & 0x01 ) && (ours->reverse & 0x01)) { - SOCK_DEBUG(sk, "X.25: rejecting reverse charging request\n"); + net_dbg_ratelimited("X.25: rejecting reverse charging request\n"); return -1; } @@ -294,11 +294,11 @@ int x25_negotiate_facilities(struct sk_buff *skb, struct sock *sk, int ours_in = ours->throughput & 0x0f; int ours_out = ours->throughput & 0xf0; if (!ours_in || theirs_in < ours_in) { - SOCK_DEBUG(sk, "X.25: inbound throughput negotiated\n"); + net_dbg_ratelimited("X.25: inbound throughput negotiated\n"); new->throughput = (new->throughput & 0xf0) | theirs_in; } if (!ours_out || theirs_out < ours_out) { - SOCK_DEBUG(sk, + net_dbg_ratelimited( "X.25: outbound throughput negotiated\n"); new->throughput = (new->throughput & 0x0f) | theirs_out; } @@ -306,22 +306,22 @@ int x25_negotiate_facilities(struct sk_buff *skb, struct sock *sk, if (theirs.pacsize_in && theirs.pacsize_out) { if (theirs.pacsize_in < ours->pacsize_in) { - SOCK_DEBUG(sk, "X.25: packet size inwards negotiated down\n"); + net_dbg_ratelimited("X.25: packet size inwards negotiated down\n"); new->pacsize_in = theirs.pacsize_in; } if (theirs.pacsize_out < ours->pacsize_out) { - SOCK_DEBUG(sk, "X.25: packet size outwards negotiated down\n"); + net_dbg_ratelimited("X.25: packet size outwards negotiated down\n"); new->pacsize_out = theirs.pacsize_out; } } if (theirs.winsize_in && theirs.winsize_out) { if (theirs.winsize_in < ours->winsize_in) { - SOCK_DEBUG(sk, "X.25: window size inwards negotiated down\n"); + net_dbg_ratelimited("X.25: window size inwards negotiated down\n"); new->winsize_in = theirs.winsize_in; } if (theirs.winsize_out < ours->winsize_out) { - SOCK_DEBUG(sk, "X.25: window size outwards negotiated down\n"); + net_dbg_ratelimited("X.25: window size outwards negotiated down\n"); new->winsize_out = theirs.winsize_out; } } diff --git a/net/x25/x25_out.c b/net/x25/x25_out.c index dbc0940bf35f..f8922b0e23a4 100644 --- a/net/x25/x25_out.c +++ b/net/x25/x25_out.c @@ -72,7 +72,7 @@ int x25_output(struct sock *sk, struct sk_buff *skb) kfree_skb(skb); return sent; } - SOCK_DEBUG(sk, "x25_output: fragment alloc" + net_dbg_ratelimited("x25_output: fragment alloc" " failed, err=%d, %d bytes " "sent\n", err, sent); return err; |