Diffstat (limited to 'net')
292 files changed, 4870 insertions, 2585 deletions
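A recurring pattern in the Bluetooth hunks below is the migration of on-stack HCI command buffers from an open-coded struct-plus-fixed-array to DEFINE_FLEX()/struct_size() from <linux/overflow.h>. A minimal sketch of that pattern follows, assuming a hypothetical demo_cmd structure and send_cmd() callback; neither name is part of this series:

#include <linux/kernel.h>
#include <linux/overflow.h>

struct demo_cmd {
	__u8   num_elems;	/* counter for the trailing array */
	__le16 elems[];		/* flexible array member */
};

/* send_cmd() stands in for a transport call such as __hci_cmd_sync_status() */
static int demo_send(int (*send_cmd)(const void *buf, size_t len))
{
	/* Zero-initialized on-stack buffer sized for up to 0x1f elements;
	 * pdu points at it and pdu->num_elems is preset to 0x1f, which a
	 * fill loop can use as its upper bound before trimming.
	 */
	DEFINE_FLEX(struct demo_cmd, pdu, elems, num_elems, 0x1f);
	__u8 n = 0;

	pdu->elems[n++] = cpu_to_le16(0x0001);	/* fill as many as needed */
	pdu->num_elems = n;			/* trim to what was filled */

	if (!pdu->num_elems)
		return 0;

	/* struct_size() computes sizeof(*pdu) + n * sizeof(pdu->elems[0])
	 * with overflow checking, so only the used portion is sent.
	 */
	return send_cmd(pdu, struct_size(pdu, elems, pdu->num_elems));
}

The hunks in net/bluetooth/hci_conn.c and net/bluetooth/hci_sync.c follow this same shape: fill up to the preset bound, write back the actual count, and size the command with struct_size() instead of a hand-computed sizeof sum.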
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index f00158234505..9404dd551dfd 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -478,6 +478,8 @@ static struct sk_buff *vlan_gro_receive(struct list_head *head, if (unlikely(!vhdr)) goto out; + NAPI_GRO_CB(skb)->network_offsets[NAPI_GRO_CB(skb)->encap_mark] = hlen; + type = vhdr->h_vlan_encapsulated_proto; ptype = gro_find_receive_by_type(type); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 39876eff51d2..3efba4f857ac 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -149,7 +149,7 @@ static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu) if (max_mtu < new_mtu) return -ERANGE; - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } diff --git a/net/9p/Kconfig b/net/9p/Kconfig index 00ebce9e5a65..bcdab9c23b40 100644 --- a/net/9p/Kconfig +++ b/net/9p/Kconfig @@ -5,6 +5,7 @@ menuconfig NET_9P tristate "Plan 9 Resource Sharing Support (9P2000)" + select NETFS_SUPPORT help If you say Y here, you will get experimental support for Plan 9 resource sharing via the 9P2000 protocol. diff --git a/net/9p/client.c b/net/9p/client.c index f7e90b4769bb..00774656eeac 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -18,6 +18,7 @@ #include <linux/sched/signal.h> #include <linux/uaccess.h> #include <linux/uio.h> +#include <linux/netfs.h> #include <net/9p/9p.h> #include <linux/parser.h> #include <linux/seq_file.h> @@ -1661,6 +1662,54 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) } EXPORT_SYMBOL(p9_client_write); +void +p9_client_write_subreq(struct netfs_io_subrequest *subreq) +{ + struct netfs_io_request *wreq = subreq->rreq; + struct p9_fid *fid = wreq->netfs_priv; + struct p9_client *clnt = fid->clnt; + struct p9_req_t *req; + unsigned long long start = subreq->start + subreq->transferred; + int written, len = subreq->len - subreq->transferred; + int err; + + p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu len %d\n", + fid->fid, start, len); + + /* Don't bother zerocopy for small IO (< 1024) */ + if (clnt->trans_mod->zc_request && len > 1024) { + req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, &subreq->io_iter, + 0, wreq->len, P9_ZC_HDR_SZ, "dqd", + fid->fid, start, len); + } else { + req = p9_client_rpc(clnt, P9_TWRITE, "dqV", fid->fid, + start, len, &subreq->io_iter); + } + if (IS_ERR(req)) { + netfs_write_subrequest_terminated(subreq, PTR_ERR(req), false); + return; + } + + err = p9pdu_readf(&req->rc, clnt->proto_version, "d", &written); + if (err) { + trace_9p_protocol_dump(clnt, &req->rc); + p9_req_put(clnt, req); + netfs_write_subrequest_terminated(subreq, err, false); + return; + } + + if (written > len) { + pr_err("bogus RWRITE count (%d > %u)\n", written, len); + written = len; + } + + p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", len); + + p9_req_put(clnt, req); + netfs_write_subrequest_terminated(subreq, written, false); +} +EXPORT_SYMBOL(p9_client_write_subreq); + struct p9_wstat *p9_client_stat(struct p9_fid *fid) { int err; diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index e305071eb7b8..0b8086f58ad5 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -781,7 +781,6 @@ static struct virtio_driver p9_virtio_drv = { .feature_table = features, .feature_table_size = ARRAY_SIZE(features), .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, .id_table = id_table, .probe = p9_virtio_probe, .remove = p9_virtio_remove, diff --git a/net/Kconfig b/net/Kconfig index d5ab791f7afa..f0a8692496ff 
100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -452,6 +452,9 @@ config GRO_CELLS config SOCK_VALIDATE_XMIT bool +config NET_IEEE8021Q_HELPERS + bool + config NET_SELFTESTS def_tristate PHYLIB depends on PHYLIB && INET diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 198f5ba2feae..b068651984fe 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -88,6 +88,7 @@ static inline void atalk_remove_socket(struct sock *sk) static struct sock *atalk_search_socket(struct sockaddr_at *to, struct atalk_iface *atif) { + struct sock *def_socket = NULL; struct sock *s; read_lock_bh(&atalk_sockets_lock); @@ -98,8 +99,20 @@ static struct sock *atalk_search_socket(struct sockaddr_at *to, continue; if (to->sat_addr.s_net == ATADDR_ANYNET && - to->sat_addr.s_node == ATADDR_BCAST) - goto found; + to->sat_addr.s_node == ATADDR_BCAST) { + if (atif->address.s_node == at->src_node && + atif->address.s_net == at->src_net) { + /* This socket's address matches the address of the interface + * that received the packet -- use it + */ + goto found; + } + + /* Continue searching for a socket matching the interface address, + * but use this socket by default if no other one is found + */ + def_socket = s; + } if (to->sat_addr.s_net == at->src_net && (to->sat_addr.s_node == at->src_node || @@ -116,7 +129,7 @@ static struct sock *atalk_search_socket(struct sockaddr_at *to, goto found; } } - s = NULL; + s = def_socket; found: read_unlock_bh(&atalk_sockets_lock); return s; diff --git a/net/appletalk/sysctl_net_atalk.c b/net/appletalk/sysctl_net_atalk.c index d945b7c0176d..7aebfe903242 100644 --- a/net/appletalk/sysctl_net_atalk.c +++ b/net/appletalk/sysctl_net_atalk.c @@ -40,7 +40,6 @@ static struct ctl_table atalk_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { }, }; static struct ctl_table_header *atalk_table_header; diff --git a/net/atm/clip.c b/net/atm/clip.c index 362e8d25a79e..42b910cb4e8e 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -345,7 +345,7 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb, dev->stats.tx_dropped++; return NETDEV_TX_OK; } - rt = (struct rtable *) dst; + rt = dst_rtable(dst); if (rt->rt_gw_family == AF_INET) daddr = &rt->rt_gw4; else diff --git a/net/atm/svc.c b/net/atm/svc.c index 36a814f1fbd1..f8137ae693b0 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -324,8 +324,8 @@ out: return error; } -static int svc_accept(struct socket *sock, struct socket *newsock, int flags, - bool kern) +static int svc_accept(struct socket *sock, struct socket *newsock, + struct proto_accept_arg *arg) { struct sock *sk = sock->sk; struct sk_buff *skb; @@ -336,7 +336,7 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags, lock_sock(sk); - error = svc_create(sock_net(sk), newsock, 0, kern); + error = svc_create(sock_net(sk), newsock, 0, arg->kern); if (error) goto out; @@ -355,7 +355,7 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags, error = -sk->sk_err; break; } - if (flags & O_NONBLOCK) { + if (arg->flags & O_NONBLOCK) { error = -EAGAIN; break; } diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 9169efb2f43a..8077cf2ee448 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1373,8 +1373,8 @@ out_release: return err; } -static int ax25_accept(struct socket *sock, struct socket *newsock, int flags, - bool kern) +static int ax25_accept(struct socket *sock, struct socket *newsock, + struct proto_accept_arg *arg) { struct sk_buff *skb; struct sock *newsk; @@ -1409,7 +1409,7 @@ static int 
ax25_accept(struct socket *sock, struct socket *newsock, int flags, if (skb) break; - if (flags & O_NONBLOCK) { + if (arg->flags & O_NONBLOCK) { err = -EWOULDBLOCK; break; } diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c index 282ec581c072..742d7c68e7e7 100644 --- a/net/ax25/ax25_dev.c +++ b/net/ax25/ax25_dev.c @@ -22,11 +22,12 @@ #include <net/sock.h> #include <linux/uaccess.h> #include <linux/fcntl.h> +#include <linux/list.h> #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/init.h> -ax25_dev *ax25_dev_list; +static LIST_HEAD(ax25_dev_list); DEFINE_SPINLOCK(ax25_dev_lock); ax25_dev *ax25_addr_ax25dev(ax25_address *addr) @@ -34,10 +35,11 @@ ax25_dev *ax25_addr_ax25dev(ax25_address *addr) ax25_dev *ax25_dev, *res = NULL; spin_lock_bh(&ax25_dev_lock); - for (ax25_dev = ax25_dev_list; ax25_dev != NULL; ax25_dev = ax25_dev->next) + list_for_each_entry(ax25_dev, &ax25_dev_list, list) if (ax25cmp(addr, (const ax25_address *)ax25_dev->dev->dev_addr) == 0) { res = ax25_dev; ax25_dev_hold(ax25_dev); + break; } spin_unlock_bh(&ax25_dev_lock); @@ -59,7 +61,6 @@ void ax25_dev_device_up(struct net_device *dev) } refcount_set(&ax25_dev->refcount, 1); - dev->ax25_ptr = ax25_dev; ax25_dev->dev = dev; netdev_hold(dev, &ax25_dev->dev_tracker, GFP_KERNEL); ax25_dev->forward = NULL; @@ -78,17 +79,19 @@ void ax25_dev_device_up(struct net_device *dev) ax25_dev->values[AX25_VALUES_N2] = AX25_DEF_N2; ax25_dev->values[AX25_VALUES_PACLEN] = AX25_DEF_PACLEN; ax25_dev->values[AX25_VALUES_PROTOCOL] = AX25_DEF_PROTOCOL; + +#ifdef CONFIG_AX25_DAMA_SLAVE ax25_dev->values[AX25_VALUES_DS_TIMEOUT]= AX25_DEF_DS_TIMEOUT; +#endif #if defined(CONFIG_AX25_DAMA_SLAVE) || defined(CONFIG_AX25_DAMA_MASTER) ax25_ds_setup_timer(ax25_dev); #endif spin_lock_bh(&ax25_dev_lock); - ax25_dev->next = ax25_dev_list; - ax25_dev_list = ax25_dev; + list_add(&ax25_dev->list, &ax25_dev_list); + dev->ax25_ptr = ax25_dev; spin_unlock_bh(&ax25_dev_lock); - ax25_dev_hold(ax25_dev); ax25_register_dev_sysctl(ax25_dev); } @@ -111,32 +114,19 @@ void ax25_dev_device_down(struct net_device *dev) /* * Remove any packet forwarding that points to this device. 
*/ - for (s = ax25_dev_list; s != NULL; s = s->next) + list_for_each_entry(s, &ax25_dev_list, list) if (s->forward == dev) s->forward = NULL; - if ((s = ax25_dev_list) == ax25_dev) { - ax25_dev_list = s->next; - goto unlock_put; - } - - while (s != NULL && s->next != NULL) { - if (s->next == ax25_dev) { - s->next = ax25_dev->next; - goto unlock_put; + list_for_each_entry(s, &ax25_dev_list, list) { + if (s == ax25_dev) { + list_del(&s->list); + break; } - - s = s->next; } - spin_unlock_bh(&ax25_dev_lock); - dev->ax25_ptr = NULL; - ax25_dev_put(ax25_dev); - return; -unlock_put: - spin_unlock_bh(&ax25_dev_lock); - ax25_dev_put(ax25_dev); dev->ax25_ptr = NULL; + spin_unlock_bh(&ax25_dev_lock); netdev_put(dev, &ax25_dev->dev_tracker); ax25_dev_put(ax25_dev); } @@ -200,16 +190,13 @@ struct net_device *ax25_fwd_dev(struct net_device *dev) */ void __exit ax25_dev_free(void) { - ax25_dev *s, *ax25_dev; + ax25_dev *s, *n; spin_lock_bh(&ax25_dev_lock); - ax25_dev = ax25_dev_list; - while (ax25_dev != NULL) { - s = ax25_dev; - netdev_put(ax25_dev->dev, &ax25_dev->dev_tracker); - ax25_dev = ax25_dev->next; + list_for_each_entry_safe(s, n, &ax25_dev_list, list) { + netdev_put(s->dev, &s->dev_tracker); + list_del(&s->list); kfree(s); } - ax25_dev_list = NULL; spin_unlock_bh(&ax25_dev_lock); } diff --git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c index e0128dc9def3..68753aa30334 100644 --- a/net/ax25/sysctl_net_ax25.c +++ b/net/ax25/sysctl_net_ax25.c @@ -141,8 +141,6 @@ static const struct ctl_table ax25_param_table[] = { .extra2 = &max_ds_timeout }, #endif - - { } /* that's all, folks! */ }; int ax25_register_dev_sysctl(ax25_dev *ax25_dev) @@ -155,6 +153,7 @@ int ax25_register_dev_sysctl(ax25_dev *ax25_dev) if (!table) return -ENOMEM; + BUILD_BUG_ON(ARRAY_SIZE(ax25_param_table) != AX25_MAX_VALUES); for (k = 0; k < AX25_MAX_VALUES; k++) table[k].data = &ax25_dev->values[k]; diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 89c51b3cf430..30ecbc2ef1fd 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -159,7 +159,7 @@ static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu) if (new_mtu < ETH_MIN_MTU || new_mtu > batadv_hardif_min_mtu(dev)) return -EINVAL; - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); bat_priv->mtu_set_by_user = new_mtu; return 0; diff --git a/net/batman-adv/trace.h b/net/batman-adv/trace.h index 5dd52bc5cabb..6b816cf1a953 100644 --- a/net/batman-adv/trace.h +++ b/net/batman-adv/trace.h @@ -40,8 +40,8 @@ TRACE_EVENT(batadv_dbg, ), TP_fast_assign( - __assign_str(device, bat_priv->soft_iface->name); - __assign_str(driver, KBUILD_MODNAME); + __assign_str(device); + __assign_str(driver); __assign_vstr(msg, vaf->fmt, vaf->va); ), diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 27520a8a486f..50cfec8ccac4 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -133,7 +133,7 @@ static inline struct lowpan_peer *peer_lookup_dst(struct lowpan_btle_dev *dev, struct in6_addr *daddr, struct sk_buff *skb) { - struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); + struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); int count = atomic_read(&dev->peer_count); const struct in6_addr *nexthop; struct lowpan_peer *peer; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 05346250f719..0c76dcde5361 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -241,13 +241,13 @@ static int configure_datapath_sync(struct hci_dev *hdev, 
struct bt_codec *codec) __u8 vnd_len, *vnd_data = NULL; struct hci_op_configure_data_path *cmd = NULL; + /* Do not take below 2 checks as error since the 1st means user do not + * want to use HFP offload mode and the 2nd means the vendor controller + * do not need to send below HCI command for offload mode. + */ if (!codec->data_path || !hdev->get_codec_config_data) return 0; - /* Do not take me as error */ - if (!hdev->get_codec_config_data) - return 0; - err = hdev->get_codec_config_data(hdev, ESCO_LINK, codec, &vnd_len, &vnd_data); if (err < 0) @@ -664,11 +664,6 @@ static void le_conn_timeout(struct work_struct *work) hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM); } -struct iso_cig_params { - struct hci_cp_le_set_cig_params cp; - struct hci_cis_params cis[0x1f]; -}; - struct iso_list_data { union { u8 cig; @@ -909,11 +904,37 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, { struct hci_conn *conn; + switch (type) { + case ACL_LINK: + if (!hdev->acl_mtu) + return ERR_PTR(-ECONNREFUSED); + break; + case ISO_LINK: + if (hdev->iso_mtu) + /* Dedicated ISO Buffer exists */ + break; + fallthrough; + case LE_LINK: + if (hdev->le_mtu && hdev->le_mtu < HCI_MIN_LE_MTU) + return ERR_PTR(-ECONNREFUSED); + if (!hdev->le_mtu && hdev->acl_mtu < HCI_MIN_LE_MTU) + return ERR_PTR(-ECONNREFUSED); + break; + case SCO_LINK: + case ESCO_LINK: + if (!hdev->sco_pkts) + /* Controller does not support SCO or eSCO over HCI */ + return ERR_PTR(-ECONNREFUSED); + break; + default: + return ERR_PTR(-ECONNREFUSED); + } + bt_dev_dbg(hdev, "dst %pMR handle 0x%4.4x", dst, handle); conn = kzalloc(sizeof(*conn), GFP_KERNEL); if (!conn) - return NULL; + return ERR_PTR(-ENOMEM); bacpy(&conn->dst, dst); bacpy(&conn->src, &hdev->bdaddr); @@ -944,10 +965,12 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, switch (type) { case ACL_LINK: conn->pkt_type = hdev->pkt_type & ACL_PTYPE_MASK; + conn->mtu = hdev->acl_mtu; break; case LE_LINK: /* conn->src should reflect the local identity address */ hci_copy_identity_address(hdev, &conn->src, &conn->src_type); + conn->mtu = hdev->le_mtu ? hdev->le_mtu : hdev->acl_mtu; break; case ISO_LINK: /* conn->src should reflect the local identity address */ @@ -959,6 +982,8 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, else if (conn->role == HCI_ROLE_MASTER) conn->cleanup = cis_cleanup; + conn->mtu = hdev->iso_mtu ? hdev->iso_mtu : + hdev->le_mtu ? 
hdev->le_mtu : hdev->acl_mtu; break; case SCO_LINK: if (lmp_esco_capable(hdev)) @@ -966,9 +991,12 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, (hdev->esco_type & EDR_ESCO_MASK); else conn->pkt_type = hdev->pkt_type & SCO_PTYPE_MASK; + + conn->mtu = hdev->sco_mtu; break; case ESCO_LINK: conn->pkt_type = hdev->esco_type & ~EDR_ESCO_MASK; + conn->mtu = hdev->sco_mtu; break; } @@ -1011,7 +1039,7 @@ struct hci_conn *hci_conn_add_unset(struct hci_dev *hdev, int type, handle = hci_conn_hash_alloc_unset(hdev); if (unlikely(handle < 0)) - return NULL; + return ERR_PTR(-ECONNREFUSED); return hci_conn_add(hdev, type, dst, role, handle); } @@ -1140,8 +1168,7 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, uint8_t src_type) list_for_each_entry(d, &hci_dev_list, list) { if (!test_bit(HCI_UP, &d->flags) || - hci_dev_test_flag(d, HCI_USER_CHANNEL) || - d->dev_type != HCI_PRIMARY) + hci_dev_test_flag(d, HCI_USER_CHANNEL)) continue; /* Simple routing: @@ -1317,8 +1344,8 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, bacpy(&conn->dst, dst); } else { conn = hci_conn_add_unset(hdev, LE_LINK, dst, role); - if (!conn) - return ERR_PTR(-ENOMEM); + if (IS_ERR(conn)) + return conn; hci_conn_hold(conn); conn->pending_sec_level = sec_level; } @@ -1494,8 +1521,8 @@ static struct hci_conn *hci_add_bis(struct hci_dev *hdev, bdaddr_t *dst, return ERR_PTR(-EADDRINUSE); conn = hci_conn_add_unset(hdev, ISO_LINK, dst, HCI_ROLE_MASTER); - if (!conn) - return ERR_PTR(-ENOMEM); + if (IS_ERR(conn)) + return conn; conn->state = BT_CONNECT; @@ -1538,8 +1565,8 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, BT_DBG("requesting refresh of dst_addr"); conn = hci_conn_add_unset(hdev, LE_LINK, dst, HCI_ROLE_MASTER); - if (!conn) - return ERR_PTR(-ENOMEM); + if (IS_ERR(conn)) + return conn; if (hci_explicit_conn_params_set(hdev, dst, dst_type) < 0) { hci_conn_del(conn); @@ -1586,8 +1613,8 @@ struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst); if (!acl) { acl = hci_conn_add_unset(hdev, ACL_LINK, dst, HCI_ROLE_MASTER); - if (!acl) - return ERR_PTR(-ENOMEM); + if (IS_ERR(acl)) + return acl; } hci_conn_hold(acl); @@ -1655,9 +1682,9 @@ struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst, sco = hci_conn_hash_lookup_ba(hdev, type, dst); if (!sco) { sco = hci_conn_add_unset(hdev, type, dst, HCI_ROLE_MASTER); - if (!sco) { + if (IS_ERR(sco)) { hci_conn_drop(acl); - return ERR_PTR(-ENOMEM); + return sco; } } @@ -1722,34 +1749,33 @@ static int hci_le_create_big(struct hci_conn *conn, struct bt_iso_qos *qos) static int set_cig_params_sync(struct hci_dev *hdev, void *data) { + DEFINE_FLEX(struct hci_cp_le_set_cig_params, pdu, cis, num_cis, 0x1f); u8 cig_id = PTR_UINT(data); struct hci_conn *conn; struct bt_iso_qos *qos; - struct iso_cig_params pdu; + u8 aux_num_cis = 0; u8 cis_id; conn = hci_conn_hash_lookup_cig(hdev, cig_id); if (!conn) return 0; - memset(&pdu, 0, sizeof(pdu)); - qos = &conn->iso_qos; - pdu.cp.cig_id = cig_id; - hci_cpu_to_le24(qos->ucast.out.interval, pdu.cp.c_interval); - hci_cpu_to_le24(qos->ucast.in.interval, pdu.cp.p_interval); - pdu.cp.sca = qos->ucast.sca; - pdu.cp.packing = qos->ucast.packing; - pdu.cp.framing = qos->ucast.framing; - pdu.cp.c_latency = cpu_to_le16(qos->ucast.out.latency); - pdu.cp.p_latency = cpu_to_le16(qos->ucast.in.latency); + pdu->cig_id = cig_id; + hci_cpu_to_le24(qos->ucast.out.interval, pdu->c_interval); 
+ hci_cpu_to_le24(qos->ucast.in.interval, pdu->p_interval); + pdu->sca = qos->ucast.sca; + pdu->packing = qos->ucast.packing; + pdu->framing = qos->ucast.framing; + pdu->c_latency = cpu_to_le16(qos->ucast.out.latency); + pdu->p_latency = cpu_to_le16(qos->ucast.in.latency); /* Reprogram all CIS(s) with the same CIG, valid range are: * num_cis: 0x00 to 0x1F * cis_id: 0x00 to 0xEF */ for (cis_id = 0x00; cis_id < 0xf0 && - pdu.cp.num_cis < ARRAY_SIZE(pdu.cis); cis_id++) { + aux_num_cis < pdu->num_cis; cis_id++) { struct hci_cis_params *cis; conn = hci_conn_hash_lookup_cis(hdev, NULL, 0, cig_id, cis_id); @@ -1758,7 +1784,7 @@ static int set_cig_params_sync(struct hci_dev *hdev, void *data) qos = &conn->iso_qos; - cis = &pdu.cis[pdu.cp.num_cis++]; + cis = &pdu->cis[aux_num_cis++]; cis->cis_id = cis_id; cis->c_sdu = cpu_to_le16(conn->iso_qos.ucast.out.sdu); cis->p_sdu = cpu_to_le16(conn->iso_qos.ucast.in.sdu); @@ -1769,14 +1795,14 @@ static int set_cig_params_sync(struct hci_dev *hdev, void *data) cis->c_rtn = qos->ucast.out.rtn; cis->p_rtn = qos->ucast.in.rtn; } + pdu->num_cis = aux_num_cis; - if (!pdu.cp.num_cis) + if (!pdu->num_cis) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_CIG_PARAMS, - sizeof(pdu.cp) + - pdu.cp.num_cis * sizeof(pdu.cis[0]), &pdu, - HCI_CMD_TIMEOUT); + struct_size(pdu, cis, pdu->num_cis), + pdu, HCI_CMD_TIMEOUT); } static bool hci_le_set_cig_params(struct hci_conn *conn, struct bt_iso_qos *qos) @@ -1847,8 +1873,8 @@ struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst, qos->ucast.cis); if (!cis) { cis = hci_conn_add_unset(hdev, ISO_LINK, dst, HCI_ROLE_MASTER); - if (!cis) - return ERR_PTR(-ENOMEM); + if (IS_ERR(cis)) + return cis; cis->cleanup = cis_cleanup; cis->dst_type = dst_type; cis->iso_qos.ucast.cig = BT_ISO_QOS_CIG_UNSET; @@ -1983,14 +2009,8 @@ static void hci_iso_qos_setup(struct hci_dev *hdev, struct hci_conn *conn, struct bt_iso_io_qos *qos, __u8 phy) { /* Only set MTU if PHY is enabled */ - if (!qos->sdu && qos->phy) { - if (hdev->iso_mtu > 0) - qos->sdu = hdev->iso_mtu; - else if (hdev->le_mtu > 0) - qos->sdu = hdev->le_mtu; - else - qos->sdu = hdev->acl_mtu; - } + if (!qos->sdu && qos->phy) + qos->sdu = conn->mtu; /* Use the same PHY as ACL if set to any */ if (qos->phy == BT_ISO_PHY_ANY) @@ -2071,8 +2091,8 @@ struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, return ERR_PTR(-EBUSY); conn = hci_conn_add_unset(hdev, ISO_LINK, dst, HCI_ROLE_SLAVE); - if (!conn) - return ERR_PTR(-ENOMEM); + if (IS_ERR(conn)) + return conn; conn->iso_qos = *qos; conn->state = BT_LISTEN; @@ -2109,13 +2129,10 @@ int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon, struct bt_iso_qos *qos, __u16 sync_handle, __u8 num_bis, __u8 bis[]) { - struct _packed { - struct hci_cp_le_big_create_sync cp; - __u8 bis[0x11]; - } pdu; + DEFINE_FLEX(struct hci_cp_le_big_create_sync, pdu, bis, num_bis, 0x11); int err; - if (num_bis < 0x01 || num_bis > sizeof(pdu.bis)) + if (num_bis < 0x01 || num_bis > pdu->num_bis) return -EINVAL; err = qos_set_big(hdev, qos); @@ -2125,18 +2142,17 @@ int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon, if (hcon) hcon->iso_qos.bcast.big = qos->bcast.big; - memset(&pdu, 0, sizeof(pdu)); - pdu.cp.handle = qos->bcast.big; - pdu.cp.sync_handle = cpu_to_le16(sync_handle); - pdu.cp.encryption = qos->bcast.encryption; - memcpy(pdu.cp.bcode, qos->bcast.bcode, sizeof(pdu.cp.bcode)); - pdu.cp.mse = qos->bcast.mse; - pdu.cp.timeout = cpu_to_le16(qos->bcast.timeout); - pdu.cp.num_bis = num_bis; 
- memcpy(pdu.bis, bis, num_bis); + pdu->handle = qos->bcast.big; + pdu->sync_handle = cpu_to_le16(sync_handle); + pdu->encryption = qos->bcast.encryption; + memcpy(pdu->bcode, qos->bcast.bcode, sizeof(pdu->bcode)); + pdu->mse = qos->bcast.mse; + pdu->timeout = cpu_to_le16(qos->bcast.timeout); + pdu->num_bis = num_bis; + memcpy(pdu->bis, bis, num_bis); return hci_send_cmd(hdev, HCI_OP_LE_BIG_CREATE_SYNC, - sizeof(pdu.cp) + num_bis, &pdu); + struct_size(pdu, bis, num_bis), pdu); } static void create_big_complete(struct hci_dev *hdev, void *data, int err) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index a7028d38c1f5..dd3b0f501018 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -149,8 +149,6 @@ void hci_discovery_set_state(struct hci_dev *hdev, int state) { int old_state = hdev->discovery.state; - BT_DBG("%s state %u -> %u", hdev->name, hdev->discovery.state, state); - if (old_state == state) return; @@ -166,6 +164,13 @@ void hci_discovery_set_state(struct hci_dev *hdev, int state) case DISCOVERY_STARTING: break; case DISCOVERY_FINDING: + /* If discovery was not started then it was initiated by the + * MGMT interface so no MGMT event shall be generated either + */ + if (old_state != DISCOVERY_STARTING) { + hdev->discovery.state = old_state; + return; + } mgmt_discovering(hdev, 1); break; case DISCOVERY_RESOLVING: @@ -173,6 +178,8 @@ void hci_discovery_set_state(struct hci_dev *hdev, int state) case DISCOVERY_STOPPING: break; } + + bt_dev_dbg(hdev, "state %u -> %u", old_state, state); } void hci_inquiry_cache_flush(struct hci_dev *hdev) @@ -395,11 +402,6 @@ int hci_inquiry(void __user *arg) goto done; } - if (hdev->dev_type != HCI_PRIMARY) { - err = -EOPNOTSUPP; - goto done; - } - if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { err = -EOPNOTSUPP; goto done; @@ -752,11 +754,6 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) goto done; } - if (hdev->dev_type != HCI_PRIMARY) { - err = -EOPNOTSUPP; - goto done; - } - if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { err = -EOPNOTSUPP; goto done; @@ -910,7 +907,7 @@ int hci_get_dev_info(void __user *arg) strscpy(di.name, hdev->name, sizeof(di.name)); di.bdaddr = hdev->bdaddr; - di.type = (hdev->bus & 0x0f) | ((hdev->dev_type & 0x03) << 4); + di.type = (hdev->bus & 0x0f); di.flags = flags; di.pkt_type = hdev->pkt_type; if (lmp_bredr_capable(hdev)) { @@ -1026,8 +1023,7 @@ static void hci_power_on(struct work_struct *work) */ if (hci_dev_test_flag(hdev, HCI_RFKILLED) || hci_dev_test_flag(hdev, HCI_UNCONFIGURED) || - (hdev->dev_type == HCI_PRIMARY && - !bacmp(&hdev->bdaddr, BDADDR_ANY) && + (!bacmp(&hdev->bdaddr, BDADDR_ANY) && !bacmp(&hdev->static_addr, BDADDR_ANY))) { hci_dev_clear_flag(hdev, HCI_AUTO_OFF); hci_dev_do_close(hdev); @@ -1769,6 +1765,15 @@ struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance, adv->pending = true; adv->instance = instance; + + /* If controller support only one set and the instance is set to + * 1 then there is no option other than using handle 0x00. 
+ */ + if (hdev->le_num_of_adv_sets == 1 && instance == 1) + adv->handle = 0x00; + else + adv->handle = instance; + list_add(&adv->list, &hdev->adv_instances); hdev->adv_instance_cnt++; } @@ -2523,16 +2528,16 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv) hdev->le_adv_channel_map = 0x07; hdev->le_adv_min_interval = 0x0800; hdev->le_adv_max_interval = 0x0800; - hdev->le_scan_interval = 0x0060; - hdev->le_scan_window = 0x0030; - hdev->le_scan_int_suspend = 0x0400; - hdev->le_scan_window_suspend = 0x0012; + hdev->le_scan_interval = DISCOV_LE_SCAN_INT_FAST; + hdev->le_scan_window = DISCOV_LE_SCAN_WIN_FAST; + hdev->le_scan_int_suspend = DISCOV_LE_SCAN_INT_SLOW1; + hdev->le_scan_window_suspend = DISCOV_LE_SCAN_WIN_SLOW1; hdev->le_scan_int_discovery = DISCOV_LE_SCAN_INT; hdev->le_scan_window_discovery = DISCOV_LE_SCAN_WIN; - hdev->le_scan_int_adv_monitor = 0x0060; - hdev->le_scan_window_adv_monitor = 0x0030; - hdev->le_scan_int_connect = 0x0060; - hdev->le_scan_window_connect = 0x0060; + hdev->le_scan_int_adv_monitor = DISCOV_LE_SCAN_INT_FAST; + hdev->le_scan_window_adv_monitor = DISCOV_LE_SCAN_WIN_FAST; + hdev->le_scan_int_connect = DISCOV_LE_SCAN_INT_CONN; + hdev->le_scan_window_connect = DISCOV_LE_SCAN_WIN_CONN; hdev->le_conn_min_interval = 0x0018; hdev->le_conn_max_interval = 0x0028; hdev->le_conn_latency = 0x0000; @@ -2549,7 +2554,7 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv) hdev->le_rx_def_phys = HCI_LE_SET_PHY_1M; hdev->le_num_of_adv_sets = HCI_MAX_ADV_INSTANCES; hdev->def_multi_adv_rotation_duration = HCI_DEFAULT_ADV_DURATION; - hdev->def_le_autoconnect_timeout = HCI_LE_AUTOCONN_TIMEOUT; + hdev->def_le_autoconnect_timeout = HCI_LE_CONN_TIMEOUT; hdev->min_le_tx_power = HCI_TX_POWER_INVALID; hdev->max_le_tx_power = HCI_TX_POWER_INVALID; @@ -2635,21 +2640,7 @@ int hci_register_dev(struct hci_dev *hdev) if (!hdev->open || !hdev->close || !hdev->send) return -EINVAL; - /* Do not allow HCI_AMP devices to register at index 0, - * so the index can be used as the AMP controller ID. - */ - switch (hdev->dev_type) { - case HCI_PRIMARY: - id = ida_alloc_max(&hci_index_ida, HCI_MAX_ID - 1, GFP_KERNEL); - break; - case HCI_AMP: - id = ida_alloc_range(&hci_index_ida, 1, HCI_MAX_ID - 1, - GFP_KERNEL); - break; - default: - return -EINVAL; - } - + id = ida_alloc_max(&hci_index_ida, HCI_MAX_ID - 1, GFP_KERNEL); if (id < 0) return id; @@ -2701,12 +2692,10 @@ int hci_register_dev(struct hci_dev *hdev) hci_dev_set_flag(hdev, HCI_SETUP); hci_dev_set_flag(hdev, HCI_AUTO_OFF); - if (hdev->dev_type == HCI_PRIMARY) { - /* Assume BR/EDR support until proven otherwise (such as - * through reading supported features during init. - */ - hci_dev_set_flag(hdev, HCI_BREDR_ENABLED); - } + /* Assume BR/EDR support until proven otherwise (such as + * through reading supported features during init. 
+ */ + hci_dev_set_flag(hdev, HCI_BREDR_ENABLED); write_lock(&hci_dev_list_lock); list_add(&hdev->list, &hci_dev_list); @@ -2768,8 +2757,6 @@ void hci_unregister_dev(struct hci_dev *hdev) hci_unregister_suspend_notifier(hdev); - msft_unregister(hdev); - hci_dev_do_close(hdev); if (!test_bit(HCI_INIT, &hdev->flags) && @@ -2823,6 +2810,7 @@ void hci_release_dev(struct hci_dev *hdev) hci_discovery_filter_clear(hdev); hci_blocked_keys_clear(hdev); hci_codec_list_clear(&hdev->local_codecs); + msft_release(hdev); hci_dev_unlock(hdev); ida_destroy(&hdev->unset_handle_ida); @@ -3243,17 +3231,7 @@ static void hci_queue_acl(struct hci_chan *chan, struct sk_buff_head *queue, hci_skb_pkt_type(skb) = HCI_ACLDATA_PKT; - switch (hdev->dev_type) { - case HCI_PRIMARY: - hci_add_acl_hdr(skb, conn->handle, flags); - break; - case HCI_AMP: - hci_add_acl_hdr(skb, chan->handle, flags); - break; - default: - bt_dev_err(hdev, "unknown dev_type %d", hdev->dev_type); - return; - } + hci_add_acl_hdr(skb, conn->handle, flags); list = skb_shinfo(skb)->frag_list; if (!list) { @@ -3413,9 +3391,6 @@ static inline void hci_quote_sent(struct hci_conn *conn, int num, int *quote) case ACL_LINK: cnt = hdev->acl_cnt; break; - case AMP_LINK: - cnt = hdev->block_cnt; - break; case SCO_LINK: case ESCO_LINK: cnt = hdev->sco_cnt; @@ -3613,12 +3588,6 @@ static void hci_prio_recalculate(struct hci_dev *hdev, __u8 type) } -static inline int __get_blocks(struct hci_dev *hdev, struct sk_buff *skb) -{ - /* Calculate count of blocks used by this packet */ - return DIV_ROUND_UP(skb->len - HCI_ACL_HDR_SIZE, hdev->block_len); -} - static void __check_timeout(struct hci_dev *hdev, unsigned int cnt, u8 type) { unsigned long last_tx; @@ -3732,81 +3701,15 @@ static void hci_sched_acl_pkt(struct hci_dev *hdev) hci_prio_recalculate(hdev, ACL_LINK); } -static void hci_sched_acl_blk(struct hci_dev *hdev) -{ - unsigned int cnt = hdev->block_cnt; - struct hci_chan *chan; - struct sk_buff *skb; - int quote; - u8 type; - - BT_DBG("%s", hdev->name); - - if (hdev->dev_type == HCI_AMP) - type = AMP_LINK; - else - type = ACL_LINK; - - __check_timeout(hdev, cnt, type); - - while (hdev->block_cnt > 0 && - (chan = hci_chan_sent(hdev, type, &quote))) { - u32 priority = (skb_peek(&chan->data_q))->priority; - while (quote > 0 && (skb = skb_peek(&chan->data_q))) { - int blocks; - - BT_DBG("chan %p skb %p len %d priority %u", chan, skb, - skb->len, skb->priority); - - /* Stop if priority has changed */ - if (skb->priority < priority) - break; - - skb = skb_dequeue(&chan->data_q); - - blocks = __get_blocks(hdev, skb); - if (blocks > hdev->block_cnt) - return; - - hci_conn_enter_active_mode(chan->conn, - bt_cb(skb)->force_active); - - hci_send_frame(hdev, skb); - hdev->acl_last_tx = jiffies; - - hdev->block_cnt -= blocks; - quote -= blocks; - - chan->sent += blocks; - chan->conn->sent += blocks; - } - } - - if (cnt != hdev->block_cnt) - hci_prio_recalculate(hdev, type); -} - static void hci_sched_acl(struct hci_dev *hdev) { BT_DBG("%s", hdev->name); /* No ACL link over BR/EDR controller */ - if (!hci_conn_num(hdev, ACL_LINK) && hdev->dev_type == HCI_PRIMARY) - return; - - /* No AMP link over AMP controller */ - if (!hci_conn_num(hdev, AMP_LINK) && hdev->dev_type == HCI_AMP) + if (!hci_conn_num(hdev, ACL_LINK)) return; - switch (hdev->flow_ctl_mode) { - case HCI_FLOW_CTL_MODE_PACKET_BASED: - hci_sched_acl_pkt(hdev); - break; - - case HCI_FLOW_CTL_MODE_BLOCK_BASED: - hci_sched_acl_blk(hdev); - break; - } + hci_sched_acl_pkt(hdev); } static void hci_sched_le(struct hci_dev
*hdev) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 4a27e4a17a67..a487f9df8145 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1,7 +1,7 @@ /* BlueZ - Bluetooth protocol stack for Linux Copyright (c) 2000-2001, 2010, Code Aurora Forum. All rights reserved. - Copyright 2023 NXP + Copyright 2023-2024 NXP Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com> @@ -913,21 +913,6 @@ static u8 hci_cc_read_local_ext_features(struct hci_dev *hdev, void *data, return rp->status; } -static u8 hci_cc_read_flow_control_mode(struct hci_dev *hdev, void *data, - struct sk_buff *skb) -{ - struct hci_rp_read_flow_control_mode *rp = data; - - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); - - if (rp->status) - return rp->status; - - hdev->flow_ctl_mode = rp->mode; - - return rp->status; -} - static u8 hci_cc_read_buffer_size(struct hci_dev *hdev, void *data, struct sk_buff *skb) { @@ -954,6 +939,9 @@ static u8 hci_cc_read_buffer_size(struct hci_dev *hdev, void *data, BT_DBG("%s acl mtu %d:%d sco mtu %d:%d", hdev->name, hdev->acl_mtu, hdev->acl_pkts, hdev->sco_mtu, hdev->sco_pkts); + if (!hdev->acl_mtu || !hdev->acl_pkts) + return HCI_ERROR_INVALID_PARAMETERS; + return rp->status; } @@ -1068,28 +1056,6 @@ static u8 hci_cc_write_page_scan_type(struct hci_dev *hdev, void *data, return rp->status; } -static u8 hci_cc_read_data_block_size(struct hci_dev *hdev, void *data, - struct sk_buff *skb) -{ - struct hci_rp_read_data_block_size *rp = data; - - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); - - if (rp->status) - return rp->status; - - hdev->block_mtu = __le16_to_cpu(rp->max_acl_len); - hdev->block_len = __le16_to_cpu(rp->block_len); - hdev->num_blocks = __le16_to_cpu(rp->num_blocks); - - hdev->block_cnt = hdev->num_blocks; - - BT_DBG("%s blk mtu %d cnt %d len %d", hdev->name, hdev->block_mtu, - hdev->block_cnt, hdev->block_len); - - return rp->status; -} - static u8 hci_cc_read_clock(struct hci_dev *hdev, void *data, struct sk_buff *skb) { @@ -1124,30 +1090,6 @@ unlock: return rp->status; } -static u8 hci_cc_read_local_amp_info(struct hci_dev *hdev, void *data, - struct sk_buff *skb) -{ - struct hci_rp_read_local_amp_info *rp = data; - - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); - - if (rp->status) - return rp->status; - - hdev->amp_status = rp->amp_status; - hdev->amp_total_bw = __le32_to_cpu(rp->total_bw); - hdev->amp_max_bw = __le32_to_cpu(rp->max_bw); - hdev->amp_min_latency = __le32_to_cpu(rp->min_latency); - hdev->amp_max_pdu = __le32_to_cpu(rp->max_pdu); - hdev->amp_type = rp->amp_type; - hdev->amp_pal_cap = __le16_to_cpu(rp->pal_cap); - hdev->amp_assoc_size = __le16_to_cpu(rp->max_assoc_size); - hdev->amp_be_flush_to = __le32_to_cpu(rp->be_flush_to); - hdev->amp_max_flush_to = __le32_to_cpu(rp->max_flush_to); - - return rp->status; -} - static u8 hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev, void *data, struct sk_buff *skb) { @@ -1263,6 +1205,9 @@ static u8 hci_cc_le_read_buffer_size(struct hci_dev *hdev, void *data, BT_DBG("%s le mtu %d:%d", hdev->name, hdev->le_mtu, hdev->le_pkts); + if (hdev->le_mtu && hdev->le_mtu < HCI_MIN_LE_MTU) + return HCI_ERROR_INVALID_PARAMETERS; + return rp->status; } @@ -1779,8 +1724,7 @@ static void le_set_scan_enable_complete(struct hci_dev *hdev, u8 enable) hci_dev_set_flag(hdev, HCI_LE_SCAN); if (hdev->le_scan_type == LE_SCAN_ACTIVE) clear_pending_adv_report(hdev); - if (hci_dev_test_flag(hdev, HCI_MESH)) - hci_discovery_set_state(hdev, DISCOVERY_FINDING); + hci_discovery_set_state(hdev, 
DISCOVERY_FINDING); break; case LE_SCAN_DISABLE: @@ -2342,8 +2286,8 @@ static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status) if (!conn) { conn = hci_conn_add_unset(hdev, ACL_LINK, &cp->bdaddr, HCI_ROLE_MASTER); - if (!conn) - bt_dev_err(hdev, "no memory for new connection"); + if (IS_ERR(conn)) + bt_dev_err(hdev, "connection err: %ld", PTR_ERR(conn)); } } @@ -3154,8 +3098,8 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, BDADDR_BREDR)) { conn = hci_conn_add_unset(hdev, ev->link_type, &ev->bdaddr, HCI_ROLE_SLAVE); - if (!conn) { - bt_dev_err(hdev, "no memory for new conn"); + if (IS_ERR(conn)) { + bt_dev_err(hdev, "connection err: %ld", PTR_ERR(conn)); goto unlock; } } else { @@ -3343,8 +3287,8 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data, if (!conn) { conn = hci_conn_add_unset(hdev, ev->link_type, &ev->bdaddr, HCI_ROLE_SLAVE); - if (!conn) { - bt_dev_err(hdev, "no memory for new connection"); + if (IS_ERR(conn)) { + bt_dev_err(hdev, "connection err: %ld", PTR_ERR(conn)); goto unlock; } } @@ -3821,6 +3765,9 @@ static u8 hci_cc_le_read_buffer_size_v2(struct hci_dev *hdev, void *data, BT_DBG("%s acl mtu %d:%d iso mtu %d:%d", hdev->name, hdev->acl_mtu, hdev->acl_pkts, hdev->iso_mtu, hdev->iso_pkts); + if (hdev->le_mtu && hdev->le_mtu < HCI_MIN_LE_MTU) + return HCI_ERROR_INVALID_PARAMETERS; + return rp->status; } @@ -4112,12 +4059,6 @@ static const struct hci_cc { HCI_CC(HCI_OP_READ_PAGE_SCAN_TYPE, hci_cc_read_page_scan_type, sizeof(struct hci_rp_read_page_scan_type)), HCI_CC_STATUS(HCI_OP_WRITE_PAGE_SCAN_TYPE, hci_cc_write_page_scan_type), - HCI_CC(HCI_OP_READ_DATA_BLOCK_SIZE, hci_cc_read_data_block_size, - sizeof(struct hci_rp_read_data_block_size)), - HCI_CC(HCI_OP_READ_FLOW_CONTROL_MODE, hci_cc_read_flow_control_mode, - sizeof(struct hci_rp_read_flow_control_mode)), - HCI_CC(HCI_OP_READ_LOCAL_AMP_INFO, hci_cc_read_local_amp_info, - sizeof(struct hci_rp_read_local_amp_info)), HCI_CC(HCI_OP_READ_CLOCK, hci_cc_read_clock, sizeof(struct hci_rp_read_clock)), HCI_CC(HCI_OP_READ_ENC_KEY_SIZE, hci_cc_read_enc_key_size, @@ -4308,7 +4249,7 @@ static void hci_cs_le_create_cis(struct hci_dev *hdev, u8 status) hci_dev_lock(hdev); /* Remove connection if command failed */ - for (i = 0; cp->num_cis; cp->num_cis--, i++) { + for (i = 0; i < cp->num_cis; i++) { struct hci_conn *conn; u16 handle; @@ -4324,6 +4265,7 @@ static void hci_cs_le_create_cis(struct hci_dev *hdev, u8 status) hci_conn_del(conn); } } + cp->num_cis = 0; if (pending) hci_le_create_cis_pending(hdev); @@ -4452,11 +4394,6 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data, flex_array_size(ev, handles, ev->num))) return; - if (hdev->flow_ctl_mode != HCI_FLOW_CTL_MODE_PACKET_BASED) { - bt_dev_err(hdev, "wrong event for mode %d", hdev->flow_ctl_mode); - return; - } - bt_dev_dbg(hdev, "num %d", ev->num); for (i = 0; i < ev->num; i++) { @@ -4524,78 +4461,6 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data, queue_work(hdev->workqueue, &hdev->tx_work); } -static struct hci_conn *__hci_conn_lookup_handle(struct hci_dev *hdev, - __u16 handle) -{ - struct hci_chan *chan; - - switch (hdev->dev_type) { - case HCI_PRIMARY: - return hci_conn_hash_lookup_handle(hdev, handle); - case HCI_AMP: - chan = hci_chan_lookup_handle(hdev, handle); - if (chan) - return chan->conn; - break; - default: - bt_dev_err(hdev, "unknown dev_type %d", hdev->dev_type); - break; - } - - return NULL; -} - -static void hci_num_comp_blocks_evt(struct hci_dev *hdev, void *data, - struct 
sk_buff *skb) -{ - struct hci_ev_num_comp_blocks *ev = data; - int i; - - if (!hci_ev_skb_pull(hdev, skb, HCI_EV_NUM_COMP_BLOCKS, - flex_array_size(ev, handles, ev->num_hndl))) - return; - - if (hdev->flow_ctl_mode != HCI_FLOW_CTL_MODE_BLOCK_BASED) { - bt_dev_err(hdev, "wrong event for mode %d", - hdev->flow_ctl_mode); - return; - } - - bt_dev_dbg(hdev, "num_blocks %d num_hndl %d", ev->num_blocks, - ev->num_hndl); - - for (i = 0; i < ev->num_hndl; i++) { - struct hci_comp_blocks_info *info = &ev->handles[i]; - struct hci_conn *conn = NULL; - __u16 handle, block_count; - - handle = __le16_to_cpu(info->handle); - block_count = __le16_to_cpu(info->blocks); - - conn = __hci_conn_lookup_handle(hdev, handle); - if (!conn) - continue; - - conn->sent -= block_count; - - switch (conn->type) { - case ACL_LINK: - case AMP_LINK: - hdev->block_cnt += block_count; - if (hdev->block_cnt > hdev->num_blocks) - hdev->block_cnt = hdev->num_blocks; - break; - - default: - bt_dev_err(hdev, "unknown type %d conn %p", - conn->type, conn); - break; - } - } - - queue_work(hdev->workqueue, &hdev->tx_work); -} - static void hci_mode_change_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb) { @@ -5768,8 +5633,8 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, goto unlock; conn = hci_conn_add_unset(hdev, LE_LINK, bdaddr, role); - if (!conn) { - bt_dev_err(hdev, "no memory for new connection"); + if (IS_ERR(conn)) { + bt_dev_err(hdev, "connection err: %ld", PTR_ERR(conn)); goto unlock; } @@ -6493,14 +6358,16 @@ static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data, if (!(flags & HCI_PROTO_DEFER)) goto unlock; - if (ev->status) { - /* Add connection to indicate the failed PA sync event */ - pa_sync = hci_conn_add_unset(hdev, ISO_LINK, BDADDR_ANY, - HCI_ROLE_SLAVE); + /* Add connection to indicate PA sync event */ + pa_sync = hci_conn_add_unset(hdev, ISO_LINK, BDADDR_ANY, + HCI_ROLE_SLAVE); - if (!pa_sync) - goto unlock; + if (IS_ERR(pa_sync)) + goto unlock; + + pa_sync->sync_handle = le16_to_cpu(ev->handle); + if (ev->status) { set_bit(HCI_CONN_PA_SYNC_FAILED, &pa_sync->flags); /* Notify iso layer */ @@ -6517,6 +6384,7 @@ static void hci_le_per_adv_report_evt(struct hci_dev *hdev, void *data, struct hci_ev_le_per_adv_report *ev = data; int mask = hdev->link_mode; __u8 flags = 0; + struct hci_conn *pa_sync; bt_dev_dbg(hdev, "sync_handle 0x%4.4x", le16_to_cpu(ev->sync_handle)); @@ -6524,8 +6392,28 @@ static void hci_le_per_adv_report_evt(struct hci_dev *hdev, void *data, mask |= hci_proto_connect_ind(hdev, BDADDR_ANY, ISO_LINK, &flags); if (!(mask & HCI_LM_ACCEPT)) - hci_le_pa_term_sync(hdev, ev->sync_handle); + goto unlock; + + if (!(flags & HCI_PROTO_DEFER)) + goto unlock; + + pa_sync = hci_conn_hash_lookup_pa_sync_handle + (hdev, + le16_to_cpu(ev->sync_handle)); + + if (!pa_sync) + goto unlock; + + if (ev->data_status == LE_PA_DATA_COMPLETE && + !test_and_set_bit(HCI_CONN_PA_SYNC, &pa_sync->flags)) { + /* Notify iso layer */ + hci_connect_cfm(pa_sync, 0); + /* Notify MGMT layer */ + mgmt_device_connected(hdev, pa_sync, NULL, 0); + } + +unlock: hci_dev_unlock(hdev); } @@ -6898,7 +6786,7 @@ static void hci_le_cis_req_evt(struct hci_dev *hdev, void *data, if (!cis) { cis = hci_conn_add(hdev, ISO_LINK, &acl->dst, HCI_ROLE_SLAVE, cis_handle); - if (!cis) { + if (IS_ERR(cis)) { hci_le_reject_cis(hdev, ev->cis_handle); goto unlock; } @@ -7007,7 +6895,7 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, if (!bis) { bis = hci_conn_add(hdev, 
ISO_LINK, BDADDR_ANY, HCI_ROLE_SLAVE, handle); - if (!bis) + if (IS_ERR(bis)) continue; } @@ -7037,6 +6925,8 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, u16 handle = le16_to_cpu(ev->bis[i]); bis = hci_conn_hash_lookup_handle(hdev, handle); + if (!bis) + continue; set_bit(HCI_CONN_BIG_SYNC_FAILED, &bis->flags); hci_connect_cfm(bis, ev->status); @@ -7058,10 +6948,8 @@ static void hci_le_big_info_adv_report_evt(struct hci_dev *hdev, void *data, hci_dev_lock(hdev); mask |= hci_proto_connect_ind(hdev, BDADDR_ANY, ISO_LINK, &flags); - if (!(mask & HCI_LM_ACCEPT)) { - hci_le_pa_term_sync(hdev, ev->sync_handle); + if (!(mask & HCI_LM_ACCEPT)) goto unlock; - } if (!(flags & HCI_PROTO_DEFER)) goto unlock; @@ -7070,24 +6958,11 @@ static void hci_le_big_info_adv_report_evt(struct hci_dev *hdev, void *data, (hdev, le16_to_cpu(ev->sync_handle)); - if (pa_sync) - goto unlock; - - /* Add connection to indicate the PA sync event */ - pa_sync = hci_conn_add_unset(hdev, ISO_LINK, BDADDR_ANY, - HCI_ROLE_SLAVE); - if (!pa_sync) goto unlock; - pa_sync->sync_handle = le16_to_cpu(ev->sync_handle); - set_bit(HCI_CONN_PA_SYNC, &pa_sync->flags); - /* Notify iso layer */ - hci_connect_cfm(pa_sync, 0x00); - - /* Notify MGMT layer */ - mgmt_device_connected(hdev, pa_sync, NULL, 0); + hci_connect_cfm(pa_sync, 0); unlock: hci_dev_unlock(hdev); @@ -7501,9 +7376,6 @@ static const struct hci_ev { /* [0x3e = HCI_EV_LE_META] */ HCI_EV_REQ_VL(HCI_EV_LE_META, hci_le_meta_evt, sizeof(struct hci_ev_le_meta), HCI_MAX_EVENT_SIZE), - /* [0x48 = HCI_EV_NUM_COMP_BLOCKS] */ - HCI_EV(HCI_EV_NUM_COMP_BLOCKS, hci_num_comp_blocks_evt, - sizeof(struct hci_ev_num_comp_blocks)), /* [0xff = HCI_EV_VENDOR] */ HCI_EV_VL(HCI_EV_VENDOR, msft_vendor_evt, 0, HCI_MAX_EVENT_SIZE), }; diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 0be75cf0efed..c91f2838f542 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -29,10 +29,6 @@ #define hci_req_sync_lock(hdev) mutex_lock(&hdev->req_lock) #define hci_req_sync_unlock(hdev) mutex_unlock(&hdev->req_lock) -#define HCI_REQ_DONE 0 -#define HCI_REQ_PEND 1 -#define HCI_REQ_CANCELED 2 - struct hci_request { struct hci_dev *hdev; struct sk_buff_head cmd_q; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 703b84bd48d5..69c2ba1e843e 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -485,7 +485,7 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) return NULL; ni = skb_put(skb, HCI_MON_NEW_INDEX_SIZE); - ni->type = hdev->dev_type; + ni->type = 0x00; /* Old hdev->dev_type */ ni->bus = hdev->bus; bacpy(&ni->bdaddr, &hdev->bdaddr); memcpy_and_pad(ni->name, sizeof(ni->name), hdev->name, @@ -1007,9 +1007,6 @@ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) return -EOPNOTSUPP; - if (hdev->dev_type != HCI_PRIMARY) - return -EOPNOTSUPP; - switch (cmd) { case HCISETRAW: if (!capable(CAP_NET_ADMIN)) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 4c707eb64e6f..16daa79b7981 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -1043,11 +1043,10 @@ static int hci_disable_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance) struct hci_cp_ext_adv_set *set; u8 data[sizeof(*cp) + sizeof(*set) * 1]; u8 size; + struct adv_info *adv = NULL; /* If request specifies an instance that doesn't exist, fail */ if (instance > 0) { - struct adv_info *adv; - adv = 
hci_find_adv_instance(hdev, instance); if (!adv) return -EINVAL; @@ -1066,7 +1065,7 @@ static int hci_disable_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance) cp->num_of_sets = !!instance; cp->enable = 0x00; - set->handle = instance; + set->handle = adv ? adv->handle : instance; size = sizeof(*cp) + sizeof(*set) * cp->num_of_sets; @@ -1235,31 +1234,27 @@ int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance) static int hci_set_ext_scan_rsp_data_sync(struct hci_dev *hdev, u8 instance) { - struct { - struct hci_cp_le_set_ext_scan_rsp_data cp; - u8 data[HCI_MAX_EXT_AD_LENGTH]; - } pdu; + DEFINE_FLEX(struct hci_cp_le_set_ext_scan_rsp_data, pdu, data, length, + HCI_MAX_EXT_AD_LENGTH); u8 len; struct adv_info *adv = NULL; int err; - memset(&pdu, 0, sizeof(pdu)); - if (instance) { adv = hci_find_adv_instance(hdev, instance); if (!adv || !adv->scan_rsp_changed) return 0; } - len = eir_create_scan_rsp(hdev, instance, pdu.data); + len = eir_create_scan_rsp(hdev, instance, pdu->data); - pdu.cp.handle = instance; - pdu.cp.length = len; - pdu.cp.operation = LE_SET_ADV_DATA_OP_COMPLETE; - pdu.cp.frag_pref = LE_SET_ADV_DATA_NO_FRAG; + pdu->handle = adv ? adv->handle : instance; + pdu->length = len; + pdu->operation = LE_SET_ADV_DATA_OP_COMPLETE; + pdu->frag_pref = LE_SET_ADV_DATA_NO_FRAG; err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_SCAN_RSP_DATA, - sizeof(pdu.cp) + len, &pdu.cp, + struct_size(pdu, data, len), pdu, HCI_CMD_TIMEOUT); if (err) return err; @@ -1267,7 +1262,7 @@ static int hci_set_ext_scan_rsp_data_sync(struct hci_dev *hdev, u8 instance) if (adv) { adv->scan_rsp_changed = false; } else { - memcpy(hdev->scan_rsp_data, pdu.data, len); + memcpy(hdev->scan_rsp_data, pdu->data, len); hdev->scan_rsp_data_len = len; } @@ -1335,7 +1330,7 @@ int hci_enable_ext_advertising_sync(struct hci_dev *hdev, u8 instance) memset(set, 0, sizeof(*set)); - set->handle = instance; + set->handle = adv ? adv->handle : instance; /* Set duration per instance since controller is responsible for * scheduling it. @@ -1411,29 +1406,25 @@ static int hci_set_per_adv_params_sync(struct hci_dev *hdev, u8 instance, static int hci_set_per_adv_data_sync(struct hci_dev *hdev, u8 instance) { - struct { - struct hci_cp_le_set_per_adv_data cp; - u8 data[HCI_MAX_PER_AD_LENGTH]; - } pdu; + DEFINE_FLEX(struct hci_cp_le_set_per_adv_data, pdu, data, length, + HCI_MAX_PER_AD_LENGTH); u8 len; - - memset(&pdu, 0, sizeof(pdu)); + struct adv_info *adv = NULL; if (instance) { - struct adv_info *adv = hci_find_adv_instance(hdev, instance); - + adv = hci_find_adv_instance(hdev, instance); if (!adv || !adv->periodic) return 0; } - len = eir_create_per_adv_data(hdev, instance, pdu.data); + len = eir_create_per_adv_data(hdev, instance, pdu->data); - pdu.cp.length = len; - pdu.cp.handle = instance; - pdu.cp.operation = LE_SET_ADV_DATA_OP_COMPLETE; + pdu->length = len; + pdu->handle = adv ? 
adv->handle : instance; + pdu->operation = LE_SET_ADV_DATA_OP_COMPLETE; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_PER_ADV_DATA, - sizeof(pdu.cp) + len, &pdu, + struct_size(pdu, data, len), pdu, HCI_CMD_TIMEOUT); } @@ -1727,31 +1718,27 @@ int hci_le_terminate_big_sync(struct hci_dev *hdev, u8 handle, u8 reason) static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance) { - struct { - struct hci_cp_le_set_ext_adv_data cp; - u8 data[HCI_MAX_EXT_AD_LENGTH]; - } pdu; + DEFINE_FLEX(struct hci_cp_le_set_ext_adv_data, pdu, data, length, + HCI_MAX_EXT_AD_LENGTH); u8 len; struct adv_info *adv = NULL; int err; - memset(&pdu, 0, sizeof(pdu)); - if (instance) { adv = hci_find_adv_instance(hdev, instance); if (!adv || !adv->adv_data_changed) return 0; } - len = eir_create_adv_data(hdev, instance, pdu.data); + len = eir_create_adv_data(hdev, instance, pdu->data); - pdu.cp.length = len; - pdu.cp.handle = instance; - pdu.cp.operation = LE_SET_ADV_DATA_OP_COMPLETE; - pdu.cp.frag_pref = LE_SET_ADV_DATA_NO_FRAG; + pdu->length = len; + pdu->handle = adv ? adv->handle : instance; + pdu->operation = LE_SET_ADV_DATA_OP_COMPLETE; + pdu->frag_pref = LE_SET_ADV_DATA_NO_FRAG; err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_DATA, - sizeof(pdu.cp) + len, &pdu.cp, + struct_size(pdu, data, len), pdu, HCI_CMD_TIMEOUT); if (err) return err; @@ -1760,7 +1747,7 @@ static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance) if (adv) { adv->adv_data_changed = false; } else { - memcpy(hdev->adv_data, pdu.data, len); + memcpy(hdev->adv_data, pdu->data, len); hdev->adv_data_len = len; } @@ -3523,10 +3510,6 @@ static int hci_unconf_init_sync(struct hci_dev *hdev) /* Read Local Supported Features. */ static int hci_read_local_features_sync(struct hci_dev *hdev) { - /* Not all AMP controllers support this command */ - if (hdev->dev_type == HCI_AMP && !(hdev->commands[14] & 0x20)) - return 0; - return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCAL_FEATURES, 0, NULL, HCI_CMD_TIMEOUT); } @@ -3561,51 +3544,6 @@ static int hci_read_local_cmds_sync(struct hci_dev *hdev) return 0; } -/* Read Local AMP Info */ -static int hci_read_local_amp_info_sync(struct hci_dev *hdev) -{ - return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCAL_AMP_INFO, - 0, NULL, HCI_CMD_TIMEOUT); -} - -/* Read Data Blk size */ -static int hci_read_data_block_size_sync(struct hci_dev *hdev) -{ - return __hci_cmd_sync_status(hdev, HCI_OP_READ_DATA_BLOCK_SIZE, - 0, NULL, HCI_CMD_TIMEOUT); -} - -/* Read Flow Control Mode */ -static int hci_read_flow_control_mode_sync(struct hci_dev *hdev) -{ - return __hci_cmd_sync_status(hdev, HCI_OP_READ_FLOW_CONTROL_MODE, - 0, NULL, HCI_CMD_TIMEOUT); -} - -/* Read Location Data */ -static int hci_read_location_data_sync(struct hci_dev *hdev) -{ - return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCATION_DATA, - 0, NULL, HCI_CMD_TIMEOUT); -} - -/* AMP Controller init stage 1 command sequence */ -static const struct hci_init_stage amp_init1[] = { - /* HCI_OP_READ_LOCAL_VERSION */ - HCI_INIT(hci_read_local_version_sync), - /* HCI_OP_READ_LOCAL_COMMANDS */ - HCI_INIT(hci_read_local_cmds_sync), - /* HCI_OP_READ_LOCAL_AMP_INFO */ - HCI_INIT(hci_read_local_amp_info_sync), - /* HCI_OP_READ_DATA_BLOCK_SIZE */ - HCI_INIT(hci_read_data_block_size_sync), - /* HCI_OP_READ_FLOW_CONTROL_MODE */ - HCI_INIT(hci_read_flow_control_mode_sync), - /* HCI_OP_READ_LOCATION_DATA */ - HCI_INIT(hci_read_location_data_sync), - {} -}; - static int hci_init1_sync(struct hci_dev *hdev) { int err; @@ -3619,28 +3557,9 @@ static int 
hci_init1_sync(struct hci_dev *hdev) return err; } - switch (hdev->dev_type) { - case HCI_PRIMARY: - hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_PACKET_BASED; - return hci_init_stage_sync(hdev, br_init1); - case HCI_AMP: - hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_BLOCK_BASED; - return hci_init_stage_sync(hdev, amp_init1); - default: - bt_dev_err(hdev, "Unknown device type %d", hdev->dev_type); - break; - } - - return 0; + return hci_init_stage_sync(hdev, br_init1); } -/* AMP Controller init stage 2 command sequence */ -static const struct hci_init_stage amp_init2[] = { - /* HCI_OP_READ_LOCAL_FEATURES */ - HCI_INIT(hci_read_local_features_sync), - {} -}; - /* Read Buffer Size (ACL mtu, max pkt, etc.) */ static int hci_read_buffer_size_sync(struct hci_dev *hdev) { @@ -3898,9 +3817,6 @@ static int hci_init2_sync(struct hci_dev *hdev) bt_dev_dbg(hdev, ""); - if (hdev->dev_type == HCI_AMP) - return hci_init_stage_sync(hdev, amp_init2); - err = hci_init_stage_sync(hdev, hci_init2); if (err) return err; @@ -4728,13 +4644,6 @@ static int hci_init_sync(struct hci_dev *hdev) if (err < 0) return err; - /* HCI_PRIMARY covers both single-mode LE, BR/EDR and dual-mode - * BR/EDR/LE type controllers. AMP controllers only need the - * first two stages of init. - */ - if (hdev->dev_type != HCI_PRIMARY) - return 0; - err = hci_init3_sync(hdev); if (err < 0) return err; @@ -4963,12 +4872,8 @@ int hci_dev_open_sync(struct hci_dev *hdev) * In case of user channel usage, it is not important * if a public address or static random address is * available. - * - * This check is only valid for BR/EDR controllers - * since AMP controllers do not have an address. */ if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && - hdev->dev_type == HCI_PRIMARY && !bacmp(&hdev->bdaddr, BDADDR_ANY) && !bacmp(&hdev->static_addr, BDADDR_ANY)) { ret = -EADDRNOTAVAIL; @@ -5003,8 +4908,7 @@ int hci_dev_open_sync(struct hci_dev *hdev) !hci_dev_test_flag(hdev, HCI_CONFIG) && !hci_dev_test_flag(hdev, HCI_UNCONFIGURED) && !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && - hci_dev_test_flag(hdev, HCI_MGMT) && - hdev->dev_type == HCI_PRIMARY) { + hci_dev_test_flag(hdev, HCI_MGMT)) { ret = hci_powered_update_sync(hdev); mgmt_power_on(hdev, ret); } @@ -5149,8 +5053,7 @@ int hci_dev_close_sync(struct hci_dev *hdev) auto_off = hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF); - if (!auto_off && hdev->dev_type == HCI_PRIMARY && - !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && + if (!auto_off && !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && hci_dev_test_flag(hdev, HCI_MGMT)) __mgmt_power_off(hdev); @@ -5212,9 +5115,6 @@ int hci_dev_close_sync(struct hci_dev *hdev) hdev->flags &= BIT(HCI_RAW); hci_dev_clear_volatile_flags(hdev); - /* Controller radio is available but is currently powered down */ - hdev->amp_status = AMP_STATUS_POWERED_DOWN; - memset(hdev->eir, 0, sizeof(hdev->eir)); memset(hdev->dev_class, 0, sizeof(hdev->dev_class)); bacpy(&hdev->random_addr, BDADDR_ANY); @@ -5251,8 +5151,7 @@ static int hci_power_on_sync(struct hci_dev *hdev) */ if (hci_dev_test_flag(hdev, HCI_RFKILLED) || hci_dev_test_flag(hdev, HCI_UNCONFIGURED) || - (hdev->dev_type == HCI_PRIMARY && - !bacmp(&hdev->bdaddr, BDADDR_ANY) && + (!bacmp(&hdev->bdaddr, BDADDR_ANY) && !bacmp(&hdev->static_addr, BDADDR_ANY))) { hci_dev_clear_flag(hdev, HCI_AUTO_OFF); hci_dev_close_sync(hdev); @@ -5354,27 +5253,11 @@ int hci_stop_discovery_sync(struct hci_dev *hdev) return 0; } -static int hci_disconnect_phy_link_sync(struct hci_dev *hdev, u16 handle, - u8 reason) -{ - struct hci_cp_disconn_phy_link 
cp; - - memset(&cp, 0, sizeof(cp)); - cp.phy_handle = HCI_PHY_HANDLE(handle); - cp.reason = reason; - - return __hci_cmd_sync_status(hdev, HCI_OP_DISCONN_PHY_LINK, - sizeof(cp), &cp, HCI_CMD_TIMEOUT); -} - static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) { struct hci_cp_disconnect cp; - if (conn->type == AMP_LINK) - return hci_disconnect_phy_link_sync(hdev, conn->handle, reason); - if (test_bit(HCI_CONN_BIG_CREATED, &conn->flags)) { /* This is a BIS connection, hci_conn_del will * do the necessary cleanup. @@ -6493,10 +6376,8 @@ done: int hci_le_create_cis_sync(struct hci_dev *hdev) { - struct { - struct hci_cp_le_create_cis cp; - struct hci_cis cis[0x1f]; - } cmd; + DEFINE_FLEX(struct hci_cp_le_create_cis, cmd, cis, num_cis, 0x1f); + size_t aux_num_cis = 0; struct hci_conn *conn; u8 cig = BT_ISO_QOS_CIG_UNSET; @@ -6523,8 +6404,6 @@ int hci_le_create_cis_sync(struct hci_dev *hdev) * remains pending. */ - memset(&cmd, 0, sizeof(cmd)); - hci_dev_lock(hdev); rcu_read_lock(); @@ -6561,7 +6440,7 @@ int hci_le_create_cis_sync(struct hci_dev *hdev) goto done; list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) { - struct hci_cis *cis = &cmd.cis[cmd.cp.num_cis]; + struct hci_cis *cis = &cmd->cis[aux_num_cis]; if (hci_conn_check_create_cis(conn) || conn->iso_qos.ucast.cig != cig) @@ -6570,25 +6449,25 @@ int hci_le_create_cis_sync(struct hci_dev *hdev) set_bit(HCI_CONN_CREATE_CIS, &conn->flags); cis->acl_handle = cpu_to_le16(conn->parent->handle); cis->cis_handle = cpu_to_le16(conn->handle); - cmd.cp.num_cis++; + aux_num_cis++; - if (cmd.cp.num_cis >= ARRAY_SIZE(cmd.cis)) + if (aux_num_cis >= cmd->num_cis) break; } + cmd->num_cis = aux_num_cis; done: rcu_read_unlock(); hci_dev_unlock(hdev); - if (!cmd.cp.num_cis) + if (!aux_num_cis) return 0; /* Wait for HCI_LE_CIS_Established */ return __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_CREATE_CIS, - sizeof(cmd.cp) + sizeof(cmd.cis[0]) * - cmd.cp.num_cis, &cmd, - HCI_EVT_LE_CIS_ESTABLISHED, + struct_size(cmd, cis, cmd->num_cis), + cmd, HCI_EVT_LE_CIS_ESTABLISHED, conn->conn_timeout, NULL); } diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index ef0cc80b4c0c..cc055b952ce6 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -54,7 +54,6 @@ static void iso_sock_kill(struct sock *sk); enum { BT_SK_BIG_SYNC, BT_SK_PA_SYNC, - BT_SK_PA_SYNC_TERM, }; struct iso_pinfo { @@ -81,12 +80,14 @@ static bool check_ucast_qos(struct bt_iso_qos *qos); static bool check_bcast_qos(struct bt_iso_qos *qos); static bool iso_match_sid(struct sock *sk, void *data); static bool iso_match_sync_handle(struct sock *sk, void *data); +static bool iso_match_sync_handle_pa_report(struct sock *sk, void *data); static void iso_sock_disconn(struct sock *sk); typedef bool (*iso_sock_match_t)(struct sock *sk, void *data); -static struct sock *iso_get_sock_listen(bdaddr_t *src, bdaddr_t *dst, - iso_sock_match_t match, void *data); +static struct sock *iso_get_sock(bdaddr_t *src, bdaddr_t *dst, + enum bt_sock_state state, + iso_sock_match_t match, void *data); /* ---- ISO timers ---- */ #define ISO_CONN_TIMEOUT (HZ * 40) @@ -196,21 +197,10 @@ static void iso_chan_del(struct sock *sk, int err) sock_set_flag(sk, SOCK_ZAPPED); } -static bool iso_match_conn_sync_handle(struct sock *sk, void *data) -{ - struct hci_conn *hcon = data; - - if (test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags)) - return false; - - return hcon->sync_handle == iso_pi(sk)->sync_handle; -} - static void iso_conn_del(struct hci_conn *hcon, int err) { struct iso_conn *conn = 
hcon->iso_data; struct sock *sk; - struct sock *parent; if (!conn) return; @@ -226,25 +216,6 @@ static void iso_conn_del(struct hci_conn *hcon, int err) if (sk) { lock_sock(sk); - - /* While a PA sync hcon is in the process of closing, - * mark parent socket with a flag, so that any residual - * BIGInfo adv reports that arrive before PA sync is - * terminated are not processed anymore. - */ - if (test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags)) { - parent = iso_get_sock_listen(&hcon->src, - &hcon->dst, - iso_match_conn_sync_handle, - hcon); - - if (parent) { - set_bit(BT_SK_PA_SYNC_TERM, - &iso_pi(parent)->flags); - sock_put(parent); - } - } - iso_sock_clear_timer(sk); iso_chan_del(sk, err); release_sock(sk); @@ -581,22 +552,23 @@ static struct sock *__iso_get_sock_listen_by_sid(bdaddr_t *ba, bdaddr_t *bc, return NULL; } -/* Find socket listening: +/* Find socket in given state: * source bdaddr (Unicast) * destination bdaddr (Broadcast only) * match func - pass NULL to ignore * match func data - pass -1 to ignore * Returns closest match. */ -static struct sock *iso_get_sock_listen(bdaddr_t *src, bdaddr_t *dst, - iso_sock_match_t match, void *data) +static struct sock *iso_get_sock(bdaddr_t *src, bdaddr_t *dst, + enum bt_sock_state state, + iso_sock_match_t match, void *data) { struct sock *sk = NULL, *sk1 = NULL; read_lock(&iso_sk_list.lock); sk_for_each(sk, &iso_sk_list.head) { - if (sk->sk_state != BT_LISTEN) + if (sk->sk_state != state) continue; /* Match Broadcast destination */ @@ -857,6 +829,7 @@ static struct sock *iso_sock_alloc(struct net *net, struct socket *sock, iso_pi(sk)->src_type = BDADDR_LE_PUBLIC; iso_pi(sk)->qos = default_qos; + iso_pi(sk)->sync_handle = -1; bt_sock_link(&iso_sk_list, sk); return sk; @@ -904,7 +877,6 @@ static int iso_sock_bind_bc(struct socket *sock, struct sockaddr *addr, return -EINVAL; iso_pi(sk)->dst_type = sa->iso_bc->bc_bdaddr_type; - iso_pi(sk)->sync_handle = -1; if (sa->iso_bc->bc_sid > 0x0f) return -EINVAL; @@ -981,7 +953,8 @@ static int iso_sock_bind(struct socket *sock, struct sockaddr *addr, /* Allow the user to bind a PA sync socket to a number * of BISes to sync to. 
*/ - if (sk->sk_state == BT_CONNECT2 && + if ((sk->sk_state == BT_CONNECT2 || + sk->sk_state == BT_CONNECTED) && test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags)) { err = iso_sock_bind_pa_sk(sk, sa, addr_len); goto done; @@ -1186,7 +1159,7 @@ done: } static int iso_sock_accept(struct socket *sock, struct socket *newsock, - int flags, bool kern) + struct proto_accept_arg *arg) { DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk, *ch; @@ -1195,7 +1168,7 @@ static int iso_sock_accept(struct socket *sock, struct socket *newsock, lock_sock(sk); - timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK); BT_DBG("sk %p timeo %ld", sk, timeo); @@ -1285,7 +1258,7 @@ static int iso_sock_sendmsg(struct socket *sock, struct msghdr *msg, return -ENOTCONN; } - mtu = iso_pi(sk)->conn->hcon->hdev->iso_mtu; + mtu = iso_pi(sk)->conn->hcon->mtu; release_sock(sk); @@ -1393,6 +1366,16 @@ static int iso_sock_recvmsg(struct socket *sock, struct msghdr *msg, } release_sock(sk); return 0; + case BT_CONNECTED: + if (test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags)) { + iso_conn_big_sync(sk); + sk->sk_state = BT_LISTEN; + release_sock(sk); + return 0; + } + + release_sock(sk); + break; case BT_CONNECT: release_sock(sk); return iso_connect_cis(sk); @@ -1538,7 +1521,9 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname, case BT_ISO_QOS: if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND && - sk->sk_state != BT_CONNECT2) { + sk->sk_state != BT_CONNECT2 && + (!test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags) || + sk->sk_state != BT_CONNECTED)) { err = -EINVAL; break; } @@ -1759,7 +1744,7 @@ static void iso_conn_ready(struct iso_conn *conn) struct sock *sk = conn->sk; struct hci_ev_le_big_sync_estabilished *ev = NULL; struct hci_ev_le_pa_sync_established *ev2 = NULL; - struct hci_evt_le_big_info_adv_report *ev3 = NULL; + struct hci_ev_le_per_adv_report *ev3 = NULL; struct hci_conn *hcon; BT_DBG("conn %p", conn); @@ -1777,32 +1762,37 @@ static void iso_conn_ready(struct iso_conn *conn) HCI_EVT_LE_BIG_SYNC_ESTABILISHED); /* Get reference to PA sync parent socket, if it exists */ - parent = iso_get_sock_listen(&hcon->src, - &hcon->dst, - iso_match_pa_sync_flag, NULL); + parent = iso_get_sock(&hcon->src, &hcon->dst, + BT_LISTEN, + iso_match_pa_sync_flag, + NULL); if (!parent && ev) - parent = iso_get_sock_listen(&hcon->src, - &hcon->dst, - iso_match_big, ev); + parent = iso_get_sock(&hcon->src, + &hcon->dst, + BT_LISTEN, + iso_match_big, ev); } else if (test_bit(HCI_CONN_PA_SYNC_FAILED, &hcon->flags)) { ev2 = hci_recv_event_data(hcon->hdev, HCI_EV_LE_PA_SYNC_ESTABLISHED); if (ev2) - parent = iso_get_sock_listen(&hcon->src, - &hcon->dst, - iso_match_sid, ev2); + parent = iso_get_sock(&hcon->src, + &hcon->dst, + BT_LISTEN, + iso_match_sid, ev2); } else if (test_bit(HCI_CONN_PA_SYNC, &hcon->flags)) { ev3 = hci_recv_event_data(hcon->hdev, - HCI_EVT_LE_BIG_INFO_ADV_REPORT); + HCI_EV_LE_PER_ADV_REPORT); if (ev3) - parent = iso_get_sock_listen(&hcon->src, - &hcon->dst, - iso_match_sync_handle, ev3); + parent = iso_get_sock(&hcon->src, + &hcon->dst, + BT_LISTEN, + iso_match_sync_handle_pa_report, + ev3); } if (!parent) - parent = iso_get_sock_listen(&hcon->src, - BDADDR_ANY, NULL, NULL); + parent = iso_get_sock(&hcon->src, BDADDR_ANY, + BT_LISTEN, NULL, NULL); if (!parent) return; @@ -1839,7 +1829,6 @@ static void iso_conn_ready(struct iso_conn *conn) if (ev3) { iso_pi(sk)->qos = iso_pi(parent)->qos; - iso_pi(sk)->qos.bcast.encryption = ev3->encryption; 
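The iso_get_sock_listen() -> iso_get_sock() conversion in this hunk parameterizes the lookup by socket state, so BIGInfo reports can be matched against BT_CONNECTED and BT_CONNECT2 sockets as well as listeners. A minimal userspace model of the pattern; the types and the closest-match rule are simplified stand-ins for the kernel's iso_sk_list walk, not the actual API:

#include <stdbool.h>
#include <string.h>

enum state { ST_LISTEN, ST_CONNECT2, ST_CONNECTED };

struct sock {
	enum state state;
	char src[6], dst[6];
	struct sock *next;
};

typedef bool (*match_t)(struct sock *sk, void *data);

/* Walk the global list, filtered by state; prefer an exact src/dst
 * match, fall back to a wildcard-destination socket, and honour the
 * optional match callback ("Returns closest match").
 */
static struct sock *get_sock(struct sock *head, const char *src,
			     const char *dst, enum state state,
			     match_t match, void *data)
{
	static const char any[6];	/* all-zero, like BDADDR_ANY */
	struct sock *sk, *wildcard = NULL;

	for (sk = head; sk; sk = sk->next) {
		if (sk->state != state)
			continue;
		if (memcmp(sk->src, src, 6))
			continue;
		if (match && !match(sk, data))
			continue;
		if (!memcmp(sk->dst, dst, 6))
			return sk;	/* exact match */
		if (!memcmp(sk->dst, any, 6))
			wildcard = sk;	/* closest match so far */
	}
	return wildcard;
}

As the iso_connect_ind() hunk below shows, callers now probe BT_CONNECTED first to drop already-handled BIGInfo reports, then fall back to BT_CONNECT2 and BT_LISTEN.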
hcon->iso_qos = iso_pi(sk)->qos; iso_pi(sk)->bc_num_bis = iso_pi(parent)->bc_num_bis; memcpy(iso_pi(sk)->bc_bis, iso_pi(parent)->bc_bis, ISO_MAX_NUM_BIS); @@ -1923,8 +1912,8 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) */ ev1 = hci_recv_event_data(hdev, HCI_EV_LE_PA_SYNC_ESTABLISHED); if (ev1) { - sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr, iso_match_sid, - ev1); + sk = iso_get_sock(&hdev->bdaddr, bdaddr, BT_LISTEN, + iso_match_sid, ev1); if (sk && !ev1->status) iso_pi(sk)->sync_handle = le16_to_cpu(ev1->handle); @@ -1933,26 +1922,29 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) ev2 = hci_recv_event_data(hdev, HCI_EVT_LE_BIG_INFO_ADV_REPORT); if (ev2) { - /* Try to get PA sync listening socket, if it exists */ - sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr, - iso_match_pa_sync_flag, NULL); - - if (!sk) { - sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr, - iso_match_sync_handle, ev2); - - /* If PA Sync is in process of terminating, - * do not handle any more BIGInfo adv reports. - */ - - if (sk && test_bit(BT_SK_PA_SYNC_TERM, - &iso_pi(sk)->flags)) - return 0; + /* Check if BIGInfo report has already been handled */ + sk = iso_get_sock(&hdev->bdaddr, bdaddr, BT_CONNECTED, + iso_match_sync_handle, ev2); + if (sk) { + sock_put(sk); + sk = NULL; + goto done; } + /* Try to get PA sync socket, if it exists */ + sk = iso_get_sock(&hdev->bdaddr, bdaddr, BT_CONNECT2, + iso_match_sync_handle, ev2); + if (!sk) + sk = iso_get_sock(&hdev->bdaddr, bdaddr, + BT_LISTEN, + iso_match_sync_handle, + ev2); + if (sk) { int err; + iso_pi(sk)->qos.bcast.encryption = ev2->encryption; + if (ev2->num_bis < iso_pi(sk)->bc_num_bis) iso_pi(sk)->bc_num_bis = ev2->num_bis; @@ -1971,6 +1963,8 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) } } } + + goto done; } ev3 = hci_recv_event_data(hdev, HCI_EV_LE_PER_ADV_REPORT); @@ -1979,8 +1973,8 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) u8 *base; struct hci_conn *hcon; - sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr, - iso_match_sync_handle_pa_report, ev3); + sk = iso_get_sock(&hdev->bdaddr, bdaddr, BT_LISTEN, + iso_match_sync_handle_pa_report, ev3); if (!sk) goto done; @@ -2029,7 +2023,8 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) hcon->le_per_adv_data_len = 0; } } else { - sk = iso_get_sock_listen(&hdev->bdaddr, BDADDR_ANY, NULL, NULL); + sk = iso_get_sock(&hdev->bdaddr, BDADDR_ANY, + BT_LISTEN, NULL, NULL); } done: diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 84fc70862d78..5b509b767557 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -415,6 +415,9 @@ static void l2cap_chan_timeout(struct work_struct *work) BT_DBG("chan %p state %s", chan, state_to_string(chan->state)); + if (!conn) + return; + mutex_lock(&conn->chan_lock); /* __set_chan_timer() calls l2cap_chan_hold(chan) while scheduling * this work. No need to call l2cap_chan_hold(chan) here again. 
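The new NULL check in l2cap_chan_timeout() above closes a window where the channel is detached from its connection after the timer was armed but before the work runs. The shape of the fix, reduced to a userspace sketch with hypothetical names:

struct conn { int users; };

struct chan {
	struct conn *conn;	/* cleared by teardown */
};

/* Delayed-work body: bail out early if teardown already ran, instead
 * of dereferencing chan->conn unconditionally. The kernel version then
 * proceeds to take conn->chan_lock.
 */
static void chan_timeout(struct chan *chan)
{
	struct conn *conn = chan->conn;

	if (!conn)	/* channel no longer attached: nothing to do */
		return;

	/* ... lock conn, decide between disconnect and close, unlock ... */
}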
@@ -454,6 +457,9 @@ struct l2cap_chan *l2cap_chan_create(void) /* Set default lock nesting level */ atomic_set(&chan->nesting, L2CAP_NESTING_NORMAL); + /* Available receive buffer space is initially unknown */ + chan->rx_avail = -1; + write_lock(&chan_list_lock); list_add(&chan->global_l, &chan_list); write_unlock(&chan_list_lock); @@ -535,6 +541,28 @@ void l2cap_chan_set_defaults(struct l2cap_chan *chan) } EXPORT_SYMBOL_GPL(l2cap_chan_set_defaults); +static __u16 l2cap_le_rx_credits(struct l2cap_chan *chan) +{ + size_t sdu_len = chan->sdu ? chan->sdu->len : 0; + + if (chan->mps == 0) + return 0; + + /* If we don't know the available space in the receiver buffer, give + * enough credits for a full packet. + */ + if (chan->rx_avail == -1) + return (chan->imtu / chan->mps) + 1; + + /* If we know how much space is available in the receive buffer, give + * out as many credits as would fill the buffer. + */ + if (chan->rx_avail <= sdu_len) + return 0; + + return DIV_ROUND_UP(chan->rx_avail - sdu_len, chan->mps); +} + static void l2cap_le_flowctl_init(struct l2cap_chan *chan, u16 tx_credits) { chan->sdu = NULL; @@ -543,8 +571,7 @@ static void l2cap_le_flowctl_init(struct l2cap_chan *chan, u16 tx_credits) chan->tx_credits = tx_credits; /* Derive MPS from connection MTU to stop HCI fragmentation */ chan->mps = min_t(u16, chan->imtu, chan->conn->mtu - L2CAP_HDR_SIZE); - /* Give enough credits for a full packet */ - chan->rx_credits = (chan->imtu / chan->mps) + 1; + chan->rx_credits = l2cap_le_rx_credits(chan); skb_queue_head_init(&chan->tx_q); } @@ -556,7 +583,7 @@ static void l2cap_ecred_init(struct l2cap_chan *chan, u16 tx_credits) /* L2CAP implementations shall support a minimum MPS of 64 octets */ if (chan->mps < L2CAP_ECRED_MIN_MPS) { chan->mps = L2CAP_ECRED_MIN_MPS; - chan->rx_credits = (chan->imtu / chan->mps) + 1; + chan->rx_credits = l2cap_le_rx_credits(chan); } } @@ -1257,7 +1284,7 @@ static void l2cap_le_connect(struct l2cap_chan *chan) struct l2cap_ecred_conn_data { struct { - struct l2cap_ecred_conn_req req; + struct l2cap_ecred_conn_req_hdr req; __le16 scid[5]; } __packed pdu; struct l2cap_chan *chan; @@ -3737,7 +3764,7 @@ static void l2cap_ecred_list_defer(struct l2cap_chan *chan, void *data) struct l2cap_ecred_rsp_data { struct { - struct l2cap_ecred_conn_rsp rsp; + struct l2cap_ecred_conn_rsp_hdr rsp; __le16 scid[L2CAP_ECRED_MAX_CID]; } __packed pdu; int count; @@ -3746,6 +3773,8 @@ struct l2cap_ecred_rsp_data { static void l2cap_ecred_rsp_defer(struct l2cap_chan *chan, void *data) { struct l2cap_ecred_rsp_data *rsp = data; + struct l2cap_ecred_conn_rsp *rsp_flex = + container_of(&rsp->pdu.rsp, struct l2cap_ecred_conn_rsp, hdr); if (test_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags)) return; @@ -3755,7 +3784,7 @@ static void l2cap_ecred_rsp_defer(struct l2cap_chan *chan, void *data) /* Include all channels pending with the same ident */ if (!rsp->pdu.rsp.result) - rsp->pdu.rsp.dcid[rsp->count++] = cpu_to_le16(chan->scid); + rsp_flex->dcid[rsp->count++] = cpu_to_le16(chan->scid); else l2cap_chan_del(chan, ECONNRESET); } @@ -3902,13 +3931,12 @@ static inline int l2cap_command_rej(struct l2cap_conn *conn, return 0; } -static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn, - struct l2cap_cmd_hdr *cmd, - u8 *data, u8 rsp_code, u8 amp_id) +static void l2cap_connect(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, + u8 *data, u8 rsp_code) { struct l2cap_conn_req *req = (struct l2cap_conn_req *) data; struct l2cap_conn_rsp rsp; - struct l2cap_chan *chan = NULL, *pchan; + struct 
l2cap_chan *chan = NULL, *pchan = NULL; int result, status = L2CAP_CS_NO_INFO; u16 dcid = 0, scid = __le16_to_cpu(req->scid); @@ -3921,7 +3949,7 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn, &conn->hcon->dst, ACL_LINK); if (!pchan) { result = L2CAP_CR_BAD_PSM; - goto sendresp; + goto response; } mutex_lock(&conn->chan_lock); @@ -3983,17 +4011,8 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn, status = L2CAP_CS_AUTHOR_PEND; chan->ops->defer(chan); } else { - /* Force pending result for AMP controllers. - * The connection will succeed after the - * physical link is up. - */ - if (amp_id == AMP_ID_BREDR) { - l2cap_state_change(chan, BT_CONFIG); - result = L2CAP_CR_SUCCESS; - } else { - l2cap_state_change(chan, BT_CONNECT2); - result = L2CAP_CR_PEND; - } + l2cap_state_change(chan, BT_CONNECT2); + result = L2CAP_CR_PEND; status = L2CAP_CS_NO_INFO; } } else { @@ -4008,17 +4027,15 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn, } response: - l2cap_chan_unlock(pchan); - mutex_unlock(&conn->chan_lock); - l2cap_chan_put(pchan); - -sendresp: rsp.scid = cpu_to_le16(scid); rsp.dcid = cpu_to_le16(dcid); rsp.result = cpu_to_le16(result); rsp.status = cpu_to_le16(status); l2cap_send_cmd(conn, cmd->ident, rsp_code, sizeof(rsp), &rsp); + if (!pchan) + return; + if (result == L2CAP_CR_PEND && status == L2CAP_CS_NO_INFO) { struct l2cap_info_req info; info.type = cpu_to_le16(L2CAP_IT_FEAT_MASK); @@ -4041,7 +4058,9 @@ sendresp: chan->num_conf_req++; } - return chan; + l2cap_chan_unlock(pchan); + mutex_unlock(&conn->chan_lock); + l2cap_chan_put(pchan); } static int l2cap_connect_req(struct l2cap_conn *conn, @@ -4058,7 +4077,7 @@ static int l2cap_connect_req(struct l2cap_conn *conn, mgmt_device_connected(hdev, hcon, NULL, 0); hci_dev_unlock(hdev); - l2cap_connect(conn, cmd, data, L2CAP_CONN_RSP, 0); + l2cap_connect(conn, cmd, data, L2CAP_CONN_RSP); return 0; } @@ -4994,10 +5013,7 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, u8 *data) { struct l2cap_ecred_conn_req *req = (void *) data; - struct { - struct l2cap_ecred_conn_rsp rsp; - __le16 dcid[L2CAP_ECRED_MAX_CID]; - } __packed pdu; + DEFINE_RAW_FLEX(struct l2cap_ecred_conn_rsp, pdu, dcid, L2CAP_ECRED_MAX_CID); struct l2cap_chan *chan, *pchan; u16 mtu, mps; __le16 psm; @@ -5016,7 +5032,7 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, cmd_len -= sizeof(*req); num_scid = cmd_len / sizeof(u16); - if (num_scid > ARRAY_SIZE(pdu.dcid)) { + if (num_scid > L2CAP_ECRED_MAX_CID) { result = L2CAP_CR_LE_INVALID_PARAMS; goto response; } @@ -5045,7 +5061,7 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, BT_DBG("psm 0x%2.2x mtu %u mps %u", __le16_to_cpu(psm), mtu, mps); - memset(&pdu, 0, sizeof(pdu)); + memset(pdu, 0, sizeof(*pdu)); /* Check if we have socket listening on psm */ pchan = l2cap_global_chan_by_psm(BT_LISTEN, psm, &conn->hcon->src, @@ -5071,8 +5087,8 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, BT_DBG("scid[%d] 0x%4.4x", i, scid); - pdu.dcid[i] = 0x0000; - len += sizeof(*pdu.dcid); + pdu->dcid[i] = 0x0000; + len += sizeof(*pdu->dcid); /* Check for valid dynamic CID range */ if (scid < L2CAP_CID_DYN_START || scid > L2CAP_CID_LE_DYN_END) { @@ -5106,13 +5122,13 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, l2cap_ecred_init(chan, __le16_to_cpu(req->credits)); /* Init response */ - if (!pdu.rsp.credits) { - pdu.rsp.mtu = cpu_to_le16(chan->imtu); - pdu.rsp.mps = cpu_to_le16(chan->mps); - pdu.rsp.credits = 
cpu_to_le16(chan->rx_credits); + if (!pdu->credits) { + pdu->mtu = cpu_to_le16(chan->imtu); + pdu->mps = cpu_to_le16(chan->mps); + pdu->credits = cpu_to_le16(chan->rx_credits); } - pdu.dcid[i] = cpu_to_le16(chan->scid); + pdu->dcid[i] = cpu_to_le16(chan->scid); __set_chan_timer(chan, chan->ops->get_sndtimeo(chan)); @@ -5134,13 +5150,13 @@ unlock: l2cap_chan_put(pchan); response: - pdu.rsp.result = cpu_to_le16(result); + pdu->result = cpu_to_le16(result); if (defer) return 0; l2cap_send_cmd(conn, cmd->ident, L2CAP_ECRED_CONN_RSP, - sizeof(pdu.rsp) + len, &pdu); + sizeof(*pdu) + len, pdu); return 0; } @@ -6239,7 +6255,7 @@ static int l2cap_finish_move(struct l2cap_chan *chan) BT_DBG("chan %p", chan); chan->rx_state = L2CAP_RX_STATE_RECV; - chan->conn->mtu = chan->conn->hcon->hdev->acl_mtu; + chan->conn->mtu = chan->conn->hcon->mtu; return l2cap_resegment(chan); } @@ -6306,7 +6322,7 @@ static int l2cap_rx_state_wait_f(struct l2cap_chan *chan, */ chan->next_tx_seq = control->reqseq; chan->unacked_frames = 0; - chan->conn->mtu = chan->conn->hcon->hdev->acl_mtu; + chan->conn->mtu = chan->conn->hcon->mtu; err = l2cap_resegment(chan); @@ -6511,9 +6527,7 @@ static void l2cap_chan_le_send_credits(struct l2cap_chan *chan) { struct l2cap_conn *conn = chan->conn; struct l2cap_le_credits pkt; - u16 return_credits; - - return_credits = (chan->imtu / chan->mps) + 1; + u16 return_credits = l2cap_le_rx_credits(chan); if (chan->rx_credits >= return_credits) return; @@ -6532,6 +6546,19 @@ static void l2cap_chan_le_send_credits(struct l2cap_chan *chan) l2cap_send_cmd(conn, chan->ident, L2CAP_LE_CREDITS, sizeof(pkt), &pkt); } +void l2cap_chan_rx_avail(struct l2cap_chan *chan, ssize_t rx_avail) +{ + if (chan->rx_avail == rx_avail) + return; + + BT_DBG("chan %p has %zd bytes avail for rx", chan, rx_avail); + + chan->rx_avail = rx_avail; + + if (chan->state == BT_CONNECTED) + l2cap_chan_le_send_credits(chan); +} + static int l2cap_ecred_recv(struct l2cap_chan *chan, struct sk_buff *skb) { int err; @@ -6541,6 +6568,12 @@ static int l2cap_ecred_recv(struct l2cap_chan *chan, struct sk_buff *skb) /* Wait recv to confirm reception before updating the credits */ err = chan->ops->recv(chan, skb); + if (err < 0 && chan->rx_avail != -1) { + BT_ERR("Queueing received LE L2CAP data failed"); + l2cap_send_disconn_req(chan, ECONNRESET); + return err; + } + /* Update credits whenever an SDU is received */ l2cap_chan_le_send_credits(chan); @@ -6563,7 +6596,8 @@ static int l2cap_ecred_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) } chan->rx_credits--; - BT_DBG("rx_credits %u -> %u", chan->rx_credits + 1, chan->rx_credits); + BT_DBG("chan %p: rx_credits %u -> %u", + chan, chan->rx_credits + 1, chan->rx_credits); /* Update if remote had run out of credits, this should only happens * if the remote is not using the entire MPS. 
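The effect of the new l2cap_le_rx_credits() helper used throughout these hunks (l2cap_le_flowctl_init(), l2cap_ecred_init(), l2cap_chan_le_send_credits()) is easiest to see as plain arithmetic. A standalone rendition with userspace types; rx_avail < 0 stands in for the kernel's -1 sentinel and keeps the old imtu/mps + 1 behaviour when no buffer information has been published:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

static uint16_t le_rx_credits(uint16_t imtu, uint16_t mps,
			      long rx_avail, size_t sdu_len)
{
	if (mps == 0)
		return 0;
	if (rx_avail < 0)		/* receive space unknown */
		return imtu / mps + 1;	/* credits for one full packet */
	if ((size_t)rx_avail <= sdu_len)
		return 0;		/* buffer already committed */
	/* one credit per MPS-sized fragment that still fits */
	return DIV_ROUND_UP(rx_avail - sdu_len, mps);
}

int main(void)
{
	/* 512-byte MTU, 64-byte MPS, 1000 bytes free, 100 buffered */
	printf("%u\n", le_rx_credits(512, 64, 1000, 100));	/* 15 */
	return 0;
}

With imtu 512 and mps 64 the old code always granted 9 credits regardless of buffer occupancy; the new code grants only what the published receive space can absorb.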
@@ -6846,18 +6880,7 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon) BT_DBG("hcon %p conn %p hchan %p", hcon, conn, hchan); - switch (hcon->type) { - case LE_LINK: - if (hcon->hdev->le_mtu) { - conn->mtu = hcon->hdev->le_mtu; - break; - } - fallthrough; - default: - conn->mtu = hcon->hdev->acl_mtu; - break; - } - + conn->mtu = hcon->mtu; conn->feat_mask = 0; conn->local_fixed_chan = L2CAP_FC_SIG_BREDR | L2CAP_FC_CONNLESS; @@ -7111,14 +7134,11 @@ EXPORT_SYMBOL_GPL(l2cap_chan_connect); static void l2cap_ecred_reconfigure(struct l2cap_chan *chan) { struct l2cap_conn *conn = chan->conn; - struct { - struct l2cap_ecred_reconf_req req; - __le16 scid; - } pdu; + DEFINE_RAW_FLEX(struct l2cap_ecred_reconf_req, pdu, scid, 1); - pdu.req.mtu = cpu_to_le16(chan->imtu); - pdu.req.mps = cpu_to_le16(chan->mps); - pdu.scid = cpu_to_le16(chan->scid); + pdu->mtu = cpu_to_le16(chan->imtu); + pdu->mps = cpu_to_le16(chan->mps); + pdu->scid[0] = cpu_to_le16(chan->scid); chan->ident = l2cap_get_ident(conn); @@ -7462,10 +7482,6 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) struct l2cap_conn *conn = hcon->l2cap_data; int len; - /* For AMP controller do not create l2cap conn */ - if (!conn && hcon->hdev->dev_type != HCI_PRIMARY) - goto drop; - if (!conn) conn = l2cap_conn_add(hcon); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 5cc83f906c12..6db60946c627 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -327,7 +327,7 @@ done: } static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, - int flags, bool kern) + struct proto_accept_arg *arg) { DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk, *nsk; @@ -336,7 +336,7 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, lock_sock_nested(sk, L2CAP_NESTING_PARENT); - timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK); BT_DBG("sk %p timeo %ld", sk, timeo); @@ -1131,6 +1131,34 @@ static int l2cap_sock_sendmsg(struct socket *sock, struct msghdr *msg, return err; } +static void l2cap_publish_rx_avail(struct l2cap_chan *chan) +{ + struct sock *sk = chan->data; + ssize_t avail = sk->sk_rcvbuf - atomic_read(&sk->sk_rmem_alloc); + int expected_skbs, skb_overhead; + + if (avail <= 0) { + l2cap_chan_rx_avail(chan, 0); + return; + } + + if (!chan->mps) { + l2cap_chan_rx_avail(chan, -1); + return; + } + + /* Correct available memory by estimated sk_buff overhead. + * This is significant due to small transfer sizes. However, accept + * at least one full packet if receive space is non-zero. 
+ */ + expected_skbs = DIV_ROUND_UP(avail, chan->mps); + skb_overhead = expected_skbs * sizeof(struct sk_buff); + if (skb_overhead < avail) + l2cap_chan_rx_avail(chan, avail - skb_overhead); + else + l2cap_chan_rx_avail(chan, -1); +} + static int l2cap_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { @@ -1167,28 +1195,33 @@ static int l2cap_sock_recvmsg(struct socket *sock, struct msghdr *msg, else err = bt_sock_recvmsg(sock, msg, len, flags); - if (pi->chan->mode != L2CAP_MODE_ERTM) + if (pi->chan->mode != L2CAP_MODE_ERTM && + pi->chan->mode != L2CAP_MODE_LE_FLOWCTL && + pi->chan->mode != L2CAP_MODE_EXT_FLOWCTL) return err; - /* Attempt to put pending rx data in the socket buffer */ - lock_sock(sk); - if (!test_bit(CONN_LOCAL_BUSY, &pi->chan->conn_state)) - goto done; + l2cap_publish_rx_avail(pi->chan); - if (pi->rx_busy_skb) { - if (!__sock_queue_rcv_skb(sk, pi->rx_busy_skb)) - pi->rx_busy_skb = NULL; - else + /* Attempt to put pending rx data in the socket buffer */ + while (!list_empty(&pi->rx_busy)) { + struct l2cap_rx_busy *rx_busy = + list_first_entry(&pi->rx_busy, + struct l2cap_rx_busy, + list); + if (__sock_queue_rcv_skb(sk, rx_busy->skb) < 0) goto done; + list_del(&rx_busy->list); + kfree(rx_busy); } /* Restore data flow when half of the receive buffer is * available. This avoids resending large numbers of * frames. */ - if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf >> 1) + if (test_bit(CONN_LOCAL_BUSY, &pi->chan->conn_state) && + atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf >> 1) l2cap_chan_busy(pi->chan, 0); done: @@ -1449,17 +1482,20 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan) static int l2cap_sock_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb) { struct sock *sk = chan->data; + struct l2cap_pinfo *pi = l2cap_pi(sk); int err; lock_sock(sk); - if (l2cap_pi(sk)->rx_busy_skb) { + if (chan->mode == L2CAP_MODE_ERTM && !list_empty(&pi->rx_busy)) { err = -ENOMEM; goto done; } if (chan->mode != L2CAP_MODE_ERTM && - chan->mode != L2CAP_MODE_STREAMING) { + chan->mode != L2CAP_MODE_STREAMING && + chan->mode != L2CAP_MODE_LE_FLOWCTL && + chan->mode != L2CAP_MODE_EXT_FLOWCTL) { /* Even if no filter is attached, we could potentially * get errors from security modules, etc. */ @@ -1470,7 +1506,9 @@ static int l2cap_sock_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb) err = __sock_queue_rcv_skb(sk, skb); - /* For ERTM, handle one skb that doesn't fit into the recv + l2cap_publish_rx_avail(chan); + + /* For ERTM and LE, handle a skb that doesn't fit into the recv * buffer. This is important to do because the data frames * have already been acked, so the skb cannot be discarded. * @@ -1479,8 +1517,18 @@ static int l2cap_sock_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb) * acked and reassembled until there is buffer space * available. 
*/ - if (err < 0 && chan->mode == L2CAP_MODE_ERTM) { - l2cap_pi(sk)->rx_busy_skb = skb; + if (err < 0 && + (chan->mode == L2CAP_MODE_ERTM || + chan->mode == L2CAP_MODE_LE_FLOWCTL || + chan->mode == L2CAP_MODE_EXT_FLOWCTL)) { + struct l2cap_rx_busy *rx_busy = + kmalloc(sizeof(*rx_busy), GFP_KERNEL); + if (!rx_busy) { + err = -ENOMEM; + goto done; + } + rx_busy->skb = skb; + list_add_tail(&rx_busy->list, &pi->rx_busy); l2cap_chan_busy(chan, 1); err = 0; } @@ -1706,6 +1754,8 @@ static const struct l2cap_ops l2cap_chan_ops = { static void l2cap_sock_destruct(struct sock *sk) { + struct l2cap_rx_busy *rx_busy, *next; + BT_DBG("sk %p", sk); if (l2cap_pi(sk)->chan) { @@ -1713,9 +1763,10 @@ static void l2cap_sock_destruct(struct sock *sk) l2cap_chan_put(l2cap_pi(sk)->chan); } - if (l2cap_pi(sk)->rx_busy_skb) { - kfree_skb(l2cap_pi(sk)->rx_busy_skb); - l2cap_pi(sk)->rx_busy_skb = NULL; + list_for_each_entry_safe(rx_busy, next, &l2cap_pi(sk)->rx_busy, list) { + kfree_skb(rx_busy->skb); + list_del(&rx_busy->list); + kfree(rx_busy); } skb_queue_purge(&sk->sk_receive_queue); @@ -1799,6 +1850,8 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent) chan->data = sk; chan->ops = &l2cap_chan_ops; + + l2cap_publish_rx_avail(chan); } static struct proto l2cap_proto = { @@ -1820,6 +1873,8 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, sk->sk_destruct = l2cap_sock_destruct; sk->sk_sndtimeo = L2CAP_CONN_TIMEOUT; + INIT_LIST_HEAD(&l2cap_pi(sk)->rx_busy); + chan = l2cap_chan_create(); if (!chan) { sk_free(sk); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 965f621ef865..80f220b7e19d 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -443,8 +443,7 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data, count = 0; list_for_each_entry(d, &hci_dev_list, list) { - if (d->dev_type == HCI_PRIMARY && - !hci_dev_test_flag(d, HCI_UNCONFIGURED)) + if (!hci_dev_test_flag(d, HCI_UNCONFIGURED)) count++; } @@ -468,8 +467,7 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data, if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks)) continue; - if (d->dev_type == HCI_PRIMARY && - !hci_dev_test_flag(d, HCI_UNCONFIGURED)) { + if (!hci_dev_test_flag(d, HCI_UNCONFIGURED)) { rp->index[count++] = cpu_to_le16(d->id); bt_dev_dbg(hdev, "Added hci%u", d->id); } @@ -503,8 +501,7 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev, count = 0; list_for_each_entry(d, &hci_dev_list, list) { - if (d->dev_type == HCI_PRIMARY && - hci_dev_test_flag(d, HCI_UNCONFIGURED)) + if (hci_dev_test_flag(d, HCI_UNCONFIGURED)) count++; } @@ -528,8 +525,7 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev, if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks)) continue; - if (d->dev_type == HCI_PRIMARY && - hci_dev_test_flag(d, HCI_UNCONFIGURED)) { + if (hci_dev_test_flag(d, HCI_UNCONFIGURED)) { rp->index[count++] = cpu_to_le16(d->id); bt_dev_dbg(hdev, "Added hci%u", d->id); } @@ -561,10 +557,8 @@ static int read_ext_index_list(struct sock *sk, struct hci_dev *hdev, read_lock(&hci_dev_list_lock); count = 0; - list_for_each_entry(d, &hci_dev_list, list) { - if (d->dev_type == HCI_PRIMARY || d->dev_type == HCI_AMP) - count++; - } + list_for_each_entry(d, &hci_dev_list, list) + count++; rp = kmalloc(struct_size(rp, entry, count), GFP_ATOMIC); if (!rp) { @@ -585,16 +579,10 @@ static int read_ext_index_list(struct sock *sk, struct hci_dev *hdev, if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks)) continue; - if 
(d->dev_type == HCI_PRIMARY) { - if (hci_dev_test_flag(d, HCI_UNCONFIGURED)) - rp->entry[count].type = 0x01; - else - rp->entry[count].type = 0x00; - } else if (d->dev_type == HCI_AMP) { - rp->entry[count].type = 0x02; - } else { - continue; - } + if (hci_dev_test_flag(d, HCI_UNCONFIGURED)) + rp->entry[count].type = 0x01; + else + rp->entry[count].type = 0x00; rp->entry[count].bus = d->bus; rp->entry[count++].index = cpu_to_le16(d->id); @@ -9331,23 +9319,14 @@ void mgmt_index_added(struct hci_dev *hdev) if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) return; - switch (hdev->dev_type) { - case HCI_PRIMARY: - if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { - mgmt_index_event(MGMT_EV_UNCONF_INDEX_ADDED, hdev, - NULL, 0, HCI_MGMT_UNCONF_INDEX_EVENTS); - ev.type = 0x01; - } else { - mgmt_index_event(MGMT_EV_INDEX_ADDED, hdev, NULL, 0, - HCI_MGMT_INDEX_EVENTS); - ev.type = 0x00; - } - break; - case HCI_AMP: - ev.type = 0x02; - break; - default: - return; + if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { + mgmt_index_event(MGMT_EV_UNCONF_INDEX_ADDED, hdev, NULL, 0, + HCI_MGMT_UNCONF_INDEX_EVENTS); + ev.type = 0x01; + } else { + mgmt_index_event(MGMT_EV_INDEX_ADDED, hdev, NULL, 0, + HCI_MGMT_INDEX_EVENTS); + ev.type = 0x00; } ev.bus = hdev->bus; @@ -9364,25 +9343,16 @@ void mgmt_index_removed(struct hci_dev *hdev) if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) return; - switch (hdev->dev_type) { - case HCI_PRIMARY: - mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status); + mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status); - if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { - mgmt_index_event(MGMT_EV_UNCONF_INDEX_REMOVED, hdev, - NULL, 0, HCI_MGMT_UNCONF_INDEX_EVENTS); - ev.type = 0x01; - } else { - mgmt_index_event(MGMT_EV_INDEX_REMOVED, hdev, NULL, 0, - HCI_MGMT_INDEX_EVENTS); - ev.type = 0x00; - } - break; - case HCI_AMP: - ev.type = 0x02; - break; - default: - return; + if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { + mgmt_index_event(MGMT_EV_UNCONF_INDEX_REMOVED, hdev, NULL, 0, + HCI_MGMT_UNCONF_INDEX_EVENTS); + ev.type = 0x01; + } else { + mgmt_index_event(MGMT_EV_INDEX_REMOVED, hdev, NULL, 0, + HCI_MGMT_INDEX_EVENTS); + ev.type = 0x00; } ev.bus = hdev->bus; diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c index 9612c5d1b13f..d039683d3bdd 100644 --- a/net/bluetooth/msft.c +++ b/net/bluetooth/msft.c @@ -769,7 +769,7 @@ void msft_register(struct hci_dev *hdev) mutex_init(&msft->filter_lock); } -void msft_unregister(struct hci_dev *hdev) +void msft_release(struct hci_dev *hdev) { struct msft_data *msft = hdev->msft_data; diff --git a/net/bluetooth/msft.h b/net/bluetooth/msft.h index 2a63205b377b..fe538e9c91c0 100644 --- a/net/bluetooth/msft.h +++ b/net/bluetooth/msft.h @@ -14,7 +14,7 @@ bool msft_monitor_supported(struct hci_dev *hdev); void msft_register(struct hci_dev *hdev); -void msft_unregister(struct hci_dev *hdev); +void msft_release(struct hci_dev *hdev); void msft_do_open(struct hci_dev *hdev); void msft_do_close(struct hci_dev *hdev); void msft_vendor_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb); @@ -35,7 +35,7 @@ static inline bool msft_monitor_supported(struct hci_dev *hdev) } static inline void msft_register(struct hci_dev *hdev) {} -static inline void msft_unregister(struct hci_dev *hdev) {} +static inline void msft_release(struct hci_dev *hdev) {} static inline void msft_do_open(struct hci_dev *hdev) {} static inline void msft_do_close(struct hci_dev *hdev) {} static inline void msft_vendor_evt(struct hci_dev *hdev, void *data, diff --git 
a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 29aa07e9db9d..37d63d768afb 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -468,8 +468,8 @@ done: return err; } -static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int flags, - bool kern) +static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, + struct proto_accept_arg *arg) { DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk, *nsk; @@ -483,7 +483,7 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f goto done; } - timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK); BT_DBG("sk %p timeo %ld", sk, timeo); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 5d03c5440b06..a5ac160c592e 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -83,6 +83,10 @@ static void sco_sock_timeout(struct work_struct *work) struct sock *sk; sco_conn_lock(conn); + if (!conn->hcon) { + sco_conn_unlock(conn); + return; + } sk = conn->sk; if (sk) sock_hold(sk); @@ -122,7 +126,6 @@ static void sco_sock_clear_timer(struct sock *sk) /* ---- SCO connections ---- */ static struct sco_conn *sco_conn_add(struct hci_conn *hcon) { - struct hci_dev *hdev = hcon->hdev; struct sco_conn *conn = hcon->sco_data; if (conn) { @@ -140,9 +143,10 @@ static struct sco_conn *sco_conn_add(struct hci_conn *hcon) hcon->sco_data = conn; conn->hcon = hcon; + conn->mtu = hcon->mtu; - if (hdev->sco_mtu > 0) - conn->mtu = hdev->sco_mtu; + if (hcon->mtu > 0) + conn->mtu = hcon->mtu; else conn->mtu = 60; @@ -643,7 +647,7 @@ done: } static int sco_sock_accept(struct socket *sock, struct socket *newsock, - int flags, bool kern) + struct proto_accept_arg *arg) { DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk, *ch; @@ -652,7 +656,7 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock, lock_sock(sk); - timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK); BT_DBG("sk %p timeo %ld", sk, timeo); diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c index 25b75844891a..891cdf61c65a 100644 --- a/net/bpf/bpf_dummy_struct_ops.c +++ b/net/bpf/bpf_dummy_struct_ops.c @@ -79,6 +79,51 @@ static int dummy_ops_call_op(void *image, struct bpf_dummy_ops_test_args *args) args->args[3], args->args[4]); } +static const struct bpf_ctx_arg_aux *find_ctx_arg_info(struct bpf_prog_aux *aux, int offset) +{ + int i; + + for (i = 0; i < aux->ctx_arg_info_size; i++) + if (aux->ctx_arg_info[i].offset == offset) + return &aux->ctx_arg_info[i]; + + return NULL; +} + +/* There is only one check at the moment: + * - zero should not be passed for pointer parameters not marked as nullable. + */ +static int check_test_run_args(struct bpf_prog *prog, struct bpf_dummy_ops_test_args *args) +{ + const struct btf_type *func_proto = prog->aux->attach_func_proto; + + for (u32 arg_no = 0; arg_no < btf_type_vlen(func_proto) ; ++arg_no) { + const struct btf_param *param = &btf_params(func_proto)[arg_no]; + const struct bpf_ctx_arg_aux *info; + const struct btf_type *t; + int offset; + + if (args->args[arg_no] != 0) + continue; + + /* Program is validated already, so there is no need + * to check if t is NULL. 
+ */ + t = btf_type_skip_modifiers(bpf_dummy_ops_btf, param->type, NULL); + if (!btf_type_is_ptr(t)) + continue; + + offset = btf_ctx_arg_offset(bpf_dummy_ops_btf, func_proto, arg_no); + info = find_ctx_arg_info(prog->aux, offset); + if (info && (info->reg_type & PTR_MAYBE_NULL)) + continue; + + return -EINVAL; + } + + return 0; +} + extern const struct bpf_link_ops bpf_struct_ops_link_lops; int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, @@ -87,7 +132,7 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, const struct bpf_struct_ops *st_ops = &bpf_bpf_dummy_ops; const struct btf_type *func_proto; struct bpf_dummy_ops_test_args *args; - struct bpf_tramp_links *tlinks; + struct bpf_tramp_links *tlinks = NULL; struct bpf_tramp_link *link = NULL; void *image = NULL; unsigned int op_idx; @@ -109,6 +154,10 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, if (IS_ERR(args)) return PTR_ERR(args); + err = check_test_run_args(prog, args); + if (err) + goto out; + tlinks = kcalloc(BPF_TRAMP_MAX, sizeof(*tlinks), GFP_KERNEL); if (!tlinks) { err = -ENOMEM; @@ -232,7 +281,7 @@ static void bpf_dummy_unreg(void *kdata) { } -static int bpf_dummy_test_1(struct bpf_dummy_ops_state *cb) +static int bpf_dummy_ops__test_1(struct bpf_dummy_ops_state *cb__nullable) { return 0; } @@ -249,7 +298,7 @@ static int bpf_dummy_test_sleepable(struct bpf_dummy_ops_state *cb) } static struct bpf_dummy_ops __bpf_bpf_dummy_ops = { - .test_1 = bpf_dummy_test_1, + .test_1 = bpf_dummy_ops__test_1, .test_2 = bpf_dummy_test_2, .test_sleepable = bpf_dummy_test_sleepable, }; diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 61efeadaff8d..f6aad4ed2ab2 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -575,6 +575,13 @@ __bpf_kfunc int bpf_modify_return_test2(int a, int *b, short c, int d, return a + *b + c + d + (long)e + f + g; } +__bpf_kfunc int bpf_modify_return_test_tp(int nonce) +{ + trace_bpf_trigger_tp(nonce); + + return nonce; +} + int noinline bpf_fentry_shadow_test(int a) { return a + 1; @@ -622,6 +629,7 @@ __bpf_kfunc_end_defs(); BTF_KFUNCS_START(bpf_test_modify_return_ids) BTF_ID_FLAGS(func, bpf_modify_return_test) BTF_ID_FLAGS(func, bpf_modify_return_test2) +BTF_ID_FLAGS(func, bpf_modify_return_test_tp) BTF_ID_FLAGS(func, bpf_fentry_test1, KF_SLEEPABLE) BTF_KFUNCS_END(bpf_test_modify_return_ids) diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index ab4d33e02014..fb1115857e49 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -27,6 +27,7 @@ EXPORT_SYMBOL_GPL(nf_br_ops); /* net device transmit always called with BH disabled */ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) { + enum skb_drop_reason reason = pskb_may_pull_reason(skb, ETH_HLEN); struct net_bridge_mcast_port *pmctx_null = NULL; struct net_bridge *br = netdev_priv(dev); struct net_bridge_mcast *brmctx = &br->multicast_ctx; @@ -38,6 +39,11 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) const unsigned char *dest; u16 vid = 0; + if (unlikely(reason != SKB_NOT_DROPPED_YET)) { + kfree_skb_reason(skb, reason); + return NETDEV_TX_OK; + } + memset(skb->cb, 0, sizeof(struct br_input_skb_cb)); br_tc_skb_miss_set(skb, false); @@ -197,7 +203,7 @@ static int br_change_mtu(struct net_device *dev, int new_mtu) { struct net_bridge *br = netdev_priv(dev); - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); /* this flag will be cleared if the MTU was automatically adjusted */ 
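br_change_mtu() here (like vlan_dev_change_mtu() earlier in this diff) switches to WRITE_ONCE() because dev->mtu is sampled locklessly from fast paths; the same annotation is applied to dev->flags, promiscuity, allmulti, tx_queue_len and proto_down in net/core/dev.c below. A simplified userspace rendition of the macro pair — the kernel versions add compile-time checks and handle odd sizes, and volatile alone is not atomicity in the C standard, but for naturally aligned word-sized fields this matches what the kernel relies on:

/* Force exactly one non-cached access to the field. Writers still
 * serialize among themselves (e.g. under RTNL); the annotation is for
 * the benefit of lockless readers.
 */
#define WRITE_ONCE(x, val) (*(volatile __typeof__(x) *)&(x) = (val))
#define READ_ONCE(x)       (*(volatile __typeof__(x) *)&(x))

struct net_device_model { unsigned int mtu; };	/* hypothetical stand-in */

static void change_mtu(struct net_device_model *dev, unsigned int new_mtu)
{
	WRITE_ONCE(dev->mtu, new_mtu);	/* writer side, under the lock */
}

static unsigned int sample_mtu(struct net_device_model *dev)
{
	return READ_ONCE(dev->mtu);	/* reader side, no lock held */
}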
br_opt_toggle(br, BROPT_MTU_SET_BY_USER, true); diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 7431f89e897b..d97064d460dc 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -258,6 +258,7 @@ static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb, { struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev; const unsigned char *src = eth_hdr(skb)->h_source; + struct sk_buff *nskb; if (!should_deliver(p, skb)) return; @@ -266,12 +267,16 @@ static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb, if (skb->dev == p->dev && ether_addr_equal(src, addr)) return; - skb = skb_copy(skb, GFP_ATOMIC); - if (!skb) { + __skb_push(skb, ETH_HLEN); + nskb = pskb_copy(skb, GFP_ATOMIC); + __skb_pull(skb, ETH_HLEN); + if (!nskb) { DEV_STATS_INC(dev, tx_dropped); return; } + skb = nskb; + __skb_pull(skb, ETH_HLEN); if (!is_broadcast_ether_addr(addr)) memcpy(eth_hdr(skb)->h_dest, addr, ETH_ALEN); diff --git a/net/bridge/br_mst.c b/net/bridge/br_mst.c index ee680adcee17..3c66141d34d6 100644 --- a/net/bridge/br_mst.c +++ b/net/bridge/br_mst.c @@ -78,7 +78,7 @@ static void br_mst_vlan_set_state(struct net_bridge_port *p, struct net_bridge_v { struct net_bridge_vlan_group *vg = nbp_vlan_group(p); - if (v->state == state) + if (br_vlan_get_state(v) == state) return; br_vlan_set_state(v, state); @@ -100,11 +100,12 @@ int br_mst_set_state(struct net_bridge_port *p, u16 msti, u8 state, }; struct net_bridge_vlan_group *vg; struct net_bridge_vlan *v; - int err; + int err = 0; + rcu_read_lock(); vg = nbp_vlan_group(p); if (!vg) - return 0; + goto out; /* MSTI 0 (CST) state changes are notified via the regular * SWITCHDEV_ATTR_ID_PORT_STP_STATE. @@ -112,17 +113,20 @@ int br_mst_set_state(struct net_bridge_port *p, u16 msti, u8 state, if (msti) { err = switchdev_port_attr_set(p->dev, &attr, extack); if (err && err != -EOPNOTSUPP) - return err; + goto out; } - list_for_each_entry(v, &vg->vlan_list, vlist) { + err = 0; + list_for_each_entry_rcu(v, &vg->vlan_list, vlist) { if (v->brvlan->msti != msti) continue; br_mst_vlan_set_state(p, v, state); } - return 0; +out: + rcu_read_unlock(); + return err; } static void br_mst_vlan_sync_state(struct net_bridge_vlan *pv, u16 msti) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 7948a9e7542c..bf30c50b5689 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -1226,7 +1226,6 @@ static struct ctl_table brnf_table[] = { .mode = 0644, .proc_handler = brnf_sysctl_call_tables, }, - { } }; static inline void br_netfilter_sysctl_default(struct brnf_net *brnf) diff --git a/net/core/Makefile b/net/core/Makefile index 21d6fbc7e884..62be9aef2528 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -26,6 +26,7 @@ obj-$(CONFIG_NETPOLL) += netpoll.o obj-$(CONFIG_FIB_RULES) += fib_rules.o obj-$(CONFIG_TRACEPOINTS) += net-traces.o obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o +obj-$(CONFIG_NET_IEEE8021Q_HELPERS) += ieee8021q_helpers.o obj-$(CONFIG_NET_SELFTESTS) += selftests.o obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o obj-$(CONFIG_NET_PTP_CLASSIFY) += ptp_classifier.o diff --git a/net/core/dev.c b/net/core/dev.c index e09aa3785c15..e1bb6d7856d9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -158,7 +158,6 @@ #include <net/page_pool/types.h> #include <net/page_pool/helpers.h> #include <net/rps.h> -#include <linux/phy_link_topology_core.h> #include "dev.h" #include "net-sysfs.h" @@ -940,6 +939,18 @@ struct net_device 
*dev_get_by_napi_id(unsigned int napi_id) } EXPORT_SYMBOL(dev_get_by_napi_id); +static DEFINE_SEQLOCK(netdev_rename_lock); + +void netdev_copy_name(struct net_device *dev, char *name) +{ + unsigned int seq; + + do { + seq = read_seqbegin(&netdev_rename_lock); + strscpy(name, dev->name, IFNAMSIZ); + } while (read_seqretry(&netdev_rename_lock, seq)); +} + /** * netdev_get_name - get a netdevice name, knowing its ifindex. * @net: network namespace @@ -951,7 +962,6 @@ int netdev_get_name(struct net *net, char *name, int ifindex) struct net_device *dev; int ret; - down_read(&devnet_rename_sem); rcu_read_lock(); dev = dev_get_by_index_rcu(net, ifindex); @@ -960,12 +970,11 @@ int netdev_get_name(struct net *net, char *name, int ifindex) goto out; } - strcpy(name, dev->name); + netdev_copy_name(dev, name); ret = 0; out: rcu_read_unlock(); - up_read(&devnet_rename_sem); return ret; } @@ -1217,7 +1226,10 @@ int dev_change_name(struct net_device *dev, const char *newname) memcpy(oldname, dev->name, IFNAMSIZ); + write_seqlock(&netdev_rename_lock); err = dev_get_valid_name(net, dev, newname); + write_sequnlock(&netdev_rename_lock); + if (err < 0) { up_write(&devnet_rename_sem); return err; @@ -1257,7 +1269,9 @@ rollback: if (err >= 0) { err = ret; down_write(&devnet_rename_sem); + write_seqlock(&netdev_rename_lock); memcpy(dev->name, oldname, IFNAMSIZ); + write_sequnlock(&netdev_rename_lock); memcpy(oldname, newname, IFNAMSIZ); WRITE_ONCE(dev->name_assign_type, old_assign_type); old_assign_type = NET_NAME_RENAMED; @@ -4450,7 +4464,6 @@ EXPORT_SYMBOL(__dev_direct_xmit); *************************************************************************/ static DEFINE_PER_CPU(struct task_struct *, backlog_napi); -unsigned int sysctl_skb_defer_max __read_mostly = 64; int weight_p __read_mostly = 64; /* old backlog weight */ int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */ int dev_weight_tx_bias __read_mostly = 1; /* bias for output_queue quota */ @@ -6517,7 +6530,7 @@ int dev_set_threaded(struct net_device *dev, bool threaded) } } - dev->threaded = threaded; + WRITE_ONCE(dev->threaded, threaded); /* Make sure kthread is created before THREADED bit * is set. @@ -6608,7 +6621,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, * threaded mode will not be enabled in napi_enable(). */ if (dev->threaded && napi_kthread_create(napi)) - dev->threaded = 0; + dev->threaded = false; netif_napi_set_irq(napi, -1); } EXPORT_SYMBOL(netif_napi_add_weight); @@ -8530,27 +8543,29 @@ static void dev_change_rx_flags(struct net_device *dev, int flags) static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify) { unsigned int old_flags = dev->flags; + unsigned int promiscuity, flags; kuid_t uid; kgid_t gid; ASSERT_RTNL(); - dev->flags |= IFF_PROMISC; - dev->promiscuity += inc; - if (dev->promiscuity == 0) { + promiscuity = dev->promiscuity + inc; + if (promiscuity == 0) { /* * Avoid overflow. * If inc causes overflow, untouch promisc and return error. */ - if (inc < 0) - dev->flags &= ~IFF_PROMISC; - else { - dev->promiscuity -= inc; + if (unlikely(inc > 0)) { netdev_warn(dev, "promiscuity touches roof, set promiscuity failed. 
promiscuity feature of device might be broken.\n"); return -EOVERFLOW; } + flags = old_flags & ~IFF_PROMISC; + } else { + flags = old_flags | IFF_PROMISC; } - if (dev->flags != old_flags) { + WRITE_ONCE(dev->promiscuity, promiscuity); + if (flags != old_flags) { + WRITE_ONCE(dev->flags, flags); netdev_info(dev, "%s promiscuous mode\n", dev->flags & IFF_PROMISC ? "entered" : "left"); if (audit_enabled) { @@ -8601,25 +8616,27 @@ EXPORT_SYMBOL(dev_set_promiscuity); static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify) { unsigned int old_flags = dev->flags, old_gflags = dev->gflags; + unsigned int allmulti, flags; ASSERT_RTNL(); - dev->flags |= IFF_ALLMULTI; - dev->allmulti += inc; - if (dev->allmulti == 0) { + allmulti = dev->allmulti + inc; + if (allmulti == 0) { /* * Avoid overflow. * If inc causes overflow, untouch allmulti and return error. */ - if (inc < 0) - dev->flags &= ~IFF_ALLMULTI; - else { - dev->allmulti -= inc; + if (unlikely(inc > 0)) { netdev_warn(dev, "allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n"); return -EOVERFLOW; } + flags = old_flags & ~IFF_ALLMULTI; + } else { + flags = old_flags | IFF_ALLMULTI; } - if (dev->flags ^ old_flags) { + WRITE_ONCE(dev->allmulti, allmulti); + if (flags != old_flags) { + WRITE_ONCE(dev->flags, flags); netdev_info(dev, "%s allmulticast mode\n", dev->flags & IFF_ALLMULTI ? "entered" : "left"); dev_change_rx_flags(dev, IFF_ALLMULTI); @@ -8945,7 +8962,7 @@ int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len) return -ERANGE; if (new_len != orig_len) { - dev->tx_queue_len = new_len; + WRITE_ONCE(dev->tx_queue_len, new_len); res = call_netdevice_notifiers(NETDEV_CHANGE_TX_QUEUE_LEN, dev); res = notifier_to_errno(res); if (res) @@ -8959,7 +8976,7 @@ int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len) err_rollback: netdev_err(dev, "refused to change device tx_queue_len\n"); - dev->tx_queue_len = orig_len; + WRITE_ONCE(dev->tx_queue_len, orig_len); return res; } @@ -9205,7 +9222,7 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down) netif_carrier_off(dev); else netif_carrier_on(dev); - dev->proto_down = proto_down; + WRITE_ONCE(dev->proto_down, proto_down); return 0; } @@ -9219,18 +9236,21 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down) void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask, u32 value) { + u32 proto_down_reason; int b; if (!mask) { - dev->proto_down_reason = value; + proto_down_reason = value; } else { + proto_down_reason = dev->proto_down_reason; for_each_set_bit(b, &mask, 32) { if (value & (1 << b)) - dev->proto_down_reason |= BIT(b); + proto_down_reason |= BIT(b); else - dev->proto_down_reason &= ~BIT(b); + proto_down_reason &= ~BIT(b); } } + WRITE_ONCE(dev->proto_down_reason, proto_down_reason); } struct bpf_xdp_link { @@ -10566,8 +10586,9 @@ static struct net_device *netdev_wait_allrefs_any(struct list_head *list) rebroadcast_time = jiffies; } + rcu_barrier(); + if (!wait) { - rcu_barrier(); wait = WAIT_REFS_MIN_MSECS; } else { msleep(wait); @@ -10976,12 +10997,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, #ifdef CONFIG_NET_SCHED hash_init(dev->qdisc_hash); #endif - dev->link_topo = phy_link_topo_create(dev); - if (IS_ERR(dev->link_topo)) { - dev->link_topo = NULL; - goto free_all; - } - dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; setup(dev); @@ -11070,8 +11085,6 @@ void free_netdev(struct net_device *dev) 
free_percpu(dev->xdp_bulkq); dev->xdp_bulkq = NULL; - phy_link_topo_destroy(dev->link_topo); - /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED || dev->reg_state == NETREG_DUMMY) { @@ -11403,8 +11416,12 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, dev_net_set(dev, net); dev->ifindex = new_ifindex; - if (new_name[0]) /* Rename the netdev to prepared name */ + if (new_name[0]) { + /* Rename the netdev to prepared name */ + write_seqlock(&netdev_rename_lock); strscpy(dev->name, new_name, IFNAMSIZ); + write_sequnlock(&netdev_rename_lock); + } /* Fixup kobjects */ dev_set_uevent_suppress(&dev->dev, 1); diff --git a/net/core/dev.h b/net/core/dev.h index 8572d2c8dc4a..b7b518bc2be5 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -36,7 +36,6 @@ int dev_addr_init(struct net_device *dev); void dev_addr_check(struct net_device *dev); /* sysctls not referred to from outside net/core/ */ -extern unsigned int sysctl_skb_defer_max; extern int netdev_unregister_timeout_secs; extern int weight_p; extern int dev_weight_rx_bias; diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c index 0ccfd5fa5cb9..6a0482e676d3 100644 --- a/net/core/dst_cache.c +++ b/net/core/dst_cache.c @@ -47,7 +47,8 @@ static struct dst_entry *dst_cache_per_cpu_get(struct dst_cache *dst_cache, /* the cache already hold a dst reference; it can't go away */ dst_hold(dst); - if (unlikely(!time_after(idst->refresh_ts, dst_cache->reset_ts) || + if (unlikely(!time_after(idst->refresh_ts, + READ_ONCE(dst_cache->reset_ts)) || (dst->obsolete && !dst->ops->check(dst, idst->cookie)))) { dst_cache_per_cpu_dst_set(idst, NULL, 0); dst_release(dst); @@ -83,7 +84,7 @@ struct rtable *dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr) return NULL; *saddr = idst->in_saddr.s_addr; - return container_of(dst, struct rtable, dst); + return dst_rtable(dst); } EXPORT_SYMBOL_GPL(dst_cache_get_ip4); @@ -111,8 +112,8 @@ void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst, return; idst = this_cpu_ptr(dst_cache->cache); - dst_cache_per_cpu_dst_set(this_cpu_ptr(dst_cache->cache), dst, - rt6_get_cookie((struct rt6_info *)dst)); + dst_cache_per_cpu_dst_set(idst, dst, + rt6_get_cookie(dst_rt6_info(dst))); idst->in6_saddr = *saddr; } EXPORT_SYMBOL_GPL(dst_cache_set_ip6); @@ -170,7 +171,7 @@ void dst_cache_reset_now(struct dst_cache *dst_cache) if (!dst_cache->cache) return; - dst_cache->reset_ts = jiffies; + dst_cache_reset(dst_cache); for_each_possible_cpu(i) { struct dst_cache_pcpu *idst = per_cpu_ptr(dst_cache->cache, i); struct dst_entry *dst = idst->dst; diff --git a/net/core/filter.c b/net/core/filter.c index 294670d3850d..2510464692af 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -87,6 +87,9 @@ #include "dev.h" +/* Keep the struct bpf_fib_lookup small so that it fits into a cacheline */ +static_assert(sizeof(struct bpf_fib_lookup) == 64, "struct bpf_fib_lookup size check"); + static const struct bpf_func_proto * bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog); @@ -2215,7 +2218,7 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb, rcu_read_lock(); if (!nh) { dst = skb_dst(skb); - nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst), + nexthop = rt6_nexthop(dst_rt6_info(dst), &ipv6_hdr(skb)->daddr); } else { nexthop = &nh->ipv6_nh; @@ -2314,8 +2317,7 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb, rcu_read_lock(); if (!nh) { - struct dst_entry *dst = skb_dst(skb); - 
struct rtable *rt = container_of(dst, struct rtable, dst); + struct rtable *rt = skb_rtable(skb); neigh = ip_neigh_for_gw(rt, skb, &is_v6gw); } else if (nh->nh_family == AF_INET6) { @@ -4360,10 +4362,12 @@ static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri, enum bpf_map_type map_type = ri->map_type; void *fwd = ri->tgt_value; u32 map_id = ri->map_id; + u32 flags = ri->flags; struct bpf_map *map; int err; ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */ + ri->flags = 0; ri->map_type = BPF_MAP_TYPE_UNSPEC; if (unlikely(!xdpf)) { @@ -4375,11 +4379,20 @@ static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri, case BPF_MAP_TYPE_DEVMAP: fallthrough; case BPF_MAP_TYPE_DEVMAP_HASH: - map = READ_ONCE(ri->map); - if (unlikely(map)) { + if (unlikely(flags & BPF_F_BROADCAST)) { + map = READ_ONCE(ri->map); + + /* The map pointer is cleared when the map is being torn + * down by bpf_clear_redirect_map() + */ + if (unlikely(!map)) { + err = -ENOENT; + break; + } + WRITE_ONCE(ri->map, NULL); err = dev_map_enqueue_multi(xdpf, dev, map, - ri->flags & BPF_F_EXCLUDE_INGRESS); + flags & BPF_F_EXCLUDE_INGRESS); } else { err = dev_map_enqueue(fwd, xdpf, dev); } @@ -4442,9 +4455,9 @@ EXPORT_SYMBOL_GPL(xdp_do_redirect_frame); static int xdp_do_generic_redirect_map(struct net_device *dev, struct sk_buff *skb, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog, - void *fwd, - enum bpf_map_type map_type, u32 map_id) + struct bpf_prog *xdp_prog, void *fwd, + enum bpf_map_type map_type, u32 map_id, + u32 flags) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); struct bpf_map *map; @@ -4454,11 +4467,20 @@ static int xdp_do_generic_redirect_map(struct net_device *dev, case BPF_MAP_TYPE_DEVMAP: fallthrough; case BPF_MAP_TYPE_DEVMAP_HASH: - map = READ_ONCE(ri->map); - if (unlikely(map)) { + if (unlikely(flags & BPF_F_BROADCAST)) { + map = READ_ONCE(ri->map); + + /* The map pointer is cleared when the map is being torn + * down by bpf_clear_redirect_map() + */ + if (unlikely(!map)) { + err = -ENOENT; + break; + } + WRITE_ONCE(ri->map, NULL); err = dev_map_redirect_multi(dev, skb, xdp_prog, map, - ri->flags & BPF_F_EXCLUDE_INGRESS); + flags & BPF_F_EXCLUDE_INGRESS); } else { err = dev_map_generic_redirect(fwd, skb, xdp_prog); } @@ -4495,9 +4517,11 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, enum bpf_map_type map_type = ri->map_type; void *fwd = ri->tgt_value; u32 map_id = ri->map_id; + u32 flags = ri->flags; int err; ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */ + ri->flags = 0; ri->map_type = BPF_MAP_TYPE_UNSPEC; if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) { @@ -4517,7 +4541,7 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, return 0; } - return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id); + return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id, flags); err: _trace_xdp_redirect_err(dev, xdp_prog, ri->tgt_index, err); return err; @@ -5886,7 +5910,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF); } else { - fl4.flowi4_mark = 0; + if (flags & BPF_FIB_LOOKUP_MARK) + fl4.flowi4_mark = params->mark; + else + fl4.flowi4_mark = 0; fl4.flowi4_secid = 0; fl4.flowi4_tun_key.tun_id = 0; fl4.flowi4_uid = sock_net_uid(net, NULL); @@ -6029,7 +6056,10 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup 
*params, err = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, &res, strict); } else { - fl6.flowi6_mark = 0; + if (flags & BPF_FIB_LOOKUP_MARK) + fl6.flowi6_mark = params->mark; + else + fl6.flowi6_mark = 0; fl6.flowi6_secid = 0; fl6.flowi6_tun_key.tun_id = 0; fl6.flowi6_uid = sock_net_uid(net, NULL); @@ -6107,7 +6137,7 @@ set_fwd_params: #define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \ BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \ - BPF_FIB_LOOKUP_SRC) + BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_MARK) BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx, struct bpf_fib_lookup *, params, int, plen, u32, flags) diff --git a/net/core/gro.c b/net/core/gro.c index 2459ab697f7f..b3b43de1a650 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -231,6 +231,33 @@ done: return 0; } +int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb) +{ + if (unlikely(p->len + skb->len >= 65536)) + return -E2BIG; + + if (NAPI_GRO_CB(p)->last == p) + skb_shinfo(p)->frag_list = skb; + else + NAPI_GRO_CB(p)->last->next = skb; + + skb_pull(skb, skb_gro_offset(skb)); + + NAPI_GRO_CB(p)->last = skb; + NAPI_GRO_CB(p)->count++; + p->data_len += skb->len; + + /* sk ownership - if any - completely transferred to the aggregated packet */ + skb->destructor = NULL; + skb->sk = NULL; + p->truesize += skb->truesize; + p->len += skb->len; + + NAPI_GRO_CB(skb)->same_flow = 1; + + return 0; +} + static void napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb) { @@ -331,8 +358,6 @@ static void gro_list_prepare(const struct list_head *head, list_for_each_entry(p, head, list) { unsigned long diffs; - NAPI_GRO_CB(p)->flush = 0; - if (hash != skb_get_hash_raw(p)) { NAPI_GRO_CB(p)->same_flow = 0; continue; @@ -372,6 +397,7 @@ static inline void skb_gro_reset_offset(struct sk_buff *skb, u32 nhoff) const skb_frag_t *frag0; unsigned int headlen; + NAPI_GRO_CB(skb)->network_offset = 0; NAPI_GRO_CB(skb)->data_offset = 0; headlen = skb_headlen(skb); NAPI_GRO_CB(skb)->frag0 = skb->data; @@ -471,7 +497,6 @@ found_ptype: sizeof(u32))); /* Avoid slow unaligned acc */ *(u32 *)&NAPI_GRO_CB(skb)->zeroed = 0; NAPI_GRO_CB(skb)->flush = skb_has_frag_list(skb); - NAPI_GRO_CB(skb)->is_atomic = 1; NAPI_GRO_CB(skb)->count = 1; if (unlikely(skb_is_gso(skb))) { NAPI_GRO_CB(skb)->count = skb_shinfo(skb)->gso_segs; diff --git a/net/core/hotdata.c b/net/core/hotdata.c index c8a7a451c18a..d0aaaaa556f2 100644 --- a/net/core/hotdata.c +++ b/net/core/hotdata.c @@ -1,9 +1,9 @@ // SPDX-License-Identifier: GPL-2.0-or-later -#include <net/hotdata.h> #include <linux/cache.h> #include <linux/jiffies.h> #include <linux/list.h> - +#include <net/hotdata.h> +#include <net/proto_memory.h> struct net_hotdata net_hotdata __cacheline_aligned = { .offload_base = LIST_HEAD_INIT(net_hotdata.offload_base), @@ -18,5 +18,8 @@ struct net_hotdata net_hotdata __cacheline_aligned = { .max_backlog = 1000, .dev_tx_weight = 64, .dev_rx_weight = 64, + .sysctl_max_skb_frags = MAX_SKB_FRAGS, + .sysctl_skb_defer_max = 64, + .sysctl_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE }; EXPORT_SYMBOL(net_hotdata); diff --git a/net/core/ieee8021q_helpers.c b/net/core/ieee8021q_helpers.c new file mode 100644 index 000000000000..759a9b9f3f89 --- /dev/null +++ b/net/core/ieee8021q_helpers.c @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2024 Pengutronix, Oleksij Rempel <kernel@pengutronix.de> + +#include <linux/array_size.h> +#include <linux/printk.h> +#include <linux/types.h> +#include <net/dscp.h> +#include <net/ieee8021q.h> + +/* 
The following arrays map Traffic Types (TT) to traffic classes (TC) for + * different numbers of queues as shown in the example provided by + * IEEE 802.1Q-2022 in Annex I "I.3 Traffic type to traffic class mapping" and + * Table I-1 "Traffic type to traffic class mapping". + */ +static const u8 ieee8021q_8queue_tt_tc_map[] = { + [IEEE8021Q_TT_BK] = 0, + [IEEE8021Q_TT_BE] = 1, + [IEEE8021Q_TT_EE] = 2, + [IEEE8021Q_TT_CA] = 3, + [IEEE8021Q_TT_VI] = 4, + [IEEE8021Q_TT_VO] = 5, + [IEEE8021Q_TT_IC] = 6, + [IEEE8021Q_TT_NC] = 7, +}; + +static const u8 ieee8021q_7queue_tt_tc_map[] = { + [IEEE8021Q_TT_BK] = 0, + [IEEE8021Q_TT_BE] = 1, + [IEEE8021Q_TT_EE] = 2, + [IEEE8021Q_TT_CA] = 3, + [IEEE8021Q_TT_VI] = 4, [IEEE8021Q_TT_VO] = 4, + [IEEE8021Q_TT_IC] = 5, + [IEEE8021Q_TT_NC] = 6, +}; + +static const u8 ieee8021q_6queue_tt_tc_map[] = { + [IEEE8021Q_TT_BK] = 0, + [IEEE8021Q_TT_BE] = 1, + [IEEE8021Q_TT_EE] = 2, [IEEE8021Q_TT_CA] = 2, + [IEEE8021Q_TT_VI] = 3, [IEEE8021Q_TT_VO] = 3, + [IEEE8021Q_TT_IC] = 4, + [IEEE8021Q_TT_NC] = 5, +}; + +static const u8 ieee8021q_5queue_tt_tc_map[] = { + [IEEE8021Q_TT_BK] = 0, [IEEE8021Q_TT_BE] = 0, + [IEEE8021Q_TT_EE] = 1, [IEEE8021Q_TT_CA] = 1, + [IEEE8021Q_TT_VI] = 2, [IEEE8021Q_TT_VO] = 2, + [IEEE8021Q_TT_IC] = 3, + [IEEE8021Q_TT_NC] = 4, +}; + +static const u8 ieee8021q_4queue_tt_tc_map[] = { + [IEEE8021Q_TT_BK] = 0, [IEEE8021Q_TT_BE] = 0, + [IEEE8021Q_TT_EE] = 1, [IEEE8021Q_TT_CA] = 1, + [IEEE8021Q_TT_VI] = 2, [IEEE8021Q_TT_VO] = 2, + [IEEE8021Q_TT_IC] = 3, [IEEE8021Q_TT_NC] = 3, +}; + +static const u8 ieee8021q_3queue_tt_tc_map[] = { + [IEEE8021Q_TT_BK] = 0, [IEEE8021Q_TT_BE] = 0, + [IEEE8021Q_TT_EE] = 0, [IEEE8021Q_TT_CA] = 0, + [IEEE8021Q_TT_VI] = 1, [IEEE8021Q_TT_VO] = 1, + [IEEE8021Q_TT_IC] = 2, [IEEE8021Q_TT_NC] = 2, +}; + +static const u8 ieee8021q_2queue_tt_tc_map[] = { + [IEEE8021Q_TT_BK] = 0, [IEEE8021Q_TT_BE] = 0, + [IEEE8021Q_TT_EE] = 0, [IEEE8021Q_TT_CA] = 0, + [IEEE8021Q_TT_VI] = 1, [IEEE8021Q_TT_VO] = 1, + [IEEE8021Q_TT_IC] = 1, [IEEE8021Q_TT_NC] = 1, +}; + +static const u8 ieee8021q_1queue_tt_tc_map[] = { + [IEEE8021Q_TT_BK] = 0, [IEEE8021Q_TT_BE] = 0, + [IEEE8021Q_TT_EE] = 0, [IEEE8021Q_TT_CA] = 0, + [IEEE8021Q_TT_VI] = 0, [IEEE8021Q_TT_VO] = 0, + [IEEE8021Q_TT_IC] = 0, [IEEE8021Q_TT_NC] = 0, +}; + +/** + * ieee8021q_tt_to_tc - Map IEEE 802.1Q Traffic Type to Traffic Class + * @tt: IEEE 802.1Q Traffic Type + * @num_queues: Number of queues + * + * This function maps an IEEE 802.1Q Traffic Type to a Traffic Class (TC) based + * on the number of queues configured on the NIC. The mapping is based on the + * example provided by IEEE 802.1Q-2022 in Annex I "I.3 Traffic type to traffic + * class mapping" and Table I-1 "Traffic type to traffic class mapping". + * + * Return: Traffic Class corresponding to the given Traffic Type or a negative + * value in case of error. 
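+ * + * Example (editor's illustration, not part of the original patch): on a NIC + * with four queues, IEEE8021Q_TT_VI and IEEE8021Q_TT_VO both collapse onto + * traffic class 2 per ieee8021q_4queue_tt_tc_map above, so + * ieee8021q_tt_to_tc(IEEE8021Q_TT_VO, 4) returns 2.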
+ */ +int ieee8021q_tt_to_tc(enum ieee8021q_traffic_type tt, unsigned int num_queues) +{ + if (tt < 0 || tt >= IEEE8021Q_TT_MAX) { + pr_err("Requested Traffic Type (%d) is out of range (%d)\n", tt, + IEEE8021Q_TT_MAX); + return -EINVAL; + } + + switch (num_queues) { + case 8: + compiletime_assert(ARRAY_SIZE(ieee8021q_8queue_tt_tc_map) != + IEEE8021Q_TT_MAX - 1, + "ieee8021q_8queue_tt_tc_map != max - 1"); + return ieee8021q_8queue_tt_tc_map[tt]; + case 7: + compiletime_assert(ARRAY_SIZE(ieee8021q_7queue_tt_tc_map) != + IEEE8021Q_TT_MAX - 1, + "ieee8021q_7queue_tt_tc_map != max - 1"); + + return ieee8021q_7queue_tt_tc_map[tt]; + case 6: + compiletime_assert(ARRAY_SIZE(ieee8021q_6queue_tt_tc_map) != + IEEE8021Q_TT_MAX - 1, + "ieee8021q_6queue_tt_tc_map != max - 1"); + + return ieee8021q_6queue_tt_tc_map[tt]; + case 5: + compiletime_assert(ARRAY_SIZE(ieee8021q_5queue_tt_tc_map) != + IEEE8021Q_TT_MAX - 1, + "ieee8021q_5queue_tt_tc_map != max - 1"); + + return ieee8021q_5queue_tt_tc_map[tt]; + case 4: + compiletime_assert(ARRAY_SIZE(ieee8021q_4queue_tt_tc_map) != + IEEE8021Q_TT_MAX - 1, + "ieee8021q_4queue_tt_tc_map != max - 1"); + + return ieee8021q_4queue_tt_tc_map[tt]; + case 3: + compiletime_assert(ARRAY_SIZE(ieee8021q_3queue_tt_tc_map) != + IEEE8021Q_TT_MAX - 1, + "ieee8021q_3queue_tt_tc_map != max - 1"); + + return ieee8021q_3queue_tt_tc_map[tt]; + case 2: + compiletime_assert(ARRAY_SIZE(ieee8021q_2queue_tt_tc_map) != + IEEE8021Q_TT_MAX - 1, + "ieee8021q_2queue_tt_tc_map != max - 1"); + + return ieee8021q_2queue_tt_tc_map[tt]; + case 1: + compiletime_assert(ARRAY_SIZE(ieee8021q_1queue_tt_tc_map) != + IEEE8021Q_TT_MAX - 1, + "ieee8021q_1queue_tt_tc_map != max - 1"); + + return ieee8021q_1queue_tt_tc_map[tt]; + } + + pr_err("Invalid number of queues %d\n", num_queues); + + return -EINVAL; +} +EXPORT_SYMBOL_GPL(ieee8021q_tt_to_tc); + +/** + * ietf_dscp_to_ieee8021q_tt - Map IETF DSCP to IEEE 802.1Q Traffic Type + * @dscp: IETF DSCP value + * + * This function maps an IETF DSCP value to an IEEE 802.1Q Traffic Type (TT). + * Since there is no corresponding mapping between DSCP and IEEE 802.1Q Traffic + * Type, this function is inspired by the RFC8325 documentation, which describes + * the mapping between DSCP and 802.11 User Priority (UP) values. + * + * Return: IEEE 802.1Q Traffic Type corresponding to the given DSCP value + */ +int ietf_dscp_to_ieee8021q_tt(u8 dscp) +{ + switch (dscp) { + case DSCP_CS0: + /* Comment from RFC8325: + * [RFC4594], Section 4.8, recommends High-Throughput Data be marked + * AF1x (that is, AF11, AF12, and AF13, according to the rules defined + * in [RFC2475]). + * + * By default (as described in Section 2.3), High-Throughput Data will + * map to UP 1 and, thus, to the Background Access Category (AC_BK), + * which is contrary to the intent expressed in [RFC4594]. + * + * Unfortunately, there really is no corresponding fit for the High- + * Throughput Data service class within the constrained 4 Access + * Category [IEEE.802.11-2016] model. If the High-Throughput Data + * service class is assigned to the Best Effort Access Category (AC_BE), + * then it would contend with Low-Latency Data (while [RFC4594] + * recommends a distinction in servicing between these service classes) + * as well as with the default service class; alternatively, if it is + * assigned to the Background Access Category (AC_BK), then it would + * receive a less-than-best-effort service and contend with Low-Priority + * Data (as discussed in Section 4.2.10). 
+ * + * As such, since there is no directly corresponding fit for the High- + * Throughput Data service class within the [IEEE.802.11-2016] model, it + * is generally RECOMMENDED to map High-Throughput Data to UP 0, thereby + * admitting it to the Best Effort Access Category (AC_BE). + * + * Note: The above text is from RFC8325, which describes the mapping + * between DSCP and 802.11 User Priority (UP) values. The mapping + * between UP and IEEE 802.1Q Traffic Type is not defined in the RFC but + * the 802.11 AC_BK and AC_BE are closely related to the IEEE 802.1Q + * Traffic Types BE and BK. + */ + case DSCP_AF11: + case DSCP_AF12: + case DSCP_AF13: + return IEEE8021Q_TT_BE; + /* Comment from RFC8325: + * RFC3662 and RFC4594 both recommend Low-Priority Data be marked + * with DSCP CS1. The Low-Priority Data service class loosely + * corresponds to the [IEEE.802.11-2016] Background Access Category. + */ + case DSCP_CS1: + return IEEE8021Q_TT_BK; + case DSCP_CS2: + case DSCP_AF21: + case DSCP_AF22: + case DSCP_AF23: + return IEEE8021Q_TT_EE; + case DSCP_CS3: + case DSCP_AF31: + case DSCP_AF32: + case DSCP_AF33: + return IEEE8021Q_TT_CA; + case DSCP_CS4: + case DSCP_AF41: + case DSCP_AF42: + case DSCP_AF43: + return IEEE8021Q_TT_VI; + case DSCP_CS5: + case DSCP_EF: + case DSCP_VOICE_ADMIT: + return IEEE8021Q_TT_VO; + case DSCP_CS6: + return IEEE8021Q_TT_IC; + case DSCP_CS7: + return IEEE8021Q_TT_NC; + } + + return SIMPLE_IETF_DSCP_TO_IEEE8021Q_TT(dscp); +} +EXPORT_SYMBOL_GPL(ietf_dscp_to_ieee8021q_tt); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index af270c202d9a..45fd88405b6b 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -3733,7 +3733,7 @@ static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write, static struct neigh_sysctl_table { struct ctl_table_header *sysctl_header; - struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1]; + struct ctl_table neigh_vars[NEIGH_VAR_MAX]; } neigh_sysctl_template __read_mostly = { .neigh_vars = { NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"), @@ -3784,7 +3784,6 @@ static struct neigh_sysctl_table { .extra2 = SYSCTL_INT_MAX, .proc_handler = proc_dointvec_minmax, }, - {}, }, }; @@ -3812,8 +3811,6 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, if (dev) { dev_name_source = dev->name; /* Terminate the table early */ - memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0, - sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL])); neigh_vars_size = NEIGH_VAR_BASE_REACHABLE_TIME_MS + 1; } else { struct neigh_table *tbl = p->tbl; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 1f7f09e56771..4c27a360c294 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -605,13 +605,13 @@ static ssize_t threaded_show(struct device *dev, struct net_device *netdev = to_net_dev(dev); ssize_t ret = -EINVAL; - if (!rtnl_trylock()) - return restart_syscall(); + rcu_read_lock(); if (dev_isalive(netdev)) - ret = sysfs_emit(buf, fmt_dec, netdev->threaded); + ret = sysfs_emit(buf, fmt_dec, READ_ONCE(netdev->threaded)); + + rcu_read_unlock(); - rtnl_unlock(); return ret; } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 2f5190aa2f15..4f7a61688d18 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -69,12 +69,15 @@ DEFINE_COOKIE(net_cookie); static struct net_generic *net_alloc_generic(void) { + unsigned int gen_ptrs = READ_ONCE(max_gen_ptrs); + unsigned int generic_size; struct net_generic *ng; - unsigned int generic_size = offsetof(struct net_generic, 
ptr[max_gen_ptrs]); + + generic_size = offsetof(struct net_generic, ptr[gen_ptrs]); ng = kzalloc(generic_size, GFP_KERNEL); if (ng) - ng->s.len = max_gen_ptrs; + ng->s.len = gen_ptrs; return ng; } @@ -1308,7 +1311,11 @@ static int register_pernet_operations(struct list_head *list, if (error < 0) return error; *ops->id = error; - max_gen_ptrs = max(max_gen_ptrs, *ops->id + 1); + /* This does not require READ_ONCE as writers already hold + * pernet_ops_rwsem. But WRITE_ONCE is needed to protect + * net_alloc_generic. + */ + WRITE_ONCE(max_gen_ptrs, max(max_gen_ptrs, *ops->id + 1)); } error = __register_pernet_operations(list, ops); if (error) { diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index dd6510f2c652..1f6ae6379e0f 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -489,7 +489,17 @@ netdev_nl_stats_write_rx(struct sk_buff *rsp, struct netdev_queue_stats_rx *rx) { if (netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_PACKETS, rx->packets) || netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_BYTES, rx->bytes) || - netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_ALLOC_FAIL, rx->alloc_fail)) + netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_ALLOC_FAIL, rx->alloc_fail) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROPS, rx->hw_drops) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROP_OVERRUNS, rx->hw_drop_overruns) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_UNNECESSARY, rx->csum_unnecessary) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_NONE, rx->csum_none) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_BAD, rx->csum_bad) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_PACKETS, rx->hw_gro_packets) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_BYTES, rx->hw_gro_bytes) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_WIRE_PACKETS, rx->hw_gro_wire_packets) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_GRO_WIRE_BYTES, rx->hw_gro_wire_bytes) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROP_RATELIMITS, rx->hw_drop_ratelimits)) return -EMSGSIZE; return 0; } @@ -498,7 +508,18 @@ static int netdev_nl_stats_write_tx(struct sk_buff *rsp, struct netdev_queue_stats_tx *tx) { if (netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_PACKETS, tx->packets) || - netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_BYTES, tx->bytes)) + netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_BYTES, tx->bytes) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROPS, tx->hw_drops) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROP_ERRORS, tx->hw_drop_errors) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_CSUM_NONE, tx->csum_none) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_NEEDS_CSUM, tx->needs_csum) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_PACKETS, tx->hw_gso_packets) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_BYTES, tx->hw_gso_bytes) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_WIRE_PACKETS, tx->hw_gso_wire_packets) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_GSO_WIRE_BYTES, tx->hw_gso_wire_bytes) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_HW_DROP_RATELIMITS, tx->hw_drop_ratelimits) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_STOP, tx->stop) || + netdev_stat_put(rsp, NETDEV_A_QSTATS_TX_WAKE, tx->wake)) return -EMSGSIZE; return 0; } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 543007f159f9..55bcacf67df3 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -316,7 +316,7 @@ static int netpoll_owner_active(struct net_device *dev) struct napi_struct *napi; list_for_each_entry_rcu(napi, &dev->napi_list, dev_list) { - if (napi->poll_owner == smp_processor_id()) + if 
(READ_ONCE(napi->poll_owner) == smp_processor_id()) return 1; } return 0; diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 273c24429bce..f4444b4e39e6 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -5,6 +5,7 @@ * Copyright (C) 2016 Red Hat, Inc. */ +#include <linux/error-injection.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/slab.h> @@ -172,19 +173,29 @@ static void page_pool_producer_unlock(struct page_pool *pool, spin_unlock_bh(&pool->ring.producer_lock); } +static void page_pool_struct_check(void) +{ + CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_users); + CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_page); + CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_offset); + CACHELINE_ASSERT_GROUP_SIZE(struct page_pool, frag, 4 * sizeof(long)); +} + static int page_pool_init(struct page_pool *pool, const struct page_pool_params *params, int cpuid) { unsigned int ring_qsize = 1024; /* Default */ + page_pool_struct_check(); + memcpy(&pool->p, ¶ms->fast, sizeof(pool->p)); memcpy(&pool->slow, ¶ms->slow, sizeof(pool->slow)); pool->cpuid = cpuid; /* Validate only known flags were used */ - if (pool->p.flags & ~(PP_FLAG_ALL)) + if (pool->slow.flags & ~PP_FLAG_ALL) return -EINVAL; if (pool->p.pool_size) @@ -198,22 +209,26 @@ static int page_pool_init(struct page_pool *pool, * DMA_BIDIRECTIONAL is for allowing page used for DMA sending, * which is the XDP_TX use-case. */ - if (pool->p.flags & PP_FLAG_DMA_MAP) { + if (pool->slow.flags & PP_FLAG_DMA_MAP) { if ((pool->p.dma_dir != DMA_FROM_DEVICE) && (pool->p.dma_dir != DMA_BIDIRECTIONAL)) return -EINVAL; + + pool->dma_map = true; } - if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) { + if (pool->slow.flags & PP_FLAG_DMA_SYNC_DEV) { /* In order to request DMA-sync-for-device the page * needs to be mapped */ - if (!(pool->p.flags & PP_FLAG_DMA_MAP)) + if (!(pool->slow.flags & PP_FLAG_DMA_MAP)) return -EINVAL; if (!pool->p.max_len) return -EINVAL; + pool->dma_sync = true; + /* pool->p.offset has to be set according to the address * offset used by the DMA engine to start copying rx data */ @@ -222,7 +237,7 @@ static int page_pool_init(struct page_pool *pool, pool->has_init_callback = !!pool->slow.init_callback; #ifdef CONFIG_PAGE_POOL_STATS - if (!(pool->p.flags & PP_FLAG_SYSTEM_POOL)) { + if (!(pool->slow.flags & PP_FLAG_SYSTEM_POOL)) { pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats); if (!pool->recycle_stats) return -ENOMEM; @@ -232,12 +247,13 @@ static int page_pool_init(struct page_pool *pool, * (also percpu) page pool instance. 
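 * The new pool->system flag (editor's note) records that this pool uses the * shared per-cpu stats, so page_pool_uninit() and the ptr_ring error path * below skip free_percpu() for it.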
*/ pool->recycle_stats = &pp_system_recycle_stats; + pool->system = true; } #endif if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) { #ifdef CONFIG_PAGE_POOL_STATS - if (!(pool->p.flags & PP_FLAG_SYSTEM_POOL)) + if (!pool->system) free_percpu(pool->recycle_stats); #endif return -ENOMEM; @@ -248,7 +264,7 @@ static int page_pool_init(struct page_pool *pool, /* Driver calling page_pool_create() also call page_pool_destroy() */ refcount_set(&pool->user_cnt, 1); - if (pool->p.flags & PP_FLAG_DMA_MAP) + if (pool->dma_map) get_device(pool->p.dev); return 0; @@ -258,11 +274,11 @@ static void page_pool_uninit(struct page_pool *pool) { ptr_ring_cleanup(&pool->ring, NULL); - if (pool->p.flags & PP_FLAG_DMA_MAP) + if (pool->dma_map) put_device(pool->p.dev); #ifdef CONFIG_PAGE_POOL_STATS - if (!(pool->p.flags & PP_FLAG_SYSTEM_POOL)) + if (!pool->system) free_percpu(pool->recycle_stats); #endif } @@ -383,16 +399,26 @@ static struct page *__page_pool_get_cached(struct page_pool *pool) return page; } -static void page_pool_dma_sync_for_device(const struct page_pool *pool, - const struct page *page, - unsigned int dma_sync_size) +static void __page_pool_dma_sync_for_device(const struct page_pool *pool, + const struct page *page, + u32 dma_sync_size) { +#if defined(CONFIG_HAS_DMA) && defined(CONFIG_DMA_NEED_SYNC) dma_addr_t dma_addr = page_pool_get_dma_addr(page); dma_sync_size = min(dma_sync_size, pool->p.max_len); - dma_sync_single_range_for_device(pool->p.dev, dma_addr, - pool->p.offset, dma_sync_size, - pool->p.dma_dir); + __dma_sync_single_for_device(pool->p.dev, dma_addr + pool->p.offset, + dma_sync_size, pool->p.dma_dir); +#endif +} + +static __always_inline void +page_pool_dma_sync_for_device(const struct page_pool *pool, + const struct page *page, + u32 dma_sync_size) +{ + if (pool->dma_sync && dma_dev_need_sync(pool->p.dev)) + __page_pool_dma_sync_for_device(pool, page, dma_sync_size); } static bool page_pool_dma_map(struct page_pool *pool, struct page *page) @@ -414,8 +440,7 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page) if (page_pool_set_dma_addr(page, dma)) goto unmap_failed; - if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) - page_pool_dma_sync_for_device(pool, page, pool->p.max_len); + page_pool_dma_sync_for_device(pool, page, pool->p.max_len); return true; @@ -460,8 +485,7 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool, if (unlikely(!page)) return NULL; - if ((pool->p.flags & PP_FLAG_DMA_MAP) && - unlikely(!page_pool_dma_map(pool, page))) { + if (pool->dma_map && unlikely(!page_pool_dma_map(pool, page))) { put_page(page); return NULL; } @@ -481,8 +505,8 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool, gfp_t gfp) { const int bulk = PP_ALLOC_CACHE_REFILL; - unsigned int pp_flags = pool->p.flags; unsigned int pp_order = pool->p.order; + bool dma_map = pool->dma_map; struct page *page; int i, nr_pages; @@ -507,8 +531,7 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool, */ for (i = 0; i < nr_pages; i++) { page = pool->alloc.cache[i]; - if ((pp_flags & PP_FLAG_DMA_MAP) && - unlikely(!page_pool_dma_map(pool, page))) { + if (dma_map && unlikely(!page_pool_dma_map(pool, page))) { put_page(page); continue; } @@ -550,6 +573,7 @@ struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp) return page; } EXPORT_SYMBOL(page_pool_alloc_pages); +ALLOW_ERROR_INJECTION(page_pool_alloc_pages, NULL); /* Calculate distance between two u32 values, valid if distance is below 2^(31) * 
https://en.wikipedia.org/wiki/Serial_number_arithmetic#General_Solution @@ -580,7 +604,7 @@ void __page_pool_release_page_dma(struct page_pool *pool, struct page *page) { dma_addr_t dma; - if (!(pool->p.flags & PP_FLAG_DMA_MAP)) + if (!pool->dma_map) /* Always account for inflight pages, even if we didn't * map them */ @@ -663,7 +687,7 @@ static bool __page_pool_page_can_be_recycled(const struct page *page) } /* If the page refcnt == 1, this will try to recycle the page. - * if PP_FLAG_DMA_SYNC_DEV is set, we'll try to sync the DMA area for + * If pool->dma_sync is set, we'll try to sync the DMA area for * the configured size min(dma_sync_size, pool->max_len). * If the page refcnt != 1, then the page will be returned to memory * subsystem. @@ -686,9 +710,7 @@ __page_pool_put_page(struct page_pool *pool, struct page *page, if (likely(__page_pool_page_can_be_recycled(page))) { /* Read barrier done in page_ref_count / READ_ONCE */ - if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) - page_pool_dma_sync_for_device(pool, page, - dma_sync_size); + page_pool_dma_sync_for_device(pool, page, dma_sync_size); if (allow_direct && page_pool_recycle_in_cache(page, pool)) return NULL; @@ -827,9 +849,7 @@ static struct page *page_pool_drain_frag(struct page_pool *pool, return NULL; if (__page_pool_page_can_be_recycled(page)) { - if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) - page_pool_dma_sync_for_device(pool, page, -1); - + page_pool_dma_sync_for_device(pool, page, -1); return page; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 283e42f48af6..b86b0a87367d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1036,8 +1036,8 @@ static size_t rtnl_proto_down_size(const struct net_device *dev) { size_t size = nla_total_size(1); - if (dev->proto_down_reason) - size += nla_total_size(0) + nla_total_size(4); + /* Assume dev->proto_down_reason is not zero. 
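+ * (Editor's note) When the reason is in fact zero this merely over-estimates + * by nla_total_size(0) + nla_total_size(4) bytes; for a netlink message size + * calculation, over-estimating is safe whereas under-estimating could let + * rtnl_fill_proto_down() overflow the message.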
*/ + size += nla_total_size(0) + nla_total_size(4); return size; } @@ -1477,13 +1477,15 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, static u32 rtnl_xdp_prog_skb(struct net_device *dev) { const struct bpf_prog *generic_xdp_prog; + u32 res = 0; - ASSERT_RTNL(); + rcu_read_lock(); + generic_xdp_prog = rcu_dereference(dev->xdp_prog); + if (generic_xdp_prog) + res = generic_xdp_prog->aux->id; + rcu_read_unlock(); - generic_xdp_prog = rtnl_dereference(dev->xdp_prog); - if (!generic_xdp_prog) - return 0; - return generic_xdp_prog->aux->id; + return res; } static u32 rtnl_xdp_prog_drv(struct net_device *dev) @@ -1603,7 +1605,8 @@ static int put_master_ifindex(struct sk_buff *skb, struct net_device *dev) upper_dev = netdev_master_upper_dev_get_rcu(dev); if (upper_dev) - ret = nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex); + ret = nla_put_u32(skb, IFLA_MASTER, + READ_ONCE(upper_dev->ifindex)); rcu_read_unlock(); return ret; @@ -1736,10 +1739,10 @@ static int rtnl_fill_proto_down(struct sk_buff *skb, struct nlattr *pr; u32 preason; - if (nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down)) + if (nla_put_u8(skb, IFLA_PROTO_DOWN, READ_ONCE(dev->proto_down))) goto nla_put_failure; - preason = dev->proto_down_reason; + preason = READ_ONCE(dev->proto_down_reason); if (!preason) return 0; @@ -1812,6 +1815,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, u32 event, int *new_nsid, int new_ifindex, int tgt_netnsid, gfp_t gfp) { + char devname[IFNAMSIZ]; struct ifinfomsg *ifm; struct nlmsghdr *nlh; struct Qdisc *qdisc; @@ -1824,41 +1828,51 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, ifm = nlmsg_data(nlh); ifm->ifi_family = AF_UNSPEC; ifm->__ifi_pad = 0; - ifm->ifi_type = dev->type; - ifm->ifi_index = dev->ifindex; + ifm->ifi_type = READ_ONCE(dev->type); + ifm->ifi_index = READ_ONCE(dev->ifindex); ifm->ifi_flags = dev_get_flags(dev); ifm->ifi_change = change; if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_TARGET_NETNSID, tgt_netnsid)) goto nla_put_failure; - qdisc = rtnl_dereference(dev->qdisc); - if (nla_put_string(skb, IFLA_IFNAME, dev->name) || - nla_put_u32(skb, IFLA_TXQLEN, dev->tx_queue_len) || + netdev_copy_name(dev, devname); + if (nla_put_string(skb, IFLA_IFNAME, devname)) + goto nla_put_failure; + + if (nla_put_u32(skb, IFLA_TXQLEN, READ_ONCE(dev->tx_queue_len)) || nla_put_u8(skb, IFLA_OPERSTATE, - netif_running(dev) ? dev->operstate : IF_OPER_DOWN) || - nla_put_u8(skb, IFLA_LINKMODE, dev->link_mode) || - nla_put_u32(skb, IFLA_MTU, dev->mtu) || - nla_put_u32(skb, IFLA_MIN_MTU, dev->min_mtu) || - nla_put_u32(skb, IFLA_MAX_MTU, dev->max_mtu) || - nla_put_u32(skb, IFLA_GROUP, dev->group) || - nla_put_u32(skb, IFLA_PROMISCUITY, dev->promiscuity) || - nla_put_u32(skb, IFLA_ALLMULTI, dev->allmulti) || - nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) || - nla_put_u32(skb, IFLA_GSO_MAX_SEGS, dev->gso_max_segs) || - nla_put_u32(skb, IFLA_GSO_MAX_SIZE, dev->gso_max_size) || - nla_put_u32(skb, IFLA_GRO_MAX_SIZE, dev->gro_max_size) || - nla_put_u32(skb, IFLA_GSO_IPV4_MAX_SIZE, dev->gso_ipv4_max_size) || - nla_put_u32(skb, IFLA_GRO_IPV4_MAX_SIZE, dev->gro_ipv4_max_size) || - nla_put_u32(skb, IFLA_TSO_MAX_SIZE, dev->tso_max_size) || - nla_put_u32(skb, IFLA_TSO_MAX_SEGS, dev->tso_max_segs) || + netif_running(dev) ? 
READ_ONCE(dev->operstate) : + IF_OPER_DOWN) || + nla_put_u8(skb, IFLA_LINKMODE, READ_ONCE(dev->link_mode)) || + nla_put_u32(skb, IFLA_MTU, READ_ONCE(dev->mtu)) || + nla_put_u32(skb, IFLA_MIN_MTU, READ_ONCE(dev->min_mtu)) || + nla_put_u32(skb, IFLA_MAX_MTU, READ_ONCE(dev->max_mtu)) || + nla_put_u32(skb, IFLA_GROUP, READ_ONCE(dev->group)) || + nla_put_u32(skb, IFLA_PROMISCUITY, READ_ONCE(dev->promiscuity)) || + nla_put_u32(skb, IFLA_ALLMULTI, READ_ONCE(dev->allmulti)) || + nla_put_u32(skb, IFLA_NUM_TX_QUEUES, + READ_ONCE(dev->num_tx_queues)) || + nla_put_u32(skb, IFLA_GSO_MAX_SEGS, + READ_ONCE(dev->gso_max_segs)) || + nla_put_u32(skb, IFLA_GSO_MAX_SIZE, + READ_ONCE(dev->gso_max_size)) || + nla_put_u32(skb, IFLA_GRO_MAX_SIZE, + READ_ONCE(dev->gro_max_size)) || + nla_put_u32(skb, IFLA_GSO_IPV4_MAX_SIZE, + READ_ONCE(dev->gso_ipv4_max_size)) || + nla_put_u32(skb, IFLA_GRO_IPV4_MAX_SIZE, + READ_ONCE(dev->gro_ipv4_max_size)) || + nla_put_u32(skb, IFLA_TSO_MAX_SIZE, + READ_ONCE(dev->tso_max_size)) || + nla_put_u32(skb, IFLA_TSO_MAX_SEGS, + READ_ONCE(dev->tso_max_segs)) || #ifdef CONFIG_RPS - nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) || + nla_put_u32(skb, IFLA_NUM_RX_QUEUES, + READ_ONCE(dev->num_rx_queues)) || #endif put_master_ifindex(skb, dev) || nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) || - (qdisc && - nla_put_string(skb, IFLA_QDISC, qdisc->ops->id)) || nla_put_ifalias(skb, dev) || nla_put_u32(skb, IFLA_CARRIER_CHANGES, atomic_read(&dev->carrier_up_count) + @@ -1909,9 +1923,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, goto nla_put_failure; } - if (rtnl_fill_link_netnsid(skb, dev, src_net, gfp)) - goto nla_put_failure; - if (new_nsid && nla_put_s32(skb, IFLA_NEW_NETNSID, *new_nsid) < 0) goto nla_put_failure; @@ -1924,6 +1935,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, goto nla_put_failure; rcu_read_lock(); + if (rtnl_fill_link_netnsid(skb, dev, src_net, GFP_ATOMIC)) + goto nla_put_failure_rcu; + qdisc = rcu_dereference(dev->qdisc); + if (qdisc && nla_put_string(skb, IFLA_QDISC, qdisc->ops->id)) + goto nla_put_failure_rcu; if (rtnl_fill_link_af(skb, dev, ext_filter_mask)) goto nla_put_failure_rcu; if (rtnl_fill_link_ifmap(skb, dev)) @@ -2530,7 +2546,7 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) nla_for_each_nested(attr, tb[IFLA_VF_VLAN_LIST], rem) { if (nla_type(attr) != IFLA_VF_VLAN_INFO || - nla_len(attr) < NLA_HDRLEN) { + nla_len(attr) < sizeof(struct ifla_vf_vlan_info)) { return -EINVAL; } if (len >= MAX_VLAN_LIST_LEN) @@ -5961,19 +5977,17 @@ static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh, static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct netlink_ext_ack *extack = cb->extack; - int h, s_h, err, s_idx, s_idxattr, s_prividx; struct rtnl_stats_dump_filters filters; struct net *net = sock_net(skb->sk); unsigned int flags = NLM_F_MULTI; struct if_stats_msg *ifsm; - struct hlist_head *head; + struct { + unsigned long ifindex; + int idxattr; + int prividx; + } *ctx = (void *)cb->ctx; struct net_device *dev; - int idx = 0; - - s_h = cb->args[0]; - s_idx = cb->args[1]; - s_idxattr = cb->args[2]; - s_prividx = cb->args[3]; + int err; cb->seq = net->dev_base_seq; @@ -5992,39 +6006,26 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb) if (err) return err; - for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { - idx = 0; - head = &net->dev_index_head[h]; - hlist_for_each_entry(dev, head, index_hlist) { - if (idx < s_idx) - goto cont; - err = 
rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, 0, - flags, &filters, - &s_idxattr, &s_prividx, - extack); - /* If we ran out of room on the first message, - * we're in trouble - */ - WARN_ON((err == -EMSGSIZE) && (skb->len == 0)); + for_each_netdev_dump(net, dev, ctx->ifindex) { + err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, 0, + flags, &filters, + &ctx->idxattr, &ctx->prividx, + extack); + /* If we ran out of room on the first message, + * we're in trouble. + */ + WARN_ON((err == -EMSGSIZE) && (skb->len == 0)); - if (err < 0) - goto out; - s_prividx = 0; - s_idxattr = 0; - nl_dump_check_consistent(cb, nlmsg_hdr(skb)); -cont: - idx++; - } + if (err < 0) + break; + ctx->prividx = 0; + ctx->idxattr = 0; + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); } -out: - cb->args[3] = s_prividx; - cb->args[2] = s_idxattr; - cb->args[1] = idx; - cb->args[0] = h; - return skb->len; + return err; } void rtnl_offload_xstats_notify(struct net_device *dev) diff --git a/net/core/scm.c b/net/core/scm.c index 5763f3320358..4f6a14babe5a 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -91,6 +91,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) fpl->user = NULL; #if IS_ENABLED(CONFIG_UNIX) fpl->inflight = false; + fpl->dead = false; fpl->edges = NULL; INIT_LIST_HEAD(&fpl->vertices); #endif diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0c8b82750000..466999a7515e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -109,9 +109,6 @@ static struct kmem_cache *skbuff_ext_cache __ro_after_init; #define SKB_SMALL_HEAD_HEADROOM \ SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE) -int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS; -EXPORT_SYMBOL(sysctl_max_skb_frags); - /* kcm_write_msgs() relies on casting paged frags to bio_vec to use * iov_iter_bvec(). These static asserts ensure the cast is valid is long as the * netmem is a page. @@ -907,6 +904,11 @@ static void skb_clone_fraglist(struct sk_buff *skb) skb_get(list); } +static bool is_pp_page(struct page *page) +{ + return (page->pp_magic & ~0x3UL) == PP_SIGNATURE; +} + int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, unsigned int headroom) { @@ -1028,6 +1030,37 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data) return napi_pp_put_page(virt_to_page(data)); } +/** + * skb_pp_frag_ref() - Increase fragment references of a page pool aware skb + * @skb: page pool aware skb + * + * Increase the fragment reference count (pp_ref_count) of a skb. This is + * intended to gain fragment references only for page pool aware skbs, + * i.e. when skb->pp_recycle is true, and not for fragments in a + * non-pp-recycling skb. It has a fallback to increase references on normal + * pages, as page pool aware skbs may also have normal page fragments. 
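+ * + * Return: 0 on success, or -EINVAL when @skb is not a page pool aware + * (pp_recycle) skb; callers such as skb_try_coalesce() are then expected to + * fall back to taking plain references via __skb_frag_ref() (editor's note).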
+ */ +static int skb_pp_frag_ref(struct sk_buff *skb) +{ + struct skb_shared_info *shinfo; + struct page *head_page; + int i; + + if (!skb->pp_recycle) + return -EINVAL; + + shinfo = skb_shinfo(skb); + + for (i = 0; i < shinfo->nr_frags; i++) { + head_page = compound_head(skb_frag_page(&shinfo->frags[i])); + if (likely(is_pp_page(head_page))) + page_pool_ref_page(head_page); + else + page_ref_inc(head_page); + } + return 0; +} + static void skb_kfree_head(void *head, unsigned int end_offset) { if (end_offset == SKB_SMALL_HEAD_HEADROOM) @@ -2079,11 +2112,17 @@ static inline int skb_alloc_rx_flag(const struct sk_buff *skb) struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) { - int headerlen = skb_headroom(skb); - unsigned int size = skb_end_offset(skb) + skb->data_len; - struct sk_buff *n = __alloc_skb(size, gfp_mask, - skb_alloc_rx_flag(skb), NUMA_NO_NODE); + struct sk_buff *n; + unsigned int size; + int headerlen; + if (WARN_ON_ONCE(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)) + return NULL; + + headerlen = skb_headroom(skb); + size = skb_end_offset(skb) + skb->data_len; + n = __alloc_skb(size, gfp_mask, + skb_alloc_rx_flag(skb), NUMA_NO_NODE); if (!n) return NULL; @@ -2411,12 +2450,17 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, /* * Allocate the copy buffer */ - struct sk_buff *n = __alloc_skb(newheadroom + skb->len + newtailroom, - gfp_mask, skb_alloc_rx_flag(skb), - NUMA_NO_NODE); - int oldheadroom = skb_headroom(skb); int head_copy_len, head_copy_off; + struct sk_buff *n; + int oldheadroom; + if (WARN_ON_ONCE(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)) + return NULL; + + oldheadroom = skb_headroom(skb); + n = __alloc_skb(newheadroom + skb->len + newtailroom, + gfp_mask, skb_alloc_rx_flag(skb), + NUMA_NO_NODE); if (!n) return NULL; @@ -4152,7 +4196,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) to++; } else { - __skb_frag_ref(fragfrom, skb->pp_recycle); + __skb_frag_ref(fragfrom); skb_frag_page_copy(fragto, fragfrom); skb_frag_off_copy(fragto, fragfrom); skb_frag_size_set(fragto, todo); @@ -4802,7 +4846,7 @@ normal: } *nskb_frag = (i < 0) ? skb_head_frag_to_page_desc(frag_skb) : *frag; - __skb_frag_ref(nskb_frag, nskb->pp_recycle); + __skb_frag_ref(nskb_frag); size = skb_frag_size(nskb_frag); if (pos < offset) { @@ -5933,8 +5977,10 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, /* if the skb is not cloned this does nothing * since we set nr_frags to 0. 
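 * When skb_pp_frag_ref() fails (non-pp_recycle skb), fall back to taking a * plain reference per fragment (editor's note).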
*/ - for (i = 0; i < from_shinfo->nr_frags; i++) - __skb_frag_ref(&from_shinfo->frags[i], from->pp_recycle); + if (skb_pp_frag_ref(from)) { + for (i = 0; i < from_shinfo->nr_frags; i++) + __skb_frag_ref(&from_shinfo->frags[i]); + } to->truesize += delta; to->len += len; @@ -6988,7 +7034,7 @@ nodefer: kfree_skb_napi_cache(skb); DEBUG_NET_WARN_ON_ONCE(skb->destructor); sd = &per_cpu(softnet_data, cpu); - defer_max = READ_ONCE(sysctl_skb_defer_max); + defer_max = READ_ONCE(net_hotdata.sysctl_skb_defer_max); if (READ_ONCE(sd->defer_count) >= defer_max) goto nodefer; @@ -7040,7 +7086,7 @@ static void skb_splice_csum_page(struct sk_buff *skb, struct page *page, ssize_t skb_splice_from_iter(struct sk_buff *skb, struct iov_iter *iter, ssize_t maxsize, gfp_t gfp) { - size_t frag_limit = READ_ONCE(sysctl_max_skb_frags); + size_t frag_limit = READ_ONCE(net_hotdata.sysctl_max_skb_frags); struct page *pages[8], **ppages = pages; ssize_t spliced = 0, ret = 0; unsigned int i; diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 4d75ef9d24bf..fd20aae30be2 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -1226,11 +1226,8 @@ static void sk_psock_verdict_data_ready(struct sock *sk) rcu_read_lock(); psock = sk_psock(sk); - if (psock) { - read_lock_bh(&sk->sk_callback_lock); + if (psock) sk_psock_data_ready(sk, psock); - read_unlock_bh(&sk->sk_callback_lock); - } rcu_read_unlock(); } } diff --git a/net/core/sock.c b/net/core/sock.c index fe9195186c13..8629f9aecf91 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -127,6 +127,7 @@ #include <net/net_namespace.h> #include <net/request_sock.h> #include <net/sock.h> +#include <net/proto_memory.h> #include <linux/net_tstamp.h> #include <net/xfrm.h> #include <linux/ipsec.h> @@ -283,7 +284,6 @@ __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX; EXPORT_SYMBOL(sysctl_rmem_max); __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX; __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; -int sysctl_mem_pcpu_rsv __read_mostly = SK_MEMORY_PCPU_RESERVE; int sysctl_tstamp_allow_data __read_mostly = 1; @@ -3241,8 +3241,8 @@ int sock_no_socketpair(struct socket *sock1, struct socket *sock2) } EXPORT_SYMBOL(sock_no_socketpair); -int sock_no_accept(struct socket *sock, struct socket *newsock, int flags, - bool kern) +int sock_no_accept(struct socket *sock, struct socket *newsock, + struct proto_accept_arg *arg) { return -EOPNOTSUPP; } diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 8598466a3805..9402889840bf 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -24,8 +24,16 @@ struct bpf_stab { #define SOCK_CREATE_FLAG_MASK \ (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) +/* This mutex is used to + * - protect race between prog/link attach/detach and link prog update, and + * - protect race between releasing and accessing map in bpf_link. + * A single global mutex lock is used since it is expected contention is low. 
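+ * + * (Editor's note) All attach/detach/update paths below, including the + * bpf_link release, detach and update_prog callbacks, take this mutex around + * sock_map_prog_update() and sock_map_prog_link_lookup().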
+ */ +static DEFINE_MUTEX(sockmap_mutex); + static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, - struct bpf_prog *old, u32 which); + struct bpf_prog *old, struct bpf_link *link, + u32 which); static struct sk_psock_progs *sock_map_progs(struct bpf_map *map); static struct bpf_map *sock_map_alloc(union bpf_attr *attr) @@ -71,7 +79,9 @@ int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog) map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); - ret = sock_map_prog_update(map, prog, NULL, attr->attach_type); + mutex_lock(&sockmap_mutex); + ret = sock_map_prog_update(map, prog, NULL, NULL, attr->attach_type); + mutex_unlock(&sockmap_mutex); fdput(f); return ret; } @@ -103,7 +113,9 @@ int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype) goto put_prog; } - ret = sock_map_prog_update(map, NULL, prog, attr->attach_type); + mutex_lock(&sockmap_mutex); + ret = sock_map_prog_update(map, NULL, prog, NULL, attr->attach_type); + mutex_unlock(&sockmap_mutex); put_prog: bpf_prog_put(prog); put_map: @@ -1460,55 +1472,84 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map) return NULL; } -static int sock_map_prog_lookup(struct bpf_map *map, struct bpf_prog ***pprog, - u32 which) +static int sock_map_prog_link_lookup(struct bpf_map *map, struct bpf_prog ***pprog, + struct bpf_link ***plink, u32 which) { struct sk_psock_progs *progs = sock_map_progs(map); + struct bpf_prog **cur_pprog; + struct bpf_link **cur_plink; if (!progs) return -EOPNOTSUPP; switch (which) { case BPF_SK_MSG_VERDICT: - *pprog = &progs->msg_parser; + cur_pprog = &progs->msg_parser; + cur_plink = &progs->msg_parser_link; break; #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) case BPF_SK_SKB_STREAM_PARSER: - *pprog = &progs->stream_parser; + cur_pprog = &progs->stream_parser; + cur_plink = &progs->stream_parser_link; break; #endif case BPF_SK_SKB_STREAM_VERDICT: if (progs->skb_verdict) return -EBUSY; - *pprog = &progs->stream_verdict; + cur_pprog = &progs->stream_verdict; + cur_plink = &progs->stream_verdict_link; break; case BPF_SK_SKB_VERDICT: if (progs->stream_verdict) return -EBUSY; - *pprog = &progs->skb_verdict; + cur_pprog = &progs->skb_verdict; + cur_plink = &progs->skb_verdict_link; break; default: return -EOPNOTSUPP; } + *pprog = cur_pprog; + if (plink) + *plink = cur_plink; return 0; } +/* Handle the following four cases: + * prog_attach: prog != NULL, old == NULL, link == NULL + * prog_detach: prog == NULL, old != NULL, link == NULL + * link_attach: prog != NULL, old == NULL, link != NULL + * link_detach: prog == NULL, old != NULL, link != NULL + */ static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, - struct bpf_prog *old, u32 which) + struct bpf_prog *old, struct bpf_link *link, + u32 which) { struct bpf_prog **pprog; + struct bpf_link **plink; int ret; - ret = sock_map_prog_lookup(map, &pprog, which); + ret = sock_map_prog_link_lookup(map, &pprog, &plink, which); if (ret) return ret; - if (old) - return psock_replace_prog(pprog, prog, old); + /* for prog_attach/prog_detach/link_attach, return error if a bpf_link + * exists for that prog. 
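+ * (!link || prog) is true for prog_attach, prog_detach and link_attach, and + * false only for link_detach (prog == NULL, link != NULL), so link_detach is + * the only operation allowed to proceed while *plink is still set + * (editor's note).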
+ */ + if ((!link || prog) && *plink) + return -EBUSY; - psock_set_prog(pprog, prog); - return 0; + if (old) { + ret = psock_replace_prog(pprog, prog, old); + if (!ret) + *plink = NULL; + } else { + psock_set_prog(pprog, prog); + if (link) + *plink = link; + } + + return ret; } int sock_map_bpf_prog_query(const union bpf_attr *attr, @@ -1533,7 +1574,7 @@ int sock_map_bpf_prog_query(const union bpf_attr *attr, rcu_read_lock(); - ret = sock_map_prog_lookup(map, &pprog, attr->query.attach_type); + ret = sock_map_prog_link_lookup(map, &pprog, NULL, attr->query.attach_type); if (ret) goto end; @@ -1663,6 +1704,196 @@ void sock_map_close(struct sock *sk, long timeout) } EXPORT_SYMBOL_GPL(sock_map_close); +struct sockmap_link { + struct bpf_link link; + struct bpf_map *map; + enum bpf_attach_type attach_type; +}; + +static void sock_map_link_release(struct bpf_link *link) +{ + struct sockmap_link *sockmap_link = container_of(link, struct sockmap_link, link); + + mutex_lock(&sockmap_mutex); + if (!sockmap_link->map) + goto out; + + WARN_ON_ONCE(sock_map_prog_update(sockmap_link->map, NULL, link->prog, link, + sockmap_link->attach_type)); + + bpf_map_put_with_uref(sockmap_link->map); + sockmap_link->map = NULL; +out: + mutex_unlock(&sockmap_mutex); +} + +static int sock_map_link_detach(struct bpf_link *link) +{ + sock_map_link_release(link); + return 0; +} + +static void sock_map_link_dealloc(struct bpf_link *link) +{ + kfree(link); +} + +/* Handle the following two cases: + * case 1: link != NULL, prog != NULL, old != NULL + * case 2: link != NULL, prog != NULL, old == NULL + */ +static int sock_map_link_update_prog(struct bpf_link *link, + struct bpf_prog *prog, + struct bpf_prog *old) +{ + const struct sockmap_link *sockmap_link = container_of(link, struct sockmap_link, link); + struct bpf_prog **pprog, *old_link_prog; + struct bpf_link **plink; + int ret = 0; + + mutex_lock(&sockmap_mutex); + + /* If old prog is not NULL, ensure old prog is the same as link->prog. */ + if (old && link->prog != old) { + ret = -EPERM; + goto out; + } + /* Ensure link->prog has the same type/attach_type as the new prog. */ + if (link->prog->type != prog->type || + link->prog->expected_attach_type != prog->expected_attach_type) { + ret = -EINVAL; + goto out; + } + + ret = sock_map_prog_link_lookup(sockmap_link->map, &pprog, &plink, + sockmap_link->attach_type); + if (ret) + goto out; + + /* return error if the stored bpf_link does not match the incoming bpf_link. 
*/ + if (link != *plink) { + ret = -EBUSY; + goto out; + } + + if (old) { + ret = psock_replace_prog(pprog, prog, old); + if (ret) + goto out; + } else { + psock_set_prog(pprog, prog); + } + + bpf_prog_inc(prog); + old_link_prog = xchg(&link->prog, prog); + bpf_prog_put(old_link_prog); + +out: + mutex_unlock(&sockmap_mutex); + return ret; +} + +static u32 sock_map_link_get_map_id(const struct sockmap_link *sockmap_link) +{ + u32 map_id = 0; + + mutex_lock(&sockmap_mutex); + if (sockmap_link->map) + map_id = sockmap_link->map->id; + mutex_unlock(&sockmap_mutex); + return map_id; +} + +static int sock_map_link_fill_info(const struct bpf_link *link, + struct bpf_link_info *info) +{ + const struct sockmap_link *sockmap_link = container_of(link, struct sockmap_link, link); + u32 map_id = sock_map_link_get_map_id(sockmap_link); + + info->sockmap.map_id = map_id; + info->sockmap.attach_type = sockmap_link->attach_type; + return 0; +} + +static void sock_map_link_show_fdinfo(const struct bpf_link *link, + struct seq_file *seq) +{ + const struct sockmap_link *sockmap_link = container_of(link, struct sockmap_link, link); + u32 map_id = sock_map_link_get_map_id(sockmap_link); + + seq_printf(seq, "map_id:\t%u\n", map_id); + seq_printf(seq, "attach_type:\t%u\n", sockmap_link->attach_type); +} + +static const struct bpf_link_ops sock_map_link_ops = { + .release = sock_map_link_release, + .dealloc = sock_map_link_dealloc, + .detach = sock_map_link_detach, + .update_prog = sock_map_link_update_prog, + .fill_link_info = sock_map_link_fill_info, + .show_fdinfo = sock_map_link_show_fdinfo, +}; + +int sock_map_link_create(const union bpf_attr *attr, struct bpf_prog *prog) +{ + struct bpf_link_primer link_primer; + struct sockmap_link *sockmap_link; + enum bpf_attach_type attach_type; + struct bpf_map *map; + int ret; + + if (attr->link_create.flags) + return -EINVAL; + + map = bpf_map_get_with_uref(attr->link_create.target_fd); + if (IS_ERR(map)) + return PTR_ERR(map); + if (map->map_type != BPF_MAP_TYPE_SOCKMAP && map->map_type != BPF_MAP_TYPE_SOCKHASH) { + ret = -EINVAL; + goto out; + } + + sockmap_link = kzalloc(sizeof(*sockmap_link), GFP_USER); + if (!sockmap_link) { + ret = -ENOMEM; + goto out; + } + + attach_type = attr->link_create.attach_type; + bpf_link_init(&sockmap_link->link, BPF_LINK_TYPE_SOCKMAP, &sock_map_link_ops, prog); + sockmap_link->map = map; + sockmap_link->attach_type = attach_type; + + ret = bpf_link_prime(&sockmap_link->link, &link_primer); + if (ret) { + kfree(sockmap_link); + goto out; + } + + mutex_lock(&sockmap_mutex); + ret = sock_map_prog_update(map, prog, NULL, &sockmap_link->link, attach_type); + mutex_unlock(&sockmap_mutex); + if (ret) { + bpf_link_cleanup(&link_primer); + goto out; + } + + /* Increase refcnt for the prog since when old prog is replaced with + * psock_replace_prog() and psock_set_prog() its refcnt will be decreased. + * + * Actually, we do not need to increase refcnt for the prog since bpf_link + * will hold a reference. But in order to have less complexity w.r.t. + * replacing/setting prog, let us increase the refcnt to make things simpler. 
+ */ + bpf_prog_inc(prog); + + return bpf_link_settle(&link_primer); + +out: + bpf_map_put_with_uref(map); + return ret; +} + static int sock_map_iter_attach_target(struct bpf_prog *prog, union bpf_iter_link_info *linfo, struct bpf_iter_aux_info *aux) diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 903ab4a51c17..c9fb9ad87485 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -24,6 +24,7 @@ #include <net/busy_poll.h> #include <net/pkt_sched.h> #include <net/hotdata.h> +#include <net/proto_memory.h> #include <net/rps.h> #include "dev.h" @@ -415,7 +416,7 @@ static struct ctl_table net_core_table[] = { }, { .procname = "mem_pcpu_rsv", - .data = &sysctl_mem_pcpu_rsv, + .data = &net_hotdata.sysctl_mem_pcpu_rsv, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -595,7 +596,7 @@ static struct ctl_table net_core_table[] = { }, { .procname = "max_skb_frags", - .data = &sysctl_max_skb_frags, + .data = &net_hotdata.sysctl_max_skb_frags, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -654,13 +655,12 @@ static struct ctl_table net_core_table[] = { }, { .procname = "skb_defer_max", - .data = &sysctl_skb_defer_max, + .data = &net_hotdata.sysctl_skb_defer_max, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, }, - { } }; static struct ctl_table netns_core_table[] = { @@ -697,7 +697,6 @@ static struct ctl_table netns_core_table[] = { .extra2 = SYSCTL_ONE, .proc_handler = proc_dou8vec_minmax, }, - { } }; static int __init fb_tunnels_only_for_init_net_sysctl_setup(char *str) @@ -715,20 +714,21 @@ __setup("fb_tunnels=", fb_tunnels_only_for_init_net_sysctl_setup); static __net_init int sysctl_core_net_init(struct net *net) { - struct ctl_table *tbl, *tmp; + size_t table_size = ARRAY_SIZE(netns_core_table); + struct ctl_table *tbl; tbl = netns_core_table; if (!net_eq(net, &init_net)) { + int i; tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL); if (tbl == NULL) goto err_dup; - for (tmp = tbl; tmp->procname; tmp++) - tmp->data += (char *)net - (char *)&init_net; + for (i = 0; i < table_size; ++i) + tbl[i].data += (char *)net - (char *)&init_net; } - net->core.sysctl_hdr = register_net_sysctl_sz(net, "net/core", tbl, - ARRAY_SIZE(netns_core_table)); + net->core.sysctl_hdr = register_net_sysctl_sz(net, "net/core", tbl, table_size); if (net->core.sysctl_hdr == NULL) goto err_reg; diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 4d9823d6dced..d6b30700af67 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -353,6 +353,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len) /** * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm * @sk: socket to perform estimator on + * @mrtt: measured RTT * * This code is almost identical with TCP's tcp_rtt_estimator(), since * - it has a higher sampling frequency (recommended by RFC 1323), diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 9fc9cea4c251..ff41bd6f99c3 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -24,6 +24,7 @@ #include <net/xfrm.h> #include <net/secure_seq.h> #include <net/netns/generic.h> +#include <net/rstreason.h> #include "ackvec.h" #include "ccid.h" @@ -521,7 +522,8 @@ out: return err; } -static void dccp_v4_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb) +static void dccp_v4_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb, + enum sk_rst_reason reason) { int err; const struct 
iphdr *rxiph; @@ -706,7 +708,7 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; reset: - dccp_v4_ctl_send_reset(sk, skb); + dccp_v4_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); kfree_skb(skb); return 0; } @@ -869,7 +871,7 @@ lookup: if (nsk == sk) { reqsk_put(req); } else if (dccp_child_process(sk, nsk, skb)) { - dccp_v4_ctl_send_reset(sk, skb); + dccp_v4_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); goto discard_and_relse; } else { sock_put(sk); @@ -909,7 +911,7 @@ no_dccp_socket: if (dh->dccph_type != DCCP_PKT_RESET) { DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; - dccp_v4_ctl_send_reset(sk, skb); + dccp_v4_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); } discard_it: diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index c8ca703dc331..85f4b8fdbe5e 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -29,6 +29,7 @@ #include <net/secure_seq.h> #include <net/netns/generic.h> #include <net/sock.h> +#include <net/rstreason.h> #include "dccp.h" #include "ipv6.h" @@ -256,7 +257,8 @@ static void dccp_v6_reqsk_destructor(struct request_sock *req) kfree_skb(inet_rsk(req)->pktopts); } -static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb) +static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb, + enum sk_rst_reason reason) { const struct ipv6hdr *rxip6h; struct sk_buff *skb; @@ -656,7 +658,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; reset: - dccp_v6_ctl_send_reset(sk, skb); + dccp_v6_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); discard: if (opt_skb != NULL) __kfree_skb(opt_skb); @@ -762,7 +764,7 @@ lookup: if (nsk == sk) { reqsk_put(req); } else if (dccp_child_process(sk, nsk, skb)) { - dccp_v6_ctl_send_reset(sk, skb); + dccp_v6_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); goto discard_and_relse; } else { sock_put(sk); @@ -801,7 +803,7 @@ no_dccp_socket: if (dh->dccph_type != DCCP_PKT_RESET) { DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; - dccp_v6_ctl_send_reset(sk, skb); + dccp_v6_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); } discard_it: diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 64d805b27add..251a57cf5822 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -15,6 +15,7 @@ #include <net/sock.h> #include <net/xfrm.h> #include <net/inet_timewait_sock.h> +#include <net/rstreason.h> #include "ackvec.h" #include "ccid.h" @@ -202,7 +203,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; drop: if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET) - req->rsk_ops->send_reset(sk, skb); + req->rsk_ops->send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); inet_csk_reqsk_queue_drop(sk, req); out: diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c index ee8d4f5afa72..3fc474d6e57d 100644 --- a/net/dccp/sysctl.c +++ b/net/dccp/sysctl.c @@ -90,8 +90,6 @@ static struct ctl_table dccp_default_table[] = { .mode = 0644, .proc_handler = proc_dointvec_ms_jiffies, }, - - { } }; static struct ctl_table_header *dccp_table_header; diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 2f347cd37316..12521a7d4048 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -1511,8 +1511,7 @@ static int dsa_switch_probe(struct dsa_switch *ds) ds->ops->phylink_mac_config || ds->ops->phylink_mac_finish || ds->ops->phylink_mac_link_down || - ds->ops->phylink_mac_link_up || - ds->ops->adjust_link) + ds->ops->phylink_mac_link_up) return 
-EINVAL; } diff --git a/net/dsa/port.c b/net/dsa/port.c index c6febc3d96d9..9a249d4ac3a5 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -1535,25 +1535,6 @@ void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp, cpu_dp->tag_ops = tag_ops; } -static struct phy_device *dsa_port_get_phy_device(struct dsa_port *dp) -{ - struct device_node *phy_dn; - struct phy_device *phydev; - - phy_dn = of_parse_phandle(dp->dn, "phy-handle", 0); - if (!phy_dn) - return NULL; - - phydev = of_phy_find_device(phy_dn); - if (!phydev) { - of_node_put(phy_dn); - return ERR_PTR(-EPROBE_DEFER); - } - - of_node_put(phy_dn); - return phydev; -} - static struct phylink_pcs * dsa_port_phylink_mac_select_pcs(struct phylink_config *config, phy_interface_t interface) @@ -1616,17 +1597,10 @@ static void dsa_port_phylink_mac_link_down(struct phylink_config *config, phy_interface_t interface) { struct dsa_port *dp = dsa_phylink_to_port(config); - struct phy_device *phydev = NULL; struct dsa_switch *ds = dp->ds; - if (dsa_port_is_user(dp)) - phydev = dp->user->phydev; - - if (!ds->ops->phylink_mac_link_down) { - if (ds->ops->adjust_link && phydev) - ds->ops->adjust_link(ds, dp->index, phydev); + if (!ds->ops->phylink_mac_link_down) return; - } ds->ops->phylink_mac_link_down(ds, dp->index, mode, interface); } @@ -1641,11 +1615,8 @@ static void dsa_port_phylink_mac_link_up(struct phylink_config *config, struct dsa_port *dp = dsa_phylink_to_port(config); struct dsa_switch *ds = dp->ds; - if (!ds->ops->phylink_mac_link_up) { - if (ds->ops->adjust_link && phydev) - ds->ops->adjust_link(ds, dp->index, phydev); + if (!ds->ops->phylink_mac_link_up) return; - } ds->ops->phylink_mac_link_up(ds, dp->index, mode, interface, phydev, speed, duplex, tx_pause, rx_pause); @@ -1708,78 +1679,6 @@ void dsa_port_phylink_destroy(struct dsa_port *dp) dp->pl = NULL; } -static int dsa_shared_port_setup_phy_of(struct dsa_port *dp, bool enable) -{ - struct dsa_switch *ds = dp->ds; - struct phy_device *phydev; - int port = dp->index; - int err = 0; - - phydev = dsa_port_get_phy_device(dp); - if (!phydev) - return 0; - - if (IS_ERR(phydev)) - return PTR_ERR(phydev); - - if (enable) { - err = genphy_resume(phydev); - if (err < 0) - goto err_put_dev; - - err = genphy_read_status(phydev); - if (err < 0) - goto err_put_dev; - } else { - err = genphy_suspend(phydev); - if (err < 0) - goto err_put_dev; - } - - if (ds->ops->adjust_link) - ds->ops->adjust_link(ds, port, phydev); - - dev_dbg(ds->dev, "enabled port's phy: %s", phydev_name(phydev)); - -err_put_dev: - put_device(&phydev->mdio.dev); - return err; -} - -static int dsa_shared_port_fixed_link_register_of(struct dsa_port *dp) -{ - struct device_node *dn = dp->dn; - struct dsa_switch *ds = dp->ds; - struct phy_device *phydev; - int port = dp->index; - phy_interface_t mode; - int err; - - err = of_phy_register_fixed_link(dn); - if (err) { - dev_err(ds->dev, - "failed to register the fixed PHY of port %d\n", - port); - return err; - } - - phydev = of_phy_find_device(dn); - - err = of_get_phy_mode(dn, &mode); - if (err) - mode = PHY_INTERFACE_MODE_NA; - phydev->interface = mode; - - genphy_read_status(phydev); - - if (ds->ops->adjust_link) - ds->ops->adjust_link(ds, port, phydev); - - put_device(&phydev->mdio.dev); - - return 0; -} - static int dsa_shared_port_phylink_register(struct dsa_port *dp) { struct dsa_switch *ds = dp->ds; @@ -1983,44 +1882,28 @@ int dsa_shared_port_link_register_of(struct dsa_port *dp) dsa_switches_apply_workarounds)) return -EINVAL; - if (!ds->ops->adjust_link) { - if 
(missing_link_description) { - dev_warn(ds->dev, - "Skipping phylink registration for %s port %d\n", - dsa_port_is_cpu(dp) ? "CPU" : "DSA", dp->index); - } else { - dsa_shared_port_link_down(dp); + if (missing_link_description) { + dev_warn(ds->dev, + "Skipping phylink registration for %s port %d\n", + dsa_port_is_cpu(dp) ? "CPU" : "DSA", dp->index); + } else { + dsa_shared_port_link_down(dp); - return dsa_shared_port_phylink_register(dp); - } - return 0; + return dsa_shared_port_phylink_register(dp); } - dev_warn(ds->dev, - "Using legacy PHYLIB callbacks. Please migrate to PHYLINK!\n"); - - if (of_phy_is_fixed_link(dp->dn)) - return dsa_shared_port_fixed_link_register_of(dp); - else - return dsa_shared_port_setup_phy_of(dp, true); + return 0; } void dsa_shared_port_link_unregister_of(struct dsa_port *dp) { - struct dsa_switch *ds = dp->ds; - - if (!ds->ops->adjust_link && dp->pl) { + if (dp->pl) { rtnl_lock(); phylink_disconnect_phy(dp->pl); rtnl_unlock(); dsa_port_phylink_destroy(dp); return; } - - if (of_phy_is_fixed_link(dp->dn)) - of_phy_deregister_fixed_link(dp->dn); - else - dsa_shared_port_setup_phy_of(dp, false); } int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr, diff --git a/net/dsa/trace.h b/net/dsa/trace.h index 567f29a39707..83f3e5f78491 100644 --- a/net/dsa/trace.h +++ b/net/dsa/trace.h @@ -39,8 +39,8 @@ DECLARE_EVENT_CLASS(dsa_port_addr_op_hw, ), TP_fast_assign( - __assign_str(dev, dev_name(dp->ds->dev)); - __assign_str(kind, dsa_port_kind(dp)); + __assign_str(dev); + __assign_str(kind); __entry->port = dp->index; ether_addr_copy(__entry->addr, addr); __entry->vid = vid; @@ -98,8 +98,8 @@ DECLARE_EVENT_CLASS(dsa_port_addr_op_refcount, ), TP_fast_assign( - __assign_str(dev, dev_name(dp->ds->dev)); - __assign_str(kind, dsa_port_kind(dp)); + __assign_str(dev); + __assign_str(kind); __entry->port = dp->index; ether_addr_copy(__entry->addr, addr); __entry->vid = vid; @@ -157,8 +157,8 @@ DECLARE_EVENT_CLASS(dsa_port_addr_del_not_found, ), TP_fast_assign( - __assign_str(dev, dev_name(dp->ds->dev)); - __assign_str(kind, dsa_port_kind(dp)); + __assign_str(dev); + __assign_str(kind); __entry->port = dp->index; ether_addr_copy(__entry->addr, addr); __entry->vid = vid; @@ -199,7 +199,7 @@ TRACE_EVENT(dsa_lag_fdb_add_hw, ), TP_fast_assign( - __assign_str(dev, lag_dev->name); + __assign_str(dev); ether_addr_copy(__entry->addr, addr); __entry->vid = vid; dsa_db_print(db, __entry->db_buf); @@ -227,7 +227,7 @@ TRACE_EVENT(dsa_lag_fdb_add_bump, ), TP_fast_assign( - __assign_str(dev, lag_dev->name); + __assign_str(dev); ether_addr_copy(__entry->addr, addr); __entry->vid = vid; dsa_db_print(db, __entry->db_buf); @@ -255,7 +255,7 @@ TRACE_EVENT(dsa_lag_fdb_del_hw, ), TP_fast_assign( - __assign_str(dev, lag_dev->name); + __assign_str(dev); ether_addr_copy(__entry->addr, addr); __entry->vid = vid; dsa_db_print(db, __entry->db_buf); @@ -283,7 +283,7 @@ TRACE_EVENT(dsa_lag_fdb_del_drop, ), TP_fast_assign( - __assign_str(dev, lag_dev->name); + __assign_str(dev); ether_addr_copy(__entry->addr, addr); __entry->vid = vid; dsa_db_print(db, __entry->db_buf); @@ -310,7 +310,7 @@ TRACE_EVENT(dsa_lag_fdb_del_not_found, ), TP_fast_assign( - __assign_str(dev, lag_dev->name); + __assign_str(dev); ether_addr_copy(__entry->addr, addr); __entry->vid = vid; dsa_db_print(db, __entry->db_buf); @@ -338,8 +338,8 @@ DECLARE_EVENT_CLASS(dsa_vlan_op_hw, ), TP_fast_assign( - __assign_str(dev, dev_name(dp->ds->dev)); - __assign_str(kind, dsa_port_kind(dp)); + __assign_str(dev); + __assign_str(kind); 
__entry->port = dp->index; __entry->vid = vlan->vid; __entry->flags = vlan->flags; @@ -383,8 +383,8 @@ DECLARE_EVENT_CLASS(dsa_vlan_op_refcount, ), TP_fast_assign( - __assign_str(dev, dev_name(dp->ds->dev)); - __assign_str(kind, dsa_port_kind(dp)); + __assign_str(dev); + __assign_str(kind); __entry->port = dp->index; __entry->vid = vlan->vid; __entry->flags = vlan->flags; @@ -426,8 +426,8 @@ TRACE_EVENT(dsa_vlan_del_not_found, ), TP_fast_assign( - __assign_str(dev, dev_name(dp->ds->dev)); - __assign_str(kind, dsa_port_kind(dp)); + __assign_str(dev); + __assign_str(kind); __entry->port = dp->index; __entry->vid = vlan->vid; ), diff --git a/net/dsa/user.c b/net/dsa/user.c index c94b868855aa..867c5fe9a4da 100644 --- a/net/dsa/user.c +++ b/net/dsa/user.c @@ -2120,7 +2120,7 @@ int dsa_user_change_mtu(struct net_device *dev, int new_mtu) if (err) goto out_port_failed; - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); dsa_bridge_mtu_normalization(dp); @@ -2137,6 +2137,32 @@ out_conduit_failed: } static int __maybe_unused +dsa_user_dcbnl_set_apptrust(struct net_device *dev, u8 *sel, int nsel) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + int port = dp->index; + + if (!ds->ops->port_set_apptrust) + return -EOPNOTSUPP; + + return ds->ops->port_set_apptrust(ds, port, sel, nsel); +} + +static int __maybe_unused +dsa_user_dcbnl_get_apptrust(struct net_device *dev, u8 *sel, int *nsel) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + int port = dp->index; + + if (!ds->ops->port_get_apptrust) + return -EOPNOTSUPP; + + return ds->ops->port_get_apptrust(ds, port, sel, nsel); +} + +static int __maybe_unused dsa_user_dcbnl_set_default_prio(struct net_device *dev, struct dcb_app *app) { struct dsa_port *dp = dsa_user_to_port(dev); @@ -2163,6 +2189,58 @@ dsa_user_dcbnl_set_default_prio(struct net_device *dev, struct dcb_app *app) return 0; } +/* Update the DSCP prio entries on all user ports of the switch in case + * the switch supports global DSCP prio instead of per port DSCP prios. 
+ */ +static int dsa_user_dcbnl_ieee_global_dscp_setdel(struct net_device *dev, + struct dcb_app *app, bool del) +{ + int (*setdel)(struct net_device *dev, struct dcb_app *app); + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_switch *ds = dp->ds; + struct dsa_port *other_dp; + int err, restore_err; + + if (del) + setdel = dcb_ieee_delapp; + else + setdel = dcb_ieee_setapp; + + dsa_switch_for_each_user_port(other_dp, ds) { + struct net_device *user = other_dp->user; + + if (!user || user == dev) + continue; + + err = setdel(user, app); + if (err) + goto err_try_to_restore; + } + + return 0; + +err_try_to_restore: + + /* Revert logic to restore previous state of app entries */ + if (!del) + setdel = dcb_ieee_delapp; + else + setdel = dcb_ieee_setapp; + + dsa_switch_for_each_user_port_continue_reverse(other_dp, ds) { + struct net_device *user = other_dp->user; + + if (!user || user == dev) + continue; + + restore_err = setdel(user, app); + if (restore_err) + netdev_err(user, "Failed to restore DSCP prio entry configuration\n"); + } + + return err; +} + static int __maybe_unused dsa_user_dcbnl_add_dscp_prio(struct net_device *dev, struct dcb_app *app) { @@ -2194,6 +2272,17 @@ dsa_user_dcbnl_add_dscp_prio(struct net_device *dev, struct dcb_app *app) return err; } + if (!ds->dscp_prio_mapping_is_global) + return 0; + + err = dsa_user_dcbnl_ieee_global_dscp_setdel(dev, app, false); + if (err) { + if (ds->ops->port_del_dscp_prio) + ds->ops->port_del_dscp_prio(ds, port, dscp, new_prio); + dcb_ieee_delapp(dev, app); + return err; + } + return 0; } @@ -2264,6 +2353,18 @@ dsa_user_dcbnl_del_dscp_prio(struct net_device *dev, struct dcb_app *app) return err; } + if (!ds->dscp_prio_mapping_is_global) + return 0; + + err = dsa_user_dcbnl_ieee_global_dscp_setdel(dev, app, true); + if (err) { + if (ds->ops->port_add_dscp_prio) + ds->ops->port_add_dscp_prio(ds, port, dscp, + app->priority); + dcb_ieee_setapp(dev, app); + return err; + } + return 0; } @@ -2376,6 +2477,8 @@ static const struct ethtool_ops dsa_user_ethtool_ops = { static const struct dcbnl_rtnl_ops __maybe_unused dsa_user_dcbnl_ops = { .ieee_setapp = dsa_user_dcbnl_ieee_setapp, .ieee_delapp = dsa_user_dcbnl_ieee_delapp, + .dcbnl_setapptrust = dsa_user_dcbnl_set_apptrust, + .dcbnl_getapptrust = dsa_user_dcbnl_get_apptrust, }; static void dsa_user_get_stats64(struct net_device *dev, diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 563e94e0cbd8..bd04f28d5cf4 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -4,7 +4,6 @@ #include <linux/ethtool_netlink.h> #include <linux/pm_runtime.h> #include "netlink.h" -#include <linux/phy_link_topology.h> static struct genl_family ethtool_genl_family; @@ -31,24 +30,6 @@ const struct nla_policy ethnl_header_policy_stats[] = { ETHTOOL_FLAGS_STATS), }; -const struct nla_policy ethnl_header_policy_phy[] = { - [ETHTOOL_A_HEADER_DEV_INDEX] = { .type = NLA_U32 }, - [ETHTOOL_A_HEADER_DEV_NAME] = { .type = NLA_NUL_STRING, - .len = ALTIFNAMSIZ - 1 }, - [ETHTOOL_A_HEADER_FLAGS] = NLA_POLICY_MASK(NLA_U32, - ETHTOOL_FLAGS_BASIC), - [ETHTOOL_A_HEADER_PHY_INDEX] = NLA_POLICY_MIN(NLA_U32, 1), -}; - -const struct nla_policy ethnl_header_policy_phy_stats[] = { - [ETHTOOL_A_HEADER_DEV_INDEX] = { .type = NLA_U32 }, - [ETHTOOL_A_HEADER_DEV_NAME] = { .type = NLA_NUL_STRING, - .len = ALTIFNAMSIZ - 1 }, - [ETHTOOL_A_HEADER_FLAGS] = NLA_POLICY_MASK(NLA_U32, - ETHTOOL_FLAGS_STATS), - [ETHTOOL_A_HEADER_PHY_INDEX] = NLA_POLICY_MIN(NLA_U32, 1), -}; - int ethnl_ops_begin(struct net_device *dev) { 
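/*
 * [Editor's note -- illustrative sketch, not part of the patch]
 * dsa_user_dcbnl_ieee_global_dscp_setdel() above is a classic
 * apply/rollback loop: apply an operation to every user port, and on
 * failure revisit the already-updated ports in reverse (via
 * dsa_switch_for_each_user_port_continue_reverse()) applying the
 * inverse operation. The same shape in generic form (all names below
 * are hypothetical):
 */
struct demo_item { int id; };

static int demo_apply_all_or_rollback(struct demo_item *items, int n,
				      int (*apply)(struct demo_item *),
				      int (*revert)(struct demo_item *))
{
	int i, err;

	for (i = 0; i < n; i++) {
		err = apply(&items[i]);
		if (err)
			goto rollback;
	}
	return 0;

rollback:
	while (--i >= 0)	/* undo only what was already applied */
		if (revert(&items[i]))
			pr_err("failed to restore item %d\n", items[i].id);
	return err;	/* report the original error, as the patch does */
}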
int ret; @@ -108,9 +89,8 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info, const struct nlattr *header, struct net *net, struct netlink_ext_ack *extack, bool require_dev) { - struct nlattr *tb[ARRAY_SIZE(ethnl_header_policy_phy)]; + struct nlattr *tb[ARRAY_SIZE(ethnl_header_policy)]; const struct nlattr *devname_attr; - struct phy_device *phydev = NULL; struct net_device *dev = NULL; u32 flags = 0; int ret; @@ -124,7 +104,7 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info, /* No validation here, command policy should have a nested policy set * for the header, therefore validation should have already been done. */ - ret = nla_parse_nested(tb, ARRAY_SIZE(ethnl_header_policy_phy) - 1, header, + ret = nla_parse_nested(tb, ARRAY_SIZE(ethnl_header_policy) - 1, header, NULL, extack); if (ret < 0) return ret; @@ -165,30 +145,6 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info, return -EINVAL; } - if (dev) { - if (tb[ETHTOOL_A_HEADER_PHY_INDEX]) { - struct nlattr *phy_id; - - phy_id = tb[ETHTOOL_A_HEADER_PHY_INDEX]; - phydev = phy_link_topo_get_phy(dev->link_topo, - nla_get_u32(phy_id)); - if (!phydev) { - NL_SET_BAD_ATTR(extack, phy_id); - return -ENODEV; - } - } else { - /* If we need a PHY but no phy index is specified, fallback - * to dev->phydev - */ - phydev = dev->phydev; - } - } else if (tb[ETHTOOL_A_HEADER_PHY_INDEX]) { - NL_SET_ERR_MSG_ATTR(extack, header, - "can't target a PHY without a netdev"); - return -EINVAL; - } - - req_info->phydev = phydev; req_info->dev = dev; req_info->flags = flags; return 0; diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index d57a890b5d9e..9a333a8d04c1 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -250,7 +250,6 @@ static inline unsigned int ethnl_reply_header_size(void) * @dev: network device the request is for (may be null) * @dev_tracker: refcount tracker for @dev reference * @flags: request flags common for all request types - * @phydev: phy_device connected to @dev this request is for (may be null) * * This is a common base for request specific structures holding data from * parsed userspace request. 
These always embed struct ethnl_req_info at @@ -260,7 +259,6 @@ struct ethnl_req_info { struct net_device *dev; netdevice_tracker dev_tracker; u32 flags; - struct phy_device *phydev; }; static inline void ethnl_parse_header_dev_put(struct ethnl_req_info *req_info) @@ -397,12 +395,9 @@ extern const struct ethnl_request_ops ethnl_rss_request_ops; extern const struct ethnl_request_ops ethnl_plca_cfg_request_ops; extern const struct ethnl_request_ops ethnl_plca_status_request_ops; extern const struct ethnl_request_ops ethnl_mm_request_ops; -extern const struct ethnl_request_ops ethnl_phy_request_ops; extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1]; extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1]; -extern const struct nla_policy ethnl_header_policy_phy[ETHTOOL_A_HEADER_PHY_INDEX + 1]; -extern const struct nla_policy ethnl_header_policy_phy_stats[ETHTOOL_A_HEADER_PHY_INDEX + 1]; extern const struct nla_policy ethnl_strset_get_policy[ETHTOOL_A_STRSET_COUNTS_ONLY + 1]; extern const struct nla_policy ethnl_linkinfo_get_policy[ETHTOOL_A_LINKINFO_HEADER + 1]; extern const struct nla_policy ethnl_linkinfo_set_policy[ETHTOOL_A_LINKINFO_TP_MDIX_CTRL + 1]; diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index e9d45133d641..e6904288d40d 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -61,39 +61,36 @@ static bool hsr_check_carrier(struct hsr_port *master) return false; } -static void hsr_check_announce(struct net_device *hsr_dev, - unsigned char old_operstate) +static void hsr_check_announce(struct net_device *hsr_dev) { struct hsr_priv *hsr; hsr = netdev_priv(hsr_dev); - - if (READ_ONCE(hsr_dev->operstate) == IF_OPER_UP && old_operstate != IF_OPER_UP) { - /* Went up */ - hsr->announce_count = 0; - mod_timer(&hsr->announce_timer, - jiffies + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL)); + if (netif_running(hsr_dev) && netif_oper_up(hsr_dev)) { + /* Enable announce timer and start sending supervisory frames */ + if (!timer_pending(&hsr->announce_timer)) { + hsr->announce_count = 0; + mod_timer(&hsr->announce_timer, jiffies + + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL)); + } + } else { + /* Deactivate the announce timer */ + timer_delete(&hsr->announce_timer); } - - if (READ_ONCE(hsr_dev->operstate) != IF_OPER_UP && old_operstate == IF_OPER_UP) - /* Went down */ - del_timer(&hsr->announce_timer); } void hsr_check_carrier_and_operstate(struct hsr_priv *hsr) { struct hsr_port *master; - unsigned char old_operstate; bool has_carrier; master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); /* netif_stacked_transfer_operstate() cannot be used here since * it doesn't set IF_OPER_LOWERLAYERDOWN (?) 
*/ - old_operstate = READ_ONCE(master->dev->operstate); has_carrier = hsr_check_carrier(master); hsr_set_operstate(master, has_carrier); - hsr_check_announce(master->dev, old_operstate); + hsr_check_announce(master->dev); } int hsr_get_max_mtu(struct hsr_priv *hsr) @@ -123,7 +120,7 @@ static int hsr_dev_change_mtu(struct net_device *dev, int new_mtu) return -EINVAL; } - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } @@ -146,6 +143,9 @@ static int hsr_dev_open(struct net_device *dev) case HSR_PT_SLAVE_B: designation = "Slave B"; break; + case HSR_PT_INTERLINK: + designation = "Interlink"; + break; default: designation = "Unknown"; } @@ -285,6 +285,7 @@ static void send_hsr_supervision_frame(struct hsr_port *master, struct hsr_priv *hsr = master->hsr; __u8 type = HSR_TLV_LIFE_CHECK; struct hsr_sup_payload *hsr_sp; + struct hsr_sup_tlv *hsr_stlv; struct hsr_sup_tag *hsr_stag; struct sk_buff *skb; @@ -324,6 +325,16 @@ static void send_hsr_supervision_frame(struct hsr_port *master, hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload)); ether_addr_copy(hsr_sp->macaddress_A, master->dev->dev_addr); + if (hsr->redbox) { + hsr_stlv = skb_put(skb, sizeof(struct hsr_sup_tlv)); + hsr_stlv->HSR_TLV_type = PRP_TLV_REDBOX_MAC; + hsr_stlv->HSR_TLV_length = sizeof(struct hsr_sup_payload); + + /* Payload: MacAddressRedBox */ + hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload)); + ether_addr_copy(hsr_sp->macaddress_A, hsr->macaddress_redbox); + } + if (skb_put_padto(skb, ETH_ZLEN)) { spin_unlock_bh(&hsr->seqnr_lock); return; @@ -405,6 +416,10 @@ void hsr_del_ports(struct hsr_priv *hsr) if (port) hsr_del_port(port); + port = hsr_port_get_hsr(hsr, HSR_PT_INTERLINK); + if (port) + hsr_del_port(port); + port = hsr_port_get_hsr(hsr, HSR_PT_MASTER); if (port) hsr_del_port(port); @@ -534,8 +549,8 @@ static const unsigned char def_multicast_addr[ETH_ALEN] __aligned(2) = { }; int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], - unsigned char multicast_spec, u8 protocol_version, - struct netlink_ext_ack *extack) + struct net_device *interlink, unsigned char multicast_spec, + u8 protocol_version, struct netlink_ext_ack *extack) { bool unregister = false; struct hsr_priv *hsr; @@ -544,6 +559,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], hsr = netdev_priv(hsr_dev); INIT_LIST_HEAD(&hsr->ports); INIT_LIST_HEAD(&hsr->node_db); + INIT_LIST_HEAD(&hsr->proxy_node_db); spin_lock_init(&hsr->list_lock); eth_hw_addr_set(hsr_dev, slave[0]->dev_addr); @@ -569,9 +585,11 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], /* Overflow soon to find bugs easier: */ hsr->sequence_nr = HSR_SEQNR_START; hsr->sup_sequence_nr = HSR_SUP_SEQNR_START; + hsr->interlink_sequence_nr = HSR_SEQNR_START; timer_setup(&hsr->announce_timer, hsr_announce, 0); timer_setup(&hsr->prune_timer, hsr_prune_nodes, 0); + timer_setup(&hsr->prune_proxy_timer, hsr_prune_proxy_nodes, 0); ether_addr_copy(hsr->sup_multicast_addr, def_multicast_addr); hsr->sup_multicast_addr[ETH_ALEN - 1] = multicast_spec; @@ -604,6 +622,17 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], if (res) goto err_unregister; + if (interlink) { + res = hsr_add_port(hsr, interlink, HSR_PT_INTERLINK, extack); + if (res) + goto err_unregister; + + hsr->redbox = true; + ether_addr_copy(hsr->macaddress_redbox, interlink->dev_addr); + mod_timer(&hsr->prune_proxy_timer, + jiffies + msecs_to_jiffies(PRUNE_PROXY_PERIOD)); + } + hsr_debugfs_init(hsr, hsr_dev); 
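/*
 * [Editor's note -- illustrative sketch, not part of the patch]
 * The prune_proxy_timer added by this series follows the standard
 * timer_list idiom: timer_setup() binds the callback once, mod_timer()
 * (re)arms it, and the callback recovers its containing object with
 * from_timer() before re-arming itself, exactly as
 * hsr_prune_proxy_nodes() does. A minimal stand-alone form
 * (hypothetical names; period chosen arbitrarily):
 */
#include <linux/timer.h>
#include <linux/jiffies.h>

struct demo_priv {
	struct timer_list prune_timer;
};

static void demo_prune(struct timer_list *t)
{
	struct demo_priv *priv = from_timer(priv, t, prune_timer);

	/* ... drop stale entries here ... */

	/* periodic self re-arm */
	mod_timer(&priv->prune_timer, jiffies + msecs_to_jiffies(3000));
}

static void demo_timer_init(struct demo_priv *priv)
{
	timer_setup(&priv->prune_timer, demo_prune, 0);
	mod_timer(&priv->prune_timer, jiffies + msecs_to_jiffies(3000));
}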
mod_timer(&hsr->prune_timer, jiffies + msecs_to_jiffies(PRUNE_PERIOD)); diff --git a/net/hsr/hsr_device.h b/net/hsr/hsr_device.h index 9060c92168f9..655284095b78 100644 --- a/net/hsr/hsr_device.h +++ b/net/hsr/hsr_device.h @@ -16,8 +16,8 @@ void hsr_del_ports(struct hsr_priv *hsr); void hsr_dev_setup(struct net_device *dev); int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], - unsigned char multicast_spec, u8 protocol_version, - struct netlink_ext_ack *extack); + struct net_device *interlink, unsigned char multicast_spec, + u8 protocol_version, struct netlink_ext_ack *extack); void hsr_check_carrier_and_operstate(struct hsr_priv *hsr); int hsr_get_max_mtu(struct hsr_priv *hsr); #endif /* __HSR_DEVICE_H */ diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index 5d68cb181695..05a61b8286ec 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -377,6 +377,15 @@ static int hsr_xmit(struct sk_buff *skb, struct hsr_port *port, */ ether_addr_copy(eth_hdr(skb)->h_source, port->dev->dev_addr); } + + /* When HSR node is used as RedBox - the frame received from HSR ring + * requires source MAC address (SA) replacement to one which can be + * recognized by SAN devices (otherwise, frames are dropped by switch) + */ + if (port->type == HSR_PT_INTERLINK) + ether_addr_copy(eth_hdr(skb)->h_source, + port->hsr->macaddress_redbox); + return dev_queue_xmit(skb); } @@ -390,9 +399,57 @@ bool prp_drop_frame(struct hsr_frame_info *frame, struct hsr_port *port) bool hsr_drop_frame(struct hsr_frame_info *frame, struct hsr_port *port) { + struct sk_buff *skb; + if (port->dev->features & NETIF_F_HW_HSR_FWD) return prp_drop_frame(frame, port); + /* RedBox specific frames dropping policies + * + * Do not send HSR supervisory frames to SAN devices + */ + if (frame->is_supervision && port->type == HSR_PT_INTERLINK) + return true; + + /* Do not forward to other HSR port (A or B) unicast frames which + * are addressed to interlink port (and are in the ProxyNodeTable). + */ + skb = frame->skb_hsr; + if (skb && prp_drop_frame(frame, port) && + is_unicast_ether_addr(eth_hdr(skb)->h_dest) && + hsr_is_node_in_db(&port->hsr->proxy_node_db, + eth_hdr(skb)->h_dest)) { + return true; + } + + /* Do not forward to port C (Interlink) frames from nodes A and B + * if DA is in NodeTable. + */ + if ((frame->port_rcv->type == HSR_PT_SLAVE_A || + frame->port_rcv->type == HSR_PT_SLAVE_B) && + port->type == HSR_PT_INTERLINK) { + skb = frame->skb_hsr; + if (skb && is_unicast_ether_addr(eth_hdr(skb)->h_dest) && + hsr_is_node_in_db(&port->hsr->node_db, + eth_hdr(skb)->h_dest)) { + return true; + } + } + + /* Do not forward to port A and B unicast frames received on the + * interlink port if it is addressed to one of nodes registered in + * the ProxyNodeTable. + */ + if ((port->type == HSR_PT_SLAVE_A || port->type == HSR_PT_SLAVE_B) && + frame->port_rcv->type == HSR_PT_INTERLINK) { + skb = frame->skb_std; + if (skb && is_unicast_ether_addr(eth_hdr(skb)->h_dest) && + hsr_is_node_in_db(&port->hsr->proxy_node_db, + eth_hdr(skb)->h_dest)) { + return true; + } + } + return false; } @@ -448,13 +505,14 @@ static void hsr_forward_do(struct hsr_frame_info *frame) } /* Check if frame is to be dropped. Eg. for PRP no forward - * between ports. + * between ports, or sending HSR supervision to RedBox. 
*/ if (hsr->proto_ops->drop_frame && hsr->proto_ops->drop_frame(frame, port)) continue; - if (port->type != HSR_PT_MASTER) + if (port->type == HSR_PT_SLAVE_A || + port->type == HSR_PT_SLAVE_B) skb = hsr->proto_ops->create_tagged_frame(frame, port); else skb = hsr->proto_ops->get_untagged_frame(frame, port); @@ -469,7 +527,9 @@ static void hsr_forward_do(struct hsr_frame_info *frame) hsr_deliver_master(skb, port->dev, frame->node_src); } else { if (!hsr_xmit(skb, port, frame)) - sent = true; + if (port->type == HSR_PT_SLAVE_A || + port->type == HSR_PT_SLAVE_B) + sent = true; } } } @@ -503,10 +563,12 @@ static void handle_std_frame(struct sk_buff *skb, frame->skb_prp = NULL; frame->skb_std = skb; - if (port->type != HSR_PT_MASTER) { + if (port->type != HSR_PT_MASTER) frame->is_from_san = true; - } else { - /* Sequence nr for the master node */ + + if (port->type == HSR_PT_MASTER || + port->type == HSR_PT_INTERLINK) { + /* Sequence nr for the master/interlink node */ lockdep_assert_held(&hsr->seqnr_lock); frame->sequence_nr = hsr->sequence_nr; hsr->sequence_nr++; @@ -564,6 +626,7 @@ static int fill_frame_info(struct hsr_frame_info *frame, { struct hsr_priv *hsr = port->hsr; struct hsr_vlan_ethhdr *vlan_hdr; + struct list_head *n_db; struct ethhdr *ethhdr; __be16 proto; int ret; @@ -574,9 +637,13 @@ static int fill_frame_info(struct hsr_frame_info *frame, memset(frame, 0, sizeof(*frame)); frame->is_supervision = is_supervision_frame(port->hsr, skb); - frame->node_src = hsr_get_node(port, &hsr->node_db, skb, - frame->is_supervision, - port->type); + + n_db = &hsr->node_db; + if (port->type == HSR_PT_INTERLINK) + n_db = &hsr->proxy_node_db; + + frame->node_src = hsr_get_node(port, n_db, skb, + frame->is_supervision, port->type); if (!frame->node_src) return -1; /* Unknown node and !is_supervision, or no mem */ diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 26329db09210..614df9649794 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -71,6 +71,14 @@ static struct hsr_node *find_node_by_addr_A(struct list_head *node_db, return NULL; } +/* Check if node for a given MAC address is already present in data base + */ +bool hsr_is_node_in_db(struct list_head *node_db, + const unsigned char addr[ETH_ALEN]) +{ + return !!find_node_by_addr_A(node_db, addr); +} + /* Helper for device init; the self_node is used in hsr_rcv() to recognize * frames from self that's been looped over the HSR ring. */ @@ -223,6 +231,15 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db, } } + /* Check if required node is not in proxy nodes table */ + list_for_each_entry_rcu(node, &hsr->proxy_node_db, mac_list) { + if (ether_addr_equal(node->macaddress_A, ethhdr->h_source)) { + if (hsr->proto_ops->update_san_info) + hsr->proto_ops->update_san_info(node, is_sup); + return node; + } + } + /* Everyone may create a node entry, connected node to a HSR/PRP * device. 
*/ @@ -418,6 +435,10 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb, node_dst = find_node_by_addr_A(&port->hsr->node_db, eth_hdr(skb)->h_dest); + if (!node_dst && port->hsr->redbox) + node_dst = find_node_by_addr_A(&port->hsr->proxy_node_db, + eth_hdr(skb)->h_dest); + if (!node_dst) { if (port->hsr->prot_version != PRP_V1 && net_ratelimit()) netdev_err(skb->dev, "%s: Unknown node\n", __func__); @@ -561,6 +582,37 @@ void hsr_prune_nodes(struct timer_list *t) jiffies + msecs_to_jiffies(PRUNE_PERIOD)); } +void hsr_prune_proxy_nodes(struct timer_list *t) +{ + struct hsr_priv *hsr = from_timer(hsr, t, prune_proxy_timer); + unsigned long timestamp; + struct hsr_node *node; + struct hsr_node *tmp; + + spin_lock_bh(&hsr->list_lock); + list_for_each_entry_safe(node, tmp, &hsr->proxy_node_db, mac_list) { + timestamp = node->time_in[HSR_PT_INTERLINK]; + + /* Prune old entries */ + if (time_is_before_jiffies(timestamp + + msecs_to_jiffies(HSR_PROXY_NODE_FORGET_TIME))) { + hsr_nl_nodedown(hsr, node->macaddress_A); + if (!node->removed) { + list_del_rcu(&node->mac_list); + node->removed = true; + /* Note that we need to free this entry later: */ + kfree_rcu(node, rcu_head); + } + } + } + + spin_unlock_bh(&hsr->list_lock); + + /* Restart timer */ + mod_timer(&hsr->prune_proxy_timer, + jiffies + msecs_to_jiffies(PRUNE_PROXY_PERIOD)); +} + void *hsr_get_next_node(struct hsr_priv *hsr, void *_pos, unsigned char addr[ETH_ALEN]) { diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h index b23556251d62..7619e31c1d2d 100644 --- a/net/hsr/hsr_framereg.h +++ b/net/hsr/hsr_framereg.h @@ -46,6 +46,7 @@ int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node, u16 sequence_nr); void hsr_prune_nodes(struct timer_list *t); +void hsr_prune_proxy_nodes(struct timer_list *t); int hsr_create_self_node(struct hsr_priv *hsr, const unsigned char addr_a[ETH_ALEN], @@ -67,6 +68,9 @@ void prp_handle_san_frame(bool san, enum hsr_port_type port, struct hsr_node *node); void prp_update_san_info(struct hsr_node *node, bool is_sup); +bool hsr_is_node_in_db(struct list_head *node_db, + const unsigned char addr[ETH_ALEN]); + struct hsr_node { struct list_head mac_list; /* Protect R/W access to seq_out */ diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c index 9756e657bab9..d7ae32473c41 100644 --- a/net/hsr/hsr_main.c +++ b/net/hsr/hsr_main.c @@ -96,7 +96,7 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event, break; /* Handled in ndo_change_mtu() */ mtu_max = hsr_get_max_mtu(port->hsr); master = hsr_port_get_hsr(port->hsr, HSR_PT_MASTER); - master->dev->mtu = mtu_max; + WRITE_ONCE(master->dev->mtu, mtu_max); break; case NETDEV_UNREGISTER: if (!is_hsr_master(dev)) { diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index 18e01791ad79..23850b16d1ea 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -21,6 +21,7 @@ */ #define HSR_LIFE_CHECK_INTERVAL 2000 /* ms */ #define HSR_NODE_FORGET_TIME 60000 /* ms */ +#define HSR_PROXY_NODE_FORGET_TIME 60000 /* ms */ #define HSR_ANNOUNCE_INTERVAL 100 /* ms */ #define HSR_ENTRY_FORGET_TIME 400 /* ms */ @@ -35,6 +36,7 @@ * HSR_NODE_FORGET_TIME? 
*/ #define PRUNE_PERIOD 3000 /* ms */ +#define PRUNE_PROXY_PERIOD 3000 /* ms */ #define HSR_TLV_EOT 0 /* End of TLVs */ #define HSR_TLV_ANNOUNCE 22 #define HSR_TLV_LIFE_CHECK 23 @@ -192,11 +194,14 @@ struct hsr_priv { struct rcu_head rcu_head; struct list_head ports; struct list_head node_db; /* Known HSR nodes */ + struct list_head proxy_node_db; /* RedBox HSR proxy nodes */ struct hsr_self_node __rcu *self_node; /* MACs of slaves */ struct timer_list announce_timer; /* Supervision frame dispatch */ struct timer_list prune_timer; + struct timer_list prune_proxy_timer; int announce_count; u16 sequence_nr; + u16 interlink_sequence_nr; /* Interlink port seq_nr */ u16 sup_sequence_nr; /* For HSRv1 separate seq_nr for supervision */ enum hsr_version prot_version; /* Indicate if HSRv0, HSRv1 or PRPv1 */ spinlock_t seqnr_lock; /* locking for sequence_nr */ @@ -209,6 +214,8 @@ struct hsr_priv { * of lan_id */ bool fwd_offloaded; /* Forwarding offloaded to HW */ + bool redbox; /* Device supports HSR RedBox */ + unsigned char macaddress_redbox[ETH_ALEN]; unsigned char sup_multicast_addr[ETH_ALEN] __aligned(sizeof(u16)); /* Align to u16 boundary to avoid unaligned access * in ether_addr_equal diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index 78fe40eb9f01..898f18c6da53 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -23,6 +23,7 @@ static const struct nla_policy hsr_policy[IFLA_HSR_MAX + 1] = { [IFLA_HSR_SUPERVISION_ADDR] = { .len = ETH_ALEN }, [IFLA_HSR_SEQ_NR] = { .type = NLA_U16 }, [IFLA_HSR_PROTOCOL] = { .type = NLA_U8 }, + [IFLA_HSR_INTERLINK] = { .type = NLA_U32 }, }; /* Here, it seems a netdevice has already been allocated for us, and the @@ -35,8 +36,8 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, enum hsr_version proto_version; unsigned char multicast_spec; u8 proto = HSR_PROTOCOL_HSR; - struct net_device *link[2]; + struct net_device *link[2], *interlink = NULL; if (!data) { NL_SET_ERR_MSG_MOD(extack, "No slave devices specified"); return -EINVAL; @@ -67,6 +68,20 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, return -EINVAL; } + if (data[IFLA_HSR_INTERLINK]) + interlink = __dev_get_by_index(src_net, + nla_get_u32(data[IFLA_HSR_INTERLINK])); + + if (interlink && interlink == link[0]) { + NL_SET_ERR_MSG_MOD(extack, "Interlink and Slave1 are the same"); + return -EINVAL; + } + + if (interlink && interlink == link[1]) { + NL_SET_ERR_MSG_MOD(extack, "Interlink and Slave2 are the same"); + return -EINVAL; + } + if (!data[IFLA_HSR_MULTICAST_SPEC]) multicast_spec = 0; else @@ -96,10 +111,17 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, } } - if (proto == HSR_PROTOCOL_PRP) + if (proto == HSR_PROTOCOL_PRP) { proto_version = PRP_V1; + if (interlink) { + NL_SET_ERR_MSG_MOD(extack, + "Interlink only works with HSR"); + return -EINVAL; + } + } - return hsr_dev_finalize(dev, link, multicast_spec, proto_version, extack); + return hsr_dev_finalize(dev, link, interlink, multicast_spec, + proto_version, extack); } static void hsr_dellink(struct net_device *dev, struct list_head *head) @@ -107,6 +129,7 @@ static void hsr_dellink(struct net_device *dev, struct list_head *head) struct hsr_priv *hsr = netdev_priv(dev); del_timer_sync(&hsr->prune_timer); + del_timer_sync(&hsr->prune_proxy_timer); del_timer_sync(&hsr->announce_timer); hsr_debugfs_term(hsr); @@ -114,6 +137,7 @@ static void hsr_dellink(struct net_device *dev, struct list_head *head) hsr_del_self_node(hsr); hsr_del_nodes(&hsr->node_db); + 
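/*
 * [Editor's note -- illustrative sketch, not part of the patch]
 * The proxy node table torn down in hsr_dellink() here is managed with
 * the usual RCU list discipline visible in hsr_prune_proxy_nodes()
 * above: writers serialize on a spinlock and unlink entries with
 * list_del_rcu() + kfree_rcu(), while lockless readers walk the list
 * with list_for_each_entry_rcu(). Reduced to its core (hypothetical
 * names):
 */
#include <linux/rculist.h>
#include <linux/slab.h>

struct demo_node {
	struct list_head mac_list;
	struct rcu_head rcu_head;
};

static void demo_node_remove(spinlock_t *lock, struct demo_node *node)
{
	spin_lock_bh(lock);
	list_del_rcu(&node->mac_list);	/* readers may still observe it */
	spin_unlock_bh(lock);
	kfree_rcu(node, rcu_head);	/* freed after a grace period */
}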
hsr_del_nodes(&hsr->proxy_node_db); unregister_netdevice_queue(dev, head); } diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index 1b6457f357bd..af6cf64a00e0 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -55,6 +55,7 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb) protocol = eth_hdr(skb)->h_proto; if (!(port->dev->features & NETIF_F_HW_HSR_TAG_RM) && + port->type != HSR_PT_INTERLINK && hsr->proto_ops->invalid_dan_ingress_frame && hsr->proto_ops->invalid_dan_ingress_frame(protocol)) goto finish_pass; diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 2a983cf450da..56ef873828f4 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -338,7 +338,6 @@ static struct ctl_table lowpan_frags_ns_ctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { } }; /* secret interval has been deprecated */ @@ -351,7 +350,6 @@ static struct ctl_table lowpan_frags_ctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { } }; static int __net_init lowpan_frags_ns_sysctl_register(struct net *net) @@ -370,10 +368,8 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net) goto err_alloc; /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) { - table[0].procname = NULL; + if (net->user_ns != &init_user_ns) table_size = 0; - } } table[0].data = &ieee802154_lowpan->fqdir->high_thresh; diff --git a/net/ieee802154/trace.h b/net/ieee802154/trace.h index 62aa6465253a..591ce0a16fc0 100644 --- a/net/ieee802154/trace.h +++ b/net/ieee802154/trace.h @@ -75,7 +75,7 @@ TRACE_EVENT(802154_rdev_add_virtual_intf, ), TP_fast_assign( WPAN_PHY_ASSIGN; - __assign_str(vir_intf_name, name ? name : "<noname>"); + __assign_str(vir_intf_name); __entry->type = type; __entry->extended_addr = extended_addr; ), diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index a7cfeda28bb2..e03ba4a21c39 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -758,7 +758,9 @@ void __inet_accept(struct socket *sock, struct socket *newsock, struct sock *new sock_rps_record_flow(newsk); WARN_ON(!((1 << newsk->sk_state) & (TCPF_ESTABLISHED | TCPF_SYN_RECV | - TCPF_CLOSE_WAIT | TCPF_CLOSE))); + TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | + TCPF_CLOSING | TCPF_CLOSE_WAIT | + TCPF_CLOSE))); if (test_bit(SOCK_SUPPORT_ZC, &sock->flags)) set_bit(SOCK_SUPPORT_ZC, &newsock->flags); @@ -771,16 +773,16 @@ void __inet_accept(struct socket *sock, struct socket *newsock, struct sock *new * Accept a pending connection. The TCP layer now gives BSD semantics. */ -int inet_accept(struct socket *sock, struct socket *newsock, int flags, - bool kern) +int inet_accept(struct socket *sock, struct socket *newsock, + struct proto_accept_arg *arg) { struct sock *sk1 = sock->sk, *sk2; - int err = -EINVAL; /* IPV6_ADDRFORM can change sk->sk_prot under us. 
*/ - sk2 = READ_ONCE(sk1->sk_prot)->accept(sk1, flags, &err, kern); + arg->err = -EINVAL; + sk2 = READ_ONCE(sk1->sk_prot)->accept(sk1, arg); if (!sk2) - return err; + return arg->err; lock_sock(sk2); __inet_accept(sock, newsock, sk2); @@ -1307,8 +1309,8 @@ static int inet_sk_reselect_saddr(struct sock *sk) int inet_sk_rebuild_header(struct sock *sk) { + struct rtable *rt = dst_rtable(__sk_dst_check(sk, 0)); struct inet_sock *inet = inet_sk(sk); - struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0); __be32 daddr; struct ip_options_rcu *inet_opt; struct flowi4 *fl4; @@ -1482,7 +1484,6 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb) struct sk_buff *p; unsigned int hlen; unsigned int off; - unsigned int id; int flush = 1; int proto; @@ -1508,13 +1509,10 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb) goto out; NAPI_GRO_CB(skb)->proto = proto; - id = ntohl(*(__be32 *)&iph->id); - flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id & ~IP_DF)); - id >>= 16; + flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (ntohl(*(__be32 *)&iph->id) & ~IP_DF)); list_for_each_entry(p, head, list) { struct iphdr *iph2; - u16 flush_id; if (!NAPI_GRO_CB(p)->same_flow) continue; @@ -1531,48 +1529,10 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb) NAPI_GRO_CB(p)->same_flow = 0; continue; } - - /* All fields must match except length and checksum. */ - NAPI_GRO_CB(p)->flush |= - (iph->ttl ^ iph2->ttl) | - (iph->tos ^ iph2->tos) | - ((iph->frag_off ^ iph2->frag_off) & htons(IP_DF)); - - NAPI_GRO_CB(p)->flush |= flush; - - /* We need to store of the IP ID check to be included later - * when we can verify that this packet does in fact belong - * to a given flow. - */ - flush_id = (u16)(id - ntohs(iph2->id)); - - /* This bit of code makes it much easier for us to identify - * the cases where we are doing atomic vs non-atomic IP ID - * checks. Specifically an atomic check can return IP ID - * values 0 - 0xFFFF, while a non-atomic check can only - * return 0 or 0xFFFF. - */ - if (!NAPI_GRO_CB(p)->is_atomic || - !(iph->frag_off & htons(IP_DF))) { - flush_id ^= NAPI_GRO_CB(p)->count; - flush_id = flush_id ? 0xFFFF : 0; - } - - /* If the previous IP ID value was based on an atomic - * datagram we can overwrite the value and ignore it. - */ - if (NAPI_GRO_CB(skb)->is_atomic) - NAPI_GRO_CB(p)->flush_id = flush_id; - else - NAPI_GRO_CB(p)->flush_id |= flush_id; } - NAPI_GRO_CB(skb)->is_atomic = !!(iph->frag_off & htons(IP_DF)); NAPI_GRO_CB(skb)->flush |= flush; - skb_set_network_header(skb, off); - /* The above will be needed by the transport layer if there is one - * immediately following this IP hdr. - */ + NAPI_GRO_CB(skb)->inner_network_offset = off; /* Note : No need to call skb_gro_postpull_rcsum() here, * as we already checked checksum over ipv4 header was 0 diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index ab82ca104496..11c1519b3699 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1003,6 +1003,55 @@ out_of_mem: * User level interface (ioctl) */ +static struct net_device *arp_req_dev_by_name(struct net *net, struct arpreq *r, + bool getarp) +{ + struct net_device *dev; + + if (getarp) + dev = dev_get_by_name_rcu(net, r->arp_dev); + else + dev = __dev_get_by_name(net, r->arp_dev); + if (!dev) + return ERR_PTR(-ENODEV); + + /* Mmmm... It is wrong... 
ARPHRD_NETROM == 0 */ + if (!r->arp_ha.sa_family) + r->arp_ha.sa_family = dev->type; + + if ((r->arp_flags & ATF_COM) && r->arp_ha.sa_family != dev->type) + return ERR_PTR(-EINVAL); + + return dev; +} + +static struct net_device *arp_req_dev(struct net *net, struct arpreq *r) +{ + struct net_device *dev; + struct rtable *rt; + __be32 ip; + + if (r->arp_dev[0]) + return arp_req_dev_by_name(net, r, false); + + if (r->arp_flags & ATF_PUBL) + return NULL; + + ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; + + rt = ip_route_output(net, ip, 0, 0, 0, RT_SCOPE_LINK); + if (IS_ERR(rt)) + return ERR_CAST(rt); + + dev = rt->dst.dev; + ip_rt_put(rt); + + if (!dev) + return ERR_PTR(-EINVAL); + + return dev; +} + /* * Set (create) an ARP cache entry. */ @@ -1023,11 +1072,8 @@ static int arp_req_set_proxy(struct net *net, struct net_device *dev, int on) static int arp_req_set_public(struct net *net, struct arpreq *r, struct net_device *dev) { - __be32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; __be32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr; - if (mask && mask != htonl(0xFFFFFFFF)) - return -EINVAL; if (!dev && (r->arp_flags & ATF_COM)) { dev = dev_getbyhwaddr_rcu(net, r->arp_ha.sa_family, r->arp_ha.sa_data); @@ -1035,6 +1081,8 @@ static int arp_req_set_public(struct net *net, struct arpreq *r, return -ENODEV; } if (mask) { + __be32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; + if (!pneigh_lookup(&arp_tbl, net, &ip, dev, 1)) return -ENOBUFS; return 0; @@ -1043,30 +1091,20 @@ static int arp_req_set_public(struct net *net, struct arpreq *r, return arp_req_set_proxy(net, dev, 1); } -static int arp_req_set(struct net *net, struct arpreq *r, - struct net_device *dev) +static int arp_req_set(struct net *net, struct arpreq *r) { - __be32 ip; struct neighbour *neigh; + struct net_device *dev; + __be32 ip; int err; + dev = arp_req_dev(net, r); + if (IS_ERR(dev)) + return PTR_ERR(dev); + if (r->arp_flags & ATF_PUBL) return arp_req_set_public(net, r, dev); - ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; - if (r->arp_flags & ATF_PERM) - r->arp_flags |= ATF_COM; - if (!dev) { - struct rtable *rt = ip_route_output(net, ip, 0, 0, 0, - RT_SCOPE_LINK); - - if (IS_ERR(rt)) - return PTR_ERR(rt); - dev = rt->dst.dev; - ip_rt_put(rt); - if (!dev) - return -EINVAL; - } switch (dev->type) { #if IS_ENABLED(CONFIG_FDDI) case ARPHRD_FDDI: @@ -1088,12 +1126,18 @@ static int arp_req_set(struct net *net, struct arpreq *r, break; } + ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; + neigh = __neigh_lookup_errno(&arp_tbl, &ip, dev); err = PTR_ERR(neigh); if (!IS_ERR(neigh)) { unsigned int state = NUD_STALE; - if (r->arp_flags & ATF_PERM) + + if (r->arp_flags & ATF_PERM) { + r->arp_flags |= ATF_COM; state = NUD_PERMANENT; + } + err = neigh_update(neigh, (r->arp_flags & ATF_COM) ? r->arp_ha.sa_data : NULL, state, NEIGH_UPDATE_F_OVERRIDE | @@ -1117,27 +1161,40 @@ static unsigned int arp_state_to_flags(struct neighbour *neigh) * Get an ARP cache entry. 
*/ -static int arp_req_get(struct arpreq *r, struct net_device *dev) +static int arp_req_get(struct net *net, struct arpreq *r) { __be32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr; struct neighbour *neigh; - int err = -ENXIO; + struct net_device *dev; + + if (!r->arp_dev[0]) + return -ENODEV; + + dev = arp_req_dev_by_name(net, r, true); + if (IS_ERR(dev)) + return PTR_ERR(dev); neigh = neigh_lookup(&arp_tbl, &ip, dev); - if (neigh) { - if (!(READ_ONCE(neigh->nud_state) & NUD_NOARP)) { - read_lock_bh(&neigh->lock); - memcpy(r->arp_ha.sa_data, neigh->ha, - min(dev->addr_len, sizeof(r->arp_ha.sa_data_min))); - r->arp_flags = arp_state_to_flags(neigh); - read_unlock_bh(&neigh->lock); - r->arp_ha.sa_family = dev->type; - strscpy(r->arp_dev, dev->name, sizeof(r->arp_dev)); - err = 0; - } + if (!neigh) + return -ENXIO; + + if (READ_ONCE(neigh->nud_state) & NUD_NOARP) { neigh_release(neigh); + return -ENXIO; } - return err; + + read_lock_bh(&neigh->lock); + memcpy(r->arp_ha.sa_data, neigh->ha, + min(dev->addr_len, sizeof(r->arp_ha.sa_data_min))); + r->arp_flags = arp_state_to_flags(neigh); + read_unlock_bh(&neigh->lock); + + neigh_release(neigh); + + r->arp_ha.sa_family = dev->type; + netdev_copy_name(dev, r->arp_dev); + + return 0; } int arp_invalidate(struct net_device *dev, __be32 ip, bool force) @@ -1168,37 +1225,31 @@ int arp_invalidate(struct net_device *dev, __be32 ip, bool force) static int arp_req_delete_public(struct net *net, struct arpreq *r, struct net_device *dev) { - __be32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr; __be32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr; - if (mask == htonl(0xFFFFFFFF)) - return pneigh_delete(&arp_tbl, net, &ip, dev); + if (mask) { + __be32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; - if (mask) - return -EINVAL; + return pneigh_delete(&arp_tbl, net, &ip, dev); + } return arp_req_set_proxy(net, dev, 0); } -static int arp_req_delete(struct net *net, struct arpreq *r, - struct net_device *dev) +static int arp_req_delete(struct net *net, struct arpreq *r) { + struct net_device *dev; __be32 ip; + dev = arp_req_dev(net, r); + if (IS_ERR(dev)) + return PTR_ERR(dev); + if (r->arp_flags & ATF_PUBL) return arp_req_delete_public(net, r, dev); ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; - if (!dev) { - struct rtable *rt = ip_route_output(net, ip, 0, 0, 0, - RT_SCOPE_LINK); - if (IS_ERR(rt)) - return PTR_ERR(rt); - dev = rt->dst.dev; - ip_rt_put(rt); - if (!dev) - return -EINVAL; - } + return arp_invalidate(dev, ip, true); } @@ -1208,9 +1259,9 @@ static int arp_req_delete(struct net *net, struct arpreq *r, int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg) { - int err; struct arpreq r; - struct net_device *dev = NULL; + __be32 *netmask; + int err; switch (cmd) { case SIOCDARP: @@ -1233,42 +1284,34 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg) if (!(r.arp_flags & ATF_PUBL) && (r.arp_flags & (ATF_NETMASK | ATF_DONTPUB))) return -EINVAL; + + netmask = &((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr; if (!(r.arp_flags & ATF_NETMASK)) - ((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr = - htonl(0xFFFFFFFFUL); - rtnl_lock(); - if (r.arp_dev[0]) { - err = -ENODEV; - dev = __dev_get_by_name(net, r.arp_dev); - if (!dev) - goto out; - - /* Mmmm... It is wrong... 
ARPHRD_NETROM==0 */ - if (!r.arp_ha.sa_family) - r.arp_ha.sa_family = dev->type; - err = -EINVAL; - if ((r.arp_flags & ATF_COM) && r.arp_ha.sa_family != dev->type) - goto out; - } else if (cmd == SIOCGARP) { - err = -ENODEV; - goto out; - } + *netmask = htonl(0xFFFFFFFFUL); + else if (*netmask && *netmask != htonl(0xFFFFFFFFUL)) + return -EINVAL; switch (cmd) { case SIOCDARP: - err = arp_req_delete(net, &r, dev); + rtnl_lock(); + err = arp_req_delete(net, &r); + rtnl_unlock(); break; case SIOCSARP: - err = arp_req_set(net, &r, dev); + rtnl_lock(); + err = arp_req_set(net, &r); + rtnl_unlock(); break; case SIOCGARP: - err = arp_req_get(&r, dev); + rcu_read_lock(); + err = arp_req_get(net, &r); + rcu_read_unlock(); + + if (!err && copy_to_user(arg, &r, sizeof(r))) + err = -EFAULT; break; } -out: - rtnl_unlock(); - if (cmd == SIOCGARP && !err && copy_to_user(arg, &r, sizeof(r))) - err = -EFAULT; + return err; } diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 7f518ea5f4ac..18227757ec0c 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -107,6 +107,9 @@ static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log, case offsetof(struct tcp_sock, snd_cwnd_cnt): end = offsetofend(struct tcp_sock, snd_cwnd_cnt); break; + case offsetof(struct tcp_sock, snd_cwnd_stamp): + end = offsetofend(struct tcp_sock, snd_cwnd_stamp); + break; case offsetof(struct tcp_sock, snd_ssthresh): end = offsetofend(struct tcp_sock, snd_ssthresh); break; @@ -307,7 +310,8 @@ static u32 bpf_tcp_ca_min_tso_segs(struct sock *sk) return 0; } -static void bpf_tcp_ca_cong_control(struct sock *sk, const struct rate_sample *rs) +static void bpf_tcp_ca_cong_control(struct sock *sk, u32 ack, int flag, + const struct rate_sample *rs) { } diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 8b17d83e5fde..dd6d46015058 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1815,6 +1815,7 @@ static int cipso_v4_genopt(unsigned char *buf, u32 buf_len, * @sk: the socket * @doi_def: the CIPSO DOI to use * @secattr: the specific security attributes of the socket + * @sk_locked: true if caller holds the socket lock * * Description: * Set the CIPSO option on the given socket using the DOI definition and @@ -1826,7 +1827,8 @@ static int cipso_v4_genopt(unsigned char *buf, u32 buf_len, */ int cipso_v4_sock_setattr(struct sock *sk, const struct cipso_v4_doi *doi_def, - const struct netlbl_lsm_secattr *secattr) + const struct netlbl_lsm_secattr *secattr, + bool sk_locked) { int ret_val = -EPERM; unsigned char *buf = NULL; @@ -1876,8 +1878,7 @@ int cipso_v4_sock_setattr(struct sock *sk, sk_inet = inet_sk(sk); - old = rcu_dereference_protected(sk_inet->inet_opt, - lockdep_sock_is_held(sk)); + old = rcu_dereference_protected(sk_inet->inet_opt, sk_locked); if (inet_test_bit(IS_ICSK, sk)) { sk_conn = inet_csk(sk); if (old) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 7592f242336b..96accde527da 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -224,6 +224,7 @@ static struct in_ifaddr *inet_alloc_ifa(void) static void inet_rcu_free_ifa(struct rcu_head *head) { struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head); + if (ifa->ifa_dev) in_dev_put(ifa->ifa_dev); kfree(ifa); @@ -231,7 +232,11 @@ static void inet_rcu_free_ifa(struct rcu_head *head) static void inet_free_ifa(struct in_ifaddr *ifa) { - call_rcu(&ifa->rcu_head, inet_rcu_free_ifa); + /* Our reference to ifa->ifa_dev must be freed ASAP + * to release the reference to the netdev the same way. 
+ * in_dev_put() -> in_dev_finish_destroy() -> netdev_put() + */ + call_rcu_hurry(&ifa->rcu_head, inet_rcu_free_ifa); } static void in_dev_free_rcu(struct rcu_head *head) @@ -1683,6 +1688,7 @@ static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa, struct nlmsghdr *nlh; unsigned long tstamp; u32 preferred, valid; + u32 flags; nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm), args->flags); @@ -1692,7 +1698,13 @@ static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa, ifm = nlmsg_data(nlh); ifm->ifa_family = AF_INET; ifm->ifa_prefixlen = ifa->ifa_prefixlen; - ifm->ifa_flags = READ_ONCE(ifa->ifa_flags); + + flags = READ_ONCE(ifa->ifa_flags); + /* Warning : ifm->ifa_flags is an __u8, it holds only 8 bits. + * The 32bit value is given in IFA_FLAGS attribute. + */ + ifm->ifa_flags = (__u8)flags; + ifm->ifa_scope = ifa->ifa_scope; ifm->ifa_index = ifa->ifa_dev->dev->ifindex; @@ -1701,7 +1713,7 @@ static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa, goto nla_put_failure; tstamp = READ_ONCE(ifa->ifa_tstamp); - if (!(ifm->ifa_flags & IFA_F_PERMANENT)) { + if (!(flags & IFA_F_PERMANENT)) { preferred = READ_ONCE(ifa->ifa_preferred_lft); valid = READ_ONCE(ifa->ifa_valid_lft); if (preferred != INFINITY_LIFE_TIME) { @@ -1732,7 +1744,7 @@ static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa, nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) || (ifa->ifa_proto && nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) || - nla_put_u32(skb, IFA_FLAGS, ifm->ifa_flags) || + nla_put_u32(skb, IFA_FLAGS, flags) || (ifa->ifa_rt_priority && nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) || put_cacheinfo(skb, READ_ONCE(ifa->ifa_cstamp), tstamp, @@ -2515,7 +2527,7 @@ static int ipv4_doint_and_flush(struct ctl_table *ctl, int write, static struct devinet_sysctl_table { struct ctl_table_header *sysctl_header; - struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX]; + struct ctl_table devinet_vars[IPV4_DEVCONF_MAX]; } devinet_sysctl = { .devinet_vars = { DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding", @@ -2578,7 +2590,7 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name, if (!t) goto out; - for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) { + for (i = 0; i < ARRAY_SIZE(t->devinet_vars); i++) { t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf; t->devinet_vars[i].extra1 = p; t->devinet_vars[i].extra2 = net; @@ -2652,7 +2664,6 @@ static struct ctl_table ctl_forward_entry[] = { .extra1 = &ipv4_devconf, .extra2 = &init_net, }, - { }, }; #endif diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index dff04580318f..3968d3f98e08 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -348,7 +348,6 @@ static struct ip_esp_hdr *esp_output_udp_encap(struct sk_buff *skb, __be16 dport) { struct udphdr *uh; - __be32 *udpdata32; unsigned int len; len = skb->len + esp->tailen - skb_transport_offset(skb); @@ -363,12 +362,6 @@ static struct ip_esp_hdr *esp_output_udp_encap(struct sk_buff *skb, *skb_mac_header(skb) = IPPROTO_UDP; - if (encap_type == UDP_ENCAP_ESPINUDP_NON_IKE) { - udpdata32 = (__be32 *)(uh + 1); - udpdata32[0] = udpdata32[1] = 0; - return (struct ip_esp_hdr *)(udpdata32 + 2); - } - return (struct ip_esp_hdr *)(uh + 1); } @@ -424,7 +417,6 @@ static int esp_output_encap(struct xfrm_state *x, struct sk_buff *skb, switch (encap_type) { default: case UDP_ENCAP_ESPINUDP: - case UDP_ENCAP_ESPINUDP_NON_IKE: esph = esp_output_udp_encap(skb, encap_type, esp, sport, dport); break; case 
TCP_ENCAP_ESPINTCP: @@ -776,7 +768,6 @@ int esp_input_done2(struct sk_buff *skb, int err) source = th->source; break; case UDP_ENCAP_ESPINUDP: - case UDP_ENCAP_ESPINUDP_NON_IKE: source = uh->source; break; default: @@ -1180,9 +1171,6 @@ static int esp_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack) case UDP_ENCAP_ESPINUDP: x->props.header_len += sizeof(struct udphdr); break; - case UDP_ENCAP_ESPINUDP_NON_IKE: - x->props.header_len += sizeof(struct udphdr) + 2 * sizeof(u32); - break; #ifdef CONFIG_INET_ESPINTCP case TCP_ENCAP_ESPINTCP: /* only the length field, TCP encap is done by diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 437e782b9663..ab6d0d98dbc3 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -93,6 +93,8 @@ #include <net/ip_fib.h> #include <net/l3mdev.h> #include <net/addrconf.h> +#define CREATE_TRACE_POINTS +#include <trace/events/icmp.h> /* * Build xmit assembly blocks @@ -483,6 +485,7 @@ static struct rtable *icmp_route_lookup(struct net *net, struct icmp_bxm *param) { struct net_device *route_lookup_dev; + struct dst_entry *dst, *dst2; struct rtable *rt, *rt2; struct flowi4 fl4_dec; int err; @@ -508,16 +511,17 @@ static struct rtable *icmp_route_lookup(struct net *net, /* No need to clone since we're just using its address. */ rt2 = rt; - rt = (struct rtable *) xfrm_lookup(net, &rt->dst, - flowi4_to_flowi(fl4), NULL, 0); - if (!IS_ERR(rt)) { + dst = xfrm_lookup(net, &rt->dst, + flowi4_to_flowi(fl4), NULL, 0); + rt = dst_rtable(dst); + if (!IS_ERR(dst)) { if (rt != rt2) return rt; - } else if (PTR_ERR(rt) == -EPERM) { + } else if (PTR_ERR(dst) == -EPERM) { rt = NULL; - } else + } else { return rt; - + } err = xfrm_decode_session_reverse(net, skb_in, flowi4_to_flowi(&fl4_dec), AF_INET); if (err) goto relookup_failed; @@ -551,19 +555,19 @@ static struct rtable *icmp_route_lookup(struct net *net, if (err) goto relookup_failed; - rt2 = (struct rtable *) xfrm_lookup(net, &rt2->dst, - flowi4_to_flowi(&fl4_dec), NULL, - XFRM_LOOKUP_ICMP); - if (!IS_ERR(rt2)) { + dst2 = xfrm_lookup(net, &rt2->dst, flowi4_to_flowi(&fl4_dec), NULL, + XFRM_LOOKUP_ICMP); + rt2 = dst_rtable(dst2); + if (!IS_ERR(dst2)) { dst_release(&rt->dst); memcpy(fl4, &fl4_dec, sizeof(*fl4)); rt = rt2; - } else if (PTR_ERR(rt2) == -EPERM) { + } else if (PTR_ERR(dst2) == -EPERM) { if (rt) dst_release(&rt->dst); return rt2; } else { - err = PTR_ERR(rt2); + err = PTR_ERR(dst2); goto relookup_failed; } return rt; @@ -768,6 +772,8 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, if (!fl4.saddr) fl4.saddr = htonl(INADDR_DUMMY); + trace_icmp_send(skb_in, type, code); + icmp_push_reply(sk, &icmp_param, &fl4, &ipc, &rt); ende: ip_rt_put(rt); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 3b38610958ee..d81f74ce0f02 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -661,7 +661,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo) /* * This will accept the next outstanding connection. 
*/ -struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) +struct sock *inet_csk_accept(struct sock *sk, struct proto_accept_arg *arg) { struct inet_connection_sock *icsk = inet_csk(sk); struct request_sock_queue *queue = &icsk->icsk_accept_queue; @@ -680,7 +680,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) /* Find already established connection */ if (reqsk_queue_empty(queue)) { - long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + long timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK); /* If this is a non blocking socket don't sleep */ error = -EAGAIN; @@ -692,6 +692,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) goto out_err; } req = reqsk_queue_remove(queue, sk); + arg->is_empty = reqsk_queue_empty(queue); newsk = req->sk; if (sk->sk_protocol == IPPROTO_TCP && @@ -745,7 +746,7 @@ out: out_err: newsk = NULL; req = NULL; - *err = error; + arg->err = error; goto out; } EXPORT_SYMBOL(inet_csk_accept); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index cf88eca5f1b4..48d0d494185b 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -565,7 +565,8 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, if (likely(inet_match(net, sk2, acookie, ports, dif, sdif))) { if (sk2->sk_state == TCP_TIME_WAIT) { tw = inet_twsk(sk2); - if (twsk_unique(sk, sk2, twp)) + if (sk->sk_protocol == IPPROTO_TCP && + tcp_twsk_unique(sk, sk2, twp)) break; } goto not_unique; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 534b98a0744a..08e2c92e25ab 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -580,7 +580,6 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = { .proc_handler = proc_dointvec_minmax, .extra1 = &dist_min, }, - { } }; /* secret interval has been deprecated */ @@ -593,7 +592,6 @@ static struct ctl_table ip4_frags_ctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { } }; static int __net_init ip4_frags_ns_ctl_register(struct net *net) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index c3af965dc407..ba205473522e 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -793,7 +793,7 @@ static void ipgre_link_update(struct net_device *dev, bool set_mtu) dev->needed_headroom += len; if (set_mtu) - dev->mtu = max_t(int, dev->mtu - len, 68); + WRITE_ONCE(dev->mtu, max_t(int, dev->mtu - len, 68)); if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags) || (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.o_flags) && diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 5e9c8156656a..d6fbcbd2358a 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -616,7 +616,7 @@ static void ip_list_rcv_finish(struct net *net, struct sock *sk, dst = skb_dst(skb); if (curr_dst != dst) { hint = ip_extract_route_hint(net, skb, - ((struct rtable *)dst)->rt_type); + dst_rtable(dst)->rt_type); /* dispatch old sublist */ if (!list_empty(&sublist)) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 1fe794967211..9500031a1f55 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -198,7 +198,7 @@ EXPORT_SYMBOL_GPL(ip_build_and_send_pkt); static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - struct rtable *rt = (struct rtable *)dst; + struct rtable *rt = dst_rtable(dst); struct net_device *dev = dst->dev; unsigned int hh_len = LL_RESERVED_SPACE(dev); struct neighbour *neigh; @@ -475,7 +475,7 @@ int 
__ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, goto packet_routed; /* Make sure we can route this packet. */ - rt = (struct rtable *)__sk_dst_check(sk, 0); + rt = dst_rtable(__sk_dst_check(sk, 0)); if (!rt) { __be32 daddr; @@ -971,7 +971,7 @@ static int __ip_append_data(struct sock *sk, bool zc = false; unsigned int maxfraglen, fragheaderlen, maxnonfragsize; int csummode = CHECKSUM_NONE; - struct rtable *rt = (struct rtable *)cork->dst; + struct rtable *rt = dst_rtable(cork->dst); bool paged, hold_tskey, extra_uref = false; unsigned int wmem_alloc_delta = 0; u32 tskey = 0; @@ -1390,7 +1390,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, struct inet_sock *inet = inet_sk(sk); struct net *net = sock_net(sk); struct ip_options *opt = NULL; - struct rtable *rt = (struct rtable *)cork->dst; + struct rtable *rt = dst_rtable(cork->dst); struct iphdr *iph; u8 pmtudisc, ttl; __be16 df = 0; @@ -1473,7 +1473,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, * by icmp_hdr(skb)->type. */ if (sk->sk_type == SOCK_RAW && - !inet_test_bit(HDRINCL, sk)) + !(fl4->flowi4_flags & FLOWI_FLAG_KNOWN_NH)) icmp_type = fl4->fl4_icmp_type; else icmp_type = icmp_hdr(skb)->type; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 177f40c3a8e8..bccef2fcf620 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -543,7 +543,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, struct rt6_info *rt6; __be32 daddr; - rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) : + rt6 = skb_valid_dst(skb) ? dst_rt6_info(skb_dst(skb)) : NULL; daddr = md ? dst : tunnel->parms.iph.daddr; @@ -897,7 +897,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn, t->fwmark = fwmark; mtu = ip_tunnel_bind_dev(dev); if (set_mtu) - dev->mtu = mtu; + WRITE_ONCE(dev->mtu, mtu); } dst_cache_reset(&t->dst_cache); netdev_state_change(dev); @@ -1082,7 +1082,7 @@ int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict) new_mtu = max_mtu; } - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu); @@ -1120,7 +1120,7 @@ struct net *ip_tunnel_get_link_net(const struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - return tunnel->net; + return READ_ONCE(tunnel->net); } EXPORT_SYMBOL(ip_tunnel_get_link_net); diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index b9062f4552ac..3ab908b74795 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -44,7 +44,7 @@ static int iptable_filter_table_init(struct net *net) return -ENOMEM; /* Entry 1 is the FORWARD hook */ ((struct ipt_standard *)repl->entries)[1].target.verdict = - forward ? -NF_ACCEPT - 1 : -NF_DROP - 1; + forward ? -NF_ACCEPT - 1 : NF_DROP - 1; err = ipt_register_table(net, &packet_filter, repl, filter_ops); kfree(repl); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 914bc9c35cc7..6c4664c681ca 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -33,6 +33,7 @@ #include <net/protocol.h> #include <net/tcp.h> #include <net/mptcp.h> +#include <net/proto_memory.h> #include <net/udp.h> #include <net/udplite.h> #include <linux/bottom_half.h> diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index dcb11f22cbf2..4cb43401e0e0 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -612,6 +612,9 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) (hdrincl ? 
FLOWI_FLAG_KNOWN_NH : 0), daddr, saddr, 0, 0, sk->sk_uid); + fl4.fl4_icmp_type = 0; + fl4.fl4_icmp_code = 0; + if (!hdrincl) { rfv.msg = msg; rfv.hlen = 0; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f89ff2e5a05b..5fd54103174f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -819,7 +819,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf u32 mark = skb->mark; __u8 tos = iph->tos; - rt = (struct rtable *) dst; + rt = dst_rtable(dst); __build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0); __ip_do_redirect(rt, skb, &fl4, true); @@ -827,7 +827,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) { - struct rtable *rt = (struct rtable *)dst; + struct rtable *rt = dst_rtable(dst); struct dst_entry *ret = dst; if (rt) { @@ -1044,7 +1044,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu, bool confirm_neigh) { - struct rtable *rt = (struct rtable *) dst; + struct rtable *rt = dst_rtable(dst); struct flowi4 fl4; ip_rt_build_flow_key(&fl4, sk, skb); @@ -1115,7 +1115,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0); - rt = (struct rtable *)odst; + rt = dst_rtable(odst); if (odst->obsolete && !odst->ops->check(odst, 0)) { rt = ip_route_output_flow(sock_net(sk), &fl4, sk); if (IS_ERR(rt)) @@ -1124,7 +1124,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) new = true; } - __ip_rt_update_pmtu((struct rtable *)xfrm_dst_path(&rt->dst), &fl4, mtu); + __ip_rt_update_pmtu(dst_rtable(xfrm_dst_path(&rt->dst)), &fl4, mtu); if (!dst_check(&rt->dst, 0)) { if (new) @@ -1181,7 +1181,7 @@ EXPORT_SYMBOL_GPL(ipv4_sk_redirect); INDIRECT_CALLABLE_SCOPE struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) { - struct rtable *rt = (struct rtable *) dst; + struct rtable *rt = dst_rtable(dst); /* All IPV4 dsts are created with ->obsolete set to the value * DST_OBSOLETE_FORCE_CHK which forces validation calls down @@ -1516,10 +1516,8 @@ void rt_del_uncached_list(struct rtable *rt) static void ipv4_dst_destroy(struct dst_entry *dst) { - struct rtable *rt = (struct rtable *)dst; - ip_dst_metrics_put(dst); - rt_del_uncached_list(rt); + rt_del_uncached_list(dst_rtable(dst)); } void rt_flush_dev(struct net_device *dev) @@ -2820,7 +2818,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = { struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) { - struct rtable *ort = (struct rtable *) dst_orig; + struct rtable *ort = dst_rtable(dst_orig); struct rtable *rt; rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, DST_OBSOLETE_DEAD, 0); @@ -2865,9 +2863,9 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, if (flp4->flowi4_proto) { flp4->flowi4_oif = rt->dst.dev->ifindex; - rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst, - flowi4_to_flowi(flp4), - sk, 0); + rt = dst_rtable(xfrm_lookup_route(net, &rt->dst, + flowi4_to_flowi(flp4), + sk, 0)); } return rt; @@ -3498,7 +3496,6 @@ static struct ctl_table ipv4_route_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { } }; static const char ipv4_route_flush_procname[] = "flush"; @@ -3532,7 +3529,6 @@ static struct ctl_table ipv4_route_netns_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { }, }; static __net_init int sysctl_route_net_init(struct net *net) @@ -3550,16 +3546,14 @@ static 
__net_init int sysctl_route_net_init(struct net *net) /* Don't export non-whitelisted sysctls to unprivileged users */ if (net->user_ns != &init_user_ns) { - if (tbl[0].procname != ipv4_route_flush_procname) { - tbl[0].procname = NULL; + if (tbl[0].procname != ipv4_route_flush_procname) table_size = 0; - } } /* Update the variables to point into the current struct net * except for the first element flush */ - for (i = 1; i < ARRAY_SIZE(ipv4_route_netns_table) - 1; i++) + for (i = 1; i < table_size; i++) tbl[i].data += (void *)net - (void *)&init_net; } tbl[0].extra1 = net; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index ce5d19978a26..162a0a3b6ba5 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -575,7 +575,6 @@ static struct ctl_table ipv4_table[] = { .extra1 = &sysctl_fib_sync_mem_min, .extra2 = &sysctl_fib_sync_mem_max, }, - { } }; static struct ctl_table ipv4_net_table[] = { @@ -1502,11 +1501,11 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ONE, }, - { } }; static __net_init int ipv4_sysctl_init_net(struct net *net) { + size_t table_size = ARRAY_SIZE(ipv4_net_table); struct ctl_table *table; table = ipv4_net_table; @@ -1517,7 +1516,7 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) if (!table) goto err_alloc; - for (i = 0; i < ARRAY_SIZE(ipv4_net_table) - 1; i++) { + for (i = 0; i < table_size; i++) { if (table[i].data) { /* Update the variables to point into * the current struct net @@ -1533,7 +1532,7 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) } net->ipv4.ipv4_hdr = register_net_sysctl_sz(net, "net/ipv4", table, - ARRAY_SIZE(ipv4_net_table)); + table_size); if (!net->ipv4.ipv4_hdr) goto err_reg; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f23b97777ea5..681b54e1f3a6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -272,13 +272,16 @@ #include <net/inet_common.h> #include <net/tcp.h> #include <net/mptcp.h> +#include <net/proto_memory.h> #include <net/xfrm.h> #include <net/ip.h> #include <net/sock.h> +#include <net/rstreason.h> #include <linux/uaccess.h> #include <asm/ioctls.h> #include <net/busy_poll.h> +#include <net/hotdata.h> #include <net/rps.h> /* Track pending CMSGs. */ @@ -1187,7 +1190,7 @@ new_segment: if (!skb_can_coalesce(skb, i, pfrag->page, pfrag->offset)) { - if (i >= READ_ONCE(sysctl_max_skb_frags)) { + if (i >= READ_ONCE(net_hotdata.sysctl_max_skb_frags)) { tcp_mark_push(tp, skb); goto new_segment; } @@ -2716,7 +2719,7 @@ void tcp_shutdown(struct sock *sk, int how) /* If we've already sent a FIN, or it's a closed state, skip this. */ if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_SYN_SENT | - TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) { + TCPF_CLOSE_WAIT)) { /* Clear out any half completed packets. FIN if needed. */ if (tcp_close_state(sk)) tcp_send_fin(sk); @@ -2750,7 +2753,15 @@ static bool tcp_too_many_orphans(int shift) READ_ONCE(sysctl_tcp_max_orphans); } -bool tcp_check_oom(struct sock *sk, int shift) +static bool tcp_out_of_memory(const struct sock *sk) +{ + if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && + sk_memory_allocated(sk) > sk_prot_mem_limits(sk, 2)) + return true; + return false; +} + +bool tcp_check_oom(const struct sock *sk, int shift) { bool too_many_orphans, out_of_socket_memory; @@ -2811,7 +2822,8 @@ void __tcp_close(struct sock *sk, long timeout) /* Unread data was tossed, zap the connection. 
*/ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE); tcp_set_state(sk, TCP_CLOSE); - tcp_send_active_reset(sk, sk->sk_allocation); + tcp_send_active_reset(sk, sk->sk_allocation, + SK_RST_REASON_NOT_SPECIFIED); } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { /* Check zero linger _after_ checking for unread data. */ sk->sk_prot->disconnect(sk, 0); @@ -2825,7 +2837,7 @@ void __tcp_close(struct sock *sk, long timeout) * machine. State transitions: * * TCP_ESTABLISHED -> TCP_FIN_WAIT1 - * TCP_SYN_RECV -> TCP_FIN_WAIT1 (forget it, it's impossible) + * TCP_SYN_RECV -> TCP_FIN_WAIT1 (it is difficult) * TCP_CLOSE_WAIT -> TCP_LAST_ACK * * are legal only when FIN has been sent (i.e. in window), @@ -2885,7 +2897,8 @@ adjudge_to_death: struct tcp_sock *tp = tcp_sk(sk); if (READ_ONCE(tp->linger2) < 0) { tcp_set_state(sk, TCP_CLOSE); - tcp_send_active_reset(sk, GFP_ATOMIC); + tcp_send_active_reset(sk, GFP_ATOMIC, + SK_RST_REASON_NOT_SPECIFIED); __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONLINGER); } else { @@ -2903,7 +2916,8 @@ adjudge_to_death: if (sk->sk_state != TCP_CLOSE) { if (tcp_check_oom(sk, 0)) { tcp_set_state(sk, TCP_CLOSE); - tcp_send_active_reset(sk, GFP_ATOMIC); + tcp_send_active_reset(sk, GFP_ATOMIC, + SK_RST_REASON_NOT_SPECIFIED); __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); } else if (!check_net(sock_net(sk))) { @@ -3007,7 +3021,7 @@ int tcp_disconnect(struct sock *sk, int flags) /* The last check adjusts for discrepancy of Linux wrt. RFC * states */ - tcp_send_active_reset(sk, gfp_any()); + tcp_send_active_reset(sk, gfp_any(), SK_RST_REASON_NOT_SPECIFIED); WRITE_ONCE(sk->sk_err, ECONNRESET); } else if (old_state == TCP_SYN_SENT) WRITE_ONCE(sk->sk_err, ECONNRESET); @@ -4349,6 +4363,9 @@ zerocopy_rcv_out: return err; } + case TCP_IS_MPTCP: + val = 0; + break; default: return -ENOPROTOOPT; } @@ -4564,7 +4581,8 @@ int tcp_abort(struct sock *sk, int err) smp_wmb(); sk_error_report(sk); if (tcp_need_reset(sk->sk_state)) - tcp_send_active_reset(sk, GFP_ATOMIC); + tcp_send_active_reset(sk, GFP_ATOMIC, + SK_RST_REASON_NOT_SPECIFIED); tcp_done(sk); } diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 05dc2d05bc7c..760941e55153 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -1024,7 +1024,7 @@ static void bbr_update_model(struct sock *sk, const struct rate_sample *rs) bbr_update_gains(sk); } -__bpf_kfunc static void bbr_main(struct sock *sk, const struct rate_sample *rs) +__bpf_kfunc static void bbr_main(struct sock *sk, u32 ack, int flag, const struct rate_sample *rs) { struct bbr *bbr = inet_csk_ca(sk); u32 bw; @@ -1156,8 +1156,6 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = { }; BTF_KFUNCS_START(tcp_bbr_check_kfunc_ids) -#ifdef CONFIG_X86 -#ifdef CONFIG_DYNAMIC_FTRACE BTF_ID_FLAGS(func, bbr_init) BTF_ID_FLAGS(func, bbr_main) BTF_ID_FLAGS(func, bbr_sndbuf_expand) @@ -1166,8 +1164,6 @@ BTF_ID_FLAGS(func, bbr_cwnd_event) BTF_ID_FLAGS(func, bbr_ssthresh) BTF_ID_FLAGS(func, bbr_min_tso_segs) BTF_ID_FLAGS(func, bbr_set_state) -#endif -#endif BTF_KFUNCS_END(tcp_bbr_check_kfunc_ids) static const struct btf_kfunc_id_set tcp_bbr_kfunc_set = { diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 44869ea089e3..5dbed91c6178 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -486,16 +486,12 @@ static struct tcp_congestion_ops cubictcp __read_mostly = { }; BTF_KFUNCS_START(tcp_cubic_check_kfunc_ids) -#ifdef CONFIG_X86 -#ifdef CONFIG_DYNAMIC_FTRACE BTF_ID_FLAGS(func, cubictcp_init) BTF_ID_FLAGS(func, 
cubictcp_recalc_ssthresh) BTF_ID_FLAGS(func, cubictcp_cong_avoid) BTF_ID_FLAGS(func, cubictcp_state) BTF_ID_FLAGS(func, cubictcp_cwnd_event) BTF_ID_FLAGS(func, cubictcp_acked) -#endif -#endif BTF_KFUNCS_END(tcp_cubic_check_kfunc_ids) static const struct btf_kfunc_id_set tcp_cubic_kfunc_set = { diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index e33fbe4933e4..8a45a4aea933 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -58,7 +58,18 @@ struct dctcp { }; static unsigned int dctcp_shift_g __read_mostly = 4; /* g = 1/2^4 */ -module_param(dctcp_shift_g, uint, 0644); + +static int dctcp_shift_g_set(const char *val, const struct kernel_param *kp) +{ + return param_set_uint_minmax(val, kp, 0, 10); +} + +static const struct kernel_param_ops dctcp_shift_g_ops = { + .set = dctcp_shift_g_set, + .get = param_get_uint, +}; + +module_param_cb(dctcp_shift_g, &dctcp_shift_g_ops, &dctcp_shift_g, 0644); MODULE_PARM_DESC(dctcp_shift_g, "parameter g for updating dctcp_alpha"); static unsigned int dctcp_alpha_on_init __read_mostly = DCTCP_MAX_ALPHA; @@ -261,16 +272,12 @@ static struct tcp_congestion_ops dctcp_reno __read_mostly = { }; BTF_KFUNCS_START(tcp_dctcp_check_kfunc_ids) -#ifdef CONFIG_X86 -#ifdef CONFIG_DYNAMIC_FTRACE BTF_ID_FLAGS(func, dctcp_init) BTF_ID_FLAGS(func, dctcp_update_alpha) BTF_ID_FLAGS(func, dctcp_cwnd_event) BTF_ID_FLAGS(func, dctcp_ssthresh) BTF_ID_FLAGS(func, dctcp_cwnd_undo) BTF_ID_FLAGS(func, dctcp_state) -#endif -#endif BTF_KFUNCS_END(tcp_dctcp_check_kfunc_ids) static const struct btf_kfunc_id_set tcp_dctcp_kfunc_set = { diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 384fa5e2f065..9c04a9c8be9d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -72,6 +72,7 @@ #include <linux/prefetch.h> #include <net/dst.h> #include <net/tcp.h> +#include <net/proto_memory.h> #include <net/inet_common.h> #include <linux/ipsec.h> #include <asm/unaligned.h> @@ -913,7 +914,7 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us) tp->rtt_seq = tp->snd_nxt; tp->mdev_max_us = tcp_rto_min_us(sk); - tcp_bpf_rtt(sk); + tcp_bpf_rtt(sk, mrtt_us, srtt); } } else { /* no previous measure. */ @@ -923,7 +924,7 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us) tp->mdev_max_us = tp->rttvar_us; tp->rtt_seq = tp->snd_nxt; - tcp_bpf_rtt(sk); + tcp_bpf_rtt(sk, mrtt_us, srtt); } tp->srtt_us = max(1U, srtt); } @@ -3541,7 +3542,7 @@ static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked, const struct inet_connection_sock *icsk = inet_csk(sk); if (icsk->icsk_ca_ops->cong_control) { - icsk->icsk_ca_ops->cong_control(sk, rs); + icsk->icsk_ca_ops->cong_control(sk, ack, flag, rs); return; } @@ -6768,6 +6769,8 @@ tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) tcp_initialize_rcv_mss(sk); tcp_fast_path_on(tp); + if (sk->sk_shutdown & SEND_SHUTDOWN) + tcp_shutdown(sk, SEND_SHUTDOWN); break; case TCP_FIN_WAIT1: { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e06f0cd04f7e..30ef0c8f5e92 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -70,6 +70,7 @@ #include <net/xfrm.h> #include <net/secure_seq.h> #include <net/busy_poll.h> +#include <net/rstreason.h> #include <linux/inet.h> #include <linux/ipv6.h> @@ -154,6 +155,12 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) if (tcptw->tw_ts_recent_stamp && (!twp || (reuse && time_after32(ktime_get_seconds(), tcptw->tw_ts_recent_stamp)))) { + /* inet_twsk_hashdance() sets sk_refcnt after putting twsk + * and releasing the bucket lock. 
+ */ + if (unlikely(!refcount_inc_not_zero(&sktw->sk_refcnt))) + return 0; + /* In case of repair and re-using TIME-WAIT sockets we still * want to be sure that it is safe as above but honor the * sequence numbers and time stamps set as part of the repair @@ -174,7 +181,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) tp->rx_opt.ts_recent = tcptw->tw_ts_recent; tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; } - sock_hold(sktw); + return 1; } @@ -723,7 +730,8 @@ out: * Exception: precedence violation. We do not implement it in any case. */ -static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) +static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb, + enum sk_rst_reason reason) { const struct tcphdr *th = tcp_hdr(skb); struct { @@ -869,7 +877,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) if (sk) arg.bound_dev_if = sk->sk_bound_dev_if; - trace_tcp_send_reset(sk, skb); + trace_tcp_send_reset(sk, skb, reason); BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) != offsetof(struct inet_timewait_sock, tw_bound_dev_if)); @@ -1934,7 +1942,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; reset: - tcp_v4_send_reset(rsk, skb); + tcp_v4_send_reset(rsk, skb, sk_rst_convert_drop_reason(reason)); discard: kfree_skb_reason(skb, reason); /* Be careful here. If this function gets more complicated and @@ -2285,7 +2293,10 @@ lookup: } else { drop_reason = tcp_child_process(sk, nsk, skb); if (drop_reason) { - tcp_v4_send_reset(nsk, skb); + enum sk_rst_reason rst_reason; + + rst_reason = sk_rst_convert_drop_reason(drop_reason); + tcp_v4_send_reset(nsk, skb, rst_reason); goto discard_and_relse; } sock_put(sk); @@ -2364,7 +2375,7 @@ csum_error: bad_packet: __TCP_INC_STATS(net, TCP_MIB_INERRS); } else { - tcp_v4_send_reset(NULL, skb); + tcp_v4_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason)); } discard_it: @@ -2416,7 +2427,7 @@ do_time_wait: tcp_v4_timewait_ack(sk, skb); break; case TCP_TW_RST: - tcp_v4_send_reset(sk, skb); + tcp_v4_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET); inet_twsk_deschedule_put(inet_twsk(sk)); goto discard_it; case TCP_TW_SUCCESS:; @@ -2426,7 +2437,6 @@ do_time_wait: static struct timewait_sock_ops tcp_timewait_sock_ops = { .twsk_obj_size = sizeof(struct tcp_timewait_sock), - .twsk_unique = tcp_twsk_unique, .twsk_destructor= tcp_twsk_destructor, }; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 146c061145b4..b93619b2384b 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -22,6 +22,7 @@ #include <net/tcp.h> #include <net/xfrm.h> #include <net/busy_poll.h> +#include <net/rstreason.h> static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) { @@ -878,7 +879,7 @@ embryonic_reset: * avoid becoming vulnerable to outside attack aiming at * resetting legit local connections. 
*/ - req->rsk_ops->send_reset(sk, skb); + req->rsk_ops->send_reset(sk, skb, SK_RST_REASON_INVALID_SYN); } else if (fastopen) { /* received a valid RST pkt */ reqsk_fastopen_remove(sk, req, true); tcp_reset(sk, skb); diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index fab0973f995b..4b791e74529e 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -28,6 +28,70 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, } } +static void __tcpv4_gso_segment_csum(struct sk_buff *seg, + __be32 *oldip, __be32 newip, + __be16 *oldport, __be16 newport) +{ + struct tcphdr *th; + struct iphdr *iph; + + if (*oldip == newip && *oldport == newport) + return; + + th = tcp_hdr(seg); + iph = ip_hdr(seg); + + inet_proto_csum_replace4(&th->check, seg, *oldip, newip, true); + inet_proto_csum_replace2(&th->check, seg, *oldport, newport, false); + *oldport = newport; + + csum_replace4(&iph->check, *oldip, newip); + *oldip = newip; +} + +static struct sk_buff *__tcpv4_gso_segment_list_csum(struct sk_buff *segs) +{ + const struct tcphdr *th; + const struct iphdr *iph; + struct sk_buff *seg; + struct tcphdr *th2; + struct iphdr *iph2; + + seg = segs; + th = tcp_hdr(seg); + iph = ip_hdr(seg); + th2 = tcp_hdr(seg->next); + iph2 = ip_hdr(seg->next); + + if (!(*(const u32 *)&th->source ^ *(const u32 *)&th2->source) && + iph->daddr == iph2->daddr && iph->saddr == iph2->saddr) + return segs; + + while ((seg = seg->next)) { + th2 = tcp_hdr(seg); + iph2 = ip_hdr(seg); + + __tcpv4_gso_segment_csum(seg, + &iph2->saddr, iph->saddr, + &th2->source, th->source); + __tcpv4_gso_segment_csum(seg, + &iph2->daddr, iph->daddr, + &th2->dest, th->dest); + } + + return segs; +} + +static struct sk_buff *__tcp4_gso_segment_list(struct sk_buff *skb, + netdev_features_t features) +{ + skb = skb_segment_list(skb, features, skb_mac_header_len(skb)); + if (IS_ERR(skb)) + return skb; + + return __tcpv4_gso_segment_list_csum(skb); +} + static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb, netdev_features_t features) { @@ -37,6 +101,9 @@ static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb, if (!pskb_may_pull(skb, sizeof(struct tcphdr))) return ERR_PTR(-EINVAL); + if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST) + return __tcp4_gso_segment_list(skb, features); + if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { const struct iphdr *iph = ip_hdr(skb); struct tcphdr *th = tcp_hdr(skb); @@ -178,63 +245,76 @@ out: return segs; } -struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb) +struct sk_buff *tcp_gro_lookup(struct list_head *head, struct tcphdr *th) { - struct sk_buff *pp = NULL; + struct tcphdr *th2; struct sk_buff *p; + + list_for_each_entry(p, head, list) { + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + th2 = tcp_hdr(p); + if (*(u32 *)&th->source ^ *(u32 *)&th2->source) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + + return p; + } + + return NULL; +} + +struct tcphdr *tcp_gro_pull_header(struct sk_buff *skb) +{ + unsigned int thlen, hlen, off; struct tcphdr *th; - struct tcphdr *th2; - unsigned int len; - unsigned int thlen; - __be32 flags; - unsigned int mss = 1; - unsigned int hlen; - unsigned int off; - int flush = 1; - int i; off = skb_gro_offset(skb); hlen = off + sizeof(*th); th = skb_gro_header(skb, hlen, off); if (unlikely(!th)) - goto out; + return NULL; thlen = th->doff * 4; if (thlen < sizeof(*th)) - goto out; + return NULL; hlen = off + thlen; if (!skb_gro_may_pull(skb, hlen)) { th = skb_gro_header_slow(skb, hlen, off); if 
(unlikely(!th)) - goto out; + return NULL; } skb_gro_pull(skb, thlen); - len = skb_gro_len(skb); - flags = tcp_flag_word(th); - - list_for_each_entry(p, head, list) { - if (!NAPI_GRO_CB(p)->same_flow) - continue; + return th; +} - th2 = tcp_hdr(p); +struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb, + struct tcphdr *th) +{ + unsigned int thlen = th->doff * 4; + struct sk_buff *pp = NULL; + struct sk_buff *p; + struct tcphdr *th2; + unsigned int len; + __be32 flags; + unsigned int mss = 1; + int flush = 1; + int i; - if (*(u32 *)&th->source ^ *(u32 *)&th2->source) { - NAPI_GRO_CB(p)->same_flow = 0; - continue; - } + len = skb_gro_len(skb); + flags = tcp_flag_word(th); - goto found; - } - p = NULL; - goto out_check_final; + p = tcp_gro_lookup(head, th); + if (!p) + goto out_check_final; -found: - /* Include the IP ID check below from the inner most IP hdr */ - flush = NAPI_GRO_CB(p)->flush; - flush |= (__force int)(flags & TCP_FLAG_CWR); + th2 = tcp_hdr(p); + flush = (__force int)(flags & TCP_FLAG_CWR); flush |= (__force int)((flags ^ tcp_flag_word(th2)) & ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH)); flush |= (__force int)(th->ack_seq ^ th2->ack_seq); @@ -242,16 +322,7 @@ found: flush |= *(u32 *)((u8 *)th + i) ^ *(u32 *)((u8 *)th2 + i); - /* When we receive our second frame we can made a decision on if we - * continue this flow as an atomic flow with a fixed ID or if we use - * an incrementing ID. - */ - if (NAPI_GRO_CB(p)->flush_id != 1 || - NAPI_GRO_CB(p)->count != 1 || - !NAPI_GRO_CB(p)->is_atomic) - flush |= NAPI_GRO_CB(p)->flush_id; - else - NAPI_GRO_CB(p)->is_atomic = false; + flush |= gro_receive_network_flush(th, th2, p); mss = skb_shinfo(p)->gso_size; @@ -267,6 +338,18 @@ found: flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq); flush |= skb_cmp_decrypted(p, skb); + if (unlikely(NAPI_GRO_CB(p)->is_flist)) { + flush |= (__force int)(flags ^ tcp_flag_word(th2)); + flush |= skb->ip_summed != p->ip_summed; + flush |= skb->csum_level != p->csum_level; + flush |= NAPI_GRO_CB(p)->count >= 64; + + if (flush || skb_gro_receive_list(p, skb)) + mss = 1; + + goto out_check_final; + } + if (flush || skb_gro_receive(p, skb)) { mss = 1; goto out_check_final; @@ -288,7 +371,6 @@ out_check_final: if (p && (!NAPI_GRO_CB(skb)->same_flow || flush)) pp = p; -out: NAPI_GRO_CB(skb)->flush |= (flush != 0); return pp; @@ -314,30 +396,80 @@ void tcp_gro_complete(struct sk_buff *skb) } EXPORT_SYMBOL(tcp_gro_complete); +static void tcp4_check_fraglist_gro(struct list_head *head, struct sk_buff *skb, + struct tcphdr *th) +{ + const struct iphdr *iph; + struct sk_buff *p; + struct sock *sk; + struct net *net; + int iif, sdif; + + if (likely(!(skb->dev->features & NETIF_F_GRO_FRAGLIST))) + return; + + p = tcp_gro_lookup(head, th); + if (p) { + NAPI_GRO_CB(skb)->is_flist = NAPI_GRO_CB(p)->is_flist; + return; + } + + inet_get_iif_sdif(skb, &iif, &sdif); + iph = skb_gro_network_header(skb); + net = dev_net(skb->dev); + sk = __inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, + iph->saddr, th->source, + iph->daddr, ntohs(th->dest), + iif, sdif); + NAPI_GRO_CB(skb)->is_flist = !sk; + if (sk) + sock_put(sk); +} + INDIRECT_CALLABLE_SCOPE struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb) { + struct tcphdr *th; + /* Don't bother verifying checksum if we're going to flush anyway. 
*/ if (!NAPI_GRO_CB(skb)->flush && skb_gro_checksum_validate(skb, IPPROTO_TCP, - inet_gro_compute_pseudo)) { - NAPI_GRO_CB(skb)->flush = 1; - return NULL; - } + inet_gro_compute_pseudo)) + goto flush; + + th = tcp_gro_pull_header(skb); + if (!th) + goto flush; - return tcp_gro_receive(head, skb); + tcp4_check_fraglist_gro(head, skb, th); + + return tcp_gro_receive(head, skb, th); + +flush: + NAPI_GRO_CB(skb)->flush = 1; + return NULL; } INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff) { - const struct iphdr *iph = ip_hdr(skb); + const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation]; + const struct iphdr *iph = (struct iphdr *)(skb->data + offset); struct tcphdr *th = tcp_hdr(skb); + if (unlikely(NAPI_GRO_CB(skb)->is_flist)) { + skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST | SKB_GSO_TCPV4; + skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; + + __skb_incr_checksum_unnecessary(skb); + + return 0; + } + th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr, iph->daddr, 0); skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4 | - (NAPI_GRO_CB(skb)->is_atomic * SKB_GSO_TCP_FIXEDID); + (NAPI_GRO_CB(skb)->ip_fixedid * SKB_GSO_TCP_FIXEDID); tcp_gro_complete(skb); return 0; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ce59e4499b66..95618d0e78e4 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -39,6 +39,7 @@ #include <net/tcp.h> #include <net/mptcp.h> +#include <net/proto_memory.h> #include <linux/compiler.h> #include <linux/gfp.h> @@ -231,7 +232,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss, if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)) (*rcv_wnd) = min(space, MAX_TCP_WINDOW); else - (*rcv_wnd) = min_t(u32, space, U16_MAX); + (*rcv_wnd) = space; if (init_rcv_wnd) *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss); @@ -2403,6 +2404,21 @@ commit: return 0; } +/* tcp_mtu_probe() and tcp_grow_skb() can both eat an skb (src) if + * all its payload was moved to another one (dst). + * Make sure to transfer tcp_flags, eor, and tstamp. + */ +static void tcp_eat_one_skb(struct sock *sk, + struct sk_buff *dst, + struct sk_buff *src) +{ + TCP_SKB_CB(dst)->tcp_flags |= TCP_SKB_CB(src)->tcp_flags; + TCP_SKB_CB(dst)->eor = TCP_SKB_CB(src)->eor; + tcp_skb_collapse_tstamp(dst, src); + tcp_unlink_write_queue(src, sk); + tcp_wmem_free_skb(sk, src); +} + /* Create a new MTU probe if we are ready. * MTU probe is regularly attempting to increase the path MTU by * deliberately sending larger packets. This discovers routing @@ -2508,16 +2524,7 @@ static int tcp_mtu_probe(struct sock *sk) copy = min_t(int, skb->len, probe_size - len); if (skb->len <= copy) { - /* We've eaten all the data from this skb. - * Throw it away. */ - TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags; - /* If this is the last SKB we copy and eor is set - * we need to propagate it to the new skb. 
- */ - TCP_SKB_CB(nskb)->eor = TCP_SKB_CB(skb)->eor; - tcp_skb_collapse_tstamp(nskb, skb); - tcp_unlink_write_queue(skb, sk); - tcp_wmem_free_skb(sk, skb); + tcp_eat_one_skb(sk, nskb, skb); } else { TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags & ~(TCPHDR_FIN|TCPHDR_PSH); @@ -2705,11 +2712,10 @@ static void tcp_grow_skb(struct sock *sk, struct sk_buff *skb, int amount) TCP_SKB_CB(next_skb)->seq += nlen; if (!next_skb->len) { + /* In case FIN is set, we need to update end_seq */ TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq; - TCP_SKB_CB(skb)->eor = TCP_SKB_CB(next_skb)->eor; - TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(next_skb)->tcp_flags; - tcp_unlink_write_queue(next_skb, sk); - tcp_wmem_free_skb(sk, next_skb); + + tcp_eat_one_skb(sk, skb, next_skb); } } @@ -3595,7 +3601,9 @@ void tcp_send_fin(struct sock *sk) return; } } else { - skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation); + skb = alloc_skb_fclone(MAX_TCP_HEADER, + sk_gfp_mask(sk, GFP_ATOMIC | + __GFP_NOWARN)); if (unlikely(!skb)) return; @@ -3615,7 +3623,8 @@ void tcp_send_fin(struct sock *sk) * was unread data in the receive queue. This behavior is recommended * by RFC 2525, section 2.17. -DaveM */ -void tcp_send_active_reset(struct sock *sk, gfp_t priority) +void tcp_send_active_reset(struct sock *sk, gfp_t priority, + enum sk_rst_reason reason) { struct sk_buff *skb; @@ -3640,7 +3649,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) /* skb of trace_tcp_send_reset() keeps the skb that caused RST, * skb here is different to the troublesome skb, so use NULL */ - trace_tcp_send_reset(sk, NULL); + trace_tcp_send_reset(sk, NULL, SK_RST_REASON_NOT_SPECIFIED); } /* Send a crossed SYN-ACK during socket establishment. diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 976db57b95d4..83fe7f62f7f1 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -22,6 +22,7 @@ #include <linux/module.h> #include <linux/gfp.h> #include <net/tcp.h> +#include <net/rstreason.h> static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk) { @@ -127,7 +128,8 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) (!tp->snd_wnd && !tp->packets_out)) do_reset = true; if (do_reset) - tcp_send_active_reset(sk, GFP_ATOMIC); + tcp_send_active_reset(sk, GFP_ATOMIC, + SK_RST_REASON_NOT_SPECIFIED); tcp_done(sk); __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); return 1; @@ -768,7 +770,7 @@ static void tcp_keepalive_timer (struct timer_list *t) goto out; } } - tcp_send_active_reset(sk, GFP_ATOMIC); + tcp_send_active_reset(sk, GFP_ATOMIC, SK_RST_REASON_NOT_SPECIFIED); goto death; } @@ -795,7 +797,8 @@ static void tcp_keepalive_timer (struct timer_list *t) icsk->icsk_probes_out > 0) || (user_timeout == 0 && icsk->icsk_probes_out >= keepalive_probes(tp))) { - tcp_send_active_reset(sk, GFP_ATOMIC); + tcp_send_active_reset(sk, GFP_ATOMIC, + SK_RST_REASON_NOT_SPECIFIED); tcp_write_err(sk); goto out; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 6e2446295089..189c9113fe9a 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -543,7 +543,8 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb, __be16 sport, __be16 dport) { - const struct iphdr *iph = ip_hdr(skb); + const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation]; + const struct iphdr *iph = (struct iphdr *)(skb->data + offset); struct net *net = dev_net(skb->dev); int iif, sdif; @@ -1217,7 +1218,7 @@ int udp_sendmsg(struct sock 
*sk, struct msghdr *msg, size_t len) } if (connected) - rt = (struct rtable *)sk_dst_check(sk, 0); + rt = dst_rtable(sk_dst_check(sk, 0)); if (!rt) { struct net *net = sock_net(sk); @@ -2711,8 +2712,6 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, #ifdef CONFIG_XFRM case UDP_ENCAP_ESPINUDP: set_xfrm_gro_udp_encap_rcv(val, sk->sk_family, sk); - fallthrough; - case UDP_ENCAP_ESPINUDP_NON_IKE: #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) WRITE_ONCE(up->encap_rcv, diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 3498dd1d0694..59448a2dbf2c 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -433,33 +433,6 @@ out: return segs; } -static int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb) -{ - if (unlikely(p->len + skb->len >= 65536)) - return -E2BIG; - - if (NAPI_GRO_CB(p)->last == p) - skb_shinfo(p)->frag_list = skb; - else - NAPI_GRO_CB(p)->last->next = skb; - - skb_pull(skb, skb_gro_offset(skb)); - - NAPI_GRO_CB(p)->last = skb; - NAPI_GRO_CB(p)->count++; - p->data_len += skb->len; - - /* sk ownership - if any - completely transferred to the aggregated packet */ - skb->destructor = NULL; - skb->sk = NULL; - p->truesize += skb->truesize; - p->len += skb->len; - - NAPI_GRO_CB(skb)->same_flow = 1; - - return 0; -} - #define UDP_GRO_CNT_MAX 64 static struct sk_buff *udp_gro_receive_segment(struct list_head *head, @@ -471,6 +444,7 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head, struct sk_buff *p; unsigned int ulen; int ret = 0; + int flush; /* requires non zero csum, for symmetry with GSO */ if (!uh->check) { @@ -504,13 +478,15 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head, return p; } + flush = gro_receive_network_flush(uh, uh2, p); + /* Terminate the flow on len mismatch or if it grow "too much". * Under small packet flood GRO count could elsewhere grow a lot * leading to excessive truesize values. * On len mismatch merge the first packet shorter than gso_size, * otherwise complete the GRO packet. */ - if (ulen > ntohs(uh2->len)) { + if (ulen > ntohs(uh2->len) || flush) { pp = p; } else { if (NAPI_GRO_CB(skb)->is_flist) { @@ -718,7 +694,8 @@ EXPORT_SYMBOL(udp_gro_complete); INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff) { - const struct iphdr *iph = ip_hdr(skb); + const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation]; + const struct iphdr *iph = (struct iphdr *)(skb->data + offset); struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); /* do fraglist only if there is no outer UDP encap (or we already processed it) */ diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index dae35101d189..a620618cc568 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -63,7 +63,11 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async) ip_send_check(iph); if (xo && (xo->flags & XFRM_GRO)) { - skb_mac_header_rebuild(skb); + /* The full l2 header needs to be preserved so that re-injecting the packet at l2 + * works correctly in the presence of vlan tags. + */ + skb_mac_header_rebuild_full(skb, xo->orig_mac_len); + skb_reset_network_header(skb); skb_reset_transport_header(skb); return 0; } @@ -113,19 +117,6 @@ static int __xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull /* Must be an IKE packet.. pass it through */ return 1; break; - case UDP_ENCAP_ESPINUDP_NON_IKE: - /* Check if this is a keepalive packet. If so, eat it. 
*/ - if (len == 1 && udpdata[0] == 0xff) { - return -EINVAL; - } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) && - udpdata32[0] == 0 && udpdata32[1] == 0) { - - /* ESP Packet with Non-IKE marker */ - len = sizeof(struct udphdr) + 2 * sizeof(u32); - } else - /* Must be an IKE packet.. pass it through */ - return 1; - break; } /* At this point we are sure that this is an ESPinUDP packet, diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 1dda59e0aeab..0294fef577fa 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -69,7 +69,7 @@ static int xfrm4_get_saddr(struct net *net, int oif, static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, const struct flowi *fl) { - struct rtable *rt = (struct rtable *)xdst->route; + struct rtable *rt = dst_rtable(xdst->route); const struct flowi4 *fl4 = &fl->u.ip4; xdst->u.rt.rt_iif = fl4->flowi4_iif; @@ -152,7 +152,6 @@ static struct ctl_table xfrm4_policy_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { } }; static __net_init int xfrm4_net_sysctl_init(struct net *net) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 9aa0900abfa1..5c424a0e7232 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -7184,14 +7184,12 @@ static const struct ctl_table addrconf_sysctl[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_TWO, }, - { - /* sentinel */ - } }; static int __addrconf_sysctl_register(struct net *net, char *dev_name, struct inet6_dev *idev, struct ipv6_devconf *p) { + size_t table_size = ARRAY_SIZE(addrconf_sysctl); int i, ifindex; struct ctl_table *table; char path[sizeof("net/ipv6/conf/") + IFNAMSIZ]; @@ -7200,7 +7198,7 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name, if (!table) goto out; - for (i = 0; table[i].data; i++) { + for (i = 0; i < table_size; i++) { table[i].data += (char *)p - (char *)&ipv6_devconf; /* If one of these is already set, then it is not safe to * overwrite either of them: this makes proc_dointvec_minmax @@ -7215,7 +7213,7 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name, snprintf(path, sizeof(path), "net/ipv6/conf/%s", dev_name); p->sysctl_header = register_net_sysctl_sz(net, path, table, - ARRAY_SIZE(addrconf_sysctl)); + table_size); if (!p->sysctl_header) goto free; diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 0f2506e35359..0627c4c18d1a 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -252,9 +252,8 @@ static void aca_free_rcu(struct rcu_head *h) static void aca_put(struct ifacaddr6 *ac) { - if (refcount_dec_and_test(&ac->aca_refcnt)) { - call_rcu(&ac->rcu, aca_free_rcu); - } + if (refcount_dec_and_test(&ac->aca_refcnt)) + call_rcu_hurry(&ac->rcu, aca_free_rcu); } static struct ifacaddr6 *aca_alloc(struct fib6_info *f6i, diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 6bc0a84c8d05..34a9a5b9ed00 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -384,7 +384,6 @@ static struct ip_esp_hdr *esp6_output_udp_encap(struct sk_buff *skb, __be16 dport) { struct udphdr *uh; - __be32 *udpdata32; unsigned int len; len = skb->len + esp->tailen - skb_transport_offset(skb); @@ -399,12 +398,6 @@ static struct ip_esp_hdr *esp6_output_udp_encap(struct sk_buff *skb, *skb_mac_header(skb) = IPPROTO_UDP; - if (encap_type == UDP_ENCAP_ESPINUDP_NON_IKE) { - udpdata32 = (__be32 *)(uh + 1); - udpdata32[0] = udpdata32[1] = 0; - return (struct ip_esp_hdr *)(udpdata32 + 2); - } - return (struct ip_esp_hdr *)(uh + 1); } @@ -460,7 +453,6 @@ static int esp6_output_encap(struct xfrm_state 
*x, struct sk_buff *skb, switch (encap_type) { default: case UDP_ENCAP_ESPINUDP: - case UDP_ENCAP_ESPINUDP_NON_IKE: esph = esp6_output_udp_encap(skb, encap_type, esp, sport, dport); break; case TCP_ENCAP_ESPINTCP: @@ -823,7 +815,6 @@ int esp6_input_done2(struct sk_buff *skb, int err) source = th->source; break; case UDP_ENCAP_ESPINUDP: - case UDP_ENCAP_ESPINUDP_NON_IKE: source = uh->source; break; default: @@ -1233,9 +1224,6 @@ static int esp6_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack) case UDP_ENCAP_ESPINUDP: x->props.header_len += sizeof(struct udphdr); break; - case UDP_ENCAP_ESPINUDP_NON_IKE: - x->props.header_len += sizeof(struct udphdr) + 2 * sizeof(u32); - break; #ifdef CONFIG_INET6_ESPINTCP case TCP_ENCAP_ESPINTCP: /* only the length field, TCP encap is done by diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 52c04f0ac498..9e254de7462f 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -233,8 +233,12 @@ static int __fib6_rule_action(struct fib_rule *rule, struct flowi *flp, rt = pol_lookup_func(lookup, net, table, flp6, arg->lookup_data, flags); if (rt != net->ipv6.ip6_null_entry) { + struct inet6_dev *idev = ip6_dst_idev(&rt->dst); + + if (!idev) + goto again; err = fib6_rule_saddr(net, rule, flags, flp6, - ip6_dst_idev(&rt->dst)->dev); + idev->dev); if (err == -EAGAIN) goto again; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 1635da07285f..7b31674644ef 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -212,7 +212,7 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) { res = true; } else { - struct rt6_info *rt = (struct rt6_info *)dst; + struct rt6_info *rt = dst_rt6_info(dst); int tmo = net->ipv6.sysctl.icmpv6_time; struct inet_peer *peer; @@ -241,7 +241,7 @@ static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type, dst = ip6_route_output(net, sk, fl6); if (!dst->error) { - struct rt6_info *rt = (struct rt6_info *)dst; + struct rt6_info *rt = dst_rt6_info(dst); struct in6_addr prefsrc; rt6_get_prefsrc(rt, &prefsrc); @@ -616,7 +616,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, if (ip6_append_data(sk, icmpv6_getfrag, &msg, len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr), - &ipc6, &fl6, (struct rt6_info *)dst, + &ipc6, &fl6, dst_rt6_info(dst), MSG_DONTWAIT)) { ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); @@ -803,7 +803,7 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb) if (ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr), &ipc6, &fl6, - (struct rt6_info *)dst, MSG_DONTWAIT)) { + dst_rt6_info(dst), MSG_DONTWAIT)) { __ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); } else { @@ -1206,7 +1206,6 @@ static struct ctl_table ipv6_icmp_table_template[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, - { }, }; struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net) diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index 8c1ce78956ba..0601bad79822 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -38,7 +38,7 @@ static inline struct ila_params *ila_params_lwtunnel( static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *orig_dst = skb_dst(skb); - struct rt6_info *rt = (struct rt6_info *)orig_dst; + struct rt6_info *rt = dst_rt6_info(orig_dst); struct ila_lwt *ilwt = ila_lwt_lwtunnel(orig_dst->lwtstate); struct 
dst_entry *dst; int err = -EINVAL; @@ -70,7 +70,7 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = orig_dst->dev->ifindex; fl6.flowi6_iif = LOOPBACK_IFINDEX; - fl6.daddr = *rt6_nexthop((struct rt6_info *)orig_dst, + fl6.daddr = *rt6_nexthop(dst_rt6_info(orig_dst), &ip6h->daddr); dst = ip6_route_output(net, NULL, &fl6); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 2e81383b663b..6db71bb1cd30 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -21,6 +21,7 @@ #include <net/secure_seq.h> #include <net/ip.h> #include <net/sock_reuseport.h> +#include <net/tcp.h> u32 inet6_ehashfn(const struct net *net, const struct in6_addr *laddr, const u16 lport, @@ -289,7 +290,8 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, dif, sdif))) { if (sk2->sk_state == TCP_TIME_WAIT) { tw = inet_twsk(sk2); - if (twsk_unique(sk, sk2, twp)) + if (sk->sk_protocol == IPPROTO_TCP && + tcp_twsk_unique(sk, sk2, twp)) break; } goto not_unique; diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index b41e35af69ea..bd5aff97d8b1 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -67,7 +67,7 @@ static int ipv6_gro_pull_exthdrs(struct sk_buff *skb, int off, int proto) off += len; } - skb_gro_pull(skb, off - skb_network_offset(skb)); + skb_gro_pull(skb, off - skb_gro_receive_network_offset(skb)); return proto; } @@ -236,7 +236,7 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head, if (unlikely(!iph)) goto out; - skb_set_network_header(skb, off); + NAPI_GRO_CB(skb)->inner_network_offset = off; flush += ntohs(iph->payload_len) != skb->len - hlen; @@ -259,7 +259,7 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head, NAPI_GRO_CB(skb)->proto = proto; flush--; - nlen = skb_network_header_len(skb); + nlen = skb_gro_offset(skb) - off; list_for_each_entry(p, head, list) { const struct ipv6hdr *iph2; @@ -290,19 +290,8 @@ not_same_flow: nlen - sizeof(struct ipv6hdr))) goto not_same_flow; } - /* flush if Traffic Class fields are different */ - NAPI_GRO_CB(p)->flush |= !!((first_word & htonl(0x0FF00000)) | - (__force __be32)(iph->hop_limit ^ iph2->hop_limit)); - NAPI_GRO_CB(p)->flush |= flush; - - /* If the previous IP ID value was based on an atomic - * datagram we can overwrite the value and ignore it. 
- */ - if (NAPI_GRO_CB(skb)->is_atomic) - NAPI_GRO_CB(p)->flush_id = 0; } - NAPI_GRO_CB(skb)->is_atomic = true; NAPI_GRO_CB(skb)->flush |= flush; skb_gro_postpull_rcsum(skb, iph, nlen); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index b9dd3a66e423..27d8725445e3 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -120,7 +120,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); rcu_read_lock(); - nexthop = rt6_nexthop((struct rt6_info *)dst, daddr); + nexthop = rt6_nexthop(dst_rt6_info(dst), daddr); neigh = __ipv6_neigh_lookup_noref(dev, nexthop); if (unlikely(IS_ERR_OR_NULL(neigh))) { @@ -234,7 +234,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; - if (unlikely(READ_ONCE(idev->cnf.disable_ipv6))) { + if (unlikely(!idev || READ_ONCE(idev->cnf.disable_ipv6))) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DISABLED); return 0; @@ -599,7 +599,7 @@ int ip6_forward(struct sk_buff *skb) * send a redirect. */ - rt = (struct rt6_info *) dst; + rt = dst_rt6_info(dst); if (rt->rt6i_flags & RTF_GATEWAY) target = &rt->rt6i_gateway; else @@ -856,7 +856,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)) { struct sk_buff *frag; - struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); + struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? inet6_sk(skb->sk) : NULL; bool mono_delivery_time = skb->mono_delivery_time; @@ -1063,7 +1063,7 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk, return NULL; } - rt = (struct rt6_info *)dst; + rt = dst_rt6_info(dst); /* Yes, checking route validity in not connected * case is not very simple. Take into account, * that we do not support routing by source, TOS, @@ -1118,7 +1118,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, struct rt6_info *rt; *dst = ip6_route_output(net, sk, fl6); - rt = (*dst)->error ? NULL : (struct rt6_info *)*dst; + rt = (*dst)->error ? NULL : dst_rt6_info(*dst); rcu_read_lock(); from = rt ? 
rcu_dereference(rt->from) : NULL; @@ -1159,7 +1159,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, * dst entry and replace it instead with the * dst entry of the nexthop router */ - rt = (struct rt6_info *) *dst; + rt = dst_rt6_info(*dst); rcu_read_lock(); n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt, &fl6->daddr)); @@ -1423,7 +1423,7 @@ static int __ip6_append_data(struct sock *sk, int offset = 0; bool zc = false; u32 tskey = 0; - struct rt6_info *rt = (struct rt6_info *)cork->dst; + struct rt6_info *rt = dst_rt6_info(cork->dst); bool paged, hold_tskey, extra_uref = false; struct ipv6_txoptions *opt = v6_cork->opt; int csummode = CHECKSUM_NONE; @@ -1877,7 +1877,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, struct net *net = sock_net(sk); struct ipv6hdr *hdr; struct ipv6_txoptions *opt = v6_cork->opt; - struct rt6_info *rt = (struct rt6_info *)cork->base.dst; + struct rt6_info *rt = dst_rt6_info(cork->base.dst); struct flowi6 *fl6 = &cork->fl.u.ip6; unsigned char proto = fl6->flowi6_proto; @@ -1933,7 +1933,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, u8 icmp6_type; if (sk->sk_socket->type == SOCK_RAW && - !inet_test_bit(HDRINCL, sk)) + !(fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH)) icmp6_type = fl6->fl6_icmp_type; else icmp6_type = icmp6_hdr(skb)->icmp6_type; @@ -1949,7 +1949,7 @@ out: int ip6_send_skb(struct sk_buff *skb) { struct net *net = sock_net(skb->sk); - struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); + struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); int err; err = ip6_local_out(net, skb->sk, skb); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 57bb3b3ea0c5..9dee0c127955 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1746,7 +1746,7 @@ int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) if (new_mtu > IP_MAX_MTU - dev->hard_header_len) return -EINVAL; } - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } EXPORT_SYMBOL(ip6_tnl_change_mtu); @@ -2146,7 +2146,7 @@ struct net *ip6_tnl_get_link_net(const struct net_device *dev) { struct ip6_tnl *tunnel = netdev_priv(dev); - return tunnel->net; + return READ_ONCE(tunnel->net); } EXPORT_SYMBOL(ip6_tnl_get_link_net); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 78344cf3867e..590737c27537 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -666,7 +666,8 @@ static void vti6_link_config(struct ip6_tnl *t, bool keep_mtu) dev->flags &= ~IFF_POINTOPOINT; if (keep_mtu && dev->mtu) { - dev->mtu = clamp(dev->mtu, dev->min_mtu, dev->max_mtu); + WRITE_ONCE(dev->mtu, + clamp(dev->mtu, dev->min_mtu, dev->max_mtu)); return; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index cb0ee81a068a..dd342e6ecf3f 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2273,7 +2273,7 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, int err; struct mr_table *mrt; struct mfc6_cache *cache; - struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); + struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); if (!mrt) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index ae134634c323..d914b23256ce 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1722,7 +1722,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) if (IS_ERR(dst)) return; - rt = (struct rt6_info *) dst; + rt = dst_rt6_info(dst); if (rt->rt6i_flags & RTF_GATEWAY) { ND_PRINTK(2, warn, diff --git a/net/ipv6/netfilter/ip6table_filter.c 
b/net/ipv6/netfilter/ip6table_filter.c index df785ebda0ca..e8992693e14a 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -43,7 +43,7 @@ static int ip6table_filter_table_init(struct net *net) return -ENOMEM; /* Entry 1 is the FORWARD hook */ ((struct ip6t_standard *)repl->entries)[1].target.verdict = - forward ? -NF_ACCEPT - 1 : -NF_DROP - 1; + forward ? -NF_ACCEPT - 1 : NF_DROP - 1; err = ip6t_register_table(net, &packet_filter, repl, filter_ops); kfree(repl); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index ce8c14d8aff5..5e1b50c6a44d 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -62,7 +62,6 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = { .mode = 0644, .proc_handler = proc_doulongvec_minmax, }, - { } }; static int nf_ct_frag6_sysctl_register(struct net *net) diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index ef2059c88955..88b3fcacd4f9 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -154,7 +154,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, false); if (IS_ERR(dst)) return PTR_ERR(dst); - rt = (struct rt6_info *) dst; + rt = dst_rt6_info(dst); if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) fl6.flowi6_oif = READ_ONCE(np->mcast_oif); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 0d896ca7b589..2eedf255600b 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -598,7 +598,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, struct ipv6hdr *iph; struct sk_buff *skb; int err; - struct rt6_info *rt = (struct rt6_info *)*dstp; + struct rt6_info *rt = dst_rt6_info(*dstp); int hlen = LL_RESERVED_SPACE(rt->dst.dev); int tlen = rt->dst.dev->needed_tailroom; @@ -917,7 +917,7 @@ back_from_confirm: ipc6.opt = opt; lock_sock(sk); err = ip6_append_data(sk, raw6_getfrag, &rfv, - len, 0, &ipc6, &fl6, (struct rt6_info *)dst, + len, 0, &ipc6, &fl6, dst_rt6_info(dst), msg->msg_flags); if (err) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index ee95cdcc8747..327caca64257 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -369,7 +369,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) * the source of the fragment, with the Pointer field set to zero. 
*/ nexthdr = hdr->nexthdr; - if (ipv6frag_thdr_truncated(skb, skb_transport_offset(skb), &nexthdr)) { + if (ipv6frag_thdr_truncated(skb, skb_network_offset(skb) + sizeof(struct ipv6hdr), &nexthdr)) { __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_INCOMP, 0); @@ -436,7 +436,6 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { } }; /* secret interval has been deprecated */ @@ -449,7 +448,6 @@ static struct ctl_table ip6_frags_ctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { } }; static int __net_init ip6_frags_ns_sysctl_register(struct net *net) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 1f4b935a0e57..bbc2a0dd9314 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -226,7 +226,7 @@ static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst, struct sk_buff *skb, const void *daddr) { - const struct rt6_info *rt = container_of(dst, struct rt6_info, dst); + const struct rt6_info *rt = dst_rt6_info(dst); return ip6_neigh_lookup(rt6_nexthop(rt, &in6addr_any), dst->dev, skb, daddr); @@ -234,8 +234,8 @@ static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst, static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr) { + const struct rt6_info *rt = dst_rt6_info(dst); struct net_device *dev = dst->dev; - struct rt6_info *rt = (struct rt6_info *)dst; daddr = choose_neigh_daddr(rt6_nexthop(rt, &in6addr_any), NULL, daddr); if (!daddr) @@ -354,7 +354,7 @@ EXPORT_SYMBOL(ip6_dst_alloc); static void ip6_dst_destroy(struct dst_entry *dst) { - struct rt6_info *rt = (struct rt6_info *)dst; + struct rt6_info *rt = dst_rt6_info(dst); struct fib6_info *from; struct inet6_dev *idev; @@ -373,7 +373,7 @@ static void ip6_dst_destroy(struct dst_entry *dst) static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev) { - struct rt6_info *rt = (struct rt6_info *)dst; + struct rt6_info *rt = dst_rt6_info(dst); struct inet6_dev *idev = rt->rt6i_idev; if (idev && idev->dev != blackhole_netdev) { @@ -1288,7 +1288,7 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup); if (dst->error == 0) - return (struct rt6_info *) dst; + return dst_rt6_info(dst); dst_release(dst); @@ -2647,7 +2647,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, rcu_read_lock(); dst = ip6_route_output_flags_noref(net, sk, fl6, flags); - rt6 = (struct rt6_info *)dst; + rt6 = dst_rt6_info(dst); /* For dst cached in uncached_list, refcnt is already taken. */ if (list_empty(&rt6->dst.rt_uncached) && !dst_hold_safe(dst)) { dst = &net->ipv6.ip6_null_entry->dst; @@ -2661,7 +2661,7 @@ EXPORT_SYMBOL_GPL(ip6_route_output_flags); struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig) { - struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig; + struct rt6_info *rt, *ort = dst_rt6_info(dst_orig); struct net_device *loopback_dev = net->loopback_dev; struct dst_entry *new = NULL; @@ -2744,7 +2744,7 @@ INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst, struct fib6_info *from; struct rt6_info *rt; - rt = container_of(dst, struct rt6_info, dst); + rt = dst_rt6_info(dst); if (rt->sernum) return rt6_is_valid(rt) ? 
dst : NULL; @@ -2772,7 +2772,7 @@ EXPORT_INDIRECT_CALLABLE(ip6_dst_check); static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) { - struct rt6_info *rt = (struct rt6_info *) dst; + struct rt6_info *rt = dst_rt6_info(dst); if (rt) { if (rt->rt6i_flags & RTF_CACHE) { @@ -2796,7 +2796,7 @@ static void ip6_link_failure(struct sk_buff *skb) icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); - rt = (struct rt6_info *) skb_dst(skb); + rt = dst_rt6_info(skb_dst(skb)); if (rt) { rcu_read_lock(); if (rt->rt6i_flags & RTF_CACHE) { @@ -2852,7 +2852,7 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, bool confirm_neigh) { const struct in6_addr *daddr, *saddr; - struct rt6_info *rt6 = (struct rt6_info *)dst; + struct rt6_info *rt6 = dst_rt6_info(dst); /* Note: do *NOT* check dst_metric_locked(dst, RTAX_MTU) * IPv6 pmtu discovery isn't optional, so 'mtu lock' cannot disable it. @@ -4174,7 +4174,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu } } - rt = (struct rt6_info *) dst; + rt = dst_rt6_info(dst); if (rt->rt6i_flags & RTF_REJECT) { net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n"); return; @@ -4445,7 +4445,7 @@ static void rtmsg_to_fib6_config(struct net *net, .fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ? : RT6_TABLE_MAIN, .fc_ifindex = rtmsg->rtmsg_ifindex, - .fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER, + .fc_metric = rtmsg->rtmsg_metric, .fc_expires = rtmsg->rtmsg_info, .fc_dst_len = rtmsg->rtmsg_dst_len, .fc_src_len = rtmsg->rtmsg_src_len, @@ -4475,6 +4475,9 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg) rtnl_lock(); switch (cmd) { case SIOCADDRT: + /* Only do the default setting of fc_metric in route adding */ + if (cfg.fc_metric == 0) + cfg.fc_metric = IP6_RT_PRIO_USER; err = ip6_route_add(&cfg, GFP_KERNEL, NULL); break; case SIOCDELRT: @@ -5608,7 +5611,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, int iif, int type, u32 portid, u32 seq, unsigned int flags) { - struct rt6_info *rt6 = (struct rt6_info *)dst; + struct rt6_info *rt6 = dst_rt6_info(dst); struct rt6key *rt6_dst, *rt6_src; u32 *pmetrics, table, rt6_flags; unsigned char nh_flags = 0; @@ -6111,7 +6114,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, } - rt = container_of(dst, struct rt6_info, dst); + rt = dst_rt6_info(dst); if (rt->dst.error) { err = rt->dst.error; ip6_rt_put(rt); @@ -6428,7 +6431,6 @@ static struct ctl_table ipv6_route_table_template[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, - { } }; struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net) @@ -6452,10 +6454,6 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net) table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss; table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down; - - /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) - table[1].procname = NULL; } return table; diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 35508abd76f4..a31521e270f7 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -551,6 +551,8 @@ out_unregister_iptun: #endif #ifdef CONFIG_IPV6_SEG6_LWTUNNEL out_unregister_genl: +#endif +#if IS_ENABLED(CONFIG_IPV6_SEG6_LWTUNNEL) || IS_ENABLED(CONFIG_IPV6_SEG6_HMAC) genl_unregister_family(&seg6_genl_family); #endif out_unregister_pernet: @@ -564,8 +566,9 @@ void 
seg6_exit(void) seg6_hmac_exit(); #endif #ifdef CONFIG_IPV6_SEG6_LWTUNNEL + seg6_local_exit(); seg6_iptunnel_exit(); #endif - unregister_pernet_subsys(&ip6_segments_ops); genl_unregister_family(&seg6_genl_family); + unregister_pernet_subsys(&ip6_segments_ops); } diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c index 861e0366f549..bbf5b84a70fc 100644 --- a/net/ipv6/seg6_hmac.c +++ b/net/ipv6/seg6_hmac.c @@ -356,6 +356,7 @@ static int seg6_hmac_init_algo(void) struct crypto_shash *tfm; struct shash_desc *shash; int i, alg_count, cpu; + int ret = -ENOMEM; alg_count = ARRAY_SIZE(hmac_algos); @@ -366,12 +367,14 @@ static int seg6_hmac_init_algo(void) algo = &hmac_algos[i]; algo->tfms = alloc_percpu(struct crypto_shash *); if (!algo->tfms) - return -ENOMEM; + goto error_out; for_each_possible_cpu(cpu) { tfm = crypto_alloc_shash(algo->name, 0, 0); - if (IS_ERR(tfm)) - return PTR_ERR(tfm); + if (IS_ERR(tfm)) { + ret = PTR_ERR(tfm); + goto error_out; + } p_tfm = per_cpu_ptr(algo->tfms, cpu); *p_tfm = tfm; } @@ -383,18 +386,22 @@ static int seg6_hmac_init_algo(void) algo->shashs = alloc_percpu(struct shash_desc *); if (!algo->shashs) - return -ENOMEM; + goto error_out; for_each_possible_cpu(cpu) { shash = kzalloc_node(shsize, GFP_KERNEL, cpu_to_node(cpu)); if (!shash) - return -ENOMEM; + goto error_out; *per_cpu_ptr(algo->shashs, cpu) = shash; } } return 0; + +error_out: + seg6_hmac_exit(); + return ret; } int __init seg6_hmac_init(void) @@ -412,22 +419,29 @@ int __net_init seg6_hmac_net_init(struct net *net) void seg6_hmac_exit(void) { struct seg6_hmac_algo *algo = NULL; + struct crypto_shash *tfm; + struct shash_desc *shash; int i, alg_count, cpu; alg_count = ARRAY_SIZE(hmac_algos); for (i = 0; i < alg_count; i++) { algo = &hmac_algos[i]; - for_each_possible_cpu(cpu) { - struct crypto_shash *tfm; - struct shash_desc *shash; - shash = *per_cpu_ptr(algo->shashs, cpu); - kfree(shash); - tfm = *per_cpu_ptr(algo->tfms, cpu); - crypto_free_shash(tfm); + if (algo->shashs) { + for_each_possible_cpu(cpu) { + shash = *per_cpu_ptr(algo->shashs, cpu); + kfree(shash); + } + free_percpu(algo->shashs); + } + + if (algo->tfms) { + for_each_possible_cpu(cpu) { + tfm = *per_cpu_ptr(algo->tfms, cpu); + crypto_free_shash(tfm); + } + free_percpu(algo->tfms); } - free_percpu(algo->tfms); - free_percpu(algo->shashs); } } EXPORT_SYMBOL(seg6_hmac_exit); diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 03b877ff4558..a75df2ec8db0 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -459,10 +459,8 @@ static int seg6_input_core(struct net *net, struct sock *sk, int err; err = seg6_do_srh(skb); - if (unlikely(err)) { - kfree_skb(skb); - return err; - } + if (unlikely(err)) + goto drop; slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); @@ -486,7 +484,7 @@ static int seg6_input_core(struct net *net, struct sock *sk, err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); if (unlikely(err)) - return err; + goto drop; if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, @@ -494,6 +492,9 @@ static int seg6_input_core(struct net *net, struct sock *sk, skb_dst(skb)->dev, seg6_input_finish); return seg6_input_finish(dev_net(skb->dev), NULL, skb); +drop: + kfree_skb(skb); + return err; } static int seg6_input_nf(struct sk_buff *skb) diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 75de55f907b0..c060285ff47f 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -213,7 +213,6 @@ static struct 
ctl_table ipv6_table_template[] = { .proc_handler = proc_doulongvec_minmax, .extra2 = &ioam6_id_wide_max, }, - { } }; static struct ctl_table ipv6_rotable[] = { @@ -248,11 +247,11 @@ static struct ctl_table ipv6_rotable[] = { .proc_handler = proc_dointvec, }, #endif /* CONFIG_NETLABEL */ - { } }; static int __net_init ipv6_sysctl_net_init(struct net *net) { + size_t table_size = ARRAY_SIZE(ipv6_table_template); struct ctl_table *ipv6_table; struct ctl_table *ipv6_route_table; struct ctl_table *ipv6_icmp_table; @@ -264,7 +263,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) if (!ipv6_table) goto out; /* Update the variables to point into the current struct net */ - for (i = 0; i < ARRAY_SIZE(ipv6_table_template) - 1; i++) + for (i = 0; i < table_size; i++) ipv6_table[i].data += (void *)net - (void *)&init_net; ipv6_route_table = ipv6_route_sysctl_init(net); @@ -276,8 +275,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) goto out_ipv6_route_table; net->ipv6.sysctl.hdr = register_net_sysctl_sz(net, "net/ipv6", - ipv6_table, - ARRAY_SIZE(ipv6_table_template)); + ipv6_table, table_size); if (!net->ipv6.sysctl.hdr) goto out_ipv6_icmp_table; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index bb7c3caf4f85..4c3605485b68 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -60,6 +60,7 @@ #include <net/secure_seq.h> #include <net/hotdata.h> #include <net/busy_poll.h> +#include <net/rstreason.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> @@ -69,7 +70,8 @@ #include <trace/events/tcp.h> -static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb); +static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb, + enum sk_rst_reason reason); static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, struct request_sock *req); @@ -95,11 +97,9 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) struct dst_entry *dst = skb_dst(skb); if (dst && dst_hold_safe(dst)) { - const struct rt6_info *rt = (const struct rt6_info *)dst; - rcu_assign_pointer(sk->sk_rx_dst, dst); sk->sk_rx_dst_ifindex = skb->skb_iif; - sk->sk_rx_dst_cookie = rt6_get_cookie(rt); + sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst)); } } @@ -1008,7 +1008,8 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 kfree_skb(buff); } -static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) +static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb, + enum sk_rst_reason reason) { const struct tcphdr *th = tcp_hdr(skb); struct ipv6hdr *ipv6h = ipv6_hdr(skb); @@ -1130,7 +1131,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) label = ip6_flowlabel(ipv6h); } - trace_tcp_send_reset(sk, skb); + trace_tcp_send_reset(sk, skb, reason); tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1, ipv6_get_dsfield(ipv6h), label, priority, txhash, @@ -1677,7 +1678,7 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; reset: - tcp_v6_send_reset(sk, skb); + tcp_v6_send_reset(sk, skb, sk_rst_convert_drop_reason(reason)); discard: if (opt_skb) __kfree_skb(opt_skb); @@ -1862,7 +1863,10 @@ lookup: } else { drop_reason = tcp_child_process(sk, nsk, skb); if (drop_reason) { - tcp_v6_send_reset(nsk, skb); + enum sk_rst_reason rst_reason; + + rst_reason = sk_rst_convert_drop_reason(drop_reason); + tcp_v6_send_reset(nsk, skb, rst_reason); goto discard_and_relse; } sock_put(sk); @@ -1939,7 +1943,7 @@ csum_error: bad_packet: 
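The tcp_ipv6.c hunks above thread an enum sk_rst_reason from the point where a packet is dropped down to the RST emitter, so trace_tcp_send_reset() can record why the reset was sent instead of reconstructing it later. A minimal sketch of the resulting call-site pattern, using only the converter visible in these hunks (the helper name reject_with_reset is hypothetical):

	/* sketch: send an RST that records the cause of the drop */
	static void reject_with_reset(struct sock *sk, struct sk_buff *skb,
				      enum skb_drop_reason drop_reason)
	{
		enum sk_rst_reason rst_reason;

		/* map the RX drop reason onto a reset reason */
		rst_reason = sk_rst_convert_drop_reason(drop_reason);
		tcp_v6_send_reset(sk, skb, rst_reason);
	}

The timewait path cannot derive a reason from a drop, so it passes an explicit SK_RST_REASON_TCP_TIMEWAIT_SOCKET, as in the TCP_TW_RST hunk that follows.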
__TCP_INC_STATS(net, TCP_MIB_INERRS); } else { - tcp_v6_send_reset(NULL, skb); + tcp_v6_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason)); } discard_it: @@ -1995,7 +1999,7 @@ do_time_wait: tcp_v6_timewait_ack(sk, skb); break; case TCP_TW_RST: - tcp_v6_send_reset(sk, skb); + tcp_v6_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET); inet_twsk_deschedule_put(inet_twsk(sk)); goto discard_it; case TCP_TW_SUCCESS: @@ -2045,7 +2049,6 @@ void tcp_v6_early_demux(struct sk_buff *skb) static struct timewait_sock_ops tcp6_timewait_sock_ops = { .twsk_obj_size = sizeof(struct tcp6_timewait_sock), - .twsk_unique = tcp_twsk_unique, .twsk_destructor = tcp_twsk_destructor, }; diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index 4b07d1e6c952..23971903e66d 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -7,31 +7,84 @@ */ #include <linux/indirect_call_wrapper.h> #include <linux/skbuff.h> +#include <net/inet6_hashtables.h> #include <net/gro.h> #include <net/protocol.h> #include <net/tcp.h> #include <net/ip6_checksum.h> #include "ip6_offload.h" +static void tcp6_check_fraglist_gro(struct list_head *head, struct sk_buff *skb, + struct tcphdr *th) +{ +#if IS_ENABLED(CONFIG_IPV6) + const struct ipv6hdr *hdr; + struct sk_buff *p; + struct sock *sk; + struct net *net; + int iif, sdif; + + if (likely(!(skb->dev->features & NETIF_F_GRO_FRAGLIST))) + return; + + p = tcp_gro_lookup(head, th); + if (p) { + NAPI_GRO_CB(skb)->is_flist = NAPI_GRO_CB(p)->is_flist; + return; + } + + inet6_get_iif_sdif(skb, &iif, &sdif); + hdr = skb_gro_network_header(skb); + net = dev_net(skb->dev); + sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, + &hdr->saddr, th->source, + &hdr->daddr, ntohs(th->dest), + iif, sdif); + NAPI_GRO_CB(skb)->is_flist = !sk; + if (sk) + sock_put(sk); +#endif /* IS_ENABLED(CONFIG_IPV6) */ +} + INDIRECT_CALLABLE_SCOPE struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb) { + struct tcphdr *th; + /* Don't bother verifying checksum if we're going to flush anyway. 
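tcp6_check_fraglist_gro() above implements the policy side of fraglist GRO: the established-socket lookup tells local delivery apart from forwarding, and only forwarded flows are aggregated as a fragment list so they can later be re-segmented without reshaping. Condensed into one predicate (a sketch; the function name is hypothetical, the fields are the ones used in the hunk):

	/* fraglist GRO is worthwhile only when the device enables it and
	 * no local established socket terminates the flow, i.e. the
	 * segments will be forwarded and re-segmented as-is
	 */
	static bool tcp6_want_fraglist(const struct net_device *dev,
				       const struct sock *sk)
	{
		return (dev->features & NETIF_F_GRO_FRAGLIST) && !sk;
	}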
*/ if (!NAPI_GRO_CB(skb)->flush && skb_gro_checksum_validate(skb, IPPROTO_TCP, - ip6_gro_compute_pseudo)) { - NAPI_GRO_CB(skb)->flush = 1; - return NULL; - } + ip6_gro_compute_pseudo)) + goto flush; - return tcp_gro_receive(head, skb); + th = tcp_gro_pull_header(skb); + if (!th) + goto flush; + + tcp6_check_fraglist_gro(head, skb, th); + + return tcp_gro_receive(head, skb, th); + +flush: + NAPI_GRO_CB(skb)->flush = 1; + return NULL; } INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff) { - const struct ipv6hdr *iph = ipv6_hdr(skb); + const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation]; + const struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + offset); struct tcphdr *th = tcp_hdr(skb); + if (unlikely(NAPI_GRO_CB(skb)->is_flist)) { + skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST | SKB_GSO_TCPV6; + skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; + + __skb_incr_checksum_unnecessary(skb); + + return 0; + } + th->check = ~tcp_v6_check(skb->len - thoff, &iph->saddr, &iph->daddr, 0); skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6; @@ -40,6 +93,61 @@ INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff) return 0; } +static void __tcpv6_gso_segment_csum(struct sk_buff *seg, + __be16 *oldport, __be16 newport) +{ + struct tcphdr *th; + + if (*oldport == newport) + return; + + th = tcp_hdr(seg); + inet_proto_csum_replace2(&th->check, seg, *oldport, newport, false); + *oldport = newport; +} + +static struct sk_buff *__tcpv6_gso_segment_list_csum(struct sk_buff *segs) +{ + const struct tcphdr *th; + const struct ipv6hdr *iph; + struct sk_buff *seg; + struct tcphdr *th2; + struct ipv6hdr *iph2; + + seg = segs; + th = tcp_hdr(seg); + iph = ipv6_hdr(seg); + th2 = tcp_hdr(seg->next); + iph2 = ipv6_hdr(seg->next); + + if (!(*(const u32 *)&th->source ^ *(const u32 *)&th2->source) && + ipv6_addr_equal(&iph->saddr, &iph2->saddr) && + ipv6_addr_equal(&iph->daddr, &iph2->daddr)) + return segs; + + while ((seg = seg->next)) { + th2 = tcp_hdr(seg); + iph2 = ipv6_hdr(seg); + + iph2->saddr = iph->saddr; + iph2->daddr = iph->daddr; + __tcpv6_gso_segment_csum(seg, &th2->source, th->source); + __tcpv6_gso_segment_csum(seg, &th2->dest, th->dest); + } + + return segs; +} + +static struct sk_buff *__tcp6_gso_segment_list(struct sk_buff *skb, + netdev_features_t features) +{ + skb = skb_segment_list(skb, features, skb_mac_header_len(skb)); + if (IS_ERR(skb)) + return skb; + + return __tcpv6_gso_segment_list_csum(skb); +} + static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb, netdev_features_t features) { @@ -51,6 +159,9 @@ static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb, if (!pskb_may_pull(skb, sizeof(*th))) return ERR_PTR(-EINVAL); + if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST) + return __tcp6_gso_segment_list(skb, features); + if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { const struct ipv6hdr *ipv6h = ipv6_hdr(skb); struct tcphdr *th = tcp_hdr(skb); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 085ee236d9a1..c81a07ac0463 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -285,7 +285,8 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb, __be16 sport, __be16 dport) { - const struct ipv6hdr *iph = ipv6_hdr(skb); + const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation]; + const struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + offset); struct net *net = dev_net(skb->dev); int iif, sdif; @@ -910,11 +911,8 @@ start_lookup: 
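__tcpv6_gso_segment_csum() above leans on inet_proto_csum_replace2() to patch the TCP checksum in place when a segment's port differs from the head of the list, rather than recomputing it over the payload. The arithmetic behind such helpers is the one's-complement update HC' = ~(~HC + ~m + m') from RFC 1624; a self-contained sketch of that update for one 16-bit field:

	#include <stdint.h>

	/* incrementally update an Internet checksum when a 16-bit field
	 * changes from 'old' to 'new' (RFC 1624, equation 3)
	 */
	static uint16_t csum_replace16(uint16_t check, uint16_t old, uint16_t new)
	{
		uint32_t sum = (uint16_t)~check + (uint16_t)~old + new;

		sum = (sum & 0xffff) + (sum >> 16);	/* fold carry */
		sum = (sum & 0xffff) + (sum >> 16);	/* carry of the fold */
		return (uint16_t)~sum;
	}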
static void udp6_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) { - if (udp_sk_rx_dst_set(sk, dst)) { - const struct rt6_info *rt = (const struct rt6_info *)dst; - - sk->sk_rx_dst_cookie = rt6_get_cookie(rt); - } + if (udp_sk_rx_dst_set(sk, dst)) + sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst)); } /* wrapper for udp_queue_rcv_skb tacking care of csum conversion and @@ -1585,7 +1583,7 @@ back_from_confirm: skb = ip6_make_skb(sk, getfrag, msg, ulen, sizeof(struct udphdr), &ipc6, - (struct rt6_info *)dst, + dst_rt6_info(dst), msg->msg_flags, &cork); err = PTR_ERR(skb); if (!IS_ERR_OR_NULL(skb)) @@ -1612,7 +1610,7 @@ do_append_data: ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk); up->len += ulen; err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr), - &ipc6, fl6, (struct rt6_info *)dst, + &ipc6, fl6, dst_rt6_info(dst), corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); if (err) udp_v6_flush_pending_frames(sk); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index bbd347de00b4..b41152dd4246 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -164,7 +164,8 @@ flush: INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff) { - const struct ipv6hdr *ipv6h = ipv6_hdr(skb); + const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation]; + const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + offset); struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); /* do fraglist only if there is no outer UDP encap (or we already processed it) */ diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index a17d783dc7c0..4abc5e9d6322 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -58,7 +58,11 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) skb_postpush_rcsum(skb, skb_network_header(skb), nhlen); if (xo && (xo->flags & XFRM_GRO)) { - skb_mac_header_rebuild(skb); + /* The full l2 header needs to be preserved so that re-injecting the packet at l2 + * works correctly in the presence of vlan tags. + */ + skb_mac_header_rebuild_full(skb, xo->orig_mac_len); + skb_reset_network_header(skb); skb_reset_transport_header(skb); return 0; } @@ -109,19 +113,6 @@ static int __xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull /* Must be an IKE packet.. pass it through */ return 1; break; - case UDP_ENCAP_ESPINUDP_NON_IKE: - /* Check if this is a keepalive packet. If so, eat it. */ - if (len == 1 && udpdata[0] == 0xff) { - return -EINVAL; - } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) && - udpdata32[0] == 0 && udpdata32[1] == 0) { - - /* ESP Packet with Non-IKE marker */ - len = sizeof(struct udphdr) + 2 * sizeof(u32); - } else - /* Must be an IKE packet.. 
pass it through */ - return 1; - break; } /* At this point we are sure that this is an ESPinUDP packet, @@ -279,6 +270,13 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, if (!x) continue; + if (unlikely(x->dir && x->dir != XFRM_SA_DIR_IN)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEDIRERROR); + xfrm_state_put(x); + x = NULL; + continue; + } + spin_lock(&x->lock); if ((!i || (x->props.flags & XFRM_STATE_WILDRECV)) && diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 4891012b692f..cc885d3aa9e5 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -70,7 +70,7 @@ static int xfrm6_get_saddr(struct net *net, int oif, static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, const struct flowi *fl) { - struct rt6_info *rt = (struct rt6_info *)xdst->route; + struct rt6_info *rt = dst_rt6_info(xdst->route); xdst->u.dst.dev = dev; netdev_hold(dev, &xdst->u.dst.dev_tracker, GFP_ATOMIC); @@ -184,7 +184,6 @@ static struct ctl_table xfrm6_policy_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { } }; static int __net_init xfrm6_net_sysctl_init(struct net *net) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index c951bb9cc2e0..c3b0b610b0aa 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -795,7 +795,7 @@ done: /* Accept a pending connection */ static int iucv_sock_accept(struct socket *sock, struct socket *newsock, - int flags, bool kern) + struct proto_accept_arg *arg) { DECLARE_WAITQUEUE(wait, current); struct sock *sk = sock->sk, *nsk; @@ -809,7 +809,7 @@ static int iucv_sock_accept(struct socket *sock, struct socket *newsock, goto done; } - timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK); /* Wait for an incoming connection */ add_wait_queue_exclusive(sk_sleep(sk), &wait); diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 5e37a8ceebcb..b7bf34a5eb37 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -73,8 +73,42 @@ const struct bus_type iucv_bus = { }; EXPORT_SYMBOL(iucv_bus); -struct device *iucv_root; -EXPORT_SYMBOL(iucv_root); +static struct device *iucv_root; + +static void iucv_release_device(struct device *device) +{ + kfree(device); +} + +struct device *iucv_alloc_device(const struct attribute_group **attrs, + struct device_driver *driver, + void *priv, const char *fmt, ...) 
+{ + struct device *dev; + va_list vargs; + int rc; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + goto out_error; + va_start(vargs, fmt); + rc = dev_set_name(dev, fmt, vargs); + va_end(vargs); + if (rc) + goto out_error; + dev->bus = &iucv_bus; + dev->parent = iucv_root; + dev->driver = driver; + dev->groups = attrs; + dev->release = iucv_release_device; + dev_set_drvdata(dev, priv); + return dev; + +out_error: + kfree(dev); + return NULL; +} +EXPORT_SYMBOL(iucv_alloc_device); static int iucv_available; diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 8d21ff25f160..88a34db265d8 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -794,6 +794,7 @@ static void l2tp_session_queue_purge(struct l2tp_session *session) static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) { struct l2tp_session *session = NULL; + struct l2tp_tunnel *orig_tunnel = tunnel; unsigned char *ptr, *optr; u16 hdrflags; u32 tunnel_id, session_id; @@ -819,13 +820,8 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) /* Get L2TP header flags */ hdrflags = ntohs(*(__be16 *)ptr); - /* Check protocol version */ + /* Get protocol version */ version = hdrflags & L2TP_HDR_VER_MASK; - if (version != tunnel->version) { - pr_debug_ratelimited("%s: recv protocol version mismatch: got %d expected %d\n", - tunnel->name, version, tunnel->version); - goto invalid; - } /* Get length of L2TP packet */ length = skb->len; @@ -837,7 +833,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) /* Skip flags */ ptr += 2; - if (tunnel->version == L2TP_HDR_VER_2) { + if (version == L2TP_HDR_VER_2) { /* If length is present, skip it */ if (hdrflags & L2TP_HDRFLAG_L) ptr += 2; @@ -845,6 +841,20 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) /* Extract tunnel and session ID */ tunnel_id = ntohs(*(__be16 *)ptr); ptr += 2; + + if (tunnel_id != tunnel->tunnel_id) { + /* We are receiving traffic for another tunnel, probably + * because we have several tunnels between the same + * IP/port quadruple, look it up. + */ + struct l2tp_tunnel *alt_tunnel; + + alt_tunnel = l2tp_tunnel_get(tunnel->l2tp_net, tunnel_id); + if (!alt_tunnel) + goto pass; + tunnel = alt_tunnel; + } + session_id = ntohs(*(__be16 *)ptr); ptr += 2; } else { @@ -854,6 +864,13 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) ptr += 4; } + /* Check protocol version */ + if (version != tunnel->version) { + pr_debug_ratelimited("%s: recv protocol version mismatch: got %d expected %d\n", + tunnel->name, version, tunnel->version); + goto invalid; + } + /* Find the session context */ session = l2tp_tunnel_get_session(tunnel, session_id); if (!session || !session->recv_skb) { @@ -875,6 +892,9 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) l2tp_recv_common(session, skb, ptr, optr, hdrflags, length); l2tp_session_dec_refcount(session); + if (tunnel != orig_tunnel) + l2tp_tunnel_dec_refcount(tunnel); + return 0; invalid: @@ -884,25 +904,26 @@ pass: /* Put UDP header back */ __skb_push(skb, sizeof(struct udphdr)); + if (tunnel != orig_tunnel) + l2tp_tunnel_dec_refcount(tunnel); + return 1; } -/* UDP encapsulation receive handler. See net/ipv4/udp.c. - * Return codes: - * 0 : success. - * <0: error - * >0: skb should be passed up to userspace as UDP. +/* UDP encapsulation receive and error receive handlers. + * See net/ipv4/udp.c for details.
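Note the reference discipline in the l2tp_udp_recv_core() change above: the tunnel reached through the socket is only borrowed (RCU-protected, no reference taken), whereas a tunnel found via l2tp_tunnel_get() is returned with its refcount raised, which is why both the success and the pass paths compare against orig_tunnel before dropping a reference. The pattern in isolation (a sketch reusing the names from the hunk):

	struct l2tp_tunnel *orig_tunnel = tunnel;	/* borrowed, no ref held */

	if (tunnel_id != tunnel->tunnel_id) {
		alt_tunnel = l2tp_tunnel_get(net, tunnel_id);	/* +1 ref on success */
		if (!alt_tunnel)
			goto pass;
		tunnel = alt_tunnel;		/* from here on we own a ref */
	}
	/* ... receive processing ... */
	if (tunnel != orig_tunnel)
		l2tp_tunnel_dec_refcount(tunnel);	/* drop only what we took */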
+ * + * Note that these functions are called from inside an + * RCU-protected region, but without the socket being locked. + * + * Hence we use rcu_dereference_sk_user_data to access the + * tunnel data structure rather than the usual l2tp_sk_to_tunnel + * accessor function. + */ int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct l2tp_tunnel *tunnel; - /* Note that this is called from the encap_rcv hook inside an - * RCU-protected region, but without the socket being locked. - * Hence we use rcu_dereference_sk_user_data to access the - * tunnel data structure rather the usual l2tp_sk_to_tunnel - * accessor function. - */ tunnel = rcu_dereference_sk_user_data(sk); if (!tunnel) goto pass_up; @@ -919,6 +940,29 @@ pass_up: } EXPORT_SYMBOL_GPL(l2tp_udp_encap_recv); +static void l2tp_udp_encap_err_recv(struct sock *sk, struct sk_buff *skb, int err, + __be16 port, u32 info, u8 *payload) +{ + struct l2tp_tunnel *tunnel; + + tunnel = rcu_dereference_sk_user_data(sk); + if (!tunnel || tunnel->fd < 0) + return; + + sk->sk_err = err; + sk_error_report(sk); + + if (ip_hdr(skb)->version == IPVERSION) { + if (inet_test_bit(RECVERR, sk)) + return ip_icmp_error(sk, skb, err, port, info, payload); +#if IS_ENABLED(CONFIG_IPV6) + } else { + if (inet6_test_bit(RECVERR6, sk)) + return ipv6_icmp_error(sk, skb, err, port, info, payload); +#endif + } +} + /************************************************************************ * Transmit handling ***********************************************************************/ @@ -1493,6 +1537,7 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, .sk_user_data = tunnel, .encap_type = UDP_ENCAP_L2TPINUDP, .encap_rcv = l2tp_udp_encap_recv, + .encap_err_rcv = l2tp_udp_encap_err_recv, .encap_destroy = l2tp_udp_encap_destroy, }; diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 39e487ccc468..8ba00ad433c2 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -127,6 +127,9 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, /* checksums verified by L2TP */ skb->ip_summed = CHECKSUM_NONE; + /* drop outer flow-hash */ + skb_clear_hash(skb); + skb_dst_drop(skb); nf_reset_ct(skb); diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 970af3983d11..19c8cc5289d5 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -459,7 +459,7 @@ static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl4 = &inet->cork.fl.u.ip4; if (connected) - rt = (struct rtable *)__sk_dst_check(sk, 0); + rt = dst_rtable(__sk_dst_check(sk, 0)); rcu_read_lock(); if (!rt) { diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 7bf14cf9ffaa..8780ec64f376 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -630,7 +630,7 @@ back_from_confirm: ulen = len + (skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0); err = ip6_append_data(sk, ip_generic_getfrag, msg, ulen, transhdrlen, &ipc6, - &fl6, (struct rt6_info *)dst, + &fl6, dst_rt6_info(dst), msg->msg_flags); if (err) ip6_flush_pending_frames(sk); diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index fde1140d899e..4eb52add7103 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -688,14 +688,13 @@ static void llc_cmsg_rcv(struct msghdr *msg, struct sk_buff *skb) * llc_ui_accept - accept a new incoming connection. * @sock: Socket which connections arrive on. * @newsock: Socket to move incoming connection to. - * @flags: User specified operational flags.
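For context on the .encap_err_rcv hook wired up in the l2tp_tunnel_register() hunk above: UDP-encapsulation users hand these callbacks to the UDP core through a struct udp_tunnel_sock_cfg. A sketch of the full registration with the new error handler, assuming the usual setup_udp_tunnel_sock() path (the field values are the ones visible in the hunk):

	struct udp_tunnel_sock_cfg udp_cfg = {
		.sk_user_data	= tunnel,
		.encap_type	= UDP_ENCAP_L2TPINUDP,
		.encap_rcv	= l2tp_udp_encap_recv,
		.encap_err_rcv	= l2tp_udp_encap_err_recv,	/* new: ICMP errors */
		.encap_destroy	= l2tp_udp_encap_destroy,
	};

	setup_udp_tunnel_sock(net, sock, &udp_cfg);

With the hook in place, ICMP errors on the encap socket reach l2tp_udp_encap_err_recv(), which reports them via sk_error_report() and, when RECVERR/RECVERR6 is set, queues them on the socket error queue.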
- * @kern: If the socket is kernel internal + * @arg: User specified arguments * * Accept a new incoming connection. * Returns 0 upon success, negative otherwise. */ -static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags, - bool kern) +static int llc_ui_accept(struct socket *sock, struct socket *newsock, + struct proto_accept_arg *arg) { struct sock *sk = sock->sk, *newsk; struct llc_sock *llc, *newllc; diff --git a/net/llc/sysctl_net_llc.c b/net/llc/sysctl_net_llc.c index 8443a6d841b0..72e101135f8c 100644 --- a/net/llc/sysctl_net_llc.c +++ b/net/llc/sysctl_net_llc.c @@ -44,11 +44,6 @@ static struct ctl_table llc2_timeout_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { }, -}; - -static struct ctl_table llc_station_table[] = { - { }, }; static struct ctl_table_header *llc2_timeout_header; @@ -56,8 +51,9 @@ static struct ctl_table_header *llc_station_header; int __init llc_sysctl_init(void) { + struct ctl_table empty[1] = {}; llc2_timeout_header = register_net_sysctl(&init_net, "net/llc/llc2/timeout", llc2_timeout_table); - llc_station_header = register_net_sysctl(&init_net, "net/llc/station", llc_station_table); + llc_station_header = register_net_sysctl_sz(&init_net, "net/llc/station", empty, 0); if (!llc2_timeout_header || !llc_station_header) { llc_sysctl_exit(); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index b08e5d7687e3..83ad6c9709fe 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2958,8 +2958,9 @@ static int ieee80211_set_mcast_rate(struct wiphy *wiphy, struct net_device *dev, memcpy(sdata->vif.bss_conf.mcast_rate, rate, sizeof(int) * NUM_NL80211_BANDS); - ieee80211_link_info_change_notify(sdata, &sdata->deflink, - BSS_CHANGED_MCAST_RATE); + if (ieee80211_sdata_running(sdata)) + ieee80211_link_info_change_notify(sdata, &sdata->deflink, + BSS_CHANGED_MCAST_RATE); return 0; } @@ -4016,7 +4017,7 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, goto out; } - link_data->csa_chanreq = chanreq; + link_data->csa_chanreq = chanreq; link_conf->csa_active = true; if (params->block_tx && @@ -4027,7 +4028,7 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, } cfg80211_ch_switch_started_notify(sdata->dev, - &link_data->csa_chanreq.oper, 0, + &link_data->csa_chanreq.oper, link_id, params->count, params->block_tx); if (changed) { diff --git a/net/mac80211/he.c b/net/mac80211/he.c index 9f5ffdc9db28..ecbb042dd043 100644 --- a/net/mac80211/he.c +++ b/net/mac80211/he.c @@ -230,15 +230,21 @@ ieee80211_he_spr_ie_to_bss_conf(struct ieee80211_vif *vif, if (!he_spr_ie_elem) return; + + he_obss_pd->sr_ctrl = he_spr_ie_elem->he_sr_control; data = he_spr_ie_elem->optional; if (he_spr_ie_elem->he_sr_control & IEEE80211_HE_SPR_NON_SRG_OFFSET_PRESENT) - data++; + he_obss_pd->non_srg_max_offset = *data++; + if (he_spr_ie_elem->he_sr_control & IEEE80211_HE_SPR_SRG_INFORMATION_PRESENT) { - he_obss_pd->max_offset = *data++; he_obss_pd->min_offset = *data++; + he_obss_pd->max_offset = *data++; + memcpy(he_obss_pd->bss_color_bitmap, data, 8); + data += 8; + memcpy(he_obss_pd->partial_bssid_bitmap, data, 8); he_obss_pd->enable = true; } } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index eb62b7d4b4f7..3cedfdc9099b 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1845,6 +1845,8 @@ void ieee80211_link_info_change_notify(struct ieee80211_sub_if_data *sdata, void ieee80211_configure_filter(struct ieee80211_local *local); u64 
ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata); +void ieee80211_handle_queued_frames(struct ieee80211_local *local); + u64 ieee80211_mgmt_tx_cookie(struct ieee80211_local *local); int ieee80211_attach_ack_skb(struct ieee80211_local *local, struct sk_buff *skb, u64 *cookie, gfp_t gfp); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 4eaea0a9975b..1132dea0e290 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -423,9 +423,8 @@ u64 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata) BSS_CHANGED_ERP_SLOT; } -static void ieee80211_tasklet_handler(struct tasklet_struct *t) +void ieee80211_handle_queued_frames(struct ieee80211_local *local) { - struct ieee80211_local *local = from_tasklet(local, t, tasklet); struct sk_buff *skb; while ((skb = skb_dequeue(&local->skb_queue)) || @@ -450,6 +449,13 @@ static void ieee80211_tasklet_handler(struct tasklet_struct *t) } } +static void ieee80211_tasklet_handler(struct tasklet_struct *t) +{ + struct ieee80211_local *local = from_tasklet(local, t, tasklet); + + ieee80211_handle_queued_frames(local); +} + static void ieee80211_restart_work(struct work_struct *work) { struct ieee80211_local *local = diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index cbc9b5e40cb3..6d4510221c98 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -1776,6 +1776,7 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) ifmsh->last_preq = jiffies; ifmsh->next_perr = jiffies; ifmsh->csa_role = IEEE80211_MESH_CSA_ROLE_NONE; + ifmsh->nonpeer_pm = NL80211_MESH_POWER_ACTIVE; /* Allocate all mesh structures when creating the first mesh interface. */ if (!mesh_allocated) ieee80211s_init(); diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index a6b62169f084..c0a5c75cddcb 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -1017,10 +1017,23 @@ void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata, */ void mesh_path_flush_pending(struct mesh_path *mpath) { + struct ieee80211_sub_if_data *sdata = mpath->sdata; + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + struct mesh_preq_queue *preq, *tmp; struct sk_buff *skb; while ((skb = skb_dequeue(&mpath->frame_queue)) != NULL) mesh_path_discard_frame(mpath->sdata, skb); + + spin_lock_bh(&ifmsh->mesh_preq_queue_lock); + list_for_each_entry_safe(preq, tmp, &ifmsh->preq_queue.list, list) { + if (ether_addr_equal(mpath->dst, preq->dst)) { + list_del(&preq->list); + kfree(preq); + --ifmsh->preq_queue_len; + } + } + spin_unlock_bh(&ifmsh->mesh_preq_queue_lock); } /** diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c index 55e5497f8978..055a60e90979 100644 --- a/net/mac80211/parse.c +++ b/net/mac80211/parse.c @@ -111,7 +111,7 @@ ieee80211_parse_extension_element(u32 *crc, if (params->mode < IEEE80211_CONN_MODE_HE) break; if (len >= sizeof(*elems->he_spr) && - len >= ieee80211_he_spr_size(data)) + len >= ieee80211_he_spr_size(data) - 1) elems->he_spr = data; break; case WLAN_EID_EXT_HE_6GHZ_CAPA: diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 3da1c5c45035..8ecc4b710b0e 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -744,15 +744,21 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, local->hw_scan_ies_bufsize *= n_bands; } - local->hw_scan_req = kmalloc( - sizeof(*local->hw_scan_req) + - req->n_channels * sizeof(req->channels[0]) + - local->hw_scan_ies_bufsize, GFP_KERNEL); + local->hw_scan_req = kmalloc(struct_size(local->hw_scan_req, + req.channels, + 
req->n_channels) + + local->hw_scan_ies_bufsize, + GFP_KERNEL); if (!local->hw_scan_req) return -ENOMEM; local->hw_scan_req->req.ssids = req->ssids; local->hw_scan_req->req.n_ssids = req->n_ssids; + /* None of the channels are actually set + * up but let UBSAN know the boundaries. + */ + local->hw_scan_req->req.n_channels = req->n_channels; + ies = (u8 *)local->hw_scan_req + sizeof(*local->hw_scan_req) + req->n_channels * sizeof(req->channels[0]); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index da5fdd6f5c85..aa22f09e6d14 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1724,7 +1724,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) skb_queue_head_init(&pending); /* sync with ieee80211_tx_h_unicast_ps_buf */ - spin_lock(&sta->ps_lock); + spin_lock_bh(&sta->ps_lock); /* Send all buffered frames to the station */ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { int count = skb_queue_len(&pending), tmp; @@ -1753,7 +1753,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) */ clear_sta_flag(sta, WLAN_STA_PSPOLL); clear_sta_flag(sta, WLAN_STA_UAPSD); - spin_unlock(&sta->ps_lock); + spin_unlock_bh(&sta->ps_lock); atomic_dec(&ps->num_sta_ps); diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 8e758b5074bd..b26aacfbc622 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -33,7 +33,7 @@ __string(vif_name, sdata->name) #define VIF_ASSIGN __entry->vif_type = sdata->vif.type; __entry->sdata = sdata; \ __entry->p2p = sdata->vif.p2p; \ - __assign_str(vif_name, sdata->name) + __assign_str(vif_name) #define VIF_PR_FMT " vif:%s(%d%s)" #define VIF_PR_ARG __get_str(vif_name), __entry->vif_type, __entry->p2p ? "/p2p" : "" diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 0b893e958959..283bfc99417e 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1567,6 +1567,8 @@ u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata, void ieee80211_stop_device(struct ieee80211_local *local) { + ieee80211_handle_queued_frames(local); + ieee80211_led_radio(local, false); ieee80211_mod_tpt_led_trig(local, 0, IEEE80211_TPT_LEDTRIG_FL_RADIO); diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 5d2012d1cf4a..2dc7a908a6bb 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1377,13 +1377,13 @@ static const struct ctl_table mpls_dev_table[] = { .proc_handler = mpls_conf_proc, .data = MPLS_PERDEV_SYSCTL_OFFSET(input_enabled), }, - { } }; static int mpls_dev_sysctl_register(struct net_device *dev, struct mpls_dev *mdev) { char path[sizeof("net/mpls/conf/") + IFNAMSIZ]; + size_t table_size = ARRAY_SIZE(mpls_dev_table); struct net *net = dev_net(dev); struct ctl_table *table; int i; @@ -1395,7 +1395,7 @@ static int mpls_dev_sysctl_register(struct net_device *dev, /* Table data contains only offsets relative to the base of * the mdev at this point, so make them absolute. 
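The scan.c hunk above swaps open-coded sizeof arithmetic for struct_size(), which computes the size of a structure ending in a flexible-array member and saturates instead of wrapping on overflow; storing n_channels afterwards then gives UBSAN the array bound it needs. The idiom in miniature (hypothetical types, a sketch only):

	struct scan_req {
		unsigned int n_channels;
		struct channel *channels[];	/* flexible array member */
	};

	/* sizeof(struct scan_req) + n * sizeof(struct channel *),
	 * saturating to SIZE_MAX rather than wrapping
	 */
	req = kmalloc(struct_size(req, channels, n) + ies_len, GFP_KERNEL);
	if (!req)
		return -ENOMEM;
	req->n_channels = n;	/* tell UBSAN the boundary */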
*/ - for (i = 0; i < ARRAY_SIZE(mpls_dev_table); i++) { + for (i = 0; i < table_size; i++) { table[i].data = (char *)mdev + (uintptr_t)table[i].data; table[i].extra1 = mdev; table[i].extra2 = net; @@ -1403,8 +1403,7 @@ static int mpls_dev_sysctl_register(struct net_device *dev, snprintf(path, sizeof(path), "net/mpls/conf/%s", dev->name); - mdev->sysctl = register_net_sysctl_sz(net, path, table, - ARRAY_SIZE(mpls_dev_table)); + mdev->sysctl = register_net_sysctl_sz(net, path, table, table_size); if (!mdev->sysctl) goto free; @@ -2653,11 +2652,11 @@ static const struct ctl_table mpls_table[] = { .extra1 = SYSCTL_ONE, .extra2 = &ttl_max, }, - { } }; static int mpls_net_init(struct net *net) { + size_t table_size = ARRAY_SIZE(mpls_table); struct ctl_table *table; int i; @@ -2673,11 +2672,11 @@ static int mpls_net_init(struct net *net) /* Table data contains only offsets relative to the base of * the mdev at this point, so make them absolute. */ - for (i = 0; i < ARRAY_SIZE(mpls_table) - 1; i++) + for (i = 0; i < table_size; i++) table[i].data = (char *)net + (uintptr_t)table[i].data; net->mpls.ctl = register_net_sysctl_sz(net, "net/mpls", table, - ARRAY_SIZE(mpls_table)); + table_size); if (net->mpls.ctl == NULL) { kfree(table); return -ENOMEM; diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index 8fc790f2a01b..4385fd3b13be 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -81,7 +81,7 @@ static int mpls_xmit(struct sk_buff *skb) ttl = net->mpls.default_ttl; else ttl = ip_hdr(skb)->ttl; - rt = (struct rtable *)dst; + rt = dst_rtable(dst); } else if (dst->ops->family == AF_INET6) { if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED) ttl = tun_encap_info->default_ttl; @@ -90,7 +90,7 @@ static int mpls_xmit(struct sk_buff *skb) ttl = net->mpls.default_ttl; else ttl = ipv6_hdr(skb)->hop_limit; - rt6 = (struct rt6_info *)dst; + rt6 = dst_rt6_info(dst); } else { goto drop; } diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c index 8d661156ab8c..98b1dd498ff6 100644 --- a/net/mptcp/ctrl.c +++ b/net/mptcp/ctrl.c @@ -92,10 +92,65 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet) pernet->allow_join_initial_addr_port = 1; pernet->stale_loss_cnt = 4; pernet->pm_type = MPTCP_PM_TYPE_KERNEL; - strcpy(pernet->scheduler, "default"); + strscpy(pernet->scheduler, "default", sizeof(pernet->scheduler)); } #ifdef CONFIG_SYSCTL +static int mptcp_set_scheduler(const struct net *net, const char *name) +{ + struct mptcp_pernet *pernet = mptcp_get_pernet(net); + struct mptcp_sched_ops *sched; + int ret = 0; + + rcu_read_lock(); + sched = mptcp_sched_find(name); + if (sched) + strscpy(pernet->scheduler, name, MPTCP_SCHED_NAME_MAX); + else + ret = -ENOENT; + rcu_read_unlock(); + + return ret; +} + +static int proc_scheduler(struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + const struct net *net = current->nsproxy->net_ns; + char val[MPTCP_SCHED_NAME_MAX]; + struct ctl_table tbl = { + .data = val, + .maxlen = MPTCP_SCHED_NAME_MAX, + }; + int ret; + + strscpy(val, mptcp_get_scheduler(net), MPTCP_SCHED_NAME_MAX); + + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); + if (write && ret == 0) + ret = mptcp_set_scheduler(net, val); + + return ret; +} + +static int proc_available_schedulers(struct ctl_table *ctl, + int write, void *buffer, + size_t *lenp, loff_t *ppos) +{ + struct ctl_table tbl = { .maxlen = MPTCP_SCHED_BUF_MAX, }; + int ret; + + tbl.data = kmalloc(tbl.maxlen, GFP_USER); + if (!tbl.data) + return -ENOMEM; + + 
mptcp_get_available_schedulers(tbl.data, MPTCP_SCHED_BUF_MAX); + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); + kfree(tbl.data); + + return ret; +} + static struct ctl_table mptcp_sysctl_table[] = { { .procname = "enabled", @@ -148,7 +203,13 @@ static struct ctl_table mptcp_sysctl_table[] = { .procname = "scheduler", .maxlen = MPTCP_SCHED_NAME_MAX, .mode = 0644, - .proc_handler = proc_dostring, + .proc_handler = proc_scheduler, + }, + { + .procname = "available_schedulers", + .maxlen = MPTCP_SCHED_BUF_MAX, + .mode = 0644, + .proc_handler = proc_available_schedulers, }, { .procname = "close_timeout", @@ -156,7 +217,6 @@ static struct ctl_table mptcp_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - {} }; static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) @@ -178,7 +238,8 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) table[4].data = &pernet->stale_loss_cnt; table[5].data = &pernet->pm_type; table[6].data = &pernet->scheduler; - table[7].data = &pernet->close_timeout; + /* table[7] is for available_schedulers which is read-only info */ + table[8].data = &pernet->close_timeout; hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table, ARRAY_SIZE(mptcp_sysctl_table)); diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index dd7fd1f246b5..2704afd0dfe4 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -1,5 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ +#include <net/inet_common.h> + enum linux_mptcp_mib_field { MPTCP_MIB_NUM = 0, MPTCP_MIB_MPCAPABLEPASSIVE, /* Received SYN with MP_CAPABLE */ diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 5c17d39146ea..7f53e022e27e 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -14,6 +14,7 @@ #include "protocol.h" #include "mib.h" +#include "mptcp_pm_gen.h" static int pm_nl_pernet_id; diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 9f5d422d5ef6..f0a4590506c6 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -6,6 +6,7 @@ #include "protocol.h" #include "mib.h" +#include "mptcp_pm_gen.h" void mptcp_free_local_addr_list(struct mptcp_sock *msk) { diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index f8bc34f0d973..7d44196ec5b6 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -20,6 +20,7 @@ #include <net/transp_v6.h> #endif #include <net/mptcp.h> +#include <net/hotdata.h> #include <net/xfrm.h> #include <asm/ioctls.h> #include "protocol.h" @@ -1272,7 +1273,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, i = skb_shinfo(skb)->nr_frags; can_coalesce = skb_can_coalesce(skb, i, dfrag->page, offset); - if (!can_coalesce && i >= READ_ONCE(sysctl_max_skb_frags)) { + if (!can_coalesce && i >= READ_ONCE(net_hotdata.sysctl_max_skb_frags)) { tcp_mark_push(tcp_sk(ssk), skb); goto alloc_skb; } @@ -2569,7 +2570,7 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk) slow = lock_sock_fast(tcp_sk); if (tcp_sk->sk_state != TCP_CLOSE) { - tcp_send_active_reset(tcp_sk, GFP_ATOMIC); + mptcp_send_active_reset_reason(tcp_sk); tcp_set_state(tcp_sk, TCP_CLOSE); } unlock_sock_fast(tcp_sk, slow); @@ -2813,7 +2814,8 @@ static void mptcp_ca_reset(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); tcp_assign_congestion_control(sk); - strcpy(mptcp_sk(sk)->ca_name, icsk->icsk_ca_ops->name); + strscpy(mptcp_sk(sk)->ca_name, icsk->icsk_ca_ops->name, + sizeof(mptcp_sk(sk)->ca_name)); /* no need to keep a reference to the ops, the name will 
suffice */ tcp_cleanup_congestion_control(sk); @@ -3730,6 +3732,9 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT); mptcp_subflow_early_fallback(msk, subflow); } + + WRITE_ONCE(msk->write_seq, subflow->idsn); + WRITE_ONCE(msk->snd_nxt, subflow->idsn); if (likely(!__mptcp_check_fallback(msk))) MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVE); @@ -3877,11 +3882,10 @@ unlock: } static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, - int flags, bool kern) + struct proto_accept_arg *arg) { struct mptcp_sock *msk = mptcp_sk(sock->sk); struct sock *ssk, *newsk; - int err; pr_debug("msk=%p", msk); @@ -3893,9 +3897,9 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, return -EINVAL; pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk)); - newsk = inet_csk_accept(ssk, flags, &err, kern); + newsk = inet_csk_accept(ssk, arg); if (!newsk) - return err; + return arg->err; pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk)); if (sk_is_mptcp(newsk)) { @@ -3916,7 +3920,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, newsk = new_mptcp_sock; MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK); - newsk->sk_kern_sock = kern; + newsk->sk_kern_sock = arg->kern; lock_sock(newsk); __inet_accept(sock, newsock, newsk); @@ -3945,7 +3949,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, } } else { tcpfallback: - newsk->sk_kern_sock = kern; + newsk->sk_kern_sock = arg->kern; lock_sock(newsk); __inet_accept(sock, newsock, newsk); /* we are being invoked after accepting a non-mp-capable @@ -4165,7 +4169,7 @@ int __init mptcp_proto_v6_init(void) int err; mptcp_v6_prot = mptcp_prot; - strcpy(mptcp_v6_prot.name, "MPTCPv6"); + strscpy(mptcp_v6_prot.name, "MPTCPv6", sizeof(mptcp_v6_prot.name)); mptcp_v6_prot.slab = NULL; mptcp_v6_prot.obj_size = sizeof(struct mptcp6_sock); mptcp_v6_prot.ipv6_pinfo_offset = offsetof(struct mptcp6_sock, np); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index fdfa843e2d88..7aa47e2dd52b 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -12,8 +12,7 @@ #include <net/inet_connection_sock.h> #include <uapi/linux/mptcp.h> #include <net/genetlink.h> - -#include "mptcp_pm_gen.h" +#include <net/rstreason.h> #define MPTCP_SUPPORTED_VERSION 1 @@ -311,6 +310,9 @@ struct mptcp_sock { free_first:1, rcvspace_init:1; u32 notsent_lowat; + int keepalive_cnt; + int keepalive_idle; + int keepalive_intvl; struct work_struct work; struct sk_buff *ooo_last_skb; struct rb_root out_of_order_queue; @@ -581,6 +583,43 @@ mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow) WRITE_ONCE(subflow->local_id, -1); } +/* Convert reset reasons in MPTCP to enum sk_rst_reason type */ +static inline enum sk_rst_reason +sk_rst_convert_mptcp_reason(u32 reason) +{ + switch (reason) { + case MPTCP_RST_EUNSPEC: + return SK_RST_REASON_MPTCP_RST_EUNSPEC; + case MPTCP_RST_EMPTCP: + return SK_RST_REASON_MPTCP_RST_EMPTCP; + case MPTCP_RST_ERESOURCE: + return SK_RST_REASON_MPTCP_RST_ERESOURCE; + case MPTCP_RST_EPROHIBIT: + return SK_RST_REASON_MPTCP_RST_EPROHIBIT; + case MPTCP_RST_EWQ2BIG: + return SK_RST_REASON_MPTCP_RST_EWQ2BIG; + case MPTCP_RST_EBADPERF: + return SK_RST_REASON_MPTCP_RST_EBADPERF; + case MPTCP_RST_EMIDDLEBOX: + return SK_RST_REASON_MPTCP_RST_EMIDDLEBOX; + default: + /* It should not happen, or else errors may occur + * in MPTCP layer + */ + 
return SK_RST_REASON_ERROR; + } +} + +static inline void +mptcp_send_active_reset_reason(struct sock *sk) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + enum sk_rst_reason reason; + + reason = sk_rst_convert_mptcp_reason(subflow->reset_reason); + tcp_send_active_reset(sk, GFP_ATOMIC, reason); +} + static inline u64 mptcp_subflow_get_map_offset(const struct mptcp_subflow_context *subflow) { @@ -645,6 +684,7 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net); unsigned int mptcp_close_timeout(const struct sock *sk); int mptcp_get_pm_type(const struct net *net); const char *mptcp_get_scheduler(const struct net *net); +void mptcp_get_available_schedulers(char *buf, size_t maxlen); void __mptcp_subflow_fully_established(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, const struct mptcp_options_received *mp_opt); diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c index 4ab0693c069c..4a7fd0508ad2 100644 --- a/net/mptcp/sched.c +++ b/net/mptcp/sched.c @@ -51,6 +51,28 @@ struct mptcp_sched_ops *mptcp_sched_find(const char *name) return ret; } +/* Build string with list of available scheduler values. + * Similar to tcp_get_available_congestion_control() + */ +void mptcp_get_available_schedulers(char *buf, size_t maxlen) +{ + struct mptcp_sched_ops *sched; + size_t offs = 0; + + rcu_read_lock(); + spin_lock(&mptcp_sched_list_lock); + list_for_each_entry_rcu(sched, &mptcp_sched_list, list) { + offs += snprintf(buf + offs, maxlen - offs, + "%s%s", + offs == 0 ? "" : " ", sched->name); + + if (WARN_ON_ONCE(offs >= maxlen)) + break; + } + spin_unlock(&mptcp_sched_list_lock); + rcu_read_unlock(); +} + int mptcp_register_scheduler(struct mptcp_sched_ops *sched) { if (!sched->get_subflow) diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 1fea43f5b6f3..f9a4fb17b5b7 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -181,8 +181,6 @@ static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname, switch (optname) { case SO_KEEPALIVE: - mptcp_sol_socket_sync_intval(msk, optname, val); - return 0; case SO_DEBUG: case SO_MARK: case SO_PRIORITY: @@ -618,12 +616,37 @@ static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t } if (ret == 0) - strcpy(msk->ca_name, name); + strscpy(msk->ca_name, name, sizeof(msk->ca_name)); release_sock(sk); return ret; } +static int __mptcp_setsockopt_set_val(struct mptcp_sock *msk, int max, + int (*set_val)(struct sock *, int), + int *msk_val, int val) +{ + struct mptcp_subflow_context *subflow; + int err = 0; + + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + int ret; + + lock_sock(ssk); + ret = set_val(ssk, val); + err = err ? 
: ret; + release_sock(ssk); + } + + if (!err) { + *msk_val = val; + sockopt_seq_inc(msk); + } + + return err; +} + static int __mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, int val) { struct mptcp_subflow_context *subflow; @@ -820,6 +843,22 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, case TCP_NODELAY: ret = __mptcp_setsockopt_sol_tcp_nodelay(msk, val); break; + case TCP_KEEPIDLE: + ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPIDLE, + &tcp_sock_set_keepidle_locked, + &msk->keepalive_idle, val); + break; + case TCP_KEEPINTVL: + ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPINTVL, + &tcp_sock_set_keepintvl, + &msk->keepalive_intvl, val); + break; + case TCP_KEEPCNT: + ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPCNT, + &tcp_sock_set_keepcnt, + &msk->keepalive_cnt, + val); + break; default: ret = -ENOPROTOOPT; } @@ -960,6 +999,10 @@ static int mptcp_getsockopt_info(struct mptcp_sock *msk, char __user *optval, in if (get_user(len, optlen)) return -EFAULT; + /* When used only to check if a fallback to TCP happened. */ + if (len == 0) + return 0; + len = min_t(unsigned int, len, sizeof(struct mptcp_info)); mptcp_diag_fill_info(msk, &m_info); @@ -1328,6 +1371,8 @@ static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval, static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, char __user *optval, int __user *optlen) { + struct sock *sk = (void *)msk; + switch (optname) { case TCP_ULP: case TCP_CONGESTION: @@ -1346,8 +1391,22 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, return mptcp_put_int_option(msk, optval, optlen, msk->cork); case TCP_NODELAY: return mptcp_put_int_option(msk, optval, optlen, msk->nodelay); + case TCP_KEEPIDLE: + return mptcp_put_int_option(msk, optval, optlen, + msk->keepalive_idle ? : + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_time) / HZ); + case TCP_KEEPINTVL: + return mptcp_put_int_option(msk, optval, optlen, + msk->keepalive_intvl ? : + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_intvl) / HZ); + case TCP_KEEPCNT: + return mptcp_put_int_option(msk, optval, optlen, + msk->keepalive_cnt ? 
: + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_probes)); case TCP_NOTSENT_LOWAT: return mptcp_put_int_option(msk, optval, optlen, msk->notsent_lowat); + case TCP_IS_MPTCP: + return mptcp_put_int_option(msk, optval, optlen, 1); } return -EOPNOTSUPP; } @@ -1463,6 +1522,9 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) tcp_set_congestion_control(ssk, msk->ca_name, false, true); __tcp_sock_set_cork(ssk, !!msk->cork); __tcp_sock_set_nodelay(ssk, !!msk->nodelay); + tcp_sock_set_keepidle_locked(ssk, msk->keepalive_idle); + tcp_sock_set_keepintvl(ssk, msk->keepalive_intvl); + tcp_sock_set_keepcnt(ssk, msk->keepalive_cnt); inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk)); inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk)); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index b94d1dca1094..612c38570a64 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -20,6 +20,7 @@ #include <net/transp_v6.h> #endif #include <net/mptcp.h> + #include "protocol.h" #include "mib.h" @@ -286,6 +287,16 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req, } EXPORT_SYMBOL_GPL(mptcp_subflow_init_cookie_req); +static enum sk_rst_reason mptcp_get_rst_reason(const struct sk_buff *skb) +{ + const struct mptcp_ext *mpext = mptcp_get_ext(skb); + + if (!mpext) + return SK_RST_REASON_NOT_SPECIFIED; + + return sk_rst_convert_mptcp_reason(mpext->reset_reason); +} + static struct dst_entry *subflow_v4_route_req(const struct sock *sk, struct sk_buff *skb, struct flowi *fl, @@ -308,7 +319,8 @@ static struct dst_entry *subflow_v4_route_req(const struct sock *sk, dst_release(dst); if (!req->syncookie) - tcp_request_sock_ops.send_reset(sk, skb); + tcp_request_sock_ops.send_reset(sk, skb, + mptcp_get_rst_reason(skb)); return NULL; } @@ -376,7 +388,8 @@ static struct dst_entry *subflow_v6_route_req(const struct sock *sk, dst_release(dst); if (!req->syncookie) - tcp6_request_sock_ops.send_reset(sk, skb); + tcp6_request_sock_ops.send_reset(sk, skb, + mptcp_get_rst_reason(skb)); return NULL; } #endif @@ -412,7 +425,7 @@ void mptcp_subflow_reset(struct sock *ssk) /* must hold: tcp_done() could drop last reference on parent */ sock_hold(sk); - tcp_send_active_reset(ssk, GFP_ATOMIC); + mptcp_send_active_reset_reason(ssk); tcp_done(ssk); if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags)) mptcp_schedule_work(sk); @@ -781,6 +794,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, struct mptcp_subflow_request_sock *subflow_req; struct mptcp_options_received mp_opt; bool fallback, fallback_is_fatal; + enum sk_rst_reason reason; struct mptcp_sock *owner; struct sock *child; @@ -899,7 +913,7 @@ create_child: } /* check for expected invariant - should never trigger, just help - * catching eariler subtle bugs + * catching earlier subtle bugs */ WARN_ON_ONCE(child && *own_req && tcp_sk(child)->is_mptcp && (!mptcp_subflow_ctx(child) || @@ -911,7 +925,8 @@ dispose_child: tcp_rsk(req)->drop_req = true; inet_csk_prepare_for_destroy_sock(child); tcp_done(child); - req->rsk_ops->send_reset(sk, skb); + reason = mptcp_get_rst_reason(skb); + req->rsk_ops->send_reset(sk, skb, reason); /* The last child reference will be released by the caller */ return child; @@ -1104,6 +1119,8 @@ static enum mapping_status get_mapping_status(struct sock *ssk, } if (mpext->data_fin == 1) { + u64 data_fin_seq; + if (data_len == 1) { bool updated = mptcp_update_rcv_data_fin(msk, mpext->data_seq, mpext->dsn64); @@ -1116,26 +1133,26 @@ static enum 
mapping_status get_mapping_status(struct sock *ssk, */ skb_ext_del(skb, SKB_EXT_MPTCP); return MAPPING_OK; - } else { - if (updated) - mptcp_schedule_work((struct sock *)msk); - - return MAPPING_DATA_FIN; } - } else { - u64 data_fin_seq = mpext->data_seq + data_len - 1; - /* If mpext->data_seq is a 32-bit value, data_fin_seq - * must also be limited to 32 bits. - */ - if (!mpext->dsn64) - data_fin_seq &= GENMASK_ULL(31, 0); + if (updated) + mptcp_schedule_work((struct sock *)msk); - mptcp_update_rcv_data_fin(msk, data_fin_seq, mpext->dsn64); - pr_debug("DATA_FIN with mapping seq=%llu dsn64=%d", - data_fin_seq, mpext->dsn64); + return MAPPING_DATA_FIN; } + data_fin_seq = mpext->data_seq + data_len - 1; + + /* If mpext->data_seq is a 32-bit value, data_fin_seq must also + * be limited to 32 bits. + */ + if (!mpext->dsn64) + data_fin_seq &= GENMASK_ULL(31, 0); + + mptcp_update_rcv_data_fin(msk, data_fin_seq, mpext->dsn64); + pr_debug("DATA_FIN with mapping seq=%llu dsn64=%d", + data_fin_seq, mpext->dsn64); + /* Adjust for DATA_FIN using 1 byte of sequence space */ data_len--; } @@ -1246,7 +1263,7 @@ static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk) struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); unsigned long fail_tout; - /* greceful failure can happen only on the MPC subflow */ + /* graceful failure can happen only on the MPC subflow */ if (WARN_ON_ONCE(ssk != READ_ONCE(msk->first))) return; @@ -1348,7 +1365,7 @@ reset: tcp_set_state(ssk, TCP_CLOSE); while ((skb = skb_peek(&ssk->sk_receive_queue))) sk_eat_skb(ssk, skb); - tcp_send_active_reset(ssk, GFP_ATOMIC); + mptcp_send_active_reset_reason(ssk); WRITE_ONCE(subflow->data_avail, false); return false; } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 143a341bbc0a..b6d0dcf3a5c3 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -94,6 +94,7 @@ static void update_defense_level(struct netns_ipvs *ipvs) { struct sysinfo i; int availmem; + int amemthresh; int nomem; int to_change = -1; @@ -105,7 +106,8 @@ static void update_defense_level(struct netns_ipvs *ipvs) /* si_swapinfo(&i); */ /* availmem = availmem - (i.totalswap - i.freeswap); */ - nomem = (availmem < ipvs->sysctl_amemthresh); + amemthresh = max(READ_ONCE(ipvs->sysctl_amemthresh), 0); + nomem = (availmem < amemthresh); local_bh_disable(); @@ -145,9 +147,8 @@ static void update_defense_level(struct netns_ipvs *ipvs) break; case 1: if (nomem) { - ipvs->drop_rate = ipvs->drop_counter - = ipvs->sysctl_amemthresh / - (ipvs->sysctl_amemthresh-availmem); + ipvs->drop_counter = amemthresh / (amemthresh - availmem); + ipvs->drop_rate = ipvs->drop_counter; ipvs->sysctl_drop_packet = 2; } else { ipvs->drop_rate = 0; @@ -155,9 +156,8 @@ static void update_defense_level(struct netns_ipvs *ipvs) break; case 2: if (nomem) { - ipvs->drop_rate = ipvs->drop_counter - = ipvs->sysctl_amemthresh / - (ipvs->sysctl_amemthresh-availmem); + ipvs->drop_counter = amemthresh / (amemthresh - availmem); + ipvs->drop_rate = ipvs->drop_counter; } else { ipvs->drop_rate = 0; ipvs->sysctl_drop_packet = 1; @@ -2263,7 +2263,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = proc_dointvec, }, #endif - { } }; #endif @@ -4270,6 +4269,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) struct ctl_table *tbl; int idx, ret; size_t ctl_table_size = ARRAY_SIZE(vs_vars); + bool unpriv = net->user_ns != &init_user_ns; atomic_set(&ipvs->dropentry, 0); spin_lock_init(&ipvs->dropentry_lock); 
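The update_defense_level() hunk above is a small but complete race fix: sysctl_amemthresh is read once through READ_ONCE() and clamped, so the availmem < amemthresh test and the later division work on the same snapshot, and a concurrent sysctl write can no longer make the divisor zero or negative. The shape of the fix in isolation:

	int amemthresh = max(READ_ONCE(ipvs->sysctl_amemthresh), 0);
	int nomem = availmem < amemthresh;

	if (nomem) {
		/* the test above guarantees amemthresh - availmem >= 1 */
		ipvs->drop_counter = amemthresh / (amemthresh - availmem);
		ipvs->drop_rate = ipvs->drop_counter;
	}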
@@ -4284,12 +4284,6 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL); if (tbl == NULL) return -ENOMEM; - - /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) { - tbl[0].procname = NULL; - ctl_table_size = 0; - } } else tbl = vs_vars; /* Initialize sysctl defaults */ @@ -4315,10 +4309,17 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) ipvs->sysctl_sync_ports = 1; tbl[idx++].data = &ipvs->sysctl_sync_ports; tbl[idx++].data = &ipvs->sysctl_sync_persist_mode; + ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32; + if (unpriv) + tbl[idx].mode = 0444; tbl[idx++].data = &ipvs->sysctl_sync_qlen_max; + ipvs->sysctl_sync_sock_size = 0; + if (unpriv) + tbl[idx].mode = 0444; tbl[idx++].data = &ipvs->sysctl_sync_sock_size; + tbl[idx++].data = &ipvs->sysctl_cache_bypass; tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn; tbl[idx++].data = &ipvs->sysctl_sloppy_tcp; @@ -4341,15 +4342,22 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode; tbl[idx++].data = &ipvs->sysctl_schedule_icmp; tbl[idx++].data = &ipvs->sysctl_ignore_tunneled; + ipvs->sysctl_run_estimation = 1; + if (unpriv) + tbl[idx].mode = 0444; tbl[idx].extra2 = ipvs; tbl[idx++].data = &ipvs->sysctl_run_estimation; ipvs->est_cpulist_valid = 0; + if (unpriv) + tbl[idx].mode = 0444; tbl[idx].extra2 = ipvs; tbl[idx++].data = &ipvs->sysctl_est_cpulist; ipvs->sysctl_est_nice = IPVS_EST_NICE; + if (unpriv) + tbl[idx].mode = 0444; tbl[idx].extra2 = ipvs; tbl[idx++].data = &ipvs->sysctl_est_nice; diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 8ceec7a2fa8f..2423513d701d 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -123,7 +123,6 @@ static struct ctl_table vs_vars_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { } }; #endif @@ -563,10 +562,8 @@ static int __net_init __ip_vs_lblc_init(struct net *net) return -ENOMEM; /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) { - ipvs->lblc_ctl_table[0].procname = NULL; + if (net->user_ns != &init_user_ns) vars_table_size = 0; - } } else ipvs->lblc_ctl_table = vs_vars_table; diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 0fb64707213f..cdb1d4bf6761 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -294,7 +294,6 @@ static struct ctl_table vs_vars_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { } }; #endif @@ -749,10 +748,8 @@ static int __net_init __ip_vs_lblcr_init(struct net *net) return -ENOMEM; /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) { - ipvs->lblcr_ctl_table[0].procname = NULL; + if (net->user_ns != &init_user_ns) vars_table_size = 0; - } } else ipvs->lblcr_ctl_table = vs_vars_table; ipvs->sysctl_lblcr_expiration = DEFAULT_EXPIRATION; diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 39b5fd6bbf65..3313bceb6cc9 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -180,7 +180,7 @@ static inline bool crosses_local_route_boundary(int skb_af, struct sk_buff *skb, (!skb->dev || skb->dev->flags & IFF_LOOPBACK) && (addr_type & IPV6_ADDR_LOOPBACK); old_rt_is_local = __ip_vs_is_local_route6( - (struct rt6_info *)skb_dst(skb)); + 
dst_rt6_info(skb_dst(skb))); } else #endif { @@ -318,7 +318,7 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, if (dest) { dest_dst = __ip_vs_dst_check(dest); if (likely(dest_dst)) - rt = (struct rtable *) dest_dst->dst_cache; + rt = dst_rtable(dest_dst->dst_cache); else { dest_dst = ip_vs_dest_dst_alloc(); spin_lock_bh(&dest->dst_lock); @@ -481,7 +481,7 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, if (dest) { dest_dst = __ip_vs_dst_check(dest); if (likely(dest_dst)) - rt = (struct rt6_info *) dest_dst->dst_cache; + rt = dst_rt6_info(dest_dst->dst_cache); else { u32 cookie; @@ -501,7 +501,7 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, ip_vs_dest_dst_free(dest_dst); goto err_unreach; } - rt = (struct rt6_info *) dst; + rt = dst_rt6_info(dst); cookie = rt6_get_cookie(rt); __ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie); spin_unlock_bh(&dest->dst_lock); @@ -517,7 +517,7 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, rt_mode); if (!dst) goto err_unreach; - rt = (struct rt6_info *) dst; + rt = dst_rt6_info(dst); } local = __ip_vs_is_local_route6(rt); @@ -862,7 +862,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_RT_MODE_RDR); if (local < 0) goto tx_error; - rt = (struct rt6_info *) skb_dst(skb); + rt = dst_rt6_info(skb_dst(skb)); /* * Avoid duplicate tuple in reply direction for NAT traffic * to local address when connection is sync-ed @@ -1288,7 +1288,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (local) return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1); - rt = (struct rt6_info *) skb_dst(skb); + rt = dst_rt6_info(skb_dst(skb)); tdev = rt->dst.dev; /* @@ -1590,7 +1590,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, &cp->daddr.in6, NULL, ipvsh, 0, rt_mode); if (local < 0) goto tx_error; - rt = (struct rt6_info *) skb_dst(skb); + rt = dst_rt6_info(skb_dst(skb)); /* * Avoid duplicate tuple in reply direction for NAT traffic * to local address when connection is sync-ed diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index c63868666bd9..7ac20750c127 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1440,8 +1440,6 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct) const struct nf_conntrack_l4proto *l4proto; u8 protonum = nf_ct_protonum(ct); - if (test_bit(IPS_OFFLOAD_BIT, &ct->status) && protonum != IPPROTO_UDP) - return false; if (!test_bit(IPS_ASSURED_BIT, &ct->status)) return true; @@ -2024,7 +2022,7 @@ repeat: goto repeat; NF_CT_STAT_INC_ATOMIC(state->net, invalid); - if (ret == -NF_DROP) + if (ret == NF_DROP) NF_CT_STAT_INC_ATOMIC(state->net, drop); ret = -ret; diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index e2db1f4ec2df..ebc4f733bb2e 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -525,7 +525,7 @@ int nf_conntrack_dccp_packet(struct nf_conn *ct, struct sk_buff *skb, dh = skb_header_pointer(skb, dataoff, sizeof(*dh), &_dh.dh); if (!dh) - return NF_DROP; + return -NF_ACCEPT; if (dccp_error(dh, skb, dataoff, state)) return -NF_ACCEPT; @@ -533,7 +533,7 @@ int nf_conntrack_dccp_packet(struct nf_conn *ct, struct sk_buff *skb, /* pull again, including possible 48 bit sequences and subtype header */ dh = dccp_header_pointer(skb, dataoff, dh, &_dh); if (!dh) - return NF_DROP; + return 
-NF_ACCEPT; type = dh->dccph_type; if (!nf_ct_is_confirmed(ct) && !dccp_new(ct, skb, dh, state)) diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c index 1020d67600a9..327b8059025d 100644 --- a/net/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/netfilter/nf_conntrack_proto_icmpv6.c @@ -62,7 +62,9 @@ static const u_int8_t noct_valid_new[] = { [NDISC_ROUTER_ADVERTISEMENT - 130] = 1, [NDISC_NEIGHBOUR_SOLICITATION - 130] = 1, [NDISC_NEIGHBOUR_ADVERTISEMENT - 130] = 1, - [ICMPV6_MLD2_REPORT - 130] = 1 + [ICMPV6_MLD2_REPORT - 130] = 1, + [ICMPV6_MRDISC_ADV - 130] = 1, + [ICMPV6_MRDISC_SOL - 130] = 1 }; bool nf_conntrack_invert_icmpv6_tuple(struct nf_conntrack_tuple *tuple, diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index bb9dea676ec1..74112e9c5dab 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -616,11 +616,9 @@ enum nf_ct_sysctl_index { NF_SYSCTL_CT_LWTUNNEL, #endif - __NF_SYSCTL_CT_LAST_SYSCTL, + NF_SYSCTL_CT_LAST_SYSCTL, }; -#define NF_SYSCTL_CT_LAST_SYSCTL (__NF_SYSCTL_CT_LAST_SYSCTL + 1) - static struct ctl_table nf_ct_sysctl_table[] = { [NF_SYSCTL_CT_MAX] = { .procname = "nf_conntrack_max", @@ -957,7 +955,6 @@ static struct ctl_table nf_ct_sysctl_table[] = { .proc_handler = nf_hooks_lwtunnel_sysctl_handler, }, #endif - {} }; static struct ctl_table nf_ct_netfilter_table[] = { @@ -968,7 +965,6 @@ static struct ctl_table nf_ct_netfilter_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { } }; static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net, diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index a0571339239c..5c1ff07eaee0 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -77,12 +77,8 @@ EXPORT_SYMBOL_GPL(flow_offload_alloc); static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple) { - const struct rt6_info *rt; - - if (flow_tuple->l3proto == NFPROTO_IPV6) { - rt = (const struct rt6_info *)flow_tuple->dst_cache; - return rt6_get_cookie(rt); - } + if (flow_tuple->l3proto == NFPROTO_IPV6) + return rt6_get_cookie(dst_rt6_info(flow_tuple->dst_cache)); return 0; } diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 5383bed3d3e0..c2c005234dcd 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -434,7 +434,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, return NF_ACCEPT; if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) { - rt = (struct rtable *)tuplehash->tuple.dst_cache; + rt = dst_rtable(tuplehash->tuple.dst_cache); memset(skb->cb, 0, sizeof(struct inet_skb_parm)); IPCB(skb)->iif = skb->dev->ifindex; IPCB(skb)->flags = IPSKB_FORWARDED; @@ -446,7 +446,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, switch (tuplehash->tuple.xmit_type) { case FLOW_OFFLOAD_XMIT_NEIGH: - rt = (struct rtable *)tuplehash->tuple.dst_cache; + rt = dst_rtable(tuplehash->tuple.dst_cache); outdev = rt->dst.dev; skb->dev = outdev; nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr); @@ -729,7 +729,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, return NF_ACCEPT; if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) { - rt = (struct rt6_info *)tuplehash->tuple.dst_cache; + rt = dst_rt6_info(tuplehash->tuple.dst_cache); memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); IP6CB(skb)->iif = 
skb->dev->ifindex; IP6CB(skb)->flags = IP6SKB_FORWARDED; @@ -741,7 +741,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, switch (tuplehash->tuple.xmit_type) { case FLOW_OFFLOAD_XMIT_NEIGH: - rt = (struct rt6_info *)tuplehash->tuple.dst_cache; + rt = dst_rt6_info(tuplehash->tuple.dst_cache); outdev = rt->dst.dev; skb->dev = outdev; nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6); diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index efedd2f13ac7..769fd7680fac 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -395,7 +395,7 @@ static const struct seq_operations nflog_seq_ops = { #ifdef CONFIG_SYSCTL static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3]; -static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1]; +static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO]; static struct ctl_table_header *nf_log_sysctl_fhdr; static struct ctl_table nf_log_sysctl_ftable[] = { @@ -406,7 +406,6 @@ static struct ctl_table nf_log_sysctl_ftable[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { } }; static int nf_log_proc_dostring(struct ctl_table *table, int write, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 167074283ea9..be3b4c90d2ed 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3333,7 +3333,7 @@ err_expr_parse: return ERR_PTR(err); } -int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src) +int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src, gfp_t gfp) { int err; @@ -3341,7 +3341,7 @@ int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src) return -EINVAL; dst->ops = src->ops; - err = src->ops->clone(dst, src); + err = src->ops->clone(dst, src, gfp); if (err < 0) return err; @@ -6525,7 +6525,7 @@ int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set, if (!expr) goto err_expr; - err = nft_expr_clone(expr, set->exprs[i]); + err = nft_expr_clone(expr, set->exprs[i], GFP_KERNEL_ACCOUNT); if (err < 0) { kfree(expr); goto err_expr; @@ -6564,7 +6564,7 @@ static int nft_set_elem_expr_setup(struct nft_ctx *ctx, for (i = 0; i < num_exprs; i++) { expr = nft_setelem_expr_at(elem_expr, elem_expr->size); - err = nft_expr_clone(expr, expr_array[i]); + err = nft_expr_clone(expr, expr_array[i], GFP_KERNEL_ACCOUNT); if (err < 0) goto err_elem_expr_setup; @@ -7776,6 +7776,9 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info, if (WARN_ON_ONCE(!type)) return -ENOENT; + if (!obj->ops->update) + return 0; + nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); return nf_tables_updobj(&ctx, type, nla[NFTA_OBJ_DATA], obj); @@ -9467,9 +9470,10 @@ static void nft_obj_commit_update(struct nft_trans *trans) obj = nft_trans_obj(trans); newobj = nft_trans_obj_newobj(trans); - if (obj->ops->update) - obj->ops->update(obj, newobj); + if (WARN_ON_ONCE(!obj->ops->update)) + return; + obj->ops->update(obj, newobj); nft_obj_destroy(&trans->ctx, newobj); } diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index d170758a1eb5..7010541fcca6 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -325,9 +325,6 @@ static void nft_netdev_event(unsigned long event, struct net_device *dev, struct nft_hook *hook, *found = NULL; int n = 0; - if (event != NETDEV_UNREGISTER) - return; - list_for_each_entry(hook, &basechain->hook_list, list) { if (hook->ops.dev == dev) found = hook; @@ -367,8 +364,7 @@ static int 
nf_tables_netdev_event(struct notifier_block *this, .net = dev_net(dev), }; - if (event != NETDEV_UNREGISTER && - event != NETDEV_CHANGENAME) + if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; nft_net = nft_pernet(ctx.net); diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c index de9d1980df69..92b984fa8175 100644 --- a/net/netfilter/nft_connlimit.c +++ b/net/netfilter/nft_connlimit.c @@ -210,12 +210,12 @@ static void nft_connlimit_destroy(const struct nft_ctx *ctx, nft_connlimit_do_destroy(ctx, priv); } -static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src) +static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src, gfp_t gfp) { struct nft_connlimit *priv_dst = nft_expr_priv(dst); struct nft_connlimit *priv_src = nft_expr_priv(src); - priv_dst->list = kmalloc(sizeof(*priv_dst->list), GFP_ATOMIC); + priv_dst->list = kmalloc(sizeof(*priv_dst->list), gfp); if (!priv_dst->list) return -ENOMEM; diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index dccc68a5135a..291ed2026367 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -226,7 +226,7 @@ static void nft_counter_destroy(const struct nft_ctx *ctx, nft_counter_do_destroy(priv); } -static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src) +static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src, gfp_t gfp) { struct nft_counter_percpu_priv *priv = nft_expr_priv(src); struct nft_counter_percpu_priv *priv_clone = nft_expr_priv(dst); @@ -236,7 +236,7 @@ static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src) nft_counter_fetch(priv, &total); - cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_ATOMIC); + cpu_stats = alloc_percpu_gfp(struct nft_counter, gfp); if (cpu_stats == NULL) return -ENOMEM; diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index c09dba57354c..b4ada3ab2167 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -35,7 +35,7 @@ static int nft_dynset_expr_setup(const struct nft_dynset *priv, for (i = 0; i < priv->num_exprs; i++) { expr = nft_setelem_expr_at(elem_expr, elem_expr->size); - if (nft_expr_clone(expr, priv->expr_array[i]) < 0) + if (nft_expr_clone(expr, priv->expr_array[i], GFP_ATOMIC) < 0) return -1; elem_expr->size += priv->expr_array[i]->ops->size; diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c index 8e6d7eaf9dc8..de1b6066bfa8 100644 --- a/net/netfilter/nft_last.c +++ b/net/netfilter/nft_last.c @@ -102,12 +102,12 @@ static void nft_last_destroy(const struct nft_ctx *ctx, kfree(priv->last); } -static int nft_last_clone(struct nft_expr *dst, const struct nft_expr *src) +static int nft_last_clone(struct nft_expr *dst, const struct nft_expr *src, gfp_t gfp) { struct nft_last_priv *priv_dst = nft_expr_priv(dst); struct nft_last_priv *priv_src = nft_expr_priv(src); - priv_dst->last = kzalloc(sizeof(*priv_dst->last), GFP_ATOMIC); + priv_dst->last = kzalloc(sizeof(*priv_dst->last), gfp); if (!priv_dst->last) return -ENOMEM; diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index cefa25e0dbb0..21d26b79b460 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -150,7 +150,7 @@ static void nft_limit_destroy(const struct nft_ctx *ctx, } static int nft_limit_clone(struct nft_limit_priv *priv_dst, - const struct nft_limit_priv *priv_src) + const struct nft_limit_priv *priv_src, gfp_t gfp) { priv_dst->tokens_max = priv_src->tokens_max; priv_dst->rate 
= priv_src->rate; @@ -158,7 +158,7 @@ static int nft_limit_clone(struct nft_limit_priv *priv_dst, priv_dst->burst = priv_src->burst; priv_dst->invert = priv_src->invert; - priv_dst->limit = kmalloc(sizeof(*priv_dst->limit), GFP_ATOMIC); + priv_dst->limit = kmalloc(sizeof(*priv_dst->limit), gfp); if (!priv_dst->limit) return -ENOMEM; @@ -223,14 +223,15 @@ static void nft_limit_pkts_destroy(const struct nft_ctx *ctx, nft_limit_destroy(ctx, &priv->limit); } -static int nft_limit_pkts_clone(struct nft_expr *dst, const struct nft_expr *src) +static int nft_limit_pkts_clone(struct nft_expr *dst, const struct nft_expr *src, + gfp_t gfp) { struct nft_limit_priv_pkts *priv_dst = nft_expr_priv(dst); struct nft_limit_priv_pkts *priv_src = nft_expr_priv(src); priv_dst->cost = priv_src->cost; - return nft_limit_clone(&priv_dst->limit, &priv_src->limit); + return nft_limit_clone(&priv_dst->limit, &priv_src->limit, gfp); } static struct nft_expr_type nft_limit_type; @@ -281,12 +282,13 @@ static void nft_limit_bytes_destroy(const struct nft_ctx *ctx, nft_limit_destroy(ctx, priv); } -static int nft_limit_bytes_clone(struct nft_expr *dst, const struct nft_expr *src) +static int nft_limit_bytes_clone(struct nft_expr *dst, const struct nft_expr *src, + gfp_t gfp) { struct nft_limit_priv *priv_dst = nft_expr_priv(dst); struct nft_limit_priv *priv_src = nft_expr_priv(src); - return nft_limit_clone(priv_dst, priv_src); + return nft_limit_clone(priv_dst, priv_src, gfp); } static const struct nft_expr_ops nft_limit_bytes_ops = { diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index 3ba12a7471b0..9b2d7463d3d3 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -233,7 +233,7 @@ static void nft_quota_destroy(const struct nft_ctx *ctx, return nft_quota_do_destroy(ctx, priv); } -static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src) +static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src, gfp_t gfp) { struct nft_quota *priv_dst = nft_expr_priv(dst); struct nft_quota *priv_src = nft_expr_priv(src); @@ -241,7 +241,7 @@ static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src) priv_dst->quota = priv_src->quota; priv_dst->flags = priv_src->flags; - priv_dst->consumed = kmalloc(sizeof(*priv_dst->consumed), GFP_ATOMIC); + priv_dst->consumed = kmalloc(sizeof(*priv_dst->consumed), gfp); if (!priv_dst->consumed) return -ENOMEM; diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c index 24d977138572..14d88394bcb7 100644 --- a/net/netfilter/nft_rt.c +++ b/net/netfilter/nft_rt.c @@ -73,14 +73,14 @@ void nft_rt_get_eval(const struct nft_expr *expr, if (nft_pf(pkt) != NFPROTO_IPV4) goto err; - *dest = (__force u32)rt_nexthop((const struct rtable *)dst, + *dest = (__force u32)rt_nexthop(dst_rtable(dst), ip_hdr(skb)->daddr); break; case NFT_RT_NEXTHOP6: if (nft_pf(pkt) != NFPROTO_IPV6) goto err; - memcpy(dest, rt6_nexthop((struct rt6_info *)dst, + memcpy(dest, rt6_nexthop(dst_rt6_info(dst), &ipv6_hdr(skb)->daddr), sizeof(struct in6_addr)); break; diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 187138afac45..15a236bebb46 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -504,6 +504,7 @@ out: * pipapo_get() - Get matching element reference given key data * @net: Network namespace * @set: nftables API set representation + * @m: storage containing active/existing elements * @data: Key data to be matched against existing elements * @genmask: If set, check that 
element is active in given genmask * @tstamp: timestamp to check for expired elements @@ -517,17 +518,15 @@ out: */ static struct nft_pipapo_elem *pipapo_get(const struct net *net, const struct nft_set *set, + const struct nft_pipapo_match *m, const u8 *data, u8 genmask, u64 tstamp, gfp_t gfp) { struct nft_pipapo_elem *ret = ERR_PTR(-ENOENT); - struct nft_pipapo *priv = nft_set_priv(set); unsigned long *res_map, *fill_map = NULL; - const struct nft_pipapo_match *m; const struct nft_pipapo_field *f; int i; - m = priv->clone; if (m->bsize_max == 0) return ret; @@ -612,9 +611,11 @@ static struct nft_elem_priv * nft_pipapo_get(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, unsigned int flags) { + struct nft_pipapo *priv = nft_set_priv(set); + struct nft_pipapo_match *m = rcu_dereference(priv->match); struct nft_pipapo_elem *e; - e = pipapo_get(net, set, (const u8 *)elem->key.val.data, + e = pipapo_get(net, set, m, (const u8 *)elem->key.val.data, nft_genmask_cur(net), get_jiffies_64(), GFP_ATOMIC); if (IS_ERR(e)) @@ -1247,6 +1248,40 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, return 0; } +static bool nft_pipapo_transaction_mutex_held(const struct nft_set *set) +{ +#ifdef CONFIG_PROVE_LOCKING + const struct net *net = read_pnet(&set->net); + + return lockdep_is_held(&nft_pernet(net)->commit_mutex); +#else + return true; +#endif +} + +static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old); + +/** + * pipapo_maybe_clone() - Build clone for pending data changes, if not existing + * @set: nftables API set representation + * + * Return: newly created or existing clone, if any. NULL on allocation failure + */ +static struct nft_pipapo_match *pipapo_maybe_clone(const struct nft_set *set) +{ + struct nft_pipapo *priv = nft_set_priv(set); + struct nft_pipapo_match *m; + + if (priv->clone) + return priv->clone; + + m = rcu_dereference_protected(priv->match, + nft_pipapo_transaction_mutex_held(set)); + priv->clone = pipapo_clone(m); + + return priv->clone; +} + /** * nft_pipapo_insert() - Validate and insert ranged elements * @net: Network namespace @@ -1263,8 +1298,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS]; const u8 *start = (const u8 *)elem->key.val.data, *end; - struct nft_pipapo *priv = nft_set_priv(set); - struct nft_pipapo_match *m = priv->clone; + struct nft_pipapo_match *m = pipapo_maybe_clone(set); u8 genmask = nft_genmask_next(net); struct nft_pipapo_elem *e, *dup; u64 tstamp = nft_net_tstamp(net); @@ -1272,12 +1306,15 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, const u8 *start_p, *end_p; int i, bsize_max, err = 0; + if (!m) + return -ENOMEM; + if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END)) end = (const u8 *)nft_set_ext_key_end(ext)->data; else end = start; - dup = pipapo_get(net, set, start, genmask, tstamp, GFP_KERNEL); + dup = pipapo_get(net, set, m, start, genmask, tstamp, GFP_KERNEL); if (!IS_ERR(dup)) { /* Check if we already have the same exact entry */ const struct nft_data *dup_key, *dup_end; @@ -1299,7 +1336,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, if (PTR_ERR(dup) == -ENOENT) { /* Look for partially overlapping entries */ - dup = pipapo_get(net, set, end, nft_genmask_next(net), tstamp, + dup = pipapo_get(net, set, m, end, nft_genmask_next(net), tstamp, 
GFP_KERNEL); } @@ -1332,8 +1369,6 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, } /* Insert */ - priv->dirty = true; - bsize_max = m->bsize_max; nft_pipapo_for_each_field(f, i, m) { @@ -1384,7 +1419,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, * pipapo_clone() - Clone matching data to create new working copy * @old: Existing matching data * - * Return: copy of matching data passed as 'old', error pointer on failure + * Return: copy of matching data passed as 'old' or NULL. */ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old) { @@ -1394,7 +1429,7 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old) new = kmalloc(struct_size(new, f, old->field_count), GFP_KERNEL); if (!new) - return ERR_PTR(-ENOMEM); + return NULL; new->field_count = old->field_count; new->bsize_max = old->bsize_max; @@ -1466,7 +1501,7 @@ out_scratch: free_percpu(new->scratch); kfree(new); - return ERR_PTR(-ENOMEM); + return NULL; } /** @@ -1698,8 +1733,6 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m) * NFT_SET_ELEM_DEAD_BIT. */ if (__nft_set_elem_expired(&e->ext, tstamp)) { - priv->dirty = true; - gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL); if (!gc) return; @@ -1777,57 +1810,30 @@ static void pipapo_reclaim_match(struct rcu_head *rcu) static void nft_pipapo_commit(struct nft_set *set) { struct nft_pipapo *priv = nft_set_priv(set); - struct nft_pipapo_match *new_clone, *old; - - if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set))) - pipapo_gc(set, priv->clone); + struct nft_pipapo_match *old; - if (!priv->dirty) + if (!priv->clone) return; - new_clone = pipapo_clone(priv->clone); - if (IS_ERR(new_clone)) - return; + if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set))) + pipapo_gc(set, priv->clone); - priv->dirty = false; + old = rcu_replace_pointer(priv->match, priv->clone, + nft_pipapo_transaction_mutex_held(set)); + priv->clone = NULL; - old = rcu_access_pointer(priv->match); - rcu_assign_pointer(priv->match, priv->clone); if (old) call_rcu(&old->rcu, pipapo_reclaim_match); - - priv->clone = new_clone; -} - -static bool nft_pipapo_transaction_mutex_held(const struct nft_set *set) -{ -#ifdef CONFIG_PROVE_LOCKING - const struct net *net = read_pnet(&set->net); - - return lockdep_is_held(&nft_pernet(net)->commit_mutex); -#else - return true; -#endif } static void nft_pipapo_abort(const struct nft_set *set) { struct nft_pipapo *priv = nft_set_priv(set); - struct nft_pipapo_match *new_clone, *m; - - if (!priv->dirty) - return; - - m = rcu_dereference_protected(priv->match, nft_pipapo_transaction_mutex_held(set)); - new_clone = pipapo_clone(m); - if (IS_ERR(new_clone)) + if (!priv->clone) return; - - priv->dirty = false; - pipapo_free_match(priv->clone); - priv->clone = new_clone; + priv->clone = NULL; } /** @@ -1851,52 +1857,38 @@ static void nft_pipapo_activate(const struct net *net, } /** - * pipapo_deactivate() - Check that element is in set, mark as inactive + * nft_pipapo_deactivate() - Search for element and make it inactive * @net: Network namespace * @set: nftables API set representation - * @data: Input key data - * @ext: nftables API extension pointer, used to check for end element - * - * This is a convenience function that can be called from both - * nft_pipapo_deactivate() and nft_pipapo_flush(), as they are in fact the same - * operation. 
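Taken together, the pipapo hunks above and below replace the eager re-clone of the matching data with a lazy one: priv->clone is only built on the first mutation of a transaction (pipapo_maybe_clone()), its mere existence replaces the old priv->dirty flag, commit publishes it, and abort simply frees it. A condensed sketch of that pairing, reusing the helper names from the hunks and eliding the GC pass and the lockdep condition:

/* Sketch only: commit/abort lifecycle after the dirty-flag removal. */
static void example_commit(struct nft_pipapo *priv)
{
	struct nft_pipapo_match *old;

	if (!priv->clone)		/* no pending changes */
		return;
	old = rcu_replace_pointer(priv->match, priv->clone, 1);
	priv->clone = NULL;
	if (old)
		call_rcu(&old->rcu, pipapo_reclaim_match);
}

static void example_abort(struct nft_pipapo *priv)
{
	if (!priv->clone)		/* nothing to roll back */
		return;
	pipapo_free_match(priv->clone);	/* discard pending changes */
	priv->clone = NULL;
}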
+ * @elem: nftables API element representation containing key data * * Return: deactivated element if found, NULL otherwise. */ -static void *pipapo_deactivate(const struct net *net, const struct nft_set *set, - const u8 *data, const struct nft_set_ext *ext) +static struct nft_elem_priv * +nft_pipapo_deactivate(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem) { + struct nft_pipapo_match *m = pipapo_maybe_clone(set); struct nft_pipapo_elem *e; - e = pipapo_get(net, set, data, nft_genmask_next(net), - nft_net_tstamp(net), GFP_KERNEL); + /* Removal must occur on priv->clone; if we are low on memory, * we have no choice but to fail the removal request. */ + if (!m) + return NULL; + + e = pipapo_get(net, set, m, (const u8 *)elem->key.val.data, + nft_genmask_next(net), nft_net_tstamp(net), GFP_KERNEL); if (IS_ERR(e)) return NULL; nft_set_elem_change_active(net, set, &e->ext); - return e; -} - -/** - * nft_pipapo_deactivate() - Call pipapo_deactivate() to make element inactive - * @net: Network namespace - * @set: nftables API set representation - * @elem: nftables API element representation containing key data - * - * Return: deactivated element if found, NULL otherwise. - */ -static struct nft_elem_priv * -nft_pipapo_deactivate(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem) -{ - const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); - - return pipapo_deactivate(net, set, (const u8 *)elem->key.val.data, ext); + return &e->priv; } /** - * nft_pipapo_flush() - Call pipapo_deactivate() to make element inactive + * nft_pipapo_flush() - Make element inactive * @net: Network namespace * @set: nftables API set representation * @elem_priv: nftables API element representation containing key data @@ -2093,7 +2085,6 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, match_end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); if (last && f->mt[rulemap[i].to].e == e) { - priv->dirty = true; pipapo_drop(m, rulemap); return; } @@ -2106,35 +2097,23 @@ } /** - * nft_pipapo_walk() - Walk over elements + * nft_pipapo_do_walk() - Walk over elements in m * @ctx: nftables API context * @set: nftables API set representation + * @m: matching data pointing to key mapping array * @iter: Iterator * * As elements are referenced in the mapping array for the last field, directly * scan that array: there's no need to follow rule mappings from the first - * field. + * field. @m is protected either by RCU read lock or by transaction mutex. 
*/ -static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_iter *iter) +static void nft_pipapo_do_walk(const struct nft_ctx *ctx, struct nft_set *set, + const struct nft_pipapo_match *m, + struct nft_set_iter *iter) { - struct nft_pipapo *priv = nft_set_priv(set); - const struct nft_pipapo_match *m; const struct nft_pipapo_field *f; unsigned int i, r; - WARN_ON_ONCE(iter->type != NFT_ITER_READ && - iter->type != NFT_ITER_UPDATE); - - rcu_read_lock(); - if (iter->type == NFT_ITER_READ) - m = rcu_dereference(priv->match); - else - m = priv->clone; - - if (unlikely(!m)) - goto out; - for (i = 0, f = m->f; i < m->field_count - 1; i++, f++) ; @@ -2151,14 +2130,49 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, iter->err = iter->fn(ctx, set, iter, &e->priv); if (iter->err < 0) - goto out; + return; cont: iter->count++; } +} -out: - rcu_read_unlock(); +/** + * nft_pipapo_walk() - Walk over elements + * @ctx: nftables API context + * @set: nftables API set representation + * @iter: Iterator + * + * Test if destructive action is needed or not, clone active backend if needed + * and call the real function to work on the data. + */ +static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_iter *iter) +{ + struct nft_pipapo *priv = nft_set_priv(set); + const struct nft_pipapo_match *m; + + switch (iter->type) { + case NFT_ITER_UPDATE: + m = pipapo_maybe_clone(set); + if (!m) { + iter->err = -ENOMEM; + return; + } + + nft_pipapo_do_walk(ctx, set, m, iter); + break; + case NFT_ITER_READ: + rcu_read_lock(); + m = rcu_dereference(priv->match); + nft_pipapo_do_walk(ctx, set, m, iter); + rcu_read_unlock(); + break; + default: + iter->err = -EINVAL; + WARN_ON_ONCE(1); + break; + } } /** @@ -2267,21 +2281,10 @@ static int nft_pipapo_init(const struct nft_set *set, f->mt = NULL; } - /* Create an initial clone of matching data for next insertion */ - priv->clone = pipapo_clone(m); - if (IS_ERR(priv->clone)) { - err = PTR_ERR(priv->clone); - goto out_free; - } - - priv->dirty = false; - rcu_assign_pointer(priv->match, m); return 0; -out_free: - free_percpu(m->scratch); out_scratch: kfree(m); @@ -2326,33 +2329,18 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx, { struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_match *m; - int cpu; m = rcu_dereference_protected(priv->match, true); - if (m) { - rcu_barrier(); - - for_each_possible_cpu(cpu) - pipapo_free_scratch(m, cpu); - free_percpu(m->scratch); - pipapo_free_fields(m); - kfree(m); - priv->match = NULL; - } if (priv->clone) { - m = priv->clone; - - nft_set_pipapo_match_destroy(ctx, set, m); - - for_each_possible_cpu(cpu) - pipapo_free_scratch(priv->clone, cpu); - free_percpu(priv->clone->scratch); - - pipapo_free_fields(priv->clone); - kfree(priv->clone); + nft_set_pipapo_match_destroy(ctx, set, priv->clone); + pipapo_free_match(priv->clone); priv->clone = NULL; + } else { + nft_set_pipapo_match_destroy(ctx, set, m); } + + pipapo_free_match(m); } /** diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h index 24cd1ff73f98..0d2e40e10f7f 100644 --- a/net/netfilter/nft_set_pipapo.h +++ b/net/netfilter/nft_set_pipapo.h @@ -155,14 +155,12 @@ struct nft_pipapo_match { * @match: Currently in-use matching data * @clone: Copy where pending insertions and deletions are kept * @width: Total bytes to be matched for one packet, including padding - * @dirty: Working copy has pending insertions or deletions * @last_gc: 
Timestamp of last garbage collection run, jiffies */ struct nft_pipapo { struct nft_pipapo_match __rcu *match; struct nft_pipapo_match *clone; int width; - bool dirty; unsigned long last_gc; }; diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 1ba4f58e1d35..cd9160bbc919 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -965,6 +965,7 @@ int netlbl_enabled(void) * @sk: the socket to label * @family: protocol family * @secattr: the security attributes + * @sk_locked: true if caller holds the socket lock * * Description: * Attach the correct label to the given socket using the security attributes @@ -977,7 +978,8 @@ int netlbl_enabled(void) */ int netlbl_sock_setattr(struct sock *sk, u16 family, - const struct netlbl_lsm_secattr *secattr) + const struct netlbl_lsm_secattr *secattr, + bool sk_locked) { int ret_val; struct netlbl_dom_map *dom_entry; @@ -997,7 +999,7 @@ int netlbl_sock_setattr(struct sock *sk, case NETLBL_NLTYPE_CIPSOV4: ret_val = cipso_v4_sock_setattr(sk, dom_entry->def.cipso, - secattr); + secattr, sk_locked); break; case NETLBL_NLTYPE_UNLABELED: ret_val = 0; @@ -1091,6 +1093,28 @@ int netlbl_sock_getattr(struct sock *sk, } /** + * netlbl_sk_lock_check - Check if the socket lock has been acquired. + * @sk: the socket to be checked + * + * Return: true if socket @sk is locked or if lock debugging is disabled at + * runtime or compile-time; false otherwise + * + */ +#ifdef CONFIG_LOCKDEP +bool netlbl_sk_lock_check(struct sock *sk) +{ + if (debug_locks) + return lockdep_sock_is_held(sk); + return true; +} +#else +bool netlbl_sk_lock_check(struct sock *sk) +{ + return true; +} +#endif + +/** * netlbl_conn_setattr - Label a connected socket using the correct protocol * @sk: the socket to label * @addr: the destination address @@ -1126,7 +1150,8 @@ int netlbl_conn_setattr(struct sock *sk, switch (entry->type) { case NETLBL_NLTYPE_CIPSOV4: ret_val = cipso_v4_sock_setattr(sk, - entry->cipso, secattr); + entry->cipso, secattr, + netlbl_sk_lock_check(sk)); break; case NETLBL_NLTYPE_UNLABELED: /* just delete the protocols we support for right now diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 104a80b75477..6ee148f0e6d0 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -772,8 +772,8 @@ out_release: return err; } -static int nr_accept(struct socket *sock, struct socket *newsock, int flags, - bool kern) +static int nr_accept(struct socket *sock, struct socket *newsock, + struct proto_accept_arg *arg) { struct sk_buff *skb; struct sock *newsk; @@ -805,7 +805,7 @@ static int nr_accept(struct socket *sock, struct socket *newsock, int flags, if (skb) break; - if (flags & O_NONBLOCK) { + if (arg->flags & O_NONBLOCK) { err = -EWOULDBLOCK; break; } diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 70480869ad1c..bd2b17b219ae 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -285,22 +285,14 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic, return 0; } -static inline void __nr_remove_node(struct nr_node *nr_node) +static void nr_remove_node_locked(struct nr_node *nr_node) { + lockdep_assert_held(&nr_node_list_lock); + hlist_del_init(&nr_node->node_node); nr_node_put(nr_node); } -#define nr_remove_node_locked(__node) \ - __nr_remove_node(__node) - -static void nr_remove_node(struct nr_node *nr_node) -{ - spin_lock_bh(&nr_node_list_lock); - __nr_remove_node(nr_node); - spin_unlock_bh(&nr_node_list_lock); -} - static inline void 
__nr_remove_neigh(struct nr_neigh *nr_neigh) { hlist_del_init(&nr_neigh->neigh_node); @@ -339,6 +331,7 @@ static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct n return -EINVAL; } + spin_lock_bh(&nr_node_list_lock); nr_node_lock(nr_node); for (i = 0; i < nr_node->count; i++) { if (nr_node->routes[i].neighbour == nr_neigh) { @@ -352,7 +345,7 @@ static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct n nr_node->count--; if (nr_node->count == 0) { - nr_remove_node(nr_node); + nr_remove_node_locked(nr_node); } else { switch (i) { case 0: @@ -367,12 +360,14 @@ static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct n nr_node_put(nr_node); } nr_node_unlock(nr_node); + spin_unlock_bh(&nr_node_list_lock); return 0; } } nr_neigh_put(nr_neigh); nr_node_unlock(nr_node); + spin_unlock_bh(&nr_node_list_lock); nr_node_put(nr_node); return -EINVAL; diff --git a/net/netrom/sysctl_net_netrom.c b/net/netrom/sysctl_net_netrom.c index 79fb2d3f477b..7dc0fa628f2e 100644 --- a/net/netrom/sysctl_net_netrom.c +++ b/net/netrom/sysctl_net_netrom.c @@ -140,7 +140,6 @@ static struct ctl_table nr_table[] = { .extra1 = &min_reset, .extra2 = &max_reset }, - { } }; int __init nr_register_sysctl(void) diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index d5344563e525..57a2f97004e1 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -447,7 +447,7 @@ struct sock *nfc_llcp_accept_dequeue(struct sock *parent, } static int llcp_sock_accept(struct socket *sock, struct socket *newsock, - int flags, bool kern) + struct proto_accept_arg *arg) { DECLARE_WAITQUEUE(wait, current); struct sock *sk = sock->sk, *new_sk; @@ -463,7 +463,7 @@ static int llcp_sock_accept(struct socket *sock, struct socket *newsock, goto error; } - timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK); /* Wait for an incoming connection. 
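The accept() conversions in this series (af_netrom above, nfc here, then phonet, rds and rose further down) all funnel the old flags/errp/kern parameters through a single argument block. A sketch of that block, with the field layout assumed from the 6.10-era include/net/sock.h definition rather than taken from this diff, plus a caller in the style of the rds_tcp_accept_one() hunk below:

struct proto_accept_arg {
	int flags;	/* O_NONBLOCK etc., formerly a bare int parameter */
	int err;	/* replaces the int *errp out-parameter */
	int is_empty;	/* set when the accept queue was empty */
	bool kern;	/* kernel-internal accept, formerly bool kern */
};

/* Caller side: fill the block once, read the error back from it. */
struct proto_accept_arg arg = {
	.flags = O_NONBLOCK,
	.kern = true,
};
/* ret = sock->ops->accept(sock, new_sock, &arg);
 * on failure the reason is in arg.err, not behind an errp pointer. */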
*/ add_wait_queue_exclusive(sk_sleep(sk), &wait); diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 0d26c8ec9993..f456a5911e7d 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -1463,6 +1463,19 @@ int nci_core_ntf_packet(struct nci_dev *ndev, __u16 opcode, ndev->ops->n_core_ops); } +static bool nci_valid_size(struct sk_buff *skb) +{ + BUILD_BUG_ON(NCI_CTRL_HDR_SIZE != NCI_DATA_HDR_SIZE); + unsigned int hdr_size = NCI_CTRL_HDR_SIZE; + + if (skb->len < hdr_size || + !nci_plen(skb->data) || + skb->len < hdr_size + nci_plen(skb->data)) { + return false; + } + return true; +} + /* ---- NCI TX Data worker thread ---- */ static void nci_tx_work(struct work_struct *work) @@ -1516,9 +1529,9 @@ static void nci_rx_work(struct work_struct *work) nfc_send_to_raw_sock(ndev->nfc_dev, skb, RAW_PAYLOAD_NCI, NFC_DIRECTION_RX); - if (!nci_plen(skb->data)) { + if (!nci_valid_size(skb)) { kfree_skb(skb); - break; + continue; } /* Process frame */ diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c index f4a38bd6a7e0..bfb7758063f3 100644 --- a/net/nsh/nsh.c +++ b/net/nsh/nsh.c @@ -77,13 +77,15 @@ EXPORT_SYMBOL_GPL(nsh_pop); static struct sk_buff *nsh_gso_segment(struct sk_buff *skb, netdev_features_t features) { + unsigned int outer_hlen, mac_len, nsh_len; struct sk_buff *segs = ERR_PTR(-EINVAL); u16 mac_offset = skb->mac_header; - unsigned int nsh_len, mac_len; - __be16 proto; + __be16 outer_proto, proto; skb_reset_network_header(skb); + outer_proto = skb->protocol; + outer_hlen = skb_mac_header_len(skb); mac_len = skb->mac_len; if (unlikely(!pskb_may_pull(skb, NSH_BASE_HDR_LEN))) @@ -113,10 +115,10 @@ static struct sk_buff *nsh_gso_segment(struct sk_buff *skb, } for (skb = segs; skb; skb = skb->next) { - skb->protocol = htons(ETH_P_NSH); - __skb_push(skb, nsh_len); - skb->mac_header = mac_offset; - skb->network_header = skb->mac_header + mac_len; + skb->protocol = outer_proto; + __skb_push(skb, nsh_len + outer_hlen); + skb_reset_mac_header(skb); + skb_set_network_header(skb, outer_hlen); skb->mac_len = mac_len; } diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 6fcd7e2ca81f..964225580824 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -936,6 +936,12 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port, pskb_trim(skb, ovs_mac_header_len(key)); } + /* Need to set the pkt_type to involve the routing layer. The + * packet movement through the OVS datapath doesn't generally + * use routing, but this is needed for tunnel cases. + */ + skb->pkt_type = PACKET_OUTGOING; + if (likely(!mru || (skb->len <= mru + vport->dev->hard_header_len))) { ovs_vport_send(vport, skb, ovs_key_mac_proto(key)); diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 33b21a0c0548..8a848ce72e29 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -561,7 +561,6 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, */ key->tp.src = htons(icmp->icmp6_type); key->tp.dst = htons(icmp->icmp6_code); - memset(&key->ipv6.nd, 0, sizeof(key->ipv6.nd)); if (icmp->icmp6_code == 0 && (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION || @@ -570,6 +569,8 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, struct nd_msg *nd; int offset; + memset(&key->ipv6.nd, 0, sizeof(key->ipv6.nd)); + /* In order to process neighbor discovery options, we need the * entire packet. 
*/ diff --git a/net/openvswitch/openvswitch_trace.h b/net/openvswitch/openvswitch_trace.h index 3eb35d9eb700..74d75aaebef4 100644 --- a/net/openvswitch/openvswitch_trace.h +++ b/net/openvswitch/openvswitch_trace.h @@ -43,8 +43,8 @@ TRACE_EVENT(ovs_do_execute_action, TP_fast_assign( __entry->dpaddr = dp; - __assign_str(dp_name, ovs_dp_name(dp)); - __assign_str(dev_name, skb->dev->name); + __assign_str(dp_name); + __assign_str(dev_name); __entry->skbaddr = skb; __entry->len = skb->len; __entry->data_len = skb->data_len; @@ -113,8 +113,8 @@ TRACE_EVENT(ovs_dp_upcall, TP_fast_assign( __entry->dpaddr = dp; - __assign_str(dp_name, ovs_dp_name(dp)); - __assign_str(dev_name, skb->dev->name); + __assign_str(dp_name); + __assign_str(dev_name); __entry->skbaddr = skb; __entry->len = skb->len; __entry->data_len = skb->data_len; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8c6d3fbb4ed8..ea3ebc160e25 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2522,8 +2522,7 @@ static void tpacket_destruct_skb(struct sk_buff *skb) ts = __packet_set_timestamp(po, ph, skb); __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts); - if (!packet_read_pending(&po->tx_ring)) - complete(&po->skb_completion); + complete(&po->skb_completion); } sock_wfree(skb); diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 3dd5f52bc1b5..53a858478e22 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -759,8 +759,8 @@ static void pep_sock_close(struct sock *sk, long timeout) sock_put(sk); } -static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp, - bool kern) +static struct sock *pep_sock_accept(struct sock *sk, + struct proto_accept_arg *arg) { struct pep_sock *pn = pep_sk(sk), *newpn; struct sock *newsk = NULL; @@ -772,8 +772,8 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp, u8 pipe_handle, enabled, n_sb; u8 aligned = 0; - skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0, - errp); + skb = skb_recv_datagram(sk, (arg->flags & O_NONBLOCK) ? 
MSG_DONTWAIT : 0, + &arg->err); if (!skb) return NULL; @@ -836,7 +836,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp, /* Create a new to-be-accepted sock */ newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot, - kern); + arg->kern); if (!newsk) { pep_reject_conn(sk, skb, PN_PIPE_ERR_OVERLOAD, GFP_KERNEL); err = -ENOBUFS; @@ -878,7 +878,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp, drop: release_sock(sk); kfree_skb(skb); - *errp = err; + arg->err = err; return newsk; } diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 59aebe296890..7008d402499d 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -178,7 +178,7 @@ static int fill_route(struct sk_buff *skb, struct net_device *dev, u8 dst, rtm->rtm_type = RTN_UNICAST; rtm->rtm_flags = 0; if (nla_put_u8(skb, RTA_DST, dst) || - nla_put_u32(skb, RTA_OIF, dev->ifindex)) + nla_put_u32(skb, RTA_OIF, READ_ONCE(dev->ifindex))) goto nla_put_failure; nlmsg_end(skb, nlh); return 0; @@ -193,7 +193,7 @@ void rtm_phonet_notify(int event, struct net_device *dev, u8 dst) struct sk_buff *skb; int err = -ENOBUFS; - skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + + skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct rtmsg)) + nla_total_size(1) + nla_total_size(4), GFP_KERNEL); if (skb == NULL) goto errout; @@ -263,6 +263,7 @@ static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh, static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); + int err = 0; u8 addr; rcu_read_lock(); @@ -272,16 +273,16 @@ static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb) if (!dev) continue; - if (fill_route(skb, dev, addr << 2, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, RTM_NEWROUTE) < 0) - goto out; + err = fill_route(skb, dev, addr << 2, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, RTM_NEWROUTE); + if (err < 0) + break; } - -out: rcu_read_unlock(); cb->args[0] = addr; - return skb->len; + return err; } int __init phonet_netlink_register(void) @@ -301,6 +302,6 @@ int __init phonet_netlink_register(void) rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_DELROUTE, route_doit, NULL, 0); rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_GETROUTE, - NULL, route_dumpit, 0); + NULL, route_dumpit, RTNL_FLAG_DUMP_UNLOCKED); return 0; } diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 1018340d89a7..5ce0b3ee5def 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -292,18 +292,17 @@ out: } static int pn_socket_accept(struct socket *sock, struct socket *newsock, - int flags, bool kern) + struct proto_accept_arg *arg) { struct sock *sk = sock->sk; struct sock *newsk; - int err; if (unlikely(sk->sk_state != TCP_LISTEN)) return -EINVAL; - newsk = sk->sk_prot->accept(sk, flags, &err, kern); + newsk = sk->sk_prot->accept(sk, arg); if (!newsk) - return err; + return arg->err; lock_sock(newsk); sock_graft(newsk, newsock); diff --git a/net/phonet/sysctl.c b/net/phonet/sysctl.c index 0d0bf41381c2..82fc22467a09 100644 --- a/net/phonet/sysctl.c +++ b/net/phonet/sysctl.c @@ -81,7 +81,6 @@ static struct ctl_table phonet_table[] = { .mode = 0644, .proc_handler = proc_local_port_range, }, - { } }; int __init phonet_sysctl_init(void) diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c index abb0c70ffc8b..654a3cc0d347 100644 --- a/net/qrtr/ns.c +++ b/net/qrtr/ns.c @@ -725,6 +725,24 @@ int qrtr_ns_init(void) if (ret < 0) goto err_wq; + /* As the qrtr ns socket owner and creator is the same 
module, we have * to decrease the qrtr module reference count to guarantee that it * stays at zero after the ns socket is created; otherwise the * "rmmod" command cannot remove the qrtr module once it has been * loaded successfully. * * However, the reference count is increased twice in * sock_create_kern(): one is to increase the reference count of the * owner of the qrtr socket's proto_ops struct; the other is to * increment the reference count of the owner of the qrtr proto * struct. Therefore, we must decrement the module reference count * twice to ensure that it stays at zero after the server's listening * socket is created. Of course, we must bump the module reference * count twice as well before the socket is closed. */ + module_put(qrtr_ns.sock->ops->owner); + module_put(qrtr_ns.sock->sk->sk_prot_creator->owner); + return 0; err_wq: @@ -739,6 +757,15 @@ void qrtr_ns_remove(void) { cancel_work_sync(&qrtr_ns.work); destroy_workqueue(qrtr_ns.workqueue); + + /* sock_release() expects the two references that were put during * qrtr_ns_init(). This function is only called during module remove, * so try_stop_module() has already set the refcnt to 0. Use * __module_get() instead of try_module_get() to successfully take two * references. */ + __module_get(qrtr_ns.sock->ops->owner); + __module_get(qrtr_ns.sock->sk->sk_prot_creator->owner); sock_release(qrtr_ns.sock); } EXPORT_SYMBOL_GPL(qrtr_ns_remove); diff --git a/net/rds/ib_sysctl.c b/net/rds/ib_sysctl.c index e4e41b3afce7..2af678e71e3c 100644 --- a/net/rds/ib_sysctl.c +++ b/net/rds/ib_sysctl.c @@ -103,7 +103,6 @@ static struct ctl_table rds_ib_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { } }; void rds_ib_sysctl_exit(void) diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c index e381bbcd9cc1..025f518a4349 100644 --- a/net/rds/sysctl.c +++ b/net/rds/sysctl.c @@ -89,7 +89,6 @@ static struct ctl_table rds_sysctl_rds_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { } }; void rds_sysctl_exit(void) diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 2dba7505b414..d8111ac83bb6 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -86,7 +86,6 @@ static struct ctl_table rds_tcp_sysctl_table[] = { .proc_handler = rds_tcp_skbuf_handler, .extra1 = &rds_tcp_min_rcvbuf, }, - { } }; u32 rds_tcp_write_seq(struct rds_tcp_connection *tc) diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 05008ce5c421..d89bd8d0c354 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -105,6 +105,10 @@ int rds_tcp_accept_one(struct socket *sock) int conn_state; struct rds_conn_path *cp; struct in6_addr *my_addr, *peer_addr; + struct proto_accept_arg arg = { + .flags = O_NONBLOCK, + .kern = true, + }; #if !IS_ENABLED(CONFIG_IPV6) struct in6_addr saddr, daddr; #endif @@ -119,7 +123,7 @@ int rds_tcp_accept_one(struct socket *sock) if (ret) goto out; - ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, true); + ret = sock->ops->accept(sock, new_sock, &arg); if (ret < 0) goto out; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index ef81d019b20f..59050caab65c 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -919,8 +919,8 @@ out_release: return err; } -static int rose_accept(struct socket *sock, struct socket *newsock, int flags, - bool kern) +static int rose_accept(struct socket *sock, struct socket *newsock, + struct proto_accept_arg *arg) { struct sk_buff *skb; struct sock *newsk; @@ -953,7 +953,7 @@ static int rose_accept(struct socket *sock, struct socket *newsock, 
int flags, if (skb) break; - if (flags & O_NONBLOCK) { + if (arg->flags & O_NONBLOCK) { err = -EWOULDBLOCK; break; } diff --git a/net/rose/sysctl_net_rose.c b/net/rose/sysctl_net_rose.c index d391d7758f52..d801315b7083 100644 --- a/net/rose/sysctl_net_rose.c +++ b/net/rose/sysctl_net_rose.c @@ -112,7 +112,6 @@ static struct ctl_table rose_table[] = { .extra1 = &min_window, .extra2 = &max_window }, - { } }; void __init rose_register_sysctl(void) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 08c0a32db8c7..08de24658f4f 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -697,7 +697,7 @@ struct rxrpc_call { * packets) rather than bytes. */ #define RXRPC_TX_SMSS RXRPC_JUMBO_DATALEN -#define RXRPC_MIN_CWND (RXRPC_TX_SMSS > 2190 ? 2 : RXRPC_TX_SMSS > 1095 ? 3 : 4) +#define RXRPC_MIN_CWND 4 u8 cong_cwnd; /* Congestion window size */ u8 cong_extra; /* Extra to send for congestion management */ u8 cong_ssthresh; /* Slow-start threshold */ diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 01fa71e8b1f7..f9e983a12c14 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -174,12 +174,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, call->rx_winsize = rxrpc_rx_window_size; call->tx_winsize = 16; - if (RXRPC_TX_SMSS > 2190) - call->cong_cwnd = 2; - else if (RXRPC_TX_SMSS > 1095) - call->cong_cwnd = 3; - else - call->cong_cwnd = 4; + call->cong_cwnd = RXRPC_MIN_CWND; call->cong_ssthresh = RXRPC_TX_MAX_WINDOW; call->rxnet = rxnet; diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 0af4642aeec4..1539d315afe7 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -119,18 +119,13 @@ struct rxrpc_connection *rxrpc_find_client_connection_rcu(struct rxrpc_local *lo switch (srx->transport.family) { case AF_INET: if (peer->srx.transport.sin.sin_port != - srx->transport.sin.sin_port || - peer->srx.transport.sin.sin_addr.s_addr != - srx->transport.sin.sin_addr.s_addr) + srx->transport.sin.sin_port) goto not_found; break; #ifdef CONFIG_AF_RXRPC_IPV6 case AF_INET6: if (peer->srx.transport.sin6.sin6_port != - srx->transport.sin6.sin6_port || - memcmp(&peer->srx.transport.sin6.sin6_addr, - &srx->transport.sin6.sin6_addr, - sizeof(struct in6_addr)) != 0) + srx->transport.sin6.sin6_port) goto not_found; break; #endif diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 3dedb8c0618c..16d49a861dbb 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -9,6 +9,17 @@ #include "ar-internal.h" +/* Override priority when generating ACKs for received DATA */ +static const u8 rxrpc_ack_priority[RXRPC_ACK__INVALID] = { + [RXRPC_ACK_IDLE] = 1, + [RXRPC_ACK_DELAY] = 2, + [RXRPC_ACK_REQUESTED] = 3, + [RXRPC_ACK_DUPLICATE] = 4, + [RXRPC_ACK_EXCEEDS_WINDOW] = 5, + [RXRPC_ACK_NOSPACE] = 6, + [RXRPC_ACK_OUT_OF_SEQUENCE] = 7, +}; + static void rxrpc_proto_abort(struct rxrpc_call *call, rxrpc_seq_t seq, enum rxrpc_abort_reason why) { @@ -365,7 +376,7 @@ static void rxrpc_input_queue_data(struct rxrpc_call *call, struct sk_buff *skb, * Process a DATA packet. 
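With jumbo packets, each subpacket used to propose its own ACK, so a single received frame could emit several. The priority table above lets the DATA input path remember just one winner across all subpackets; the following hunks implement a selection rule along these lines (a sketch, with ties going to RXRPC_ACK_REQUESTED so the requesting subpacket's serial is the one acknowledged):

static void ack_keep_highest(int ack_reason, rxrpc_serial_t serial,
			     int *best_reason, rxrpc_serial_t *best_serial)
{
	if (ack_reason < 0)
		return;		/* this subpacket proposed no ACK */
	if (rxrpc_ack_priority[ack_reason] > rxrpc_ack_priority[*best_reason] ||
	    (rxrpc_ack_priority[ack_reason] == rxrpc_ack_priority[*best_reason] &&
	     ack_reason == RXRPC_ACK_REQUESTED)) {
		*best_reason = ack_reason;
		*best_serial = serial;
	}
}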
*/ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb, - bool *_notify) + bool *_notify, rxrpc_serial_t *_ack_serial, int *_ack_reason) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct sk_buff *oos; @@ -418,8 +429,6 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb, /* Send an immediate ACK if we fill in a hole */ else if (!skb_queue_empty(&call->rx_oos_queue)) ack_reason = RXRPC_ACK_DELAY; - else - call->ackr_nr_unacked++; window++; if (after(window, wtop)) { @@ -497,12 +506,16 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb, } send_ack: - if (ack_reason >= 0) - rxrpc_send_ACK(call, ack_reason, serial, - rxrpc_propose_ack_input_data); - else - rxrpc_propose_delay_ACK(call, serial, - rxrpc_propose_ack_input_data); + if (ack_reason >= 0) { + if (rxrpc_ack_priority[ack_reason] > rxrpc_ack_priority[*_ack_reason]) { + *_ack_serial = serial; + *_ack_reason = ack_reason; + } else if (rxrpc_ack_priority[ack_reason] == rxrpc_ack_priority[*_ack_reason] && + ack_reason == RXRPC_ACK_REQUESTED) { + *_ack_serial = serial; + *_ack_reason = ack_reason; + } + } } /* @@ -513,9 +526,11 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb struct rxrpc_jumbo_header jhdr; struct rxrpc_skb_priv *sp = rxrpc_skb(skb), *jsp; struct sk_buff *jskb; + rxrpc_serial_t ack_serial = 0; unsigned int offset = sizeof(struct rxrpc_wire_header); unsigned int len = skb->len - offset; bool notify = false; + int ack_reason = 0; while (sp->hdr.flags & RXRPC_JUMBO_PACKET) { if (len < RXRPC_JUMBO_SUBPKTLEN) @@ -535,7 +550,7 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb jsp = rxrpc_skb(jskb); jsp->offset = offset; jsp->len = RXRPC_JUMBO_DATALEN; - rxrpc_input_data_one(call, jskb, ¬ify); + rxrpc_input_data_one(call, jskb, ¬ify, &ack_serial, &ack_reason); rxrpc_free_skb(jskb, rxrpc_skb_put_jumbo_subpacket); sp->hdr.flags = jhdr.flags; @@ -548,7 +563,16 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb sp->offset = offset; sp->len = len; - rxrpc_input_data_one(call, skb, ¬ify); + rxrpc_input_data_one(call, skb, ¬ify, &ack_serial, &ack_reason); + + if (ack_reason > 0) { + rxrpc_send_ACK(call, ack_reason, ack_serial, + rxrpc_propose_ack_input_data); + } else { + call->ackr_nr_unacked++; + rxrpc_propose_delay_ACK(call, sp->hdr.serial, + rxrpc_propose_ack_input_data); + } if (notify) { trace_rxrpc_notify_socket(call->debug_id, sp->hdr.serial); rxrpc_notify_socket(call); @@ -685,9 +709,6 @@ static void rxrpc_input_ack_trailer(struct rxrpc_call *call, struct sk_buff *skb call->tx_winsize = rwind; } - if (call->cong_ssthresh > rwind) - call->cong_ssthresh = rwind; - mtu = min(ntohl(trailer->maxMTU), ntohl(trailer->ifMTU)); peer = call->peer; diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c index f2701068ed9e..6716c021a532 100644 --- a/net/rxrpc/insecure.c +++ b/net/rxrpc/insecure.c @@ -19,7 +19,7 @@ static int none_init_connection_security(struct rxrpc_connection *conn, */ static struct rxrpc_txbuf *none_alloc_txbuf(struct rxrpc_call *call, size_t remain, gfp_t gfp) { - return rxrpc_alloc_data_txbuf(call, min_t(size_t, remain, RXRPC_JUMBO_DATALEN), 0, gfp); + return rxrpc_alloc_data_txbuf(call, min_t(size_t, remain, RXRPC_JUMBO_DATALEN), 1, gfp); } static int none_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index f1a68270862d..48a1475e6b06 100644 --- a/net/rxrpc/rxkad.c 
+++ b/net/rxrpc/rxkad.c @@ -155,7 +155,7 @@ static struct rxrpc_txbuf *rxkad_alloc_txbuf(struct rxrpc_call *call, size_t rem switch (call->conn->security_level) { default: space = min_t(size_t, remain, RXRPC_JUMBO_DATALEN); - return rxrpc_alloc_data_txbuf(call, space, 0, gfp); + return rxrpc_alloc_data_txbuf(call, space, 1, gfp); case RXRPC_SECURITY_AUTH: shdr = sizeof(struct rxkad_level1_hdr); break; diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index c9bedd0e2d86..9bf9a1f6e4cb 100644 --- a/net/rxrpc/sysctl.c +++ b/net/rxrpc/sysctl.c @@ -127,7 +127,6 @@ static struct ctl_table rxrpc_sysctl_table[] = { .extra1 = (void *)SYSCTL_ONE, .extra2 = (void *)&four, }, - { } }; int __init rxrpc_sysctl_init(void) diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c index e0679658d9de..c3913d8a50d3 100644 --- a/net/rxrpc/txbuf.c +++ b/net/rxrpc/txbuf.c @@ -21,20 +21,20 @@ struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t data_ { struct rxrpc_wire_header *whdr; struct rxrpc_txbuf *txb; - size_t total, hoff = 0; + size_t total, hoff; void *buf; txb = kmalloc(sizeof(*txb), gfp); if (!txb) return NULL; - if (data_align) - hoff = round_up(sizeof(*whdr), data_align) - sizeof(*whdr); + hoff = round_up(sizeof(*whdr), data_align) - sizeof(*whdr); total = hoff + sizeof(*whdr) + data_size; + data_align = umax(data_align, L1_CACHE_BYTES); mutex_lock(&call->conn->tx_data_alloc_lock); - buf = __page_frag_alloc_align(&call->conn->tx_data_alloc, total, gfp, - ~(data_align - 1) & ~(L1_CACHE_BYTES - 1)); + buf = page_frag_alloc_align(&call->conn->tx_data_alloc, total, gfp, + data_align); mutex_unlock(&call->conn->tx_data_alloc_lock); if (!buf) { kfree(txb); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 60239378d43f..74afc210527d 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1334,7 +1334,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev, * before again attaching a qdisc. 
*/ if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) { - dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN; + WRITE_ONCE(dev->tx_queue_len, DEFAULT_TX_QUEUE_LEN); netdev_info(dev, "Caught tx_queue_len zero misconfig\n"); } @@ -1389,6 +1389,7 @@ err_out4: ops->destroy(sch); qdisc_put_stab(rtnl_dereference(sch->stab)); err_out3: + lockdep_unregister_key(&sch->root_lock_key); netdev_put(dev, &sch->dev_tracker); qdisc_free(sch); err_out2: diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 4a2c763e2d11..2a637a17061b 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -506,19 +506,22 @@ static void dev_watchdog(struct timer_list *t) unsigned int timedout_ms = 0; unsigned int i; unsigned long trans_start; + unsigned long oldest_start = jiffies; for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq; txq = netdev_get_tx_queue(dev, i); trans_start = READ_ONCE(txq->trans_start); - if (netif_xmit_stopped(txq) && - time_after(jiffies, (trans_start + - dev->watchdog_timeo))) { + if (!netif_xmit_stopped(txq)) + continue; + if (time_after(jiffies, trans_start + dev->watchdog_timeo)) { timedout_ms = jiffies_to_msecs(jiffies - trans_start); atomic_long_inc(&txq->trans_timeout); break; } + if (time_after(oldest_start, trans_start)) + oldest_start = trans_start; } if (unlikely(timedout_ms)) { @@ -531,7 +534,7 @@ static void dev_watchdog(struct timer_list *t) netif_unfreeze_queues(dev); } if (!mod_timer(&dev->watchdog_timer, - round_jiffies(jiffies + + round_jiffies(oldest_start + dev->watchdog_timeo))) release = false; } @@ -945,7 +948,9 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, __skb_queue_head_init(&sch->gso_skb); __skb_queue_head_init(&sch->skb_bad_txq); gnet_stats_basic_sync_init(&sch->bstats); + lockdep_register_key(&sch->root_lock_key); spin_lock_init(&sch->q.lock); + lockdep_set_class(&sch->q.lock, &sch->root_lock_key); if (ops->static_flags & TCQ_F_CPUSTATS) { sch->cpu_bstats = @@ -980,6 +985,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, return sch; errout1: + lockdep_unregister_key(&sch->root_lock_key); kfree(sch); errout: return ERR_PTR(err); @@ -1068,6 +1074,7 @@ static void __qdisc_destroy(struct Qdisc *qdisc) if (ops->destroy) ops->destroy(qdisc); + lockdep_unregister_key(&qdisc->root_lock_key); module_put(ops->owner); netdev_put(dev, &qdisc->dev_tracker); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 93e6fb56f3b5..ff3de37874e4 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1039,13 +1039,6 @@ static void htb_work_func(struct work_struct *work) rcu_read_unlock(); } -static void htb_set_lockdep_class_child(struct Qdisc *q) -{ - static struct lock_class_key child_key; - - lockdep_set_class(qdisc_lock(q), &child_key); -} - static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt) { return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt); @@ -1132,7 +1125,6 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt, return -ENOMEM; } - htb_set_lockdep_class_child(qdisc); q->direct_qdiscs[ntx] = qdisc; qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; } @@ -1468,7 +1460,6 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, } if (q->offload) { - htb_set_lockdep_class_child(new); /* One ref for cl->leaf.q, the other for dev_queue->qdisc. 
*/ qdisc_refcount_inc(new); old_q = htb_graft_helper(dev_queue, new); @@ -1733,11 +1724,8 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg, new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, cl->parent->common.classid, NULL); - if (q->offload) { - if (new_q) - htb_set_lockdep_class_child(new_q); + if (q->offload) htb_parent_to_leaf_offload(sch, dev_queue, new_q); - } } sch_tree_lock(sch); @@ -1947,13 +1935,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, classid, NULL); if (q->offload) { - if (new_q) { - htb_set_lockdep_class_child(new_q); - /* One ref for cl->leaf.q, the other for - * dev_queue->qdisc. - */ + /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */ + if (new_q) qdisc_refcount_inc(new_q); - } old_q = htb_graft_helper(dev_queue, new_q); /* No qdisc_put needed. */ WARN_ON(!(old_q->flags & TCQ_F_BUILTIN)); diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index e66f4afb920d..3b9245a3c767 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -608,6 +608,7 @@ static void sfq_perturbation(struct timer_list *t) struct Qdisc *sch = q->sch; spinlock_t *root_lock; siphash_key_t nkey; + int period; get_random_bytes(&nkey, sizeof(nkey)); rcu_read_lock(); @@ -618,8 +619,12 @@ static void sfq_perturbation(struct timer_list *t) sfq_rehash(sch); spin_unlock(root_lock); - if (q->perturb_period) - mod_timer(&q->perturb_timer, jiffies + q->perturb_period); + /* q->perturb_period can change under us from + * sfq_change() and sfq_destroy(). + */ + period = READ_ONCE(q->perturb_period); + if (period) + mod_timer(&q->perturb_timer, jiffies + period); rcu_read_unlock(); } @@ -662,7 +667,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) q->quantum = ctl->quantum; q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); } - q->perturb_period = ctl->perturb_period * HZ; + WRITE_ONCE(q->perturb_period, ctl->perturb_period * HZ); if (ctl->flows) q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS); if (ctl->divisor) { @@ -724,7 +729,7 @@ static void sfq_destroy(struct Qdisc *sch) struct sfq_sched_data *q = qdisc_priv(sch); tcf_block_put(q->block); - q->perturb_period = 0; + WRITE_ONCE(q->perturb_period, 0); del_timer_sync(&q->perturb_timer); sfq_free(q->ht); sfq_free(q->slots); diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 59304611dc00..8badec6d82a2 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -78,7 +78,7 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) struct net_device *dev = qdisc_dev(sch); struct teql_sched_data *q = qdisc_priv(sch); - if (q->q.qlen < dev->tx_queue_len) { + if (q->q.qlen < READ_ONCE(dev->tx_queue_len)) { __skb_queue_tail(&q->q, skb); return NET_XMIT_SUCCESS; } @@ -424,7 +424,7 @@ static int teql_master_mtu(struct net_device *dev, int new_mtu) } while ((q = NEXT_SLAVE(q)) != m->slaves); } - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); return 0; } diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 24368f755ab1..f7b809c0d142 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -415,7 +415,7 @@ out: if (!IS_ERR_OR_NULL(dst)) { struct rt6_info *rt; - rt = (struct rt6_info *)dst; + rt = dst_rt6_info(dst); t->dst_cookie = rt6_get_cookie(rt); pr_debug("rt6_dst:%pI6/%d rt6_src:%pI6\n", &rt->rt6i_dst.addr, rt->rt6i_dst.plen, diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index e849f368ed91..5a7436a13b74 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -552,7 +552,7 @@ static void 
sctp_v4_get_saddr(struct sctp_sock *sk, struct flowi *fl) { union sctp_addr *saddr = &t->saddr; - struct rtable *rt = (struct rtable *)t->dst; + struct rtable *rt = dst_rtable(t->dst); if (rt) { saddr->v4.sin_family = AF_INET; @@ -1085,7 +1085,7 @@ static inline int sctp_v4_xmit(struct sk_buff *skb, struct sctp_transport *t) skb_reset_inner_mac_header(skb); skb_reset_inner_transport_header(skb); skb_set_inner_ipproto(skb, IPPROTO_SCTP); - udp_tunnel_xmit_skb((struct rtable *)dst, sk, skb, fl4->saddr, + udp_tunnel_xmit_skb(dst_rtable(dst), sk, skb, fl4->saddr, fl4->daddr, dscp, ip4_dst_hoplimit(dst), df, sctp_sk(sk)->udp_port, t->encap_port, false, false); return 0; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 08fdf1251f46..5adf0c0a6c1a 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -38,6 +38,7 @@ #include <linux/inet.h> #include <linux/slab.h> #include <net/sock.h> +#include <net/proto_memory.h> #include <net/inet_ecn.h> #include <linux/skbuff.h> #include <net/sctp/sctp.h> diff --git a/net/sctp/socket.c b/net/sctp/socket.c index e416b6d3d270..c009383369b2 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4847,7 +4847,7 @@ static int sctp_disconnect(struct sock *sk, int flags) * descriptor will be returned from accept() to represent the newly * formed association. */ -static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern) +static struct sock *sctp_accept(struct sock *sk, struct proto_accept_arg *arg) { struct sctp_sock *sp; struct sctp_endpoint *ep; @@ -4871,7 +4871,7 @@ static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern) goto out; } - timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK); error = sctp_wait_for_accept(sk, timeo); if (error) @@ -4882,7 +4882,7 @@ static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern) */ asoc = list_entry(ep->asocs.next, struct sctp_association, asocs); - newsk = sp->pf->create_accept_sk(sk, asoc, kern); + newsk = sp->pf->create_accept_sk(sk, asoc, arg->kern); if (!newsk) { error = -ENOMEM; goto out; @@ -4899,7 +4899,7 @@ static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern) out: release_sock(sk); - *err = error; + arg->err = error; return newsk; } @@ -7119,6 +7119,7 @@ static int sctp_getsockopt_assoc_ids(struct sock *sk, int len, struct sctp_sock *sp = sctp_sk(sk); struct sctp_association *asoc; struct sctp_assoc_ids *ids; + size_t ids_size; u32 num = 0; if (sctp_style(sk, TCP)) @@ -7131,11 +7132,11 @@ static int sctp_getsockopt_assoc_ids(struct sock *sk, int len, num++; } - if (len < sizeof(struct sctp_assoc_ids) + sizeof(sctp_assoc_t) * num) + ids_size = struct_size(ids, gaids_assoc_id, num); + if (len < ids_size) return -EINVAL; - len = sizeof(struct sctp_assoc_ids) + sizeof(sctp_assoc_t) * num; - + len = ids_size; ids = kmalloc(len, GFP_USER | __GFP_NOWARN); if (unlikely(!ids)) return -ENOMEM; diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 25bdf17c7262..61c6f3027e7f 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -80,8 +80,6 @@ static struct ctl_table sctp_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - - { /* sentinel */ } }; /* The following index defines are used in sctp_sysctl_net_register(). 
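For context: the { /* sentinel */ } removals in the sysctl hunks above and below rely on registration interfaces that take an explicit entry count, so per-net loops can iterate by ARRAY_SIZE() instead of stopping at an empty terminator. A minimal sketch of the pattern, with invented "example" names (not from this series):

#include <linux/sysctl.h>
#include <net/net_namespace.h>

static int example_value;

static struct ctl_table example_table[] = {
	{
		.procname	= "example_value",
		.data		= &example_value,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	/* no { } sentinel: the entry count is passed at registration */
};

static int __net_init example_sysctl_register(struct net *net)
{
	struct ctl_table_header *hdr;

	/* register_net_sysctl_sz() receives the table size explicitly */
	hdr = register_net_sysctl_sz(net, "net/example", example_table,
				     ARRAY_SIZE(example_table));
	return hdr ? 0 : -ENOMEM;
}
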
@@ -384,8 +382,6 @@ static struct ctl_table sctp_net_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = &pf_expose_max, }, - - { /* sentinel */ } }; static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write, @@ -597,6 +593,7 @@ static int proc_sctp_do_probe_interval(struct ctl_table *ctl, int write, int sctp_sysctl_net_register(struct net *net) { + size_t table_size = ARRAY_SIZE(sctp_net_table); struct ctl_table *table; int i; @@ -604,7 +601,7 @@ int sctp_sysctl_net_register(struct net *net) if (!table) return -ENOMEM; - for (i = 0; table[i].data; i++) + for (i = 0; i < table_size; i++) table[i].data += (char *)(&net->sctp) - (char *)&init_net.sctp; table[SCTP_RTO_MIN_IDX].extra2 = &net->sctp.rto_max; @@ -613,8 +610,7 @@ int sctp_sysctl_net_register(struct net *net) table[SCTP_PS_RETRANS_IDX].extra1 = &net->sctp.pf_retrans; net->sctp.sysctl_header = register_net_sysctl_sz(net, "net/sctp", - table, - ARRAY_SIZE(sctp_net_table)); + table, table_size); if (net->sctp.sysctl_header == NULL) { kfree(table); return -ENOMEM; diff --git a/net/smc/Kconfig b/net/smc/Kconfig index 746be3996768..ba5e6a2dd2fd 100644 --- a/net/smc/Kconfig +++ b/net/smc/Kconfig @@ -20,3 +20,16 @@ config SMC_DIAG smcss. if unsure, say Y. + +config SMC_LO + bool "SMC intra-OS shortcut with loopback-ism" + depends on SMC + default n + help + SMC_LO enables the creation of an Emulated-ISM device named + loopback-ism in SMC and makes use of it for transferring data + when communication occurs within the same OS. This helps in + convenient testing of SMC-D since loopback-ism is independent + of architecture or hardware. + + if unsure, say N. diff --git a/net/smc/Makefile b/net/smc/Makefile index 875efcd126a2..2c510d543058 100644 --- a/net/smc/Makefile +++ b/net/smc/Makefile @@ -6,3 +6,4 @@ smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o smc-y += smc_tracepoint.o smc-$(CONFIG_SYSCTL) += smc_sysctl.o +smc-$(CONFIG_SMC_LO) += smc_loopback.o diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index e8dcd28a554c..e50a286fd0fb 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -53,6 +53,7 @@ #include "smc_stats.h" #include "smc_tracepoint.h" #include "smc_sysctl.h" +#include "smc_loopback.h" static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group * creation on server @@ -1435,6 +1436,14 @@ static int smc_connect_ism(struct smc_sock *smc, } smc_conn_save_peer_info(smc, aclc); + + if (smc_ism_support_dmb_nocopy(smc->conn.lgr->smcd)) { + rc = smcd_buf_attach(smc); + if (rc) { + rc = SMC_CLC_DECL_MEM; /* try to fallback */ + goto connect_abort; + } + } smc_close_init(smc); smc_rx_init(smc); smc_tx_init(smc); @@ -2539,6 +2548,14 @@ static void smc_listen_work(struct work_struct *work) mutex_unlock(&smc_server_lgr_pending); } smc_conn_save_peer_info(new_smc, cclc); + + if (ini->is_smcd && + smc_ism_support_dmb_nocopy(new_smc->conn.lgr->smcd)) { + rc = smcd_buf_attach(new_smc); + if (rc) + goto out_decl; + } + smc_listen_out_connected(new_smc); SMC_STAT_SERV_SUCC_INC(sock_net(newclcsock->sk), ini); goto out_free; @@ -2672,7 +2689,7 @@ out: } static int smc_accept(struct socket *sock, struct socket *new_sock, - int flags, bool kern) + struct proto_accept_arg *arg) { struct sock *sk = sock->sk, *nsk; DECLARE_WAITQUEUE(wait, current); @@ -2691,7 +2708,7 @@ static int smc_accept(struct socket *sock, struct socket *new_sock, } /* Wait for an incoming connection */ - timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + 
timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK); add_wait_queue_exclusive(sk_sleep(sk), &wait); while (!(nsk = smc_accept_dequeue(sk, new_sock))) { set_current_state(TASK_INTERRUPTIBLE); @@ -2718,7 +2735,7 @@ static int smc_accept(struct socket *sock, struct socket *new_sock, if (rc) goto out; - if (lsmc->sockopt_defer_accept && !(flags & O_NONBLOCK)) { + if (lsmc->sockopt_defer_accept && !(arg->flags & O_NONBLOCK)) { /* wait till data arrives on the socket */ timeo = msecs_to_jiffies(lsmc->sockopt_defer_accept * MSEC_PER_SEC); @@ -3555,15 +3572,23 @@ static int __init smc_init(void) goto out_sock; } + rc = smc_loopback_init(); + if (rc) { + pr_err("%s: smc_loopback_init fails with %d\n", __func__, rc); + goto out_ib; + } + rc = tcp_register_ulp(&smc_ulp_ops); if (rc) { pr_err("%s: tcp_ulp_register fails with %d\n", __func__, rc); - goto out_ib; + goto out_lo; } static_branch_enable(&tcp_have_smc); return 0; +out_lo: + smc_loopback_exit(); out_ib: smc_ib_unregister_client(); out_sock: @@ -3601,6 +3626,7 @@ static void __exit smc_exit(void) tcp_unregister_ulp(&smc_ulp_ops); sock_unregister(PF_SMC); smc_core_exit(); + smc_loopback_exit(); smc_ib_unregister_client(); smc_ism_exit(); destroy_workqueue(smc_close_wq); diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 3c06625ceb20..619b3bab3824 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -18,6 +18,7 @@ #include "smc_tx.h" #include "smc_rx.h" #include "smc_close.h" +#include "smc_ism.h" /********************************** send *************************************/ @@ -255,6 +256,14 @@ int smcd_cdc_msg_send(struct smc_connection *conn) return rc; smc_curs_copy(&conn->rx_curs_confirmed, &curs, conn); conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0; + + if (smc_ism_support_dmb_nocopy(conn->lgr->smcd)) + /* if local sndbuf shares the same memory region with + * peer DMB, then don't update the tx_curs_fin + * and sndbuf_space until peer has consumed the data. + */ + return 0; + /* Calculate transmitted data and increment free send buffer space */ diff = smc_curs_diff(conn->sndbuf_desc->len, &conn->tx_curs_fin, &conn->tx_curs_sent); @@ -266,7 +275,7 @@ int smcd_cdc_msg_send(struct smc_connection *conn) smc_curs_copy(&conn->tx_curs_fin, &conn->tx_curs_sent, conn); smc_tx_sndbuf_nonfull(smc); - return rc; + return 0; } /********************************* receive ***********************************/ @@ -323,7 +332,7 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, { union smc_host_cursor cons_old, prod_old; struct smc_connection *conn = &smc->conn; - int diff_cons, diff_prod; + int diff_cons, diff_prod, diff_tx; smc_curs_copy(&prod_old, &conn->local_rx_ctrl.prod, conn); smc_curs_copy(&cons_old, &conn->local_rx_ctrl.cons, conn); @@ -339,6 +348,29 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, atomic_add(diff_cons, &conn->peer_rmbe_space); /* guarantee 0 <= peer_rmbe_space <= peer_rmbe_size */ smp_mb__after_atomic(); + + /* if local sndbuf shares the same memory region with + * peer RMB, then update tx_curs_fin and sndbuf_space + * here since peer has already consumed the data. + */ + if (conn->lgr->is_smcd && + smc_ism_support_dmb_nocopy(conn->lgr->smcd)) { + /* Calculate consumed data and + * increment free send buffer space. 
+ */ + diff_tx = smc_curs_diff(conn->sndbuf_desc->len, + &conn->tx_curs_fin, + &conn->local_rx_ctrl.cons); + /* increase local sndbuf space and fin_curs */ + smp_mb__before_atomic(); + atomic_add(diff_tx, &conn->sndbuf_space); + /* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */ + smp_mb__after_atomic(); + smc_curs_copy(&conn->tx_curs_fin, + &conn->local_rx_ctrl.cons, conn); + + smc_tx_sndbuf_nonfull(smc); + } } diff_prod = smc_curs_diff(conn->rmb_desc->len, &prod_old, diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 9b84d5897aa5..fafdb97adfad 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1149,6 +1149,20 @@ static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb, } } +static void smcd_buf_detach(struct smc_connection *conn) +{ + struct smcd_dev *smcd = conn->lgr->smcd; + u64 peer_token = conn->peer_token; + + if (!conn->sndbuf_desc) + return; + + smc_ism_detach_dmb(smcd, peer_token); + + kfree(conn->sndbuf_desc); + conn->sndbuf_desc = NULL; +} + static void smc_buf_unuse(struct smc_connection *conn, struct smc_link_group *lgr) { @@ -1192,6 +1206,8 @@ void smc_conn_free(struct smc_connection *conn) if (lgr->is_smcd) { if (!list_empty(&lgr->list)) smc_ism_unset_conn(conn); + if (smc_ism_support_dmb_nocopy(lgr->smcd)) + smcd_buf_detach(conn); tasklet_kill(&conn->rx_tsklet); } else { smc_cdc_wait_pend_tx_wr(conn); @@ -1445,6 +1461,8 @@ static void smc_conn_kill(struct smc_connection *conn, bool soft) smc_sk_wake_ups(smc); if (conn->lgr->is_smcd) { smc_ism_unset_conn(conn); + if (smc_ism_support_dmb_nocopy(conn->lgr->smcd)) + smcd_buf_detach(conn); if (soft) tasklet_kill(&conn->rx_tsklet); else @@ -2464,12 +2482,18 @@ int smc_buf_create(struct smc_sock *smc, bool is_smcd) int rc; /* create send buffer */ + if (is_smcd && + smc_ism_support_dmb_nocopy(smc->conn.lgr->smcd)) + goto create_rmb; + rc = __smc_buf_create(smc, is_smcd, false); if (rc) return rc; + +create_rmb: /* create rmb */ rc = __smc_buf_create(smc, is_smcd, true); - if (rc) { + if (rc && smc->conn.sndbuf_desc) { down_write(&smc->conn.lgr->sndbufs_lock); list_del(&smc->conn.sndbuf_desc->list); up_write(&smc->conn.lgr->sndbufs_lock); @@ -2479,6 +2503,41 @@ int smc_buf_create(struct smc_sock *smc, bool is_smcd) return rc; } +int smcd_buf_attach(struct smc_sock *smc) +{ + struct smc_connection *conn = &smc->conn; + struct smcd_dev *smcd = conn->lgr->smcd; + u64 peer_token = conn->peer_token; + struct smc_buf_desc *buf_desc; + int rc; + + buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL); + if (!buf_desc) + return -ENOMEM; + + /* The ghost sndbuf_desc describes the same memory region as + * peer RMB. Its lifecycle is consistent with the connection's + * and it will be freed with the connections instead of the + * link group. 
+ */ + rc = smc_ism_attach_dmb(smcd, peer_token, buf_desc); + if (rc) + goto free; + + smc->sk.sk_sndbuf = buf_desc->len; + buf_desc->cpu_addr = + (u8 *)buf_desc->cpu_addr + sizeof(struct smcd_cdc_msg); + buf_desc->len -= sizeof(struct smcd_cdc_msg); + conn->sndbuf_desc = buf_desc; + conn->sndbuf_desc->used = 1; + atomic_set(&conn->sndbuf_space, conn->sndbuf_desc->len); + return 0; + +free: + kfree(buf_desc); + return rc; +} + static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr) { int i; diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 1f175376037b..d93cf51dbd7c 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -557,6 +557,7 @@ void smc_smcd_terminate(struct smcd_dev *dev, struct smcd_gid *peer_gid, void smc_smcd_terminate_all(struct smcd_dev *dev); void smc_smcr_terminate_all(struct smc_ib_device *smcibdev); int smc_buf_create(struct smc_sock *smc, bool is_smcd); +int smcd_buf_attach(struct smc_sock *smc); int smc_uncompress_bufsize(u8 compressed); int smc_rmb_rtoken_handling(struct smc_connection *conn, struct smc_link *link, struct smc_clc_msg_accept_confirm *clc); diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 97704a9e84c7..9297dc20bfe2 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -209,13 +209,18 @@ int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr, if (IS_ERR(rt)) goto out; if (rt->rt_uses_gateway && rt->rt_gw_family != AF_INET) - goto out; - neigh = rt->dst.ops->neigh_lookup(&rt->dst, NULL, &fl4.daddr); - if (neigh) { - memcpy(nexthop_mac, neigh->ha, ETH_ALEN); - *uses_gateway = rt->rt_uses_gateway; - return 0; - } + goto out_rt; + neigh = dst_neigh_lookup(&rt->dst, &fl4.daddr); + if (!neigh) + goto out_rt; + memcpy(nexthop_mac, neigh->ha, ETH_ALEN); + *uses_gateway = rt->rt_uses_gateway; + neigh_release(neigh); + ip_rt_put(rt); + return 0; + +out_rt: + ip_rt_put(rt); out: return -ENOENT; } diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index ac88de2a06a0..84f98e18c7db 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -91,6 +91,11 @@ bool smc_ism_is_v2_capable(void) return smc_ism_v2_capable; } +void smc_ism_set_v2_capable(void) +{ + smc_ism_v2_capable = true; +} + /* Set a connection using this DMBE. 
*/ void smc_ism_set_conn(struct smc_connection *conn) { @@ -126,6 +131,8 @@ int smc_ism_get_vlan(struct smcd_dev *smcd, unsigned short vlanid) if (!vlanid) /* No valid vlan id */ return -EINVAL; + if (!smcd->ops->add_vlan_id) + return -EOPNOTSUPP; /* create new vlan entry, in case we need it */ new_vlan = kzalloc(sizeof(*new_vlan), GFP_KERNEL); @@ -171,6 +178,8 @@ int smc_ism_put_vlan(struct smcd_dev *smcd, unsigned short vlanid) if (!vlanid) /* No valid vlan id */ return -EINVAL; + if (!smcd->ops->del_vlan_id) + return -EOPNOTSUPP; spin_lock_irqsave(&smcd->lock, flags); list_for_each_entry(vlan, &smcd->vlan, list) { @@ -222,7 +231,6 @@ int smc_ism_unregister_dmb(struct smcd_dev *smcd, struct smc_buf_desc *dmb_desc) int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len, struct smc_buf_desc *dmb_desc) { -#if IS_ENABLED(CONFIG_ISM) struct smcd_dmb dmb; int rc; @@ -231,7 +239,7 @@ int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len, dmb.sba_idx = dmb_desc->sba_idx; dmb.vlan_id = lgr->vlan_id; dmb.rgid = lgr->peer_gid.gid; - rc = lgr->smcd->ops->register_dmb(lgr->smcd, &dmb, &smc_ism_client); + rc = lgr->smcd->ops->register_dmb(lgr->smcd, &dmb, lgr->smcd->client); if (!rc) { dmb_desc->sba_idx = dmb.sba_idx; dmb_desc->token = dmb.dmb_tok; @@ -240,9 +248,46 @@ int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len, dmb_desc->len = dmb.dmb_len; } return rc; -#else - return 0; -#endif +} + +bool smc_ism_support_dmb_nocopy(struct smcd_dev *smcd) +{ + /* for now only loopback-ism supports + * merging sndbuf with peer DMB to avoid + * data copies between them. + */ + return (smcd->ops->support_dmb_nocopy && + smcd->ops->support_dmb_nocopy(smcd)); +} + +int smc_ism_attach_dmb(struct smcd_dev *dev, u64 token, + struct smc_buf_desc *dmb_desc) +{ + struct smcd_dmb dmb; + int rc = 0; + + if (!dev->ops->attach_dmb) + return -EINVAL; + + memset(&dmb, 0, sizeof(dmb)); + dmb.dmb_tok = token; + rc = dev->ops->attach_dmb(dev, &dmb); + if (!rc) { + dmb_desc->sba_idx = dmb.sba_idx; + dmb_desc->token = dmb.dmb_tok; + dmb_desc->cpu_addr = dmb.cpu_addr; + dmb_desc->dma_addr = dmb.dma_addr; + dmb_desc->len = dmb.dmb_len; + } + return rc; +} + +int smc_ism_detach_dmb(struct smcd_dev *dev, u64 token) +{ + if (!dev->ops->detach_dmb) + return -EINVAL; + + return dev->ops->detach_dmb(dev, token); } static int smc_nl_handle_smcd_dev(struct smcd_dev *smcd, @@ -322,6 +367,8 @@ static void smc_nl_prep_smcd_dev(struct smcd_dev_list *dev_list, list_for_each_entry(smcd, &dev_list->list, list) { if (num < snum) goto next; + if (smc_ism_is_loopback(smcd)) + goto next; if (smc_nl_handle_smcd_dev(smcd, skb, cb)) goto errout; next: @@ -372,7 +419,8 @@ static void smcd_handle_sw_event(struct smc_ism_event_work *wrk) smc_smcd_terminate(wrk->smcd, &peer_gid, ev_info.vlan_id); break; case ISM_EVENT_CODE_TESTLINK: /* Activity timer */ - if (ev_info.code == ISM_EVENT_REQUEST) { + if (ev_info.code == ISM_EVENT_REQUEST && + wrk->smcd->ops->signal_event) { ev_info.code = ISM_EVENT_RESPONSE; wrk->smcd->ops->signal_event(wrk->smcd, &peer_gid, @@ -436,7 +484,7 @@ static struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name, static void smcd_register_dev(struct ism_dev *ism) { const struct smcd_ops *ops = ism_get_smcd_ops(); - struct smcd_dev *smcd; + struct smcd_dev *smcd, *fentry; if (!ops) return; @@ -446,20 +494,28 @@ static void smcd_register_dev(struct ism_dev *ism) if (!smcd) return; smcd->priv = ism; + smcd->client = &smc_ism_client; ism_set_priv(ism, &smc_ism_client, smcd); if 
(smc_pnetid_by_dev_port(&ism->pdev->dev, 0, smcd->pnetid)) smc_pnetid_by_table_smcd(smcd); + if (smcd->ops->supports_v2()) + smc_ism_set_v2_capable(); mutex_lock(&smcd_dev_list.mutex); - if (list_empty(&smcd_dev_list.list)) { - if (smcd->ops->supports_v2()) - smc_ism_v2_capable = true; - } - /* sort list: devices without pnetid before devices with pnetid */ - if (smcd->pnetid[0]) + /* sort list: + * - devices without pnetid before devices with pnetid; + * - loopback-ism always at the very beginning; + */ + if (!smcd->pnetid[0]) { + fentry = list_first_entry_or_null(&smcd_dev_list.list, + struct smcd_dev, list); + if (fentry && smc_ism_is_loopback(fentry)) + list_add(&smcd->list, &fentry->list); + else + list_add(&smcd->list, &smcd_dev_list.list); + } else { list_add_tail(&smcd->list, &smcd_dev_list.list); - else - list_add(&smcd->list, &smcd_dev_list.list); + } mutex_unlock(&smcd_dev_list.mutex); pr_warn_ratelimited("smc: adding smcd device %s with pnetid %.16s%s\n", @@ -541,6 +597,8 @@ int smc_ism_signal_shutdown(struct smc_link_group *lgr) if (lgr->peer_shutdown) return 0; + if (!lgr->smcd->ops->signal_event) + return 0; memcpy(ev_info.uid, lgr->id, SMC_LGR_ID_SIZE); ev_info.vlan_id = lgr->vlan_id; diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h index 165cd013404b..6763133dd8d0 100644 --- a/net/smc/smc_ism.h +++ b/net/smc/smc_ism.h @@ -48,10 +48,15 @@ int smc_ism_put_vlan(struct smcd_dev *dev, unsigned short vlan_id); int smc_ism_register_dmb(struct smc_link_group *lgr, int buf_size, struct smc_buf_desc *dmb_desc); int smc_ism_unregister_dmb(struct smcd_dev *dev, struct smc_buf_desc *dmb_desc); +bool smc_ism_support_dmb_nocopy(struct smcd_dev *smcd); +int smc_ism_attach_dmb(struct smcd_dev *dev, u64 token, + struct smc_buf_desc *dmb_desc); +int smc_ism_detach_dmb(struct smcd_dev *dev, u64 token); int smc_ism_signal_shutdown(struct smc_link_group *lgr); void smc_ism_get_system_eid(u8 **eid); u16 smc_ism_get_chid(struct smcd_dev *dev); bool smc_ism_is_v2_capable(void); +void smc_ism_set_v2_capable(void); int smc_ism_init(void); void smc_ism_exit(void); int smcd_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb); @@ -84,4 +89,9 @@ static inline bool smc_ism_is_emulated(struct smcd_dev *smcd) return __smc_ism_is_emulated(chid); } +static inline bool smc_ism_is_loopback(struct smcd_dev *smcd) +{ + return (smcd->ops->get_chid(smcd) == 0xFFFF); +} + #endif diff --git a/net/smc/smc_loopback.c b/net/smc/smc_loopback.c new file mode 100644 index 000000000000..3c5f64ca4115 --- /dev/null +++ b/net/smc/smc_loopback.c @@ -0,0 +1,427 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Shared Memory Communications Direct over loopback-ism device. + * + * Functions for loopback-ism device. + * + * Copyright (c) 2024, Alibaba Inc. 
+ * + * Author: Wen Gu <guwen@linux.alibaba.com> + * Tony Lu <tonylu@linux.alibaba.com> + * + */ + +#include <linux/device.h> +#include <linux/types.h> +#include <net/smc.h> + +#include "smc_cdc.h" +#include "smc_ism.h" +#include "smc_loopback.h" + +#define SMC_LO_V2_CAPABLE 0x1 /* loopback-ism acts as ISMv2 */ +#define SMC_LO_SUPPORT_NOCOPY 0x1 +#define SMC_DMA_ADDR_INVALID (~(dma_addr_t)0) + +static const char smc_lo_dev_name[] = "loopback-ism"; +static struct smc_lo_dev *lo_dev; + +static void smc_lo_generate_ids(struct smc_lo_dev *ldev) +{ + struct smcd_gid *lgid = &ldev->local_gid; + uuid_t uuid; + + uuid_gen(&uuid); + memcpy(&lgid->gid, &uuid, sizeof(lgid->gid)); + memcpy(&lgid->gid_ext, (u8 *)&uuid + sizeof(lgid->gid), + sizeof(lgid->gid_ext)); + + ldev->chid = SMC_LO_RESERVED_CHID; +} + +static int smc_lo_query_rgid(struct smcd_dev *smcd, struct smcd_gid *rgid, + u32 vid_valid, u32 vid) +{ + struct smc_lo_dev *ldev = smcd->priv; + + /* rgid should be the same as lgid */ + if (!ldev || rgid->gid != ldev->local_gid.gid || + rgid->gid_ext != ldev->local_gid.gid_ext) + return -ENETUNREACH; + return 0; +} + +static int smc_lo_register_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb, + void *client_priv) +{ + struct smc_lo_dmb_node *dmb_node, *tmp_node; + struct smc_lo_dev *ldev = smcd->priv; + int sba_idx, rc; + + /* check space for new dmb */ + for_each_clear_bit(sba_idx, ldev->sba_idx_mask, SMC_LO_MAX_DMBS) { + if (!test_and_set_bit(sba_idx, ldev->sba_idx_mask)) + break; + } + if (sba_idx == SMC_LO_MAX_DMBS) + return -ENOSPC; + + dmb_node = kzalloc(sizeof(*dmb_node), GFP_KERNEL); + if (!dmb_node) { + rc = -ENOMEM; + goto err_bit; + } + + dmb_node->sba_idx = sba_idx; + dmb_node->len = dmb->dmb_len; + dmb_node->cpu_addr = kzalloc(dmb_node->len, GFP_KERNEL | + __GFP_NOWARN | __GFP_NORETRY | + __GFP_NOMEMALLOC); + if (!dmb_node->cpu_addr) { + rc = -ENOMEM; + goto err_node; + } + dmb_node->dma_addr = SMC_DMA_ADDR_INVALID; + refcount_set(&dmb_node->refcnt, 1); + +again: + /* add new dmb into hash table */ + get_random_bytes(&dmb_node->token, sizeof(dmb_node->token)); + write_lock_bh(&ldev->dmb_ht_lock); + hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb_node->token) { + if (tmp_node->token == dmb_node->token) { + write_unlock_bh(&ldev->dmb_ht_lock); + goto again; + } + } + hash_add(ldev->dmb_ht, &dmb_node->list, dmb_node->token); + write_unlock_bh(&ldev->dmb_ht_lock); + atomic_inc(&ldev->dmb_cnt); + + dmb->sba_idx = dmb_node->sba_idx; + dmb->dmb_tok = dmb_node->token; + dmb->cpu_addr = dmb_node->cpu_addr; + dmb->dma_addr = dmb_node->dma_addr; + dmb->dmb_len = dmb_node->len; + + return 0; + +err_node: + kfree(dmb_node); +err_bit: + clear_bit(sba_idx, ldev->sba_idx_mask); + return rc; +} + +static void __smc_lo_unregister_dmb(struct smc_lo_dev *ldev, + struct smc_lo_dmb_node *dmb_node) +{ + /* remove dmb from hash table */ + write_lock_bh(&ldev->dmb_ht_lock); + hash_del(&dmb_node->list); + write_unlock_bh(&ldev->dmb_ht_lock); + + clear_bit(dmb_node->sba_idx, ldev->sba_idx_mask); + kvfree(dmb_node->cpu_addr); + kfree(dmb_node); + + if (atomic_dec_and_test(&ldev->dmb_cnt)) + wake_up(&ldev->ldev_release); +} + +static int smc_lo_unregister_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb) +{ + struct smc_lo_dmb_node *dmb_node = NULL, *tmp_node; + struct smc_lo_dev *ldev = smcd->priv; + + /* find dmb from hash table */ + read_lock_bh(&ldev->dmb_ht_lock); + hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb->dmb_tok) { + if (tmp_node->token == dmb->dmb_tok) { + dmb_node = tmp_node; + 
break; + } + } + if (!dmb_node) { + read_unlock_bh(&ldev->dmb_ht_lock); + return -EINVAL; + } + read_unlock_bh(&ldev->dmb_ht_lock); + + if (refcount_dec_and_test(&dmb_node->refcnt)) + __smc_lo_unregister_dmb(ldev, dmb_node); + return 0; +} + +static int smc_lo_support_dmb_nocopy(struct smcd_dev *smcd) +{ + return SMC_LO_SUPPORT_NOCOPY; +} + +static int smc_lo_attach_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb) +{ + struct smc_lo_dmb_node *dmb_node = NULL, *tmp_node; + struct smc_lo_dev *ldev = smcd->priv; + + /* find dmb_node according to dmb->dmb_tok */ + read_lock_bh(&ldev->dmb_ht_lock); + hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb->dmb_tok) { + if (tmp_node->token == dmb->dmb_tok) { + dmb_node = tmp_node; + break; + } + } + if (!dmb_node) { + read_unlock_bh(&ldev->dmb_ht_lock); + return -EINVAL; + } + read_unlock_bh(&ldev->dmb_ht_lock); + + if (!refcount_inc_not_zero(&dmb_node->refcnt)) + /* the dmb is being unregistered, but has + * not been removed from the hash table. + */ + return -EINVAL; + + /* provide dmb information */ + dmb->sba_idx = dmb_node->sba_idx; + dmb->dmb_tok = dmb_node->token; + dmb->cpu_addr = dmb_node->cpu_addr; + dmb->dma_addr = dmb_node->dma_addr; + dmb->dmb_len = dmb_node->len; + return 0; +} + +static int smc_lo_detach_dmb(struct smcd_dev *smcd, u64 token) +{ + struct smc_lo_dmb_node *dmb_node = NULL, *tmp_node; + struct smc_lo_dev *ldev = smcd->priv; + + /* find dmb_node according to dmb->dmb_tok */ + read_lock_bh(&ldev->dmb_ht_lock); + hash_for_each_possible(ldev->dmb_ht, tmp_node, list, token) { + if (tmp_node->token == token) { + dmb_node = tmp_node; + break; + } + } + if (!dmb_node) { + read_unlock_bh(&ldev->dmb_ht_lock); + return -EINVAL; + } + read_unlock_bh(&ldev->dmb_ht_lock); + + if (refcount_dec_and_test(&dmb_node->refcnt)) + __smc_lo_unregister_dmb(ldev, dmb_node); + return 0; +} + +static int smc_lo_move_data(struct smcd_dev *smcd, u64 dmb_tok, + unsigned int idx, bool sf, unsigned int offset, + void *data, unsigned int size) +{ + struct smc_lo_dmb_node *rmb_node = NULL, *tmp_node; + struct smc_lo_dev *ldev = smcd->priv; + struct smc_connection *conn; + + if (!sf) + /* since sndbuf is merged with peer DMB, there is + * no need to copy data from sndbuf to peer DMB. 
+ */ + return 0; + + read_lock_bh(&ldev->dmb_ht_lock); + hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb_tok) { + if (tmp_node->token == dmb_tok) { + rmb_node = tmp_node; + break; + } + } + if (!rmb_node) { + read_unlock_bh(&ldev->dmb_ht_lock); + return -EINVAL; + } + memcpy((char *)rmb_node->cpu_addr + offset, data, size); + read_unlock_bh(&ldev->dmb_ht_lock); + + conn = smcd->conn[rmb_node->sba_idx]; + if (!conn || conn->killed) + return -EPIPE; + tasklet_schedule(&conn->rx_tsklet); + return 0; +} + +static int smc_lo_supports_v2(void) +{ + return SMC_LO_V2_CAPABLE; +} + +static void smc_lo_get_local_gid(struct smcd_dev *smcd, + struct smcd_gid *smcd_gid) +{ + struct smc_lo_dev *ldev = smcd->priv; + + smcd_gid->gid = ldev->local_gid.gid; + smcd_gid->gid_ext = ldev->local_gid.gid_ext; +} + +static u16 smc_lo_get_chid(struct smcd_dev *smcd) +{ + return ((struct smc_lo_dev *)smcd->priv)->chid; +} + +static struct device *smc_lo_get_dev(struct smcd_dev *smcd) +{ + return &((struct smc_lo_dev *)smcd->priv)->dev; +} + +static const struct smcd_ops lo_ops = { + .query_remote_gid = smc_lo_query_rgid, + .register_dmb = smc_lo_register_dmb, + .unregister_dmb = smc_lo_unregister_dmb, + .support_dmb_nocopy = smc_lo_support_dmb_nocopy, + .attach_dmb = smc_lo_attach_dmb, + .detach_dmb = smc_lo_detach_dmb, + .add_vlan_id = NULL, + .del_vlan_id = NULL, + .set_vlan_required = NULL, + .reset_vlan_required = NULL, + .signal_event = NULL, + .move_data = smc_lo_move_data, + .supports_v2 = smc_lo_supports_v2, + .get_local_gid = smc_lo_get_local_gid, + .get_chid = smc_lo_get_chid, + .get_dev = smc_lo_get_dev, +}; + +static struct smcd_dev *smcd_lo_alloc_dev(const struct smcd_ops *ops, + int max_dmbs) +{ + struct smcd_dev *smcd; + + smcd = kzalloc(sizeof(*smcd), GFP_KERNEL); + if (!smcd) + return NULL; + + smcd->conn = kcalloc(max_dmbs, sizeof(struct smc_connection *), + GFP_KERNEL); + if (!smcd->conn) + goto out_smcd; + + smcd->ops = ops; + + spin_lock_init(&smcd->lock); + spin_lock_init(&smcd->lgr_lock); + INIT_LIST_HEAD(&smcd->vlan); + INIT_LIST_HEAD(&smcd->lgr_list); + init_waitqueue_head(&smcd->lgrs_deleted); + return smcd; + +out_smcd: + kfree(smcd); + return NULL; +} + +static int smcd_lo_register_dev(struct smc_lo_dev *ldev) +{ + struct smcd_dev *smcd; + + smcd = smcd_lo_alloc_dev(&lo_ops, SMC_LO_MAX_DMBS); + if (!smcd) + return -ENOMEM; + ldev->smcd = smcd; + smcd->priv = ldev; + smc_ism_set_v2_capable(); + mutex_lock(&smcd_dev_list.mutex); + list_add(&smcd->list, &smcd_dev_list.list); + mutex_unlock(&smcd_dev_list.mutex); + pr_warn_ratelimited("smc: adding smcd device %s\n", + dev_name(&ldev->dev)); + return 0; +} + +static void smcd_lo_unregister_dev(struct smc_lo_dev *ldev) +{ + struct smcd_dev *smcd = ldev->smcd; + + pr_warn_ratelimited("smc: removing smcd device %s\n", + dev_name(&ldev->dev)); + smcd->going_away = 1; + smc_smcd_terminate_all(smcd); + mutex_lock(&smcd_dev_list.mutex); + list_del_init(&smcd->list); + mutex_unlock(&smcd_dev_list.mutex); + kfree(smcd->conn); + kfree(smcd); +} + +static int smc_lo_dev_init(struct smc_lo_dev *ldev) +{ + smc_lo_generate_ids(ldev); + rwlock_init(&ldev->dmb_ht_lock); + hash_init(ldev->dmb_ht); + atomic_set(&ldev->dmb_cnt, 0); + init_waitqueue_head(&ldev->ldev_release); + + return smcd_lo_register_dev(ldev); +} + +static void smc_lo_dev_exit(struct smc_lo_dev *ldev) +{ + smcd_lo_unregister_dev(ldev); + if (atomic_read(&ldev->dmb_cnt)) + wait_event(ldev->ldev_release, !atomic_read(&ldev->dmb_cnt)); +} + +static void smc_lo_dev_release(struct device 
*dev) +{ + struct smc_lo_dev *ldev = + container_of(dev, struct smc_lo_dev, dev); + + kfree(ldev); +} + +static int smc_lo_dev_probe(void) +{ + struct smc_lo_dev *ldev; + int ret; + + ldev = kzalloc(sizeof(*ldev), GFP_KERNEL); + if (!ldev) + return -ENOMEM; + + ldev->dev.parent = NULL; + ldev->dev.release = smc_lo_dev_release; + device_initialize(&ldev->dev); + dev_set_name(&ldev->dev, smc_lo_dev_name); + + ret = smc_lo_dev_init(ldev); + if (ret) + goto free_dev; + + lo_dev = ldev; /* global loopback device */ + return 0; + +free_dev: + put_device(&ldev->dev); + return ret; +} + +static void smc_lo_dev_remove(void) +{ + if (!lo_dev) + return; + + smc_lo_dev_exit(lo_dev); + put_device(&lo_dev->dev); /* device_initialize in smc_lo_dev_probe */ +} + +int smc_loopback_init(void) +{ + return smc_lo_dev_probe(); +} + +void smc_loopback_exit(void) +{ + smc_lo_dev_remove(); +} diff --git a/net/smc/smc_loopback.h b/net/smc/smc_loopback.h new file mode 100644 index 000000000000..6dd4292dae56 --- /dev/null +++ b/net/smc/smc_loopback.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Shared Memory Communications Direct over loopback-ism device. + * + * SMC-D loopback-ism device structure definitions. + * + * Copyright (c) 2024, Alibaba Inc. + * + * Author: Wen Gu <guwen@linux.alibaba.com> + * Tony Lu <tonylu@linux.alibaba.com> + * + */ + +#ifndef _SMC_LOOPBACK_H +#define _SMC_LOOPBACK_H + +#include <linux/device.h> +#include <linux/err.h> +#include <net/smc.h> + +#if IS_ENABLED(CONFIG_SMC_LO) +#define SMC_LO_MAX_DMBS 5000 +#define SMC_LO_DMBS_HASH_BITS 12 +#define SMC_LO_RESERVED_CHID 0xFFFF + +struct smc_lo_dmb_node { + struct hlist_node list; + u64 token; + u32 len; + u32 sba_idx; + void *cpu_addr; + dma_addr_t dma_addr; + refcount_t refcnt; +}; + +struct smc_lo_dev { + struct smcd_dev *smcd; + struct device dev; + u16 chid; + struct smcd_gid local_gid; + atomic_t dmb_cnt; + rwlock_t dmb_ht_lock; + DECLARE_BITMAP(sba_idx_mask, SMC_LO_MAX_DMBS); + DECLARE_HASHTABLE(dmb_ht, SMC_LO_DMBS_HASH_BITS); + wait_queue_head_t ldev_release; +}; + +int smc_loopback_init(void); +void smc_loopback_exit(void); +#else +static inline int smc_loopback_init(void) +{ + return 0; +} + +static inline void smc_loopback_exit(void) +{ +} +#endif + +#endif /* _SMC_LOOPBACK_H */ diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c index 4e8baa2e7ea4..13f2bc092db1 100644 --- a/net/smc/smc_sysctl.c +++ b/net/smc/smc_sysctl.c @@ -90,11 +90,11 @@ static struct ctl_table smc_table[] = { .extra1 = &conns_per_lgr_min, .extra2 = &conns_per_lgr_max, }, - { } }; int __net_init smc_sysctl_net_init(struct net *net) { + size_t table_size = ARRAY_SIZE(smc_table); struct ctl_table *table; table = smc_table; @@ -105,12 +105,12 @@ int __net_init smc_sysctl_net_init(struct net *net) if (!table) goto err_alloc; - for (i = 0; i < ARRAY_SIZE(smc_table) - 1; i++) + for (i = 0; i < table_size; i++) table[i].data += (void *)net - (void *)&init_net; } net->smc.smc_hdr = register_net_sysctl_sz(net, "net/smc", table, - ARRAY_SIZE(smc_table)); + table_size); if (!net->smc.smc_hdr) goto err_reg; diff --git a/net/smc/smc_tracepoint.h b/net/smc/smc_tracepoint.h index 9fc5e586d24a..a9a6e3c1113a 100644 --- a/net/smc/smc_tracepoint.h +++ b/net/smc/smc_tracepoint.h @@ -60,7 +60,7 @@ DECLARE_EVENT_CLASS(smc_msg_event, __entry->smc = smc; __entry->net_cookie = sock_net(sk)->net_cookie; __entry->len = len; - __assign_str(name, smc->conn.lnk->ibname); + __assign_str(name); ), TP_printk("smc=%p net=%llu len=%zu dev=%s", @@ -104,7 +104,7 @@ 
TRACE_EVENT(smcr_link_down, __entry->lgr = lgr; __entry->net_cookie = lgr->net->net_cookie; __entry->state = lnk->state; - __assign_str(name, lnk->ibname); + __assign_str(name); __entry->location = location; ), diff --git a/net/socket.c b/net/socket.c index e5f3af49a8b6..e416920e9399 100644 --- a/net/socket.c +++ b/net/socket.c @@ -88,7 +88,7 @@ #include <linux/xattr.h> #include <linux/nospec.h> #include <linux/indirect_call_wrapper.h> -#include <linux/io_uring.h> +#include <linux/io_uring/net.h> #include <linux/uaccess.h> #include <asm/unistd.h> @@ -1890,7 +1890,7 @@ SYSCALL_DEFINE2(listen, int, fd, int, backlog) return __sys_listen(fd, backlog); } -struct file *do_accept(struct file *file, unsigned file_flags, +struct file *do_accept(struct file *file, struct proto_accept_arg *arg, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen, int flags) { @@ -1926,8 +1926,8 @@ struct file *do_accept(struct file *file, unsigned file_flags, if (err) goto out_fd; - err = ops->accept(sock, newsock, sock->file->f_flags | file_flags, - false); + arg->flags |= sock->file->f_flags; + err = ops->accept(sock, newsock, arg); if (err < 0) goto out_fd; @@ -1953,6 +1953,7 @@ out_fd: static int __sys_accept4_file(struct file *file, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen, int flags) { + struct proto_accept_arg arg = { }; struct file *newfile; int newfd; @@ -1966,7 +1967,7 @@ static int __sys_accept4_file(struct file *file, struct sockaddr __user *upeer_s if (unlikely(newfd < 0)) return newfd; - newfile = do_accept(file, 0, upeer_sockaddr, upeer_addrlen, + newfile = do_accept(file, &arg, upeer_sockaddr, upeer_addrlen, flags); if (IS_ERR(newfile)) { put_unused_fd(newfd); @@ -3580,6 +3581,10 @@ int kernel_accept(struct socket *sock, struct socket **newsock, int flags) { struct sock *sk = sock->sk; const struct proto_ops *ops = READ_ONCE(sock->ops); + struct proto_accept_arg arg = { + .flags = flags, + .kern = true, + }; int err; err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol, @@ -3587,7 +3592,7 @@ int kernel_accept(struct socket *sock, struct socket **newsock, int flags) if (err < 0) goto done; - err = ops->accept(sock, *newsock, flags, true); + err = ops->accept(sock, *newsock, &arg); if (err < 0) { sock_release(*newsock); *newsock = NULL; diff --git a/net/sunrpc/auth_gss/auth_gss_internal.h b/net/sunrpc/auth_gss/auth_gss_internal.h index c53b329092d4..4ebc1b7043d9 100644 --- a/net/sunrpc/auth_gss/auth_gss_internal.h +++ b/net/sunrpc/auth_gss/auth_gss_internal.h @@ -23,7 +23,7 @@ simple_get_bytes(const void *p, const void *end, void *res, size_t len) } static inline const void * -simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest) +simple_get_netobj_noprof(const void *p, const void *end, struct xdr_netobj *dest) { const void *q; unsigned int len; @@ -35,7 +35,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest) if (unlikely(q > end || q < p)) return ERR_PTR(-EFAULT); if (len) { - dest->data = kmemdup(p, len, GFP_KERNEL); + dest->data = kmemdup_noprof(p, len, GFP_KERNEL); if (unlikely(dest->data == NULL)) return ERR_PTR(-ENOMEM); } else @@ -43,3 +43,5 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest) dest->len = len; return q; } + +#define simple_get_netobj(...) 
alloc_hooks(simple_get_netobj_noprof(__VA_ARGS__)) diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 24de94184700..96ab50eda9c2 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1033,17 +1033,11 @@ null_verifier: static void gss_free_in_token_pages(struct gssp_in_token *in_token) { - u32 inlen; int i; i = 0; - inlen = in_token->page_len; - while (inlen) { - if (in_token->pages[i]) - put_page(in_token->pages[i]); - inlen -= inlen > PAGE_SIZE ? PAGE_SIZE : inlen; - } - + while (in_token->pages[i]) + put_page(in_token->pages[i++]); kfree(in_token->pages); in_token->pages = NULL; } diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 28f3749f6dc6..cfd1b1bf7e35 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1071,6 +1071,7 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old, .authflavor = old->cl_auth->au_flavor, .cred = old->cl_cred, .stats = old->cl_stats, + .timeout = old->cl_timeout, }; struct rpc_clnt *clnt; int err; @@ -2698,8 +2699,19 @@ rpc_decode_header(struct rpc_task *task, struct xdr_stream *xdr) goto out_msg_denied; error = rpcauth_checkverf(task, xdr); - if (error) + if (error) { + struct rpc_cred *cred = task->tk_rqstp->rq_cred; + + if (!test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags)) { + rpcauth_invalcred(task); + if (!task->tk_cred_retry) + goto out_err; + task->tk_cred_retry--; + trace_rpc__stale_creds(task); + return -EKEYREJECTED; + } goto out_verifier; + } p = xdr_inline_decode(xdr, sizeof(*p)); if (!p) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index b33e429336fb..2b4b1276d4e8 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1265,8 +1265,6 @@ svc_generic_init_request(struct svc_rqst *rqstp, if (rqstp->rq_proc >= versp->vs_nproc) goto err_bad_proc; rqstp->rq_procinfo = procp = &versp->vs_proc[rqstp->rq_proc]; - if (!procp) - goto err_bad_proc; /* Initialize storage for argp and resp */ memset(rqstp->rq_argp, 0, procp->pc_argzero); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index b4a85a227bd7..dd86d7f1e97e 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -46,7 +46,6 @@ static LIST_HEAD(svc_xprt_class_list); /* SMP locking strategy: * - * svc_pool->sp_lock protects most of the fields of that pool. * svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt. * when both need to be taken (rare), svc_serv->sv_lock is first. * The "service mutex" protects svc_serv->sv_nrthread. 
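For context: the simple_get_netobj() change above follows the memory-allocation-profiling convention, where an allocating helper gains a _noprof variant and is re-exposed through alloc_hooks(), so the memory is accounted to each call site rather than to the helper itself. A minimal sketch with invented "example" names (not from this series):

#include <linux/alloc_tag.h>
#include <linux/slab.h>
#include <linux/string.h>

static inline void *example_dup_noprof(const void *src, size_t len)
{
	/* the _noprof allocator skips the helper's own accounting */
	return kmemdup_noprof(src, len, GFP_KERNEL);
}

/* alloc_hooks() instantiates an alloc_tag at each call site, so
 * callers of example_dup() are charged for the allocation.
 */
#define example_dup(...)	alloc_hooks(example_dup_noprof(__VA_ARGS__))
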
@@ -211,51 +210,6 @@ void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl, } EXPORT_SYMBOL_GPL(svc_xprt_init); -static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, - struct svc_serv *serv, - struct net *net, - const int family, - const unsigned short port, - int flags) -{ - struct sockaddr_in sin = { - .sin_family = AF_INET, - .sin_addr.s_addr = htonl(INADDR_ANY), - .sin_port = htons(port), - }; -#if IS_ENABLED(CONFIG_IPV6) - struct sockaddr_in6 sin6 = { - .sin6_family = AF_INET6, - .sin6_addr = IN6ADDR_ANY_INIT, - .sin6_port = htons(port), - }; -#endif - struct svc_xprt *xprt; - struct sockaddr *sap; - size_t len; - - switch (family) { - case PF_INET: - sap = (struct sockaddr *)&sin; - len = sizeof(sin); - break; -#if IS_ENABLED(CONFIG_IPV6) - case PF_INET6: - sap = (struct sockaddr *)&sin6; - len = sizeof(sin6); - break; -#endif - default: - return ERR_PTR(-EAFNOSUPPORT); - } - - xprt = xcl->xcl_ops->xpo_create(serv, net, sap, len, flags); - if (IS_ERR(xprt)) - trace_svc_xprt_create_err(serv->sv_program->pg_name, - xcl->xcl_name, sap, len, xprt); - return xprt; -} - /** * svc_xprt_received - start next receiver thread * @xprt: controlling transport @@ -294,9 +248,8 @@ void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new) } static int _svc_xprt_create(struct svc_serv *serv, const char *xprt_name, - struct net *net, const int family, - const unsigned short port, int flags, - const struct cred *cred) + struct net *net, struct sockaddr *sap, + size_t len, int flags, const struct cred *cred) { struct svc_xprt_class *xcl; @@ -312,8 +265,11 @@ static int _svc_xprt_create(struct svc_serv *serv, const char *xprt_name, goto err; spin_unlock(&svc_xprt_class_lock); - newxprt = __svc_xpo_create(xcl, serv, net, family, port, flags); + newxprt = xcl->xcl_ops->xpo_create(serv, net, sap, len, flags); if (IS_ERR(newxprt)) { + trace_svc_xprt_create_err(serv->sv_program->pg_name, + xcl->xcl_name, sap, len, + newxprt); module_put(xcl->xcl_owner); return PTR_ERR(newxprt); } @@ -330,6 +286,48 @@ static int _svc_xprt_create(struct svc_serv *serv, const char *xprt_name, } /** + * svc_xprt_create_from_sa - Add a new listener to @serv from socket address + * @serv: target RPC service + * @xprt_name: transport class name + * @net: network namespace + * @sap: socket address pointer + * @flags: SVC_SOCK flags + * @cred: credential to bind to this transport + * + * Return local xprt port on success or %-EPROTONOSUPPORT on failure + */ +int svc_xprt_create_from_sa(struct svc_serv *serv, const char *xprt_name, + struct net *net, struct sockaddr *sap, + int flags, const struct cred *cred) +{ + size_t len; + int err; + + switch (sap->sa_family) { + case AF_INET: + len = sizeof(struct sockaddr_in); + break; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + len = sizeof(struct sockaddr_in6); + break; +#endif + default: + return -EAFNOSUPPORT; + } + + err = _svc_xprt_create(serv, xprt_name, net, sap, len, flags, cred); + if (err == -EPROTONOSUPPORT) { + request_module("svc%s", xprt_name); + err = _svc_xprt_create(serv, xprt_name, net, sap, len, flags, + cred); + } + + return err; +} +EXPORT_SYMBOL_GPL(svc_xprt_create_from_sa); + +/** * svc_xprt_create - Add a new listener to @serv * @serv: target RPC service * @xprt_name: transport class name @@ -339,23 +337,41 @@ static int _svc_xprt_create(struct svc_serv *serv, const char *xprt_name, * @flags: SVC_SOCK flags * @cred: credential to bind to this transport * - * Return values: - * %0: New listener added successfully - * 
%-EPROTONOSUPPORT: Requested transport type not supported + * Return local xprt port on success or %-EPROTONOSUPPORT on failure */ int svc_xprt_create(struct svc_serv *serv, const char *xprt_name, struct net *net, const int family, const unsigned short port, int flags, const struct cred *cred) { - int err; + struct sockaddr_in sin = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_ANY), + .sin_port = htons(port), + }; +#if IS_ENABLED(CONFIG_IPV6) + struct sockaddr_in6 sin6 = { + .sin6_family = AF_INET6, + .sin6_addr = IN6ADDR_ANY_INIT, + .sin6_port = htons(port), + }; +#endif + struct sockaddr *sap; - err = _svc_xprt_create(serv, xprt_name, net, family, port, flags, cred); - if (err == -EPROTONOSUPPORT) { - request_module("svc%s", xprt_name); - err = _svc_xprt_create(serv, xprt_name, net, family, port, flags, cred); + switch (family) { + case PF_INET: + sap = (struct sockaddr *)&sin; + break; +#if IS_ENABLED(CONFIG_IPV6) + case PF_INET6: + sap = (struct sockaddr *)&sin6; + break; +#endif + default: + return -EAFNOSUPPORT; } - return err; + + return svc_xprt_create_from_sa(serv, xprt_name, net, sap, flags, cred); } EXPORT_SYMBOL_GPL(svc_xprt_create); @@ -1260,6 +1276,40 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt) } /** + * svc_find_listener - find an RPC transport instance + * @serv: pointer to svc_serv to search + * @xcl_name: C string containing transport's class name + * @net: owner net pointer + * @sa: sockaddr containing address + * + * Return the transport instance pointer for the endpoint accepting + * connections/peer traffic from the specified transport class, + * and matching sockaddr. + */ +struct svc_xprt *svc_find_listener(struct svc_serv *serv, const char *xcl_name, + struct net *net, const struct sockaddr *sa) +{ + struct svc_xprt *xprt; + struct svc_xprt *found = NULL; + + spin_lock_bh(&serv->sv_lock); + list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) { + if (xprt->xpt_net != net) + continue; + if (strcmp(xprt->xpt_class->xcl_name, xcl_name)) + continue; + if (!rpc_cmp_addr_port(sa, (struct sockaddr *)&xprt->xpt_local)) + continue; + found = xprt; + svc_xprt_get(xprt); + break; + } + spin_unlock_bh(&serv->sv_lock); + return found; +} +EXPORT_SYMBOL_GPL(svc_find_listener); + +/** * svc_find_xprt - find an RPC transport instance * @serv: pointer to svc_serv to search * @xcl_name: C string containing transport's class name diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index 93941ab12549..5f3170a1c9bb 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c @@ -160,7 +160,6 @@ static struct ctl_table debug_table[] = { .mode = 0444, .proc_handler = proc_do_xprt, }, - { } }; void diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index f86970733eb0..474f7a98fe9e 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c @@ -209,7 +209,6 @@ static struct ctl_table svcrdma_parm_table[] = { .extra1 = &zero, .extra2 = &zero, }, - { }, }; static void svc_rdma_proc_cleanup(void) diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 29b0562d62e7..9a8ce5df83ca 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -137,7 +137,6 @@ static struct ctl_table xr_tunables_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { }, }; #endif diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 4f8d7efa469f..432557a553e7 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ 
b/net/sunrpc/xprtrdma/verbs.c @@ -244,7 +244,11 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) case RDMA_CM_EVENT_DEVICE_REMOVAL: pr_info("rpcrdma: removing device %s for %pISpc\n", ep->re_id->device->name, sap); - fallthrough; + switch (xchg(&ep->re_connect_status, -ENODEV)) { + case 0: goto wake_connect_worker; + case 1: goto disconnected; + } + return 0; case RDMA_CM_EVENT_ADDR_CHANGE: ep->re_connect_status = -ENODEV; goto disconnected; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index bb9b747d58a1..dfc353eea8ed 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -160,7 +160,6 @@ static struct ctl_table xs_tunables_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { }, }; /* @@ -2664,6 +2663,7 @@ static void xs_tcp_tls_setup_socket(struct work_struct *work) .xprtsec = { .policy = RPC_XPRTSEC_NONE, }, + .stats = upper_clnt->cl_stats, }; unsigned int pflags = current->flags; struct rpc_clnt *lower_clnt; diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index c9189a970eec..6488ead9e464 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -244,6 +244,99 @@ static int switchdev_port_obj_notify(enum switchdev_notifier_type nt, return 0; } +static void switchdev_obj_id_to_helpful_msg(struct net_device *dev, + enum switchdev_obj_id obj_id, + int err, bool add) +{ + const char *action = add ? "add" : "del"; + const char *reason = ""; + const char *problem; + const char *obj_str; + + switch (obj_id) { + case SWITCHDEV_OBJ_ID_UNDEFINED: + obj_str = "Undefined object"; + problem = "Attempted operation is undefined, indicating a possible programming\n" + "error.\n"; + break; + case SWITCHDEV_OBJ_ID_PORT_VLAN: + obj_str = "VLAN entry"; + problem = "Failure in VLAN settings on this port might disrupt network\n" + "segmentation or traffic isolation, affecting network partitioning.\n"; + break; + case SWITCHDEV_OBJ_ID_PORT_MDB: + obj_str = "Port Multicast Database entry"; + problem = "Failure in updating the port's Multicast Database could lead to\n" + "multicast forwarding issues.\n"; + break; + case SWITCHDEV_OBJ_ID_HOST_MDB: + obj_str = "Host Multicast Database entry"; + problem = "Failure in updating the host's Multicast Database may impact multicast\n" + "group memberships or traffic delivery, affecting multicast\n" + "communication.\n"; + break; + case SWITCHDEV_OBJ_ID_MRP: + obj_str = "Media Redundancy Protocol configuration for port"; + problem = "Failure to set MRP ring ID on this port prevents communication with\n" + "the specified redundancy ring, resulting in an inability to engage\n" + "in MRP-based network operations.\n"; + break; + case SWITCHDEV_OBJ_ID_RING_TEST_MRP: + obj_str = "MRP Test Frame Operations for port"; + problem = "Failure to generate/monitor MRP test frames may lead to an inability to\n" + "assess the ring's operational integrity and fault response, hindering\n" + "proactive network management.\n"; + break; + case SWITCHDEV_OBJ_ID_RING_ROLE_MRP: + obj_str = "MRP Ring Role Configuration"; + problem = "Improper MRP ring role configuration may create conflicts in the ring,\n" + "disrupting communication for all participants, or isolate the local\n" + "system from the ring, hindering its ability to communicate with other\n" + "participants.\n"; + break; + case SWITCHDEV_OBJ_ID_RING_STATE_MRP: + obj_str = "MRP Ring State Configuration"; + problem = "Failure to correctly set the MRP ring state can result in network\n" + "loops or leave segments without 
communication. In a Closed state,\n" + "it maintains loop prevention by blocking one MRM port, while an Open\n" + "state activates in response to failures, changing port states to\n" + "preserve network connectivity.\n"; + break; + case SWITCHDEV_OBJ_ID_IN_TEST_MRP: + obj_str = "MRP_InTest Frame Generation Configuration"; + problem = "Failure in managing MRP_InTest frame generation can misjudge the\n" + "interconnection ring's state, leading to incorrect blocking or\n" + "unblocking of the I/C port. This misconfiguration might result\n" + "in unintended network loops or isolate critical network segments,\n" + "compromising network integrity and reliability.\n"; + break; + case SWITCHDEV_OBJ_ID_IN_ROLE_MRP: + obj_str = "Interconnection Ring Role Configuration"; + problem = "Incorrect assignment of interconnection ring roles\n" + "(MIM/MIC) can impair the formation of the interconnection rings.\n"; + break; + case SWITCHDEV_OBJ_ID_IN_STATE_MRP: + obj_str = "Interconnection Ring State Configuration"; + problem = "Failure in updating the interconnection ring state can lead, in\n" + "case of Open state, to incorrect blocking or unblocking of the\n" + "I/C port, resulting in unintended network loops or isolation\n" + "of critical network segments.\n"; + break; + default: + obj_str = "Unknown object"; + problem = "Indicating a possible programming error.\n"; + } + + switch (err) { + case -ENOSPC: + reason = "Current HW/SW setup lacks sufficient resources.\n"; + break; + } + + netdev_err(dev, "Failed to %s %s (object id=%d) with error: %pe (%d).\n%s%s\n", + action, obj_str, obj_id, ERR_PTR(err), err, problem, reason); +} + static void switchdev_port_obj_add_deferred(struct net_device *dev, const void *data) { @@ -254,8 +347,7 @@ static void switchdev_port_obj_add_deferred(struct net_device *dev, err = switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD, dev, obj, NULL); if (err && err != -EOPNOTSUPP) - netdev_err(dev, "failed (err=%d) to add object (id=%d)\n", - err, obj->id); + switchdev_obj_id_to_helpful_msg(dev, obj->id, err, true); if (obj->complete) obj->complete(dev, err, obj->complete_priv); } @@ -304,8 +396,7 @@ static void switchdev_port_obj_del_deferred(struct net_device *dev, err = switchdev_port_obj_del_now(dev, obj); if (err && err != -EOPNOTSUPP) - netdev_err(dev, "failed (err=%d) to del object (id=%d)\n", - err, obj->id); + switchdev_obj_id_to_helpful_msg(dev, obj->id, err, false); if (obj->complete) obj->complete(dev, err, obj->complete_priv); } diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 051ed5f6fc93..f5017012a049 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -40,7 +40,7 @@ static int is_seen(struct ctl_table_set *set) /* Return standard mode bits for table entry. 
*/ static int net_ctl_permissions(struct ctl_table_header *head, - struct ctl_table *table) + const struct ctl_table *table) { struct net *net = container_of(head->set, struct net, sysctls); @@ -54,7 +54,6 @@ static int net_ctl_permissions(struct ctl_table_header *head, } static void net_ctl_set_ownership(struct ctl_table_header *head, - struct ctl_table *table, kuid_t *uid, kgid_t *gid) { struct net *net = container_of(head->set, struct net, sysctls); diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 5c9fd4791c4b..76284fc538eb 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -142,9 +142,9 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) if (fragid == FIRST_FRAGMENT) { if (unlikely(head)) goto err; - *buf = NULL; if (skb_has_frag_list(frag) && __skb_linearize(frag)) goto err; + *buf = NULL; frag = skb_unshare(frag, GFP_ATOMIC); if (unlikely(!frag)) goto err; @@ -156,6 +156,11 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) if (!head) goto err; + /* Either the input skb ownership is transferred to headskb + * or the input skb is freed, clear the reference to avoid + * bad access on error path. + */ + *buf = NULL; if (skb_try_coalesce(head, frag, &headstolen, &delta)) { kfree_skb_partial(frag, headstolen); } else { @@ -179,7 +184,6 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) *headbuf = NULL; return 1; } - *buf = NULL; return 0; err: kfree_skb(*buf); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 798397b6811e..2d58ecae4e21 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -146,8 +146,6 @@ static void tipc_data_ready(struct sock *sk); static void tipc_write_space(struct sock *sk); static void tipc_sock_destruct(struct sock *sk); static int tipc_release(struct socket *sock); -static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, - bool kern); static void tipc_sk_timeout(struct timer_list *t); static int tipc_sk_publish(struct tipc_sock *tsk, struct tipc_uaddr *ua); static int tipc_sk_withdraw(struct tipc_sock *tsk, struct tipc_uaddr *ua); @@ -2711,13 +2709,12 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo) * tipc_accept - wait for connection request * @sock: listening socket * @new_sock: new socket that is to be connected - * @flags: file-related flags associated with socket - * @kern: caused by kernel or by userspace? 
+ * @arg: arguments for accept * * Return: 0 on success, errno otherwise */ -static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, - bool kern) +static int tipc_accept(struct socket *sock, struct socket *new_sock, + struct proto_accept_arg *arg) { struct sock *new_sk, *sk = sock->sk; struct tipc_sock *new_tsock; @@ -2733,14 +2730,14 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, res = -EINVAL; goto exit; } - timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK); res = tipc_wait_for_accept(sock, timeo); if (res) goto exit; buf = skb_peek(&sk->sk_receive_queue); - res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, kern); + res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, arg->kern); if (res) goto exit; security_sk_clone(sock->sk, new_sock->sk); diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c index 9fb65c988f7f..30d2e06e3d8c 100644 --- a/net/tipc/sysctl.c +++ b/net/tipc/sysctl.c @@ -91,7 +91,6 @@ static struct ctl_table tipc_table[] = { .mode = 0644, .proc_handler = proc_doulongvec_minmax, }, - {} }; int tipc_register_sysctl(void) diff --git a/net/tipc/trace.h b/net/tipc/trace.h index 04af83f0500c..865142ed0ab4 100644 --- a/net/tipc/trace.h +++ b/net/tipc/trace.h @@ -145,7 +145,7 @@ DECLARE_EVENT_CLASS(tipc_skb_class, ), TP_fast_assign( - __assign_str(header, header); + __assign_str(header); tipc_skb_dump(skb, more, __get_str(buf)); ), @@ -172,7 +172,7 @@ DECLARE_EVENT_CLASS(tipc_list_class, ), TP_fast_assign( - __assign_str(header, header); + __assign_str(header); tipc_list_dump(list, more, __get_str(buf)); ), @@ -200,7 +200,7 @@ DECLARE_EVENT_CLASS(tipc_sk_class, ), TP_fast_assign( - __assign_str(header, header); + __assign_str(header); __entry->portid = tipc_sock_get_portid(sk); tipc_sk_dump(sk, dqueues, __get_str(buf)); if (skb) @@ -254,7 +254,7 @@ DECLARE_EVENT_CLASS(tipc_link_class, ), TP_fast_assign( - __assign_str(header, header); + __assign_str(header); memcpy(__entry->name, tipc_link_name(l), TIPC_MAX_LINK_NAME); tipc_link_dump(l, dqueues, __get_str(buf)); ), @@ -337,7 +337,7 @@ DECLARE_EVENT_CLASS(tipc_node_class, ), TP_fast_assign( - __assign_str(header, header); + __assign_str(header); __entry->addr = tipc_node_get_addr(n); tipc_node_dump(n, more, __get_str(buf)); ), @@ -374,7 +374,7 @@ DECLARE_EVENT_CLASS(tipc_fsm_class, ), TP_fast_assign( - __assign_str(name, name); + __assign_str(name); __entry->os = os; __entry->ns = ns; __entry->evt = evt; @@ -409,8 +409,8 @@ TRACE_EVENT(tipc_l2_device_event, ), TP_fast_assign( - __assign_str(dev_name, dev->name); - __assign_str(b_name, b->name); + __assign_str(dev_name); + __assign_str(b_name); __entry->evt = evt; __entry->b_up = test_bit(0, &b->up); __entry->carrier = netif_carrier_ok(dev); diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index f892b0903dba..b849a3d133a0 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -174,7 +174,7 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb, local_bh_disable(); ndst = dst_cache_get(cache); if (dst->proto == htons(ETH_P_IP)) { - struct rtable *rt = (struct rtable *)ndst; + struct rtable *rt = dst_rtable(ndst); if (!rt) { struct flowi4 fl = { diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index 9237dded4467..f9e3d3d90dcf 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -278,7 +278,7 @@ static int fill_sg_in(struct scatterlist *sg_in, for (i = 0; remaining > 0; i++) { skb_frag_t *frag = 
&record->frags[i]; - __skb_frag_ref(frag, false); + __skb_frag_ref(frag); sg_set_page(sg_in + i, skb_frag_page(frag), skb_frag_size(frag), skb_frag_off(frag)); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index b4674f03d71a..90b7f253d363 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -816,9 +816,17 @@ struct tls_context *tls_ctx_create(struct sock *sk) return NULL; mutex_init(&ctx->tx_lock); - rcu_assign_pointer(icsk->icsk_ulp_data, ctx); ctx->sk_proto = READ_ONCE(sk->sk_prot); ctx->sk = sk; + /* Release semantic of rcu_assign_pointer() ensures that + * ctx->sk_proto is visible before changing sk->sk_prot in + * update_sk_prot(), and prevents reading uninitialized value in + * tls_{getsockopt, setsockopt}. Note that we do not need a + * read barrier in tls_{getsockopt,setsockopt} as there is an + * address dependency between sk->sk_proto->{getsockopt,setsockopt} + * and ctx->sk_proto. + */ + rcu_assign_pointer(icsk->icsk_ulp_data, ctx); return ctx; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index dc1651541723..e4af6616e1df 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -755,7 +755,7 @@ static int unix_bind(struct socket *, struct sockaddr *, int); static int unix_stream_connect(struct socket *, struct sockaddr *, int addr_len, int flags); static int unix_socketpair(struct socket *, struct socket *); -static int unix_accept(struct socket *, struct socket *, int, bool); +static int unix_accept(struct socket *, struct socket *, struct proto_accept_arg *arg); static int unix_getname(struct socket *, struct sockaddr *, int); static __poll_t unix_poll(struct file *, struct socket *, poll_table *); static __poll_t unix_dgram_poll(struct file *, struct socket *, @@ -1689,19 +1689,18 @@ static void unix_sock_inherit_flags(const struct socket *old, set_bit(SOCK_PASSSEC, &new->flags); } -static int unix_accept(struct socket *sock, struct socket *newsock, int flags, - bool kern) +static int unix_accept(struct socket *sock, struct socket *newsock, + struct proto_accept_arg *arg) { struct sock *sk = sock->sk; struct sk_buff *skb; struct sock *tsk; - int err; - err = -EOPNOTSUPP; + arg->err = -EOPNOTSUPP; if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) goto out; - err = -EINVAL; + arg->err = -EINVAL; if (sk->sk_state != TCP_LISTEN) goto out; @@ -1709,12 +1708,12 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags, * so that no locks are necessary. */ - skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0, - &err); + skb = skb_recv_datagram(sk, (arg->flags & O_NONBLOCK) ? MSG_DONTWAIT : 0, + &arg->err); if (!skb) { /* This means receive shutdown. 
*/ - if (err == 0) - err = -EINVAL; + if (arg->err == 0) + arg->err = -EINVAL; goto out; } @@ -1732,7 +1731,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags, return 0; out: - return err; + return arg->err; } @@ -2171,13 +2170,15 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other maybe_add_creds(skb, sock, other); skb_get(skb); + scm_stat_add(other, skb); + + spin_lock(&other->sk_receive_queue.lock); if (ousk->oob_skb) consume_skb(ousk->oob_skb); - WRITE_ONCE(ousk->oob_skb, skb); + __skb_queue_tail(&other->sk_receive_queue, skb); + spin_unlock(&other->sk_receive_queue.lock); - scm_stat_add(other, skb); - skb_queue_tail(&other->sk_receive_queue, skb); sk_send_sigurg(other); unix_state_unlock(other); other->sk_data_ready(other); @@ -2224,7 +2225,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, goto out_err; } - if (sk->sk_shutdown & SEND_SHUTDOWN) + if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) goto pipe_err; while (sent < len) { @@ -2568,8 +2569,10 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state) mutex_lock(&u->iolock); unix_state_lock(sk); + spin_lock(&sk->sk_receive_queue.lock); if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) { + spin_unlock(&sk->sk_receive_queue.lock); unix_state_unlock(sk); mutex_unlock(&u->iolock); return -EINVAL; @@ -2581,6 +2584,8 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state) WRITE_ONCE(u->oob_skb, NULL); else skb_get(oob_skb); + + spin_unlock(&sk->sk_receive_queue.lock); unix_state_unlock(sk); chunk = state->recv_actor(oob_skb, 0, chunk, state); @@ -2609,6 +2614,10 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, consume_skb(skb); skb = NULL; } else { + struct sk_buff *unlinked_skb = NULL; + + spin_lock(&sk->sk_receive_queue.lock); + if (skb == u->oob_skb) { if (copied) { skb = NULL; @@ -2620,13 +2629,19 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, } else if (flags & MSG_PEEK) { skb = NULL; } else { - skb_unlink(skb, &sk->sk_receive_queue); + __skb_unlink(skb, &sk->sk_receive_queue); WRITE_ONCE(u->oob_skb, NULL); - if (!WARN_ON_ONCE(skb_unref(skb))) - kfree_skb(skb); + unlinked_skb = skb; skb = skb_peek(&sk->sk_receive_queue); } } + + spin_unlock(&sk->sk_receive_queue.lock); + + if (unlinked_skb) { + WARN_ON_ONCE(skb_unref(unlinked_skb)); + kfree_skb(unlinked_skb); + } } return skb; } diff --git a/net/unix/garbage.c b/net/unix/garbage.c index d76450133e4f..dfe94a90ece4 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -158,13 +158,11 @@ static void unix_add_edge(struct scm_fp_list *fpl, struct unix_edge *edge) unix_update_graph(unix_edge_successor(edge)); } -static bool gc_in_progress; - static void unix_del_edge(struct scm_fp_list *fpl, struct unix_edge *edge) { struct unix_vertex *vertex = edge->predecessor->vertex; - if (!gc_in_progress) + if (!fpl->dead) unix_update_graph(unix_edge_successor(edge)); list_del(&edge->vertex_entry); @@ -240,7 +238,7 @@ void unix_del_edges(struct scm_fp_list *fpl) unix_del_edge(fpl, edge); } while (i < fpl->count_unix); - if (!gc_in_progress) { + if (!fpl->dead) { receiver = fpl->edges[0].successor; receiver->scm_stat.nr_unix_fds -= fpl->count_unix; } @@ -344,6 +342,18 @@ enum unix_recv_queue_lock_class { U_RECVQ_LOCK_EMBRYO, }; +static void unix_collect_queue(struct unix_sock *u, struct sk_buff_head *hitlist) +{ + skb_queue_splice_init(&u->sk.sk_receive_queue, hitlist); + +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + if 
(u->oob_skb) { + WARN_ON_ONCE(skb_unref(u->oob_skb)); + u->oob_skb = NULL; + } +#endif +} + static void unix_collect_skb(struct list_head *scc, struct sk_buff_head *hitlist) { struct unix_vertex *vertex; @@ -367,18 +377,11 @@ static void unix_collect_skb(struct list_head *scc, struct sk_buff_head *hitlist /* listener -> embryo order, the inversion never happens. */ spin_lock_nested(&embryo_queue->lock, U_RECVQ_LOCK_EMBRYO); - skb_queue_splice_init(embryo_queue, hitlist); + unix_collect_queue(unix_sk(skb->sk), hitlist); spin_unlock(&embryo_queue->lock); } } else { - skb_queue_splice_init(queue, hitlist); - -#if IS_ENABLED(CONFIG_AF_UNIX_OOB) - if (u->oob_skb) { - kfree_skb(u->oob_skb); - u->oob_skb = NULL; - } -#endif + unix_collect_queue(u, hitlist); } spin_unlock(&queue->lock); @@ -559,9 +562,12 @@ static void unix_walk_scc_fast(struct sk_buff_head *hitlist) list_replace_init(&unix_visited_vertices, &unix_unvisited_vertices); } +static bool gc_in_progress; + static void __unix_gc(struct work_struct *work) { struct sk_buff_head hitlist; + struct sk_buff *skb; spin_lock(&unix_gc_lock); @@ -579,6 +585,11 @@ static void __unix_gc(struct work_struct *work) spin_unlock(&unix_gc_lock); + skb_queue_walk(&hitlist, skb) { + if (UNIXCB(skb).fp) + UNIXCB(skb).fp->dead = true; + } + __skb_queue_purge(&hitlist); skip_gc: WRITE_ONCE(gc_in_progress, false); diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c index 44996af61999..357b3e5f3847 100644 --- a/net/unix/sysctl_net_unix.c +++ b/net/unix/sysctl_net_unix.c @@ -19,7 +19,6 @@ static struct ctl_table unix_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { } }; int __net_init unix_sysctl_register(struct net *net) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 54ba7316f808..4b040285aa78 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1500,8 +1500,8 @@ out: return err; } -static int vsock_accept(struct socket *sock, struct socket *newsock, int flags, - bool kern) +static int vsock_accept(struct socket *sock, struct socket *newsock, + struct proto_accept_arg *arg) { struct sock *listener; int err; @@ -1528,7 +1528,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags, /* Wait for children sockets to appear; these are the new sockets * created upon connection establishment. 
*/ - timeout = sock_rcvtimeo(listener, flags & O_NONBLOCK); + timeout = sock_rcvtimeo(listener, arg->flags & O_NONBLOCK); prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); while ((connected = vsock_dequeue_accept(listener)) == NULL && diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index ee5d306a96d0..43d405298857 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -859,7 +859,6 @@ static struct virtio_driver virtio_vsock_driver = { .feature_table = features, .feature_table_size = ARRAY_SIZE(features), .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, .id_table = id_table, .probe = virtio_vsock_probe, .remove = virtio_vsock_remove, diff --git a/net/wireless/Makefile b/net/wireless/Makefile index 72074fd36df4..1d49cc8b6da1 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -25,7 +25,7 @@ ifneq ($(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR),) cfg80211-y += extra-certs.o endif -$(obj)/shipped-certs.c: $(sort $(wildcard $(srctree)/$(src)/certs/*.hex)) +$(obj)/shipped-certs.c: $(sort $(wildcard $(src)/certs/*.hex)) @$(kecho) " GEN $@" $(Q)(echo '#include "reg.h"'; \ echo 'const u8 shipped_regdb_certs[] = {'; \ diff --git a/net/wireless/core.c b/net/wireless/core.c index 3fb1b637352a..4b1f45e3070e 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -431,7 +431,7 @@ static void cfg80211_wiphy_work(struct work_struct *work) if (wk) { list_del_init(&wk->entry); if (!list_empty(&rdev->wiphy_work_list)) - schedule_work(work); + queue_work(system_unbound_wq, work); spin_unlock_irq(&rdev->wiphy_work_lock); wk->func(&rdev->wiphy, wk); diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c index e106dcea3977..c569c37da317 100644 --- a/net/wireless/pmsr.c +++ b/net/wireless/pmsr.c @@ -56,7 +56,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev, out->ftm.burst_period = 0; if (tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD]) out->ftm.burst_period = - nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD]); + nla_get_u16(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD]); out->ftm.asap = !!tb[NL80211_PMSR_FTM_REQ_ATTR_ASAP]; if (out->ftm.asap && !capa->ftm.asap) { @@ -75,7 +75,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev, out->ftm.num_bursts_exp = 0; if (tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP]) out->ftm.num_bursts_exp = - nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP]); + nla_get_u8(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP]); if (capa->ftm.max_bursts_exponent >= 0 && out->ftm.num_bursts_exp > capa->ftm.max_bursts_exponent) { @@ -88,7 +88,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev, out->ftm.burst_duration = 15; if (tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION]) out->ftm.burst_duration = - nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION]); + nla_get_u8(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION]); out->ftm.ftms_per_burst = 0; if (tb[NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST]) @@ -107,7 +107,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev, out->ftm.ftmr_retries = 3; if (tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES]) out->ftm.ftmr_retries = - nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES]); + nla_get_u8(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES]); out->ftm.request_lci = !!tb[NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI]; if (out->ftm.request_lci && !capa->ftm.request_lci) { diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 43897a5269b6..755af47b88b9 100644 --- 
a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -2,7 +2,7 @@ /* * Portions of this file * Copyright(c) 2016-2017 Intel Deutschland GmbH - * Copyright (C) 2018, 2021-2023 Intel Corporation + * Copyright (C) 2018, 2021-2024 Intel Corporation */ #ifndef __CFG80211_RDEV_OPS #define __CFG80211_RDEV_OPS @@ -458,6 +458,10 @@ static inline int rdev_scan(struct cfg80211_registered_device *rdev, struct cfg80211_scan_request *request) { int ret; + + if (WARN_ON_ONCE(!request->n_ssids && request->ssids)) + return -EINVAL; + trace_rdev_scan(&rdev->wiphy, request); ret = rdev->ops->scan(&rdev->wiphy, request); trace_rdev_return_int(&rdev->wiphy, ret); diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 127853877a0a..2f2a3163968a 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -812,6 +812,7 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev) LIST_HEAD(coloc_ap_list); bool need_scan_psc = true; const struct ieee80211_sband_iftype_data *iftd; + size_t size, offs_ssids, offs_6ghz_params, offs_ies; rdev_req->scan_6ghz = true; @@ -877,10 +878,15 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev) spin_unlock_bh(&rdev->bss_lock); } - request = kzalloc(struct_size(request, channels, n_channels) + - sizeof(*request->scan_6ghz_params) * count + - sizeof(*request->ssids) * rdev_req->n_ssids, - GFP_KERNEL); + size = struct_size(request, channels, n_channels); + offs_ssids = size; + size += sizeof(*request->ssids) * rdev_req->n_ssids; + offs_6ghz_params = size; + size += sizeof(*request->scan_6ghz_params) * count; + offs_ies = size; + size += rdev_req->ie_len; + + request = kzalloc(size, GFP_KERNEL); if (!request) { cfg80211_free_coloc_ap_list(&coloc_ap_list); return -ENOMEM; @@ -888,8 +894,26 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev) *request = *rdev_req; request->n_channels = 0; - request->scan_6ghz_params = - (void *)&request->channels[n_channels]; + request->n_6ghz_params = 0; + if (rdev_req->n_ssids) { + /* + * Add the ssids from the parent scan request to the new + * scan request, so the driver would be able to use them + * in its probe requests to discover hidden APs on PSC + * channels. + */ + request->ssids = (void *)request + offs_ssids; + memcpy(request->ssids, rdev_req->ssids, + sizeof(*request->ssids) * request->n_ssids); + } + request->scan_6ghz_params = (void *)request + offs_6ghz_params; + + if (rdev_req->ie_len) { + void *ie = (void *)request + offs_ies; + + memcpy(ie, rdev_req->ie, rdev_req->ie_len); + request->ie = ie; + } /* * PSC channels should not be scanned in case of direct scan with 1 SSID @@ -978,17 +1002,8 @@ skip: if (request->n_channels) { struct cfg80211_scan_request *old = rdev->int_scan_req; - rdev->int_scan_req = request; - /* - * Add the ssids from the parent scan request to the new scan - * request, so the driver would be able to use them in its - * probe requests to discover hidden APs on PSC channels. 
- */ - request->ssids = (void *)&request->channels[request->n_channels]; - request->n_ssids = rdev_req->n_ssids; - memcpy(request->ssids, rdev_req->ssids, sizeof(*request->ssids) * - request->n_ssids); + rdev->int_scan_req = request; /* * If this scan follows a previous scan, save the scan start @@ -2128,7 +2143,8 @@ static bool cfg80211_6ghz_power_type_valid(const u8 *ie, size_t ielen, struct ieee80211_he_operation *he_oper; tmp = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_OPERATION, ie, ielen); - if (tmp && tmp->datalen >= sizeof(*he_oper) + 1) { + if (tmp && tmp->datalen >= sizeof(*he_oper) + 1 && + tmp->datalen >= ieee80211_he_oper_size(tmp->data + 1)) { const struct ieee80211_he_6ghz_oper *he_6ghz_oper; he_oper = (void *)&tmp->data[1]; diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 565511a3f461..62f26618f674 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -5,7 +5,7 @@ * * Copyright 2005-2006 Jiri Benc <jbenc@suse.cz> * Copyright 2006 Johannes Berg <johannes@sipsolutions.net> - * Copyright (C) 2020-2021, 2023 Intel Corporation + * Copyright (C) 2020-2021, 2023-2024 Intel Corporation */ #include <linux/device.h> @@ -137,7 +137,7 @@ static int wiphy_resume(struct device *dev) if (rdev->wiphy.registered && rdev->ops->resume) ret = rdev_resume(rdev); rdev->suspended = false; - schedule_work(&rdev->wiphy_work); + queue_work(system_unbound_wq, &rdev->wiphy_work); wiphy_unlock(&rdev->wiphy); if (ret) diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 9bf987519811..87986170d1b1 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -372,7 +372,7 @@ TRACE_EVENT(rdev_add_virtual_intf, ), TP_fast_assign( WIPHY_ASSIGN; - __assign_str(vir_intf_name, name ? name : "<noname>"); + __assign_str(vir_intf_name); __entry->type = type; ), TP_printk(WIPHY_PR_FMT ", virtual intf name: %s, type: %d", diff --git a/net/wireless/util.c b/net/wireless/util.c index 2bde8a354631..082c6f9c5416 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -2549,6 +2549,7 @@ int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr, { struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; + int ret; wdev = dev->ieee80211_ptr; if (!wdev) @@ -2560,7 +2561,11 @@ int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr, memset(sinfo, 0, sizeof(*sinfo)); - return rdev_get_station(rdev, dev, mac_addr, sinfo); + wiphy_lock(&rdev->wiphy); + ret = rdev_get_station(rdev, dev, mac_addr, sinfo); + wiphy_unlock(&rdev->wiphy); + + return ret; } EXPORT_SYMBOL(cfg80211_get_station); diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index d18d51412cc0..8dda4178497c 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -871,8 +871,8 @@ static int x25_wait_for_data(struct sock *sk, long timeout) return rc; } -static int x25_accept(struct socket *sock, struct socket *newsock, int flags, - bool kern) +static int x25_accept(struct socket *sock, struct socket *newsock, + struct proto_accept_arg *arg) { struct sock *sk = sock->sk; struct sock *newsk; diff --git a/net/x25/sysctl_net_x25.c b/net/x25/sysctl_net_x25.c index e9802afa43d0..643f50874dfe 100644 --- a/net/x25/sysctl_net_x25.c +++ b/net/x25/sysctl_net_x25.c @@ -71,7 +71,6 @@ static struct ctl_table x25_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { }, }; int __init x25_register_sysctl(void) diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index ce60ecd48a4d..c0e0204b9630 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -338,7 +338,6 @@ static struct 
xsk_dma_map *xp_create_dma_map(struct device *dev, struct net_devi dma_map->netdev = netdev; dma_map->dev = dev; - dma_map->dma_need_sync = false; dma_map->dma_pages_cnt = nr_pages; refcount_set(&dma_map->users, 1); list_add(&dma_map->list, &umem->xsk_dma_list); @@ -424,7 +423,6 @@ static int xp_init_dma_info(struct xsk_buff_pool *pool, struct xsk_dma_map *dma_ pool->dev = dma_map->dev; pool->dma_pages_cnt = dma_map->dma_pages_cnt; - pool->dma_need_sync = dma_map->dma_need_sync; memcpy(pool->dma_pages, dma_map->dma_pages, pool->dma_pages_cnt * sizeof(*pool->dma_pages)); @@ -460,8 +458,6 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev, __xp_dma_unmap(dma_map, attrs); return -ENOMEM; } - if (dma_need_sync(dev, dma)) - dma_map->dma_need_sync = true; dma_map->dma_pages[i] = dma; } @@ -557,11 +553,9 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool) xskb->xdp.data_meta = xskb->xdp.data; xskb->xdp.flags = 0; - if (pool->dma_need_sync) { - dma_sync_single_range_for_device(pool->dev, xskb->dma, 0, - pool->frame_len, - DMA_BIDIRECTIONAL); - } + if (pool->dev) + xp_dma_sync_for_device(pool, xskb->dma, pool->frame_len); + return &xskb->xdp; } EXPORT_SYMBOL(xp_alloc); @@ -633,7 +627,7 @@ u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max) { u32 nb_entries1 = 0, nb_entries2; - if (unlikely(pool->dma_need_sync)) { + if (unlikely(pool->dev && dma_dev_need_sync(pool->dev))) { struct xdp_buff *buff; /* Slow path */ @@ -693,18 +687,3 @@ dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr) (addr & ~PAGE_MASK); } EXPORT_SYMBOL(xp_raw_get_dma); - -void xp_dma_sync_for_cpu_slow(struct xdp_buff_xsk *xskb) -{ - dma_sync_single_range_for_cpu(xskb->pool->dev, xskb->dma, 0, - xskb->pool->frame_len, DMA_BIDIRECTIONAL); -} -EXPORT_SYMBOL(xp_dma_sync_for_cpu_slow); - -void xp_dma_sync_for_device_slow(struct xsk_buff_pool *pool, dma_addr_t dma, - size_t size) -{ - dma_sync_single_range_for_device(pool->dev, dma, 0, - size, DMA_BIDIRECTIONAL); -} -EXPORT_SYMBOL(xp_dma_sync_for_device_slow); diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c index 655fe4ff8621..703d4172c7d7 100644 --- a/net/xfrm/xfrm_compat.c +++ b/net/xfrm/xfrm_compat.c @@ -98,6 +98,7 @@ static const int compat_msg_min[XFRM_NR_MSGTYPES] = { }; static const struct nla_policy compat_policy[XFRMA_MAX+1] = { + [XFRMA_UNSPEC] = { .strict_start_type = XFRMA_SA_DIR }, [XFRMA_SA] = { .len = XMSGSIZE(compat_xfrm_usersa_info)}, [XFRMA_POLICY] = { .len = XMSGSIZE(compat_xfrm_userpolicy_info)}, [XFRMA_LASTUSED] = { .type = NLA_U64}, @@ -129,6 +130,7 @@ static const struct nla_policy compat_policy[XFRMA_MAX+1] = { [XFRMA_SET_MARK_MASK] = { .type = NLA_U32 }, [XFRMA_IF_ID] = { .type = NLA_U32 }, [XFRMA_MTIMER_THRESH] = { .type = NLA_U32 }, + [XFRMA_SA_DIR] = NLA_POLICY_RANGE(NLA_U8, XFRM_SA_DIR_IN, XFRM_SA_DIR_OUT), }; static struct nlmsghdr *xfrm_nlmsg_put_compat(struct sk_buff *skb, @@ -277,9 +279,10 @@ static int xfrm_xlate64_attr(struct sk_buff *dst, const struct nlattr *src) case XFRMA_SET_MARK_MASK: case XFRMA_IF_ID: case XFRMA_MTIMER_THRESH: + case XFRMA_SA_DIR: return xfrm_nla_cpy(dst, src, nla_len(src)); default: - BUILD_BUG_ON(XFRMA_MAX != XFRMA_MTIMER_THRESH); + BUILD_BUG_ON(XFRMA_MAX != XFRMA_SA_DIR); pr_warn_once("unsupported nla_type %d\n", src->nla_type); return -EOPNOTSUPP; } @@ -434,7 +437,7 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla, int err; if (type > XFRMA_MAX) { - BUILD_BUG_ON(XFRMA_MAX != XFRMA_MTIMER_THRESH); + BUILD_BUG_ON(XFRMA_MAX != 
XFRMA_SA_DIR); NL_SET_ERR_MSG(extack, "Bad attribute"); return -EOPNOTSUPP; } diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 6346690d5c69..2455a76a1cff 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -253,6 +253,12 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, return -EINVAL; } + if ((xuo->flags & XFRM_OFFLOAD_INBOUND && x->dir == XFRM_SA_DIR_OUT) || + (!(xuo->flags & XFRM_OFFLOAD_INBOUND) && x->dir == XFRM_SA_DIR_IN)) { + NL_SET_ERR_MSG(extack, "Mismatched SA and offload direction"); + return -EINVAL; + } + is_packet_offload = xuo->flags & XFRM_OFFLOAD_PACKET; /* We don't yet support UDP encapsulation and TFC padding. */ diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 161f535c8b94..d2ea18dcb0cb 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -389,11 +389,15 @@ static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) */ static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) { + struct xfrm_offload *xo = xfrm_offload(skb); int ihl = skb->data - skb_transport_header(skb); if (skb->transport_header != skb->network_header) { memmove(skb_transport_header(skb), skb_network_header(skb), ihl); + if (xo) + xo->orig_mac_len = + skb_mac_header_was_set(skb) ? skb_mac_header_len(skb) : 0; skb->network_header = skb->transport_header; } ip_hdr(skb)->tot_len = htons(skb->len + ihl); @@ -404,11 +408,15 @@ static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) { #if IS_ENABLED(CONFIG_IPV6) + struct xfrm_offload *xo = xfrm_offload(skb); int ihl = skb->data - skb_transport_header(skb); if (skb->transport_header != skb->network_header) { memmove(skb_transport_header(skb), skb_network_header(skb), ihl); + if (xo) + xo->orig_mac_len = + skb_mac_header_was_set(skb) ? 
skb_mac_header_len(skb) : 0; skb->network_header = skb->transport_header; } ipv6_hdr(skb)->payload_len = htons(skb->len + ihl - @@ -466,6 +474,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) if (encap_type < 0 || (xo && xo->flags & XFRM_GRO)) { x = xfrm_input_state(skb); + if (unlikely(x->dir && x->dir != XFRM_SA_DIR_IN)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEDIRERROR); + goto drop; + } + if (unlikely(x->km.state != XFRM_STATE_VALID)) { if (x->km.state == XFRM_STATE_ACQ) XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR); @@ -571,6 +584,12 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) goto drop; } + if (unlikely(x->dir && x->dir != XFRM_SA_DIR_IN)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEDIRERROR); + xfrm_state_put(x); + goto drop; + } + skb->mark = xfrm_smark_get(skb->mark, x); sp->xvec[sp->len++] = x; diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c index 4df5c06e3ece..e50e4bf993fa 100644 --- a/net/xfrm/xfrm_interface_core.c +++ b/net/xfrm/xfrm_interface_core.c @@ -926,7 +926,7 @@ static struct net *xfrmi_get_link_net(const struct net_device *dev) { struct xfrm_if *xi = netdev_priv(dev); - return xi->net; + return READ_ONCE(xi->net); } static const struct nla_policy xfrmi_policy[IFLA_XFRM_MAX + 1] = { diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 6affe5cd85d8..475b904fe68b 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2489,6 +2489,12 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl, x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family, policy->if_id); + if (x && x->dir && x->dir != XFRM_SA_DIR_OUT) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEDIRERROR); + xfrm_state_put(x); + error = -EINVAL; + goto fail; + } if (x && x->km.state == XFRM_STATE_VALID) { xfrm[nx++] = x; @@ -2598,8 +2604,7 @@ static void xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, int nfheader_len) { if (dst->ops->family == AF_INET6) { - struct rt6_info *rt = (struct rt6_info *)dst; - path->path_cookie = rt6_get_cookie(rt); + path->path_cookie = rt6_get_cookie(dst_rt6_info(dst)); path->u.rt6.rt6i_nfheader_len = nfheader_len; } } @@ -3593,6 +3598,8 @@ xfrm_policy *xfrm_in_fwd_icmp(struct sk_buff *skb, return pol; pol = xfrm_policy_lookup(net, &fl1, family, XFRM_POLICY_FWD, if_id); + if (IS_ERR(pol)) + pol = NULL; } return pol; diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index 5f9bf8e5c933..eeb984be03a7 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -41,6 +41,8 @@ static const struct snmp_mib xfrm_mib_list[] = { SNMP_MIB_ITEM("XfrmFwdHdrError", LINUX_MIB_XFRMFWDHDRERROR), SNMP_MIB_ITEM("XfrmOutStateInvalid", LINUX_MIB_XFRMOUTSTATEINVALID), SNMP_MIB_ITEM("XfrmAcquireError", LINUX_MIB_XFRMACQUIREERROR), + SNMP_MIB_ITEM("XfrmOutStateDirError", LINUX_MIB_XFRMOUTSTATEDIRERROR), + SNMP_MIB_ITEM("XfrmInStateDirError", LINUX_MIB_XFRMINSTATEDIRERROR), SNMP_MIB_SENTINEL }; diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index ce56d659c55a..bc56c6305725 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -778,7 +778,8 @@ int xfrm_init_replay(struct xfrm_state *x, struct netlink_ext_ack *extack) } if (x->props.flags & XFRM_STATE_ESN) { - if (replay_esn->replay_window == 0) { + if (replay_esn->replay_window == 0 && + (!x->dir || x->dir == XFRM_SA_DIR_IN)) { NL_SET_ERR_MSG(extack, "ESN replay window must be > 0"); return -EINVAL; } diff --git a/net/xfrm/xfrm_state.c 
b/net/xfrm/xfrm_state.c index 0c306473a79d..649bb739df0d 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1292,6 +1292,7 @@ found: if (km_query(x, tmpl, pol) == 0) { spin_lock_bh(&net->xfrm.xfrm_state_lock); x->km.state = XFRM_STATE_ACQ; + x->dir = XFRM_SA_DIR_OUT; list_add(&x->km.all, &net->xfrm.state_all); XFRM_STATE_INSERT(bydst, &x->bydst, net->xfrm.state_bydst + h, @@ -1744,6 +1745,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, x->lastused = orig->lastused; x->new_mapping = 0; x->new_mapping_sport = 0; + x->dir = orig->dir; return x; @@ -1864,8 +1866,14 @@ int xfrm_state_update(struct xfrm_state *x) } if (x1->km.state == XFRM_STATE_ACQ) { + if (x->dir && x1->dir != x->dir) + goto out; + __xfrm_state_insert(x); x = NULL; + } else { + if (x1->dir != x->dir) + goto out; } err = 0; diff --git a/net/xfrm/xfrm_sysctl.c b/net/xfrm/xfrm_sysctl.c index e972930c292b..ca003e8a0376 100644 --- a/net/xfrm/xfrm_sysctl.c +++ b/net/xfrm/xfrm_sysctl.c @@ -38,7 +38,6 @@ static struct ctl_table xfrm_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - {} }; int __net_init xfrm_sysctl_init(struct net *net) @@ -57,10 +56,8 @@ int __net_init xfrm_sysctl_init(struct net *net) table[3].data = &net->xfrm.sysctl_acq_expires; /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) { - table[0].procname = NULL; + if (net->user_ns != &init_user_ns) table_size = 0; - } net->xfrm.sysctl_hdr = register_net_sysctl_sz(net, "net/core", table, table_size); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 810b520493f3..e83c687bd64e 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -130,7 +130,7 @@ static inline int verify_sec_ctx_len(struct nlattr **attrs, struct netlink_ext_a } static inline int verify_replay(struct xfrm_usersa_info *p, - struct nlattr **attrs, + struct nlattr **attrs, u8 sa_dir, struct netlink_ext_ack *extack) { struct nlattr *rt = attrs[XFRMA_REPLAY_ESN_VAL]; @@ -168,6 +168,30 @@ static inline int verify_replay(struct xfrm_usersa_info *p, return -EINVAL; } + + if (sa_dir == XFRM_SA_DIR_OUT) { + if (rs->replay_window) { + NL_SET_ERR_MSG(extack, "Replay window should be 0 for output SA"); + return -EINVAL; + } + if (rs->seq || rs->seq_hi) { + NL_SET_ERR_MSG(extack, + "Replay seq and seq_hi should be 0 for output SA"); + return -EINVAL; + } + if (rs->bmp_len) { + NL_SET_ERR_MSG(extack, "Replay bmp_len should be 0 for output SA"); + return -EINVAL; + } + } + + if (sa_dir == XFRM_SA_DIR_IN) { + if (rs->oseq || rs->oseq_hi) { + NL_SET_ERR_MSG(extack, + "Replay oseq and oseq_hi should be 0 for input SA"); + return -EINVAL; + } + } + return 0; } @@ -176,6 +200,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, struct netlink_ext_ack *extack) { int err; + u8 sa_dir = attrs[XFRMA_SA_DIR] ? 
nla_get_u8(attrs[XFRMA_SA_DIR]) : 0; err = -EINVAL; switch (p->family) { @@ -334,7 +359,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, goto out; if ((err = verify_sec_ctx_len(attrs, extack))) goto out; - if ((err = verify_replay(p, attrs, extack))) + if ((err = verify_replay(p, attrs, sa_dir, extack))) goto out; err = -EINVAL; @@ -358,6 +383,77 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, err = -EINVAL; goto out; } + + if (sa_dir == XFRM_SA_DIR_OUT) { + NL_SET_ERR_MSG(extack, + "MTIMER_THRESH attribute should not be set on output SA"); + err = -EINVAL; + goto out; + } + } + + if (sa_dir == XFRM_SA_DIR_OUT) { + if (p->flags & XFRM_STATE_DECAP_DSCP) { + NL_SET_ERR_MSG(extack, "Flag DECAP_DSCP should not be set for output SA"); + err = -EINVAL; + goto out; + } + + if (p->flags & XFRM_STATE_ICMP) { + NL_SET_ERR_MSG(extack, "Flag ICMP should not be set for output SA"); + err = -EINVAL; + goto out; + } + + if (p->flags & XFRM_STATE_WILDRECV) { + NL_SET_ERR_MSG(extack, "Flag WILDRECV should not be set for output SA"); + err = -EINVAL; + goto out; + } + + if (p->replay_window) { + NL_SET_ERR_MSG(extack, "Replay window should be 0 for output SA"); + err = -EINVAL; + goto out; + } + + if (attrs[XFRMA_REPLAY_VAL]) { + struct xfrm_replay_state *replay; + + replay = nla_data(attrs[XFRMA_REPLAY_VAL]); + + if (replay->seq || replay->bitmap) { + NL_SET_ERR_MSG(extack, + "Replay seq and bitmap should be 0 for output SA"); + err = -EINVAL; + goto out; + } + } + } + + if (sa_dir == XFRM_SA_DIR_IN) { + if (p->flags & XFRM_STATE_NOPMTUDISC) { + NL_SET_ERR_MSG(extack, "Flag NOPMTUDISC should not be set for input SA"); + err = -EINVAL; + goto out; + } + + if (attrs[XFRMA_SA_EXTRA_FLAGS]) { + u32 xflags = nla_get_u32(attrs[XFRMA_SA_EXTRA_FLAGS]); + + if (xflags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP) { + NL_SET_ERR_MSG(extack, "Flag DONT_ENCAP_DSCP should not be set for input SA"); + err = -EINVAL; + goto out; + } + + if (xflags & XFRM_SA_XFLAG_OSEQ_MAY_WRAP) { + NL_SET_ERR_MSG(extack, "Flag OSEQ_MAY_WRAP should not be set for input SA"); + err = -EINVAL; + goto out; + } + + } } out: @@ -734,6 +830,9 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, if (attrs[XFRMA_IF_ID]) x->if_id = nla_get_u32(attrs[XFRMA_IF_ID]); + if (attrs[XFRMA_SA_DIR]) + x->dir = nla_get_u8(attrs[XFRMA_SA_DIR]); + err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV], extack); if (err) goto error; @@ -1182,8 +1281,13 @@ static int copy_to_user_state_extra(struct xfrm_state *x, if (ret) goto out; } - if (x->mapping_maxage) + if (x->mapping_maxage) { ret = nla_put_u32(skb, XFRMA_MTIMER_THRESH, x->mapping_maxage); + if (ret) + goto out; + } + if (x->dir) + ret = nla_put_u8(skb, XFRMA_SA_DIR, x->dir); out: return ret; } @@ -1618,6 +1722,9 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) goto out; + if (attrs[XFRMA_SA_DIR]) + x->dir = nla_get_u8(attrs[XFRMA_SA_DIR]); + resp_skb = xfrm_state_netlink(skb, x, nlh->nlmsg_seq); if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); @@ -2402,7 +2509,8 @@ static inline unsigned int xfrm_aevent_msgsize(struct xfrm_state *x) + nla_total_size_64bit(sizeof(struct xfrm_lifetime_cur)) + nla_total_size(sizeof(struct xfrm_mark)) + nla_total_size(4) /* XFRM_AE_RTHR */ - + nla_total_size(4); /* XFRM_AE_ETHR */ + + nla_total_size(4) /* XFRM_AE_ETHR */ + + nla_total_size(sizeof(x->dir)); /* XFRMA_SA_DIR */ } static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c) @@ -2459,6 +2567,12 @@ static int 
build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct if (err) goto out_cancel; + if (x->dir) { + err = nla_put_u8(skb, XFRMA_SA_DIR, x->dir); + if (err) + goto out_cancel; + } + nlmsg_end(skb, nlh); return 0; @@ -3018,6 +3132,7 @@ EXPORT_SYMBOL_GPL(xfrm_msg_min); #undef XMSGSIZE const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { + [XFRMA_UNSPEC] = { .strict_start_type = XFRMA_SA_DIR }, [XFRMA_SA] = { .len = sizeof(struct xfrm_usersa_info)}, [XFRMA_POLICY] = { .len = sizeof(struct xfrm_userpolicy_info)}, [XFRMA_LASTUSED] = { .type = NLA_U64}, @@ -3049,6 +3164,7 @@ const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_SET_MARK_MASK] = { .type = NLA_U32 }, [XFRMA_IF_ID] = { .type = NLA_U32 }, [XFRMA_MTIMER_THRESH] = { .type = NLA_U32 }, + [XFRMA_SA_DIR] = NLA_POLICY_RANGE(NLA_U8, XFRM_SA_DIR_IN, XFRM_SA_DIR_OUT), }; EXPORT_SYMBOL_GPL(xfrma_policy); @@ -3097,6 +3213,24 @@ static const struct xfrm_link { [XFRM_MSG_GETDEFAULT - XFRM_MSG_BASE] = { .doit = xfrm_get_default }, }; +static int xfrm_reject_unused_attr(int type, struct nlattr **attrs, + struct netlink_ext_ack *extack) +{ + if (attrs[XFRMA_SA_DIR]) { + switch (type) { + case XFRM_MSG_NEWSA: + case XFRM_MSG_UPDSA: + case XFRM_MSG_ALLOCSPI: + break; + default: + NL_SET_ERR_MSG(extack, "Invalid attribute SA_DIR"); + return -EINVAL; + } + } + + return 0; +} + static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -3156,6 +3290,12 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) goto err; + if (!link->nla_pol || link->nla_pol == xfrma_policy) { + err = xfrm_reject_unused_attr((type + XFRM_MSG_BASE), attrs, extack); + if (err < 0) + goto err; + } + if (link->doit == NULL) { err = -EINVAL; goto err; @@ -3189,8 +3329,9 @@ static void xfrm_netlink_rcv(struct sk_buff *skb) static inline unsigned int xfrm_expire_msgsize(void) { - return NLMSG_ALIGN(sizeof(struct xfrm_user_expire)) - + nla_total_size(sizeof(struct xfrm_mark)); + return NLMSG_ALIGN(sizeof(struct xfrm_user_expire)) + + nla_total_size(sizeof(struct xfrm_mark)) + + nla_total_size(sizeof_field(struct xfrm_state, dir)); } static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c) @@ -3217,6 +3358,12 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct if (err) return err; + if (x->dir) { + err = nla_put_u8(skb, XFRMA_SA_DIR, x->dir); + if (err) + return err; + } + nlmsg_end(skb, nlh); return 0; } @@ -3324,6 +3471,9 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x) if (x->mapping_maxage) l += nla_total_size(sizeof(x->mapping_maxage)); + if (x->dir) + l += nla_total_size(sizeof(x->dir)); + return l; } |
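
A pattern worth calling out across the tipc, af_unix, vsock and x25 hunks above: ->accept() no longer takes "int flags, bool kern" but a single struct proto_accept_arg, which also carries the error back to the caller. Below is a minimal sketch of the new convention, assuming the upstream layout of struct proto_accept_arg (flags, err, kern members); demo_accept is a hypothetical callback, not code from any of these files:

	#include <linux/fcntl.h>
	#include <linux/net.h>
	#include <net/sock.h>
	#include <net/tcp_states.h>

	/* Hypothetical ->accept() in the new calling convention. */
	static int demo_accept(struct socket *sock, struct socket *newsock,
			       struct proto_accept_arg *arg)
	{
		struct sock *sk = sock->sk;
		long timeo;

		if (sk->sk_state != TCP_LISTEN) {
			/* the error travels via arg->err as well as the return value */
			arg->err = -EINVAL;
			return arg->err;
		}

		/* O_NONBLOCK still arrives in arg->flags, as it did in 'flags' */
		timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK);

		/* ... wait up to 'timeo' for a child socket here, passing
		 * arg->kern to the socket allocator in place of the old
		 * 'kern' bool ...
		 */
		return 0;
	}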
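
The many one-line removals of "{ }" sentinel entries (the sunrpc, svcrdma, xprtrdma, tipc, unix, x25 and xfrm sysctl tables above) all rely on the same thing: the table length is passed explicitly at registration, so the NULL terminator is dead weight. A condensed sketch of the resulting shape, assuming register_net_sysctl_sz() as already used by the xfrm_sysctl.c hunk; the demo_* names are illustrative only:

	#include <linux/sysctl.h>
	#include <net/net_namespace.h>

	static int demo_sysctl_value;

	static struct ctl_table demo_table[] = {
		{
			.procname	= "demo_value",
			.data		= &demo_sysctl_value,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		/* no { } sentinel: ARRAY_SIZE() below supplies the length */
	};

	static struct ctl_table_header *demo_hdr;

	static int __net_init demo_sysctl_register(struct net *net)
	{
		demo_hdr = register_net_sysctl_sz(net, "net/demo", demo_table,
						  ARRAY_SIZE(demo_table));
		return demo_hdr ? 0 : -ENOMEM;
	}

This also explains the xfrm_sysctl.c hunk dropping "table[0].procname = NULL": with an explicit size, hiding the table from unprivileged users reduces to "table_size = 0".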
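
The cfg80211_scan_6ghz() change above swaps ad hoc pointer arithmetic off channels[] for a precomputed single-allocation layout, so the ie buffer and ssids array can no longer drift from the size computation. The pattern, reduced to its core (demo_alloc_scan_req is illustrative; field names follow struct cfg80211_scan_request, and the 6 GHz params array is omitted for brevity):

	#include <linux/overflow.h>
	#include <linux/slab.h>
	#include <net/cfg80211.h>

	/* One kzalloc() holds the request plus its trailing arrays; each
	 * offset is recorded while the total is summed up.
	 */
	static struct cfg80211_scan_request *
	demo_alloc_scan_req(unsigned int n_channels, unsigned int n_ssids,
			    size_t ie_len)
	{
		struct cfg80211_scan_request *req;
		size_t size, offs_ssids, offs_ies;

		size = struct_size(req, channels, n_channels);
		offs_ssids = size;
		size += sizeof(*req->ssids) * n_ssids;
		offs_ies = size;
		size += ie_len;

		req = kzalloc(size, GFP_KERNEL);
		if (!req)
			return NULL;

		/* the caller copies ssid/ie data and sets the counts */
		req->ssids = (void *)req + offs_ssids;
		req->ie = (void *)req + offs_ies;
		return req;
	}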
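
Finally, the xfrm series threads one idea through many files: an optional per-SA direction (XFRMA_SA_DIR, stored in the x->dir field these patches add), enforced at both ends of the datapath. A condensed sketch of the check the input and output paths now perform; demo_sa_dir_ok() is illustrative, while the constants and counters are the ones added by the diff:

	#include <net/xfrm.h>

	/* An SA with x->dir == 0 predates the attribute and matches either
	 * direction; otherwise it may only be used the way it was created.
	 */
	static bool demo_sa_dir_ok(const struct xfrm_state *x, u8 wanted)
	{
		return !x->dir || x->dir == wanted;
	}

On input, a false result for wanted == XFRM_SA_DIR_IN bumps XfrmInStateDirError and drops the packet; the output-side lookup mirrors this with XfrmOutStateDirError, and verify_newsa_info()/verify_replay() reject flag and replay-state combinations that only make sense for the opposite direction.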